@ -5,16 +5,34 @@ use std.textio.all;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				library work;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				use work.wishbone_types.all;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				use work.utils.all;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				use work.helpers.all;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				entity litedram_wrapper is
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    generic (
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
					DRAM_ABITS     : positive;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
					DRAM_ALINES    : positive;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Pseudo-ROM payload
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        PAYLOAD_SIZE      : natural;    
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        PAYLOAD_FILE      : string;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- L2 cache --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Line size in bytes
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        LINE_SIZE         : positive := 128;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Number of lines in a set
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        NUM_LINES         : positive := 32;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Number of ways
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        NUM_WAYS          : positive := 4;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Max number of stores in the queue
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        STOREQ_DEPTH      : positive := 8;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Don't send loads until all pending stores acked in litedram
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        NO_LS_OVERLAP     : boolean  := false;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- Debug
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        LITEDRAM_TRACE    : boolean  := false
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        LITEDRAM_TRACE    : boolean  := false;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        TRACE             : boolean  := false
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        );
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    port(
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        -- LiteDRAM generates the system clock and reset
 
			
		 
		
	
	
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
				
			
			 
			 
			
				@ -123,13 +141,11 @@ architecture behaviour of litedram_wrapper is
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal user_port0_rdata_ready       : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal user_port0_rdata_data        : std_ulogic_vector(127 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal ad3                          : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_adr                  : std_ulogic_vector(29 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_dat_w                : std_ulogic_vector(31 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_dat_r                : std_ulogic_vector(31 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_sel                  : std_ulogic_vector(3 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_cyc                  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_cyc                  : std_ulogic := '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_stb                  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_ack                  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_ctrl_we                   : std_ulogic;
 
			
		 
		
	
	
		
			
				
					
						
						
						
							
								 
							 
						
					 
				
			
			 
			 
			
				@ -137,11 +153,239 @@ architecture behaviour of litedram_wrapper is
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_init_in                   : wb_io_master_out;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_init_out                  : wb_io_slave_out;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type state_t is (CMD, MWRITE, MREAD);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- DRAM data port width
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant DRAM_DBITS                 : natural := 128;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant DRAM_SBITS                 : natural := (DRAM_DBITS / 8);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- BRAM organisation: We never access more than wishbone_data_bits at
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- a time so to save resources we make the array only that wide, and
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- use consecutive indices to make a cache "line"
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- ROW_SIZE is the width in bytes of the BRAM (based on litedram, so 128-bits)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant ROW_SIZE      : natural := DRAM_DBITS / 8;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- ROW_PER_LINE is the number of rows (litedram transactions) in a line
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- BRAM_ROWS is the number of rows in BRAM needed to represent the full
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- dcache
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant BRAM_ROWS     : natural := NUM_LINES * ROW_PER_LINE;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Bit fields counts in the address
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- ROW_BITS is the number of bits to select a row
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant ROW_BITS      : natural := log2(BRAM_ROWS);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- ROW_LINEBITS is the number of bits to select a row within a line
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- LINE_OFF_BITS is the number of bits for the offset in a cache line
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- ROW_OFF_BITS is the number of bits for the offset in a row
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant ROW_OFF_BITS  : natural := log2(ROW_SIZE);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- REAL_ADDR_BITS is the number of real address bits that we store
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant REAL_ADDR_BITS : positive := DRAM_ABITS + ROW_OFF_BITS;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- INDEX_BITS is the number of bits to select a cache line
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant INDEX_BITS    : natural := log2(NUM_LINES);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- SET_SIZE_BITS is the log base 2 of the set size
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- TAG_BITS is the number of bits of the tag part of the address
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- WAY_BITS is the number of bits to select a way
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant WAY_BITS     : natural := log2(NUM_WAYS);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype row_t is integer range 0 to BRAM_ROWS-1;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype index_t is integer range 0 to NUM_LINES-1;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype way_t is integer range 0 to NUM_WAYS-1;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- The cache data BRAM organized as described above for each way
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype cache_row_t is std_ulogic_vector(DRAM_DBITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- The cache tags LUTRAM has a row per set. Vivado is a pain and will
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- not handle a clean (commented) definition of the cache tags as a 3d
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- memory. For now, work around it by putting all the tags of a set
				    -- side by side in a single flat vector (TAG_BITS * NUM_WAYS wide).
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				--    type cache_tags_set_t is array(way_t) of cache_tag_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				--    type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- The cache valid bits
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type cache_valids_t is array(index_t) of cache_way_valids_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal cache_tags   : cache_tags_array_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal cache_valids : cache_valids_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    attribute ram_style : string;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    attribute ram_style of cache_tags : signal is "distributed";
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Store queue signals
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- We store a single wishbone dword per entry (64-bit) but all
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- 16 sel bits for the DRAM.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- XXX Investigate storing only AD3 and 8 sel bits if it's better
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    constant STOREQ_BITS  : positive := wishbone_data_bits + DRAM_SBITS;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal storeq_rd_ready : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal storeq_rd_valid : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal storeq_rd_data  : std_ulogic_vector(STOREQ_BITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal storeq_wr_ready : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal storeq_wr_valid : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal storeq_wr_data  : std_ulogic_vector(STOREQ_BITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Cache management signals
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				     -- Cache state machine
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type state_t is (IDLE,             -- Normal load hit processing
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                     REFILL_WAIT_ACK); -- Cache refill wait ack
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal state : state_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Latched WB request.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal wb_req : wishbone_master_out := wishbone_master_out_init;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Read pipeline (to handle cache RAM latency)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal read_ack_0  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal read_ack_1  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal read_ad3_0  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal read_ad3_1  : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal read_way_0  : way_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal read_way_1  : way_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Async signals decoding latched request
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type req_op_t is (OP_NONE,
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                      OP_LOAD_HIT,
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                      OP_LOAD_MISS,
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                      OP_STORE_HIT,
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                      OP_STORE_MISS);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_index    : index_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_row      : row_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_hit_way  : way_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_tag      : cache_tag_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_op       : req_op_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_laddr    : std_ulogic_vector(REAL_ADDR_BITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_ad3      : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_we       : std_ulogic_vector(DRAM_SBITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal req_wdata    : std_ulogic_vector(DRAM_DBITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal accept_store : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Line refill command signals and latches
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal refill_cmd_valid : std_ulogic;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal refill_cmd_addr  : std_ulogic_vector(DRAM_ABITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal refill_way       : way_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal refill_index     : index_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal refill_row       : row_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Cache RAM interface
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type cache_ram_out_t is array(way_t) of cache_row_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal cache_out   : cache_ram_out_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- PLRU output interface
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    signal plru_victim : plru_out_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Helper functions to decode incoming requests
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Extract the cache line index (which is also the tag RAM index) from
				    -- an address: bits SET_SIZE_BITS-1 downto LINE_OFF_BITS, read as an
				    -- unsigned number.
				    function get_index(addr: wishbone_addr_type) return index_t is
				        variable index_field : unsigned(SET_SIZE_BITS - LINE_OFF_BITS - 1 downto 0);
				    begin
				        index_field := unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS));
				        return to_integer(index_field);
				    end function get_index;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Extract the cache row index (the data memory address) from a real
				    -- address: bits SET_SIZE_BITS-1 downto ROW_OFF_BITS, read as an
				    -- unsigned number.
				    function get_row(addr: std_ulogic_vector(REAL_ADDR_BITS-1 downto 0)) return row_t is
				        variable row_field : unsigned(SET_SIZE_BITS - ROW_OFF_BITS - 1 downto 0);
				    begin
				        row_field := unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS));
				        return to_integer(row_field);
				    end function get_row;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Tell whether a row-granular address (no bits below ROW_OFF_BITS)
				    -- designates the last row of its cache line, i.e. whether every
				    -- "row within line" bit (LINE_OFF_BITS-1 downto ROW_OFF_BITS) is '1'.
				    function is_last_row_addr(addr: std_ulogic_vector(REAL_ADDR_BITS-1 downto ROW_OFF_BITS))
				        return boolean is
				        variable all_set : boolean := true;
				    begin
				        -- An empty bit range (single row per line) leaves all_set true
				        for bit_pos in LINE_OFF_BITS-1 downto ROW_OFF_BITS loop
				            all_set := all_set and (addr(bit_pos) = '1');
				        end loop;
				        return all_set;
				    end function is_last_row_addr;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Tell whether a cache row number is the last row of its cache line,
				    -- i.e. whether its ROW_LINEBITS low-order bits are all '1'.
				    function is_last_row(row: row_t) return boolean is
				        constant LINE_END : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
				        variable row_bits : std_ulogic_vector(ROW_BITS-1 downto 0);
				        variable low_bits : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
				    begin
				        -- Convert to a vector so the "row within line" field can be sliced out
				        row_bits := std_ulogic_vector(to_unsigned(row, ROW_BITS));
				        low_bits := row_bits(ROW_LINEBITS-1 downto 0);
				        return low_bits = LINE_END;
				    end function is_last_row;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Compute the address of the next row within the same cache line.
				    -- Takes and returns a row-granular address (no bits below ROW_OFF_BITS).
				    -- Only the "row within line" field (LINE_OFF_BITS-1 downto ROW_OFF_BITS)
				    -- is incremented, and it wraps around; all other bits are returned
				    -- unchanged.
				    function next_row_addr(addr: std_ulogic_vector(REAL_ADDR_BITS-1 downto ROW_OFF_BITS))
				        return std_ulogic_vector is
				        variable next_addr : std_ulogic_vector(REAL_ADDR_BITS-1 downto ROW_OFF_BITS);
				        variable line_row  : unsigned(ROW_LINEBITS-1 downto 0);
				    begin
				        next_addr := addr;
				        -- Keep the adder as narrow as the row-in-line field
				        line_row := unsigned(addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)) + 1;
				        next_addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := std_ulogic_vector(line_row);
				        return next_addr;
				    end function next_row_addr;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Return the next row in the current cache line. We use a dedicated
				    -- function in order to limit the size of the generated adder to be
				    -- only the bits within a cache line (3 bits with default settings).
				    --
				    -- Only the ROW_LINEBITS low-order bits of the row number are
				    -- incremented; they wrap around to zero after the last row of the
				    -- line, and the upper (line index) bits are left untouched.
				    function next_row(row: row_t) return row_t is
				        variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
				        variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
				    begin
				        row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
				        -- Isolate the row-within-line field so the adder stays narrow
				        row_idx := row_v(ROW_LINEBITS-1 downto 0);
				        row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
				        return to_integer(unsigned(row_v));
				    end;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Extract the tag from an address: bits REAL_ADDR_BITS-1 downto
				    -- SET_SIZE_BITS.
				    function get_tag(addr: wishbone_addr_type) return cache_tag_t is
				        variable tag_field : cache_tag_t;
				    begin
				        tag_field := addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
				        return tag_field;
				    end function get_tag;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Read the tag of one way out of a tag memory row. The tag of way N
				    -- occupies bits (N+1)*TAG_BITS-1 downto N*TAG_BITS of the row.
				    function read_tag(way: way_t; tagset: cache_tags_set_t) return cache_tag_t is
				        variable way_tag : cache_tag_t;
				    begin
				        -- Copy bit by bit, offsetting into the slot that belongs to this way
				        for bit_pos in cache_tag_t'range loop
				            way_tag(bit_pos) := tagset(way * TAG_BITS + bit_pos);
				        end loop;
				        return way_tag;
				    end function read_tag;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Store a tag into the slot of one way inside a tag memory row. The
				    -- slot of way N is bits (N+1)*TAG_BITS-1 downto N*TAG_BITS; the other
				    -- bits of the row are left as they were.
				    procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
				                        tag: cache_tag_t) is
				    begin
				        -- Copy bit by bit, offsetting into the slot that belongs to this way
				        for bit_pos in cache_tag_t'range loop
				            tagset(way * TAG_BITS + bit_pos) := tag(bit_pos);
				        end loop;
				    end procedure write_tag;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				begin
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Sanity checks on the cache geometry. Any violation is reported
				    -- with severity FAILURE.
				    assert (LINE_SIZE mod ROW_SIZE) = 0
				        report "LINE_SIZE not multiple of ROW_SIZE"
				        severity FAILURE;
				    assert ispow2(LINE_SIZE)
				        report "LINE_SIZE not power of 2"
				        severity FAILURE;
				    assert ispow2(NUM_LINES)
				        report "NUM_LINES not power of 2"
				        severity FAILURE;
				    assert ispow2(ROW_PER_LINE)
				        report "ROW_PER_LINE not power of 2"
				        severity FAILURE;
				    -- The address bit fields must add up exactly to the row index, the
				    -- line offset and the real address widths
				    assert INDEX_BITS + ROW_LINEBITS = ROW_BITS
				        report "geometry bits don't add up"
				        severity FAILURE;
				    assert ROW_OFF_BITS + ROW_LINEBITS = LINE_OFF_BITS
				        report "geometry bits don't add up"
				        severity FAILURE;
				    assert TAG_BITS + INDEX_BITS + LINE_OFF_BITS = REAL_ADDR_BITS
				        report "geometry bits don't add up"
				        severity FAILURE;
				    assert TAG_BITS + ROW_BITS + ROW_OFF_BITS = REAL_ADDR_BITS
				        report "geometry bits don't add up"
				        severity FAILURE;
				    -- The DRAM data port width is fixed for now
				    assert DRAM_DBITS = 128
				        report "Can't yet handle a DRAM width that isn't 128-bits"
				        severity FAILURE;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- alternate core reset address set when DRAM is not initialized.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    core_alt_reset <= not init_done;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
	
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
				
			
			 
			 
			
				@ -170,7 +414,15 @@ begin
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    wb_init_in.stb <= wb_ctrl_in.stb;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- DRAM CSR IN signals
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- DRAM CSR IN signals. Extra latch to help with timing
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    csr_latch: process(system_clk)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    begin
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        if rising_edge(system_clk) then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            if system_reset = '1' then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_cyc <= '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_stb <= '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            else
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                -- XXX Maybe only update addr when cyc = '1' to save power ?
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_adr   <= x"0000" & wb_ctrl_in.adr(15 downto 2);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_dat_w <= wb_ctrl_in.dat;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_sel   <= wb_ctrl_in.sel;
 
			
		 
		
	
	
		
			
				
					
						
						
						
							
								 
							 
						
					 
				
			
			 
			 
			
				@ -178,7 +430,19 @@ begin
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_cyc   <= wb_ctrl_in.cyc and wb_ctrl_is_csr;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                wb_ctrl_stb   <= wb_ctrl_in.stb and wb_ctrl_is_csr;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Ctrl bus wishbone OUT signals
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                -- Clear stb on ack otherwise the memory will latch
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                -- the write twice which breaks levelling. On the next
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                -- cycle we will latch an updated stb that takes the
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                -- ack into account.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                if wb_ctrl_ack = '1' then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    wb_ctrl_stb <= '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Ctrl bus wishbone OUT signals. XXX Consider adding latch on
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- CSR response to help timing
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    wb_ctrl_out.ack   <= wb_ctrl_ack when wb_ctrl_is_csr = '1'
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                         else wb_init_out.ack;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    wb_ctrl_out.dat   <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1'
 
			
		 
		
	
	
		
			
				
					
						
						
						
							
								 
							 
						
					 
				
			
			 
			 
			
				@ -186,56 +450,531 @@ begin
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                         '0' when wb_ctrl_in.cyc = '0' else not wb_ctrl_ack;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Generate a cache RAM for each way
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    rams: for i in 0 to NUM_WAYS-1 generate
				        -- Read side of this way's RAM
				        signal do_read  : std_ulogic;
				        signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
				        signal dout     : cache_row_t;
				        -- Write side of this way's RAM
				        signal do_write : std_ulogic;
				        signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
				        signal wr_data  : std_ulogic_vector(DRAM_DBITS-1 downto 0);
				        -- Write selects before (wr_sel) and after (wr_sel_m) gating
				        -- with do_write
				        signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
				        signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
				    begin
				        -- One cache_ram instance per way; only the gated selects
				        -- (wr_sel_m) reach the RAM.
				        way: entity work.cache_ram
				            generic map (
				                ROW_BITS => ROW_BITS,
				                WIDTH    => DRAM_DBITS,
				                ADD_BUF  => true
				                )
				            port map (
				                clk     => system_clk,
				                rd_en   => do_read,
				                rd_addr => rd_addr,
				                rd_data => dout,
				                wr_sel  => wr_sel_m,
				                wr_addr => wr_addr,
				                wr_data => wr_data
				                );

				        -- Combinational control of the RAM ports of way "i"
				        process(all)
				        begin
				            -- Read port: permanently enabled, addressed by the row of
				            -- the latched request; the output is exported per way.
				            rd_addr      <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
				            do_read      <= '1';
				            cache_out(i) <= dout;

				            -- Write port source selection
				            if state /= IDLE then
				                -- Not idle: refill data coming back from DRAM, all
				                -- selects set
				                wr_addr <= std_ulogic_vector(to_unsigned(refill_row, ROW_BITS));
				                wr_data <= user_port0_rdata_data;
				                wr_sel  <= (others => '1');
				            else
				                -- Idle: store data from the latched wishbone request
				                wr_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
				                wr_data <= req_wdata;
				                wr_sel  <= req_we;
				            end if;

				            -- Write enable: a store hit on this way, or valid refill
				            -- data targeting this way.
				            if (req_op = OP_STORE_HIT and req_hit_way = i) or
				               (user_port0_rdata_valid = '1' and refill_way = i) then
				                do_write <= '1';
				            else
				                do_write <= '0';
				            end if;

				            -- The selects are gated with do_write rather than using a
				            -- global write-enable, since BRAM doesn't always have one
				            -- (Vivado generates TDP instead of SDP when using one,
				            -- thus doubling cache BRAM usage).
				            for lane in wr_sel_m'range loop
				                wr_sel_m(lane) <= wr_sel(lane) and do_write;
				            end loop;

				            -- Optional trace of every write, sampled on the clock edge
				            if TRACE and rising_edge(system_clk) and do_write = '1' then
				                report "cache write way:" & integer'image(i) &
				                    " addr:" & to_hstring(wr_addr) &
				                    " sel:" & to_hstring(wr_sel_m) &
				                    " data:" & to_hstring(wr_data);
				            end if;
				        end process;
				    end generate;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Generate PLRUs
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    maybe_plrus: if NUM_WAYS > 1 generate
				    begin
				        -- One PLRU instance per cache line index
				        plrus: for i in 0 to NUM_LINES-1 generate
				            -- Signals wired to the plru instance of line "i"
				            signal plru_acc_en : std_ulogic;
				            signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
				            signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
				        begin
				            plru : entity work.plru
				                generic map (
				                    BITS => WAY_BITS
				                    )
				                port map (
				                    clk    => system_clk,
				                    rst    => system_reset,
				                    acc    => plru_acc,
				                    acc_en => plru_acc_en,
				                    lru    => plru_out
				                    );

				            -- Combinational glue between the request decode and the
				            -- plru instance.
				            process(all)
				            begin
				                -- Export the LRU output for this line
				                plru_victim(i) <= plru_out;

				                -- The accessed way is always the hit way of the
				                -- current request; it only matters when acc_en is set.
				                plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));

				                -- Signal an access only for a load or store hit
				                -- whose index selects this line.
				                plru_acc_en <= '0';
				                if req_index = i and
				                    (req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT) then
				                    plru_acc_en <= '1';
				                end if;
				            end process;
				        end generate;
				    end generate;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Wishbone interface:
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --  - Incoming wishbone request latch (to help with timing)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --  - Read response pipeline (to match BRAM output buffer delay)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --  - Stall generation
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- XXX TODO: Properly handle cyc drops before all acks are sent...
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    request_latch: process(system_clk)
				    begin
				        if rising_edge(system_clk) then
				            -- The latch only advances while we are not stalling the
				            -- wishbone master. The absence of a request is latched as
				            -- well, so this behaves as a one-entry pipeline stage.
				            if wb_out.stall = '0' then
				                -- cyc and stb are always captured ...
				                wb_req.cyc <= wb_in.cyc;
				                wb_req.stb <= wb_in.stb;
				                -- ... while the whole record (address, data, ...) is
				                -- only captured when cyc is set, to avoid constantly
				                -- updating addr/data for unrelated requests. This
				                -- later assignment takes precedence for cyc and stb.
				                if wb_in.cyc = '1' then
				                    wb_req <= wb_in;
				                end if;

				                -- Optional trace of every request being latched
				                if TRACE and wb_in.cyc = '1' and wb_in.stb = '1' then
				                    report "latch new wb req ! addr:" & to_hstring(wb_in.adr) &
				                        " we:" & std_ulogic'image(wb_in.we) &
				                        " sel:" & to_hstring(wb_in.sel);
				                end if;
				            end if;
				        end if;
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Read response pipeline
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- XXX Might have to put store acks in there too (see comment in wb_response)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    read_pipe: process(system_clk)
				    begin
				        if rising_edge(system_clk) then
				            -- Second stage: copy of the first stage, one cycle later
				            read_way_1 <= read_way_0;
				            read_ad3_1 <= read_ad3_0;
				            read_ack_1 <= read_ack_0;

				            -- First stage: capture the hit way and address bit 3 of
				            -- the current request; an ack is only queued for a load hit.
				            read_way_0 <= req_hit_way;
				            read_ad3_0 <= req_ad3;
				            if req_op = OP_LOAD_HIT then
				                read_ack_0 <= '1';
				            else
				                read_ack_0 <= '0';
				            end if;

				            -- Optional tracing of loads and of the data read back
				            if TRACE then
				                case req_op is
				                when OP_LOAD_HIT =>
				                    report "Load hit addr:" & to_hstring(wb_req.adr) &
				                        " idx:" & integer'image(req_index) &
				                        " tag:" & to_hstring(req_tag) &
				                        " way:" & integer'image(req_hit_way);
				                when OP_LOAD_MISS =>
				                    report "Load miss addr:" & to_hstring(wb_req.adr);
				                when others =>
				                    null;
				                end case;
				                if read_ack_0 = '1' then
				                    report "read data:" & to_hstring(cache_out(read_way_0));
				                end if;
				            end if;
				        end if;
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Combinational wishbone response logic: computes accept_store,
				    -- wb_out.stall, wb_out.dat and wb_out.ack from the cache state,
				    -- the decoded request (req_op) and the read ack pipeline.
				    --
				    -- NOTE(review): the statements below that sit at the shallower
				    -- indent and reference the states CMD, MWRITE and MREAD or drive
				    -- user_port0_* look like lines from a previous revision that got
				    -- interleaved with this process (the "case state" further down
				    -- only covers IDLE and REFILL_WAIT_ACK). Confirm against the
				    -- repository before relying on them.
				    wb_reponse: process(all)
				        -- Row read from the way selected by the second read pipeline stage
				        variable rdata      : std_ulogic_vector(DRAM_DBITS-1 downto 0);
				        -- Set when a store is acked in the current cycle
				        variable store_done : std_ulogic;
				    begin
				        -- Can we accept a store ? This is set when IDLE and the store
				        -- queue & command queue are not full.
				        --
				    -- Data bus wishbone to LiteDRAM native port
				        -- Note: This is only used to control the WB request latch, stall
				        -- and store "early complete". We don't want to use this to control
				        -- cmd_valid to DRAM as this would create a circular dependency inside
				        -- LiteDRAM as cmd_ready I think is driven from cmd_valid.
				        --
				    -- Address bit 3 selects the top or bottom half of the data
				    -- bus (64-bit wishbone vs. 128-bit DRAM interface)
				        -- The state machine that controls the command queue must thus
				        -- reproduce this logic at least partially.
				        --
				    -- XXX TODO: Figure out how to pipeline this
				        -- Note also that user_port0_cmd_ready from LiteDRAM is combinational
				        -- from user_port0_cmd_valid. IE. we won't know that LiteDRAM cannot
				        -- accept a command until we try to send one.
				        --
				    -- NOTE(review): presumably a previous-revision line - confirm.
				    ad3 <= wb_in.adr(3);
				        if state = IDLE then
				            accept_store <= user_port0_cmd_ready and storeq_wr_ready;

				    -- Wishbone port IN signals
				    -- NOTE(review): the user_port0_* assignments below use states
				    -- (CMD, MWRITE, MREAD) that the rest of this process never
				    -- mentions; presumably previous-revision lines - confirm.
				    user_port0_cmd_valid   <= wb_in.cyc and wb_in.stb when state = CMD else '0';
				    user_port0_cmd_we      <= wb_in.we when state = CMD else '0';
				    user_port0_wdata_valid <= '1' when state = MWRITE else '0';
				    user_port0_rdata_ready <= '1' when state = MREAD else '0';
				    user_port0_cmd_addr    <= wb_in.adr(DRAM_ABITS+3 downto 4);
				    user_port0_wdata_data  <= wb_in.dat & wb_in.dat;
				    user_port0_wdata_we    <= wb_in.sel & "00000000" when ad3 = '1' else
				                              "00000000" & wb_in.sel;
				            -- Corner case !!! The read acks pipeline takes two extra cycles
				            -- which means a store ack can collide with a previous load hit
				            -- ack. Thus we stall stores if we have a load ack pending.
				            if read_ack_0 = '1' or read_ack_1 = '1' then
				                accept_store <= '0';
				            end if;
				        else
				            -- Stores are never accepted outside of the IDLE state
				            accept_store <= '0';
				        end if;

				        -- Generate stalls. For loads, we stall if we are going to take a load
				        -- miss or are in the middle of a refill. For stores, if we can't
				        -- accept it.
				        case state is
				        when IDLE =>
				            case req_op is
				            when OP_LOAD_MISS =>
				                wb_out.stall <= '1';
				            when OP_STORE_MISS | OP_STORE_HIT =>
				                wb_out.stall <= not accept_store;
				            when others =>
				                wb_out.stall <= '0';
				            end case;
				        when REFILL_WAIT_ACK =>
				            wb_out.stall <= '1';
				        end case;
				        
				    -- Wishbone OUT signals
				    -- NOTE(review): presumably a previous-revision assignment; wb_out.ack
				    -- is assigned again at the end of this process - confirm.
				    wb_out.ack <= user_port0_wdata_ready when state = MWRITE else
						  user_port0_rdata_valid when state = MREAD else '0';
				        -- Data out mux: pick the row of the way recorded in the second
				        -- read pipeline stage, then the upper or lower 64 bits according
				        -- to the address bit 3 recorded in that same stage.
				        rdata := cache_out(read_way_1);
				        wb_out.dat <= rdata(127 downto 64) when read_ad3_1 = '1' else rdata(63 downto 0);

				    -- NOTE(review): second assignment to wb_out.dat, sourced directly from
				    -- the DRAM read data; presumably a previous-revision line - confirm.
				    wb_out.dat <= user_port0_rdata_data(127 downto 64) when ad3 = '1' else
						  user_port0_rdata_data(63 downto 0);
				        -- Early-complete stores on wishbone: a store (hit or miss) is
				        -- done as soon as it is accepted.
				        if req_op = OP_STORE_HIT or req_op = OP_STORE_MISS then
				            store_done := accept_store;
				        else
				            store_done := '0';
				        end if;

				    -- We don't do pipelining yet.
				    -- NOTE(review): second assignment to wb_out.stall, after the
				    -- "case state" above; presumably a previous-revision line - confirm.
				    wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack;
				        -- Generate ACKs on read hits and store complete
				        --
				        -- XXXX TODO: This can happen on store right behind loads !
				        -- This probably needs to be fixed by putting store acks in
				        -- the same pipeline as the read acks. TODO: Create a testbench
				        -- to exercise those corner cases as the core can't yet.
				        --
				        wb_out.ack <= read_ack_1 or store_done;
				        -- NOTE(review): the ack above is built from read_ack_1 but this
				        -- assertion tests read_ack_0 - confirm which pipeline stage is
				        -- meant to be checked against store_done.
				        assert read_ack_0 = '0' or store_done = '0' report
				            "Read ack and store ack collision !"
				            severity failure;
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- DRAM user port State machine
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    sm: process(system_clk)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Cache request decode
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    request_decode: process(all)
				        variable have_req  : std_ulogic;
				        variable hit_found : std_ulogic;
				        variable way_found : way_t;
				    begin
				        -- Split the latched request address into index, row and tag
				        req_tag   <= get_tag(wb_req.adr);
				        req_row   <= get_row(wb_req.adr(REAL_ADDR_BITS-1 downto 0));
				        req_index <= get_index(wb_req.adr);

				        -- Address of the first byte of the cache line (line offset
				        -- bits forced to zero), used by the cache miss processing.
				        req_laddr <= wb_req.adr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) &
				                     (LINE_OFF_BITS-1 downto 0 => '0');

				        -- Store path: the wishbone data is replicated on both halves
				        -- of the row and address bit 3 picks which half gets the
				        -- write selects.
				        req_ad3   <= wb_req.adr(3);
				        req_wdata <= wb_req.dat & wb_req.dat;
				        if req_ad3 = '1' then
				            req_we <= wb_req.sel & "00000000";
				        else
				            req_we <= "00000000" & wb_req.sel;
				        end if;

				        -- The latched request is only considered while in IDLE state
				        have_req := '0';
				        if state = IDLE then
				            have_req := wb_req.cyc and wb_req.stb;
				        end if;

				        -- Look for a valid way whose tag matches the request. When no
				        -- way matches, way 0 is reported with no hit.
				        way_found := 0;
				        hit_found := '0';
				        for w in way_t loop
				            if have_req = '1' and cache_valids(req_index)(w) = '1' and
				                read_tag(w, cache_tags(req_index)) = req_tag then
				                way_found := w;
				                hit_found := '1';
				            end if;
				        end loop;
				        req_hit_way <= way_found;

				        -- Classify the request. Since a request is only considered
				        -- in IDLE state, anything other than OP_NONE is generated
				        -- only while IDLE; PLRU update and ACK generation rely on
				        -- this.
				        if have_req /= '1' then
				            req_op <= OP_NONE;
				        elsif wb_req.we = '1' and hit_found = '1' then
				            req_op <= OP_STORE_HIT;
				        elsif wb_req.we = '1' then
				            req_op <= OP_STORE_MISS;
				        elsif hit_found = '1' then
				            req_op <= OP_LOAD_HIT;
				        else
				            req_op <= OP_LOAD_MISS;
				        end if;
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Store queue
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Queue up to STOREQ_DEPTH stores
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    store_queue: entity work.sync_fifo
				        -- FIFO geometry is taken from the STOREQ_* values rather than
				        -- being hard-coded at the instantiation.
				        generic map (
				            WIDTH => STOREQ_BITS,
				            DEPTH => STOREQ_DEPTH
				        )
				        port map (
				            -- Clock and reset
				            reset    => system_reset,
				            clk      => system_clk,

				            -- Write side (driven by storeq_control)
				            wr_valid => storeq_wr_valid,
				            wr_ready => storeq_wr_ready,
				            wr_data  => storeq_wr_data,

				            -- Read side (feeds the user_port0 write-data channel)
				            rd_valid => storeq_rd_valid,
				            rd_ready => storeq_rd_ready,
				            rd_data  => storeq_rd_data
				        );
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- Store queue control.
				    --
				    -- Combinationally:
				    --   * builds the word pushed into the store queue (wishbone data
				    --     concatenated with the byte-enable vector req_we),
				    --   * asserts the queue write-valid only for store operations, and
				    --     only while the DRAM command port reports ready,
				    --   * splits the word at the head of the queue back into data and
				    --     byte enables and drives the user_port0 write-data channel.
				    --
				    -- When TRACE is set, store activity and queue push/pop events are
				    -- also reported on each rising edge of system_clk.
				    storeq_control : process(all)
				        variable stq_data : wishbone_data_type;
				        variable stq_sel  : std_ulogic_vector(DRAM_SBITS-1 downto 0);
				    begin
				        -- Queue entry layout: data in the upper bits, byte enables in
				        -- the low DRAM_SBITS bits (see the matching slices below).
				        storeq_wr_data <= wb_req.dat & req_we;

				        -- Only accept store if we can send a command
				        if req_op = OP_STORE_HIT or req_op = OP_STORE_MISS then
				            storeq_wr_valid <= user_port0_cmd_ready;
				        else
				            storeq_wr_valid <= '0';
				        end if;

				        -- Unpack the entry at the head of the queue.
				        stq_data := storeq_rd_data(storeq_rd_data'left downto DRAM_SBITS);
				        stq_sel  := storeq_rd_data(DRAM_SBITS-1 downto 0);

				        -- The data word is replicated on both halves of the DRAM write
				        -- bus; stq_sel selects which bytes are actually written.
				        user_port0_wdata_data  <= stq_data & stq_data;
				        user_port0_wdata_we    <= stq_sel;
				        user_port0_wdata_valid <= storeq_rd_valid;
				        storeq_rd_ready        <= user_port0_wdata_ready;

				        if TRACE then
				            if rising_edge(system_clk) then
				                -- Report only genuine store operations. A plain "else"
				                -- here would print "Store miss" on every clock edge for
				                -- loads and for OP_NONE as well.
				                if req_op = OP_STORE_HIT then
				                    report "Store hit to:" &
				                        to_hstring(wb_req.adr(DRAM_ABITS+3 downto 0)) &
				                        " data:" & to_hstring(req_wdata) &
				                        " we:" & to_hstring(req_we) &
				                        " V:" & std_ulogic'image(accept_store);
				                elsif req_op = OP_STORE_MISS then
				                    report "Store miss to:" &
				                        to_hstring(wb_req.adr(DRAM_ABITS+3 downto 0)) &
				                        " data:" & to_hstring(req_wdata) &
				                        " we:" & to_hstring(req_we) &
				                        " V:" & std_ulogic'image(accept_store);
				                end if;

				                -- Queue handshakes: an entry moves when valid and ready
				                -- are both high on the same edge.
				                if storeq_wr_valid = '1' and storeq_wr_ready = '1' then
				                    report "storeq push " & to_hstring(storeq_wr_data);
				                end if;
				                if storeq_rd_valid = '1' and storeq_rd_ready = '1' then
				                    report "storeq pop " & to_hstring(storeq_rd_data);
				                end if;
				            end if;
				        end if;
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- LiteDRAM command mux
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    dram_commands: process(all)
				    begin
				        -- Read data is accepted unconditionally.
				        user_port0_rdata_ready <= '1';

				        -- Default routing (everything that is not a store in IDLE):
				        -- the command comes from the registers maintained by the
				        -- refill machine and is issued as a read.
				        user_port0_cmd_addr  <= refill_cmd_addr;
				        user_port0_cmd_valid <= refill_cmd_valid;
				        user_port0_cmd_we    <= '0';

				        -- A store seen while IDLE overrides the defaults above: the
				        -- request address is forwarded directly as a write command,
				        -- and the command is only offered when the store FIFO can
				        -- accept the matching data.
				        if state = IDLE and (req_op = OP_STORE_HIT or req_op = OP_STORE_MISS) then
				            user_port0_cmd_addr  <= wb_req.adr(DRAM_ABITS+3 downto 4);
				            user_port0_cmd_valid <= storeq_wr_ready;
				            user_port0_cmd_we    <= '1';
				        end if;
				    end process;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- LiteDRAM refill machine
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    -- This handles the cache line refills
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    refill_machine : process(system_clk)
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        variable tagset      : cache_tags_set_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        variable cmds_done   : boolean;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        variable replace_way : way_t;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        variable wait_qdrain : boolean;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				    begin
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				        if rising_edge(system_clk) then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            -- On reset, clear all valid bits to force misses
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            if system_reset = '1' then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
						state <= CMD;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                for i in index_t loop
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    cache_valids(i) <= (others => '0');
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                end loop;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                state <= IDLE;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                refill_cmd_valid <= '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            else
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                -- Main state machine
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                case state is
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
						when CMD =>
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    if (user_port0_cmd_ready and user_port0_cmd_valid) = '1' then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
							state <= MWRITE when wb_in.we = '1' else MREAD;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                when IDLE =>
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    assert refill_cmd_valid = '0' report "refill cmd valid in IDLE state !"
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        severity failure;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    -- If NO_LS_OVERLAP is set, disallow a load miss if the store
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    -- queue still has data in it.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    wait_qdrain := false;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    if NO_LS_OVERLAP then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        wait_qdrain := storeq_rd_valid = '1';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    -- We need to read a cache line
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    if req_op = OP_LOAD_MISS and not wait_qdrain then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Grab way to replace
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        replace_way := to_integer(unsigned(plru_victim(req_index)));
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Force misses on that way while refilling that line
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        cache_valids(req_index)(replace_way) <= '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Store new tag in selected way
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        for i in 0 to NUM_WAYS-1 loop
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            if i = replace_way then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                tagset := cache_tags(req_index);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                write_tag(i, tagset, req_tag);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                cache_tags(req_index) <= tagset;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
						when MWRITE =>
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
						    if user_port0_wdata_ready = '1' then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
							state <= CMD;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        end loop;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Keep track of our index and way for subsequent stores
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        refill_index <= req_index;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        refill_way   <= replace_way;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        refill_row   <= get_row(req_laddr);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Prep for first DRAM read
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- XXX TODO: We could start a cycle early here by using
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- combo logic to generate the first command in
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- "dram_commands". In fact, we could make refill_cmd_addr
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- only contain the "counter" bits and wire it with the
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- other bits from req_laddr.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        refill_cmd_addr    <= req_laddr(DRAM_ABITS+3 downto 4);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        refill_cmd_valid   <= '1';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        if TRACE then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            report "refill addr " & to_hstring(req_laddr);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
						when MREAD =>
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Track that we had one request sent
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        state <= REFILL_WAIT_ACK;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                when REFILL_WAIT_ACK =>
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    -- Commands are all sent if user_port0_cmd_valid is 0
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    cmds_done := refill_cmd_valid = '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    -- If we are still sending requests, was one accepted ?
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    if user_port0_cmd_ready = '1' and not cmds_done then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- That was the last word ? We are done sending. Clear
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- command valid and set cmds_done so we can handle an
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- eventual last ack on the same cycle.
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        --
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        if TRACE then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            report "got refill cmd ack !";
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        if is_last_row_addr(refill_cmd_addr) then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            refill_cmd_valid <= '0';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            cmds_done := true;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            if TRACE then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                report "all refill cmds done !";
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        else
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            -- Calculate the next row address
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            refill_cmd_addr <= next_row_addr(refill_cmd_addr);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            if TRACE then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                report "refill addr " &
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                    to_hstring(next_row_addr(refill_cmd_addr));
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    -- Incoming read data processing
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    if user_port0_rdata_valid = '1' then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
							state <= CMD;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        if TRACE then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            report "got refill data ack !";
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Check for completion
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        if cmds_done and is_last_row(refill_row) then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            if TRACE then
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                                report "all refill data done !";
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            -- Cache line is now valid
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            cache_valids(refill_index)(refill_way) <= '1';
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            -- We are done
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                            state <= IDLE;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        -- Increment store row counter
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                        refill_row <= next_row(refill_row);
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                    end if;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				                end case;
 
			
		 
		
	
		
			
				 
				 
			
			 
			 
			
				            end if;