litedram: Add an L2 cache with store queue
This adds a cache between the wishbone and litedram with the following features (at this point, it's still evolving): - 128-byte line width in order to have a reasonable amount of litedram pipelining on the 128-bit wide data port. - Configurable geometry otherwise. - Stores are acked immediately on wishbone whether hit or miss (minus a 2-cycle delay if there's a previous load response in the way) and sent to LiteDRAM via an 8-entry (configurable) store queue. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> pull/190/head
							parent
							
								
									bf1b98b958
								
							
						
					
					
						commit
						a3857aac94
					
				
											
												
													File diff suppressed because one or more lines are too long
												
											
										
									
								@ -0,0 +1,84 @@
 | 
				
			|||||||
 | 
					$ version 1.1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Signals in entities :
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rst
 | 
				
			||||||
 | 
					/core_dram_tb/dram/system_clk
 | 
				
			||||||
 | 
					/core_dram_tb/dram/system_reset
 | 
				
			||||||
 | 
					/core_dram_tb/dram/wb_in
 | 
				
			||||||
 | 
					/core_dram_tb/dram/wb_out
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_cmd_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_cmd_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_cmd_we
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_cmd_addr
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_wdata_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_wdata_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_wdata_we
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_wdata_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_rdata_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_rdata_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/user_port0_rdata_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/cache_tags
 | 
				
			||||||
 | 
					/core_dram_tb/dram/cache_valids
 | 
				
			||||||
 | 
					/core_dram_tb/dram/storeq_rd_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/storeq_rd_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/storeq_rd_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/storeq_wr_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/storeq_wr_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/storeq_wr_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/accept_store
 | 
				
			||||||
 | 
					/core_dram_tb/dram/state
 | 
				
			||||||
 | 
					/core_dram_tb/dram/wb_req
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queued
 | 
				
			||||||
 | 
					/core_dram_tb/dram/read_ack_0
 | 
				
			||||||
 | 
					/core_dram_tb/dram/read_ack_1
 | 
				
			||||||
 | 
					/core_dram_tb/dram/read_ad3_0
 | 
				
			||||||
 | 
					/core_dram_tb/dram/read_ad3_1
 | 
				
			||||||
 | 
					/core_dram_tb/dram/read_way_0
 | 
				
			||||||
 | 
					/core_dram_tb/dram/read_way_1
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_index
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_row
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_hit_way
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_tag
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_op
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_laddr
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_ad3
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_we
 | 
				
			||||||
 | 
					/core_dram_tb/dram/req_wdata
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_way
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_index
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_row
 | 
				
			||||||
 | 
					/core_dram_tb/dram/cache_out
 | 
				
			||||||
 | 
					/core_dram_tb/dram/plru_victim
 | 
				
			||||||
 | 
					/core_dram_tb/dram/replace_way
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/do_read
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/do_write
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/rd_addr
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/wr_addr
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/wr_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/wr_sel
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/wr_sel_m
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/dout
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/clk
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/rd_en
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/rd_addr
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/rd_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/wr_sel
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/wr_addr
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/wr_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/rams/way/rd_data0
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/wr_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/wr_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/wr_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/rd_ready
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/rd_valid
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/rd_data
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/rd_idx
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/rd_next
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/wr_idx
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/wr_next
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/op_prev
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/op_next
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/full
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/empty
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/push
 | 
				
			||||||
 | 
					/core_dram_tb/dram/store_queue/pop
 | 
				
			||||||
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								@ -0,0 +1,163 @@
 | 
				
			|||||||
 | 
					-- Synchronous FIFO with a protocol similar to AXI
 | 
				
			||||||
 | 
					--
 | 
				
			||||||
 | 
					-- The outputs are generated combinationally from the inputs
 | 
				
			||||||
 | 
					-- in order to allow for back-to-back transfers with the type
 | 
				
			||||||
 | 
					-- of flow control used by busses like AXI, pipelined WB or
 | 
				
			||||||
 | 
					-- LiteDRAM native port when the FIFO is full.
 | 
				
			||||||
 | 
					--
 | 
				
			||||||
 | 
					-- That means that care needs to be taken by the user not to
 | 
				
			||||||
 | 
					-- generate the inputs combinationally from the outputs otherwise
 | 
				
			||||||
 | 
					-- it would create a logic loop.
 | 
				
			||||||
 | 
					--
 | 
				
			||||||
 | 
					-- If breaking that loop is required, a stash buffer could be
 | 
				
			||||||
 | 
					-- added to break the flow control "loop" between the read and
 | 
				
			||||||
 | 
					-- the write port.
 | 
				
			||||||
 | 
					--
 | 
				
			||||||
 | 
					library ieee;
 | 
				
			||||||
 | 
					use ieee.std_logic_1164.all;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					library work;
 | 
				
			||||||
 | 
					use work.utils.all;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Interface of the synchronous FIFO. Both ports use a valid/ready
					-- handshake; all state changes happen on the rising edge of clk.
					entity sync_fifo is
					    generic (
					        DEPTH     : natural := 64;     -- number of entries the FIFO can hold
					        WIDTH     : natural := 32;     -- width of one entry, in bits
					        INIT_ZERO : boolean := false   -- true: storage is pre-initialized to all '0'
					        );
					    port (
					        -- Clock and reset. reset is only examined on rising clk edges.
					        clk      : in std_ulogic;
					        reset    : in std_ulogic;
					        -- Write side: an entry is accepted in a cycle where wr_valid
					        -- and wr_ready are both '1'.
					        wr_ready : out std_ulogic;
					        wr_valid : in std_ulogic;
					        wr_data  : in std_ulogic_vector(WIDTH - 1 downto 0);
					        -- Read side: an entry is consumed in a cycle where rd_valid
					        -- and rd_ready are both '1'.
					        rd_ready : in std_ulogic;
					        rd_valid : out std_ulogic;
					        rd_data  : out std_ulogic_vector(WIDTH - 1 downto 0)
					        );
					end entity sync_fifo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					architecture behaviour of sync_fifo is
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    subtype data_t is std_ulogic_vector(WIDTH - 1 downto 0);    
 | 
				
			||||||
 | 
					    type memory_t is array(0 to DEPTH - 1) of data_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Build the initial contents of the FIFO storage array.
					    -- With INIT_ZERO set, every bit of every entry is '0'. Otherwise the
					    -- array is returned exactly as declared, without any assignment.
					    function init_mem return memory_t is
					        variable contents : memory_t;
					    begin
					        if INIT_ZERO then
					            -- Outer aggregate covers all entries, inner one all bits
					            contents := (others => (others => '0'));
					        end if;
					        return contents;
					    end function;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    signal memory : memory_t := init_mem;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    subtype index_t is integer range 0 to DEPTH - 1;
 | 
				
			||||||
 | 
					    signal rd_idx  : index_t;
 | 
				
			||||||
 | 
					    signal rd_next : index_t;
 | 
				
			||||||
 | 
					    signal wr_idx  : index_t;
 | 
				
			||||||
 | 
					    signal wr_next : index_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Return the index that follows idx in the circular buffer,
					    -- wrapping from DEPTH - 1 back to 0.
					    --
					    -- idx : current read or write index, in 0 .. DEPTH - 1.
					    --
					    -- The wrap is detected on idx *before* incrementing. The result
					    -- variable is constrained to 0 .. DEPTH - 1, so computing idx + 1
					    -- into it first and comparing with DEPTH afterwards would violate
					    -- the range constraint whenever idx = DEPTH - 1.
					    function next_index(idx : index_t) return index_t is
					        variable r : index_t;
					    begin
					        if ispow2(DEPTH) then
					            -- NOTE(review): ispow2 comes from work.utils, which is not
					            -- visible here; presumably true when DEPTH is a power of two.
					            -- The modulo keeps the result inside 0 .. DEPTH - 1.
					            r := (idx + 1) mod DEPTH;
					        elsif idx = DEPTH - 1 then
					            -- Last entry: wrap around without ever forming DEPTH
					            r := 0;
					        else
					            r := idx + 1;
					        end if;
					        return r;
					    end function;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    type op_t is (OP_POP, OP_PUSH);
 | 
				
			||||||
 | 
					    signal op_prev : op_t := OP_POP;
 | 
				
			||||||
 | 
					    signal op_next : op_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    signal full, empty : std_ulogic;
 | 
				
			||||||
 | 
					    signal push, pop   : std_ulogic;
 | 
				
			||||||
 | 
					begin
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Occupancy flags derived from the registered state. Equal read and
					    -- write indices mean either empty or full; op_prev, the direction of
					    -- the last unpaired transfer, tells the two cases apart.
					    empty <= '1' when op_prev = OP_POP  and rd_idx = wr_idx else '0';
					    full  <= '1' when op_prev = OP_PUSH and rd_idx = wr_idx else '0';
					    -- The write side is ready unless the FIFO is full, and even then it
					    -- is ready when the read side accepts an entry in the same cycle.
					    wr_ready <= (not full) or rd_ready;
					    -- The read side has data unless the FIFO is empty, and even then it
					    -- has data when the write side offers an entry in the same cycle.
					    rd_valid <= (not empty) or wr_valid;
					    -- A transfer takes place on a port when both handshake signals are set.
					    pop  <= rd_valid and rd_ready;
					    push <= wr_valid and wr_ready;
					    -- Next index values: advance on a transfer, hold otherwise.
					    wr_next <= wr_idx when push /= '1' else next_index(wr_idx);
					    rd_next <= rd_idx when pop  /= '1' else next_index(rd_idx);
					    -- Track the direction of the last unpaired transfer. A simultaneous
					    -- push and pop, or no transfer at all, leaves it unchanged.
					    op_next <= OP_PUSH when push = '1' and pop = '0' else
					               OP_POP  when push = '0' and pop = '1' else
					               op_prev;
					    -- Read data comes from storage, except while the FIFO is empty, in
					    -- which case the incoming write data is passed straight through.
					    rd_data <= memory(rd_idx) when empty = '0' else wr_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Read index register: takes rd_next on every rising clock edge,
					    -- and is forced to 0 instead while reset is '1'.
					    reader: process(clk)
					    begin
					        if rising_edge(clk) then
					            -- Default update; the reset assignment below overrides it
					            rd_idx <= rd_next;
					            if reset = '1' then
					                rd_idx <= 0;
					            end if;
					        end if;
					    end process reader;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Write index register and storage write port. Outside of reset the
					    -- index takes wr_next on every rising clock edge and, when push is
					    -- set, wr_data is stored at the current write index.
					    producer: process(clk)
					    begin
					        if rising_edge(clk) then
					            if reset /= '1' then
					                -- The store uses wr_idx as it was before this edge
					                if push = '1' then
					                    memory(wr_idx) <= wr_data;
					                end if;
					                wr_idx <= wr_next;
					            else
					                -- Only the index is reset; storage is not written here
					                wr_idx <= 0;
					            end if;
					        end if;
					    end process producer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Register holding the last operation, used to tell empty from full
					    -- when the read and write indices are equal. Reset puts it in
					    -- OP_POP, which together with equal indices reads as empty.
					    op: process(clk)
					    begin
					        if rising_edge(clk) then
					            case reset is
					                when '1'    => op_prev <= OP_POP;
					                when others => op_prev <= op_next;
					            end case;
					        end if;
					    end process op;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					end architecture behaviour;
 | 
				
			||||||
					Loading…
					
					
				
		Reference in New Issue