litedram: Add an L2 cache with store queue
This adds a cache between the wishbone and litedram with the following features (at this point, it's still evolving): - 128-byte line width in order to have a reasonable amount of litedram pipelining on the 128-bit wide data port. - Configurable geometry otherwise. - Stores are acked immediately on wishbone whether hit or miss (minus a 2-cycle delay if there's a previous load response in the way) and sent to LiteDRAM via an 8-entry (configurable) store queue. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
pull/190/head
parent
bf1b98b958
commit
a3857aac94
File diff suppressed because one or more lines are too long
@ -0,0 +1,84 @@
|
|||||||
|
$ version 1.1
|
||||||
|
|
||||||
|
# Signals in entities :
|
||||||
|
/core_dram_tb/dram/rst
|
||||||
|
/core_dram_tb/dram/system_clk
|
||||||
|
/core_dram_tb/dram/system_reset
|
||||||
|
/core_dram_tb/dram/wb_in
|
||||||
|
/core_dram_tb/dram/wb_out
|
||||||
|
/core_dram_tb/dram/user_port0_cmd_valid
|
||||||
|
/core_dram_tb/dram/user_port0_cmd_ready
|
||||||
|
/core_dram_tb/dram/user_port0_cmd_we
|
||||||
|
/core_dram_tb/dram/user_port0_cmd_addr
|
||||||
|
/core_dram_tb/dram/user_port0_wdata_valid
|
||||||
|
/core_dram_tb/dram/user_port0_wdata_ready
|
||||||
|
/core_dram_tb/dram/user_port0_wdata_we
|
||||||
|
/core_dram_tb/dram/user_port0_wdata_data
|
||||||
|
/core_dram_tb/dram/user_port0_rdata_valid
|
||||||
|
/core_dram_tb/dram/user_port0_rdata_ready
|
||||||
|
/core_dram_tb/dram/user_port0_rdata_data
|
||||||
|
/core_dram_tb/dram/cache_tags
|
||||||
|
/core_dram_tb/dram/cache_valids
|
||||||
|
/core_dram_tb/dram/storeq_rd_ready
|
||||||
|
/core_dram_tb/dram/storeq_rd_valid
|
||||||
|
/core_dram_tb/dram/storeq_rd_data
|
||||||
|
/core_dram_tb/dram/storeq_wr_ready
|
||||||
|
/core_dram_tb/dram/storeq_wr_valid
|
||||||
|
/core_dram_tb/dram/storeq_wr_data
|
||||||
|
/core_dram_tb/dram/accept_store
|
||||||
|
/core_dram_tb/dram/state
|
||||||
|
/core_dram_tb/dram/wb_req
|
||||||
|
/core_dram_tb/dram/store_queued
|
||||||
|
/core_dram_tb/dram/read_ack_0
|
||||||
|
/core_dram_tb/dram/read_ack_1
|
||||||
|
/core_dram_tb/dram/read_ad3_0
|
||||||
|
/core_dram_tb/dram/read_ad3_1
|
||||||
|
/core_dram_tb/dram/read_way_0
|
||||||
|
/core_dram_tb/dram/read_way_1
|
||||||
|
/core_dram_tb/dram/req_index
|
||||||
|
/core_dram_tb/dram/req_row
|
||||||
|
/core_dram_tb/dram/req_hit_way
|
||||||
|
/core_dram_tb/dram/req_tag
|
||||||
|
/core_dram_tb/dram/req_op
|
||||||
|
/core_dram_tb/dram/req_laddr
|
||||||
|
/core_dram_tb/dram/req_ad3
|
||||||
|
/core_dram_tb/dram/req_we
|
||||||
|
/core_dram_tb/dram/req_wdata
|
||||||
|
/core_dram_tb/dram/store_way
|
||||||
|
/core_dram_tb/dram/store_index
|
||||||
|
/core_dram_tb/dram/store_row
|
||||||
|
/core_dram_tb/dram/cache_out
|
||||||
|
/core_dram_tb/dram/plru_victim
|
||||||
|
/core_dram_tb/dram/replace_way
|
||||||
|
/core_dram_tb/dram/rams/do_read
|
||||||
|
/core_dram_tb/dram/rams/do_write
|
||||||
|
/core_dram_tb/dram/rams/rd_addr
|
||||||
|
/core_dram_tb/dram/rams/wr_addr
|
||||||
|
/core_dram_tb/dram/rams/wr_data
|
||||||
|
/core_dram_tb/dram/rams/wr_sel
|
||||||
|
/core_dram_tb/dram/rams/wr_sel_m
|
||||||
|
/core_dram_tb/dram/rams/dout
|
||||||
|
/core_dram_tb/dram/rams/way/clk
|
||||||
|
/core_dram_tb/dram/rams/way/rd_en
|
||||||
|
/core_dram_tb/dram/rams/way/rd_addr
|
||||||
|
/core_dram_tb/dram/rams/way/rd_data
|
||||||
|
/core_dram_tb/dram/rams/way/wr_sel
|
||||||
|
/core_dram_tb/dram/rams/way/wr_addr
|
||||||
|
/core_dram_tb/dram/rams/way/wr_data
|
||||||
|
/core_dram_tb/dram/rams/way/rd_data0
|
||||||
|
/core_dram_tb/dram/store_queue/wr_ready
|
||||||
|
/core_dram_tb/dram/store_queue/wr_valid
|
||||||
|
/core_dram_tb/dram/store_queue/wr_data
|
||||||
|
/core_dram_tb/dram/store_queue/rd_ready
|
||||||
|
/core_dram_tb/dram/store_queue/rd_valid
|
||||||
|
/core_dram_tb/dram/store_queue/rd_data
|
||||||
|
/core_dram_tb/dram/store_queue/rd_idx
|
||||||
|
/core_dram_tb/dram/store_queue/rd_next
|
||||||
|
/core_dram_tb/dram/store_queue/wr_idx
|
||||||
|
/core_dram_tb/dram/store_queue/wr_next
|
||||||
|
/core_dram_tb/dram/store_queue/op_prev
|
||||||
|
/core_dram_tb/dram/store_queue/op_next
|
||||||
|
/core_dram_tb/dram/store_queue/full
|
||||||
|
/core_dram_tb/dram/store_queue/empty
|
||||||
|
/core_dram_tb/dram/store_queue/push
|
||||||
|
/core_dram_tb/dram/store_queue/pop
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,163 @@
|
|||||||
|
-- Synchronous FIFO with a protocol similar to AXI
|
||||||
|
--
|
||||||
|
-- The outputs are generated combinationally from the inputs
|
||||||
|
-- in order to allow for back-to-back transfers with the type
|
||||||
|
-- of flow control used by busses like AXI, pipelined WB or
|
||||||
|
-- LiteDRAM native port when the FIFO is full.
|
||||||
|
--
|
||||||
|
-- That means that care needs to be taken by the user not to
|
||||||
|
-- generate the inputs combinationally from the outputs otherwise
|
||||||
|
-- it would create a logic loop.
|
||||||
|
--
|
||||||
|
-- If breaking that loop is required, a stash buffer could be
|
||||||
|
-- added to break the flow control "loop" between the read and
|
||||||
|
-- the write port.
|
||||||
|
--
|
||||||
|
library ieee;
|
||||||
|
use ieee.std_logic_1164.all;
|
||||||
|
|
||||||
|
library work;
|
||||||
|
use work.utils.all;
|
||||||
|
|
||||||
|
entity sync_fifo is
    generic(
        -- Number of entries the FIFO can hold
        DEPTH     : natural := 64;

        -- Width of each entry, in bits
        WIDTH     : natural := 32;

        -- If true, the storage array starts out filled with zeroes;
        -- otherwise it is left at its default (uninitialized) value
        INIT_ZERO : boolean := false
        );
    port(
        -- Clock and reset
        clk      : in std_ulogic;
        reset    : in std_ulogic;

        -- Write side: an entry is pushed in a cycle where both
        -- wr_ready and wr_valid are high
        wr_ready : out std_ulogic;
        wr_valid : in std_ulogic;
        wr_data  : in std_ulogic_vector(WIDTH - 1 downto 0);

        -- Read side: an entry is popped in a cycle where both
        -- rd_ready and rd_valid are high
        rd_ready : in std_ulogic;
        rd_valid : out std_ulogic;
        rd_data  : out std_ulogic_vector(WIDTH - 1 downto 0)
        );
end entity sync_fifo;
|
||||||
|
|
||||||
|
architecture behaviour of sync_fifo is

    -- Storage: DEPTH entries of WIDTH bits each
    subtype data_t is std_ulogic_vector(WIDTH - 1 downto 0);
    type memory_t is array(0 to DEPTH - 1) of data_t;

    -- Builds the initial content of the storage array.
    -- With INIT_ZERO set every entry is cleared, otherwise the
    -- array is returned with its default (uninitialized) value.
    function init_mem return memory_t is
        variable m : memory_t;
    begin
        if INIT_ZERO then
            for i in 0 to DEPTH - 1 loop
                m(i) := (others => '0');
            end loop;
        end if;
        return m;
    end function;

    signal memory : memory_t := init_mem;

    -- Read/write pointers; the *_next versions are the values
    -- that will be latched at the next clock edge
    subtype index_t is integer range 0 to DEPTH - 1;
    signal rd_idx  : index_t;
    signal rd_next : index_t;
    signal wr_idx  : index_t;
    signal wr_next : index_t;

    -- Returns the index following idx, wrapping from DEPTH - 1 to 0.
    --
    -- ispow2 comes from work.utils (not visible here; presumably true
    -- when DEPTH is a power of two -- confirm). In that case a plain
    -- "mod" is used, otherwise an explicit compare handles the wrap.
    function next_index(idx : index_t) return index_t is
        variable r : index_t;
    begin
        if ispow2(DEPTH) then
            r := (idx + 1) mod DEPTH;
        elsif idx = DEPTH - 1 then
            -- Wrap *before* incrementing: r is constrained to
            -- 0 to DEPTH - 1, so storing DEPTH in it first and then
            -- testing for it would trip a range check in simulation.
            r := 0;
        else
            r := idx + 1;
        end if;
        return r;
    end function;

    -- Last operation that changed the fill level. It disambiguates
    -- the rd_idx = wr_idx case: after a pop the FIFO is empty,
    -- after a push it is full.
    type op_t is (OP_POP, OP_PUSH);
    signal op_prev : op_t := OP_POP;
    signal op_next : op_t;

    signal full, empty : std_ulogic;
    signal push, pop   : std_ulogic;
begin

    -- Current state at last clock edge
    empty <= '1' when rd_idx = wr_idx and op_prev = OP_POP else '0';
    full  <= '1' when rd_idx = wr_idx and op_prev = OP_PUSH else '0';

    -- We can accept new data if we aren't full or we are but
    -- the read port is going to accept data this cycle
    wr_ready <= rd_ready or not full;

    -- We can provide data if we aren't empty or we are but
    -- the write port is going to provide data this cycle
    rd_valid <= wr_valid or not empty;

    -- Internal control signals: a transfer happens on a port
    -- when both its ready and valid are asserted
    push <= wr_ready and wr_valid;
    pop  <= rd_ready and rd_valid;

    -- Next state
    rd_next <= next_index(rd_idx) when pop = '1' else rd_idx;
    wr_next <= next_index(wr_idx) when push = '1' else wr_idx;

    -- A simultaneous push and pop (or no transfer at all) leaves the
    -- fill level unchanged, so the previous op is kept
    with push & pop select op_next <=
        OP_PUSH when "10",
        OP_POP  when "01",
        op_prev when others;

    -- Read port output: when the FIFO is empty the write data is
    -- forwarded combinationally to the read port
    rd_data <= memory(rd_idx) when empty = '0' else wr_data;

    -- Read counter (synchronous reset)
    reader: process(clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                rd_idx <= 0;
            else
                rd_idx <= rd_next;
            end if;
        end if;
    end process;

    -- Write counter and memory write (synchronous reset; the
    -- memory content itself is not cleared by reset)
    producer: process(clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                wr_idx <= 0;
            else
                wr_idx <= wr_next;

                if push = '1' then
                    memory(wr_idx) <= wr_data;
                end if;
            end if;
        end if;
    end process;

    -- Previous op latch used for generating empty/full
    op: process(clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                op_prev <= OP_POP;
            else
                op_prev <= op_next;
            end if;
        end if;
    end process;

end architecture behaviour;
|
Loading…
Reference in New Issue