litedram: Add an L2 cache with store queue
This adds a cache between the wishbone and litedram with the following features (at this point, it's still evolving) - 128 bytes line width in order to have a reasonable amount of litedram pipelining on the 128-bit wide data port. - Configurable geometry otherwise - Stores are acked immediately on wishbone whether hit or miss (minus a 2 cycles delay if there's a previous load response in the way) and sent to LiteDRAM via 8 entries (configurable) store queue Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>pull/190/head
parent
bf1b98b958
commit
a3857aac94
File diff suppressed because one or more lines are too long
@ -0,0 +1,84 @@
|
||||
$ version 1.1
|
||||
|
||||
# Signals in entities :
|
||||
/core_dram_tb/dram/rst
|
||||
/core_dram_tb/dram/system_clk
|
||||
/core_dram_tb/dram/system_reset
|
||||
/core_dram_tb/dram/wb_in
|
||||
/core_dram_tb/dram/wb_out
|
||||
/core_dram_tb/dram/user_port0_cmd_valid
|
||||
/core_dram_tb/dram/user_port0_cmd_ready
|
||||
/core_dram_tb/dram/user_port0_cmd_we
|
||||
/core_dram_tb/dram/user_port0_cmd_addr
|
||||
/core_dram_tb/dram/user_port0_wdata_valid
|
||||
/core_dram_tb/dram/user_port0_wdata_ready
|
||||
/core_dram_tb/dram/user_port0_wdata_we
|
||||
/core_dram_tb/dram/user_port0_wdata_data
|
||||
/core_dram_tb/dram/user_port0_rdata_valid
|
||||
/core_dram_tb/dram/user_port0_rdata_ready
|
||||
/core_dram_tb/dram/user_port0_rdata_data
|
||||
/core_dram_tb/dram/cache_tags
|
||||
/core_dram_tb/dram/cache_valids
|
||||
/core_dram_tb/dram/storeq_rd_ready
|
||||
/core_dram_tb/dram/storeq_rd_valid
|
||||
/core_dram_tb/dram/storeq_rd_data
|
||||
/core_dram_tb/dram/storeq_wr_ready
|
||||
/core_dram_tb/dram/storeq_wr_valid
|
||||
/core_dram_tb/dram/storeq_wr_data
|
||||
/core_dram_tb/dram/accept_store
|
||||
/core_dram_tb/dram/state
|
||||
/core_dram_tb/dram/wb_req
|
||||
/core_dram_tb/dram/store_queued
|
||||
/core_dram_tb/dram/read_ack_0
|
||||
/core_dram_tb/dram/read_ack_1
|
||||
/core_dram_tb/dram/read_ad3_0
|
||||
/core_dram_tb/dram/read_ad3_1
|
||||
/core_dram_tb/dram/read_way_0
|
||||
/core_dram_tb/dram/read_way_1
|
||||
/core_dram_tb/dram/req_index
|
||||
/core_dram_tb/dram/req_row
|
||||
/core_dram_tb/dram/req_hit_way
|
||||
/core_dram_tb/dram/req_tag
|
||||
/core_dram_tb/dram/req_op
|
||||
/core_dram_tb/dram/req_laddr
|
||||
/core_dram_tb/dram/req_ad3
|
||||
/core_dram_tb/dram/req_we
|
||||
/core_dram_tb/dram/req_wdata
|
||||
/core_dram_tb/dram/store_way
|
||||
/core_dram_tb/dram/store_index
|
||||
/core_dram_tb/dram/store_row
|
||||
/core_dram_tb/dram/cache_out
|
||||
/core_dram_tb/dram/plru_victim
|
||||
/core_dram_tb/dram/replace_way
|
||||
/core_dram_tb/dram/rams/do_read
|
||||
/core_dram_tb/dram/rams/do_write
|
||||
/core_dram_tb/dram/rams/rd_addr
|
||||
/core_dram_tb/dram/rams/wr_addr
|
||||
/core_dram_tb/dram/rams/wr_data
|
||||
/core_dram_tb/dram/rams/wr_sel
|
||||
/core_dram_tb/dram/rams/wr_sel_m
|
||||
/core_dram_tb/dram/rams/dout
|
||||
/core_dram_tb/dram/rams/way/clk
|
||||
/core_dram_tb/dram/rams/way/rd_en
|
||||
/core_dram_tb/dram/rams/way/rd_addr
|
||||
/core_dram_tb/dram/rams/way/rd_data
|
||||
/core_dram_tb/dram/rams/way/wr_sel
|
||||
/core_dram_tb/dram/rams/way/wr_addr
|
||||
/core_dram_tb/dram/rams/way/wr_data
|
||||
/core_dram_tb/dram/rams/way/rd_data0
|
||||
/core_dram_tb/dram/store_queue/wr_ready
|
||||
/core_dram_tb/dram/store_queue/wr_valid
|
||||
/core_dram_tb/dram/store_queue/wr_data
|
||||
/core_dram_tb/dram/store_queue/rd_ready
|
||||
/core_dram_tb/dram/store_queue/rd_valid
|
||||
/core_dram_tb/dram/store_queue/rd_data
|
||||
/core_dram_tb/dram/store_queue/rd_idx
|
||||
/core_dram_tb/dram/store_queue/rd_next
|
||||
/core_dram_tb/dram/store_queue/wr_idx
|
||||
/core_dram_tb/dram/store_queue/wr_next
|
||||
/core_dram_tb/dram/store_queue/op_prev
|
||||
/core_dram_tb/dram/store_queue/op_next
|
||||
/core_dram_tb/dram/store_queue/full
|
||||
/core_dram_tb/dram/store_queue/empty
|
||||
/core_dram_tb/dram/store_queue/push
|
||||
/core_dram_tb/dram/store_queue/pop
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,163 @@
|
||||
-- Synchronous FIFO with a protocol similar to AXI
|
||||
--
|
||||
-- The outputs are generated combinationally from the inputs
|
||||
-- in order to allow for back-to-back transfers with the type
|
||||
-- of flow control used by busses lite AXI, pipelined WB or
|
||||
-- LiteDRAM native port when the FIFO is full.
|
||||
--
|
||||
-- That means that care needs to be taken by the user not to
|
||||
-- generate the inputs combinationally from the outputs otherwise
|
||||
-- it would create a logic loop.
|
||||
--
|
||||
-- If breaking that loop is required, a stash buffer could be
|
||||
-- added to break the flow control "loop" between the read and
|
||||
-- the write port.
|
||||
--
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
|
||||
library work;
|
||||
use work.utils.all;
|
||||
|
||||
entity sync_fifo is
|
||||
generic(
|
||||
-- Fifo depth in entries
|
||||
DEPTH : natural := 64;
|
||||
|
||||
-- Fifo width in bits
|
||||
WIDTH : natural := 32;
|
||||
|
||||
-- When INIT_ZERO is set, the memory is pre-initialized to 0's
|
||||
INIT_ZERO : boolean := false
|
||||
);
|
||||
port(
|
||||
-- Control lines:
|
||||
clk : in std_ulogic;
|
||||
reset : in std_ulogic;
|
||||
|
||||
-- Write port
|
||||
wr_ready : out std_ulogic;
|
||||
wr_valid : in std_ulogic;
|
||||
wr_data : in std_ulogic_vector(WIDTH - 1 downto 0);
|
||||
|
||||
-- Read port
|
||||
rd_ready : in std_ulogic;
|
||||
rd_valid : out std_ulogic;
|
||||
rd_data : out std_ulogic_vector(WIDTH - 1 downto 0)
|
||||
);
|
||||
end entity sync_fifo;
|
||||
|
||||
architecture behaviour of sync_fifo is
|
||||
|
||||
subtype data_t is std_ulogic_vector(WIDTH - 1 downto 0);
|
||||
type memory_t is array(0 to DEPTH - 1) of data_t;
|
||||
|
||||
function init_mem return memory_t is
|
||||
variable m : memory_t;
|
||||
begin
|
||||
if INIT_ZERO then
|
||||
for i in 0 to DEPTH - 1 loop
|
||||
m(i) := (others => '0');
|
||||
end loop;
|
||||
end if;
|
||||
return m;
|
||||
end function;
|
||||
|
||||
signal memory : memory_t := init_mem;
|
||||
|
||||
subtype index_t is integer range 0 to DEPTH - 1;
|
||||
signal rd_idx : index_t;
|
||||
signal rd_next : index_t;
|
||||
signal wr_idx : index_t;
|
||||
signal wr_next : index_t;
|
||||
|
||||
function next_index(idx : index_t) return index_t is
|
||||
variable r : index_t;
|
||||
begin
|
||||
if ispow2(DEPTH) then
|
||||
r := (idx + 1) mod DEPTH;
|
||||
else
|
||||
r := idx + 1;
|
||||
if r = DEPTH then
|
||||
r := 0;
|
||||
end if;
|
||||
end if;
|
||||
return r;
|
||||
end function;
|
||||
|
||||
type op_t is (OP_POP, OP_PUSH);
|
||||
signal op_prev : op_t := OP_POP;
|
||||
signal op_next : op_t;
|
||||
|
||||
signal full, empty : std_ulogic;
|
||||
signal push, pop : std_ulogic;
|
||||
begin
|
||||
|
||||
-- Current state at last clock edge
|
||||
empty <= '1' when rd_idx = wr_idx and op_prev = OP_POP else '0';
|
||||
full <= '1' when rd_idx = wr_idx and op_prev = OP_PUSH else '0';
|
||||
|
||||
-- We can accept new data if we aren't full or we are but
|
||||
-- the read port is going to accept data this cycle
|
||||
wr_ready <= rd_ready or not full;
|
||||
|
||||
-- We can provide data if we aren't empty or we are but
|
||||
-- the write port is going to provide data this cycle
|
||||
rd_valid <= wr_valid or not empty;
|
||||
|
||||
-- Internal control signals
|
||||
push <= wr_ready and wr_valid;
|
||||
pop <= rd_ready and rd_valid;
|
||||
|
||||
-- Next state
|
||||
rd_next <= next_index(rd_idx) when pop = '1' else rd_idx;
|
||||
wr_next <= next_index(wr_idx) when push = '1' else wr_idx;
|
||||
with push & pop select op_next <=
|
||||
OP_PUSH when "10",
|
||||
OP_POP when "01",
|
||||
op_prev when others;
|
||||
|
||||
-- Read port output
|
||||
rd_data <= memory(rd_idx) when empty = '0' else wr_data;
|
||||
|
||||
-- Read counter
|
||||
reader: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if reset = '1' then
|
||||
rd_idx <= 0;
|
||||
else
|
||||
rd_idx <= rd_next;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Write counter and memory write
|
||||
producer: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if reset = '1' then
|
||||
wr_idx <= 0;
|
||||
else
|
||||
wr_idx <= wr_next;
|
||||
|
||||
if push = '1' then
|
||||
memory(wr_idx) <= wr_data;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Previous op latch used for generating empty/full
|
||||
op: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if reset = '1' then
|
||||
op_prev <= OP_POP;
|
||||
else
|
||||
op_prev <= op_next;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
end architecture behaviour;
|
Loading…
Reference in New Issue