From 3edbbf5f1876a46069dd12f52626ae5042fb55e3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 30 Sep 2022 12:04:26 +1000 Subject: [PATCH 1/2] Fix dcache_tb (and add dump of victim way to dcache) It bitrotted... more signals need to be initialized. This also adds a lot more accesses with different timing conditions allowing to test cases of hit during reloads, hit with reload formward, hit on idle cache etc... It also exposes a bug where the cache miss caused by the read of 0x140 uses the same victim way as previous cache miss of 0x40 (same index). This bug will need to be fixed separately, but at least this exposes it. Signed-off-by: Benjamin Herrenschmidt --- dcache.vhdl | 1 + dcache_tb.vhdl | 123 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 115 insertions(+), 9 deletions(-) diff --git a/dcache.vhdl b/dcache.vhdl index 75c2ce0..f24e9ec 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -1496,6 +1496,7 @@ begin -- Record victim way in the cycle after we see a load or dcbz miss if r1.choose_victim = '1' then r1.victim_way <= plru_victim; + report "victim way:" & to_hstring(plru_victim); end if; if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then r1.choose_victim <= '1'; diff --git a/dcache_tb.vhdl b/dcache_tb.vhdl index 35f727a..b39ad39 100644 --- a/dcache_tb.vhdl +++ b/dcache_tb.vhdl @@ -22,6 +22,8 @@ architecture behave of dcache_tb is signal wb_bram_out : wishbone_slave_out; constant clk_period : time := 10 ns; + + signal stall : std_ulogic; begin dcache0: entity work.dcache generic map( @@ -33,6 +35,7 @@ begin rst => rst, d_in => d_in, d_out => d_out, + stall_out => stall, m_in => m_in, m_out => m_out, wishbone_out => wb_bram_in, @@ -74,21 +77,31 @@ begin d_in.valid <= '0'; d_in.load <= '0'; d_in.nc <= '0'; + d_in.hold <= '0'; + d_in.dcbz <= '0'; + d_in.reserve <= '0'; + d_in.virt_mode <= '0'; + d_in.priv_mode <= '1'; d_in.addr <= (others => '0'); d_in.data <= (others => '0'); + d_in.byte_sel <= (others => '1'); m_in.valid <= '0'; m_in.addr <= (others => '0'); m_in.pte <= (others => '0'); + m_in.tlbie <= '0'; + m_in.doall <= '0'; + m_in.tlbld <= '0'; wait for 4*clk_period; wait until rising_edge(clk); -- Cacheable read of address 4 + report "cache read address 4..."; d_in.load <= '1'; d_in.nc <= '0'; d_in.addr <= x"0000000000000004"; d_in.valid <= '1'; - wait until rising_edge(clk); + wait until rising_edge(clk) and stall = '0'; d_in.valid <= '0'; wait until rising_edge(clk) and d_out.valid = '1'; @@ -97,14 +110,14 @@ begin "=" & to_hstring(d_out.data) & " expected 0000000100000000" severity failure; --- wait for clk_period; - -- Cacheable read of address 30 + -- Cacheable read of address 30 (hit after hit forward from reload) + report "cache read address 30..."; d_in.load <= '1'; d_in.nc <= '0'; d_in.addr <= x"0000000000000030"; d_in.valid <= '1'; - wait until rising_edge(clk); + wait until rising_edge(clk) and stall = '0'; d_in.valid <= '0'; wait until rising_edge(clk) and d_out.valid = '1'; @@ -114,18 +127,110 @@ begin " expected 0000000D0000000C" severity failure; - -- Non-cacheable read of address 100 + -- Ensure reload completes + wait for 100*clk_period; + wait until rising_edge(clk); + + -- Cacheable read of address 38 (hit on idle cache) + report "cache read address 38..."; d_in.load <= '1'; - d_in.nc <= '1'; - d_in.addr <= x"0000000000000100"; + d_in.nc <= '0'; + d_in.addr <= x"0000000000000038"; + d_in.valid <= '1'; + wait until rising_edge(clk) and stall = '0'; + d_in.valid <= '0'; + + wait until rising_edge(clk) and d_out.valid = '1'; + assert d_out.data = x"0000000F0000000E" + report "data @" & to_hstring(d_in.addr) & + "=" & to_hstring(d_out.data) & + " expected 0000000F0000000E" + severity failure; + + -- Cacheable read of address 130 (miss after hit, same index) + -- This will use way 2 + report "cache read address 130..."; + d_in.load <= '1'; + d_in.nc <= '0'; + d_in.addr <= x"0000000000000130"; d_in.valid <= '1'; + wait until rising_edge(clk) and stall = '0'; + d_in.valid <= '0'; + + wait until rising_edge(clk) and d_out.valid = '1'; + assert d_out.data = x"0000004d0000004c" + report "data @" & to_hstring(d_in.addr) & + "=" & to_hstring(d_out.data) & + " expected 0000004d0000004c" + severity failure; + + -- Ensure reload completes + wait for 100*clk_period; wait until rising_edge(clk); + + -- Cacheable read again of address 130 (hit in idle cache) + -- This should feed from way 2 + report "cache read address 130..."; + d_in.load <= '1'; + d_in.nc <= '0'; + d_in.addr <= x"0000000000000130"; + d_in.valid <= '1'; + wait until rising_edge(clk) and stall = '0'; + d_in.valid <= '0'; + + wait until rising_edge(clk) and d_out.valid = '1'; + assert d_out.data = x"0000004d0000004c" + report "data @" & to_hstring(d_in.addr) & + "=" & to_hstring(d_out.data) & + " expected 0000004d0000004c" + severity failure; + + -- Cacheable read of address 40 + report "cache read address 40..."; + d_in.load <= '1'; + d_in.nc <= '0'; + d_in.addr <= x"0000000000000040"; + d_in.valid <= '1'; + wait until rising_edge(clk); + d_in.valid <= '0'; + + wait until rising_edge(clk) and d_out.valid = '1'; + assert d_out.data = x"0000001100000010" + report "data @" & to_hstring(d_in.addr) & + "=" & to_hstring(d_out.data) & + " expected 0000001100000010" + severity failure; + + -- Cacheable read of address 140 (miss after miss, same index) + -- This should use way 2 + report "cache read address 140..."; + d_in.load <= '1'; + d_in.nc <= '0'; + d_in.addr <= x"0000000000000140"; + d_in.valid <= '1'; + wait until rising_edge(clk) and stall = '0'; + d_in.valid <= '0'; + + wait until rising_edge(clk) and d_out.valid = '1'; + assert d_out.data = x"0000005100000050" + report "data @" & to_hstring(d_in.addr) & + "=" & to_hstring(d_out.data) & + " expected 0000005100000050" + severity failure; + + -- Non-cacheable read of address 200 + report "non-cache read address 200..."; + d_in.load <= '1'; + d_in.nc <= '1'; + d_in.addr <= x"0000000000000200"; + d_in.valid <= '1'; + wait until rising_edge(clk) and stall = '0'; d_in.valid <= '0'; wait until rising_edge(clk) and d_out.valid = '1'; - assert d_out.data = x"0000004100000040" + assert d_out.data = x"0000008100000080" report "data @" & to_hstring(d_in.addr) & "=" & to_hstring(d_out.data) & - " expected 0000004100000040" + " expected 0000008100000080" severity failure; wait until rising_edge(clk); From 76f61ef823f03349d4a41791b9c2840259eb3009 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 2 Oct 2022 17:43:58 +1100 Subject: [PATCH 2/2] dcache: Update PLRU on misses as well as hits The current dcache will not update the PLRU on a cache miss which is later satisfied during the reload process. Thus subsequent misses will potentially evict the same cache line. The same issue happens with dcbz which are treated more/less as load misses. This fixes it by triggering a PLRU update when r1.choose_victim, which is set on a miss for one cycle to snapshot the PLRU output. This means we will update the PLRU on the same cycle as we capture its output, which is fine (the new value will be visible on the next cycle). That way, a "miss" will result in a PLRU update to reflect that the entry being refilled is actually used (and will be used to serve subsequent load operations from the same cache line while being refilled). Signed-off-by: Benjamin Herrenschmidt --- dcache.vhdl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dcache.vhdl b/dcache.vhdl index f24e9ec..08b17f8 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -818,7 +818,12 @@ begin process(clk) begin if rising_edge(clk) then - if r1.cache_hit = '1' then + -- We update the PLRU when hitting the cache or when replacing + -- an entry. The PLRU update will be "visible" on the next cycle + -- so the victim selection will correctly see the *old* value. + if r1.cache_hit = '1' or r1.choose_victim = '1' then + report "PLRU update, index=" & to_hstring(r1.hit_index) & + " way=" & to_hstring(r1.hit_way); assert not is_X(r1.hit_index) severity failure; plru_ram(to_integer(r1.hit_index)) <= plru_upd; end if; @@ -1336,6 +1341,8 @@ begin else r1.hit_load_valid <= '0'; end if; + + -- The cache hit indication is used for PLRU updates if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then r1.cache_hit <= '1'; else