From 3edbbf5f1876a46069dd12f52626ae5042fb55e3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 30 Sep 2022 12:04:26 +1000
Subject: [PATCH 1/2] Fix dcache_tb (and add dump of victim way to dcache)

It bitrotted... more signals need to be initialized. This also adds
a lot more accesses with different timing conditions allowing to
test cases of hit during reloads, hit with reload formward, hit on idle
cache etc...

It also exposes a bug where the cache miss caused by the read of 0x140
uses the same victim way as previous cache miss of 0x40 (same index).

This bug will need to be fixed separately, but at least this exposes it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 dcache.vhdl    |   1 +
 dcache_tb.vhdl | 123 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/dcache.vhdl b/dcache.vhdl
index 75c2ce0..f24e9ec 100644
--- a/dcache.vhdl
+++ b/dcache.vhdl
@@ -1496,6 +1496,7 @@ begin
                 -- Record victim way in the cycle after we see a load or dcbz miss
                 if r1.choose_victim = '1' then
                     r1.victim_way <= plru_victim;
+                    report "victim way:" & to_hstring(plru_victim);
                 end if;
                 if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then
                     r1.choose_victim <= '1';
diff --git a/dcache_tb.vhdl b/dcache_tb.vhdl
index 35f727a..b39ad39 100644
--- a/dcache_tb.vhdl
+++ b/dcache_tb.vhdl
@@ -22,6 +22,8 @@ architecture behave of dcache_tb is
     signal wb_bram_out  : wishbone_slave_out;
 
     constant clk_period : time := 10 ns;
+
+    signal stall : std_ulogic;
 begin
     dcache0: entity work.dcache
         generic map(
@@ -33,6 +35,7 @@ begin
             rst => rst,
             d_in => d_in,
             d_out => d_out,
+            stall_out => stall,
             m_in => m_in,
             m_out => m_out,
             wishbone_out => wb_bram_in,
@@ -74,21 +77,31 @@ begin
         d_in.valid <= '0';
         d_in.load <= '0';
         d_in.nc <= '0';
+        d_in.hold <= '0';
+        d_in.dcbz <= '0';
+        d_in.reserve <= '0';
+        d_in.virt_mode <= '0';
+        d_in.priv_mode <= '1';
         d_in.addr <= (others => '0');
         d_in.data <= (others => '0');
+        d_in.byte_sel <= (others => '1');
         m_in.valid <= '0';
         m_in.addr <= (others => '0');
         m_in.pte <= (others => '0');
+        m_in.tlbie <= '0';
+        m_in.doall <= '0';
+        m_in.tlbld <= '0';
 
         wait for 4*clk_period;
         wait until rising_edge(clk);
 
         -- Cacheable read of address 4
+        report "cache read address 4...";
         d_in.load <= '1';
         d_in.nc <= '0';
         d_in.addr <= x"0000000000000004";
         d_in.valid <= '1';
-        wait until rising_edge(clk);
+        wait until rising_edge(clk) and stall = '0';
         d_in.valid <= '0';
 
         wait until rising_edge(clk) and d_out.valid = '1';
@@ -97,14 +110,14 @@ begin
             "=" & to_hstring(d_out.data) &
             " expected 0000000100000000"
             severity failure;
---      wait for clk_period;
 
-        -- Cacheable read of address 30
+        -- Cacheable read of address 30 (hit after hit forward from reload)
+        report "cache read address 30...";
         d_in.load <= '1';
         d_in.nc <= '0';
         d_in.addr <= x"0000000000000030";
         d_in.valid <= '1';
-        wait until rising_edge(clk);
+        wait until rising_edge(clk) and stall = '0';
         d_in.valid <= '0';
 
         wait until rising_edge(clk) and d_out.valid = '1';
@@ -114,18 +127,110 @@ begin
             " expected 0000000D0000000C"
             severity failure;
 
-        -- Non-cacheable read of address 100
+        -- Ensure reload completes
+        wait for 100*clk_period;
+        wait until rising_edge(clk);
+
+        -- Cacheable read of address 38 (hit on idle cache)
+        report "cache read address 38...";
         d_in.load <= '1';
-        d_in.nc <= '1';
-        d_in.addr <= x"0000000000000100";
+        d_in.nc <= '0';
+        d_in.addr <= x"0000000000000038";
+        d_in.valid <= '1';
+        wait until rising_edge(clk) and stall = '0';
+        d_in.valid <= '0';
+
+        wait until rising_edge(clk) and d_out.valid = '1';
+        assert d_out.data = x"0000000F0000000E"
+            report "data @" & to_hstring(d_in.addr) &
+            "=" & to_hstring(d_out.data) &
+            " expected 0000000F0000000E"
+            severity failure;
+
+        -- Cacheable read of address 130 (miss after hit, same index)
+        -- This will use way 2
+        report "cache read address 130...";
+        d_in.load <= '1';
+        d_in.nc <= '0';
+        d_in.addr <= x"0000000000000130";
         d_in.valid <= '1';
+        wait until rising_edge(clk) and stall = '0';
+        d_in.valid <= '0';
+
+        wait until rising_edge(clk) and d_out.valid = '1';
+        assert d_out.data = x"0000004d0000004c"
+            report "data @" & to_hstring(d_in.addr) &
+            "=" & to_hstring(d_out.data) &
+            " expected 0000004d0000004c"
+            severity failure;
+
+        -- Ensure reload completes
+        wait for 100*clk_period;
         wait until rising_edge(clk);
+
+        -- Cacheable read again of address 130 (hit in idle cache)
+        -- This should feed from way 2
+        report "cache read address 130...";
+        d_in.load <= '1';
+        d_in.nc <= '0';
+        d_in.addr <= x"0000000000000130";
+        d_in.valid <= '1';
+        wait until rising_edge(clk) and stall = '0';
+        d_in.valid <= '0';
+
+        wait until rising_edge(clk) and d_out.valid = '1';
+        assert d_out.data = x"0000004d0000004c"
+            report "data @" & to_hstring(d_in.addr) &
+            "=" & to_hstring(d_out.data) &
+            " expected 0000004d0000004c"
+            severity failure;
+
+        -- Cacheable read of address 40
+        report "cache read address 40...";
+        d_in.load <= '1';
+        d_in.nc <= '0';
+        d_in.addr <= x"0000000000000040";
+        d_in.valid <= '1';
+        wait until rising_edge(clk);
+        d_in.valid <= '0';
+
+        wait until rising_edge(clk) and d_out.valid = '1';
+        assert d_out.data = x"0000001100000010"
+            report "data @" & to_hstring(d_in.addr) &
+            "=" & to_hstring(d_out.data) &
+            " expected 0000001100000010"
+            severity failure;
+
+        -- Cacheable read of address 140 (miss after miss, same index)
+        -- This should use way 2
+        report "cache read address 140...";
+        d_in.load <= '1';
+        d_in.nc <= '0';
+        d_in.addr <= x"0000000000000140";
+        d_in.valid <= '1';
+        wait until rising_edge(clk) and stall = '0';
+        d_in.valid <= '0';
+
+        wait until rising_edge(clk) and d_out.valid = '1';
+        assert d_out.data = x"0000005100000050"
+            report "data @" & to_hstring(d_in.addr) &
+            "=" & to_hstring(d_out.data) &
+            " expected 0000005100000050"
+            severity failure;
+
+        -- Non-cacheable read of address 200
+        report "non-cache read address 200...";
+        d_in.load <= '1';
+        d_in.nc <= '1';
+        d_in.addr <= x"0000000000000200";
+        d_in.valid <= '1';
+        wait until rising_edge(clk) and stall = '0';
         d_in.valid <= '0';
         wait until rising_edge(clk) and d_out.valid = '1';
-        assert d_out.data = x"0000004100000040"
+        assert d_out.data = x"0000008100000080"
             report "data @" & to_hstring(d_in.addr) &
             "=" & to_hstring(d_out.data) &
-            " expected 0000004100000040"
+            " expected 0000008100000080"
             severity failure;
 
         wait until rising_edge(clk);

From 76f61ef823f03349d4a41791b9c2840259eb3009 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Sun, 2 Oct 2022 17:43:58 +1100
Subject: [PATCH 2/2] dcache: Update PLRU on misses as well as hits

The current dcache will not update the PLRU on a cache miss which is later
satisfied during the reload process. Thus subsequent misses will potentially
evict the same cache line. The same issue happens with dcbz which are
treated more/less as load misses.

This fixes it by triggering a PLRU update when r1.choose_victim, which is
set on a miss for one cycle to snapshot the PLRU output. This means we will
update the PLRU on the same cycle as we capture its output, which is fine
(the new value will be visible on the next cycle).

That way, a "miss" will result in a PLRU update to reflect that the entry
being refilled is actually used (and will be used to serve subsequent
load operations from the same cache line while being refilled).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 dcache.vhdl | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/dcache.vhdl b/dcache.vhdl
index f24e9ec..08b17f8 100644
--- a/dcache.vhdl
+++ b/dcache.vhdl
@@ -818,7 +818,12 @@ begin
         process(clk)
         begin
             if rising_edge(clk) then
-                if r1.cache_hit = '1' then
+                -- We update the PLRU when hitting the cache or when replacing
+                -- an entry. The PLRU update will be "visible" on the next cycle
+                -- so the victim selection will correctly see the *old* value.
+                if r1.cache_hit = '1' or r1.choose_victim = '1' then
+                    report "PLRU update, index=" & to_hstring(r1.hit_index) &
+                        " way=" & to_hstring(r1.hit_way);
                     assert not is_X(r1.hit_index) severity failure;
                     plru_ram(to_integer(r1.hit_index)) <= plru_upd;
                 end if;
@@ -1336,6 +1341,8 @@ begin
 	    else
 		r1.hit_load_valid <= '0';
 	    end if;
+
+            -- The cache hit indication is used for PLRU updates
             if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
                 r1.cache_hit <= '1';
             else