dcache: Do PLRU update one cycle later

This does the PLRU update based on r1.cache_hit and r1.hit_way rather
than req_op and req_hit_way, which means there is now a register
between the TLB and cache tag lookup and the PLRU update, which should
help with timing.

The PLRU victim selection now becomes valid one cycle later, in the
cycle where r1.write_tag = 1.  We now have replace_way coming from
the PLRU when r1.write_tag = 1 and from r1.store_way at other times,
and we use that instead of r1.store_way in situations where we need
it to be valid in the first cycle of the RELOAD_WAIT_ACK state.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/233/head
Paul Mackerras 5 years ago
parent 144d8e3c61
commit 31587affb3

@ -231,7 +231,6 @@ architecture rtl of dcache is
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);
hit_way : way_t; hit_way : way_t;
repl_way : way_t;
same_tag : std_ulogic; same_tag : std_ulogic;
end record; end record;


@ -247,6 +246,8 @@ architecture rtl of dcache is
-- Cache hit state -- Cache hit state
hit_way : way_t; hit_way : way_t;
hit_load_valid : std_ulogic; hit_load_valid : std_ulogic;
hit_index : index_t;
cache_hit : std_ulogic;


-- 2-stage data buffer for data forwarded from writes to reads -- 2-stage data buffer for data forwarded from writes to reads
forward_data1 : std_ulogic_vector(63 downto 0); forward_data1 : std_ulogic_vector(63 downto 0);
@ -677,16 +678,15 @@ begin
lru => plru_out lru => plru_out
); );


process(req_index, req_op, req_hit_way, plru_out) process(all)
begin begin
-- PLRU interface -- PLRU interface
if (req_op = OP_LOAD_HIT or if r1.hit_index = i then
req_op = OP_STORE_HIT) and req_index = i then plru_acc_en <= r1.cache_hit;
plru_acc_en <= '1';
else else
plru_acc_en <= '0'; plru_acc_en <= '0';
end if; end if;
plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS)); plru_acc <= std_ulogic_vector(to_unsigned(r1.hit_way, WAY_BITS));
plru_victim(i) <= plru_out; plru_victim(i) <= plru_out;
end process; end process;
end generate; end generate;
@ -788,7 +788,7 @@ begin
-- since it will be by the time we perform the store. -- since it will be by the time we perform the store.
-- For a load, check the appropriate row valid bit. -- For a load, check the appropriate row valid bit.
is_hit := not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE); is_hit := not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE);
hit_way := r1.store_way; hit_way := replace_way;
end if; end if;


-- Whether to use forwarded data for a load or not -- Whether to use forwarded data for a load or not
@ -812,7 +812,11 @@ begin
req_hit_way <= hit_way; req_hit_way <= hit_way;


-- The way to replace on a miss -- The way to replace on a miss
replace_way <= to_integer(unsigned(plru_victim(req_index))); if r1.write_tag = '1' then
replace_way <= to_integer(unsigned(plru_victim(r1.store_index)));
else
replace_way <= r1.store_way;
end if;


-- work out whether we have permission for this access -- work out whether we have permission for this access
-- NB we don't yet implement AMR, thus no KUAP -- NB we don't yet implement AMR, thus no KUAP
@ -1079,7 +1083,7 @@ begin
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS)); wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
wr_sel <= (others => '1'); wr_sel <= (others => '1');


if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and replace_way = i then
do_write <= '1'; do_write <= '1';
end if; end if;
end if; end if;
@ -1113,12 +1117,18 @@ begin
end if; end if;


-- Fast path for load/store hits. Set signals for the writeback controls. -- Fast path for load/store hits. Set signals for the writeback controls.
if req_op = OP_LOAD_HIT then
r1.hit_way <= req_hit_way; r1.hit_way <= req_hit_way;
r1.hit_index <= req_index;
if req_op = OP_LOAD_HIT then
r1.hit_load_valid <= '1'; r1.hit_load_valid <= '1';
else else
r1.hit_load_valid <= '0'; r1.hit_load_valid <= '0';
end if; end if;
if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
r1.cache_hit <= '1';
else
r1.cache_hit <= '0';
end if;


if req_op = OP_BAD then if req_op = OP_BAD then
report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) & report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) &
@ -1179,7 +1189,7 @@ begin
r1.forward_data1 <= wishbone_in.dat; r1.forward_data1 <= wishbone_in.dat;
end if; end if;
r1.forward_sel1 <= (others => '1'); r1.forward_sel1 <= (others => '1');
r1.forward_way1 <= r1.store_way; r1.forward_way1 <= replace_way;
r1.forward_row1 <= r1.store_row; r1.forward_row1 <= r1.store_row;
r1.forward_valid1 <= '0'; r1.forward_valid1 <= '0';
end if; end if;
@ -1205,11 +1215,12 @@ begin
if r1.write_tag = '1' then if r1.write_tag = '1' then
-- Store new tag in selected way -- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop for i in 0 to NUM_WAYS-1 loop
if i = r1.store_way then if i = replace_way then
cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1 downto i * TAG_WIDTH) <= cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1 downto i * TAG_WIDTH) <=
(TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag; (TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag;
end if; end if;
end loop; end loop;
r1.store_way <= replace_way;
r1.write_tag <= '0'; r1.write_tag <= '0';
end if; end if;


@ -1224,7 +1235,6 @@ begin
req.data := r0.req.data; req.data := r0.req.data;
req.byte_sel := r0.req.byte_sel; req.byte_sel := r0.req.byte_sel;
req.hit_way := req_hit_way; req.hit_way := req_hit_way;
req.repl_way := replace_way;
req.same_tag := req_same_tag; req.same_tag := req_same_tag;


-- Store the incoming request from r0, if it is a slow request -- Store the incoming request from r0, if it is a slow request
@ -1251,8 +1261,6 @@ begin


if req.op = OP_STORE_HIT then if req.op = OP_STORE_HIT then
r1.store_way <= req.hit_way; r1.store_way <= req.hit_way;
else
r1.store_way <= req.repl_way;
end if; end if;


-- Reset per-row valid bits, ready for handling OP_LOAD_MISS -- Reset per-row valid bits, ready for handling OP_LOAD_MISS
@ -1269,7 +1277,6 @@ begin
-- --
report "cache miss real addr:" & to_hstring(req.real_addr) & report "cache miss real addr:" & to_hstring(req.real_addr) &
" idx:" & integer'image(get_index(req.real_addr)) & " idx:" & integer'image(get_index(req.real_addr)) &
" way:" & integer'image(req.repl_way) &
" tag:" & to_hstring(get_tag(req.real_addr)); " tag:" & to_hstring(get_tag(req.real_addr));


-- Start the wishbone cycle -- Start the wishbone cycle
@ -1310,7 +1317,9 @@ begin


-- Handle the rest like a load miss -- Handle the rest like a load miss
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;
if req.op = OP_STORE_MISS then
r1.write_tag <= '1'; r1.write_tag <= '1';
end if;
r1.dcbz <= '1'; r1.dcbz <= '1';
end if; end if;
r1.wb.we <= '1'; r1.wb.we <= '1';

Loading…
Cancel
Save