dcache: Update TLB PLRU one cycle later

This puts the inputs to the TLB PLRU through a register stage, so
the TLB PLRU update is done in the cycle after the TLB tag
matching rather than the same cycle.  This improves timing.
The PLRU output is only used when writing the TLB in response to
a tlbwe request from the MMU, and that doesn't happen within one
cycle of a virtual-mode load or store, so the fact that the
tlb victim way information is delayed by one cycle doesn't
create any problems.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 5 years ago
parent b2ba024a48
commit c01e1c7b91

@ -249,6 +249,11 @@ architecture rtl of dcache is
hit_index : index_t;
cache_hit : std_ulogic;

-- TLB hit state
tlb_hit : std_ulogic;
tlb_hit_way : tlb_way_t;
tlb_hit_index : tlb_index_t;

-- 2-stage data buffer for data forwarded from writes to reads
forward_data1 : std_ulogic_vector(63 downto 0);
forward_data2 : std_ulogic_vector(63 downto 0);
@ -567,15 +572,15 @@ begin
lru => tlb_plru_out
);

process(tlb_req_index, tlb_hit, tlb_hit_way, tlb_plru_out)
process(all)
begin
-- PLRU interface
if tlb_hit = '1' and tlb_req_index = i then
tlb_plru_acc_en <= '1';
if r1.tlb_hit_index = i then
tlb_plru_acc_en <= r1.tlb_hit;
else
tlb_plru_acc_en <= '0';
end if;
tlb_plru_acc <= std_ulogic_vector(to_unsigned(tlb_hit_way, TLB_WAY_BITS));
tlb_plru_acc <= std_ulogic_vector(to_unsigned(r1.tlb_hit_way, TLB_WAY_BITS));
tlb_plru_victim(i) <= tlb_plru_out;
end process;
end generate;
@ -1146,6 +1151,11 @@ begin
r1.stcx_fail <= '0';
end if;

-- Record TLB hit information for updating TLB PLRU
r1.tlb_hit <= tlb_hit;
r1.tlb_hit_way <= tlb_hit_way;
r1.tlb_hit_index <= tlb_req_index;

-- complete tlbies and TLB loads in the third cycle
r1.tlbie_done <= r0_valid and (r0.tlbie or r0.tlbld);
end if;

Loading…
Cancel
Save