dcache: Use expanded per-way TLB and cache tag hit information

Rather than combining the results of the per-way comparators into
an encoded 'hit_way' variable, feed the individual per-way results
directly into AND-OR type networks where possible, in order to
reduce utilization and improve timing.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/441/head
Paul Mackerras 2 months ago
parent c5abe3c0a9
commit ec323897e3

@ -101,6 +101,7 @@ architecture rtl of dcache is
subtype row_t is unsigned(ROW_BITS-1 downto 0); subtype row_t is unsigned(ROW_BITS-1 downto 0);
subtype index_t is unsigned(INDEX_BITS-1 downto 0); subtype index_t is unsigned(INDEX_BITS-1 downto 0);
subtype way_t is unsigned(WAY_BITS-1 downto 0); subtype way_t is unsigned(WAY_BITS-1 downto 0);
subtype way_expand_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0); subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);


-- The cache data BRAM organized as described above for each way -- The cache data BRAM organized as described above for each way
@ -149,7 +150,7 @@ architecture rtl of dcache is
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0); subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
subtype tlb_way_ptes_t is std_ulogic_vector(TLB_PTE_WAY_BITS-1 downto 0); subtype tlb_way_ptes_t is std_ulogic_vector(TLB_PTE_WAY_BITS-1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_way_ptes_t; type tlb_ptes_t is array(tlb_index_t) of tlb_way_ptes_t;
type hit_way_set_t is array(tlb_way_t) of way_t; type tlb_expand_t is array(tlb_way_t) of std_ulogic;


signal dtlb_valids : tlb_valids_t; signal dtlb_valids : tlb_valids_t;
signal dtlb_tags : tlb_tags_t; signal dtlb_tags : tlb_tags_t;
@ -179,6 +180,13 @@ architecture rtl of dcache is
return pa; return pa;
end; end;


-- One stage of an AND-OR selection network for a single byte.
--   mask : select bit; when '1' the bits of in1 are passed through,
--          when '0' in1 contributes all zeros
--   in1  : candidate byte, gated bit-by-bit with mask
--   in2  : byte accumulated so far, OR'ed with the gated in1
-- Returns in2 or (in1 gated by mask).  Chaining one call per source,
-- starting from an all-zero byte, ORs together every source whose
-- mask bit is set.
function andor(mask : std_ulogic; in1 : std_ulogic_vector(7 downto 0);
               in2 : std_ulogic_vector(7 downto 0)) return std_ulogic_vector is
    variable gated : std_ulogic_vector(7 downto 0);
begin
    -- AND every bit of in1 with the single mask bit
    for b in gated'range loop
        gated(b) := in1(b) and mask;
    end loop;
    -- merge the gated byte into the running value
    return in2 or gated;
end function andor;

constant real_mode_perm_attr : perm_attr_t := (nocache => '0', others => '1'); constant real_mode_perm_attr : perm_attr_t := (nocache => '0', others => '1');


-- Cache state machine -- Cache state machine
@ -401,6 +409,7 @@ architecture rtl of dcache is
-- Async signals on incoming request -- Async signals on incoming request
signal req_index : index_t; signal req_index : index_t;
signal req_hit_way : way_t; signal req_hit_way : way_t;
signal req_hit_ways : way_expand_t;
signal req_is_hit : std_ulogic; signal req_is_hit : std_ulogic;
signal req_tag : cache_tag_t; signal req_tag : cache_tag_t;
signal req_op_load_hit : std_ulogic; signal req_op_load_hit : std_ulogic;
@ -448,6 +457,7 @@ architecture rtl of dcache is
signal tlb_read_valid : std_ulogic; signal tlb_read_valid : std_ulogic;
signal tlb_hit : std_ulogic; signal tlb_hit : std_ulogic;
signal tlb_hit_way : tlb_way_sig_t; signal tlb_hit_way : tlb_way_sig_t;
signal tlb_hit_expand : tlb_expand_t;
signal pte : tlb_pte_t; signal pte : tlb_pte_t;
signal ra : real_addr_t; signal ra : real_addr_t;
signal valid_ra : std_ulogic; signal valid_ra : std_ulogic;
@ -741,33 +751,34 @@ begin
variable hitway : tlb_way_sig_t; variable hitway : tlb_way_sig_t;
variable hit : std_ulogic; variable hit : std_ulogic;
variable eatag : tlb_tag_t; variable eatag : tlb_tag_t;
variable hitpte : tlb_pte_t;
begin begin
tlb_req_index <= unsigned(r0.req.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 tlb_req_index <= unsigned(r0.req.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
downto TLB_LG_PGSZ)); downto TLB_LG_PGSZ));
hitway := to_unsigned(0, TLB_WAY_BITS); hitway := to_unsigned(0, TLB_WAY_BITS);
hit := '0'; hit := '0';
hitpte := (others => '0');
tlb_hit_expand <= (others => '0');
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
for i in tlb_way_t loop for i in tlb_way_t loop
if tlb_read_valid = '1' and tlb_valid_way(i) = '1' and if tlb_valid_way(i) = '1' and
read_tlb_tag(i, tlb_tag_way) = eatag then read_tlb_tag(i, tlb_tag_way) = eatag then
hitway := to_unsigned(i, TLB_WAY_BITS); hitway := to_unsigned(i, TLB_WAY_BITS);
hit := '1'; hit := tlb_read_valid;
hitpte := hitpte or read_tlb_pte(i, tlb_pte_way);
tlb_hit_expand(i) <= '1';
end if; end if;
end loop; end loop;
tlb_hit <= hit and r0_valid; tlb_hit <= hit and r0_valid;
tlb_hit_way <= hitway; tlb_hit_way <= hitway;
if tlb_hit = '1' then pte <= hitpte;
pte <= read_tlb_pte(to_integer(hitway), tlb_pte_way);
else
pte <= (others => '0');
end if;
valid_ra <= tlb_hit or not r0.req.virt_mode; valid_ra <= tlb_hit or not r0.req.virt_mode;
tlb_miss <= r0_valid and r0.req.virt_mode and not tlb_hit; tlb_miss <= r0_valid and r0.req.virt_mode and not tlb_hit;
if r0.req.virt_mode = '1' then if r0.req.virt_mode = '1' then
ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & ra <= hitpte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) & r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
(ROW_OFF_BITS-1 downto 0 => '0'); (ROW_OFF_BITS-1 downto 0 => '0');
perm_attr <= extract_perm_attr(pte); perm_attr <= extract_perm_attr(hitpte);
else else
ra <= r0.req.addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) & ra <= r0.req.addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
(ROW_OFF_BITS-1 downto 0 => '0'); (ROW_OFF_BITS-1 downto 0 => '0');
@ -793,11 +804,13 @@ begin
dtlb_valids(i) <= (others => '0'); dtlb_valids(i) <= (others => '0');
end loop; end loop;
elsif tlbie = '1' then elsif tlbie = '1' then
if tlb_hit = '1' then for i in tlb_way_t loop
assert not is_X(tlb_req_index); if tlb_hit_expand(i) = '1' then
assert not is_X(tlb_hit_way); assert not is_X(tlb_req_index);
dtlb_valids(to_integer(tlb_req_index))(to_integer(tlb_hit_way)) <= '0'; assert not is_X(tlb_hit_way);
end if; dtlb_valids(to_integer(tlb_req_index))(i) <= '0';
end if;
end loop;
elsif tlbwe = '1' then elsif tlbwe = '1' then
assert not is_X(tlb_req_index); assert not is_X(tlb_req_index);
repl_way := to_unsigned(0, TLB_WAY_BITS); repl_way := to_unsigned(0, TLB_WAY_BITS);
@ -941,19 +954,15 @@ begin
variable rindex : index_t; variable rindex : index_t;
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
variable hit_way : way_t; variable hit_way : way_t;
variable hit_ways : way_expand_t;
variable go : std_ulogic; variable go : std_ulogic;
variable nc : std_ulogic; variable nc : std_ulogic;
variable s_hit : std_ulogic; variable s_hit : std_ulogic;
variable s_tag : cache_tag_t; variable s_tag : cache_tag_t;
variable s_pte : tlb_pte_t; variable s_pte : tlb_pte_t;
variable s_ra : real_addr_t; variable s_ra : real_addr_t;
variable hit_set : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable hit_way_set : hit_way_set_t;
variable rel_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable rel_match : std_ulogic; variable rel_match : std_ulogic;
variable fwd_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable fwd_match : std_ulogic; variable fwd_match : std_ulogic;
variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable snoop_match : std_ulogic; variable snoop_match : std_ulogic;
variable hit_reload : std_ulogic; variable hit_reload : std_ulogic;
variable dawr_match : std_ulogic; variable dawr_match : std_ulogic;
@ -982,51 +991,44 @@ begin
-- we compare each way with each of the real addresses from each way of -- we compare each way with each of the real addresses from each way of
-- the TLB, and then decide later which match to use. -- the TLB, and then decide later which match to use.
hit_way := to_unsigned(0, WAY_BITS); hit_way := to_unsigned(0, WAY_BITS);
hit_ways := (others => '0');
is_hit := '0'; is_hit := '0';
rel_match := '0'; rel_match := '0';
fwd_match := '0'; fwd_match := '0';
snoop_match := '0'; snoop_match := '0';
if r0.req.virt_mode = '1' then if r0.req.virt_mode = '1' then
rel_matches := (others => '0');
fwd_matches := (others => '0');
snp_matches := (others => '0');
for j in tlb_way_t loop for j in tlb_way_t loop
hit_way_set(j) := to_unsigned(0, WAY_BITS); if tlb_valid_way(j) = '1' then
s_hit := '0'; s_hit := '0';
s_pte := read_tlb_pte(j, tlb_pte_way); s_pte := read_tlb_pte(j, tlb_pte_way);
s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
r0.req.addr(TLB_LG_PGSZ - 1 downto 0); r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
s_tag := get_tag(s_ra); s_tag := get_tag(s_ra);
if go = '1' then
assert not is_X(s_tag); assert not is_X(s_tag);
end if; for i in 0 to NUM_WAYS-1 loop
for i in 0 to NUM_WAYS-1 loop if cache_valids(to_integer(rindex))(i) = '1' and
if go = '1' and cache_valids(to_integer(rindex))(i) = '1' and read_tag(i, cache_tag_set) = s_tag and
read_tag(i, cache_tag_set) = s_tag and tlb_hit_expand(j) = '1' then
tlb_valid_way(j) = '1' then hit_ways(i) := '1';
hit_way_set(j) := to_unsigned(i, WAY_BITS); hit_way := to_unsigned(i, WAY_BITS);
s_hit := '1'; if go = '1' then
if snoop_hits(i) = '1' then is_hit := '1';
snp_matches(j) := '1'; if snoop_hits(i) = '1' then
snoop_match := '1';
end if;
end if;
end if;
end loop;
if go = '1' and tlb_hit_expand(j) = '1' then
if not is_X(r1.reload_tag) and s_tag = r1.reload_tag then
rel_match := '1';
end if;
if s_tag = r1.forward_tag then
fwd_match := '1';
end if; end if;
end if; end if;
end loop;
hit_set(j) := s_hit;
if go = '1' and not is_X(r1.reload_tag) and s_tag = r1.reload_tag then
rel_matches(j) := '1';
end if;
if go = '1' and s_tag = r1.forward_tag then
fwd_matches(j) := '1';
end if; end if;
end loop; end loop;
if tlb_hit = '1' and go = '1' then
assert not is_X(tlb_hit_way);
is_hit := hit_set(to_integer(tlb_hit_way));
hit_way := hit_way_set(to_integer(tlb_hit_way));
rel_match := rel_matches(to_integer(tlb_hit_way));
fwd_match := fwd_matches(to_integer(tlb_hit_way));
snoop_match := snp_matches(to_integer(tlb_hit_way));
end if;
else else
s_tag := get_tag(r0.req.addr); s_tag := get_tag(r0.req.addr);
if go = '1' then if go = '1' then
@ -1035,6 +1037,7 @@ begin
for i in 0 to NUM_WAYS-1 loop for i in 0 to NUM_WAYS-1 loop
if go = '1' and cache_valids(to_integer(rindex))(i) = '1' and if go = '1' and cache_valids(to_integer(rindex))(i) = '1' and
read_tag(i, cache_tag_set) = s_tag then read_tag(i, cache_tag_set) = s_tag then
hit_ways(i) := '1';
hit_way := to_unsigned(i, WAY_BITS); hit_way := to_unsigned(i, WAY_BITS);
is_hit := '1'; is_hit := '1';
if snoop_hits(i) = '1' then if snoop_hits(i) = '1' then
@ -1121,6 +1124,8 @@ begin
r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or
use_forward_rl; use_forward_rl;
hit_way := replace_way; hit_way := replace_way;
hit_ways := (others => '0');
hit_ways(to_integer(replace_way)) := '1';
hit_reload := is_hit; hit_reload := is_hit;
elsif r0.req.load = '1' and r0.req.atomic_qw = '1' and r0.req.atomic_first = '0' and elsif r0.req.load = '1' and r0.req.atomic_qw = '1' and r0.req.atomic_first = '0' and
r0.req.nc = '0' and perm_attr.nocache = '0' and r1.prev_hit = '1' then r0.req.nc = '0' and perm_attr.nocache = '0' and r1.prev_hit = '1' then
@ -1132,10 +1137,13 @@ begin
-- NB lq to noncacheable isn't required to be atomic per the ISA. -- NB lq to noncacheable isn't required to be atomic per the ISA.
is_hit := '1'; is_hit := '1';
hit_way := r1.prev_way; hit_way := r1.prev_way;
hit_ways := (others => '0');
hit_ways(to_integer(r1.prev_way)) := '1';
end if; end if;


-- The way that matched on a hit -- The way that matched on a hit
req_hit_way <= hit_way; req_hit_way <= hit_way;
req_hit_ways <= hit_ways;
req_is_hit <= is_hit; req_is_hit <= is_hit;
req_hit_reload <= hit_reload; req_hit_reload <= hit_reload;


@ -1357,6 +1365,7 @@ begin
variable j : integer; variable j : integer;
variable sel : std_ulogic_vector(1 downto 0); variable sel : std_ulogic_vector(1 downto 0);
variable data_out : std_ulogic_vector(63 downto 0); variable data_out : std_ulogic_vector(63 downto 0);
variable byte_out : std_ulogic_vector(7 downto 0);
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if r0_valid = '1' then if r0_valid = '1' then
@ -1386,11 +1395,12 @@ begin
when "10" => when "10" =>
data_out(j + 7 downto j) := r1.forward_data(j + 7 downto j); data_out(j + 7 downto j) := r1.forward_data(j + 7 downto j);
when others => when others =>
if is_X(req_hit_way) then byte_out := (others => '0');
data_out(j + 7 downto j) := (others => 'X'); for w in 0 to NUM_WAYS-1 loop
else byte_out := andor(req_hit_ways(w), cache_out(w)(j + 7 downto j),
data_out(j + 7 downto j) := cache_out(to_integer(req_hit_way))(j + 7 downto j); byte_out);
end if; end loop;
data_out(j + 7 downto j) := byte_out;
end case; end case;
end loop; end loop;
r1.data_out <= data_out; r1.data_out <= data_out;

Loading…
Cancel
Save