icache: Use next real address to index icache

Now that we are translating the fetch effective address to a real address
one cycle earlier, we can use the real address to index the icache array.
This has the benefit that the set size can be larger than a page,
enabling us to configure the icache to be larger without having to
increase its associativity.  Previously the set size was limited to
the page size to avoid aliasing problems.  Thus for example a 32kB
icache would need to be 8-way associative, resulting in large numbers
of LUTs being used for tag comparisons in FPGA implementations, and
poor timing.  With this change, a 32kB icache can be 1- or 2-way
associative, which means deeper and narrower tag and data RAMs and
fewer tag comparators.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/422/head
Paul Mackerras 1 year ago
parent f9e5622327
commit 73b6004ac6

@ -245,6 +245,7 @@ package common is
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
next_nia: std_ulogic_vector(63 downto 0); next_nia: std_ulogic_vector(63 downto 0);
rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0); rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
next_rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
end record; end record;


type IcacheToDecode1Type is record type IcacheToDecode1Type is record

@ -438,6 +438,7 @@ begin
-- Update outputs to the icache -- Update outputs to the icache
i_out <= r; i_out <= r;
i_out.next_nia <= next_nia; i_out.next_nia <= next_nia;
i_out.next_rpn <= v.rpn;


end process; end process;



@ -158,6 +158,7 @@ architecture rtl of icache is
-- Cache hit state (Latches for 1 cycle BRAM access) -- Cache hit state (Latches for 1 cycle BRAM access)
hit_way : way_sig_t; hit_way : way_sig_t;
hit_nia : std_ulogic_vector(63 downto 0); hit_nia : std_ulogic_vector(63 downto 0);
hit_ra : real_addr_t;
hit_smark : std_ulogic; hit_smark : std_ulogic;
hit_valid : std_ulogic; hit_valid : std_ulogic;
big_endian: std_ulogic; big_endian: std_ulogic;
@ -218,7 +219,7 @@ architecture rtl of icache is
signal log_insn : std_ulogic_vector(35 downto 0); signal log_insn : std_ulogic_vector(35 downto 0);


-- Return the cache line index (tag index) for an address -- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector) return index_sig_t is function get_index(addr: real_addr_t) return index_sig_t is
begin begin
return unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)); return unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS));
end; end;
@ -400,6 +401,7 @@ begin
process(clk) process(clk)
variable replace_way : way_sig_t; variable replace_way : way_sig_t;
variable snoop_addr : real_addr_t; variable snoop_addr : real_addr_t;
variable next_raddr : real_addr_t;
begin begin
replace_way := to_unsigned(0, WAY_BITS); replace_way := to_unsigned(0, WAY_BITS);
if NUM_WAYS > 1 then if NUM_WAYS > 1 then
@ -409,10 +411,11 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
-- Read tags using NIA for next cycle -- Read tags using NIA for next cycle
if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then
cache_tags_set(i) <= ic_tags(to_integer(get_index(i_in.next_nia))); next_raddr := i_in.next_rpn & i_in.next_nia(MIN_LG_PGSZ - 1 downto 0);
cache_tags_set(i) <= ic_tags(to_integer(get_index(next_raddr)));
-- Check for simultaneous write to the same location -- Check for simultaneous write to the same location
tag_overwrite(i) <= '0'; tag_overwrite(i) <= '0';
if r.state = CLR_TAG and r.store_index = get_index(i_in.next_nia) and if r.state = CLR_TAG and r.store_index = get_index(next_raddr) and
to_unsigned(i, WAY_BITS) = replace_way then to_unsigned(i, WAY_BITS) = replace_way then
tag_overwrite(i) <= '1'; tag_overwrite(i) <= '1';
end if; end if;
@ -459,10 +462,10 @@ begin
process(all) process(all)
begin begin
-- Read PLRU bits from array -- Read PLRU bits from array
if is_X(r.hit_nia) then if is_X(r.hit_ra) then
plru_cur <= (others => 'X'); plru_cur <= (others => 'X');
else else
plru_cur <= plru_ram(to_integer(get_index(r.hit_nia))); plru_cur <= plru_ram(to_integer(get_index(r.hit_ra)));
end if; end if;


-- PLRU interface -- PLRU interface
@ -475,35 +478,32 @@ begin
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if r.hit_valid = '1' then if r.hit_valid = '1' then
assert not is_X(r.hit_nia) severity failure; assert not is_X(r.hit_ra) severity failure;
plru_ram(to_integer(get_index(r.hit_nia))) <= plru_upd; plru_ram(to_integer(get_index(r.hit_ra))) <= plru_upd;
end if; end if;
end if; end if;
end process; end process;
end generate; end generate;


-- TLB hit detection and real address generation
itlb_lookup : process(all)
begin
real_addr <= i_in.rpn & i_in.nia(MIN_LG_PGSZ - 1 downto 0);
end process;

-- Cache hit detection, output to fetch2 and other misc logic -- Cache hit detection, output to fetch2 and other misc logic
icache_comb : process(all) icache_comb : process(all)
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
variable hit_way : way_sig_t; variable hit_way : way_sig_t;
variable insn : std_ulogic_vector(ICWORDLEN - 1 downto 0); variable insn : std_ulogic_vector(ICWORDLEN - 1 downto 0);
variable icode : insn_code; variable icode : insn_code;
variable ra : real_addr_t;
begin begin
-- Extract line, row and tag from request -- Extract line, row and tag from request
req_index <= get_index(i_in.nia); ra := i_in.rpn & i_in.nia(MIN_LG_PGSZ - 1 downto 0);
req_row <= get_row(i_in.nia); real_addr <= ra;
req_tag <= get_tag(real_addr, i_in.big_endian); req_index <= get_index(ra);
req_row <= get_row(ra);
req_tag <= get_tag(ra, i_in.big_endian);


-- Calculate address of beginning of cache row, will be -- Calculate address of beginning of cache row, will be
-- used for cache miss processing if needed -- used for cache miss processing if needed
-- --
req_raddr <= real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) & req_raddr <= ra(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
(ROW_OFF_BITS-1 downto 0 => '0'); (ROW_OFF_BITS-1 downto 0 => '0');


-- Test if pending request is a hit on any way -- Test if pending request is a hit on any way
@ -627,6 +627,7 @@ begin
-- Send stop marks and NIA down regardless of validity -- Send stop marks and NIA down regardless of validity
r.hit_smark <= i_in.stop_mark; r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia; r.hit_nia <= i_in.nia;
r.hit_ra <= real_addr;
r.big_endian <= i_in.big_endian; r.big_endian <= i_in.big_endian;
r.predicted <= i_in.predicted; r.predicted <= i_in.predicted;
r.pred_ntaken <= i_in.pred_ntaken; r.pred_ntaken <= i_in.pred_ntaken;

Loading…
Cancel
Save