Move iTLB from icache to fetch1

This moves the address translation step for instruction fetches one
cycle earlier, so that it now happens in the fetch1 stage.  There is
now a 2-entry mini translation cache ("ERAT", or effective to real
address translation cache) which operates on the output of the
multiplexer that selects the instruction address for the next cycle.
The ERAT consists of two effective address registers and two
corresponding real address registers.  They store the page number part
of the addresses for a 4kB page size, which is the smallest page size
supported by the architecture.

If the effective address doesn't match either of the EA registers, and
address translation is enabled, then i_out.req goes low for two cycles
while the iTLB is looked up.  Experimentally, this delay results in a
0.1% drop in coremark performance; allowing two cycles for the lookup
results in better timing.  The result from the iTLB is placed into the
least recently used ERAT entry and then used to translate the address
as normal.  If address translation is not enabled then the EA is used
directly as the real address.

The iTLB structure is the same as it was before; direct mapped,
indexed using a hashed EA.

The "fetch failed" signal, which indicates a TLB miss or protection
violation, is now generated in fetch1 and passed through icache.
When it is asserted, fetch1 goes into a stalled state until a PTE
arrives from the MMU (which gets put into both the iTLB and the ERAT),
or an interrupt or redirect occurs.

Any TLB invalidations from the MMU invalidate the whole ERAT.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/422/head
Paul Mackerras 1 year ago
parent 27c50bc311
commit f9e5622327

@ -194,6 +194,10 @@ package common is
subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t; function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t;


-- Minimum page size
constant MIN_LG_PGSZ : positive := 12;
constant MIN_PAGESZ : positive := 2 ** MIN_LG_PGSZ;

-- Used for tracking instruction completion and pending register writes -- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4; constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT); constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
@ -231,6 +235,7 @@ package common is


type Fetch1ToIcacheType is record type Fetch1ToIcacheType is record
req: std_ulogic; req: std_ulogic;
fetch_fail : std_ulogic;
virt_mode : std_ulogic; virt_mode : std_ulogic;
priv_mode : std_ulogic; priv_mode : std_ulogic;
big_endian : std_ulogic; big_endian : std_ulogic;
@ -239,6 +244,7 @@ package common is
pred_ntaken : std_ulogic; pred_ntaken : std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
next_nia: std_ulogic_vector(63 downto 0); next_nia: std_ulogic_vector(63 downto 0);
rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
end record; end record;


type IcacheToDecode1Type is record type IcacheToDecode1Type is record
@ -607,7 +613,7 @@ package common is
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
end record; end record;


type MmuToIcacheType is record type MmuToITLBType is record
tlbld : std_ulogic; tlbld : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
doall : std_ulogic; doall : std_ulogic;

@ -57,7 +57,7 @@ architecture behave of core is
signal fetch1_to_icache : Fetch1ToIcacheType; signal fetch1_to_icache : Fetch1ToIcacheType;
signal writeback_to_fetch1: WritebackToFetch1Type; signal writeback_to_fetch1: WritebackToFetch1Type;
signal icache_to_decode1 : IcacheToDecode1Type; signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType; signal mmu_to_itlb : MmuToITLBType;


-- decode signals -- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type; signal decode1_to_decode2: Decode1ToDecode2Type;
@ -223,6 +223,7 @@ begin
generic map ( generic map (
RESET_ADDRESS => (others => '0'), RESET_ADDRESS => (others => '0'),
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS, ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
TLB_SIZE => ICACHE_TLB_SIZE,
HAS_BTC => HAS_BTC HAS_BTC => HAS_BTC
) )
port map ( port map (
@ -231,8 +232,9 @@ begin
alt_reset_in => alt_reset_d, alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in, stall_in => fetch1_stall_in,
flush_in => fetch1_flush, flush_in => fetch1_flush,
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie, inval_btc => ex1_icache_inval or mmu_to_itlb.tlbie,
stop_in => dbg_core_stop, stop_in => dbg_core_stop,
m_in => mmu_to_itlb,
d_in => decode1_to_fetch1, d_in => decode1_to_fetch1,
w_in => writeback_to_fetch1, w_in => writeback_to_fetch1,
i_out => fetch1_to_icache, i_out => fetch1_to_icache,
@ -249,7 +251,6 @@ begin
LINE_SIZE => 64, LINE_SIZE => 64,
NUM_LINES => ICACHE_NUM_LINES, NUM_LINES => ICACHE_NUM_LINES,
NUM_WAYS => ICACHE_NUM_WAYS, NUM_WAYS => ICACHE_NUM_WAYS,
TLB_SIZE => ICACHE_TLB_SIZE,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map( port map(
@ -257,7 +258,6 @@ begin
rst => rst_icache, rst => rst_icache,
i_in => fetch1_to_icache, i_in => fetch1_to_icache,
i_out => icache_to_decode1, i_out => icache_to_decode1,
m_in => mmu_to_icache,
flush_in => fetch1_flush, flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval, inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in, stall_in => icache_stall_in,
@ -454,7 +454,7 @@ begin
l_out => mmu_to_loadstore1, l_out => mmu_to_loadstore1,
d_out => mmu_to_dcache, d_out => mmu_to_dcache,
d_in => dcache_to_mmu, d_in => dcache_to_mmu,
i_out => mmu_to_icache i_out => mmu_to_itlb
); );


dcache_0: entity work.dcache dcache_0: entity work.dcache

@ -3,12 +3,14 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.utils.all;
use work.common.all; use work.common.all;


entity fetch1 is entity fetch1 is
generic( generic(
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0'); RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0'); ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
TLB_SIZE : positive := 64; -- L1 ITLB number of entries (direct mapped)
HAS_BTC : boolean := true HAS_BTC : boolean := true
); );
port( port(
@ -21,6 +23,7 @@ entity fetch1 is
inval_btc : in std_ulogic; inval_btc : in std_ulogic;
stop_in : in std_ulogic; stop_in : in std_ulogic;
alt_reset_in : in std_ulogic; alt_reset_in : in std_ulogic;
m_in : in MmuToITLBType;


-- redirect from writeback unit -- redirect from writeback unit
w_in : in WritebackToFetch1Type; w_in : in WritebackToFetch1Type;
@ -40,13 +43,32 @@ architecture behaviour of fetch1 is
type reg_internal_t is record type reg_internal_t is record
mode_32bit: std_ulogic; mode_32bit: std_ulogic;
rd_is_niap4: std_ulogic; rd_is_niap4: std_ulogic;
tlbcheck: std_ulogic;
tlbstall: std_ulogic;
next_nia: std_ulogic_vector(63 downto 0); next_nia: std_ulogic_vector(63 downto 0);
end record; end record;

-- Mini effective to real translation cache
-- State of the two-entry ERAT.  Entry N consists of epnN, rpnN, privN
-- and valid(N); mru records which of the two was used or filled last.
type erat_t is record
    -- Effective page numbers: address bits 63 downto MIN_LG_PGSZ that the
    -- next fetch address is compared against.
    epn0: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
    epn1: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
    -- Real page numbers: PTE bits REAL_ADDR_BITS-1 downto MIN_LG_PGSZ,
    -- output as the rpn of the fetch request when the entry is selected.
    rpn0: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
    rpn1: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
    -- Copy of PTE bit 3 for each entry; when set, a fetch through the
    -- entry with priv_mode = '0' raises fetch_fail.
    priv0: std_ulogic;
    priv1: std_ulogic;
    -- Per-entry valid bits: valid(0) for entry 0, valid(1) for entry 1.
    valid: std_ulogic_vector(1 downto 0);
    mru: std_ulogic; -- '1' => entry 1 most recently used
end record;

signal r, r_next : Fetch1ToIcacheType; signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t; signal r_int, r_next_int : reg_internal_t;
signal advance_nia : std_ulogic; signal advance_nia : std_ulogic;
signal log_nia : std_ulogic_vector(42 downto 0); signal log_nia : std_ulogic_vector(42 downto 0);


signal erat : erat_t;
signal erat_hit : std_ulogic;
signal erat_sel : std_ulogic;

constant BTC_ADDR_BITS : integer := 10; constant BTC_ADDR_BITS : integer := 10;
constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS; constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;
constant BTC_TARGET_BITS : integer := 62; constant BTC_TARGET_BITS : integer := 62;
@ -58,6 +80,41 @@ architecture behaviour of fetch1 is
signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0'); signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
signal btc_rd_valid : std_ulogic := '0'; signal btc_rd_valid : std_ulogic := '0';


-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (MIN_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;

subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;

-- Values read from above arrays on a clock edge
signal itlb_valid : std_ulogic;
signal itlb_ttag : tlb_tag_t;
signal itlb_pte : tlb_pte_t;
signal itlb_hit : std_ulogic;

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Index hash for the direct-mapped iTLB.
-- XORs together the three consecutive TLB_BITS-wide fields of addr that
-- start at bit MIN_LG_PGSZ (just above the minimum-size page offset) and
-- returns the TLB_BITS-wide result.
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
    constant BASE : natural := MIN_LG_PGSZ;
    variable field0 : std_ulogic_vector(TLB_BITS - 1 downto 0);
    variable field1 : std_ulogic_vector(TLB_BITS - 1 downto 0);
    variable field2 : std_ulogic_vector(TLB_BITS - 1 downto 0);
    variable folded : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
    field0 := addr(BASE + TLB_BITS - 1 downto BASE);
    field1 := addr(BASE + 2 * TLB_BITS - 1 downto BASE + TLB_BITS);
    field2 := addr(BASE + 3 * TLB_BITS - 1 downto BASE + 2 * TLB_BITS);
    -- Assign to a constrained variable first so the returned value keeps
    -- the (TLB_BITS - 1 downto 0) index range.
    folded := field0 xor field1 xor field2;
    return folded;
end;

begin begin


regs : process(clk) regs : process(clk)
@ -75,7 +132,8 @@ begin
" S:" & std_ulogic'image(stall_in) & " S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) & " T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia) & " nia:" & to_hstring(r_next.nia) &
" req:" & std_ulogic'image(r_next.req); " req:" & std_ulogic'image(r_next.req) &
" FF:" & std_ulogic'image(r_next.fetch_fail);
end if; end if;
if advance_nia = '1' then if advance_nia = '1' then
r <= r_next; r <= r_next;
@ -84,6 +142,9 @@ begin
-- always send the up-to-date stop mark and req -- always send the up-to-date stop mark and req
r.stop_mark <= stop_in; r.stop_mark <= stop_in;
r.req <= r_next.req; r.req <= r_next.req;
r.fetch_fail <= r_next.fetch_fail;
r_int.tlbcheck <= r_next_int.tlbcheck;
r_int.tlbstall <= r_next_int.tlbstall;
end if; end if;
end process; end process;
log_out <= log_nia; log_out <= log_nia;
@ -134,20 +195,113 @@ begin
end process; end process;
end generate; end generate;


-- Clocked update of the two-entry ERAT.
-- Priority, highest first:
--   1. reset or any tlbie from the MMU: both entries invalidated;
--   2. tlbld from the MMU: the new PTE is written into entry 0;
--   3. an iTLB lookup that hit (tlbcheck and itlb_hit): the iTLB result is
--      written into the entry that is not the most recently used one.
-- In addition, an ERAT hit updates the MRU bit.  Later signal assignments
-- in this process override earlier ones, so cases 2 and 3 win over the
-- hit-driven MRU update when they occur in the same cycle.
erat_sync : process(clk)
begin
    if rising_edge(clk) then
        if rst /= '0' or m_in.tlbie = '1' then
            -- tlbie is not qualified by address or doall here: every
            -- invalidation clears the whole ERAT.
            erat.valid <= "00";
            erat.mru <= '0';
        else
            -- Track which entry translated the address being advanced to
            -- (erat_hit is already qualified with advance_nia in comb).
            if erat_hit = '1' then
                erat.mru <= erat_sel;
            end if;
            if m_in.tlbld = '1' then
                -- PTE arriving from the MMU: install in entry 0, drop
                -- entry 1, and mark entry 0 as most recently used.
                erat.epn0 <= m_in.addr(63 downto MIN_LG_PGSZ);
                erat.rpn0 <= m_in.pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
                erat.priv0 <= m_in.pte(3);
                erat.valid(0) <= '1';
                erat.valid(1) <= '0';
                erat.mru <= '0';
            elsif r_int.tlbcheck = '1' and itlb_hit = '1' then
                -- iTLB lookup for r.nia hit: replace the least recently
                -- used entry with the translation read from the iTLB.
                if erat.mru = '0' then
                    erat.epn1 <= r.nia(63 downto MIN_LG_PGSZ);
                    erat.rpn1 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
                    erat.priv1 <= itlb_pte(3);
                    erat.valid(1) <= '1';
                else
                    erat.epn0 <= r.nia(63 downto MIN_LG_PGSZ);
                    erat.rpn0 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
                    erat.priv0 <= itlb_pte(3);
                    erat.valid(0) <= '1';
                end if;
                -- The entry just filled becomes the most recently used.
                erat.mru <= not erat.mru;
            end if;
        end if;
    end if;
end process;

-- Registered iTLB read port.
-- Whenever the fetch address advances, the hashed next NIA selects one
-- iTLB slot and its PTE, tag and valid bit are latched for use in the
-- following cycle.  A metavalue index poisons the latched outputs with 'X'
-- instead of reading the arrays.
itlb_read : process(clk)
    variable rd_hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
    variable rd_slot : natural;
begin
    if rising_edge(clk) then
        if advance_nia = '1' then
            rd_hash := hash_ea(r_next.nia);
            if not is_X(rd_hash) then
                rd_slot := to_integer(unsigned(rd_hash));
                itlb_valid <= itlb_valids(rd_slot);
                itlb_ttag <= itlb_tags(rd_slot);
                itlb_pte <= itlb_ptes(rd_slot);
            else
                itlb_valid <= 'X';
                itlb_ttag <= (others => 'X');
                itlb_pte <= (others => 'X');
            end if;
        end if;
    end if;
end process;

-- iTLB hit detection (combinatorial).
-- The latched slot hits when its tag equals the tag bits of the current
-- NIA and the slot is valid; on a tag mismatch the hit signal is '0'.
itlb_lookup : process(all)
begin
    if itlb_ttag = r.nia(63 downto MIN_LG_PGSZ + TLB_BITS) then
        itlb_hit <= itlb_valid;
    else
        itlb_hit <= '0';
    end if;
end process;

-- iTLB maintenance, driven by the MMU interface (clocked).
--   * reset, or tlbie with doall: every slot is invalidated;
--   * tlbie without doall: the slot selected by the hashed address is
--     invalidated, whether or not its tag matches;
--   * tlbld: tag, PTE and valid bit of the selected slot are written.
itlb_update: process(clk)
    variable wr_hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
    variable wr_slot : natural;
begin
    if rising_edge(clk) then
        wr_hash := hash_ea(m_in.addr);
        if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
            -- invalidate the whole TLB
            itlb_valids <= (others => '0');
        elsif m_in.tlbie = '1' then
            assert not is_X(wr_hash) report "icache index invalid on write" severity FAILURE;
            -- single-slot invalidate; no tag comparison is performed
            wr_slot := to_integer(unsigned(wr_hash));
            itlb_valids(wr_slot) <= '0';
        elsif m_in.tlbld = '1' then
            assert not is_X(wr_hash) report "icache index invalid on write" severity FAILURE;
            wr_slot := to_integer(unsigned(wr_hash));
            itlb_valids(wr_slot) <= '1';
            itlb_ptes(wr_slot) <= m_in.pte;
            itlb_tags(wr_slot) <= m_in.addr(63 downto MIN_LG_PGSZ + TLB_BITS);
        end if;
        -- NOTE(review): the event assignment below is disabled; presumably
        -- no `ev` record is available in this entity -- confirm.
        --ev.itlb_miss_resolved <= m_in.tlbld and not rst;
    end if;
end process;

comb : process(all) comb : process(all)
variable v : Fetch1ToIcacheType; variable v : Fetch1ToIcacheType;
variable v_int : reg_internal_t; variable v_int : reg_internal_t;
variable next_nia : std_ulogic_vector(63 downto 0); variable next_nia : std_ulogic_vector(63 downto 0);
variable m32 : std_ulogic; variable m32 : std_ulogic;
variable ehit, esel : std_ulogic;
variable eaa_priv : std_ulogic;
begin begin
v := r; v := r;
v_int := r_int; v_int := r_int;
v.predicted := '0'; v.predicted := '0';
v.pred_ntaken := '0'; v.pred_ntaken := '0';
v.req := not (rst or w_in.interrupt or stop_in); v.req := not stop_in;
-- reduce metavalue warnings in sim v_int.tlbstall := r_int.tlbcheck;
if is_X(rst) then v_int.tlbcheck := '0';
v.req := '0';
if r_int.tlbcheck = '1' and itlb_hit = '0' then
v.fetch_fail := '1';
end if; end if;


-- Combinatorial computation of the CIA for the next cycle. -- Combinatorial computation of the CIA for the next cycle.
@ -163,8 +317,13 @@ begin
v.priv_mode := w_in.priv_mode; v.priv_mode := w_in.priv_mode;
v.big_endian := w_in.big_endian; v.big_endian := w_in.big_endian;
v_int.mode_32bit := w_in.mode_32bit; v_int.mode_32bit := w_in.mode_32bit;
v.fetch_fail := '0';
elsif d_in.redirect = '1' then elsif d_in.redirect = '1' then
next_nia := d_in.redirect_nia(63 downto 2) & "00"; next_nia := d_in.redirect_nia(63 downto 2) & "00";
v.fetch_fail := '0';
elsif r_int.tlbstall = '1' then
-- this case is needed so that the correct icache tags are read
next_nia := r.nia;
else else
next_nia := r_int.next_nia; next_nia := r_int.next_nia;
end if; end if;
@ -182,6 +341,52 @@ begin
btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2)); btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2));
v_int.rd_is_niap4 := '1'; v_int.rd_is_niap4 := '1';


-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or
(not r.stop_mark and not (r.req and stall_in));
-- reduce metavalue warnings in sim
if is_X(rst) then
advance_nia <= '1';
end if;

-- Translate next_nia to real if possible, otherwise we have to stall
-- and look up the TLB.
ehit := '0';
esel := '0';
eaa_priv := '1';
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn1 and erat.valid(1) = '1' then
ehit := '1';
esel := '1';
end if;
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn0 and erat.valid(0) = '1' then
ehit := '1';
end if;
if v.virt_mode = '0' then
v.rpn := v.nia(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
eaa_priv := '1';
elsif esel = '1' then
v.rpn := erat.rpn1;
eaa_priv := erat.priv1;
else
v.rpn := erat.rpn0;
eaa_priv := erat.priv0;
end if;
if advance_nia = '1' and ehit = '0' and v.virt_mode = '1' and
r_int.tlbcheck = '0' and v.fetch_fail = '0' then
v_int.tlbstall := '1';
v_int.tlbcheck := '1';
end if;
if ehit = '1' or v.virt_mode = '0' then
if eaa_priv = '1' and v.priv_mode = '0' then
v.fetch_fail := '1';
else
v.fetch_fail := '0';
end if;
end if;
erat_hit <= ehit and advance_nia;
erat_sel <= esel;

if rst /= '0' then if rst /= '0' then
if alt_reset_in = '1' then if alt_reset_in = '1' then
v_int.next_nia := ALT_RESET_ADDRESS; v_int.next_nia := ALT_RESET_ADDRESS;
@ -192,16 +397,29 @@ begin
v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00"; v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00";
end if; end if;
if rst /= '0' or w_in.interrupt = '1' then if rst /= '0' or w_in.interrupt = '1' then
v.req := '0';
v.virt_mode := '0'; v.virt_mode := '0';
v.priv_mode := '1'; v.priv_mode := '1';
v.big_endian := '0'; v.big_endian := '0';
v_int.mode_32bit := '0'; v_int.mode_32bit := '0';
v_int.rd_is_niap4 := '0'; v_int.rd_is_niap4 := '0';
v_int.tlbstall := '0';
v_int.tlbcheck := '0';
v.fetch_fail := '0';
end if;
if v.fetch_fail = '1' then
v_int.tlbstall := '1';
end if;
if v_int.tlbstall = '1' then
v.req := '0';
end if; end if;


-- If there is a valid entry in the BTC which corresponds to the next instruction, -- If there is a valid entry in the BTC which corresponds to the next instruction,
-- use that to predict the address of the instruction after that. -- use that to predict the address of the instruction after that.
if rst = '0' and w_in.interrupt = '0' and w_in.redirect = '0' and d_in.redirect = '0' and -- (w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0')
-- implies v.nia = r_int.next_nia.
-- r_int.rd_is_niap4 implies r_int.next_nia is the address used to read the BTC.
if v.req = '1' and w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0' and
btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and
btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS) btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
@ -214,15 +432,6 @@ begin
end if; end if;
end if; end if;


-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or
(not r.stop_mark and not stall_in);
-- reduce metavalue warnings in sim
if is_X(rst) then
advance_nia <= '1';
end if;

r_next <= v; r_next <= v;
r_next_int <= v_int; r_next_int <= v_int;



@ -41,10 +41,6 @@ entity icache is
NUM_LINES : positive := 32; NUM_LINES : positive := 32;
-- Number of ways -- Number of ways
NUM_WAYS : positive := 4; NUM_WAYS : positive := 4;
-- L1 ITLB number of entries (direct mapped)
TLB_SIZE : positive := 64;
-- L1 ITLB log_2(page_size)
TLB_LG_PGSZ : positive := 12;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -55,8 +51,6 @@ entity icache is
i_in : in Fetch1ToIcacheType; i_in : in Fetch1ToIcacheType;
i_out : out IcacheToDecode1Type; i_out : out IcacheToDecode1Type;


m_in : in MmuToIcacheType;

stall_in : in std_ulogic; stall_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
@ -157,35 +151,6 @@ architecture rtl of icache is
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic; type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
signal cache_valids : cache_valids_t; signal cache_valids : cache_valids_t;


-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;

subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;

-- Values read from above arrays on a clock edge
signal itlb_valid : std_ulogic;
signal itlb_ttag : tlb_tag_t;
signal itlb_pte : tlb_pte_t;

-- Values captured from a write to a TLB
signal itlb_bypass_valid : std_ulogic;
signal itlb_bypass_ra : std_ulogic_vector(REAL_ADDR_BITS - TLB_LG_PGSZ - 1 downto 0);
signal itlb_bypass_priv : std_ulogic;

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Cache reload state machine -- Cache reload state machine
type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK); type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK);


@ -233,9 +198,6 @@ architecture rtl of icache is
signal req_raddr : real_addr_t; signal req_raddr : real_addr_t;


signal real_addr : real_addr_t; signal real_addr : real_addr_t;
signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic;
signal access_ok : std_ulogic;


-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
@ -330,16 +292,6 @@ architecture rtl of icache is
return endian & addr(addr'left downto SET_SIZE_BITS); return endian & addr(addr'left downto SET_SIZE_BITS);
end; end;


-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
return hash;
end;

begin begin


-- byte-swap read data if big endian -- byte-swap read data if big endian
@ -530,95 +482,10 @@ begin
end process; end process;
end generate; end generate;


-- Read TLB using the NIA for the next cycle
itlb_read : process(clk)
variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then
tlb_req_index := hash_ea(i_in.next_nia);
if is_X(tlb_req_index) then
itlb_pte <= (others => 'X');
itlb_ttag <= (others => 'X');
itlb_valid <= 'X';
else
itlb_pte <= itlb_ptes(to_integer(unsigned(tlb_req_index)));
itlb_ttag <= itlb_tags(to_integer(unsigned(tlb_req_index)));
itlb_valid <= itlb_valids(to_integer(unsigned(tlb_req_index)));
end if;
end if;
end if;
end process;

-- Store TLB data being written for use in servicing the current request
itlb_bypass: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
itlb_bypass_valid <= '0';
itlb_bypass_ra <= (others => '0');
itlb_bypass_priv <= '0';
elsif flush_in = '1' or i_in.req = '0' or stall_out = '0' then
itlb_bypass_valid <= '0';
elsif m_in.tlbld = '1' then
assert i_in.nia(63 downto TLB_LG_PGSZ) = m_in.addr(63 downto TLB_LG_PGSZ);
itlb_bypass_valid <= '1';
itlb_bypass_ra <= m_in.pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ);
itlb_bypass_priv <= m_in.pte(3);
end if;
end if;
end process;

-- TLB hit detection and real address generation -- TLB hit detection and real address generation
itlb_lookup : process(all) itlb_lookup : process(all)
begin begin
if itlb_bypass_valid = '1' then real_addr <= i_in.rpn & i_in.nia(MIN_LG_PGSZ - 1 downto 0);
real_addr <= itlb_bypass_ra & i_in.nia(TLB_LG_PGSZ - 1 downto 0);
ra_valid <= '1';
eaa_priv <= itlb_bypass_priv;
elsif i_in.virt_mode = '1' then
real_addr <= itlb_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
i_in.nia(TLB_LG_PGSZ - 1 downto 0);
if itlb_ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
ra_valid <= itlb_valid;
else
ra_valid <= '0';
end if;
eaa_priv <= itlb_pte(3);
else
real_addr <= addr_to_real(i_in.nia);
ra_valid <= '1';
eaa_priv <= '1';
end if;

-- no IAMR, so no KUEP support for now
priv_fault <= eaa_priv and not i_in.priv_mode;
access_ok <= ra_valid and not priv_fault;
end process;

-- iTLB update
itlb_update: process(clk)
variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
wr_index := hash_ea(m_in.addr);
if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif m_in.tlbie = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
-- clear entry regardless of hit or miss
itlb_valids(to_integer(unsigned(wr_index))) <= '0';
elsif m_in.tlbld = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte;
itlb_valids(to_integer(unsigned(wr_index))) <= '1';
end if;
ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if;
end process; end process;


-- Cache hit detection, output to fetch2 and other misc logic -- Cache hit detection, output to fetch2 and other misc logic
@ -667,7 +534,7 @@ begin
end if; end if;


-- Generate the "hit" and "miss" signals for the synchronous blocks -- Generate the "hit" and "miss" signals for the synchronous blocks
if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then if i_in.req = '1' and flush_in = '0' and rst = '0' then
req_is_hit <= is_hit; req_is_hit <= is_hit;
req_is_miss <= not is_hit; req_is_miss <= not is_hit;
else else
@ -711,8 +578,8 @@ begin
i_out.next_predicted <= r.predicted; i_out.next_predicted <= r.predicted;
i_out.next_pred_ntaken <= r.pred_ntaken; i_out.next_pred_ntaken <= r.pred_ntaken;


-- Stall fetch1 if we have a miss on cache or TLB or a protection fault -- Stall fetch1 if we have a cache miss
stall_out <= i_in.req and not (is_hit and access_ok) and not flush_in; stall_out <= i_in.req and not is_hit and not flush_in;


-- Wishbone requests output (from the cache miss reload machine) -- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb; wishbone_out <= r.wb;
@ -763,6 +630,7 @@ begin
r.big_endian <= i_in.big_endian; r.big_endian <= i_in.big_endian;
r.predicted <= i_in.predicted; r.predicted <= i_in.predicted;
r.pred_ntaken <= i_in.pred_ntaken; r.pred_ntaken <= i_in.pred_ntaken;
r.fetch_failed <= i_in.fetch_fail and not flush_in;
end if; end if;
if i_out.valid = '1' then if i_out.valid = '1' then
assert not is_X(i_out.insn) severity failure; assert not is_X(i_out.insn) severity failure;
@ -955,13 +823,6 @@ begin
end if; end if;
end case; end case;
end if; end if;

-- TLB miss and protection fault processing
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
r.fetch_failed <= '0';
elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
r.fetch_failed <= '1';
end if;
end if; end if;
end process; end process;


@ -991,8 +852,8 @@ begin
wstate & wstate &
std_ulogic_vector(resize(lway, 3)) & std_ulogic_vector(resize(lway, 3)) &
req_is_hit & req_is_miss & req_is_hit & req_is_miss &
access_ok & '1' & -- was access_ok
ra_valid; '1'; -- was ra_valid
end if; end if;
end process; end process;
log_out <= log_data; log_out <= log_data;

@ -20,7 +20,7 @@ entity mmu is
d_out : out MmuToDcacheType; d_out : out MmuToDcacheType;
d_in : in DcacheToMmuType; d_in : in DcacheToMmuType;


i_out : out MmuToIcacheType i_out : out MmuToITLBType
); );
end mmu; end mmu;



Loading…
Cancel
Save