From f9e5622327e5d6b0e2e624acead9b71c91948fe7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Aug 2023 20:50:17 +1000 Subject: [PATCH] Move iTLB from icache to fetch1 This moves the address translation step for instruction fetches one cycle earlier, so that it now happens in the fetch1 stage. There is now a 2-entry mini translation cache ("ERAT", or effective-to-real address translation cache) which operates on the output of the multiplexer that selects the instruction address for the next cycle. The ERAT consists of two effective address registers and two corresponding real address registers. They store the page number part of the addresses for a 4kB page size, which is the smallest page size supported by the architecture. If the effective address doesn't match either of the EA registers, and address translation is enabled, then i_out.req goes low for two cycles while the iTLB is looked up. Experimentally, this delay results in a 0.1% drop in coremark performance, but allowing two cycles for the lookup results in better timing. The result from the iTLB is placed into the least recently used ERAT entry and then used to translate the address as normal. If address translation is not enabled then the EA is used directly as the real address. The iTLB structure is the same as it was before: direct mapped, indexed using a hashed EA. The "fetch failed" signal, which indicates a TLB miss or protection violation, is now generated in fetch1 and passed through icache. When it is asserted, fetch1 goes into a stalled state until a PTE arrives from the MMU (which gets put into both the iTLB and the ERAT), or an interrupt or redirect occurs. Any TLB invalidations from the MMU invalidate the whole ERAT. 
Signed-off-by: Paul Mackerras --- common.vhdl | 8 +- core.vhdl | 10 +-- fetch1.vhdl | 239 ++++++++++++++++++++++++++++++++++++++++++++++++---- icache.vhdl | 153 ++------------------------------- mmu.vhdl | 2 +- 5 files changed, 244 insertions(+), 168 deletions(-) diff --git a/common.vhdl b/common.vhdl index 041a5f2..efcf7b3 100644 --- a/common.vhdl +++ b/common.vhdl @@ -194,6 +194,10 @@ package common is subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t; + -- Minimum page size + constant MIN_LG_PGSZ : positive := 12; + constant MIN_PAGESZ : positive := 2 ** MIN_LG_PGSZ; + -- Used for tracking instruction completion and pending register writes constant TAG_COUNT : positive := 4; constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT); @@ -231,6 +235,7 @@ package common is type Fetch1ToIcacheType is record req: std_ulogic; + fetch_fail : std_ulogic; virt_mode : std_ulogic; priv_mode : std_ulogic; big_endian : std_ulogic; @@ -239,6 +244,7 @@ package common is pred_ntaken : std_ulogic; nia: std_ulogic_vector(63 downto 0); next_nia: std_ulogic_vector(63 downto 0); + rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0); end record; type IcacheToDecode1Type is record @@ -607,7 +613,7 @@ package common is data : std_ulogic_vector(63 downto 0); end record; - type MmuToIcacheType is record + type MmuToITLBType is record tlbld : std_ulogic; tlbie : std_ulogic; doall : std_ulogic; diff --git a/core.vhdl b/core.vhdl index a556069..35a860e 100644 --- a/core.vhdl +++ b/core.vhdl @@ -57,7 +57,7 @@ architecture behave of core is signal fetch1_to_icache : Fetch1ToIcacheType; signal writeback_to_fetch1: WritebackToFetch1Type; signal icache_to_decode1 : IcacheToDecode1Type; - signal mmu_to_icache : MmuToIcacheType; + signal mmu_to_itlb : MmuToITLBType; -- decode signals signal decode1_to_decode2: Decode1ToDecode2Type; @@ -223,6 +223,7 @@ begin generic map ( RESET_ADDRESS => 
(others => '0'), ALT_RESET_ADDRESS => ALT_RESET_ADDRESS, + TLB_SIZE => ICACHE_TLB_SIZE, HAS_BTC => HAS_BTC ) port map ( @@ -231,8 +232,9 @@ begin alt_reset_in => alt_reset_d, stall_in => fetch1_stall_in, flush_in => fetch1_flush, - inval_btc => ex1_icache_inval or mmu_to_icache.tlbie, + inval_btc => ex1_icache_inval or mmu_to_itlb.tlbie, stop_in => dbg_core_stop, + m_in => mmu_to_itlb, d_in => decode1_to_fetch1, w_in => writeback_to_fetch1, i_out => fetch1_to_icache, @@ -249,7 +251,6 @@ begin LINE_SIZE => 64, NUM_LINES => ICACHE_NUM_LINES, NUM_WAYS => ICACHE_NUM_WAYS, - TLB_SIZE => ICACHE_TLB_SIZE, LOG_LENGTH => LOG_LENGTH ) port map( @@ -257,7 +258,6 @@ begin rst => rst_icache, i_in => fetch1_to_icache, i_out => icache_to_decode1, - m_in => mmu_to_icache, flush_in => fetch1_flush, inval_in => dbg_icache_rst or ex1_icache_inval, stall_in => icache_stall_in, @@ -454,7 +454,7 @@ begin l_out => mmu_to_loadstore1, d_out => mmu_to_dcache, d_in => dcache_to_mmu, - i_out => mmu_to_icache + i_out => mmu_to_itlb ); dcache_0: entity work.dcache diff --git a/fetch1.vhdl b/fetch1.vhdl index b6c2205..98116f9 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -3,12 +3,14 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.utils.all; use work.common.all; entity fetch1 is generic( RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0'); ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0'); + TLB_SIZE : positive := 64; -- L1 ITLB number of entries (direct mapped) HAS_BTC : boolean := true ); port( @@ -21,6 +23,7 @@ entity fetch1 is inval_btc : in std_ulogic; stop_in : in std_ulogic; alt_reset_in : in std_ulogic; + m_in : in MmuToITLBType; -- redirect from writeback unit w_in : in WritebackToFetch1Type; @@ -40,13 +43,32 @@ architecture behaviour of fetch1 is type reg_internal_t is record mode_32bit: std_ulogic; rd_is_niap4: std_ulogic; + tlbcheck: std_ulogic; + tlbstall: std_ulogic; next_nia: std_ulogic_vector(63 downto 0); end 
record; + + -- Mini effective to real translation cache + type erat_t is record + epn0: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0); + epn1: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0); + rpn0: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0); + rpn1: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0); + priv0: std_ulogic; + priv1: std_ulogic; + valid: std_ulogic_vector(1 downto 0); + mru: std_ulogic; -- '1' => entry 1 most recently used + end record; + signal r, r_next : Fetch1ToIcacheType; signal r_int, r_next_int : reg_internal_t; signal advance_nia : std_ulogic; signal log_nia : std_ulogic_vector(42 downto 0); + signal erat : erat_t; + signal erat_hit : std_ulogic; + signal erat_sel : std_ulogic; + constant BTC_ADDR_BITS : integer := 10; constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS; constant BTC_TARGET_BITS : integer := 62; @@ -58,6 +80,41 @@ architecture behaviour of fetch1 is signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0'); signal btc_rd_valid : std_ulogic := '0'; + -- L1 ITLB. 
+ constant TLB_BITS : natural := log2(TLB_SIZE); + constant TLB_EA_TAG_BITS : natural := 64 - (MIN_LG_PGSZ + TLB_BITS); + constant TLB_PTE_BITS : natural := 64; + + subtype tlb_index_t is integer range 0 to TLB_SIZE - 1; + type tlb_valids_t is array(tlb_index_t) of std_ulogic; + subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0); + type tlb_tags_t is array(tlb_index_t) of tlb_tag_t; + subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0); + type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t; + + signal itlb_valids : tlb_valids_t; + signal itlb_tags : tlb_tags_t; + signal itlb_ptes : tlb_ptes_t; + + -- Values read from above arrays on a clock edge + signal itlb_valid : std_ulogic; + signal itlb_ttag : tlb_tag_t; + signal itlb_pte : tlb_pte_t; + signal itlb_hit : std_ulogic; + + -- Privilege bit from PTE EAA field + signal eaa_priv : std_ulogic; + + -- Simple hash for direct-mapped TLB index + function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is + variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0); + begin + hash := addr(MIN_LG_PGSZ + TLB_BITS - 1 downto MIN_LG_PGSZ) + xor addr(MIN_LG_PGSZ + 2 * TLB_BITS - 1 downto MIN_LG_PGSZ + TLB_BITS) + xor addr(MIN_LG_PGSZ + 3 * TLB_BITS - 1 downto MIN_LG_PGSZ + 2 * TLB_BITS); + return hash; + end; + begin regs : process(clk) @@ -75,7 +132,8 @@ begin " S:" & std_ulogic'image(stall_in) & " T:" & std_ulogic'image(stop_in) & " nia:" & to_hstring(r_next.nia) & - " req:" & std_ulogic'image(r_next.req); + " req:" & std_ulogic'image(r_next.req) & + " FF:" & std_ulogic'image(r_next.fetch_fail); end if; if advance_nia = '1' then r <= r_next; @@ -84,6 +142,9 @@ begin -- always send the up-to-date stop mark and req r.stop_mark <= stop_in; r.req <= r_next.req; + r.fetch_fail <= r_next.fetch_fail; + r_int.tlbcheck <= r_next_int.tlbcheck; + r_int.tlbstall <= r_next_int.tlbstall; end if; end process; log_out <= log_nia; @@ -134,20 +195,113 @@ begin end process; end generate; + 
erat_sync : process(clk) + begin + if rising_edge(clk) then + if rst /= '0' or m_in.tlbie = '1' then + erat.valid <= "00"; + erat.mru <= '0'; + else + if erat_hit = '1' then + erat.mru <= erat_sel; + end if; + if m_in.tlbld = '1' then + erat.epn0 <= m_in.addr(63 downto MIN_LG_PGSZ); + erat.rpn0 <= m_in.pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ); + erat.priv0 <= m_in.pte(3); + erat.valid(0) <= '1'; + erat.valid(1) <= '0'; + erat.mru <= '0'; + elsif r_int.tlbcheck = '1' and itlb_hit = '1' then + if erat.mru = '0' then + erat.epn1 <= r.nia(63 downto MIN_LG_PGSZ); + erat.rpn1 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ); + erat.priv1 <= itlb_pte(3); + erat.valid(1) <= '1'; + else + erat.epn0 <= r.nia(63 downto MIN_LG_PGSZ); + erat.rpn0 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ); + erat.priv0 <= itlb_pte(3); + erat.valid(0) <= '1'; + end if; + erat.mru <= not erat.mru; + end if; + end if; + end if; + end process; + + -- Read TLB using the NIA for the next cycle + itlb_read : process(clk) + variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0); + begin + if rising_edge(clk) then + if advance_nia = '1' then + tlb_req_index := hash_ea(r_next.nia); + if is_X(tlb_req_index) then + itlb_pte <= (others => 'X'); + itlb_ttag <= (others => 'X'); + itlb_valid <= 'X'; + else + itlb_pte <= itlb_ptes(to_integer(unsigned(tlb_req_index))); + itlb_ttag <= itlb_tags(to_integer(unsigned(tlb_req_index))); + itlb_valid <= itlb_valids(to_integer(unsigned(tlb_req_index))); + end if; + end if; + end if; + end process; + + -- TLB hit detection + itlb_lookup : process(all) + begin + itlb_hit <= '0'; + if itlb_ttag = r.nia(63 downto MIN_LG_PGSZ + TLB_BITS) then + itlb_hit <= itlb_valid; + end if; + end process; + + -- iTLB update + itlb_update: process(clk) + variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0); + begin + if rising_edge(clk) then + wr_index := hash_ea(m_in.addr); + if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then + -- clear all valid bits + 
for i in tlb_index_t loop + itlb_valids(i) <= '0'; + end loop; + elsif m_in.tlbie = '1' then + assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE; + -- clear entry regardless of hit or miss + itlb_valids(to_integer(unsigned(wr_index))) <= '0'; + elsif m_in.tlbld = '1' then + assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE; + itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto MIN_LG_PGSZ + TLB_BITS); + itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte; + itlb_valids(to_integer(unsigned(wr_index))) <= '1'; + end if; + --ev.itlb_miss_resolved <= m_in.tlbld and not rst; + end if; + end process; + comb : process(all) variable v : Fetch1ToIcacheType; variable v_int : reg_internal_t; variable next_nia : std_ulogic_vector(63 downto 0); variable m32 : std_ulogic; + variable ehit, esel : std_ulogic; + variable eaa_priv : std_ulogic; begin v := r; v_int := r_int; v.predicted := '0'; v.pred_ntaken := '0'; - v.req := not (rst or w_in.interrupt or stop_in); - -- reduce metavalue warnings in sim - if is_X(rst) then - v.req := '0'; + v.req := not stop_in; + v_int.tlbstall := r_int.tlbcheck; + v_int.tlbcheck := '0'; + + if r_int.tlbcheck = '1' and itlb_hit = '0' then + v.fetch_fail := '1'; end if; -- Combinatorial computation of the CIA for the next cycle. 
@@ -163,8 +317,13 @@ begin v.priv_mode := w_in.priv_mode; v.big_endian := w_in.big_endian; v_int.mode_32bit := w_in.mode_32bit; + v.fetch_fail := '0'; elsif d_in.redirect = '1' then next_nia := d_in.redirect_nia(63 downto 2) & "00"; + v.fetch_fail := '0'; + elsif r_int.tlbstall = '1' then + -- this case is needed so that the correct icache tags are read + next_nia := r.nia; else next_nia := r_int.next_nia; end if; @@ -182,6 +341,52 @@ begin btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2)); v_int.rd_is_niap4 := '1'; + -- If the last NIA value went down with a stop mark, it didn't get + -- executed, and hence we shouldn't increment NIA. + advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or + (not r.stop_mark and not (r.req and stall_in)); + -- reduce metavalue warnings in sim + if is_X(rst) then + advance_nia <= '1'; + end if; + + -- Translate next_nia to real if possible, otherwise we have to stall + -- and look up the TLB. + ehit := '0'; + esel := '0'; + eaa_priv := '1'; + if next_nia(63 downto MIN_LG_PGSZ) = erat.epn1 and erat.valid(1) = '1' then + ehit := '1'; + esel := '1'; + end if; + if next_nia(63 downto MIN_LG_PGSZ) = erat.epn0 and erat.valid(0) = '1' then + ehit := '1'; + end if; + if v.virt_mode = '0' then + v.rpn := v.nia(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ); + eaa_priv := '1'; + elsif esel = '1' then + v.rpn := erat.rpn1; + eaa_priv := erat.priv1; + else + v.rpn := erat.rpn0; + eaa_priv := erat.priv0; + end if; + if advance_nia = '1' and ehit = '0' and v.virt_mode = '1' and + r_int.tlbcheck = '0' and v.fetch_fail = '0' then + v_int.tlbstall := '1'; + v_int.tlbcheck := '1'; + end if; + if ehit = '1' or v.virt_mode = '0' then + if eaa_priv = '1' and v.priv_mode = '0' then + v.fetch_fail := '1'; + else + v.fetch_fail := '0'; + end if; + end if; + erat_hit <= ehit and advance_nia; + erat_sel <= esel; + if rst /= '0' then if alt_reset_in = '1' then v_int.next_nia := ALT_RESET_ADDRESS; @@ -192,16 +397,29 @@ begin 
v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00"; end if; if rst /= '0' or w_in.interrupt = '1' then + v.req := '0'; v.virt_mode := '0'; v.priv_mode := '1'; v.big_endian := '0'; v_int.mode_32bit := '0'; v_int.rd_is_niap4 := '0'; + v_int.tlbstall := '0'; + v_int.tlbcheck := '0'; + v.fetch_fail := '0'; + end if; + if v.fetch_fail = '1' then + v_int.tlbstall := '1'; + end if; + if v_int.tlbstall = '1' then + v.req := '0'; end if; -- If there is a valid entry in the BTC which corresponds to the next instruction, -- use that to predict the address of the instruction after that. - if rst = '0' and w_in.interrupt = '0' and w_in.redirect = '0' and d_in.redirect = '0' and + -- (w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0') + -- implies v.nia = r_int.next_nia. + -- r_int.rd_is_niap4 implies r_int.next_nia is the address used to read the BTC. + if v.req = '1' and w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0' and btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS) @@ -214,15 +432,6 @@ begin end if; end if; - -- If the last NIA value went down with a stop mark, it didn't get - -- executed, and hence we shouldn't increment NIA. 
- advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or - (not r.stop_mark and not stall_in); - -- reduce metavalue warnings in sim - if is_X(rst) then - advance_nia <= '1'; - end if; - r_next <= v; r_next_int <= v_int; diff --git a/icache.vhdl b/icache.vhdl index 4bd4491..cc1b2b3 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -41,10 +41,6 @@ entity icache is NUM_LINES : positive := 32; -- Number of ways NUM_WAYS : positive := 4; - -- L1 ITLB number of entries (direct mapped) - TLB_SIZE : positive := 64; - -- L1 ITLB log_2(page_size) - TLB_LG_PGSZ : positive := 12; -- Non-zero to enable log data collection LOG_LENGTH : natural := 0 ); @@ -55,8 +51,6 @@ entity icache is i_in : in Fetch1ToIcacheType; i_out : out IcacheToDecode1Type; - m_in : in MmuToIcacheType; - stall_in : in std_ulogic; stall_out : out std_ulogic; flush_in : in std_ulogic; @@ -157,35 +151,6 @@ architecture rtl of icache is type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic; signal cache_valids : cache_valids_t; - -- L1 ITLB. 
- constant TLB_BITS : natural := log2(TLB_SIZE); - constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS); - constant TLB_PTE_BITS : natural := 64; - - subtype tlb_index_t is integer range 0 to TLB_SIZE - 1; - type tlb_valids_t is array(tlb_index_t) of std_ulogic; - subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0); - type tlb_tags_t is array(tlb_index_t) of tlb_tag_t; - subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0); - type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t; - - signal itlb_valids : tlb_valids_t; - signal itlb_tags : tlb_tags_t; - signal itlb_ptes : tlb_ptes_t; - - -- Values read from above arrays on a clock edge - signal itlb_valid : std_ulogic; - signal itlb_ttag : tlb_tag_t; - signal itlb_pte : tlb_pte_t; - - -- Values captured from a write to a TLB - signal itlb_bypass_valid : std_ulogic; - signal itlb_bypass_ra : std_ulogic_vector(REAL_ADDR_BITS - TLB_LG_PGSZ - 1 downto 0); - signal itlb_bypass_priv : std_ulogic; - - -- Privilege bit from PTE EAA field - signal eaa_priv : std_ulogic; - -- Cache reload state machine type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK); @@ -233,9 +198,6 @@ architecture rtl of icache is signal req_raddr : real_addr_t; signal real_addr : real_addr_t; - signal ra_valid : std_ulogic; - signal priv_fault : std_ulogic; - signal access_ok : std_ulogic; -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; @@ -330,16 +292,6 @@ architecture rtl of icache is return endian & addr(addr'left downto SET_SIZE_BITS); end; - -- Simple hash for direct-mapped TLB index - function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is - variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0); - begin - hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ) - xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS) - xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS); - return hash; - end; - begin -- 
byte-swap read data if big endian @@ -530,95 +482,10 @@ begin end process; end generate; - -- Read TLB using the NIA for the next cycle - itlb_read : process(clk) - variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0); - begin - if rising_edge(clk) then - if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then - tlb_req_index := hash_ea(i_in.next_nia); - if is_X(tlb_req_index) then - itlb_pte <= (others => 'X'); - itlb_ttag <= (others => 'X'); - itlb_valid <= 'X'; - else - itlb_pte <= itlb_ptes(to_integer(unsigned(tlb_req_index))); - itlb_ttag <= itlb_tags(to_integer(unsigned(tlb_req_index))); - itlb_valid <= itlb_valids(to_integer(unsigned(tlb_req_index))); - end if; - end if; - end if; - end process; - - -- Store TLB data being written for use in servicing the current request - itlb_bypass: process(clk) - begin - if rising_edge(clk) then - if rst = '1' then - itlb_bypass_valid <= '0'; - itlb_bypass_ra <= (others => '0'); - itlb_bypass_priv <= '0'; - elsif flush_in = '1' or i_in.req = '0' or stall_out = '0' then - itlb_bypass_valid <= '0'; - elsif m_in.tlbld = '1' then - assert i_in.nia(63 downto TLB_LG_PGSZ) = m_in.addr(63 downto TLB_LG_PGSZ); - itlb_bypass_valid <= '1'; - itlb_bypass_ra <= m_in.pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ); - itlb_bypass_priv <= m_in.pte(3); - end if; - end if; - end process; - -- TLB hit detection and real address generation itlb_lookup : process(all) begin - if itlb_bypass_valid = '1' then - real_addr <= itlb_bypass_ra & i_in.nia(TLB_LG_PGSZ - 1 downto 0); - ra_valid <= '1'; - eaa_priv <= itlb_bypass_priv; - elsif i_in.virt_mode = '1' then - real_addr <= itlb_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & - i_in.nia(TLB_LG_PGSZ - 1 downto 0); - if itlb_ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then - ra_valid <= itlb_valid; - else - ra_valid <= '0'; - end if; - eaa_priv <= itlb_pte(3); - else - real_addr <= addr_to_real(i_in.nia); - ra_valid <= '1'; - eaa_priv <= '1'; - end if; - - -- 
no IAMR, so no KUEP support for now - priv_fault <= eaa_priv and not i_in.priv_mode; - access_ok <= ra_valid and not priv_fault; - end process; - - -- iTLB update - itlb_update: process(clk) - variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0); - begin - if rising_edge(clk) then - wr_index := hash_ea(m_in.addr); - if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then - -- clear all valid bits - for i in tlb_index_t loop - itlb_valids(i) <= '0'; - end loop; - elsif m_in.tlbie = '1' then - assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE; - -- clear entry regardless of hit or miss - itlb_valids(to_integer(unsigned(wr_index))) <= '0'; - elsif m_in.tlbld = '1' then - assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE; - itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS); - itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte; - itlb_valids(to_integer(unsigned(wr_index))) <= '1'; - end if; - ev.itlb_miss_resolved <= m_in.tlbld and not rst; - end if; + real_addr <= i_in.rpn & i_in.nia(MIN_LG_PGSZ - 1 downto 0); end process; -- Cache hit detection, output to fetch2 and other misc logic @@ -667,7 +534,7 @@ begin end if; -- Generate the "hit" and "miss" signals for the synchronous blocks - if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then + if i_in.req = '1' and flush_in = '0' and rst = '0' then req_is_hit <= is_hit; req_is_miss <= not is_hit; else @@ -711,8 +578,8 @@ begin i_out.next_predicted <= r.predicted; i_out.next_pred_ntaken <= r.pred_ntaken; - -- Stall fetch1 if we have a miss on cache or TLB or a protection fault - stall_out <= i_in.req and not (is_hit and access_ok) and not flush_in; + -- Stall fetch1 if we have a cache miss + stall_out <= i_in.req and not is_hit and not flush_in; -- Wishbone requests output (from the cache miss reload machine) wishbone_out <= r.wb; @@ -763,6 +630,7 @@ begin r.big_endian <= 
i_in.big_endian; r.predicted <= i_in.predicted; r.pred_ntaken <= i_in.pred_ntaken; + r.fetch_failed <= i_in.fetch_fail and not flush_in; end if; if i_out.valid = '1' then assert not is_X(i_out.insn) severity failure; @@ -955,13 +823,6 @@ begin end if; end case; end if; - - -- TLB miss and protection fault processing - if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then - r.fetch_failed <= '0'; - elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then - r.fetch_failed <= '1'; - end if; end if; end process; @@ -991,8 +852,8 @@ begin wstate & std_ulogic_vector(resize(lway, 3)) & req_is_hit & req_is_miss & - access_ok & - ra_valid; + '1' & -- was access_ok + '1'; -- was ra_valid end if; end process; log_out <= log_data; diff --git a/mmu.vhdl b/mmu.vhdl index 1774822..fb63cfd 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -20,7 +20,7 @@ entity mmu is d_out : out MmuToDcacheType; d_in : in DcacheToMmuType; - i_out : out MmuToIcacheType + i_out : out MmuToITLBType ); end mmu;