diff --git a/common.vhdl b/common.vhdl index 0207fe1..8c42caf 100644 --- a/common.vhdl +++ b/common.vhdl @@ -675,6 +675,7 @@ package common is atomic_last : std_ulogic; virt_mode : std_ulogic; priv_mode : std_ulogic; + tlb_probe : std_ulogic; addr : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1 byte_sel : std_ulogic_vector(7 downto 0); diff --git a/dcache.vhdl b/dcache.vhdl index 6409ab0..fe9950b 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -135,7 +135,8 @@ architecture rtl of dcache is constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE); constant TLB_WAY_BITS : natural := maximum(log2(TLB_NUM_WAYS), 1); constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_SET_BITS); - constant TLB_TAG_WAY_BITS : natural := TLB_NUM_WAYS * TLB_EA_TAG_BITS; + constant TLB_EA_TAG_WIDTH : natural := TLB_EA_TAG_BITS + 7 - ((TLB_EA_TAG_BITS + 7) mod 8); + constant TLB_TAG_WAY_BITS : natural := TLB_NUM_WAYS * TLB_EA_TAG_WIDTH; constant TLB_PTE_BITS : natural := 64; constant TLB_PTE_WAY_BITS : natural := TLB_NUM_WAYS * TLB_PTE_BITS; @@ -294,9 +295,6 @@ architecture rtl of dcache is -- Stage 0 register, basically contains just the latched request type reg_stage_0_t is record req : Loadstore1ToDcacheType; - tlbie : std_ulogic; -- indicates a tlbie request (from MMU) - doall : std_ulogic; -- with tlbie, indicates flush whole TLB - tlbld : std_ulogic; -- indicates a TLB load request (from MMU) mmu_req : std_ulogic; -- indicates source of request d_valid : std_ulogic; -- indicates req.data is valid now end record; @@ -356,6 +354,7 @@ architecture rtl of dcache is -- TLB hit state tlb_hit : std_ulogic; tlb_hit_way : tlb_way_sig_t; + tlb_hit_ways : tlb_expand_t; tlb_hit_index : tlb_index_sig_t; tlb_victim : tlb_way_sig_t; ls_tlb_hit : std_ulogic; @@ -566,19 +565,10 @@ architecture rtl of dcache is function read_tlb_tag(way: tlb_way_t; tags: tlb_way_tags_t) return tlb_tag_t is variable j : integer; begin - j := way * TLB_EA_TAG_BITS; + j := way * TLB_EA_TAG_WIDTH; return tags(j + TLB_EA_TAG_BITS - 1 downto j); end; - -- Write a TLB tag to a TLB tag memory row - procedure write_tlb_tag(way: tlb_way_t; tags: inout tlb_way_tags_t; - tag: tlb_tag_t) is - variable j : integer; - begin - j := way * TLB_EA_TAG_BITS; - tags(j + TLB_EA_TAG_BITS - 1 downto j) := tag; - end; - -- Read a PTE from a TLB PTE memory row function read_tlb_pte(way: tlb_way_t; ptes: tlb_way_ptes_t) return tlb_pte_t is variable j : integer; @@ -587,13 +577,6 @@ architecture rtl of dcache is return ptes(j + TLB_PTE_BITS - 1 downto j); end; - procedure write_tlb_pte(way: tlb_way_t; ptes: inout tlb_way_ptes_t; newpte: tlb_pte_t) is - variable j : integer; - begin - j := way * TLB_PTE_BITS; - ptes(j + TLB_PTE_BITS - 1 downto j) := newpte; - end; - begin assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE; @@ -623,26 +606,19 @@ begin if m_in.valid = '1' then r.req := Loadstore1ToDcacheInit; r.req.valid := '1'; - r.req.load := not (m_in.tlbie or m_in.tlbld); + r.req.load := '1'; r.req.priv_mode := '1'; r.req.addr := m_in.addr; - r.req.data := m_in.pte; r.req.byte_sel := (others => '1'); - r.tlbie := m_in.tlbie; - r.doall := m_in.doall; - r.tlbld := m_in.tlbld; r.mmu_req := '1'; r.d_valid := '1'; else r.req := d_in; r.req.data := (others => '0'); - r.tlbie := '0'; - r.doall := '0'; - r.tlbld := '0'; r.mmu_req := '0'; r.d_valid := '0'; end if; - if r.req.valid = '1' and r.doall = '0' then + if r.req.valid = '1' then assert not is_X(r.req.addr) severity failure; end if; if rst = '1' then @@ -809,48 +785,39 @@ begin end process; tlb_update : process(clk) - variable tlbie : std_ulogic; - variable tlbwe : std_ulogic; - variable repl_way : tlb_way_sig_t; - variable eatag : tlb_tag_t; - variable tagset : tlb_way_tags_t; - variable pteset : tlb_way_ptes_t; + variable tlb_wr_index : tlb_index_sig_t; + variable j, k : integer; begin if rising_edge(clk) then - tlbie := r0_valid and r0.tlbie; - tlbwe := r0_valid and r0.tlbld; - ev.dtlb_miss_resolved <= tlbwe; - if rst = '1' or (tlbie = '1' and r0.doall = '1') then + tlb_wr_index := unsigned(m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 + downto TLB_LG_PGSZ)); + ev.dtlb_miss_resolved <= m_in.tlbld; + if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then -- clear all valid bits at once for i in tlb_index_t loop dtlb_valids(i) <= (others => '0'); end loop; - elsif tlbie = '1' then + elsif m_in.tlbie = '1' then for i in tlb_way_t loop - if tlb_hit_expand(i) = '1' then - assert not is_X(tlb_req_index); - dtlb_valids(to_integer(tlb_req_index))(i) <= '0'; + if r1.tlb_hit_ways(i) = '1' then + assert not is_X(tlb_wr_index); + dtlb_valids(to_integer(tlb_wr_index))(i) <= '0'; end if; end loop; - elsif tlbwe = '1' then - assert not is_X(tlb_req_index); - repl_way := to_unsigned(0, TLB_WAY_BITS); - if TLB_NUM_WAYS > 1 then - if tlb_hit = '1' then - repl_way := tlb_hit_way; - else - repl_way := unsigned(r1.tlb_victim); + elsif m_in.tlbld = '1' then + assert not is_X(tlb_wr_index); + assert not is_X(r1.tlb_victim); + for way in 0 to TLB_NUM_WAYS - 1 loop + if TLB_NUM_WAYS = 1 or way = to_integer(unsigned(r1.tlb_victim)) then + j := way * TLB_EA_TAG_WIDTH; + dtlb_tags(to_integer(tlb_wr_index))(j + TLB_EA_TAG_WIDTH - 1 downto j) <= + (TLB_EA_TAG_WIDTH - 1 downto TLB_EA_TAG_BITS => '0') & + m_in.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); + k := way * TLB_PTE_BITS; + dtlb_ptes(to_integer(tlb_wr_index))(k + TLB_PTE_BITS - 1 downto k) <= m_in.pte; + dtlb_valids(to_integer(tlb_wr_index))(way) <= '1'; end if; - assert not is_X(repl_way); - end if; - eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); - tagset := tlb_tag_way; - write_tlb_tag(to_integer(repl_way), tagset, eatag); - dtlb_tags(to_integer(tlb_req_index)) <= tagset; - pteset := tlb_pte_way; - write_tlb_pte(to_integer(repl_way), pteset, r0.req.data); - dtlb_ptes(to_integer(tlb_req_index)) <= pteset; - dtlb_valids(to_integer(tlb_req_index))(to_integer(repl_way)) <= '1'; + end loop; end if; end if; end process; @@ -914,10 +881,10 @@ begin if rising_edge(clk) then if r0_stall = '1' then index := req_index; - valid := r0.req.valid and not (r0.tlbie or r0.tlbld); + valid := r0.req.valid; elsif m_in.valid = '1' then index := get_index(m_in.addr); - valid := not (m_in.tlbie or m_in.tlbld); + valid := '1'; else index := get_index(d_in.addr); valid := d_in.valid; @@ -999,7 +966,7 @@ begin dawr_match := r0.req.dawr_match; end if; - go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error; + go := r0_valid and not r1.ls_error; if is_X(ra) then go := '0'; end if; @@ -1173,6 +1140,12 @@ begin else req_op_nop <= '1'; end if; + elsif r0.req.tlb_probe = '1' then + -- TLB probe is sent down by loadstore1 before sending a TLB + -- invalidation to mmu, to get r1.tlb_hit_* set correctly + -- (for a single-page invalidation) for the address. + -- It doesn't require r1.ls_valid to be set on completion, + -- so there is nothing else to do here. elsif access_ok = '0' then req_op_bad <= '1'; elsif r0.req.flush = '1' then @@ -1198,7 +1171,7 @@ begin if r0_stall = '0' then if m_in.valid = '1' then early_req_row <= get_row(m_in.addr); - early_rd_valid <= not (m_in.tlbie or m_in.tlbld); + early_rd_valid <= '1'; else early_req_row <= get_row(d_in.addr); early_rd_valid <= d_in.valid and d_in.load; @@ -1417,13 +1390,23 @@ begin end if; -- Record TLB hit information for updating TLB PLRU - r1.tlb_hit <= tlb_hit; - r1.tlb_hit_way <= tlb_hit_way; - r1.tlb_hit_index <= tlb_req_index; + -- and for invalidating or updating TLB contents + if r0_valid = '1' then + r1.tlb_hit <= tlb_hit; + r1.tlb_hit_way <= tlb_hit_way; + r1.tlb_hit_ways <= tlb_hit_expand; + r1.tlb_hit_index <= tlb_req_index; + else + r1.tlb_hit <= '0'; + end if; -- determine victim way in the TLB in the cycle after -- we detect the TLB miss if r1.ls_error = '1' then - r1.tlb_victim <= unsigned(tlb_plru_victim); + if r1.tlb_hit = '0' then + r1.tlb_victim <= unsigned(tlb_plru_victim); + else + r1.tlb_victim <= r1.tlb_hit_way; + end if; end if; end if; @@ -1482,9 +1465,7 @@ begin r1.stcx_fail <= '0'; r1.ls_valid <= (req_op_load_hit or req_op_nop) and not r0.mmu_req; - -- complete tlbies and TLB loads in the third cycle - r1.mmu_done <= (r0_valid and (r0.tlbie or r0.tlbld)) or - (req_op_load_hit and r0.mmu_req); + r1.mmu_done <= req_op_load_hit and r0.mmu_req; -- Clear the reservation if another entity writes to that line if kill_rsrv = '1' then @@ -1582,7 +1563,7 @@ begin r1.full <= req_op_load_miss or req_op_store or req_op_flush or req_op_sync; end if; end if; - if r0_valid = '1' and r0.tlbld = '1' then + if m_in.tlbld = '1' or m_in.tlbie = '1' then r1.ls_tlb_hit <= '0'; end if; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index a274d0f..5d05bbb 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -712,8 +712,8 @@ begin v.mmu_op := '1'; when others => end case; - v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr and - not hash_nop; + v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz or v.tlbie) and + not v.align_intr and not hash_nop; v.incomplete := v.dc_req and v.two_dwords; -- Work out controls for load and store formatting @@ -873,7 +873,7 @@ begin dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode, r1.req.priv_mode, r1.req.store) then dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and - not (r1.req.touch or r1.req.sync or r1.req.flush); + not (r1.req.touch or r1.req.sync or r1.req.flush or r1.req.tlbie); end if; end loop; stage1_dawr_match <= dawr_match; @@ -918,7 +918,7 @@ begin v.req.store_data := store_data; v.req.dawr_intr := dawr_match; v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and - not r1.req.incomplete and not r1.req.hashcmp; + not r1.req.incomplete and not r1.req.hashcmp and not r1.req.tlbie; v.wait_mmu := r1.req.valid and r1.req.mmu_op; if r1.req.valid = '1' and (r1.req.align_intr or r1.req.hashcmp) = '1' then v.busy := '1'; @@ -1263,6 +1263,7 @@ begin d_out.sync <= stage1_req.sync; d_out.nc <= stage1_req.nc; d_out.reserve <= stage1_req.reserve; + d_out.tlb_probe <= stage1_req.tlbie; d_out.atomic_qw <= stage1_req.atomic_qw; d_out.atomic_first <= stage1_req.atomic_first; d_out.atomic_last <= stage1_req.atomic_last; @@ -1279,6 +1280,7 @@ begin d_out.sync <= r2.req.sync; d_out.nc <= r2.req.nc; d_out.reserve <= r2.req.reserve; + d_out.tlb_probe <= r2.req.tlbie; d_out.atomic_qw <= r2.req.atomic_qw; d_out.atomic_first <= r2.req.atomic_first; d_out.atomic_last <= r2.req.atomic_last; diff --git a/mmu.vhdl b/mmu.vhdl index fb63cfd..91429f9 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -28,7 +28,6 @@ architecture behave of mmu is type state_t is (IDLE, DO_TLBIE, - TLB_WAIT, PART_TBL_READ, PART_TBL_WAIT, PART_TBL_DONE, @@ -195,7 +194,6 @@ begin variable v : reg_stage_t; variable dcreq : std_ulogic; variable tlb_load : std_ulogic; - variable itlb_load : std_ulogic; variable tlbie_req : std_ulogic; variable ptbl_rd : std_ulogic; variable prtbl_rd : std_ulogic; @@ -225,7 +223,6 @@ begin v.perm_err := '0'; v.rc_error := '0'; tlb_load := '0'; - itlb_load := '0'; tlbie_req := '0'; v.inval_all := '0'; ptbl_rd := '0'; @@ -309,14 +306,8 @@ begin end if; when DO_TLBIE => - dcreq := '1'; tlbie_req := '1'; - v.state := TLB_WAIT; - - when TLB_WAIT => - if d_in.done = '1' then - v.state := RADIX_FINISH; - end if; + v.state := RADIX_FINISH; when PART_TBL_READ => dcreq := '1'; @@ -438,20 +429,14 @@ begin when RADIX_LOAD_TLB => tlb_load := '1'; - if r.iside = '0' then - dcreq := '1'; - v.state := TLB_WAIT; - else - itlb_load := '1'; - v.state := IDLE; - end if; + v.state := RADIX_FINISH; when RADIX_FINISH => v.state := IDLE; end case; - if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then + if v.state = RADIX_FINISH then v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error; v.done := not v.err; end if; @@ -505,11 +490,11 @@ begin d_out.valid <= dcreq; d_out.tlbie <= tlbie_req; d_out.doall <= r.inval_all; - d_out.tlbld <= tlb_load; + d_out.tlbld <= tlb_load and not r.iside; d_out.addr <= addr; d_out.pte <= tlb_data; - i_out.tlbld <= itlb_load; + i_out.tlbld <= tlb_load and r.iside; i_out.tlbie <= tlbie_req; i_out.doall <= r.inval_all; i_out.addr <= addr;