MMU: fetch radix tree roots from a process table selected by PID/PRTBL

Summary of the hunks below:

 * common.vhdl     - replace SPR_PGTBL0 (720) with SPR_PRTBL (720) and add
                     SPR_PID (48); widen the record field 'sprn' from
                     4 bits to 10 bits.
 * decode1.vhdl    - route accesses to SPR_PID and SPR_PRTBL to the LDST
                     unit, alongside SPR_DAR and SPR_DSISR.
 * loadstore1.vhdl - handle an SPR locally (DSISR/DAR) only when sprn(9)
                     and sprn(5) are both '0'; an mtspr to an MMU SPR now
                     sets stall and moves to TLBIE_WAIT instead of
                     setting done; the full sprn is passed to the MMU.
 * mmu.vhdl        - add prtbl/pid registers, cached pgtbl0/pgtbl3 values
                     with pt0_valid/pt3_valid bits, and PROC_TBL_READ /
                     PROC_TBL_WAIT states that load the process table
                     entry when the cached value is not valid.
                     mtspr to PID clears pt0_valid; mtspr to PRTBL clears
                     pt0_valid and pt3_valid; tlbie with l_in.sprn(3) = '1'
                     clears both.  Every MMU mtspr also requests a full
                     TLB invalidation.
 * tests           - build a process table at 0x12000 and program
                     PRTBL/PID instead of writing SPR 720 directly.

Amendment to the original patch: r.pid is now cleared on reset together
with r.prtbl.  Its value is returned on l_out.sprval and is used to form
the process table entry address, so it should not be left undefined
until software first writes PID.  The reset hunk of mmu.vhdl grows by two
lines and the new-file offsets of the later mmu.vhdl hunks move by +2;
the post-image blob id on the mmu.vhdl index line predates this change.

Note: line breaks and indentation in this copy were reconstructed from a
whitespace-collapsed source.  Hunk line counts were checked against the
hunk headers, but context whitespace may not match the tree exactly, so
whitespace-tolerant options may be needed when applying.

diff --git a/common.vhdl b/common.vhdl
index 02f0d3f..aaf176d 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -39,7 +39,8 @@ package common is
     constant SPR_SPRG3U : spr_num_t := 259;
     constant SPR_HSPRG0 : spr_num_t := 304;
     constant SPR_HSPRG1 : spr_num_t := 305;
-    constant SPR_PGTBL0 : spr_num_t := 720;
+    constant SPR_PID : spr_num_t := 48;
+    constant SPR_PRTBL : spr_num_t := 720;
 
     -- GPR indices in the register file (GPR only)
     subtype gpr_index_t is std_ulogic_vector(4 downto 0);
@@ -288,7 +289,7 @@ package common is
         iside : std_ulogic;
         load : std_ulogic;
         priv : std_ulogic;
-        sprn : std_ulogic_vector(3 downto 0);
+        sprn : std_ulogic_vector(9 downto 0);
         addr : std_ulogic_vector(63 downto 0);
         rs : std_ulogic_vector(63 downto 0);
     end record;
diff --git a/decode1.vhdl b/decode1.vhdl
index cd17d1e..4cd195f 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -449,7 +449,7 @@ begin
                     v.decode.sgl_pipe := '1';
                     -- send MMU-related SPRs to loadstore1
                     case sprn is
-                        when SPR_DAR | SPR_DSISR | SPR_PGTBL0 =>
+                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
                             v.decode.unit := LDST;
                         when others =>
                     end case;
diff --git a/loadstore1.vhdl b/loadstore1.vhdl
index 251f529..e71ad74 100644
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -255,7 +255,7 @@ begin
                     mfspr := '1';
                     -- partial decode on SPR number should be adequate given
                     -- the restricted set that get sent down this path
-                    if sprn(9) = '0' then
+                    if sprn(9) = '0' and sprn(5) = '0' then
                         if sprn(0) = '0' then
                             sprval := x"00000000" & r.dsisr;
                         else
@@ -266,16 +266,18 @@ begin
                         sprval := m_in.sprval;
                     end if;
                 when OP_MTSPR =>
-                    done := '1';
-                    if sprn(9) = '0' then
+                    if sprn(9) = '0' and sprn(5) = '0' then
                         if sprn(0) = '0' then
                             v.dsisr := l_in.data(31 downto 0);
                         else
                             v.dar := l_in.data;
                         end if;
+                        done := '1';
                     else
                         -- writing one of the SPRs in the MMU
                         mmu_mtspr := '1';
+                        stall := '1';
+                        v.state := TLBIE_WAIT;
                     end if;
                 when OP_FETCH_FAILED =>
                     -- send it to the MMU to do the radix walk
@@ -466,7 +468,7 @@ begin
         m_out.priv <= r.priv_mode;
         m_out.tlbie <= v.tlbie;
         m_out.mtspr <= mmu_mtspr;
-        m_out.sprn <= sprn(3 downto 0);
+        m_out.sprn <= sprn;
         m_out.addr <= addr;
         m_out.slbia <= l_in.insn(7);
         m_out.rs <= l_in.data;
diff --git a/mmu.vhdl b/mmu.vhdl
index 8415443..0eefbab 100644
--- a/mmu.vhdl
+++ b/mmu.vhdl
@@ -28,6 +28,8 @@ architecture behave of mmu is
 
     type state_t is (IDLE,
                      TLB_WAIT,
+                     PROC_TBL_READ,
+                     PROC_TBL_WAIT,
                      SEGMENT_CHECK,
                      RADIX_LOOKUP,
                      RADIX_READ_WAIT,
@@ -42,9 +44,15 @@ architecture behave of mmu is
         store : std_ulogic;
         priv : std_ulogic;
         addr : std_ulogic_vector(63 downto 0);
+        -- config SPRs
+        prtbl : std_ulogic_vector(63 downto 0);
+        pid : std_ulogic_vector(31 downto 0);
         -- internal state
         state : state_t;
         pgtbl0 : std_ulogic_vector(63 downto 0);
+        pt0_valid : std_ulogic;
+        pgtbl3 : std_ulogic_vector(63 downto 0);
+        pt3_valid : std_ulogic;
         shift : unsigned(5 downto 0);
         mask_size : unsigned(4 downto 0);
         pgbase : std_ulogic_vector(55 downto 0);
@@ -64,8 +72,8 @@ architecture behave of mmu is
 
 begin
     -- Multiplex internal SPR values back to loadstore1, selected
-    -- by l_in.sprn. Easy when there's only one...
-    l_out.sprval <= r.pgtbl0;
+    -- by l_in.sprn.
+    l_out.sprval <= r.prtbl when l_in.sprn(9) = '1' else x"00000000" & r.pid;
 
     mmu_0: process(clk)
     begin
@@ -73,7 +81,11 @@ begin
             if rst = '1' then
                 r.state <= IDLE;
                 r.valid <= '0';
-                r.pgtbl0 <= (others => '0');
+                r.pt0_valid <= '0';
+                r.pt3_valid <= '0';
+                r.prtbl <= (others => '0');
+                -- PID is returned on l_out.sprval, so give it a defined reset value too
+                r.pid <= (others => '0');
             else
                 if rin.valid = '1' then
                     report "MMU got tlb miss for " & to_hstring(rin.addr);
@@ -169,12 +181,17 @@ begin
         variable itlb_load : std_ulogic;
         variable tlbie_req : std_ulogic;
         variable inval_all : std_ulogic;
+        variable prtbl_rd : std_ulogic;
+        variable pt_valid : std_ulogic;
+        variable effpid : std_ulogic_vector(31 downto 0);
+        variable prtable_addr : std_ulogic_vector(63 downto 0);
         variable rts : unsigned(5 downto 0);
         variable mbits : unsigned(5 downto 0);
         variable pgtable_addr : std_ulogic_vector(63 downto 0);
         variable pte : std_ulogic_vector(63 downto 0);
         variable tlb_data : std_ulogic_vector(63 downto 0);
         variable nonzero : std_ulogic;
+        variable pgtbl : std_ulogic_vector(63 downto 0);
         variable perm_ok : std_ulogic;
         variable rc_ok : std_ulogic;
         variable addr : std_ulogic_vector(63 downto 0);
@@ -193,6 +210,7 @@ begin
         itlb_load := '0';
         tlbie_req := '0';
         inval_all := '0';
+        prtbl_rd := '0';
 
         -- Radix tree data structures in memory are big-endian,
         -- so we need to byte-swap them
@@ -202,14 +220,21 @@ begin
 
         case r.state is
         when IDLE =>
+            if l_in.addr(63) = '0' then
+                pgtbl := r.pgtbl0;
+                pt_valid := r.pt0_valid;
+            else
+                pgtbl := r.pgtbl3;
+                pt_valid := r.pt3_valid;
+            end if;
             -- rts == radix tree size, # address bits being translated
-            rts := unsigned('0' & r.pgtbl0(62 downto 61) & r.pgtbl0(7 downto 5));
+            rts := unsigned('0' & pgtbl(62 downto 61) & pgtbl(7 downto 5));
             -- mbits == # address bits to index top level of tree
-            mbits := unsigned('0' & r.pgtbl0(4 downto 0));
+            mbits := unsigned('0' & pgtbl(4 downto 0));
             -- set v.shift to rts so that we can use finalmask for the segment check
             v.shift := rts;
             v.mask_size := mbits(4 downto 0);
-            v.pgbase := r.pgtbl0(55 downto 8) & x"00";
+            v.pgbase := pgtbl(55 downto 8) & x"00";
 
             if l_in.valid = '1' then
                 v.addr := l_in.addr;
@@ -223,11 +248,23 @@ begin
                     -- RB[IS] != 0 or RB[AP] != 0, or for slbia
                     inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
                                  l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
+                    -- The RIC field of the tlbie instruction comes across on the
+                    -- sprn bus as bits 2--3. RIC=2 flushes process table caches.
+                    if l_in.sprn(3) = '1' then
+                        v.pt0_valid := '0';
+                        v.pt3_valid := '0';
+                    end if;
                     v.state := TLB_WAIT;
                 else
                     v.valid := '1';
-                    -- Use RPDS = 0 to disable radix tree walks
-                    if mbits = 0 then
+                    if pt_valid = '0' then
+                        -- need to fetch process table entry
+                        -- set v.shift so we can use finalmask for generating
+                        -- the process table entry address
+                        v.shift := unsigned('0' & r.prtbl(4 downto 0));
+                        v.state := PROC_TBL_READ;
+                    elsif mbits = 0 then
+                        -- Use RPDS = 0 to disable radix tree walks
                         v.state := RADIX_ERROR;
                         v.invalid := '1';
                     else
@@ -236,7 +273,20 @@ begin
                 end if;
             end if;
             if l_in.mtspr = '1' then
-                v.pgtbl0 := l_in.rs;
+                -- Move to PID needs to invalidate L1 TLBs and cached
+                -- pgtbl0 value. Move to PRTBL does that plus
+                -- invalidating the cached pgtbl3 value as well.
+                if l_in.sprn(9) = '0' then
+                    v.pid := l_in.rs(31 downto 0);
+                else
+                    v.prtbl := l_in.rs;
+                    v.pt3_valid := '0';
+                end if;
+                v.pt0_valid := '0';
+                dcreq := '1';
+                tlbie_req := '1';
+                inval_all := '1';
+                v.state := TLB_WAIT;
             end if;
 
         when TLB_WAIT =>
@@ -245,6 +295,41 @@ begin
                 v.state := IDLE;
             end if;
 
+        when PROC_TBL_READ =>
+            dcreq := '1';
+            prtbl_rd := '1';
+            v.state := PROC_TBL_WAIT;
+
+        when PROC_TBL_WAIT =>
+            if d_in.done = '1' then
+                if d_in.err = '0' then
+                    if r.addr(63) = '1' then
+                        v.pgtbl3 := data;
+                        v.pt3_valid := '1';
+                    else
+                        v.pgtbl0 := data;
+                        v.pt0_valid := '1';
+                    end if;
+                    -- rts == radix tree size, # address bits being translated
+                    rts := unsigned('0' & data(62 downto 61) & data(7 downto 5));
+                    -- mbits == # address bits to index top level of tree
+                    mbits := unsigned('0' & data(4 downto 0));
+                    -- set v.shift to rts so that we can use finalmask for the segment check
+                    v.shift := rts;
+                    v.mask_size := mbits(4 downto 0);
+                    v.pgbase := data(55 downto 8) & x"00";
+                    if mbits = 0 then
+                        v.state := RADIX_ERROR;
+                        v.invalid := '1';
+                    else
+                        v.state := SEGMENT_CHECK;
+                    end if;
+                else
+                    v.state := RADIX_ERROR;
+                    v.badtree := '1';
+                end if;
+            end if;
+
         when SEGMENT_CHECK =>
             mbits := '0' & r.mask_size;
             v.shift := r.shift + (31 - 12) - mbits;
@@ -331,6 +416,16 @@ begin
 
         end case;
 
+        if r.addr(63) = '1' then
+            effpid := x"00000000";
+        else
+            effpid := r.pid;
+        end if;
+        prtable_addr := x"00" & r.prtbl(55 downto 36) &
+                        ((r.prtbl(35 downto 12) and not finalmask(23 downto 0)) or
+                         (effpid(31 downto 8) and finalmask(23 downto 0))) &
+                        effpid(7 downto 0) & "0000";
+
         pgtable_addr := x"00" & r.pgbase(55 downto 19) &
                         ((r.pgbase(18 downto 3) and not mask) or (addrsh and mask)) &
                         "000";
@@ -348,6 +443,9 @@ begin
         elsif tlb_load = '1' then
             addr := r.addr(63 downto 12) & x"000";
             tlb_data := pte;
+        elsif prtbl_rd = '1' then
+            addr := prtable_addr;
+            tlb_data := (others => '0');
         else
             addr := pgtable_addr;
             tlb_data := (others => '0');
diff --git a/tests/mmu/mmu.c b/tests/mmu/mmu.c
index 8281b04..a5d086b 100644
--- a/tests/mmu/mmu.c
+++ b/tests/mmu/mmu.c
@@ -21,6 +21,8 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs)
 #define DAR 19
 #define SRR0 26
 #define SRR1 27
+#define PID 48
+#define PRTBL 720
 
 static inline unsigned long mfspr(int sprnum)
 {
@@ -110,15 +112,20 @@ void zero_memory(void *ptr, unsigned long nbytes)
  * 8kB PGD level pointing to 4kB PTE pages.
  */
 unsigned long *pgdir = (unsigned long *) 0x10000;
-unsigned long free_ptr = 0x12000;
+unsigned long *proc_tbl = (unsigned long *) 0x12000;
+unsigned long free_ptr = 0x13000;
 void *eas_mapped[4];
 int neas_mapped;
 
 void init_mmu(void)
 {
+	/* set up process table */
+	zero_memory(proc_tbl, 512 * sizeof(unsigned long));
+	mtspr(PRTBL, (unsigned long)proc_tbl);
+	mtspr(PID, 1);
 	zero_memory(pgdir, 1024 * sizeof(unsigned long));
 	/* RTS = 0 (2GB address space), RPDS = 10 (1024-entry top level) */
-	mtspr(720, (unsigned long) pgdir | 10);
+	store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 10);
 	do_tlbie(0xc00, 0);	/* invalidate all TLB entries */
 }
 
diff --git a/tests/privileged/privileged.c b/tests/privileged/privileged.c
index eca6e0e..98c037c 100644
--- a/tests/privileged/privileged.c
+++ b/tests/privileged/privileged.c
@@ -13,6 +13,8 @@ extern int call_with_msr(unsigned long arg, int (*fn)(unsigned long), unsigned l
 
 #define SRR0 26
 #define SRR1 27
+#define PID 48
+#define PRTBL 720
 
 static inline unsigned long mfspr(int sprnum)
 {
@@ -55,11 +57,6 @@ void print_test_number(int i)
 	putchar(':');
 }
 
-static inline void do_tlbie(unsigned long rb, unsigned long rs)
-{
-	__asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory");
-}
-
 static inline void store_pte(unsigned long *p, unsigned long pte)
 {
 	__asm__ volatile("stdbrx %1,0,%0" : : "r" (p), "r" (pte) : "memory");
@@ -107,14 +104,18 @@ void zero_memory(void *ptr, unsigned long nbytes)
  * 8kB PGD level pointing to 4kB PTE pages.
  */
 unsigned long *pgdir = (unsigned long *) 0x10000;
-unsigned long free_ptr = 0x12000;
+unsigned long *proc_tbl = (unsigned long *) 0x12000;
+unsigned long free_ptr = 0x13000;
 
 void init_mmu(void)
 {
-	zero_memory(pgdir, 1024 * sizeof(unsigned long));
+	/* set up process table */
+	zero_memory(proc_tbl, 512 * sizeof(unsigned long));
 	/* RTS = 0 (2GB address space), RPDS = 10 (1024-entry top level) */
-	mtspr(720, (unsigned long) pgdir | 10);
-	do_tlbie(0xc00, 0);	/* invalidate all TLB entries */
+	store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 10);
+	mtspr(PRTBL, (unsigned long)proc_tbl);
+	mtspr(PID, 1);
+	zero_memory(pgdir, 1024 * sizeof(unsigned long));
 }
 
 static unsigned long *read_pgd(unsigned long i)
diff --git a/tests/test_mmu.bin b/tests/test_mmu.bin
index a1861b2..706f0d8 100755
Binary files a/tests/test_mmu.bin and b/tests/test_mmu.bin differ
diff --git a/tests/test_privileged.bin b/tests/test_privileged.bin
index 6eb6b53..340b7c0 100755
Binary files a/tests/test_privileged.bin and b/tests/test_privileged.bin differ