From 2843c99a71ad4b88d8d722bb7bae7d4979b6083c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 24 Apr 2020 10:58:56 +1000 Subject: [PATCH] MMU: Implement reading of the process table This adds the PID register and repurposes SPR 720 as the PRTBL register, which points to the base of the process table. There doesn't seem to be any point to implementing the partition table given that we don't have hypervisor mode. The MMU caches entry 0 of the process table internally (in pgtbl3) plus the entry indexed by the value in the PID register (pgtbl0). Both caches are invalidated by a tlbie[l] with RIC=2 or by a move to PRTBL. The pgtbl0 cache is invalidated by a move to PID. The dTLB and iTLB are cleared by a move to either PRTBL or PID. Which of the two page table root pointers is used (pgtbl0 or pgtbl3) depends on the MSB of the address being translated. Since the segment checking ensures that address(63) = address(62), this is sufficient to map quadrants 0 and 3. Signed-off-by: Paul Mackerras --- common.vhdl | 5 +- decode1.vhdl | 2 +- loadstore1.vhdl | 10 +-- mmu.vhdl | 114 +++++++++++++++++++++++++++++++--- tests/mmu/mmu.c | 11 +++- tests/privileged/privileged.c | 19 +++--- tests/test_mmu.bin | Bin 20496 -> 20504 bytes tests/test_privileged.bin | Bin 16400 -> 16408 bytes 8 files changed, 134 insertions(+), 27 deletions(-) diff --git a/common.vhdl b/common.vhdl index 02f0d3f..aaf176d 100644 --- a/common.vhdl +++ b/common.vhdl @@ -39,7 +39,8 @@ package common is constant SPR_SPRG3U : spr_num_t := 259; constant SPR_HSPRG0 : spr_num_t := 304; constant SPR_HSPRG1 : spr_num_t := 305; - constant SPR_PGTBL0 : spr_num_t := 720; + constant SPR_PID : spr_num_t := 48; + constant SPR_PRTBL : spr_num_t := 720; -- GPR indices in the register file (GPR only) subtype gpr_index_t is std_ulogic_vector(4 downto 0); @@ -288,7 +289,7 @@ package common is iside : std_ulogic; load : std_ulogic; priv : std_ulogic; - sprn : std_ulogic_vector(3 downto 0); + sprn : std_ulogic_vector(9 downto 0); addr : std_ulogic_vector(63 downto 0); rs : std_ulogic_vector(63 downto 0); end record; diff --git a/decode1.vhdl b/decode1.vhdl index cd17d1e..4cd195f 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -449,7 +449,7 @@ begin v.decode.sgl_pipe := '1'; -- send MMU-related SPRs to loadstore1 case sprn is - when SPR_DAR | SPR_DSISR | SPR_PGTBL0 => + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL => v.decode.unit := LDST; when others => end case; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 251f529..e71ad74 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -255,7 +255,7 @@ begin mfspr := '1'; -- partial decode on SPR number should be adequate given -- the restricted set that get sent down this path - if sprn(9) = '0' then + if sprn(9) = '0' and sprn(5) = '0' then if sprn(0) = '0' then sprval := x"00000000" & r.dsisr; else @@ -266,16 +266,18 @@ begin sprval := m_in.sprval; end if; when OP_MTSPR => - done := '1'; - if sprn(9) = '0' then + if sprn(9) = '0' and sprn(5) = '0' then if sprn(0) = '0' then v.dsisr := l_in.data(31 downto 0); else v.dar := l_in.data; end if; + done := '1'; else -- writing one of the SPRs in the MMU mmu_mtspr := '1'; + stall := '1'; + v.state := TLBIE_WAIT; end if; when OP_FETCH_FAILED => -- send it to the MMU to do the radix walk @@ -466,7 +468,7 @@ begin m_out.priv <= r.priv_mode; m_out.tlbie <= v.tlbie; m_out.mtspr <= mmu_mtspr; - m_out.sprn <= sprn(3 downto 0); + m_out.sprn <= sprn; m_out.addr <= addr; m_out.slbia <= l_in.insn(7); m_out.rs <= l_in.data; diff --git a/mmu.vhdl b/mmu.vhdl index 8415443..0eefbab 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -28,6 +28,8 @@ architecture behave of mmu is type state_t is (IDLE, TLB_WAIT, + PROC_TBL_READ, + PROC_TBL_WAIT, SEGMENT_CHECK, RADIX_LOOKUP, RADIX_READ_WAIT, @@ -42,9 +44,15 @@ architecture behave of mmu is store : std_ulogic; priv : std_ulogic; addr : std_ulogic_vector(63 downto 0); + -- config SPRs + prtbl : std_ulogic_vector(63 downto 0); + pid : std_ulogic_vector(31 downto 0); -- internal state state : state_t; pgtbl0 : std_ulogic_vector(63 downto 0); + pt0_valid : std_ulogic; + pgtbl3 : std_ulogic_vector(63 downto 0); + pt3_valid : std_ulogic; shift : unsigned(5 downto 0); mask_size : unsigned(4 downto 0); pgbase : std_ulogic_vector(55 downto 0); @@ -64,8 +72,8 @@ architecture behave of mmu is begin -- Multiplex internal SPR values back to loadstore1, selected - -- by l_in.sprn. Easy when there's only one... - l_out.sprval <= r.pgtbl0; + -- by l_in.sprn. + l_out.sprval <= r.prtbl when l_in.sprn(9) = '1' else x"00000000" & r.pid; mmu_0: process(clk) begin @@ -73,7 +81,9 @@ begin if rst = '1' then r.state <= IDLE; r.valid <= '0'; - r.pgtbl0 <= (others => '0'); + r.pt0_valid <= '0'; + r.pt3_valid <= '0'; + r.prtbl <= (others => '0'); else if rin.valid = '1' then report "MMU got tlb miss for " & to_hstring(rin.addr); @@ -169,12 +179,17 @@ begin variable itlb_load : std_ulogic; variable tlbie_req : std_ulogic; variable inval_all : std_ulogic; + variable prtbl_rd : std_ulogic; + variable pt_valid : std_ulogic; + variable effpid : std_ulogic_vector(31 downto 0); + variable prtable_addr : std_ulogic_vector(63 downto 0); variable rts : unsigned(5 downto 0); variable mbits : unsigned(5 downto 0); variable pgtable_addr : std_ulogic_vector(63 downto 0); variable pte : std_ulogic_vector(63 downto 0); variable tlb_data : std_ulogic_vector(63 downto 0); variable nonzero : std_ulogic; + variable pgtbl : std_ulogic_vector(63 downto 0); variable perm_ok : std_ulogic; variable rc_ok : std_ulogic; variable addr : std_ulogic_vector(63 downto 0); @@ -193,6 +208,7 @@ begin itlb_load := '0'; tlbie_req := '0'; inval_all := '0'; + prtbl_rd := '0'; -- Radix tree data structures in memory are big-endian, -- so we need to byte-swap them @@ -202,14 +218,21 @@ begin case r.state is when IDLE => + if l_in.addr(63) = '0' then + pgtbl := r.pgtbl0; + pt_valid := r.pt0_valid; + else + pgtbl := r.pgtbl3; + pt_valid := r.pt3_valid; + end if; -- rts == radix tree size, # address bits being translated - rts := unsigned('0' & r.pgtbl0(62 downto 61) & r.pgtbl0(7 downto 5)); + rts := unsigned('0' & pgtbl(62 downto 61) & pgtbl(7 downto 5)); -- mbits == # address bits to index top level of tree - mbits := unsigned('0' & r.pgtbl0(4 downto 0)); + mbits := unsigned('0' & pgtbl(4 downto 0)); -- set v.shift to rts so that we can use finalmask for the segment check v.shift := rts; v.mask_size := mbits(4 downto 0); - v.pgbase := r.pgtbl0(55 downto 8) & x"00"; + v.pgbase := pgtbl(55 downto 8) & x"00"; if l_in.valid = '1' then v.addr := l_in.addr; @@ -223,11 +246,23 @@ begin -- RB[IS] != 0 or RB[AP] != 0, or for slbia inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or l_in.addr(7) or l_in.addr(6) or l_in.addr(5); + -- The RIC field of the tlbie instruction comes across on the + -- sprn bus as bits 2--3. RIC=2 flushes process table caches. + if l_in.sprn(3) = '1' then + v.pt0_valid := '0'; + v.pt3_valid := '0'; + end if; v.state := TLB_WAIT; else v.valid := '1'; - -- Use RPDS = 0 to disable radix tree walks - if mbits = 0 then + if pt_valid = '0' then + -- need to fetch process table entry + -- set v.shift so we can use finalmask for generating + -- the process table entry address + v.shift := unsigned('0' & r.prtbl(4 downto 0)); + v.state := PROC_TBL_READ; + elsif mbits = 0 then + -- Use RPDS = 0 to disable radix tree walks v.state := RADIX_ERROR; v.invalid := '1'; else @@ -236,7 +271,20 @@ begin end if; end if; if l_in.mtspr = '1' then - v.pgtbl0 := l_in.rs; + -- Move to PID needs to invalidate L1 TLBs and cached + -- pgtbl0 value. Move to PRTBL does that plus + -- invalidating the cached pgtbl3 value as well. + if l_in.sprn(9) = '0' then + v.pid := l_in.rs(31 downto 0); + else + v.prtbl := l_in.rs; + v.pt3_valid := '0'; + end if; + v.pt0_valid := '0'; + dcreq := '1'; + tlbie_req := '1'; + inval_all := '1'; + v.state := TLB_WAIT; end if; when TLB_WAIT => @@ -245,6 +293,41 @@ begin v.state := IDLE; end if; + when PROC_TBL_READ => + dcreq := '1'; + prtbl_rd := '1'; + v.state := PROC_TBL_WAIT; + + when PROC_TBL_WAIT => + if d_in.done = '1' then + if d_in.err = '0' then + if r.addr(63) = '1' then + v.pgtbl3 := data; + v.pt3_valid := '1'; + else + v.pgtbl0 := data; + v.pt0_valid := '1'; + end if; + -- rts == radix tree size, # address bits being translated + rts := unsigned('0' & data(62 downto 61) & data(7 downto 5)); + -- mbits == # address bits to index top level of tree + mbits := unsigned('0' & data(4 downto 0)); + -- set v.shift to rts so that we can use finalmask for the segment check + v.shift := rts; + v.mask_size := mbits(4 downto 0); + v.pgbase := data(55 downto 8) & x"00"; + if mbits = 0 then + v.state := RADIX_ERROR; + v.invalid := '1'; + else + v.state := SEGMENT_CHECK; + end if; + else + v.state := RADIX_ERROR; + v.badtree := '1'; + end if; + end if; + when SEGMENT_CHECK => mbits := '0' & r.mask_size; v.shift := r.shift + (31 - 12) - mbits; @@ -331,6 +414,16 @@ begin end case; + if r.addr(63) = '1' then + effpid := x"00000000"; + else + effpid := r.pid; + end if; + prtable_addr := x"00" & r.prtbl(55 downto 36) & + ((r.prtbl(35 downto 12) and not finalmask(23 downto 0)) or + (effpid(31 downto 8) and finalmask(23 downto 0))) & + effpid(7 downto 0) & "0000"; + pgtable_addr := x"00" & r.pgbase(55 downto 19) & ((r.pgbase(18 downto 3) and not mask) or (addrsh and mask)) & "000"; @@ -348,6 +441,9 @@ begin elsif tlb_load = '1' then addr := r.addr(63 downto 12) & x"000"; tlb_data := pte; + elsif prtbl_rd = '1' then + addr := prtable_addr; + tlb_data := (others => '0'); else addr := pgtable_addr; tlb_data := (others => '0'); diff --git a/tests/mmu/mmu.c b/tests/mmu/mmu.c index 8281b04..a5d086b 100644 --- a/tests/mmu/mmu.c +++ b/tests/mmu/mmu.c @@ -21,6 +21,8 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs) #define DAR 19 #define SRR0 26 #define SRR1 27 +#define PID 48 +#define PRTBL 720 static inline unsigned long mfspr(int sprnum) { @@ -110,15 +112,20 @@ void zero_memory(void *ptr, unsigned long nbytes) * 8kB PGD level pointing to 4kB PTE pages. */ unsigned long *pgdir = (unsigned long *) 0x10000; -unsigned long free_ptr = 0x12000; +unsigned long *proc_tbl = (unsigned long *) 0x12000; +unsigned long free_ptr = 0x13000; void *eas_mapped[4]; int neas_mapped; void init_mmu(void) { + /* set up process table */ + zero_memory(proc_tbl, 512 * sizeof(unsigned long)); + mtspr(PRTBL, (unsigned long)proc_tbl); + mtspr(PID, 1); zero_memory(pgdir, 1024 * sizeof(unsigned long)); /* RTS = 0 (2GB address space), RPDS = 10 (1024-entry top level) */ - mtspr(720, (unsigned long) pgdir | 10); + store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 10); do_tlbie(0xc00, 0); /* invalidate all TLB entries */ } diff --git a/tests/privileged/privileged.c b/tests/privileged/privileged.c index eca6e0e..98c037c 100644 --- a/tests/privileged/privileged.c +++ b/tests/privileged/privileged.c @@ -13,6 +13,8 @@ extern int call_with_msr(unsigned long arg, int (*fn)(unsigned long), unsigned l #define SRR0 26 #define SRR1 27 +#define PID 48 +#define PRTBL 720 static inline unsigned long mfspr(int sprnum) { @@ -55,11 +57,6 @@ void print_test_number(int i) putchar(':'); } -static inline void do_tlbie(unsigned long rb, unsigned long rs) -{ - __asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory"); -} - static inline void store_pte(unsigned long *p, unsigned long pte) { __asm__ volatile("stdbrx %1,0,%0" : : "r" (p), "r" (pte) : "memory"); @@ -107,14 +104,18 @@ void zero_memory(void *ptr, unsigned long nbytes) * 8kB PGD level pointing to 4kB PTE pages. */ unsigned long *pgdir = (unsigned long *) 0x10000; -unsigned long free_ptr = 0x12000; +unsigned long *proc_tbl = (unsigned long *) 0x12000; +unsigned long free_ptr = 0x13000; void init_mmu(void) { - zero_memory(pgdir, 1024 * sizeof(unsigned long)); + /* set up process table */ + zero_memory(proc_tbl, 512 * sizeof(unsigned long)); /* RTS = 0 (2GB address space), RPDS = 10 (1024-entry top level) */ - mtspr(720, (unsigned long) pgdir | 10); - do_tlbie(0xc00, 0); /* invalidate all TLB entries */ + store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 10); + mtspr(PRTBL, (unsigned long)proc_tbl); + mtspr(PID, 1); + zero_memory(pgdir, 1024 * sizeof(unsigned long)); } static unsigned long *read_pgd(unsigned long i) diff --git a/tests/test_mmu.bin b/tests/test_mmu.bin index a1861b22dbbdfc857e7d512d9a924efe9a997bbb..706f0d80dc8d3b8563edbd9a1ba0cc8967b5603e 100755 GIT binary patch delta 1369 zcmZ9MZA=_h6o&6v7|J5uE>bEVEiAOSU{Mz6wlmvZc9&&gKMB}2w!t4uDHa+~5Qx(3 z+Oe5NO=<$3n1G2Etwo!(u?9%|V?tAnVlXLbh%vER6SaI*+gK86sMm90VvILAbKmE@ z=iIq-XEKAMFgyx_eQ@~G{>N-ki*MS#lH%Dc5X2)D%{%UFV*$okY>m5>HEt}M*yK*k z@Y5a}E0ou8wxS#Y2$co!Di&5|=5*=R4i=MJbS)}Xj8=Yx*S%@avKTX~vROSkZ)UJM z%L`YdoQKV7(ZblcEdk7;j6GsYh&wWiDyLH2le(X()-0G=sglM*EhSM;f~8v&#yrZg zkS8JT(YFmx*mE&b<%G-QZ&Ymon8uMk&CrD_dwfueihUg3#XI&eG@-ILY`8g%4tpNH zzxO2wV$QxlA%UCaYcPbfaw{A{yK)1LpwUq-${pX)HDiA(WMTjQ8-|=OFwOB4WZ;1F zJWS#a*FHny8ag~Rc-@!$mOC3KTwB42)2;);t?mUc4po0bOMBf146dt~U!99r+y~(z zN}ej{N5yjgf0tifTID}4jU}bV<4Zg&PE(vD=SQ7~sZLdkhT8Cg-`Wr(QmA6g&>J71 zSyZ%Bu6RB7V!TbysiKR{U-;dmrFH*g+^_pDQ|{g(=GIlx=|$`EPBk9WIt#@*$m##4{%} z*{^dNizUgd6Bq09Po;Nha+aRoOm*qKnk?yTBAy_B<>AIYw-Bq(Yx1u;n~Aeu(BwIt zGl-X||4W@8C0-?eQs+$K;cj|PVhgdvc-D2#bSfk0 zI32d8I7wal9p4E*k${+YmxX8GjT80S+u_=-#eMTx9dm;tJKaiTb$N4b%ENHUO zo}MwW8rl(K7K%0UWt@x@NR}kV?&J5=?IG2N4V!Z delta 1303 zcmZ9JVN6q36vywWJOsx|8F7MQS|qg?AT1zxi>+;eDr;$j5wjYyq!pOpL`5)xt(~2D z`>+I(u>*;IAc~6f!?KNlA51nAH=3EnB@##GhndA>3oPrBvCX7py1R4B{j{5R-u?gn z=iKw&JMYp}_>it)aLk?G=72IBcYLFDmaw*RJXYJaGxaKKV~j`6Qj z7rhQv&cDZ{T02+}>dbHq%j=3u{i?c&MY&UHSsB4l-N#clE+s~ z3^GfiJQETkn3vr3>lU^rc5~j5UqLkk?nSi?M|aBE`_MlseHG-%)Ar;Q)MAo}+G1+&<|zlJ-w&buLjZ#W;pdsy3K zm!(ZV(A~Me8{9a({}K3b&}9cF&bY3_6ZE_H(b#&TPuS>S~3Hff#9z)Sxyw-e8_H1c^%lJXd6NS`K;m`;*nmRPbt=9{!N{~uQ-=DenRKx6>lc)>C<_y;ymI@ z{rUxdR2lhX+&HOozv3;#kKfXHgW{KDo?g(ZxPbWQQ##+O_+`oGbgn5j5|0h&{Mn0* z4=N;%oYwg-icQ3SoVM$HSs6uSEDq5G6u(0J@C;2raWU~(oH-PP?eo7MDl|YfzIe?B zx6m3e8wytNU|=^{0|6Th<5a-ryuTvE82wSXEEH)9@2U_H+L=G!$F6|i=qU2N+h%0G zpK^T;vQ5C)Fs=veS}UnatO-_Vr$}|+QBsrHY$8};{eo0ZT8LHID`2!xmy<@;oa-a3 zrudQa4=@$1(h3ZWJ;Y~0o5o3fj^*uDS|6zeJlt;6CP__VqJ4+9K&lL<+s)czQj2(x zybLK9K5aK!%Qi8V`BR9i)LT98bt7xZKChB27v)FL8nTu4k?Qzchz?mvOJ?KbjbU%d zrrjcy#EH-j?LH|l&V|fcnp6%hQ??|Bu~vkzO=~6Pz?yJ9oW)qU*gd%}g#0eKOcY*< sPyXk1lD9~ni*~tWT^~gn=fcHc!c@2l_Rg<`O@J1tF0{~yvTI}i4+K!t1ONa4 diff --git a/tests/test_privileged.bin b/tests/test_privileged.bin index 6eb6b536b11ab8f75b5663faf5dcc4fea6c94e0e..340b7c0f0cf67ab4f669c57e4086487cc4020118 100755 GIT binary patch delta 710 zcmZ9JO=uHQ5Xaxdq^y>A-t4`}@z#{+BKLQ1PK` z!PslNQ-x5}NF54aq;%keQu6z$O>0nKA0q8r7Op1g4~`olenV zsw>efQ+$`^>jB^VS=}dj5@yvtbaaUvruCz+hS&8QkiZRn8~_g``a<<49!pGujrSAr z(!;9-SFGB-BD2k>#A}2V&KEOf>N)6Z%LefmRw1p6>T2kYCzE!*%~y#3 zX$pR~#ZztmoOpTXGig1;xS_%lrj18%7~dPmBZ{wWjUXg-P|!#Y(mmI@i^)-{-zG0Z z7-QyH%$w7(2Lb1b*g_%G8KP6hbu*f+>`~@PdAf&PBle0|KV=bOgGc8L_F7SRhlXW_ zz6E%}Is!wuWSR7rSFNZKY;i8y*qQw~_5>!no!;EN>27wqBlp!IJOPZ&;%93ZzF?QF zgNrHKgb#Sh9*$ky=G+yt6#or-xN1kUk4X{sMF>S0en~7N0EJ+}@TXlHEP57B0{m?c cM*9D9Zq=51ZpyWlp1DZeAs?-1XJse<149PYk^lez delta 699 zcmZ9IUuaTM9LImh=Fnx+B?Fs6UfuOx*T$3uUE4IRWI87p7>r_IvuF_gdvm3E3xXhm z$?v5eMhimft>H_L4ffJoy=@Osqk>s4J@lYWIsJ|^h<&-|o^wCn-}#;IJx_JEOpnT3 zYyN|h`Mwbn0Fe8JLD`9OQidJE*U}`*;dkk%wAQodW`3260my5S-h6hY%T7X0tDx+u zMa2FF-`*{xPL;r|YbJeR#|EtNkxPv<=P8r!srP~ZQneN+fyEsHh=wlDL`wmXw9z(3 zq#?6tA!;S=vQmcXX>43jZD>@-+fcwu;^oNn_}VfSnK3Uys_mWyt95aoPf zYVm4b6W!?49ZK@Edcai7wRnnwI1O^5<`&i1bWbHoya#km^m#~Y;T6aW(z_nj;C(COeahD&gWtdO!|%)9s21y zVA`k9_Y2=7y+8RbG8IG0G!~W2;L8T*Zi*;mqcKY34Q?y;@Y{V(mYj`!Y?0UpVlJA6 zM28}cO$@4b_66grVt^*zP)FD-K2sI8itDOfcKqRdPSnTEF?i16{egkly+l96KKI4~ zgwHa7S!}8kY!k=)UUnSM`4zT-Oa6($Y>RVE>?QpRT)=g|J-kegh%Y1@5%?9cDFH}= lbprp_K_w;v|Hgq4x~w+mYOP|uX{)VTZyT*{%F)r@{1*#}+x-9l