From 635e316f9b77e83db47889b4c4985b5a12141498 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 22 Apr 2020 16:53:39 +1000 Subject: [PATCH] Pass mtspr/mfspr to MMU-related SPRs down to loadstore1 This arranges for some mfspr and mtspr to get sent to loadstore1 instead of being handled in execute1. In particular, DAR and DSISR are handled this way. They are therefore "slow" SPRs. While we're at it, fix the spelling of HEIR and remove mention of DAR and DSISR from the comments in execute1. Signed-off-by: Paul Mackerras --- common.vhdl | 8 +++++-- decode1.vhdl | 10 ++++++++- execute1.vhdl | 6 ++--- loadstore1.vhdl | 59 ++++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/common.vhdl b/common.vhdl index ed97e0c..d3d30e7 100644 --- a/common.vhdl +++ b/common.vhdl @@ -24,6 +24,8 @@ package common is constant SPR_XER : spr_num_t := 1; constant SPR_LR : spr_num_t := 8; constant SPR_CTR : spr_num_t := 9; + constant SPR_DSISR : spr_num_t := 18; + constant SPR_DAR : spr_num_t := 19; constant SPR_TB : spr_num_t := 268; constant SPR_DEC : spr_num_t := 22; constant SPR_SRR0 : spr_num_t := 26; @@ -214,7 +216,7 @@ package common is type Execute1ToLoadstore1Type is record valid : std_ulogic; - op : insn_type_t; -- what ld/st op to do + op : insn_type_t; -- what ld/st or m[tf]spr to do addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -228,10 +230,12 @@ package common is xerc : xer_common_t; reserve : std_ulogic; -- set for larx/stcx. rc : std_ulogic; -- set for stcx. 
+ spr_num : spr_num_t; -- SPR number for mfspr/mtspr end record; constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, - reserve => '0', rc => '0', others => (others => '0')); + reserve => '0', rc => '0', + spr_num => 0, others => (others => '0')); type Loadstore1ToDcacheType is record valid : std_ulogic; diff --git a/decode1.vhdl b/decode1.vhdl index a819b79..70099d4 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -363,6 +363,7 @@ begin variable v : Decode1ToDecode2Type; variable majorop : major_opcode_t; variable op_19_bits: std_ulogic_vector(2 downto 0); + variable sprn : spr_num_t; begin v := r; @@ -429,10 +430,17 @@ begin end if; end if; elsif v.decode.insn_type = OP_MFSPR or v.decode.insn_type = OP_MTSPR then - v.ispr1 := fast_spr_num(decode_spr_num(f_in.insn)); + sprn := decode_spr_num(f_in.insn); + v.ispr1 := fast_spr_num(sprn); -- Make slow SPRs single issue if is_fast_spr(v.ispr1) = '0' then v.decode.sgl_pipe := '1'; + -- send MMU-related SPRs to loadstore1 + case sprn is + when SPR_DAR | SPR_DSISR => + v.decode.unit := LDST; + when others => + end case; end if; elsif v.decode.insn_type = OP_RFID then report "PPC RFID"; diff --git a/execute1.vhdl b/execute1.vhdl index 82776e2..490723e 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -463,7 +463,7 @@ begin elsif irq_valid = '1' and e_in.valid = '1' then -- we need two cycles to write srr0 and 1 - -- will need more when we have to write DSISR, DAR and HIER + -- will need more when we have to write HEIR -- Don't deliver the interrupt until we have a valid instruction -- coming in, so we have a valid NIA to put in SRR0. 
exception := '1'; @@ -494,13 +494,12 @@ begin when OP_ILLEGAL => -- we need two cycles to write srr0 and 1 - -- will need more when we have to write DSISR, DAR and HIER + -- will need more when we have to write HEIR illegal := '1'; when OP_SC => -- check bit 1 of the instruction is 1 so we know this is sc; -- 0 would mean scv, so generate an illegal instruction interrupt -- we need two cycles to write srr0 and 1 - -- will need more when we have to write DSISR, DAR and HIER if e_in.insn(1) = '1' then exception := '1'; exception_nextpc := '1'; @@ -983,6 +982,7 @@ begin lv.xerc := v.e.xerc; lv.reserve := e_in.reserve; lv.rc := e_in.rc; + lv.spr_num := decode_spr_num(e_in.insn); -- decode l*cix and st*cix instructions here if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and e_in.insn(5 downto 1) = "10101" then diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 90650db..7ddbbc0 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -59,6 +59,8 @@ architecture behave of loadstore1 is nc : std_ulogic; -- non-cacheable access state : state_t; second_bytes : std_ulogic_vector(7 downto 0); + dar : std_ulogic_vector(63 downto 0); + dsisr : std_ulogic_vector(31 downto 0); end record; type byte_sel_t is array(0 to 7) of std_ulogic; @@ -135,6 +137,9 @@ begin variable use_second : byte_sel_t; variable trim_ctl : trim_ctl_t; variable negative : std_ulogic; + variable mfspr : std_ulogic; + variable sprn : std_ulogic_vector(9 downto 0); + variable sprval : std_ulogic_vector(63 downto 0); begin v := r; req := '0'; @@ -142,6 +147,8 @@ begin done := '0'; byte_sel := (others => '0'); addr := lsu_sum; + mfspr := '0'; + sprval := (others => '0'); -- avoid inferred latches write_enable := '0'; do_update := '0'; @@ -200,11 +207,38 @@ begin if l_in.valid = '1' then v.load := '0'; v.dcbz := '0'; - if l_in.op = OP_LOAD then + case l_in.op is + when OP_STORE => + req := '1'; + when OP_LOAD => + req := '1'; v.load := '1'; - elsif l_in.op = OP_DCBZ then + when OP_DCBZ => 
+ req := '1'; v.dcbz := '1'; - end if; + when OP_MFSPR => + done := '1'; + mfspr := '1'; + -- partial decode on SPR number should be adequate given + -- the restricted set that gets sent down this path + sprn := std_ulogic_vector(to_unsigned(l_in.spr_num, 10)); + if sprn(0) = '0' then + sprval := x"00000000" & r.dsisr; + else + sprval := r.dar; + end if; + when OP_MTSPR => + done := '1'; + sprn := std_ulogic_vector(to_unsigned(l_in.spr_num, 10)); + if sprn(0) = '0' then + v.dsisr := l_in.data(31 downto 0); + else + v.dar := l_in.data; + end if; + when others => + assert false report "unknown op sent to loadstore1"; + end case; + v.addr := lsu_sum; v.write_reg := l_in.write_reg; v.length := l_in.length; @@ -246,12 +280,13 @@ begin v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); end loop; - req := '1'; - stall := '1'; - if long_sel(15 downto 8) = "00000000" then - v.state := LAST_ACK_WAIT; - else - v.state := SECOND_REQ; + if req = '1' then + stall := '1'; + if long_sel(15 downto 8) = "00000000" then + v.state := LAST_ACK_WAIT; + else + v.state := SECOND_REQ; + end if; end if; end if; @@ -308,7 +343,11 @@ begin -- Multiplex either cache data to the destination GPR or -- the address for the rA update. l_out.valid <= done; - if do_update = '1' then + if mfspr = '1' then + l_out.write_enable <= '1'; + l_out.write_reg <= l_in.write_reg; + l_out.write_data <= sprval; + elsif do_update = '1' then l_out.write_enable <= '1'; l_out.write_reg <= r.update_reg; l_out.write_data <= r.addr;