diff --git a/common.vhdl b/common.vhdl index 76eaec2..1c8642b 100644 --- a/common.vhdl +++ b/common.vhdl @@ -64,6 +64,11 @@ package common is constant SPR_DSCR : spr_num_t := 17; constant SPR_VRSAVE : spr_num_t := 256; constant SPR_PIR : spr_num_t := 1023; + constant SPR_CIABR : spr_num_t := 187; + constant SPR_DAWR0 : spr_num_t := 180; + constant SPR_DAWR1 : spr_num_t := 181; + constant SPR_DAWRX0 : spr_num_t := 188; + constant SPR_DAWRX1 : spr_num_t := 189; -- PMU registers constant SPR_UPMC1 : spr_num_t := 771; @@ -174,6 +179,7 @@ package common is constant SPRSEL_CTRL : spr_selector := 4x"a"; constant SPRSEL_DSCR : spr_selector := 4x"b"; constant SPRSEL_PIR : spr_selector := 4x"c"; + constant SPRSEL_CIABR : spr_selector := 4x"d"; constant SPRSEL_XER : spr_selector := 4x"f"; -- FSCR and HFSCR bit numbers @@ -275,6 +281,7 @@ package common is hfscr_fp: std_ulogic; heir: std_ulogic_vector(63 downto 0); dscr: std_ulogic_vector(24 downto 0); + ciabr: std_ulogic_vector(63 downto 0); end record; constant ctrl_t_init : ctrl_t := (wait_state => '0', run => '1', xer_low => 18x"0", @@ -526,6 +533,7 @@ package common is nia : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0); addr_v : std_ulogic; + trace : std_ulogic; occur : PMUEventType; end record; @@ -598,6 +606,8 @@ package common is type Loadstore1ToExecute1Type is record busy : std_ulogic; l2stall : std_ulogic; + ea_for_pmu : std_ulogic_vector(63 downto 0); + ea_valid : std_ulogic; end record; type Loadstore1ToDcacheType is record @@ -618,6 +628,7 @@ package common is addr : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1 byte_sel : std_ulogic_vector(7 downto 0); + dawr_match : std_ulogic; -- valid the cycle after .valid = 1 end record; constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType := (addr => (others => '0'), data => (others => '0'), byte_sel => x"00", diff --git a/dcache.vhdl b/dcache.vhdl index ce7b351..ff7383c 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -316,6 +316,7 @@ architecture rtl of dcache is hit_way : way_t; same_tag : std_ulogic; mmu_req : std_ulogic; + dawr_m : std_ulogic; end record; -- First stage register, contains state for stage 1 of load hits @@ -635,6 +636,8 @@ begin -- put directly into req.data in the dcache_slow process below. r0.req.data <= d_in.data; r0.d_valid <= r0.req.valid; + -- the dawr_match signal has the same timing as the data + r0.req.dawr_match <= d_in.dawr_match; end if; end if; end process; @@ -953,12 +956,18 @@ begin variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0); variable snoop_match : std_ulogic; variable hit_reload : std_ulogic; + variable dawr_match : std_ulogic; begin -- Extract line, row and tag from request rindex := get_index(r0.req.addr); req_index <= rindex; req_row := get_row(r0.req.addr); req_tag <= get_tag(ra); + if r0.d_valid = '0' then + dawr_match := d_in.dawr_match; + else + dawr_match := r0.req.dawr_match; + end if; go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error; if is_X(r0.req.addr) then @@ -1135,7 +1144,7 @@ begin rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed); perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and (perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm)); - access_ok <= valid_ra and perm_ok and rc_ok; + access_ok <= valid_ra and perm_ok and rc_ok and not dawr_match; -- Combine the request and cache hit status to decide what -- operation needs to be done diff --git a/decode1.vhdl b/decode1.vhdl index 0ea9ed1..8d2d2fb 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -488,6 +488,8 @@ architecture behaviour of decode1 is i.sel := SPRSEL_DSCR; when SPR_PIR => i.sel := SPRSEL_PIR; + when SPR_CIABR => + i.sel := SPRSEL_CIABR; when others => i.valid := '0'; end case; diff --git a/decode2.vhdl b/decode2.vhdl index 7e993d5..cc241a2 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -477,7 +477,8 @@ begin case decode_spr_num(d_in.insn) is when SPR_XER => v.input_ov := '1'; - when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | + SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => unit := LDST; when SPR_TAR => v.e.uses_tar := '1'; @@ -499,7 +500,8 @@ begin when SPR_XER => v.e.output_xer := '1'; v.output_ov := '1'; - when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | + SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => unit := LDST; if d_in.valid = '1' then v.sgl_pipe := '1'; diff --git a/execute1.vhdl b/execute1.vhdl index 3b7ec2f..a3b9522 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -96,6 +96,7 @@ architecture behaviour of execute1 is set_heir : std_ulogic; write_ctrl : std_ulogic; write_dscr : std_ulogic; + write_ciabr : std_ulogic; enter_wait : std_ulogic; scv_trap : std_ulogic; end record; @@ -116,6 +117,7 @@ architecture behaviour of execute1 is start_div : std_ulogic; start_bsort : std_ulogic; do_trace : std_ulogic; + ciabr_trace : std_ulogic; fp_intr : std_ulogic; res2_sel : std_ulogic_vector(1 downto 0); bypass_valid : std_ulogic; @@ -133,6 +135,7 @@ architecture behaviour of execute1 is busy: std_ulogic; fp_exception_next : std_ulogic; trace_next : std_ulogic; + trace_ciabr : std_ulogic; prev_op : insn_type_t; prev_prefixed : std_ulogic; oe : std_ulogic; @@ -165,8 +168,8 @@ architecture behaviour of execute1 is constant reg_stage1_type_init : reg_stage1_type := (e => Execute1ToWritebackInit, se => side_effect_init, busy => '0', - fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, - prev_prefixed => '0', + fp_exception_next => '0', trace_next => '0', trace_ciabr => '0', + prev_op => OP_ILLEGAL, prev_prefixed => '0', oe => '0', mul_select => "000", res2_sel => "00", spr_select => spr_id_init, pmu_spr_num => 5x"0", redir_to_next => '0', advance_nia => '0', lr_from_next => '0', @@ -251,6 +254,7 @@ architecture behaviour of execute1 is -- PMU signals signal x_to_pmu : Execute1ToPMUType; signal pmu_to_x : PMUToExecute1Type; + signal pmu_trace : std_ulogic; -- signals for logging signal exception_log : std_ulogic; @@ -557,11 +561,12 @@ begin br_mispredict => ex2.br_mispredict, others => '0'); x_to_pmu.nia <= e_in.nia; - x_to_pmu.addr <= (others => '0'); - x_to_pmu.addr_v <= '0'; + x_to_pmu.addr <= l_in.ea_for_pmu; + x_to_pmu.addr_v <= l_in.ea_valid; x_to_pmu.spr_num <= ex1.pmu_spr_num; x_to_pmu.spr_val <= ex1.e.write_data; x_to_pmu.run <= ctrl.run; + x_to_pmu.trace <= pmu_trace; -- XER forwarding. The CA and CA32 bits are only modified by instructions -- that are handled here, so for them we can just use the result most @@ -1157,6 +1162,12 @@ begin end if; v.do_trace := ex1.msr(MSR_SE); + -- see if we have a CIABR map + if ctrl.ciabr(0) = '1' and ctrl.ciabr(1) = not ex1.msr(MSR_PR) and + ctrl.ciabr(63 downto 2) = e_in.nia(63 downto 2) then + v.ciabr_trace := '1'; + end if; + case_0: case e_in.insn_type is when OP_ILLEGAL => illegal := '1'; @@ -1392,6 +1403,8 @@ begin v.se.write_ctrl := '1'; when SPRSEL_DSCR => v.se.write_dscr := '1'; + when SPRSEL_CIABR => + v.se.write_ciabr := '1'; when others => end case; end if; @@ -1655,12 +1668,14 @@ begin v.e.srr1 := (others => '0'); v.e.srr1(47 - 33) := '1'; v.e.srr1(47 - 34) := ex1.prev_prefixed; - if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or - ex1.prev_op = OP_DCBF then + if (ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or + ex1.prev_op = OP_DCBF) and ex1.trace_ciabr = '0' then v.e.srr1(47 - 35) := '1'; - elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ then + elsif (ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ) and + ex1.trace_ciabr = '0' then v.e.srr1(47 - 36) := '1'; end if; + v.e.srr1(47 - 43) := ex1.trace_ciabr; elsif irq_valid = '1' then -- Don't deliver the interrupt until we have a valid instruction @@ -1693,7 +1708,8 @@ begin v.e.valid := actions.complete; bypass_valid := actions.bypass_valid; v.taken_branch_event := actions.take_branch; - v.trace_next := actions.do_trace; + v.trace_next := actions.do_trace or actions.ciabr_trace; + v.trace_ciabr := actions.ciabr_trace; v.fp_exception_next := actions.fp_intr; v.res2_sel := actions.res2_sel; v.msr := actions.new_msr; @@ -1725,6 +1741,7 @@ begin end if; is_scv := go and actions.se.scv_trap; bsort_start <= go and actions.start_bsort; + pmu_trace <= go and actions.do_trace; if not HAS_FPU and ex1.div_in_progress = '1' then v.div_in_progress := not divider_to_x.valid; @@ -1877,6 +1894,7 @@ begin assemble_ctrl(ctrl, ex1.msr(MSR_PR)) when SPRSEL_CTRL, 39x"0" & ctrl.dscr when SPRSEL_DSCR, 56x"0" & std_ulogic_vector(to_unsigned(CPU_INDEX, 8)) when SPRSEL_PIR, + ctrl.ciabr when SPRSEL_CIABR, assemble_xer(ex1.e.xerc, ctrl.xer_low) when others; stage2_stall <= l_in.l2stall or fp_in.f2stall; @@ -2057,6 +2075,9 @@ begin if ex1.se.write_dscr = '1' then ctrl_tmp.dscr <= ex1.e.write_data(24 downto 0); end if; + if ex1.se.write_ciabr = '1' then + ctrl_tmp.ciabr <= ex1.e.write_data; + end if; if ex1.se.enter_wait = '1' then ctrl_tmp.wait_state <= '1'; end if; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 485947b..0816931 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -95,13 +95,15 @@ architecture behave of loadstore1 is virt_mode : std_ulogic; priv_mode : std_ulogic; load_sp : std_ulogic; - sprsel : std_ulogic_vector(1 downto 0); + sprsel : std_ulogic_vector(2 downto 0); ric : std_ulogic_vector(1 downto 0); is_slbia : std_ulogic; align_intr : std_ulogic; + dawr_intr : std_ulogic; dword_index : std_ulogic; two_dwords : std_ulogic; incomplete : std_ulogic; + ea_valid : std_ulogic; end record; constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', flush => '0', touch => '0', sync => '0', tlbie => '0', @@ -118,8 +120,10 @@ architecture behave of loadstore1 is atomic_qw => '0', atomic_first => '0', atomic_last => '0', rc => '0', nc => '0', virt_mode => '0', priv_mode => '0', load_sp => '0', - sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0', - dword_index => '0', two_dwords => '0', incomplete => '0'); + sprsel => "000", ric => "00", is_slbia => '0', align_intr => '0', + dawr_intr => '0', + dword_index => '0', two_dwords => '0', incomplete => '0', + ea_valid => '0'); type reg_stage1_t is record req : request_t; @@ -138,11 +142,15 @@ architecture behave of loadstore1 is one_cycle : std_ulogic; wr_sel : std_ulogic_vector(1 downto 0); addr0 : std_ulogic_vector(63 downto 0); - sprsel : std_ulogic_vector(1 downto 0); + sprsel : std_ulogic_vector(2 downto 0); dbg_spr : std_ulogic_vector(63 downto 0); dbg_spr_ack: std_ulogic; end record; + constant num_dawr : positive := 2; + type dawr_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(63 downto 3); + type dawrx_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(15 downto 0); + type reg_stage3_t is record state : state_t; complete : std_ulogic; @@ -164,6 +172,10 @@ architecture behave of loadstore1 is intr_vec : integer range 0 to 16#fff#; srr1 : std_ulogic_vector(15 downto 0); events : Loadstore1EventType; + dawr : dawr_array_t; + dawrx : dawrx_array_t; + dawr_uplim : dawr_array_t; + dawr_upd : std_ulogic; end record; signal req_in : request_t; @@ -183,6 +195,7 @@ architecture behave of loadstore1 is signal stage1_req : request_t; signal stage1_dcreq : std_ulogic; signal stage1_dreq : std_ulogic; + signal stage1_dawr_match : std_ulogic; -- Generate byte enables from sizes function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is @@ -285,6 +298,25 @@ architecture behave of loadstore1 is return fs2; end; + function dawrx_match_enable(dawrx : std_ulogic_vector(15 downto 0); virt_mode : std_ulogic; + priv_mode : std_ulogic; is_store : std_ulogic) + return boolean is + begin + -- check PRIVM field; note priv_mode = '1' implies hypervisor mode + if (priv_mode = '0' and dawrx(0) = '0') or (priv_mode = '1' and dawrx(2) = '0') then + return false; + end if; + -- check WT/WTI fields + if dawrx(3) = '0' and virt_mode /= dawrx(4) then + return false; + end if; + -- check DW/DR fields + if (is_store = '0' and dawrx(5) = '0') or (is_store = '1' and dawrx(6) = '0') then + return false; + end if; + return true; + end; + begin loadstore1_reg: process(clk) begin @@ -300,7 +332,7 @@ begin r1.req.instr_fault <= '0'; r1.req.load <= '0'; r1.req.priv_mode <= '0'; - r1.req.sprsel <= "00"; + r1.req.sprsel <= "000"; r1.req.ric <= "00"; r1.req.xerc <= xerc_init; @@ -311,7 +343,7 @@ begin r2.req.instr_fault <= '0'; r2.req.load <= '0'; r2.req.priv_mode <= '0'; - r2.req.sprsel <= "00"; + r2.req.sprsel <= "000"; r2.req.ric <= "00"; r2.req.xerc <= xerc_init; @@ -328,12 +360,19 @@ begin r3.stage1_en <= '1'; r3.events.load_complete <= '0'; r3.events.store_complete <= '0'; + for i in 0 to num_dawr - 1 loop + r3.dawr(i) <= (others => '0'); + r3.dawrx(i) <= (others => '0'); + r3.dawr_uplim(i) <= (others => '0'); + end loop; + r3.dawr_upd <= '0'; flushing <= '0'; else r1 <= r1in; r2 <= r2in; r3 <= r3in; - flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and + flushing <= (flushing or (r1in.req.valid and + (r1in.req.align_intr or r1in.req.dawr_intr))) and not flush; end if; stage1_dreq <= stage1_dcreq; @@ -435,12 +474,15 @@ begin v.virt_mode := l_in.virt_mode; v.priv_mode := l_in.priv_mode; v.ric := l_in.insn(19 downto 18); - if sprn(1) = '1' then + if sprn(8 downto 7) = "01" then + -- debug registers DAWR[X][01] + v.sprsel := '1' & sprn(3) & sprn(0); + elsif sprn(1) = '1' then -- DSISR and DAR - v.sprsel := '1' & sprn(0); + v.sprsel := "01" & sprn(0); else -- PID and PTCR - v.sprsel := '0' & sprn(8); + v.sprsel := "00" & sprn(8); end if; lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); @@ -464,6 +506,7 @@ begin addr(63 downto 32) := (others => '0'); end if; v.addr := addr; + v.ea_valid := l_in.valid; -- XXX Temporary hack. Mark the op as non-cachable if the address -- is the form 0xc------- for a real-mode access. @@ -509,6 +552,7 @@ begin case l_in.op is when OP_SYNC => v.sync := '1'; + v.ea_valid := '0'; when OP_STORE => v.store := '1'; if l_in.length = "0000" then @@ -536,14 +580,15 @@ begin v.align_intr := v.nc; when OP_TLBIE => v.tlbie := '1'; - v.addr := l_in.addr2; -- address from RB for tlbie v.is_slbia := l_in.insn(7); v.mmu_op := '1'; when OP_MFSPR => v.read_spr := '1'; + v.ea_valid := '0'; when OP_MTSPR => v.write_spr := '1'; - v.mmu_op := not sprn(1); + v.mmu_op := not (sprn(1) or sprn(2)); + v.ea_valid := '0'; when OP_FETCH_FAILED => -- send it to the MMU to do the radix walk v.instr_fault := '1'; @@ -654,8 +699,12 @@ begin variable byte_offset : unsigned(2 downto 0); variable interrupt : std_ulogic; variable dbg_spr_rd : std_ulogic; - variable sprsel : std_ulogic_vector(1 downto 0); + variable sprsel : std_ulogic_vector(2 downto 0); variable sprval : std_ulogic_vector(63 downto 0); + variable dawr_match : std_ulogic; + variable addr : std_ulogic_vector(63 downto 3); + variable addl : unsigned(64 downto 3); + variable addu : unsigned(64 downto 3); begin v := r2; @@ -672,21 +721,47 @@ begin end if; end loop; + -- Test for DAWR0/1 matches + dawr_match := '0'; + for i in 0 to 1 loop + addr := r1.req.addr(63 downto 3); + if r1.req.priv_mode = '1' and r3.dawrx(i)(7) = '1' then + -- HRAMMC=1 => trim top bit from address + addr(63) := '0'; + end if; + addl := unsigned('0' & addr) - unsigned('0' & r3.dawr(i)); + addu := unsigned('0' & r3.dawr_uplim(i)) - unsigned('0' & addr); + if addl(64) = '0' and addu(64) = '0' and + dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode, + r1.req.priv_mode, r1.req.store) then + dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and + not (r1.req.touch or r1.req.sync or r1.req.flush); + end if; + end loop; + stage1_dawr_match <= dawr_match; + dbg_spr_rd := dbg_spr_req and not (r1.req.valid and r1.req.read_spr); if dbg_spr_rd = '0' then sprsel := r1.req.sprsel; else - sprsel := dbg_spr_addr; + sprsel := '0' & dbg_spr_addr; end if; - if sprsel(1) = '1' then - if sprsel(0) = '0' then + case sprsel is + when "100" => + sprval := r3.dawr(0) & "000"; + when "101" => + sprval := r3.dawr(1) & "000"; + when "110" => + sprval := 48x"0" & r3.dawrx(0); + when "111" => + sprval := 48x"0" & r3.dawrx(1); + when "010" => sprval := x"00000000" & r3.dsisr; - else + when "011" => sprval := r3.dar; - end if; - else - sprval := m_in.sprval; - end if; + when others => + sprval := m_in.sprval; -- MMU regs + end case; if dbg_spr_req = '0' then v.dbg_spr_ack := '0'; elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then @@ -699,11 +774,17 @@ begin v.req := r1.req; v.addr0 := r1.addr0; v.req.store_data := store_data; + v.req.dawr_intr := dawr_match; v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and not r1.req.incomplete; v.wait_mmu := r1.req.valid and r1.req.mmu_op; - v.busy := r1.req.valid and r1.req.mmu_op; - v.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op); + if r1.req.valid = '1' and r1.req.align_intr = '1' then + v.busy := '1'; + v.one_cycle := '0'; + else + v.busy := r1.req.valid and r1.req.mmu_op; + v.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op); + end if; if r1.req.do_update = '1' or r1.req.store = '1' or r1.req.read_spr = '1' then v.wr_sel := "00"; elsif r1.req.load_sp = '1' then @@ -741,7 +822,7 @@ begin end if; interrupt := (r2.req.valid and r2.req.align_intr) or - (d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or + (d_in.error and (d_in.cache_paradox or d_in.reserve_nc or r2.req.dawr_intr)) or m_in.err; if interrupt = '1' then v.req.valid := '0'; @@ -798,6 +879,15 @@ begin v.srr1 := (others => '0'); v.events := (others => '0'); + -- Evaluate DAWR upper limits after a clock edge + v.dawr_upd := '0'; + if r3.dawr_upd = '1' then + for i in 0 to num_dawr - 1 loop + v.dawr_uplim(i) := std_ulogic_vector(unsigned(r3.dawr(i)) + + unsigned(r3.dawrx(i)(15 downto 10))); + end loop; + end if; + -- load data formatting -- shift and byte-reverse data bytes for i in 0 to 7 loop @@ -877,12 +967,25 @@ begin if r2.req.load_sp = '1' and r2.req.dc_req = '0' then write_enable := '1'; end if; - if r2.req.write_spr = '1' and r2.req.mmu_op = '0' then - if r2.req.sprsel(0) = '0' then - v.dsisr := r2.req.store_data(31 downto 0); - else - v.dar := r2.req.store_data; + if r2.req.write_spr = '1' then + if r2.req.sprsel(2) = '1' then + v.dawr_upd := '1'; end if; + case r2.req.sprsel is + when "100" => + v.dawr(0) := r2.req.store_data(63 downto 3); + when "101" => + v.dawr(1) := r2.req.store_data(63 downto 3); + when "110" => + v.dawrx(0) := r2.req.store_data(15 downto 0); + when "111" => + v.dawrx(1) := r2.req.store_data(15 downto 0); + when "010" => + v.dsisr := r2.req.store_data(31 downto 0); + when "011" => + v.dar := r2.req.store_data; + when others => + end case; end if; end if; @@ -905,9 +1008,10 @@ begin end if; end if; if d_in.error = '1' then - if d_in.cache_paradox = '1' then + if d_in.cache_paradox = '1' or d_in.reserve_nc = '1' or r2.req.dawr_intr = '1' then -- signal an interrupt straight away exception := '1'; + dsisr(63 - 41) := r2.req.dawr_intr; dsisr(63 - 38) := not r2.req.load; dsisr(63 - 37) := d_in.reserve_nc; -- XXX there is no architected bit for this @@ -960,6 +1064,7 @@ begin v.srr1(47 - 34) := r2.req.prefixed; v.dar := r2.req.addr; if m_in.segerr = '0' then + dsisr(63 - 38) := not r2.req.load; v.intr_vec := 16#300#; v.dsisr := dsisr; else @@ -1026,8 +1131,10 @@ begin end if; if stage1_dreq = '1' then d_out.data <= store_data; + d_out.dawr_match <= stage1_dawr_match; else d_out.data <= r2.req.store_data; + d_out.dawr_match <= r2.req.dawr_intr; end if; d_out.hold <= l_in.e2stall; @@ -1062,6 +1169,9 @@ begin e_out.busy <= busy; e_out.l2stall <= dc_stall or d_in.error or r2.busy; + e_out.ea_for_pmu <= req_in.addr; + e_out.ea_valid <= req_in.ea_valid; + events <= r3.events; flush <= exception; diff --git a/pmu.vhdl b/pmu.vhdl index 928d6c2..2afa1eb 100644 --- a/pmu.vhdl +++ b/pmu.vhdl @@ -183,12 +183,12 @@ begin end if; if p_in.mtspr = '1' and p_in.spr_num(3 downto 0) = "1100" then siar <= p_in.spr_val; - elsif doalert = '1' then + elsif doalert = '1' or p_in.trace = '1' then siar <= p_in.nia; end if; if p_in.mtspr = '1' and p_in.spr_num(3 downto 0) = "1101" then sdar <= p_in.spr_val; - elsif doalert = '1' then + elsif doalert = '1' or p_in.trace = '1' then sdar <= p_in.addr; end if; if p_in.mtspr = '1' and p_in.spr_num(3 downto 0) = "0000" then diff --git a/tests/reservation/reservation.c b/tests/reservation/reservation.c index 502b285..9ae2921 100644 --- a/tests/reservation/reservation.c +++ b/tests/reservation/reservation.c @@ -157,6 +157,7 @@ int resv_test_2(void) { unsigned long x[3]; unsigned long offset, j, size, ret; + unsigned int instr; x[0] = 1234; x[1] = x[2] = 0; @@ -169,6 +170,9 @@ int resv_test_2(void) if (ret == 0x600) { if ((offset & (size - 1)) == 0) return j + 0x10; + instr = *(unsigned int *)mfspr(SRR0); + if ((instr & 0xfc00073f) != 0x7c000028) + return j + 0x40; } else if (ret) return ret; ret = callit(size, (unsigned long)&x[0] + offset, do_stcx); @@ -177,6 +181,9 @@ int resv_test_2(void) if (ret == 0x600) { if ((offset & (size - 1)) == 0) return j + 0x30; + instr = *(unsigned int *)mfspr(SRR0); + if ((instr & 0xfc00033f) != 0x7c00012d) + return j + 0x50; } else if (ret) return ret; } diff --git a/tests/test_reservation.bin b/tests/test_reservation.bin index 9c9ad8f..7d3f0a5 100755 Binary files a/tests/test_reservation.bin and b/tests/test_reservation.bin differ diff --git a/tests/test_trace.bin b/tests/test_trace.bin index 3c7c7bc..44538d8 100755 Binary files a/tests/test_trace.bin and b/tests/test_trace.bin differ diff --git a/tests/test_trace.console_out b/tests/test_trace.console_out index 2fe36d2..3a5a601 100644 --- a/tests/test_trace.console_out +++ b/tests/test_trace.console_out @@ -7,3 +7,5 @@ test 06:PASS test 07:PASS test 08:PASS test 09:PASS +test 10:PASS +test 11:PASS diff --git a/tests/trace/head.S b/tests/trace/head.S index cd57e3a..fd444b6 100644 --- a/tests/trace/head.S +++ b/tests/trace/head.S @@ -224,3 +224,30 @@ test8: test9: sc blr + + .global test10 +test10: + addi %r3,%r3,1 + addi %r4,%r4,2 + addi %r3,%r3,4 + addi %r4,%r4,8 + cmpd %r3,%r4 + bne 1f + nop + nop +1: li %r3,-1 + blr + + .global test11 +test11: + stdx %r3,%r3,%r4 + stw %r3,6(%r4) + dcbt 0,%r4 + dcbf 0,%r4 + dcbtst 0,%r4 + sync + ld %r3,0(%r4) + lwz %r3,6(%r4) + lwz %r3,27(%r4) + stb %r3,26(%r4) + blr diff --git a/tests/trace/trace.c b/tests/trace/trace.c index 908d299..a88c751 100644 --- a/tests/trace/trace.c +++ b/tests/trace/trace.c @@ -7,15 +7,23 @@ extern unsigned long callit(unsigned long arg1, unsigned long arg2, unsigned long (*fn)(unsigned long, unsigned long), unsigned long msr, unsigned long *regs); - #define MSR_FP 0x2000 #define MSR_SE 0x400 #define MSR_BE 0x200 +#define DSISR 18 +#define DAR 19 #define SRR0 26 #define SRR1 27 #define SPRG0 272 #define SPRG1 273 +#define CIABR 187 +#define DAWR0 180 +#define DAWR1 181 +#define DAWRX0 188 +#define DAWRX1 189 +#define SIAR 780 +#define SDAR 781 static inline unsigned long mfmsr(void) { @@ -80,6 +88,8 @@ int trace_test_1(void) return ret + 2; if (regs[0] != 3 || regs[1] != 2) return 3; + if (mfspr(SIAR) != (unsigned long)&test1) + return 4; return 0; } @@ -98,6 +108,8 @@ int trace_test_2(void) return ret + 2; if (regs[0] != 3 || x != 3) return 3; + if (mfspr(SIAR) != (unsigned long)&test2 || mfspr(SDAR) != (unsigned long)&x) + return 4; return 0; } @@ -116,6 +128,8 @@ int trace_test_3(void) return ret + 2; if (regs[0] != 11 || x != 11) return 3; + if (mfspr(SIAR) != (unsigned long)&test3 || mfspr(SDAR) != (unsigned long)&x) + return 4; return 0; } @@ -169,6 +183,8 @@ int trace_test_6(void) return ret + 2; if (regs[0] != 11 || regs[1] != 55) return 3; + if (mfspr(SIAR) != (unsigned long)&test6 + 8) + return 4; return 0; } @@ -186,6 +202,8 @@ int trace_test_7(void) return ret + 2; if (regs[0] != 11 || regs[1] != 1) return 3; + if (mfspr(SIAR) != (unsigned long)&test7 + 8) + return 4; return 0; } @@ -218,6 +236,125 @@ int trace_test_9(void) return 0; } +extern unsigned long test10(unsigned long, unsigned long); + +/* test CIABR */ +int trace_test_10(void) +{ + unsigned long ret; + unsigned long regs[2]; + + mtspr(CIABR, (unsigned long)&test10 + 4 + 3); + ret = callit(1, 1, test10, mfmsr(), regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test10 + 8) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x40100000) + return ret + 2; + if (regs[0] != 2 || regs[1] != 3) + return 3; + + /* test CIABR on a taken branch */ + mtspr(CIABR, (unsigned long)&test10 + 20 + 3); + ret = callit(1, 1, test10, mfmsr(), regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test10 + 32) + return ret + 4; + if ((mfspr(SRR1) & 0x781f0000) != 0x40100000) + return ret + 5; + if (regs[0] != 6 || regs[1] != 11) + return 6; + + /* test CIABR with PRIV = problem state */ + mtspr(CIABR, (unsigned long)&test10 + 1); + ret = callit(1, 1, test10, mfmsr(), regs); + if (ret != 0) + return ret + 7; + /* don't have page tables so can't actually run in problem state */ + return 0; +} + +/* test DAWR[X]{0,1} */ +#define MRD_SHIFT 10 +#define HRAMMC 0x80 +#define DW 0x40 +#define DR 0x20 +#define WT 0x10 +#define WTI 0x08 +#define PRIVM_HYP 0x04 +#define PRIVM_PNH 0x02 +#define PRIVM_PRO 0x01 + +extern unsigned long test11(unsigned long, unsigned long); + +int trace_test_11(void) +{ + unsigned long ret; + unsigned long regs[2]; + unsigned long x[4]; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (0 << MRD_SHIFT) + DW + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 1; + if (mfspr(SRR0) != (unsigned long) &test11 || mfspr(DSISR) != 0x02400000 || + mfspr(DAR) != (unsigned long)&x[0]) + return 2; + + mtspr(DAWR0, (unsigned long)&x[1]); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 3; + if (mfspr(SRR0) != (unsigned long) &test11 + 4 || mfspr(DSISR) != 0x02400000 || + mfspr(DAR) != (unsigned long)&x[1]) + return 4; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (0 << MRD_SHIFT) + DR + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 5; + if (mfspr(SRR0) != (unsigned long) &test11 + 24 || mfspr(DSISR) != 0x00400000) + return 6; + + mtspr(DAWR0, (unsigned long)&x[1]); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 7; + if (mfspr(SRR0) != (unsigned long) &test11 + 28 || mfspr(DSISR) != 0x00400000) + return 8; + + mtspr(DAWR0, (unsigned long)&x[3]); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 9; + if (mfspr(SRR0) != (unsigned long) &test11 + 32 || mfspr(DSISR) != 0x00400000) + return 10; + + mtspr(DAWR0, (unsigned long)&x[2]); + mtspr(DAWRX0, (1 << MRD_SHIFT) + DW + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 11; + if (mfspr(SRR0) != (unsigned long) &test11 + 36 || mfspr(DSISR) != 0x02400000) + return 12; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (3 << MRD_SHIFT) + DR + DW + WT + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0) + return ret + 13; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (3 << MRD_SHIFT) + DR + DW + WT + WTI + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 14; + if (mfspr(SRR0) != (unsigned long) &test11 || mfspr(DSISR) != 0x02400000) + return 15; + + return 0; +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -230,7 +367,7 @@ void do_test(int num, int (*test)(void)) print_string("PASS\r\n"); } else { fail = 1; - print_string("FAIL "); + print_string(" FAIL "); print_hex(ret, 4); print_string("\r\n"); } @@ -249,6 +386,8 @@ int main(void) do_test(7, trace_test_7); do_test(8, trace_test_8); do_test(9, trace_test_9); + do_test(10, trace_test_10); + do_test(11, trace_test_11); return fail; }