From d0f319290fd22724a06b6db628aa7ee3458ca1bc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 25 Feb 2022 16:46:34 +1100 Subject: [PATCH] Restore debug access to SPRs This provides access to the SPRs via the JTAG DMI interface. For now they are still accessed as if they were GPR/FPRs using the same numbering as before (GPRs at 0 - 0x1f, SPRs at 0x20 - 0x2d, FPRs at 0x40 - 0x5f). For XER, debug reads now report the full value, not just the bits that were previously stored in the register file. The "slow" SPR mux is not used for debug reads. Decode2 determines on each cycle whether a debug SPR access will happen next cycle, based on whether there is a request and whether the current instruction accesses the SPR RAM. Signed-off-by: Paul Mackerras --- common.vhdl | 2 + core.vhdl | 14 +++++++ core_debug.vhdl | 84 +++++++++++++++++++++++++++++++++---- decode2.vhdl | 29 ++++++++++++- execute1.vhdl | 24 +++++++++++ scripts/mw_debug/mw_debug.c | 2 +- 6 files changed, 144 insertions(+), 11 deletions(-) diff --git a/common.vhdl b/common.vhdl index 06b62e0..d743c2d 100644 --- a/common.vhdl +++ b/common.vhdl @@ -337,6 +337,7 @@ package common is ramspr_wraddr : ramspr_index; ramspr_write_even : std_ulogic; ramspr_write_odd : std_ulogic; + dbg_spr_access : std_ulogic; dec_ctr : std_ulogic; end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := @@ -354,6 +355,7 @@ package common is spr_is_ram => '0', ramspr_even_rdaddr => 0, ramspr_odd_rdaddr => 0, ramspr_rd_odd => '0', ramspr_wraddr => 0, ramspr_write_even => '0', ramspr_write_odd => '0', + dbg_spr_access => '0', dec_ctr => '0', others => (others => '0')); diff --git a/core.vhdl b/core.vhdl index 82c66b4..a91b729 100644 --- a/core.vhdl +++ b/core.vhdl @@ -150,6 +150,10 @@ architecture behave of core is signal dbg_gpr_ack : std_ulogic; signal dbg_gpr_addr : gspr_index_t; signal dbg_gpr_data : std_ulogic_vector(63 downto 0); + signal dbg_spr_req : std_ulogic; + signal dbg_spr_ack : std_ulogic; + signal dbg_spr_addr : std_ulogic_vector(7 downto 0); + signal dbg_spr_data : std_ulogic_vector(63 downto 0); signal ctrl_debug : ctrl_t; @@ -307,6 +311,8 @@ begin execute2_bypass => execute2_bypass, execute2_cr_bypass => execute2_cr_bypass, writeback_bypass => writeback_bypass, + dbg_spr_req => dbg_spr_req, + dbg_spr_addr => dbg_spr_addr, log_out => log_data(119 downto 110) ); decode2_busy_in <= ex1_busy_out; @@ -378,6 +384,10 @@ begin dc_events => dcache_events, ic_events => icache_events, terminate_out => terminate, + dbg_spr_req => dbg_spr_req, + dbg_spr_ack => dbg_spr_ack, + dbg_spr_addr => dbg_spr_addr, + dbg_spr_data => dbg_spr_data, sim_dump => sim_ex_dump, sim_dump_done => sim_cr_dump, log_out => log_data(134 downto 120), @@ -504,6 +514,10 @@ begin dbg_gpr_ack => dbg_gpr_ack, dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_data => dbg_gpr_data, + dbg_spr_req => dbg_spr_req, + dbg_spr_ack => dbg_spr_ack, + dbg_spr_addr => dbg_spr_addr, + dbg_spr_data => dbg_spr_data, log_data => log_data, log_read_addr => log_rd_addr, log_read_data => log_rd_data, diff --git a/core_debug.vhdl b/core_debug.vhdl index ff99df4..a1d4a94 100644 --- a/core_debug.vhdl +++ b/core_debug.vhdl @@ -33,12 +33,18 @@ entity core_debug is nia : in std_ulogic_vector(63 downto 0); msr : in std_ulogic_vector(63 downto 0); - -- GSPR register read port + -- GPR/FPR register read port dbg_gpr_req : out std_ulogic; dbg_gpr_ack : in std_ulogic; dbg_gpr_addr : out gspr_index_t; dbg_gpr_data : in std_ulogic_vector(63 downto 0); + -- SPR register read port + dbg_spr_req : out std_ulogic; + dbg_spr_ack : in std_ulogic; + dbg_spr_addr : out std_ulogic_vector(7 downto 0); + dbg_spr_data : in std_ulogic_vector(63 downto 0); + -- Core logging data log_data : in std_ulogic_vector(255 downto 0); log_read_addr : in std_ulogic_vector(31 downto 0); @@ -105,7 +111,10 @@ architecture behave of core_debug is signal do_icreset : std_ulogic; signal terminated : std_ulogic; signal do_gspr_rd : std_ulogic; - signal gspr_index : gspr_index_t; + signal gspr_index : std_ulogic_vector(7 downto 0); + signal gspr_data : std_ulogic_vector(63 downto 0); + + signal spr_index_valid : std_ulogic; signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others => '0'); signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0'); @@ -119,9 +128,7 @@ architecture behave of core_debug is begin -- Single cycle register accesses on DMI except for GSPR data dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA - else dbg_gpr_ack; - dbg_gpr_req <= dmi_req when dmi_addr = DBG_CORE_GSPR_DATA - else '0'; + else dbg_gpr_ack or dbg_spr_ack; -- Status register read composition stat_reg <= (2 => terminated, @@ -129,12 +136,16 @@ begin 0 => stopping, others => '0'); + gspr_data <= dbg_gpr_data when gspr_index(5) = '0' else + dbg_spr_data when spr_index_valid = '1' else + (others => '0'); + -- DMI read data mux with dmi_addr select dmi_dout <= stat_reg when DBG_CORE_STAT, nia when DBG_CORE_NIA, msr when DBG_CORE_MSR, - dbg_gpr_data when DBG_CORE_GSPR_DATA, + gspr_data when DBG_CORE_GSPR_DATA, log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR, log_dmi_data when DBG_CORE_LOG_DATA, log_dmi_trigger when DBG_CORE_LOG_TRIGGER, @@ -191,7 +202,7 @@ begin terminated <= '0'; end if; elsif dmi_addr = DBG_CORE_GSPR_INDEX then - gspr_index <= dmi_din(gspr_index_t'left downto 0); + gspr_index <= dmi_din(7 downto 0); elsif dmi_addr = DBG_CORE_LOG_ADDR then log_dmi_addr <= dmi_din(31 downto 0); do_dmi_log_rd <= '1'; @@ -226,7 +237,64 @@ begin end if; end process; - dbg_gpr_addr <= gspr_index; + gspr_access: process(clk) + variable valid : std_ulogic; + variable sel : spr_selector; + variable isram : std_ulogic; + variable raddr : ramspr_index; + variable odd : std_ulogic; + begin + if rising_edge(clk) then + if rst = '1' or dmi_req = '0' or dmi_addr /= DBG_CORE_GSPR_DATA then + dbg_gpr_req <= '0'; + dbg_spr_req <= '0'; + else + dbg_gpr_req <= not gspr_index(5); + dbg_spr_req <= gspr_index(5); + end if; + + -- Map 0 - 0x1f to GPRs, 0x20 - 0x3f to SPRs, and 0x40 - 0x5f to FPRs + dbg_gpr_addr <= gspr_index(6) & gspr_index(4 downto 0); + + -- For SPRs, use the same mapping as when the fast SPRs were in the GPR file + valid := '1'; + sel := "000"; + isram := '1'; + raddr := 0; + odd := '0'; + case gspr_index(4 downto 0) is + when 5x"00" => + raddr := RAMSPR_LR; + when 5x"01" => + odd := '1'; + raddr := RAMSPR_CTR; + when 5x"02" | 5x"03" => + odd := gspr_index(0); + raddr := RAMSPR_SRR0; + when 5x"04" | 5x"05" => + odd := gspr_index(0); + raddr := RAMSPR_HSRR0; + when 5x"06" | 5x"07" => + odd := gspr_index(0); + raddr := RAMSPR_SPRG0; + when 5x"08" | 5x"09" => + odd := gspr_index(0); + raddr := RAMSPR_SPRG2; + when 5x"0a" | 5x"0b" => + odd := gspr_index(0); + raddr := RAMSPR_HSPRG0; + when 5x"0c" => + isram := '0'; + sel := SPRSEL_XER; + when 5x"0d" => + raddr := RAMSPR_TAR; + when others => + valid := '0'; + end case; + dbg_spr_addr <= isram & sel & std_ulogic_vector(to_unsigned(raddr, 3)) & odd; + spr_index_valid <= valid; + end if; + end process; -- Core control signals generated by the debug module core_stop <= stopping and not do_step; diff --git a/decode2.vhdl b/decode2.vhdl index 5a8c2b7..d91bec5 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -43,6 +43,10 @@ entity decode2 is execute2_cr_bypass : in cr_bypass_data_t; writeback_bypass : in bypass_data_t; + -- Access to SPRs from core_debug module + dbg_spr_req : in std_ulogic; + dbg_spr_addr : in std_ulogic_vector(7 downto 0); + log_out : out std_ulogic_vector(9 downto 0) ); end entity decode2; @@ -60,6 +64,7 @@ architecture behaviour of decode2 is reg_o_valid : std_ulogic; input_ov : std_ulogic; output_ov : std_ulogic; + read_rspr : std_ulogic; end record; constant reg_type_init : reg_type := (e => Decode2ToExecute1Init, repeat => NONE, others => '0'); @@ -347,6 +352,13 @@ begin " tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid); end if; dc2 <= dc2in; + elsif dc2.read_rspr = '0' then + -- Update debug SPR access signals even when stalled + -- if the instruction in dc2.e doesn't read any SPRs. + dc2.e.dbg_spr_access <= dc2in.e.dbg_spr_access; + dc2.e.ramspr_even_rdaddr <= dc2in.e.ramspr_even_rdaddr; + dc2.e.ramspr_odd_rdaddr <= dc2in.e.ramspr_odd_rdaddr; + dc2.e.ramspr_rd_odd <= dc2in.e.ramspr_rd_odd; end if; end if; end process; @@ -381,6 +393,7 @@ begin variable op : insn_type_t; variable valid_in : std_ulogic; variable decctr : std_ulogic; + variable sprs_busy : std_ulogic; begin v := dc2; @@ -389,6 +402,8 @@ begin if dc2.busy = '0' then v.e := Decode2ToExecute1Init; + sprs_busy := '0'; + if d_in.valid = '1' then v.prev_sgl := dc2.sgl_pipe; v.sgl_pipe := d_in.decode.sgl_pipe; @@ -467,6 +482,7 @@ begin v.e.ramspr_odd_rdaddr := RAMSPR_CTR; v.e.ramspr_wraddr := RAMSPR_CTR; v.e.ramspr_write_odd := '1'; + sprs_busy := '1'; end if; if v.e.lr = '1' then -- write LR @@ -484,11 +500,13 @@ begin else v.e.ramspr_even_rdaddr := RAMSPR_TAR; end if; + sprs_busy := '1'; when OP_MFSPR => v.e.ramspr_even_rdaddr := d_in.ram_spr.index; v.e.ramspr_odd_rdaddr := d_in.ram_spr.index; v.e.ramspr_rd_odd := d_in.ram_spr.isodd; v.e.spr_is_ram := d_in.ram_spr.valid; + sprs_busy := d_in.ram_spr.valid; when OP_MTSPR => v.e.ramspr_wraddr := d_in.ram_spr.index; v.e.ramspr_write_even := d_in.ram_spr.valid and not d_in.ram_spr.isodd; @@ -497,8 +515,10 @@ begin when OP_RFID => v.e.ramspr_even_rdaddr := RAMSPR_SRR0; v.e.ramspr_odd_rdaddr := RAMSPR_SRR1; + sprs_busy := '1'; when others => end case; + v.read_rspr := sprs_busy and d_in.valid; case d_in.decode.length is when is1B => @@ -545,8 +565,6 @@ begin -- Privileged mfspr to invalid/unimplemented SPR numbers -- writes the contents of RT back to RT (i.e. it's a no-op) v.e.result_sel := "001"; -- logical_result - elsif d_in.spr_info.ispmu = '1' then - v.e.result_sel := "100"; -- pmuspr_result end if; end if; @@ -649,6 +667,13 @@ begin stall_out <= dc2.busy or deferred; + v.e.dbg_spr_access := dbg_spr_req and not v.read_rspr; + if v.e.dbg_spr_access = '1' then + v.e.ramspr_even_rdaddr := to_integer(unsigned(dbg_spr_addr(3 downto 1))); + v.e.ramspr_odd_rdaddr := to_integer(unsigned(dbg_spr_addr(3 downto 1))); + v.e.ramspr_rd_odd := dbg_spr_addr(0); + end if; + -- Update registers dc2in <= v; diff --git a/execute1.vhdl b/execute1.vhdl index dc68806..20efef6 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -55,6 +55,12 @@ entity execute1 is dc_events : in DcacheEventType; ic_events : in IcacheEventType; + -- Access to SPRs from core_debug module + dbg_spr_req : in std_ulogic; + dbg_spr_ack : out std_ulogic; + dbg_spr_addr : in std_ulogic_vector(7 downto 0); + dbg_spr_data : out std_ulogic_vector(63 downto 0); + -- debug sim_dump : in std_ulogic; sim_dump_done : out std_ulogic; @@ -604,6 +610,24 @@ begin end if; end process; + ex_dbg_spr: process(clk) + begin + if rising_edge(clk) then + if rst = '0' and dbg_spr_req = '1' then + if e_in.dbg_spr_access = '1' and dbg_spr_ack = '0' then + if dbg_spr_addr(7) = '1' then + dbg_spr_data <= ramspr_result; + else + dbg_spr_data <= assemble_xer(xerc_in, ctrl.xer_low); + end if; + dbg_spr_ack <= '1'; + end if; + else + dbg_spr_ack <= '0'; + end if; + end if; + end process; + -- Data path for integer instructions (first execute stage) execute1_dp: process(all) variable a_inv : std_ulogic_vector(63 downto 0); diff --git a/scripts/mw_debug/mw_debug.c b/scripts/mw_debug/mw_debug.c index 6271760..ef5b1ec 100644 --- a/scripts/mw_debug/mw_debug.c +++ b/scripts/mw_debug/mw_debug.c @@ -548,7 +548,7 @@ static const char *fast_spr_names[] = { "lr", "ctr", "srr0", "srr1", "hsrr0", "hsrr1", "sprg0", "sprg1", "sprg2", "sprg3", - "hsprg0", "hsprg1", "xer" + "hsprg0", "hsprg1", "xer", "tar", }; static void gpr_read(uint64_t reg, uint64_t count)