From 7bc7f335f1fb96cd6362a8efc4c4e4cc38e60cd2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 16 Sep 2023 13:53:34 +1000 Subject: [PATCH] Implement CTRL register The CTRL register has a single bit called RUN. It has some unusual behaviours: - It can only be written via SPR number 152, which is privileged - It can only be read via SPR number 136, which is non-privileged - Reading in problem state (user mode) returns the RUN bit in bit 0, but reading in privileged state (hypervisor mode) returns the RUN bit in bits 0 and 15 (bit numbers count from the least-significant bit, i.e. bit 0 is the LSB). - Reading SPR 152 in problem state causes a HEAI (illegal instruction) interrupt, but reading in privileged state is a no-op; this is the same as for an unimplemented SPR. Writing SPR 136 is likewise treated as a write to an unimplemented SPR. The RUN bit goes to the PMU and is also plumbed out to drive an LED on the Arty board. Signed-off-by: Paul Mackerras --- common.vhdl | 8 +++++++- core.vhdl | 2 ++ decode1.vhdl | 8 ++++++++ decode2.vhdl | 13 +++++++++---- execute1.vhdl | 20 +++++++++++++++++++- fpga/top-arty.vhdl | 5 +++++ soc.vhdl | 3 +++ 7 files changed, 53 insertions(+), 6 deletions(-) diff --git a/common.vhdl b/common.vhdl index fa6df86..9f38874 100644 --- a/common.vhdl +++ b/common.vhdl @@ -58,6 +58,8 @@ package common is constant SPR_FSCR : spr_num_t := 153; constant SPR_HFSCR : spr_num_t := 190; constant SPR_HEIR : spr_num_t := 339; + constant SPR_CTRL : spr_num_t := 136; + constant SPR_CTRLW : spr_num_t := 152; -- PMU registers constant SPR_UPMC1 : spr_num_t := 771; @@ -148,6 +150,8 @@ package common is sel : spr_selector; valid : std_ulogic; ispmu : std_ulogic; + ronly : std_ulogic; + wonly : std_ulogic; end record; constant spr_id_init : spr_id := (sel => "0000", others => '0'); @@ -161,6 +165,7 @@ package common is constant SPRSEL_FSCR : spr_selector := 4x"7"; constant SPRSEL_HFSCR : spr_selector := 4x"8"; constant SPRSEL_HEIR : spr_selector := 4x"9"; + constant SPRSEL_CTRL : spr_selector := 4x"a"; constant SPRSEL_XER : spr_selector := 4x"f"; -- FSCR and HFSCR bit numbers @@ -243,6 +248,7 @@ 
package common is -- This needs to die... type ctrl_t is record + run: std_ulogic; tb: std_ulogic_vector(63 downto 0); dec: std_ulogic_vector(63 downto 0); msr: std_ulogic_vector(63 downto 0); @@ -258,7 +264,7 @@ package common is heir: std_ulogic_vector(63 downto 0); end record; constant ctrl_t_init : ctrl_t := - (xer_low => 18x"0", + (run => '1', xer_low => 18x"0", fscr_ic => x"0", fscr_pref => '1', fscr_tar => '1', hfscr_ic => x"0", hfscr_pref => '1', hfscr_tar => '1', hfscr_fp => '1', others => (others => '0')); diff --git a/core.vhdl b/core.vhdl index 35a860e..bba1004 100644 --- a/core.vhdl +++ b/core.vhdl @@ -48,6 +48,7 @@ entity core is ext_irq : in std_ulogic; + run_out : out std_ulogic; terminated_out : out std_logic ); end core; @@ -390,6 +391,7 @@ begin ls_events => loadstore_events, dc_events => dcache_events, ic_events => icache_events, + run_out => run_out, terminate_out => terminate, dbg_spr_req => dbg_spr_req, dbg_spr_ack => dbg_spr_ack, diff --git a/decode1.vhdl b/decode1.vhdl index a4e4908..09f9f77 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -430,6 +430,8 @@ architecture behaviour of decode1 is i.sel := "0000"; i.valid := '1'; i.ispmu := '0'; + i.ronly := '0'; + i.wonly := '0'; case sprn is when SPR_TB => i.sel := SPRSEL_TB; @@ -458,6 +460,12 @@ architecture behaviour of decode1 is i.sel := SPRSEL_HFSCR; when SPR_HEIR => i.sel := SPRSEL_HEIR; + when SPR_CTRL => + i.sel := SPRSEL_CTRL; + i.ronly := '1'; + when SPR_CTRLW => + i.sel := SPRSEL_CTRL; + i.wonly := '1'; when others => i.valid := '0'; end case; diff --git a/decode2.vhdl b/decode2.vhdl index e7c73fa..1c3f324 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -420,6 +420,8 @@ begin v.e.input_cr := d_in.decode.input_cr; v.e.output_cr := d_in.decode.output_cr; + v.e.spr_select := d_in.spr_info; + -- Work out whether XER SO/OV/OV32 bits are set -- or used by this instruction v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); @@ -454,6 +456,9 @@ begin v.e.uses_tar := '1'; when others => end case; 
+ if d_in.spr_info.wonly = '1' then + v.e.spr_select.valid := '0'; + end if; end if; when OP_MTSPR => if is_X(d_in.insn) then @@ -474,7 +479,9 @@ begin v.e.uses_tar := '1'; when others => end case; - if d_in.spr_info.valid = '1' and d_in.valid = '1' then + if d_in.spr_info.ronly = '1' then + v.e.spr_select.valid := '0'; + elsif d_in.spr_info.valid = '1' and d_in.valid = '1' then v.sgl_pipe := '1'; end if; end if; @@ -505,8 +512,6 @@ begin v.e.repeat := '1'; end if; - v.e.spr_select := d_in.spr_info; - if decctr = '1' then -- read and write CTR v.e.ramspr_odd_rdaddr := RAMSPR_CTR; @@ -602,7 +607,7 @@ begin if op = OP_MFSPR then if d_in.ram_spr.valid = '1' then v.e.result_sel := "101"; -- ramspr_result - elsif d_in.spr_info.valid = '0' then + elsif d_in.spr_info.valid = '0' or d_in.spr_info.wonly = '1' then -- Privileged mfspr to invalid/unimplemented SPR numbers -- writes the contents of RT back to RT (i.e. it's a no-op) v.e.result_sel := "001"; -- logical_result diff --git a/execute1.vhdl b/execute1.vhdl index e48bfb0..ed79a3d 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -45,6 +45,7 @@ entity execute1 is dbg_ctrl_out : out ctrl_t; + run_out : out std_ulogic; icache_inval : out std_ulogic; terminate_out : out std_ulogic; @@ -92,6 +93,7 @@ architecture behaviour of execute1 is write_hic : std_ulogic; write_heir : std_ulogic; set_heir : std_ulogic; + write_ctrl : std_ulogic; end record; constant side_effect_init : side_effect_type := (others => '0'); @@ -404,6 +406,15 @@ architecture behaviour of execute1 is return ret; end; + function assemble_ctrl(c: ctrl_t; msrpr: std_ulogic) return std_ulogic_vector is + variable ret : std_ulogic_vector(63 downto 0); + begin + ret := (others => '0'); + ret(0) := c.run; + ret(15) := c.run and not msrpr; + return ret; + end; + -- Tell vivado to keep the hierarchy for the random module so that the -- net names in the xdc file match. 
attribute keep_hierarchy : string; @@ -523,7 +534,7 @@ begin x_to_pmu.addr_v <= '0'; x_to_pmu.spr_num <= ex1.pmu_spr_num; x_to_pmu.spr_val <= ex1.e.write_data; - x_to_pmu.run <= '1'; + x_to_pmu.run <= ctrl.run; -- XER forwarding. The CA and CA32 bits are only modified by instructions -- that are handled here, so for them we can just use the result most @@ -1334,6 +1345,8 @@ begin v.se.write_hfscr := '1'; when SPRSEL_HEIR => v.se.write_heir := '1'; + when SPRSEL_CTRL => + v.se.write_ctrl := '1'; when others => end case; end if; @@ -1773,6 +1786,7 @@ begin assemble_fscr(ctrl) when SPRSEL_FSCR, assemble_hfscr(ctrl) when SPRSEL_HFSCR, ctrl.heir when SPRSEL_HEIR, + assemble_ctrl(ctrl, ex1.msr(MSR_PR)) when SPRSEL_CTRL, assemble_xer(ex1.e.xerc, ctrl.xer_low) when others; stage2_stall <= l_in.l2stall or fp_in.f2stall; @@ -1943,6 +1957,9 @@ begin ctrl_tmp.heir(63 downto 32) <= (others => '0'); end if; end if; + if ex1.se.write_ctrl = '1' then + ctrl_tmp.run <= ex1.e.write_data(0); + end if; end if; if interrupt_in.intr = '1' then @@ -1981,6 +1998,7 @@ begin e_out <= ex2.e; e_out.msr <= msr_copy(ctrl.msr); + run_out <= ctrl.run; terminate_out <= ex2.se.terminate; icache_inval <= ex2.se.icache_inval; diff --git a/fpga/top-arty.vhdl b/fpga/top-arty.vhdl index 0980667..c3be9d9 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -142,6 +142,9 @@ end entity toplevel; architecture behaviour of toplevel is + -- Status + signal run_out : std_ulogic; + -- Reset signals: signal soc_rst : std_ulogic; signal pll_rst : std_ulogic; @@ -263,6 +266,7 @@ begin system_clk => system_clk, rst => soc_rst, sw_soc_reset => sw_rst, + run_out => run_out, -- UART signals uart0_txd => uart_main_tx, @@ -742,6 +746,7 @@ begin led4 <= system_clk_locked; led5 <= eth_clk_locked; led6 <= not soc_rst; + led7 <= run_out; -- GPIO gpio_in(10) <= btn0; diff --git a/soc.vhdl b/soc.vhdl index 942da63..71474df 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -99,6 +99,8 @@ entity soc is rst : in std_ulogic; 
system_clk : in std_ulogic; + run_out : out std_ulogic; + -- "Large" (64-bit) DRAM wishbone wb_dram_in : out wishbone_master_out; wb_dram_out : in wishbone_slave_out := wishbone_slave_out_init; @@ -366,6 +368,7 @@ begin clk => system_clk, rst => rst_core, alt_reset => alt_reset_d, + run_out => run_out, wishbone_insn_in => wishbone_icore_in, wishbone_insn_out => wishbone_icore_out, wishbone_data_in => wishbone_dcore_in,