From d2bf3f3580205072de01f8741738b351b10054c7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 8 Aug 2025 08:55:48 +1000 Subject: [PATCH] core: Implement hypervisor doorbell interrupt and msg* instructions This implements the hypervisor doorbell exception and interrupt and the msgsnd, msgclr and msgsync instructions (msgsync is a no-op). The msgsnd instruction can generate a hypervisor doorbell interrupt on any CPU in the system. To achieve this, each core sends its hypervisor doorbell messages to the soc level, which ORs together the bits destined for each CPU and sends the result to that CPU. The privileged doorbell exception/interrupt and the msgsndp/msgclrp instructions are not required since we don't implement SMT. Signed-off-by: Paul Mackerras --- core.vhdl | 7 +++++ decode1.vhdl | 3 +++ decode_types.vhdl | 54 +++++++++++++++++++++------------------ execute1.vhdl | 51 +++++++++++++++++++++++++++++++++--- predecode.vhdl | 3 +++ scripts/fmt_log/fmt_log.c | 6 ++--- soc.vhdl | 22 +++++++++++++++- 7 files changed, 113 insertions(+), 33 deletions(-) diff --git a/core.vhdl b/core.vhdl index d4efcf3..bc59014 100644 --- a/core.vhdl +++ b/core.vhdl @@ -10,6 +10,7 @@ entity core is generic ( SIM : boolean := false; CPU_INDEX : natural := 0; + NCPUS : positive := 1; DISABLE_FLATTEN : boolean := false; EX1_BYPASS : boolean := true; HAS_FPU : boolean := true; @@ -52,6 +53,9 @@ entity core is ext_irq : in std_ulogic; + msg_in : in std_ulogic; + msg_out : out std_ulogic_vector(NCPUS-1 downto 0); + run_out : out std_ulogic; terminated_out : out std_logic ); @@ -370,6 +374,7 @@ begin generic map ( SIM => SIM, CPU_INDEX => CPU_INDEX, + NCPUS => NCPUS, EX1_BYPASS => EX1_BYPASS, HAS_FPU => HAS_FPU, LOG_LENGTH => LOG_LENGTH @@ -398,6 +403,8 @@ begin ls_events => loadstore_events, dc_events => dcache_events, ic_events => icache_events, + msg_out => msg_out, + msg_in => msg_in, run_out => run_out, terminate_out => terminate, dbg_spr_req => dbg_spr_req, diff --git a/decode1.vhdl
b/decode1.vhdl index e18350f..afe7610 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -267,6 +267,9 @@ architecture behaviour of decode1 is INSN_modsw => (DVU, NONE, OP_MOD, RA, RB, NONE, NONE, RT, ADD, "101", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0', '0', NONE), INSN_modud => (DVU, NONE, OP_MOD, RA, RB, NONE, NONE, RT, ADD, "101", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', '0', NONE), INSN_moduw => (DVU, NONE, OP_MOD, RA, RB, NONE, NONE, RT, ADD, "101", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', '0', NONE), + INSN_msgclr => (ALU, NONE, OP_MSG, NONE, RB, NONE, NONE, NONE, ADD, "011", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', '0', NONE), + INSN_msgsnd => (ALU, NONE, OP_MSG, NONE, RB, NONE, NONE, NONE, ADD, "001", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', '0', NONE), + INSN_msgsync => (ALU, NONE, OP_NOP, NONE, IMM, NONE, NONE, NONE, ADD, "000", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', '0', NONE), INSN_mtcrf => (ALU, NONE, OP_MTCRF, NONE, IMM, NONE, RS, NONE, ADD, "101", '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', '0', NONE), INSN_mtfsb => (FPU, FPU, OP_FP_MISC, NONE, IMM, NONE, NONE, NONE, ADD, "000", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', '0', NONE), INSN_mtfsf => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, NONE, ADD, "000", '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', '0', NONE), diff --git a/decode_types.vhdl b/decode_types.vhdl index 598ff52..1503369 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -14,6 +14,7 @@ package decode_types is OP_ISYNC, OP_LOAD, OP_STORE, OP_MCRXRX, OP_MFMSR, OP_MFSPR, + OP_MSG, OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64, OP_MUL_H64, OP_MUL_H32, OP_RFID, @@ -87,10 +88,11 
@@ package decode_types is INSN_mfcr, INSN_mfmsr, INSN_mfspr, + INSN_msgsync, INSN_mtcrf, INSN_mtmsr, - INSN_mtmsrd, - INSN_mtspr, -- 60 + INSN_mtmsrd, -- 60 + INSN_mtspr, INSN_mulli, INSN_neg, INSN_nop, @@ -99,8 +101,8 @@ package decode_types is INSN_popcntb, INSN_popcntw, INSN_popcntd, - INSN_prtyw, - INSN_prtyd, -- 70 + INSN_prtyw, -- 70 + INSN_prtyd, INSN_rfid, INSN_rfscv, INSN_rldic, @@ -109,8 +111,8 @@ package decode_types is INSN_rldimi, INSN_rlwimi, INSN_rlwinm, - INSN_rnop, - INSN_sc, -- 80 + INSN_rnop, -- 80 + INSN_sc, INSN_setb, INSN_slbia, INSN_sradi, @@ -119,8 +121,8 @@ package decode_types is INSN_std, INSN_stdu, INSN_sthu, - INSN_stq, - INSN_stwu, -- 90 + INSN_stq, -- 90 + INSN_stwu, INSN_subfic, INSN_subfme, INSN_subfze, @@ -129,10 +131,8 @@ package decode_types is INSN_tlbsync, INSN_twi, INSN_wait, - INSN_xori, - INSN_xoris, -- 100 - -- pad to 102 - INSN_065, + INSN_xori, -- 100 + INSN_xoris, -- Non-prefixed instructions that have a MLS:D prefixed form and -- their corresponding prefixed instructions. 
@@ -233,6 +233,8 @@ package decode_types is INSN_modsw, INSN_moduw, INSN_modud, -- 190 + INSN_msgclr, + INSN_msgsnd, INSN_mulhw, INSN_mulhwu, INSN_mulhd, @@ -240,9 +242,9 @@ package decode_types is INSN_mullw, INSN_mulld, INSN_nand, - INSN_nor, + INSN_nor, -- 200 INSN_or, - INSN_orc, -- 200 + INSN_orc, INSN_pdepd, INSN_pextd, INSN_rldcl, @@ -250,9 +252,9 @@ package decode_types is INSN_rlwnm, INSN_slw, INSN_sld, - INSN_sraw, + INSN_sraw, -- 210 INSN_srad, - INSN_srw, -- 210 + INSN_srw, INSN_srd, INSN_stbcix, INSN_stbcx, @@ -260,9 +262,9 @@ package decode_types is INSN_stbux, INSN_stdbrx, INSN_stdcix, - INSN_stdcx, + INSN_stdcx, -- 220 INSN_stdx, - INSN_stdux, -- 220 + INSN_stdux, INSN_sthbrx, INSN_sthcix, INSN_sthcx, @@ -270,9 +272,9 @@ package decode_types is INSN_sthux, INSN_stqcx, INSN_stwbrx, - INSN_stwcix, + INSN_stwcix, -- 230 INSN_stwcx, - INSN_stwx, -- 230 + INSN_stwx, INSN_stwux, INSN_subf, INSN_subfc, @@ -280,10 +282,11 @@ package decode_types is INSN_td, INSN_tlbie, INSN_tlbiel, - INSN_tw, + INSN_tw, -- 240 INSN_xor, - -- pad to 240 to simplify comparison logic + -- pad to 248 to simplify comparison logic + INSN_242, INSN_243, INSN_244, INSN_245, INSN_246, INSN_247, -- The following instructions have a third input addressed by RC INSN_maddld, @@ -291,9 +294,7 @@ package decode_types is INSN_maddhdu, -- pad to 256 to simplify comparison logic - INSN_243, - INSN_244, INSN_245, INSN_246, INSN_247, - INSN_248, INSN_249, INSN_250, INSN_251, + INSN_251, INSN_252, INSN_253, INSN_254, INSN_255, -- The following instructions access floating-point registers @@ -693,6 +694,9 @@ package body decode_types is when INSN_moduw => return "011111"; when INSN_modsd => return "011111"; when INSN_modsw => return "011111"; + when INSN_msgclr => return "011111"; + when INSN_msgsnd => return "011111"; + when INSN_msgsync => return "011111"; when INSN_mtcrf => return "011111"; when INSN_mtmsr => return "011111"; when INSN_mtmsrd => return "011111"; diff --git a/execute1.vhdl 
b/execute1.vhdl index a018405..34fd03a 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -16,6 +16,7 @@ entity execute1 is EX1_BYPASS : boolean := true; HAS_FPU : boolean := true; CPU_INDEX : natural; + NCPUS : positive := 1; -- Non-zero to enable log data collection LOG_LENGTH : natural := 0 ); @@ -48,6 +49,9 @@ entity execute1 is dbg_ctrl_out : out ctrl_t; + msg_in : in std_ulogic; + msg_out : out std_ulogic_vector(NCPUS-1 downto 0); + run_out : out std_ulogic; icache_inval : out std_ulogic; terminate_out : out std_ulogic; @@ -103,8 +107,10 @@ architecture behaviour of execute1 is write_tbl : std_ulogic; write_tbu : std_ulogic; noop_spr_read : std_ulogic; + send_hmsg : std_ulogic_vector(NCPUS-1 downto 0); + clr_hmsg : std_ulogic; end record; - constant side_effect_init : side_effect_type := (others => '0'); + constant side_effect_init : side_effect_type := (send_hmsg => (others => '0'), others => '0'); type actions_type is record e : Execute1ToWritebackType; @@ -287,6 +293,9 @@ architecture behaviour of execute1 is signal tb_next : std_ulogic_vector(63 downto 0); signal tb_carry : std_ulogic; + -- directed hypervisor doorbell state + signal dhd_pending : std_ulogic; + type privilege_level is (USER, SUPER); type op_privilege_array is array(insn_type_t) of privilege_level; constant op_privilege: op_privilege_array := ( @@ -614,6 +623,18 @@ begin dbg_ctrl_out <= ctrl; log_rd_addr <= ex2.log_addr_spr; + -- Doorbells + doorbell_sync : process(clk) + begin + if rising_edge(clk) then + if rst = '1' or ex2.se.clr_hmsg = '1' then + dhd_pending <= '0'; + elsif msg_in = '1' then + dhd_pending <= '1'; + end if; + end if; + end process; + a_in <= e_in.read_data1; b_in <= e_in.read_data2; c_in <= e_in.read_data3; @@ -1440,6 +1461,20 @@ begin end if; end if; + when OP_MSG => + -- msgsnd, msgclr + if b_in(31 downto 27) = 5x"5" then + if e_in.insn(6) = '0' then -- msgsnd + for cpuid in 0 to NCPUS-1 loop + if unsigned(b_in(19 downto 0)) = to_unsigned(cpuid, 20) then + 
v.se.send_hmsg(cpuid) := '1'; + end if; + end loop; + else -- msgclr + v.se.clr_hmsg := '1'; + end if; + end if; + when OP_MTCRF => when OP_MTMSRD => v.se.write_msr := '1'; @@ -1704,8 +1739,9 @@ begin v.busy := '0'; bypass_valid := actions.bypass_valid; - irq_valid := ex1.msr(MSR_EE) and (pmu_to_x.intr or dec_sign or - (ext_irq_in and not ctrl.lpcr_heic)); + irq_valid := ex1.msr(MSR_EE) and + (pmu_to_x.intr or dec_sign or dhd_pending or + (ext_irq_in and not ctrl.lpcr_heic)); if valid_in = '1' then v.prev_op := e_in.insn_type; @@ -1747,6 +1783,11 @@ begin if pmu_to_x.intr = '1' then v.e.intr_vec := 16#f00#; report "IRQ valid: PMU"; + elsif dhd_pending = '1' then + v.e.intr_vec := 16#e80#; + v.e.hv_intr := '1'; + v.se.clr_hmsg := '1'; + report "Hypervisor doorbell"; elsif dec_sign = '1' then v.e.intr_vec := 16#900#; report "IRQ valid: DEC"; @@ -2175,7 +2216,7 @@ begin -- pending exceptions clear any wait state -- ex1.fp_exception_next is not tested because it is not possible to -- get into wait state with a pending FP exception. 
- irq_exc := pmu_to_x.intr or dec_sign or ext_irq_in; + irq_exc := pmu_to_x.intr or dec_sign or ext_irq_in or dhd_pending; if ex1.trace_next = '1' or irq_exc = '1' or interrupt_in.intr = '1' then ctrl_tmp.wait_state <= '0'; end if; @@ -2223,6 +2264,8 @@ begin terminate_out <= ex2.se.terminate; icache_inval <= ex2.se.icache_inval; + msg_out <= ex2.se.send_hmsg; + exception_log <= v.e.interrupt; end process; diff --git a/predecode.vhdl b/predecode.vhdl index 852c96c..ece38b1 100644 --- a/predecode.vhdl +++ b/predecode.vhdl @@ -346,6 +346,9 @@ architecture behaviour of predecoder is 2#0_01000_01011# => INSN_moduw, 2#0_11000_01001# => INSN_modsd, 2#0_11000_01011# => INSN_modsw, + 2#0_00111_01110# => INSN_msgclr, + 2#0_00110_01110# => INSN_msgsnd, + 2#0_11011_10110# => INSN_msgsync, 2#0_00100_10000# => INSN_mtcrf, 2#0_00100_10010# => INSN_mtmsr, 2#0_00101_10010# => INSN_mtmsrd, diff --git a/scripts/fmt_log/fmt_log.c b/scripts/fmt_log/fmt_log.c index 5a81c96..d5b4488 100644 --- a/scripts/fmt_log/fmt_log.c +++ b/scripts/fmt_log/fmt_log.c @@ -89,9 +89,9 @@ const char *ops[64] = "illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bperm ", "bsort ", "cmp ", "compute", "countb ", "darn ", "dcbf ", "dcbst ", "dcbz ", "icbi ", "icbt ", "fpcmp ", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", - "mod ", "isync ", "ld ", "st ", "mcrxrx ", "mfmsr ", "mfspr ", "mtcrf ", - "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "rfid ", "sc ", "sync ", - "tlbie ", "trap ", "wait ", "ffail ", "?44 ", "?45 ", "?46 ", "?47 ", + "mod ", "isync ", "ld ", "st ", "mcrxrx ", "mfmsr ", "mfspr ", "msg ", + "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "rfid ", "sc ", + "sync ", "tlbie ", "trap ", "wait ", "ffail ", "?45 ", "?46 ", "?47 ", "?48 ", "?49 ", "?50 ", "?51 ", "?52 ", "?53 ", "?54 ", "?55 ", "?56 ", "?57 ", "?58 ", "?59 ", "?60 ", "?61 ", "?62 ", "?63 " }; diff --git a/soc.vhdl b/soc.vhdl index 6652711..3daeb73 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -273,6 
+273,9 @@ architecture behaviour of soc is signal core_run_out : std_ulogic_vector(NCPUS-1 downto 0); + type msg_percpu_array is array(cpu_index_t) of std_ulogic_vector(NCPUS-1 downto 0); + signal msgs : msg_percpu_array; + function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is variable wwb : wishbone_master_out; begin @@ -355,10 +358,14 @@ begin -- Processor cores processors: for i in 0 to NCPUS-1 generate + signal msgin : std_ulogic; + + begin core: entity work.core generic map( SIM => SIM, CPU_INDEX => i, + NCPUS => NCPUS, HAS_FPU => HAS_FPU, HAS_BTC => HAS_BTC, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, @@ -389,8 +396,21 @@ begin dmi_wr => dmi_wr, dmi_ack => dmi_core_ack(i), dmi_req => dmi_core_req(i), - ext_irq => core_ext_irq(i) + ext_irq => core_ext_irq(i), + msg_out => msgs(i), + msg_in => msgin ); + + process(all) + variable m : std_ulogic; + begin + m := '0'; + for j in 0 to NCPUS-1 loop + m := m or msgs(j)(i); + end loop; + msgin <= m; + end process; + end generate; run_out <= or (core_run_out);