Merge pull request #419 from paulusmack/prefix

Add support for prefixed instructions
3 years ago · f668597f67
parent b7ccffe2a3 b50170cd1d
commit f668597f67
18 changed files with 896 additions and 163 deletions
--- a/common.vhdl
+++ b/common.vhdl
@ -263,6 +263,10 @@ package common is
 	valid: std_ulogic;
 	stop_mark : std_ulogic;
 	nia: std_ulogic_vector(63 downto 0);
+        prefixed: std_ulogic;
+        prefix: std_ulogic_vector(25 downto 0);
+        illegal_suffix: std_ulogic;
+        misaligned_prefix: std_ulogic;
 	insn: std_ulogic_vector(31 downto 0);
 	decode: decode_rom_t;
        br_pred: std_ulogic; -- Branch was predicted to be taken
@ -274,7 +278,9 @@ package common is
        reg_c : gspr_index_t;
    end record;
    constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
-        (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
+        (valid => '0', stop_mark => '0', nia => (others => '0'),
+         prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
+         illegal_suffix => '0', misaligned_prefix => '0',
         decode => decode_rom_init, br_pred => '0', big_endian => '0',
         spr_info => spr_id_init, ram_spr => ram_spr_info_init,
         reg_a => (others => '0'), reg_b => (others => '0'), reg_c => (others => '0'));
@ -359,9 +365,12 @@ package common is
        ramspr_write_odd   : std_ulogic;
        dbg_spr_access : std_ulogic;
        dec_ctr : std_ulogic;
+        prefixed : std_ulogic;
+        illegal_suffix : std_ulogic;
+        misaligned_prefix : std_ulogic;
    end record;
    constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
-	(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
+	(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
         write_reg_enable => '0',
         lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0',
 	 invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
@ -378,6 +387,7 @@ package common is
         ramspr_wraddr => (others => '0'), ramspr_write_even => '0', ramspr_write_odd => '0',
         dbg_spr_access => '0',
         dec_ctr => '0',
+         prefixed => '0', illegal_suffix => '0', misaligned_prefix => '0',
         others => (others => '0'));

    type MultiplyInputType is record
@ -500,6 +510,7 @@ package common is
        priv_mode : std_ulogic;                         -- privileged mode (MSR[PR] = 0)
        mode_32bit : std_ulogic;                        -- trim addresses to 32 bits
        is_32bit : std_ulogic;
+        prefixed : std_ulogic;
        repeat : std_ulogic;
        second : std_ulogic;
        e2stall : std_ulogic;
@ -514,7 +525,7 @@ package common is
         addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
         write_reg => (others => '0'),
         length => (others => '0'),
-         mode_32bit => '0', is_32bit => '0',
+         mode_32bit => '0', is_32bit => '0', prefixed => '0',
         repeat => '0', second => '0', e2stall => '0',
         msr => (others => '0'));

--- a/decode1.vhdl
+++ b/decode1.vhdl
@ -45,6 +45,16 @@ architecture behaviour of decode1 is
    signal decode_rom_addr : insn_code;
    signal decode : decode_rom_t;

+    type prefix_state_t is record
+        prefixed : std_ulogic;
+        prefix   : std_ulogic_vector(25 downto 0);
+        pref_ia  : std_ulogic_vector(3 downto 0);
+    end record;
+    constant prefix_state_init : prefix_state_t := (prefixed => '0', prefix => (others => '0'),
+                                                    pref_ia => (others => '0'));
+
+    signal pr, pr_in : prefix_state_t;
+
    signal fetch_failed : std_ulogic;

    -- If we have an FPU, then it is used for integer divisions,
@ -64,7 +74,7 @@ architecture behaviour of decode1 is
    constant decode_rom : decoder_rom_t := (
        --                   unit   fac   internal      in1         in2          in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl  rpt
        --                                     op                                            in   out   A   out  in    out  len        ext                                 pipe
-        INSN_illegal     =>  (NONE, NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_illegal     =>  (ALU,  NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_fetch_fail  =>  (LDST, NONE, OP_FETCH_FAILED, CIA,     NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),

        INSN_add         =>  (ALU,  NONE, OP_ADD,       RA,         RB,          NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
@ -79,10 +89,10 @@ architecture behaviour of decode1 is
        INSN_addme       =>  (ALU,  NONE, OP_ADD,       RA,         CONST_M1,    NONE, RT,   '0', '0', '0', '0', CA,   '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
        INSN_addpcis     =>  (ALU,  NONE, OP_ADD,       CIA,        CONST_DXHI4, NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_addze       =>  (ALU,  NONE, OP_ADD,       RA,         NONE,        NONE, RT,   '0', '0', '0', '0', CA,   '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
-        INSN_and         =>  (ALU,  NONE, OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_andc        =>  (ALU,  NONE, OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_andi_dot    =>  (ALU,  NONE, OP_AND,       NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
-        INSN_andis_dot   =>  (ALU,  NONE, OP_AND,       NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
+        INSN_and         =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
+        INSN_andc        =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
+        INSN_andi_dot    =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
+        INSN_andis_dot   =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
        INSN_attn        =>  (ALU,  NONE, OP_ATTN,      NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
        INSN_b           =>  (ALU,  NONE, OP_B,         NONE,       CONST_LI,    NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
        INSN_bc          =>  (ALU,  NONE, OP_BC,        NONE,       CONST_BD,    NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
@ -90,6 +100,9 @@ architecture behaviour of decode1 is
        INSN_bclr        =>  (ALU,  NONE, OP_BCREG,     NONE,       NONE,        NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
        INSN_bctar       =>  (ALU,  NONE, OP_BCREG,     NONE,       NONE,        NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
        INSN_bperm       =>  (ALU,  NONE, OP_BPERM,     NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_brh         =>  (ALU,  NONE, OP_BREV,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_brw         =>  (ALU,  NONE, OP_BREV,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_brd         =>  (ALU,  NONE, OP_BREV,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_cbcdtd      =>  (ALU,  NONE, OP_BCD,       NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_cdtbcd      =>  (ALU,  NONE, OP_BCD,       NONE,       NONE,        RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_cmp         =>  (ALU,  NONE, OP_CMP,       RA,         RB,          NONE, NONE, '0', '1', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
@ -258,14 +271,30 @@ architecture behaviour of decode1 is
        INSN_mulld       =>  (ALU,  NONE, OP_MUL_L64,   RA,         RB,          NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
        INSN_mulli       =>  (ALU,  NONE, OP_MUL_L64,   RA,         CONST_SI,    NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
        INSN_mullw       =>  (ALU,  NONE, OP_MUL_L64,   RA,         RB,          NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RCOE, '0', '0', NONE),
-        INSN_nand        =>  (ALU,  NONE, OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
+        INSN_nand        =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
        INSN_neg         =>  (ALU,  NONE, OP_ADD,       RA,         NONE,        NONE, RT,   '0', '0', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
        INSN_nop         =>  (ALU,  NONE, OP_NOP,       NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-        INSN_nor         =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_or          =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_orc         =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_ori         =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-        INSN_oris        =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_nor         =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC,   '0', '0', NONE),
+        INSN_or          =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC,   '0', '0', NONE),
+        INSN_orc         =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC,   '0', '0', NONE),
+        INSN_ori         =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI,    RS,   RA,   '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
+        INSN_oris        =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
+        INSN_paddi       =>  (ALU,  NONE, OP_ADD,       RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plbz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pld         =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plfd        =>  (LDST, FPU,  OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, FRT,  '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plfs        =>  (LDST, FPU,  OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, FRT,  '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
+        INSN_plha        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plhz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plwa        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plwz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pnop        =>  (ALU,  NONE, OP_NOP,       NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstb        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstd        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstfd       =>  (LDST, FPU,  OP_STORE,     RA0_OR_CIA, CONST_PSI,   FRS,  NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstfs       =>  (LDST, FPU,  OP_STORE,     RA0_OR_CIA, CONST_PSI,   FRS,  NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
+        INSN_psth        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstw        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_popcntb     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_popcntd     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_popcntw     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -347,7 +376,7 @@ architecture behaviour of decode1 is
        INSN_xori        =>  (ALU,  NONE, OP_XOR,       NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
        INSN_xoris       =>  (ALU,  NONE, OP_XOR,       NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),

-        others           =>  (NONE, NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE)
+        others           =>  (ALU,  NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE)
        );

    function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
@ -434,12 +463,17 @@ begin
            if rst = '1' then
                r <= Decode1ToDecode2Init;
                fetch_failed <= '0';
+                pr <= prefix_state_init;
            elsif flush_in = '1' then
                r.valid <= '0';
                fetch_failed <= '0';
+                pr <= prefix_state_init;
            elsif stall_in = '0' then
                r <= rin;
                fetch_failed <= f_in.fetch_failed;
+                if f_in.valid = '1' then
+                    pr <= pr_in;
+                end if;
            end if;
            if rst = '1' then
                br.br_nia <= (others => '0');
@ -471,12 +505,18 @@ begin
        variable icode : insn_code;
        variable sprn : spr_num_t;
        variable maybe_rb : std_ulogic;
+        variable pv : prefix_state_t;
+        variable icode_bits : std_ulogic_vector(9 downto 0);
+        variable valid_suffix : std_ulogic;
    begin
        v := Decode1ToDecode2Init;
+        pv := pr;

        v.valid := f_in.valid;
        v.nia  := f_in.nia;
        v.insn := f_in.insn;
+        v.prefix := pr.prefix;
+        v.prefixed := pr.prefixed;
        v.stop_mark := f_in.stop_mark;
        v.big_endian := f_in.big_endian;

@ -490,17 +530,65 @@ begin
        end if;

        icode := f_in.icode;
+        icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(icode), 10));

        if f_in.fetch_failed = '1' then
-            icode := INSN_fetch_fail;
+            icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_fetch_fail), 10));
            -- Only send down a single OP_FETCH_FAILED
            v.valid := not fetch_failed;
+            pv := prefix_state_init;
+
+        elsif pr.prefixed = '1' then
+            -- Check suffix value and convert to the prefixed instruction code
+            if pr.prefix(24) = '1' then
+                -- either pnop or illegal
+                icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_pnop), 10));
+            else
+                -- various load/store instructions
+                icode_bits(0) := '1';
+            end if;
+            valid_suffix := '0';
+            case pr.prefix(25 downto 23) is
+                when "000" =>    -- 8LS
+                    if icode >= INSN_first_8ls and icode < INSN_first_rb then
+                        valid_suffix := '1';
+                    end if;
+                when "100" =>   -- MLS
+                    if icode >= INSN_first_mls and icode < INSN_first_8ls then
+                        valid_suffix := '1';
+                    elsif icode >= INSN_first_fp_mls and icode < INSN_first_fp_nonmls then
+                        valid_suffix := '1';
+                    end if;
+                when "110" =>   -- MRR, i.e. pnop
+                    if pr.prefix(22 downto 20) = "000" then
+                        valid_suffix := '1';
+                    end if;
+                when others =>
+            end case;
+            v.nia(5 downto 2) := pr.pref_ia;
+            v.prefixed := '1';
+            v.prefix := pr.prefix;
+            v.illegal_suffix := not valid_suffix;
+            pv := prefix_state_init;
+
+        elsif icode = INSN_prefix then
+            pv.prefixed := '1';
+            pv.pref_ia := f_in.nia(5 downto 2);
+            pv.prefix := f_in.insn(25 downto 0);
+            -- Check if the address of the prefix mod 64 is 60;
+            -- if so we need to arrange to generate an alignment interrupt
+            if f_in.nia(5 downto 2) = "1111" then
+                v.misaligned_prefix := '1';
+            else
+                v.valid := '0';
+            end if;
+
        end if;
-        decode_rom_addr <= icode;
+        decode_rom_addr <= insn_code'val(to_integer(unsigned(icode_bits)));

        if f_in.valid = '1' then
-            report "Decode " & insn_code'image(icode) & " " & to_hstring(f_in.insn) &
-                " at " & to_hstring(f_in.nia);
+            report "Decode " & insn_code'image(insn_code'val(to_integer(unsigned(icode_bits)))) & " " &
+                to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
        end if;

        -- Branch predictor
@ -533,6 +621,8 @@ begin
        br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);

        -- Work out GPR/FPR read addresses
+        -- Note that for prefixed instructions we are working this out based
+        -- only on the suffix.
        maybe_rb := '0';
        vr.reg_1_addr := '0' & insn_ra(f_in.insn);
        vr.reg_2_addr := '0' & insn_rb(f_in.insn);
@ -568,6 +658,7 @@ begin
        -- Update registers
        rin <= v;
        br_in <= bv;
+        pr_in <= pv;

        -- Update outputs
        d_out <= r;
--- a/decode2.vhdl
+++ b/decode2.vhdl
@ -83,12 +83,13 @@ architecture behaviour of decode2 is
    constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));

    function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
+                                 prefix : std_ulogic_vector(25 downto 0);
                                 instr_addr : std_ulogic_vector(63 downto 0))
        return decode_input_reg_t is
    begin
-        if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
+        if t = RA or ((t = RA_OR_ZERO or t = RA0_OR_CIA) and insn_ra(insn_in) /= "00000") then
            return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
-        elsif t = CIA then
+        elsif t = CIA or (t = RA0_OR_CIA and insn_prefix_r(prefix) = '1') then
            return ('0', (others => '0'), instr_addr);
        elsif HAS_FPU and t = FRA then
            return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
@ -97,7 +98,8 @@ architecture behaviour of decode2 is
        end if;
    end;

-    function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0))
+    function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
+                                 prefix : std_ulogic_vector(25 downto 0))
        return decode_input_reg_t is
        variable ret : decode_input_reg_t;
    begin
@ -114,6 +116,8 @@ architecture behaviour of decode2 is
                ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
            when CONST_SI =>
                ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)), 64)));
+            when CONST_PSI =>
+                ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_prefixed_si(prefix, insn_in)), 64)));
            when CONST_SI_HI =>
                ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64)));
            when CONST_UI_HI =>
@ -201,13 +205,13 @@ architecture behaviour of decode2 is
    type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);

    constant result_select : mux_select_array_t := (
-        OP_AND      => "001",           -- logical_result
-        OP_OR       => "001",
+        OP_LOGIC    => "001",           -- logical_result
        OP_XOR      => "001",
        OP_PRTY     => "001",
        OP_CMPB     => "001",
        OP_EXTS     => "001",
        OP_BPERM    => "001",
+        OP_BREV     => "001",
        OP_BCD      => "001",
        OP_MTSPR    => "001",
        OP_RLC      => "010",           -- rotator_result
@ -367,21 +371,27 @@ begin
    c_out.read <= d_in.decode.input_cr;

    decode2_addrs: process(all)
+        variable dec_a, dec_b, dec_c : decode_input_reg_t;
+        variable dec_o : decode_output_reg_t;
    begin
-        decoded_reg_a <= decode_input_reg_init;
-        decoded_reg_b <= decode_input_reg_init;
-        decoded_reg_c <= decode_input_reg_init;
-        decoded_reg_o <= decode_output_reg_init;
-        if d_in.valid = '1' then
-            decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.nia);
-            decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn);
-            decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
-            decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
+        dec_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.prefix, d_in.nia);
+        dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
+        dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
+        dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
+        if d_in.valid = '0' or d_in.illegal_suffix = '1' then
+            dec_a.reg_valid := '0';
+            dec_b.reg_valid := '0';
+            dec_c.reg_valid := '0';
+            dec_o.reg_valid := '0';
        end if;

-        r_out.read1_enable <= decoded_reg_a.reg_valid;
-        r_out.read2_enable <= decoded_reg_b.reg_valid;
-        r_out.read3_enable <= decoded_reg_c.reg_valid;
+        decoded_reg_a <= dec_a;
+        decoded_reg_b <= dec_b;
+        decoded_reg_c <= dec_c;
+        decoded_reg_o <= dec_o;
+        r_out.read1_enable <= dec_a.reg_valid;
+        r_out.read2_enable <= dec_b.reg_valid;
+        r_out.read3_enable <= dec_c.reg_valid;

    end process;

@ -588,6 +598,9 @@ begin
                    v.e.result_sel := "001";        -- logical_result
                end if;
            end if;
+            v.e.prefixed := d_in.prefixed;
+            v.e.illegal_suffix := d_in.illegal_suffix;
+            v.e.misaligned_prefix := d_in.misaligned_prefix;

        elsif dc2.e.valid = '1' then
            -- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@ -3,8 +3,9 @@ use ieee.std_logic_1164.all;

 package decode_types is
    type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
-			 OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
-			 OP_BCD, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
+			 OP_ATTN, OP_B, OP_BC, OP_BCREG,
+			 OP_BCD, OP_BPERM, OP_BREV,
+                         OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
 			 OP_CNTZ, OP_CROP,
 			 OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
 			 OP_DCBZ, OP_ICBI, OP_ICBT,
@ -12,10 +13,11 @@ package decode_types is
                         OP_DIV, OP_DIVE, OP_MOD,
                         OP_EXTS, OP_EXTSWSLI,
                         OP_ISEL, OP_ISYNC,
+                         OP_LOGIC,
 			 OP_LOAD, OP_STORE,
 			 OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
 			 OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
-			 OP_MUL_H64, OP_MUL_H32, OP_OR,
+			 OP_MUL_H64, OP_MUL_H32,
 			 OP_POPCNT, OP_PRTY, OP_RFID,
 			 OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
 			 OP_SHL, OP_SHR,
@ -34,222 +36,241 @@ package decode_types is
        -- The following instructions don't have an RB operand or access FPRs
        INSN_illegal, -- 0
        INSN_fetch_fail,
-        INSN_addi,
+        INSN_prefix,
+        INSN_pnop,
        INSN_addic,
        INSN_addic_dot,
        INSN_addis,
        INSN_addme,
        INSN_addpcis,
        INSN_addze,
-        INSN_andi_dot,
-        INSN_andis_dot, -- 10
+        INSN_andi_dot, -- 10
+        INSN_andis_dot,
        INSN_attn,
        INSN_b,
        INSN_bc,
        INSN_bcctr,
        INSN_bclr,
        INSN_bctar,
+        INSN_brh,
+        INSN_brw,
+        INSN_brd, -- 20
        INSN_cbcdtd,
        INSN_cdtbcd,
        INSN_cmpi,
-        INSN_cmpli, -- 20
+        INSN_cmpli,
        INSN_cntlzw,
        INSN_cntlzd,
        INSN_cnttzw,
        INSN_cnttzd,
        INSN_crand,
-        INSN_crandc,
+        INSN_crandc, -- 30
        INSN_creqv,
        INSN_crnand,
        INSN_crnor,
-        INSN_cror, -- 30
+        INSN_cror,
        INSN_crorc,
        INSN_crxor,
        INSN_darn,
        INSN_eieio,
        INSN_extsb,
-        INSN_extsh,
+        INSN_extsh, -- 40
        INSN_extsw,
        INSN_extswsli,
        INSN_isync,
-        INSN_lbz, -- 40
        INSN_lbzu,
        INSN_ld,
        INSN_ldu,
-        INSN_lha,
        INSN_lhau,
-        INSN_lhz,
-        INSN_lhzu,
        INSN_lwa,
-        INSN_lwz,
-        INSN_lwzu, -- 50
-        INSN_mcrf,
-        INSN_mcrfs,
+        INSN_lwzu,
+        INSN_mcrf, -- 50
        INSN_mcrxrx,
        INSN_mfcr,
        INSN_mfmsr,
        INSN_mfspr,
        INSN_mtcrf,
-        INSN_mtfsb,
-        INSN_mtfsfi,
-        INSN_mtmsr, -- 60
+        INSN_mtmsr,
        INSN_mtmsrd,
        INSN_mtspr,
        INSN_mulli,
-        INSN_neg,
+        INSN_neg, -- 60
        INSN_nop,
        INSN_ori,
        INSN_oris,
        INSN_popcntb,
        INSN_popcntw,
-        INSN_popcntd, -- 70
+        INSN_popcntd,
        INSN_prtyw,
        INSN_prtyd,
        INSN_rfid,
-        INSN_rldic,
+        INSN_rldic, -- 70
        INSN_rldicl,
        INSN_rldicr,
        INSN_rldimi,
        INSN_rlwimi,
        INSN_rlwinm,
-        INSN_sc, -- 80
+        INSN_sc,
        INSN_setb,
        INSN_slbia,
        INSN_sradi,
-        INSN_srawi,
-        INSN_stb,
+        INSN_srawi, -- 80
        INSN_stbu,
        INSN_std,
        INSN_stdu,
-        INSN_sth,
-        INSN_sthu, -- 90
-        INSN_stw,
+        INSN_sthu,
        INSN_stwu,
        INSN_subfic,
        INSN_subfme,
        INSN_subfze,
        INSN_sync,
-        INSN_tdi,
+        INSN_tdi, -- 90
        INSN_tlbsync,
        INSN_twi,
-        INSN_wait, -- 100
+        INSN_wait,
        INSN_xori,
        INSN_xoris,

-        -- pad to 112 to simplify comparison logic
-        INSN_103,
-        INSN_104, INSN_105, INSN_106, INSN_107,
-        INSN_108, INSN_109, INSN_110, INSN_111,
+        -- Non-prefixed instructions that have a MLS:D prefixed form and
+        -- their corresponding prefixed instructions.
+        -- The non-prefixed versions have even indexes so that we can
+        -- convert them to the prefixed version by setting bit 0
+        INSN_addi, -- 96
+        INSN_paddi,
+        INSN_lbz,
+        INSN_plbz,
+        INSN_lha, -- 100
+        INSN_plha,
+        INSN_lhz,
+        INSN_plhz,
+        INSN_lwz,
+        INSN_plwz,
+        INSN_stb,
+        INSN_pstb,
+        INSN_sth,
+        INSN_psth,
+        INSN_stw, -- 110
+        INSN_pstw,
+
+        -- Slots for non-prefixed opcodes that are 8LS:D when prefixed
+        INSN_lhzu, -- 112
+        INSN_plwa,
+        INSN_op57,
+        INSN_pld,
+        INSN_op61,
+        INSN_pstd,
+
+        -- pad to 128 to simplify comparison logic
+        INSN_076, INSN_077,
+        INSN_078, INSN_079, INSN_07a, INSN_07b, INSN_07c, INSN_07d, INSN_07e, INSN_07f,

        -- The following instructions have an RB operand but don't access FPRs
        INSN_add,
        INSN_addc,
-        INSN_adde,
+        INSN_adde, -- 130
        INSN_addex,
        INSN_addg6s,
        INSN_and,
        INSN_andc,
        INSN_bperm,
-        INSN_cmp, -- 120
+        INSN_cmp,
        INSN_cmpb,
        INSN_cmpeqb,
        INSN_cmpl,
-        INSN_cmprb,
+        INSN_cmprb, -- 140
        INSN_dcbf,
        INSN_dcbst,
        INSN_dcbt,
        INSN_dcbtst,
        INSN_dcbz,
-        INSN_divd, -- 130
+        INSN_divd,
        INSN_divdu,
        INSN_divde,
        INSN_divdeu,
-        INSN_divw,
+        INSN_divw, -- 150
        INSN_divwu,
        INSN_divwe,
        INSN_divweu,
        INSN_eqv,
        INSN_icbi,
-        INSN_icbt, -- 140
+        INSN_icbt,
        INSN_isel,
        INSN_lbarx,
        INSN_lbzcix,
-        INSN_lbzux,
+        INSN_lbzux, -- 160
        INSN_lbzx,
        INSN_ldarx,
        INSN_ldbrx,
        INSN_ldcix,
        INSN_ldx,
-        INSN_ldux, -- 150
+        INSN_ldux,
        INSN_lharx,
        INSN_lhax,
        INSN_lhaux,
-        INSN_lhbrx,
+        INSN_lhbrx, -- 170
        INSN_lhzcix,
        INSN_lhzx,
        INSN_lhzux,
        INSN_lwarx,
        INSN_lwax,
-        INSN_lwaux, -- 160
+        INSN_lwaux,
        INSN_lwbrx,
        INSN_lwzcix,
        INSN_lwzx,
-        INSN_lwzux,
+        INSN_lwzux, -- 180
        INSN_modsd,
        INSN_modsw,
        INSN_moduw,
        INSN_modud,
        INSN_mulhw,
-        INSN_mulhwu, -- 170
+        INSN_mulhwu,
        INSN_mulhd,
        INSN_mulhdu,
        INSN_mullw,
-        INSN_mulld,
+        INSN_mulld, -- 190
        INSN_nand,
        INSN_nor,
        INSN_or,
        INSN_orc,
        INSN_rldcl,
-        INSN_rldcr, -- 180
+        INSN_rldcr,
        INSN_rlwnm,
        INSN_slw,
        INSN_sld,
-        INSN_sraw,
+        INSN_sraw, -- 200
        INSN_srad,
        INSN_srw,
        INSN_srd,
        INSN_stbcix,
        INSN_stbcx,
-        INSN_stbx, -- 190
+        INSN_stbx,
        INSN_stbux,
        INSN_stdbrx,
        INSN_stdcix,
-        INSN_stdcx,
+        INSN_stdcx, -- 210
        INSN_stdx,
        INSN_stdux,
        INSN_sthbrx,
        INSN_sthcix,
        INSN_sthcx,
-        INSN_sthx, -- 200
+        INSN_sthx,
        INSN_sthux,
        INSN_stwbrx,
        INSN_stwcix,
-        INSN_stwcx,
+        INSN_stwcx, -- 220
        INSN_stwx,
        INSN_stwux,
        INSN_subf,
        INSN_subfc,
        INSN_subfe,
-        INSN_td, -- 210
+        INSN_td,
        INSN_tlbie,
        INSN_tlbiel,
        INSN_tw,
-        INSN_xor,
+        INSN_xor, -- 230

-        -- pad to 224 to simplify comparison logic
-        INSN_215,
-        INSN_216, INSN_217, INSN_218, INSN_219,
-        INSN_220, INSN_221, INSN_222, INSN_223,
+        -- pad to 232 to simplify comparison logic
+        INSN_231,

        -- The following instructions have a third input addressed by RC
        INSN_maddld,
@ -257,9 +278,7 @@ package decode_types is
        INSN_maddhdu,

        -- pad to 256 to simplify comparison logic
-        INSN_227,
-        INSN_228, INSN_229, INSN_230, INSN_231,
-        INSN_232, INSN_233, INSN_234, INSN_235,
+        INSN_235,
        INSN_236, INSN_237, INSN_238, INSN_239,
        INSN_240, INSN_241, INSN_242, INSN_243,
        INSN_244, INSN_245, INSN_246, INSN_247,
@ -267,36 +286,54 @@ package decode_types is
        INSN_252, INSN_253, INSN_254, INSN_255,

        -- The following instructions access floating-point registers
-        -- These ones have an FRS operand, but RA/RB are GPRs
-        INSN_stfd,
-        INSN_stfdu,
+        -- They have an FRS operand, but RA/RB are GPRs
+
+        -- Non-prefixed floating-point loads and stores that have a MLS:D
+        -- prefixed form, and their corresponding prefixed instructions.
+        INSN_stfd, -- 256
+        INSN_pstfd,
        INSN_stfs,
+        INSN_pstfs,
+        INSN_lfd, -- 260
+        INSN_plfd,
+        INSN_lfs,
+        INSN_plfs,
+
+        -- opcodes that can't have a prefix
+        INSN_stfdu, -- 264
        INSN_stfsu,
-        INSN_stfdux, -- 260
+        INSN_stfdux,
        INSN_stfdx,
        INSN_stfiwx,
        INSN_stfsux,
-        INSN_stfsx,
+        INSN_stfsx, -- 270
        -- These ones don't actually have an FRS operand (rather an FRT destination)
        -- but are here so that all FP instructions are >= INSN_first_frs.
-        INSN_lfd,
        INSN_lfdu,
-        INSN_lfs,
        INSN_lfsu,
        INSN_lfdx,
-        INSN_lfdux, -- 270
+        INSN_lfdux,
        INSN_lfiwax,
        INSN_lfiwzx,
        INSN_lfsx,
        INSN_lfsux,
-        INSN_275, -- padding
+        -- These are here in order to keep the FP instructions together
+        INSN_mcrfs,
+        INSN_mtfsb, -- 280
+        INSN_mtfsfi,
+        INSN_282, -- padding
+        INSN_283,
+        INSN_284,
+        INSN_285,
+        INSN_286,
+        INSN_287,

        -- The following instructions access FRA and/or FRB operands
-        INSN_fabs,
+        INSN_fabs, -- 288
        INSN_fadd,
-        INSN_fadds,
+        INSN_fadds, -- 290
        INSN_fcfid,
-        INSN_fcfids, -- 280
+        INSN_fcfids,
        INSN_fcfidu,
        INSN_fcfidus,
        INSN_fcmpo,
@ -304,9 +341,9 @@ package decode_types is
        INSN_fcpsgn,
        INSN_fctid,
        INSN_fctidz,
-        INSN_fctidu,
+        INSN_fctidu, -- 300
        INSN_fctiduz,
-        INSN_fctiw, -- 290
+        INSN_fctiw,
        INSN_fctiwz,
        INSN_fctiwu,
        INSN_fctiwuz,
@ -314,9 +351,9 @@ package decode_types is
        INSN_fdivs,
        INSN_fmr,
        INSN_fmrgew,
-        INSN_fmrgow,
+        INSN_fmrgow, -- 310
        INSN_fnabs,
-        INSN_fneg, -- 300
+        INSN_fneg,
        INSN_fre,
        INSN_fres,
        INSN_frim,
@ -324,9 +361,9 @@ package decode_types is
        INSN_frip,
        INSN_friz,
        INSN_frsp,
-        INSN_frsqrte,
+        INSN_frsqrte, -- 320
        INSN_frsqrtes,
-        INSN_fsqrt, -- 310
+        INSN_fsqrt,
        INSN_fsqrts,
        INSN_fsub,
        INSN_fsubs,
@ -335,21 +372,21 @@ package decode_types is
        INSN_mffs,
        INSN_mtfsf,

-        -- pad to 320
-        INSN_318, INSN_319,
+        -- pad to 336
+        INSN_330, INSN_331, INSN_332, INSN_333, INSN_334, INSN_335,

        -- The following instructions access FRA, FRB (possibly) and FRC operands
-        INSN_fmul, -- 320
+        INSN_fmul, -- 336
        INSN_fmuls,
        INSN_fmadd,
        INSN_fmadds,
-        INSN_fmsub,
+        INSN_fmsub, -- 340
        INSN_fmsubs,
        INSN_fnmadd,
        INSN_fnmadds,
        INSN_fnmsub,
        INSN_fnmsubs,
-        INSN_fsel  -- 330
+        INSN_fsel
        );

    constant INSN_first_rb : insn_code := INSN_add;
@ -357,10 +394,14 @@ package decode_types is
    constant INSN_first_frs : insn_code := INSN_stfd;
    constant INSN_first_frab : insn_code := INSN_fabs;
    constant INSN_first_frabc : insn_code := INSN_fmul;
+    constant INSN_first_mls : insn_code := INSN_addi;
+    constant INSN_first_8ls : insn_code := INSN_lhzu;
+    constant INSN_first_fp_mls : insn_code := INSN_stfd;
+    constant INSN_first_fp_nonmls : insn_code := INSN_stfdu;

-    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA);
+    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA);
    type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
-                           CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB);
+                           CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI, FRB);
    type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
    type output_reg_a_t is (NONE, RT, RA, FRT);
    type rc_t is (NONE, ONE, RC, RCOE);
@ -384,7 +425,7 @@ package decode_types is

    constant TOO_OFFSET : integer := 0;

-    type unit_t is (NONE, ALU, LDST, FPU);
+    type unit_t is (ALU, LDST, FPU);
    type facility_t is (NONE, FPU);
    type length_t is (NONE, is1B, is2B, is4B, is8B);

@ -425,7 +466,7 @@ package decode_types is
 	sgl_pipe     : std_ulogic;
        repeat       : repeat_t;
    end record;
-    constant decode_rom_init : decode_rom_t := (unit => NONE, facility => NONE,
+    constant decode_rom_init : decode_rom_t := (unit => ALU, facility => NONE,
 						insn_type => OP_ILLEGAL, input_reg_a => NONE,
 						input_reg_b => NONE, input_reg_c => NONE,
 						output_reg_a => NONE, input_cr => '0', output_cr => '0',
--- a/execute1.vhdl
+++ b/execute1.vhdl
@ -118,6 +118,7 @@ architecture behaviour of execute1 is
        fp_exception_next : std_ulogic;
        trace_next : std_ulogic;
        prev_op : insn_type_t;
+        prev_prefixed : std_ulogic;
        oe : std_ulogic;
        mul_select : std_ulogic_vector(1 downto 0);
        res2_sel : std_ulogic_vector(1 downto 0);
@ -141,6 +142,7 @@ architecture behaviour of execute1 is
        (e => Execute1ToWritebackInit, se => side_effect_init,
         busy => '0',
         fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
+         prev_prefixed => '0',
         oe => '0', mul_select => "00", res2_sel => "00",
         spr_select => spr_id_init, pmu_spr_num => 5x"0",
         mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
@ -390,6 +392,7 @@ begin
 	    op => e_in.insn_type,
 	    invert_in => e_in.invert_a,
 	    invert_out => e_in.invert_out,
+            is_signed => e_in.is_signed,
 	    result => logical_result,
            datalen => e_in.data_len
 	    );
@ -834,14 +837,27 @@ begin
 		end if;
                misc_result <= mfcr_result;
            when "110" =>
-                -- setb
-                bfa := insn_bfa(e_in.insn);
-                crbit := to_integer(unsigned(bfa)) * 4;
+                -- setb and set[n]bc[r]
                setb_result := (others => '0');
-                if cr_in(31 - crbit) = '1' then
-                    setb_result := (others => '1');
-                elsif cr_in(30 - crbit) = '1' then
-                    setb_result(0) := '1';
+                if e_in.insn(9) = '0' then
+                    -- setb
+                    bfa := insn_bfa(e_in.insn);
+                    crbit := to_integer(unsigned(bfa)) * 4;
+                    if cr_in(31 - crbit) = '1' then
+                        setb_result := (others => '1');
+                    elsif cr_in(30 - crbit) = '1' then
+                        setb_result(0) := '1';
+                    end if;
+                else
+                    -- set[n]bc[r]
+                    crbit := to_integer(unsigned(insn_bi(e_in.insn)));
+                    if (cr_in(31 - crbit) xor e_in.insn(6)) = '1' then
+                        if e_in.insn(7) = '0' then
+                            setb_result(0) := '1';
+                        else
+                            setb_result := (others => '1');
+                        end if;
+                    end if;
                end if;
                misc_result <= setb_result;
            when others =>
@ -978,6 +994,7 @@ begin
 	variable bo, bi : std_ulogic_vector(4 downto 0);
        variable illegal : std_ulogic;
        variable privileged : std_ulogic;
+        variable misaligned : std_ulogic;
        variable slow_op : std_ulogic;
        variable owait : std_ulogic;
        variable srr1 : std_ulogic_vector(63 downto 0);
@ -1021,16 +1038,14 @@ begin

        illegal := '0';
        privileged := '0';
+        misaligned := e_in.misaligned_prefix;
        slow_op := '0';
        owait := '0';

-        if ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
-            privileged := '1';
-        end if;
-
-        if (not HAS_FPU and e_in.fac = FPU) or e_in.unit = NONE then
-            -- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
+        if e_in.illegal_suffix = '1' then
            illegal := '1';
+        elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
+            privileged := '1';
        end if;

        v.do_trace := ex1.msr(MSR_SE);
@ -1091,8 +1106,8 @@ begin
            when OP_ADDG6S =>
            when OP_CMPRB =>
            when OP_CMPEQB =>
-            when OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
-                OP_BPERM | OP_BCD =>
+            when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
+                OP_BPERM | OP_BREV | OP_BCD =>

 	    when OP_B =>
                v.take_branch := '1';
@ -1320,9 +1335,22 @@ begin
                end if;
        end case;

-        if privileged = '1' then
+        if misaligned = '1' then
+            -- generate an alignment interrupt
+            -- This is higher priority than illegal because a misaligned
+            -- prefix will come down as an OP_ILLEGAL instruction.
+            v.exception := '1';
+            v.e.intr_vec := 16#600#;
+            v.e.srr1(47 - 35) := '1';
+            v.e.srr1(47 - 34) := '1';
+            if e_in.valid = '1' then
+                report "misaligned prefixed instruction interrupt";
+            end if;
+
+        elsif privileged = '1' then
            -- generate a program interrupt
            v.exception := '1';
+            v.e.srr1(47 - 34) := e_in.prefixed;
            -- set bit 45 to indicate privileged instruction type interrupt
            v.e.srr1(47 - 45) := '1';
            if e_in.valid = '1' then
@ -1331,6 +1359,7 @@ begin

        elsif illegal = '1' then
            v.exception := '1';
+            v.e.srr1(47 - 34) := e_in.prefixed;
            -- Since we aren't doing Hypervisor emulation assist (0xe40) we
            -- set bit 44 to indicate we have an illegal
            v.e.srr1(47 - 44) := '1';
@ -1341,6 +1370,7 @@ begin
        elsif HAS_FPU and ex1.msr(MSR_FP) = '0' and e_in.fac = FPU then
            -- generate a floating-point unavailable interrupt
            v.exception := '1';
+            v.e.srr1(47 - 34) := e_in.prefixed;
            v.e.intr_vec := 16#800#;
            if e_in.valid = '1' then
                report "FP unavailable interrupt";
@ -1406,6 +1436,7 @@ begin

        if valid_in = '1' then
            v.prev_op := e_in.insn_type;
+            v.prev_prefixed := e_in.prefixed;
        end if;

        -- Determine if there is any interrupt to be taken
@ -1427,6 +1458,7 @@ begin
                v.e.intr_vec := 16#d00#;
                v.e.srr1 := (others => '0');
                v.e.srr1(47 - 33) := '1';
+                v.e.srr1(47 - 34) := ex1.prev_prefixed;
                if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or
                    ex1.prev_op = OP_DCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then
                    v.e.srr1(47 - 35) := '1';
@ -1589,6 +1621,7 @@ begin
        lv.priv_mode := not ex1.msr(MSR_PR);
        lv.mode_32bit := not ex1.msr(MSR_SF);
        lv.is_32bit := e_in.is_32bit;
+        lv.prefixed := e_in.prefixed;
        lv.repeat := e_in.repeat;
        lv.second := e_in.second;
        lv.e2stall := fp_in.f2stall;
--- a/fetch1.vhdl
+++ b/fetch1.vhdl
@ -41,7 +41,6 @@ architecture behaviour of fetch1 is
        mode_32bit: std_ulogic;
        rd_is_niap4: std_ulogic;
        predicted_taken: std_ulogic;
-        pred_not_taken: std_ulogic;
        predicted_nia: std_ulogic_vector(63 downto 0);
    end record;
    signal r, r_next : Fetch1ToIcacheType;
@ -87,7 +86,6 @@ begin
                r.pred_ntaken <= r_next.pred_ntaken;
                r.nia <= r_next.nia;
                r_int.predicted_taken <= r_next_int.predicted_taken;
-                r_int.pred_not_taken <= r_next_int.pred_not_taken;
                r_int.predicted_nia <= r_next_int.predicted_nia;
                r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
            end if;
@ -155,7 +153,6 @@ begin
        v.predicted := '0';
        v.pred_ntaken := '0';
        v_int.predicted_taken := '0';
-        v_int.pred_not_taken := '0';
        v_int.rd_is_niap4 := '0';

 	if rst = '1' then
@ -185,10 +182,8 @@ begin
            end if;
        elsif r_int.predicted_taken = '1' then
            v.nia := r_int.predicted_nia;
-            v.predicted := '1';
-        else
+        elsif r.req = '1' then
            v_int.rd_is_niap4 := '1';
-            v.pred_ntaken := r_int.pred_not_taken;
            v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
            if r_int.mode_32bit = '1' then
                v.nia(63 downto 32) := x"00000000";
@ -198,7 +193,8 @@ begin
                btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
                = v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
                v_int.predicted_taken := btc_rd_data(BTC_WIDTH - 1);
-                v_int.pred_not_taken := not btc_rd_data(BTC_WIDTH - 1);
+                v.predicted := btc_rd_data(BTC_WIDTH - 1);
+                v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
            end if;
        end if;
        v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
--- a/icache.vhdl
+++ b/icache.vhdl
@ -192,6 +192,8 @@ architecture rtl of icache is
 	hit_smark : std_ulogic;
 	hit_valid : std_ulogic;
        big_endian: std_ulogic;
+        predicted  : std_ulogic;
+        pred_ntaken: std_ulogic;

 	-- Cache miss state (reload state machine)
        state            : state_t;
@ -629,8 +631,8 @@ begin
 	i_out.stop_mark <= r.hit_smark;
        i_out.fetch_failed <= r.fetch_failed;
        i_out.big_endian <= r.big_endian;
-        i_out.next_predicted <= i_in.predicted;
-        i_out.next_pred_ntaken <= i_in.pred_ntaken;
+        i_out.next_predicted <= r.predicted;
+        i_out.next_pred_ntaken <= r.pred_ntaken;

 	-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
 	stall_out <= not (is_hit and access_ok);
@ -673,6 +675,8 @@ begin
                r.hit_smark <= i_in.stop_mark;
                r.hit_nia <= i_in.nia;
                r.big_endian <= i_in.big_endian;
+                r.predicted <= i_in.predicted;
+                r.pred_ntaken <= i_in.pred_ntaken;
            end if;
            if i_out.valid = '1' then
                assert not is_X(i_out.insn) severity failure;
--- a/insn_helpers.vhdl
+++ b/insn_helpers.vhdl
@ -43,6 +43,9 @@ package insn_helpers is
    function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
+    function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic;
+    function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
+        return std_ulogic_vector;
 end package insn_helpers;

 package body insn_helpers is
@ -250,4 +253,16 @@ package body insn_helpers is
    begin
        return insn_in(15 downto 12);
    end;
+
+    -- Return bit 20 of the supplied prefix vector.
+    -- NOTE(review): presumably this is the R (PC-relative) field of the prefix
+    -- word; confirm against the prefix layout used by the decoder.
+    function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic is
+    begin
+        return prefix(20);
+    end;
+
+    -- Build the 34-bit immediate of a prefixed instruction: the low 18 bits of
+    -- the prefix supply the upper part and the low 16 bits of the suffix supply
+    -- the lower part.
+    function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
+        return std_ulogic_vector is
+    begin
+        return prefix(17 downto 0) & suffix(15 downto 0);
+    end;
+
 end package body insn_helpers;
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@ -69,6 +69,7 @@ architecture behave of loadstore1 is
        instr_fault  : std_ulogic;
        do_update    : std_ulogic;
        mode_32bit   : std_ulogic;
+        prefixed     : std_ulogic;
 	addr         : std_ulogic_vector(63 downto 0);
        byte_sel     : std_ulogic_vector(7 downto 0);
        second_bytes : std_ulogic_vector(7 downto 0);
@ -99,7 +100,8 @@ architecture behave of loadstore1 is
    constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
                                          dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
                                          instr_fault => '0', do_update => '0',
-                                          mode_32bit => '0', addr => (others => '0'),
+                                          mode_32bit => '0', prefixed => '0',
+                                          addr => (others => '0'),
                                          byte_sel => x"00", second_bytes => x"00",
                                          store_data => (others => '0'), instr_tag => instr_tag_init,
                                          write_reg => 6x"00", length => x"0",
@ -411,6 +413,7 @@ begin
        v.valid := l_in.valid;
        v.instr_tag := l_in.instr_tag;
        v.mode_32bit := l_in.mode_32bit;
+        v.prefixed := l_in.prefixed;
        v.write_reg := l_in.write_reg;
        v.length := l_in.length;
        v.elt_length := l_in.length;
@ -906,8 +909,10 @@ begin
        if exception = '1' then
            if r2.req.align_intr = '1' then
                v.intr_vec := 16#600#;
+                v.srr1(47 - 34) := r2.req.prefixed;
                v.dar := r2.req.addr;
            elsif r2.req.instr_fault = '0' then
+                v.srr1(47 - 34) := r2.req.prefixed;
                v.dar := r2.req.addr;
                if m_in.segerr = '0' then
                    v.intr_vec := 16#300#;
--- a/logical.vhdl
+++ b/logical.vhdl
@ -13,6 +13,7 @@ entity logical is
        op         : in insn_type_t;
        invert_in  : in std_ulogic;
        invert_out : in std_ulogic;
+        is_signed  : in std_ulogic;
        result     : out std_ulogic_vector(63 downto 0);
        datalen    : in std_logic_vector(3 downto 0)
        );
@ -92,7 +93,8 @@ architecture behaviour of logical is

 begin
    logical_0: process(all)
-        variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
+        variable rb_adj, rs_adj : std_ulogic_vector(63 downto 0);
+        variable tmp : std_ulogic_vector(63 downto 0);
        variable negative : std_ulogic;
        variable j : integer;
    begin
@ -123,19 +125,34 @@ begin
        end if;

        case op is
-            when OP_AND | OP_OR | OP_XOR =>
-                case op is
-                    when OP_AND =>
-                        tmp := rs and rb_adj;
-                    when OP_OR =>
-                        tmp := rs or rb_adj;
-                    when others =>
-                        tmp := rs xor rb_adj;
-                end case;
+            when OP_LOGIC =>
+                -- for now, abuse the 'is_signed' field to indicate inversion of RS
+                rs_adj := rs;
+                if is_signed = '1' then
+                    rs_adj := not rs;
+                end if;
+                tmp := rs_adj and rb_adj;
+                if invert_out = '1' then
+                    tmp := not tmp;
+                end if;
+            when OP_XOR =>
+                tmp := rs xor rb;
                if invert_out = '1' then
                    tmp := not tmp;
                end if;

+            when OP_BREV =>
+                if datalen(3) = '1' then
+                    tmp := rs( 7 downto  0) & rs(15 downto  8) & rs(23 downto 16) & rs(31 downto 24) & 
+                           rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56);
+                elsif datalen(2) = '1' then
+                    tmp := rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56) &
+                           rs( 7 downto  0) & rs(15 downto  8) & rs(23 downto 16) & rs(31 downto 24);
+                else
+                    tmp := rs(55 downto 48) & rs(63 downto 56) & rs(39 downto 32) & rs(47 downto 40) &
+                           rs(23 downto 16) & rs(31 downto 24) & rs( 7 downto  0) & rs(15 downto  8);
+                end if;
+
            when OP_PRTY =>
                tmp := parity;
            when OP_CMPB =>
--- a/predecode.vhdl
+++ b/predecode.vhdl
@ -158,6 +158,11 @@ architecture behaviour of predecoder is
        2#111111_11010# to 2#111111_11011# =>  INSN_fmadd,
        2#111111_11100# to 2#111111_11101# =>  INSN_fnmsub,
        2#111111_11110# to 2#111111_11111# =>  INSN_fnmadd,
+        -- prefix word, PO1
+        2#000001_00000# to 2#000001_11111# =>  INSN_prefix,
+        -- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed
+        2#111001_00000# to 2#111001_11111# =>  INSN_op57,
+        2#111101_00000# to 2#111101_11111# =>  INSN_op61,
        others                             =>  INSN_illegal
        );

@ -179,6 +184,9 @@ architecture behaviour of predecoder is
        2#0_00000_11100#  =>  INSN_and,
        2#0_00001_11100#  =>  INSN_andc,
        2#0_00111_11100#  =>  INSN_bperm,
+        2#0_00110_11011#  =>  INSN_brh,
+        2#0_00100_11011#  =>  INSN_brw,
+        2#0_00101_11011#  =>  INSN_brd,
        2#0_01001_11010#  =>  INSN_cbcdtd,
        2#0_01000_11010#  =>  INSN_cdtbcd,
        2#0_00000_00000#  =>  INSN_cmp,
@ -331,6 +339,10 @@ architecture behaviour of predecoder is
        2#0_00101_11010#  =>  INSN_prtyd,
        2#0_00100_11010#  =>  INSN_prtyw,
        2#0_00100_00000#  =>  INSN_setb,
+        2#0_01100_00000#  =>  INSN_setb, -- setbc
+        2#0_01101_00000#  =>  INSN_setb, -- setbcr
+        2#0_01110_00000#  =>  INSN_setb, -- setnbc
+        2#0_01111_00000#  =>  INSN_setb, -- setnbcr
        2#0_01111_10010#  =>  INSN_slbia,
        2#0_00000_11011#  =>  INSN_sld,
        2#0_00000_11000#  =>  INSN_slw,
--- a/tests/prefix/Makefile
+++ b/tests/prefix/Makefile
@ -0,0 +1,3 @@
+TEST=prefix
+
+include ../Makefile.test
--- a/tests/prefix/head.S
+++ b/tests/prefix/head.S
@ -0,0 +1,247 @@
+/* Copyright 2013-2014 IBM Corp.
+ * Copyright 2023 Paul Mackerras <paulus@ozlabs.org>.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Load an immediate 64-bit value into a register */
+#define LOAD_IMM64(r, e)			\
+	lis     r,(e)@highest;			\
+	ori     r,r,(e)@higher;			\
+	rldicr  r,r, 32, 31;			\
+	oris    r,r, (e)@h;			\
+	ori     r,r, (e)@l;
+
+	.section ".head","ax"
+
+	/*
+	 * Microwatt currently enters in LE mode at 0x0, so we don't need to
+	 * do any endian fix ups
+	 */
+	. = 0
+/*
+ * Entry point: clear the BSS in 64-byte steps, set up the stack, point
+ * SPRG0 at `die` (the exception vectors branch to whatever SPRG0 holds)
+ * and call main().
+ */
+.global _start
+_start:
+	/* r10 = BSS start, r11 = (BSS size + 63) / 64 = number of blocks to zero */
+	LOAD_IMM64(%r10,__bss_start)
+	LOAD_IMM64(%r11,__bss_end)
+	subf	%r11,%r10,%r11
+	addi	%r11,%r11,63
+	srdi.	%r11,%r11,6
+	/* skip the loop entirely when the block count is zero */
+	beq	2f
+	mtctr	%r11
+1:	dcbz	0,%r10
+	addi	%r10,%r10,64
+	bdnz	1b
+
+	/* set the stack pointer and push an initial frame holding a zero word */
+2:	LOAD_IMM64(%r1,__stack_top)
+	li	%r0,0
+	stdu	%r0,-16(%r1)
+	/* any exception taken outside trapit() ends up at die */
+	LOAD_IMM64(%r10, die)
+	mtsprg0	%r10
+	/* call main through CTR, with its address also in r12 */
+	LOAD_IMM64(%r12, main)
+	mtctr	%r12
+	bctrl
+die:	attn // terminate on exit
+	b .
+
+/*
+ * long trapit(long arg, long (*func)(long))
+ *
+ * Call func (in r4) with arg left in r3, arranging that any exception taken
+ * while it runs resumes at `ret` with the vector number in r3 (see the
+ * EXCEPTION macro).  r14-r31 and CR are saved and restored around the call
+ * and the stack pointer is kept in SPRG1, so the caller's state survives
+ * even if func is abandoned part-way through.
+ */
+.global trapit
+trapit:
+	/* save LR in the caller's frame and allocate a 256-byte frame */
+	mflr	%r0
+	std	%r0,16(%r1)
+	stdu	%r1,-256(%r1)
+	/* remember the stack pointer so `ret` can recover it after an exception */
+	mtsprg1	%r1
+	/* save r14..r31 at offset r*8 in the frame */
+	r = 14
+	.rept	18
+	std	r,r*8(%r1)
+	r = r + 1
+	.endr
+	/* save CR in the slot just below r14's */
+	mfcr	%r0
+	stw	%r0,13*8(%r1)
+	/* exceptions now continue at ret instead of die */
+	LOAD_IMM64(%r10, ret)
+	mtsprg0	%r10
+	mr	%r12,%r4
+	mtctr	%r4
+	bctrl
+ret:
+	/* reached on normal return or from an exception vector */
+	mfsprg1	%r1
+	/* restore the default exception continuation */
+	LOAD_IMM64(%r10, die)
+	mtsprg0	%r10
+	r = 14
+	.rept	18
+	ld	r,r*8(%r1)
+	r = r + 1
+	.endr
+	lwz	%r0,13*8(%r1)
+	mtcr	%r0
+	/* reload the saved LR, pop the frame and return with r3 as the result */
+	ld	%r0,256+16(%r1)
+	addi	%r1,%r1,256
+	mtlr	%r0
+	blr
+
+/*
+ * Emit an exception vector at address nr: load the continuation address
+ * from SPRG0, put the vector number in r3 and branch to the continuation.
+ */
+#define EXCEPTION(nr)		\
+	.= nr			;\
+	mfsprg0	%r0		;\
+	mtctr	%r0		;\
+	li	%r3,nr		;\
+	bctr
+
+	EXCEPTION(0x300)
+	EXCEPTION(0x380)
+	EXCEPTION(0x400)
+	EXCEPTION(0x480)
+	EXCEPTION(0x500)
+	EXCEPTION(0x600)
+	EXCEPTION(0x700)
+	EXCEPTION(0x800)
+	EXCEPTION(0x900)
+	EXCEPTION(0x980)
+	EXCEPTION(0xa00)
+	EXCEPTION(0xb00)
+	EXCEPTION(0xc00)
+	EXCEPTION(0xd00)
+	EXCEPTION(0xe00)
+	EXCEPTION(0xe20)
+	EXCEPTION(0xe40)
+	EXCEPTION(0xe60)
+	EXCEPTION(0xe80)
+	EXCEPTION(0xf00)
+	EXCEPTION(0xf20)
+	EXCEPTION(0xf40)
+	EXCEPTION(0xf60)
+	EXCEPTION(0xf80)
+
+	. = 0x1000
+	/* r3 = r3 + 0x123456789 using a prefixed add immediate with R=0 */
+	.globl	test_paddi
+test_paddi:
+	nop
+	nop
+	/* enable the prefixed-instruction mnemonics from here on */
+	.machine "power10"
+	paddi	%r3,%r3,0x123456789,0
+	blr
+
+	/*
+	 * paddi with R=1 and RA=0.  The caller expects r3 == 0x123456789, so the
+	 * immediate is biased by 0x101c, which is where this paddi is expected
+	 * to sit given the layout from 0x1000 -- keep in step with any code
+	 * added or removed above.
+	 */
+	.globl	test_paddi_r
+test_paddi_r:
+	nop
+	nop
+	paddi	%r3,0,0x123456789 - 0x101c,1
+	blr
+
+	/* r3 = r3 - 0x123456789: prefixed add immediate with a negative value, R=0 */
+	.globl	test_paddi_neg
+test_paddi_neg:
+	nop
+	nop
+	paddi	%r3,%r3,-0x123456789,0
+	blr
+
+	/* prefixed doubleword load of lvar, stored to the address in r3; returns 0 */
+	.globl	test_pld
+test_pld:
+	nop
+	nop
+	pld	%r4,lvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	/*
+	 * prefixed FP doubleword load of fpvar, stored to the address in r3.
+	 * r3 is not changed here, so a normal return yields the original argument.
+	 */
+	.globl	test_plfd
+test_plfd:
+	nop
+	nop
+	plfd	%f0,fpvar(0)
+	stfd	%f0,0(%r3)
+	blr
+
+	/*
+	 * Hand-encoded prefixed add immediate placed so that the prefix word lands
+	 * at 0x107c and the suffix at 0x1080, i.e. the pair straddles a 64-byte
+	 * boundary.  The two words are the prefix (0x06012345) and an
+	 * addi r3,r3,0x6789 suffix (0x38636789).
+	 */
+	. = 0x1074
+	.globl	test_paddi_mis
+test_paddi_mis:
+	nop
+	nop
+	.long	0x06012345
+	.long	0x38636789
+	blr
+
+	/* prefixed doubleword store of r3 to lvar; returns 0 */
+	.globl	test_pstd
+test_pstd:
+	nop
+	nop
+	pstd	%r3,lvar(0)
+	li	%r3,0
+	blr
+
+	/* prefixed byte load (zero-extending) of bvar, stored to the address in r3; returns 0 */
+	.globl	test_plbz
+test_plbz:
+	nop
+	nop
+	plbz	%r4,bvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	/* prefixed byte store of r3 to bvar; returns 0 */
+	.globl	test_pstb
+test_pstb:
+	nop
+	nop
+	pstb	%r3,bvar(0)
+	li	%r3,0
+	blr
+
+	/* prefixed halfword load (sign-extending) of hvar, stored to the address in r3; returns 0 */
+	.globl	test_plha
+test_plha:
+	nop
+	nop
+	plha	%r4,hvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	/* prefixed halfword load (zero-extending) of hvar, stored to the address in r3; returns 0 */
+	.globl	test_plhz
+test_plhz:
+	nop
+	nop
+	plhz	%r4,hvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	/* prefixed halfword store of r3 to hvar; returns 0 */
+	.globl	test_psth
+test_psth:
+	nop
+	nop
+	psth	%r3,hvar(0)
+	li	%r3,0
+	blr
+
+	/* prefixed word load (sign-extending) of wvar, stored to the address in r3; returns 0 */
+	.globl	test_plwa
+test_plwa:
+	nop
+	nop
+	plwa	%r4,wvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	/* prefixed word load (zero-extending) of wvar, stored to the address in r3; returns 0 */
+	.globl	test_plwz
+test_plwz:
+	nop
+	nop
+	plwz	%r4,wvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	/* prefixed word store of r3 to wvar; returns 0 */
+	.globl	test_pstw
+test_pstw:
+	nop
+	nop
+	pstw	%r3,wvar(0)
+	li	%r3,0
+	blr
--- a/tests/prefix/powerpc.lds
+++ b/tests/prefix/powerpc.lds
@ -0,0 +1,27 @@
+/*
+ * Image layout, linked at address 0: .head first, then text/rodata and data
+ * (each aligned to 0x1000), then BSS and a 0x4000-byte stack region.
+ * __bss_start, __bss_end and __stack_top are consumed by head.S.
+ */
+SECTIONS
+{
+	. = 0;
+	_start = .;
+	/* KEEP so the entry code and exception vectors survive section GC */
+	.head : {
+		KEEP(*(.head))
+	}
+	. = ALIGN(0x1000);
+	.text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) }
+	. = ALIGN(0x1000);
+	.data : { *(.data) *(.data.*) *(.got) *(.toc) }
+	/* BSS bounds are 0x80-aligned; head.S clears it in 64-byte blocks */
+	. = ALIGN(0x80);
+	__bss_start = .;
+	.bss : {
+		*(.dynsbss)
+		*(.sbss)
+		*(.scommon)
+		*(.dynbss)
+		*(.bss)
+		*(.common)
+		*(.bss.*)
+	}
+	. = ALIGN(0x80);
+	__bss_end = .;
+	/* reserve the stack; it starts at the top of this region */
+	. = . + 0x4000;
+	__stack_top = .;
+}
--- a/tests/prefix/prefix.c
+++ b/tests/prefix/prefix.c
@ -0,0 +1,214 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "console.h"
+
+#define MSR_LE	0x1
+#define MSR_DR	0x10
+#define MSR_IR	0x20
+#define MSR_SF	0x8000000000000000ul
+
+#define DSISR	18
+#define DAR	19
+#define SRR0	26
+#define SRR1	27
+#define PID	48
+#define PTCR	464
+
+extern long trapit(long arg, long (*func)(long));
+extern long test_paddi(long arg);
+extern long test_paddi_r(long arg);
+extern long test_paddi_neg(long arg);
+extern long test_paddi_mis(long arg);
+extern long test_plbz(long arg);
+extern long test_pld(long arg);
+extern long test_plha(long arg);
+extern long test_plhz(long arg);
+extern long test_plwa(long arg);
+extern long test_plwz(long arg);
+extern long test_pstb(long arg);
+extern long test_pstd(long arg);
+extern long test_psth(long arg);
+extern long test_pstw(long arg);
+extern long test_plfd(long arg);
+
+/*
+ * Read a special-purpose register and return its value.
+ *
+ * sprnum is passed with an "i" (immediate) constraint, so it must be a
+ * compile-time constant at every call site.
+ */
+static inline unsigned long mfspr(int sprnum)
+{
+	/* same type as the return value, so no signed/unsigned conversion occurs */
+	unsigned long val;
+
+	__asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (sprnum));
+	return val;
+}
+
+/*
+ * Write val to a special-purpose register.  sprnum is passed with an "i"
+ * (immediate) constraint, so it must be a compile-time constant.
+ */
+static inline void mtspr(int sprnum, unsigned long val)
+{
+	__asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val));
+}
+
+/* Write a NUL-terminated string to the console one character at a time. */
+void print_string(const char *str)
+{
+	while (*str != '\0') {
+		putchar(*str);
+		str++;
+	}
+}
+
+/*
+ * Print the low ndigits hexadecimal digits of val, most significant first
+ * and in lower case, followed by the string str.
+ */
+void print_hex(unsigned long val, int ndigits, const char *str)
+{
+	static const char hexdigits[] = "0123456789abcdef";
+	int shift;
+
+	/* walk the nibbles from the top one requested down to bit 0 */
+	for (shift = (ndigits - 1) * 4; shift >= 0; shift -= 4)
+		putchar(hexdigits[(val >> shift) & 0xf]);
+	print_string(str);
+}
+
+/* Print "test NN:" with i as exactly two decimal digits; only valid for 0 <= i < 100. */
+void print_test_number(int i)
+{
+	int tens = i / 10;
+	int units = i % 10;
+
+	print_string("test ");
+	putchar('0' + tens);
+	putchar('0' + units);
+	putchar(':');
+}
+
+/*
+ * Test 1: prefixed add immediate with a positive immediate, with R=1, and
+ * with a negative immediate.
+ *
+ * Returns 0 on success.  On failure it returns the unexpected value that
+ * came back from trapit() (the wrong sum, or an exception vector number) so
+ * that do_test() prints it.  A wrong result of exactly 0 is mapped to -1,
+ * because do_test() treats a return of 0 as PASS.
+ */
+long int prefix_test_1(void)
+{
+	long int ret;
+
+	ret = trapit(0x321, test_paddi);
+	if (ret != 0x123456789 + 0x321)
+		return ret ? ret : -1;
+	ret = trapit(0x322, test_paddi_r);
+	if (ret != 0x123456789)
+		return ret ? ret : -1;
+	ret = trapit(0x323, test_paddi_neg);
+	if (ret != 0x323 - 0x123456789)
+		return ret ? ret : -1;
+	return 0;
+}
+
+double fpvar = 123.456;
+
+/*
+ * Test 2: interrupts raised by prefixed instructions.
+ *
+ * First runs a prefixed instruction that straddles a 64-byte boundary and
+ * expects vector 0x600 with SRR0 pointing at the prefix word and SRR1
+ * bits 34 and 35 set.  Then runs a prefixed FP load and expects vector
+ * 0x800 with SRR0 at the prefix word and SRR1 bit 34 set.
+ * NOTE(review): the second half relies on MSR[FP] being clear when this
+ * runs -- confirm nothing earlier enables FP.
+ *
+ * Returns 0 on success, otherwise a small failure code or the unexpected
+ * trapit() result.
+ */
+long int prefix_test_2(void)
+{
+	long int ret;
+	double x;
+
+	ret = trapit(0x123, test_paddi_mis);
+	if (ret != 0x600)
+		return 1;
+	/* the prefix word follows the two nops at the start of the stub */
+	if (mfspr(SRR0) != (unsigned long)&test_paddi_mis + 8)
+		return 2;
+	if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 35)) | (1ul << (63 - 34))))
+		return 3;
+
+	ret = trapit((long)&x, test_plfd);
+	if (ret != 0x800)
+		return ret;
+	if (mfspr(SRR0) != (unsigned long)&test_plfd + 8)
+		return 6;
+	/* bit 34 marks the interrupting instruction as prefixed */
+	if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 34))))
+		return 7;
+	return 0;
+}
+
+unsigned char bvar = 0x63;
+long lvar = 0xfedcba987654;
+unsigned short hvar = 0xffee;
+unsigned int wvar = 0x80457788;
+
+/*
+ * Test 3: prefixed integer loads and stores of each size against the
+ * globals lvar, bvar, hvar and wvar.
+ *
+ * Each load stub writes the loaded value to x through the pointer passed as
+ * the argument; each store stub stores its argument into the global.
+ * Returns 0 on success.  A non-zero stub return (e.g. an exception vector)
+ * is OR-ed with a code identifying the step; a data mismatch returns the
+ * step code alone.
+ */
+long int prefix_test_3(void)
+{
+	long int ret;
+	long int x;
+
+	/* doubleword: pld / pstd */
+	ret = trapit((long)&x, test_pld);
+	if (ret)
+		return ret | 1;
+	if (x != lvar)
+		return 2;
+	ret = trapit(1234, test_pstd);
+	if (ret)
+		return ret | 2;
+	if (lvar != 1234)
+		return 3;
+
+	/* byte: plbz / pstb */
+	ret = trapit((long)&x, test_plbz);
+	if (ret)
+		return ret | 0x10;
+	if (x != bvar)
+		return 0x11;
+	ret = trapit(0xaa, test_pstb);
+	if (ret)
+		return ret | 0x12;
+	if (bvar != 0xaa)
+		return 0x13;
+
+	/* halfword: plhz (zero-extend), plha (sign-extend), psth */
+	ret = trapit((long)&x, test_plhz);
+	if (ret)
+		return ret | 0x20;
+	if (x != hvar)
+		return 0x21;
+	ret = trapit((long)&x, test_plha);
+	if (ret)
+		return ret | 0x22;
+	if (x != (signed short)hvar)
+		return 0x23;
+	ret = trapit(0x23aa, test_psth);
+	if (ret)
+		return ret | 0x24;
+	if (hvar != 0x23aa)
+		return 0x25;
+
+	/* word: plwz (zero-extend), plwa (sign-extend), pstw */
+	ret = trapit((long)&x, test_plwz);
+	if (ret)
+		return ret | 0x30;
+	if (x != wvar)
+		return 0x31;
+	ret = trapit((long)&x, test_plwa);
+	if (ret)
+		return ret | 0x32;
+	if (x != (signed int)wvar)
+		return 0x33;
+	ret = trapit(0x23aaf44f, test_pstw);
+	if (ret)
+		return ret | 0x34;
+	if (wvar != 0x23aaf44f)
+		return 0x35;
+	return 0;
+}
+
+int fail = 0;
+
+/*
+ * Run one test and report it on the console as "test NN:PASS" or as
+ * "test NN:FAIL <result> SRR0=<..> SRR1=<..>".  Any failure sets the
+ * global `fail` flag.
+ */
+void do_test(int num, long int (*test)(void))
+{
+	long int result;
+
+	print_test_number(num);
+	result = test();
+	if (result != 0) {
+		fail = 1;
+		print_string("FAIL ");
+		print_hex(result, 16, " SRR0=");
+		print_hex(mfspr(SRR0), 16, " SRR1=");
+		print_hex(mfspr(SRR1), 16, "\r\n");
+		return;
+	}
+	print_string("PASS\r\n");
+}
+
+/*
+ * Initialise the console, run the three prefixed-instruction tests in order
+ * and return the global `fail` flag (0 when every test passed).
+ */
+int main(void)
+{
+	console_init();
+	//init_mmu();
+
+	do_test(1, prefix_test_1);
+	do_test(2, prefix_test_2);
+	do_test(3, prefix_test_3);
+
+	return fail;
+}
--- a/tests/test_prefix.bin
+++ b/tests/test_prefix.bin
--- a/tests/test_prefix.console_out
+++ b/tests/test_prefix.console_out
@ -0,0 +1,3 @@
+test 01:PASS
+test 02:PASS
+test 03:PASS
--- a/tests/test_prefix.metavalue
+++ b/tests/test_prefix.metavalue
@ -0,0 +1 @@
+30