From 4416ebe92ea956d37fa7ee3938fe057f5dd91a31 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Sat, 20 Aug 2022 18:06:06 +1000
Subject: [PATCH 1/7] fetch1: Change the way predictions from the BTC are sent
 downstream

Instead of sending down the predicted taken/not-taken bits with the
target of the branch, we now send them down with the branch itself.
Previously icache adjusted for this by sending the prediction bits to
decode1 without a 1-clock delay while everything else had a 1-clock
delay.  Now icache keeps the prediction bits with the rest of the
attributes for the request.

Also fix a buglet in fetch1 where the first address sent out after
reset didn't have .req set.  Currently this doesn't cause a problem
because icache doesn't really look at .req.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 fetch1.vhdl | 10 +++-------
 icache.vhdl |  8 ++++++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fetch1.vhdl b/fetch1.vhdl
index 13f2a7f..6803fb6 100644
--- a/fetch1.vhdl
+++ b/fetch1.vhdl
@@ -41,7 +41,6 @@ architecture behaviour of fetch1 is
         mode_32bit: std_ulogic;
         rd_is_niap4: std_ulogic;
         predicted_taken: std_ulogic;
-        pred_not_taken: std_ulogic;
         predicted_nia: std_ulogic_vector(63 downto 0);
     end record;
     signal r, r_next : Fetch1ToIcacheType;
@@ -87,7 +86,6 @@ begin
                 r.pred_ntaken <= r_next.pred_ntaken;
                 r.nia <= r_next.nia;
                 r_int.predicted_taken <= r_next_int.predicted_taken;
-                r_int.pred_not_taken <= r_next_int.pred_not_taken;
                 r_int.predicted_nia <= r_next_int.predicted_nia;
                 r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
             end if;
@@ -155,7 +153,6 @@ begin
         v.predicted := '0';
         v.pred_ntaken := '0';
         v_int.predicted_taken := '0';
-        v_int.pred_not_taken := '0';
         v_int.rd_is_niap4 := '0';
 
 	if rst = '1' then
@@ -185,10 +182,8 @@ begin
             end if;
         elsif r_int.predicted_taken = '1' then
             v.nia := r_int.predicted_nia;
-            v.predicted := '1';
-        else
+        elsif r.req = '1' then
             v_int.rd_is_niap4 := '1';
-            v.pred_ntaken := r_int.pred_not_taken;
             v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
             if r_int.mode_32bit = '1' then
                 v.nia(63 downto 32) := x"00000000";
@@ -198,7 +193,8 @@ begin
                 btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
                 = v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
                 v_int.predicted_taken := btc_rd_data(BTC_WIDTH - 1);
-                v_int.pred_not_taken := not btc_rd_data(BTC_WIDTH - 1);
+                v.predicted := btc_rd_data(BTC_WIDTH - 1);
+                v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
             end if;
         end if;
         v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
diff --git a/icache.vhdl b/icache.vhdl
index 0467630..6383726 100644
--- a/icache.vhdl
+++ b/icache.vhdl
@@ -192,6 +192,8 @@ architecture rtl of icache is
 	hit_smark : std_ulogic;
 	hit_valid : std_ulogic;
         big_endian: std_ulogic;
+        predicted  : std_ulogic;
+        pred_ntaken: std_ulogic;
 
 	-- Cache miss state (reload state machine)
         state            : state_t;
@@ -629,8 +631,8 @@ begin
 	i_out.stop_mark <= r.hit_smark;
         i_out.fetch_failed <= r.fetch_failed;
         i_out.big_endian <= r.big_endian;
-        i_out.next_predicted <= i_in.predicted;
-        i_out.next_pred_ntaken <= i_in.pred_ntaken;
+        i_out.next_predicted <= r.predicted;
+        i_out.next_pred_ntaken <= r.pred_ntaken;
 
 	-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
 	stall_out <= not (is_hit and access_ok);
@@ -673,6 +675,8 @@ begin
                 r.hit_smark <= i_in.stop_mark;
                 r.hit_nia <= i_in.nia;
                 r.big_endian <= i_in.big_endian;
+                r.predicted <= i_in.predicted;
+                r.pred_ntaken <= i_in.pred_ntaken;
             end if;
             if i_out.valid = '1' then
                 assert not is_X(i_out.insn) severity failure;

From 7af0e001adaf5f4528f4916e80a7ada904f378ca Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 3 Jul 2023 21:09:33 +1000
Subject: [PATCH 2/7] Move insn_codes for mcrfs, mtfsb0/1 and mtfsfi

This moves the insn_code values for mcrfs, mtfsb0/1 and mtfsfi into
the region used for floating-point instructions.  This means that in
no-FPU implementations, they will get turned into illegal instructions
in predecode.  We then don't need the code in execute1 that makes FP
instructions illegal in no-FPU implementations.

We also remove the NONE value for unit_t, since it was only ever used
with insn_type = OP_ILLEGAL, and the check for unit = NONE was
redundant with the check for insn_type = OP_ILLEGAL.  Thus the check
for unit = NONE is no longer needed and is removed here.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 common.vhdl       |  2 +-
 decode1.vhdl      |  4 +--
 decode_types.vhdl | 62 ++++++++++++++++++++++++-----------------------
 execute1.vhdl     |  5 ----
 4 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/common.vhdl b/common.vhdl
index 7c7a8d5..44302c7 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -361,7 +361,7 @@ package common is
         dec_ctr : std_ulogic;
     end record;
     constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
-	(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
+	(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
          write_reg_enable => '0',
          lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0',
 	 invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
diff --git a/decode1.vhdl b/decode1.vhdl
index 559a505..c987bec 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -64,7 +64,7 @@ architecture behaviour of decode1 is
     constant decode_rom : decoder_rom_t := (
         --                   unit   fac   internal      in1         in2          in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl  rpt
         --                                     op                                            in   out   A   out  in    out  len        ext                                 pipe
-        INSN_illegal     =>  (NONE, NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_illegal     =>  (ALU,  NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_fetch_fail  =>  (LDST, NONE, OP_FETCH_FAILED, CIA,     NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
 
         INSN_add         =>  (ALU,  NONE, OP_ADD,       RA,         RB,          NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
@@ -347,7 +347,7 @@ architecture behaviour of decode1 is
         INSN_xori        =>  (ALU,  NONE, OP_XOR,       NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_xoris       =>  (ALU,  NONE, OP_XOR,       NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
 
-        others           =>  (NONE, NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE)
+        others           =>  (ALU,  NONE, OP_ILLEGAL,   NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE)
         );
 
     function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
diff --git a/decode_types.vhdl b/decode_types.vhdl
index 7ceb2ae..e9f6e70 100644
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@@ -84,60 +84,57 @@ package decode_types is
         INSN_lwz,
         INSN_lwzu, -- 50
         INSN_mcrf,
-        INSN_mcrfs,
         INSN_mcrxrx,
         INSN_mfcr,
         INSN_mfmsr,
         INSN_mfspr,
         INSN_mtcrf,
-        INSN_mtfsb,
-        INSN_mtfsfi,
-        INSN_mtmsr, -- 60
+        INSN_mtmsr,
         INSN_mtmsrd,
         INSN_mtspr,
-        INSN_mulli,
+        INSN_mulli, -- 60
         INSN_neg,
         INSN_nop,
         INSN_ori,
         INSN_oris,
         INSN_popcntb,
         INSN_popcntw,
-        INSN_popcntd, -- 70
+        INSN_popcntd,
         INSN_prtyw,
         INSN_prtyd,
-        INSN_rfid,
+        INSN_rfid, -- 70
         INSN_rldic,
         INSN_rldicl,
         INSN_rldicr,
         INSN_rldimi,
         INSN_rlwimi,
         INSN_rlwinm,
-        INSN_sc, -- 80
+        INSN_sc,
         INSN_setb,
         INSN_slbia,
-        INSN_sradi,
+        INSN_sradi, -- 80
         INSN_srawi,
         INSN_stb,
         INSN_stbu,
         INSN_std,
         INSN_stdu,
         INSN_sth,
-        INSN_sthu, -- 90
+        INSN_sthu,
         INSN_stw,
         INSN_stwu,
-        INSN_subfic,
+        INSN_subfic, -- 90
         INSN_subfme,
         INSN_subfze,
         INSN_sync,
         INSN_tdi,
         INSN_tlbsync,
         INSN_twi,
-        INSN_wait, -- 100
+        INSN_wait,
         INSN_xori,
         INSN_xoris,
 
         -- pad to 112 to simplify comparison logic
-        INSN_103,
+        INSN_100, INSN_101, INSN_102, INSN_103,
         INSN_104, INSN_105, INSN_106, INSN_107,
         INSN_108, INSN_109, INSN_110, INSN_111,
 
@@ -289,59 +286,64 @@ package decode_types is
         INSN_lfiwzx,
         INSN_lfsx,
         INSN_lfsux,
-        INSN_275, -- padding
+        -- These are here in order to keep the FP instructions together
+        INSN_mcrfs,
+        INSN_mtfsb,
+        INSN_mtfsfi,
+        INSN_278, -- padding
+        INSN_279,
 
         -- The following instructions access FRA and/or FRB operands
-        INSN_fabs,
+        INSN_fabs, -- 280
         INSN_fadd,
         INSN_fadds,
         INSN_fcfid,
-        INSN_fcfids, -- 280
+        INSN_fcfids,
         INSN_fcfidu,
         INSN_fcfidus,
         INSN_fcmpo,
         INSN_fcmpu,
         INSN_fcpsgn,
-        INSN_fctid,
+        INSN_fctid, -- 290
         INSN_fctidz,
         INSN_fctidu,
         INSN_fctiduz,
-        INSN_fctiw, -- 290
+        INSN_fctiw,
         INSN_fctiwz,
         INSN_fctiwu,
         INSN_fctiwuz,
         INSN_fdiv,
         INSN_fdivs,
-        INSN_fmr,
+        INSN_fmr, -- 300
         INSN_fmrgew,
         INSN_fmrgow,
         INSN_fnabs,
-        INSN_fneg, -- 300
+        INSN_fneg,
         INSN_fre,
         INSN_fres,
         INSN_frim,
         INSN_frin,
         INSN_frip,
-        INSN_friz,
+        INSN_friz, -- 310
         INSN_frsp,
         INSN_frsqrte,
         INSN_frsqrtes,
-        INSN_fsqrt, -- 310
+        INSN_fsqrt,
         INSN_fsqrts,
         INSN_fsub,
         INSN_fsubs,
         INSN_ftdiv,
         INSN_ftsqrt,
-        INSN_mffs,
+        INSN_mffs, -- 320
         INSN_mtfsf,
 
-        -- pad to 320
-        INSN_318, INSN_319,
+        -- pad to 328
+        INSN_322, INSN_323, INSN_324, INSN_325, INSN_326, INSN_327,
 
         -- The following instructions access FRA, FRB (possibly) and FRC operands
-        INSN_fmul, -- 320
+        INSN_fmul,
         INSN_fmuls,
-        INSN_fmadd,
+        INSN_fmadd, -- 330
         INSN_fmadds,
         INSN_fmsub,
         INSN_fmsubs,
@@ -349,7 +351,7 @@ package decode_types is
         INSN_fnmadds,
         INSN_fnmsub,
         INSN_fnmsubs,
-        INSN_fsel  -- 330
+        INSN_fsel
         );
 
     constant INSN_first_rb : insn_code := INSN_add;
@@ -384,7 +386,7 @@ package decode_types is
 
     constant TOO_OFFSET : integer := 0;
 
-    type unit_t is (NONE, ALU, LDST, FPU);
+    type unit_t is (ALU, LDST, FPU);
     type facility_t is (NONE, FPU);
     type length_t is (NONE, is1B, is2B, is4B, is8B);
 
@@ -425,7 +427,7 @@ package decode_types is
 	sgl_pipe     : std_ulogic;
         repeat       : repeat_t;
     end record;
-    constant decode_rom_init : decode_rom_t := (unit => NONE, facility => NONE,
+    constant decode_rom_init : decode_rom_t := (unit => ALU, facility => NONE,
 						insn_type => OP_ILLEGAL, input_reg_a => NONE,
 						input_reg_b => NONE, input_reg_c => NONE,
 						output_reg_a => NONE, input_cr => '0', output_cr => '0',
diff --git a/execute1.vhdl b/execute1.vhdl
index e31f4d6..db1159d 100644
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -1028,11 +1028,6 @@ begin
             privileged := '1';
         end if;
 
-        if (not HAS_FPU and e_in.fac = FPU) or e_in.unit = NONE then
-            -- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
-            illegal := '1';
-        end if;
-
         v.do_trace := ex1.msr(MSR_SE);
         case_0: case e_in.insn_type is
 	    when OP_ILLEGAL =>

From 39ca675ce399c494aad7c27a1caa8ceff62c23af Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 3 Jul 2023 12:32:43 +1000
Subject: [PATCH 3/7] Decode prefixed instructions

This adds logic to do basic decoding of the prefixed instructions
defined in PowerISA v3.1B which are in the SFFS (Scalar Fixed plus
Floating-Point Subset) compliancy subset.  In PowerISA v3.1B SFFS,
there are 14 prefixed load/store instructions plus the prefixed no-op
instruction (pnop); a decode table entry is also added here for the
prefixed add-immediate instruction (paddi).  The prefixed load/store
instructions all use an
extended version of D-form, which has an extra 18 bits of displacement
in the prefix, plus an 'R' bit which enables PC-relative addressing.

When decode1 sees an instruction word where the insn_code is
INSN_prefix (i.e. the primary opcode was 1), it stores the prefix word
and sends nothing down to decode2 in that cycle.  When the next valid
instruction word arrives, it is interpreted as a suffix, meaning that
its insn_code gets modified before being used to look up the decode
table.

The insn_code values are rearranged so that the values for
instructions which are the suffix of a valid prefixed instruction are
all at even indexes, and the corresponding prefixed instructions
follow immediately, so that an insn_code value can be converted to the
corresponding prefixed value by setting the LSB of the insn_code
value.  There are two prefixed instructions, pld and pstd, for which
the suffix is not a valid SFFS instruction by itself, so these have
been given dummy insn_code values which decode as illegal (INSN_op57
and INSN_op61).

For a prefixed instruction, decode1 examines the type and subtype
fields of the prefix and checks that the suffix is valid for the type
and subtype.  This check doesn't affect which entry of the decode
table is used; the result is passed down to decode2, and will in
future be acted upon in execute1.

The instruction address passed down to decode2 is the address of the
prefix.  To enable this, part of the instruction address is saved when
the prefix is seen, and then the instruction address received from
icache is partly overlaid by the saved prefix address.  Because
prefixed instructions are not permitted to cross 64-byte boundaries,
we only need to save bits 5:2 of the instruction address to do this.
If the alignment restriction ever gets relaxed, we will then need to
save more bits of the address.

Decode2 has been extended to handle the R bit of the prefix (in 8LS
and MLS forms) and to be able to generate the 34-bit immediate value
from the prefix and suffix.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 common.vhdl       |   7 +-
 decode1.vhdl      |  90 +++++++++++++++++++-
 decode2.vhdl      |  14 ++--
 decode_types.vhdl | 207 +++++++++++++++++++++++++++-------------------
 insn_helpers.vhdl |  15 ++++
 predecode.vhdl    |   5 ++
 6 files changed, 243 insertions(+), 95 deletions(-)

diff --git a/common.vhdl b/common.vhdl
index 44302c7..838179b 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -263,6 +263,9 @@ package common is
 	valid: std_ulogic;
 	stop_mark : std_ulogic;
 	nia: std_ulogic_vector(63 downto 0);
+        prefixed: std_ulogic;
+        prefix: std_ulogic_vector(25 downto 0);
+        illegal_suffix: std_ulogic;
 	insn: std_ulogic_vector(31 downto 0);
 	decode: decode_rom_t;
         br_pred: std_ulogic; -- Branch was predicted to be taken
@@ -274,7 +277,9 @@ package common is
         reg_c : gspr_index_t;
     end record;
     constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
-        (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
+        (valid => '0', stop_mark => '0', nia => (others => '0'),
+         prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
+         illegal_suffix => '0',
          decode => decode_rom_init, br_pred => '0', big_endian => '0',
          spr_info => spr_id_init, ram_spr => ram_spr_info_init,
          reg_a => (others => '0'), reg_b => (others => '0'), reg_c => (others => '0'));
diff --git a/decode1.vhdl b/decode1.vhdl
index c987bec..138e483 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -45,6 +45,16 @@ architecture behaviour of decode1 is
     signal decode_rom_addr : insn_code;
     signal decode : decode_rom_t;
 
+    type prefix_state_t is record
+        prefixed : std_ulogic;
+        prefix   : std_ulogic_vector(25 downto 0);
+        pref_ia  : std_ulogic_vector(3 downto 0);
+    end record;
+    constant prefix_state_init : prefix_state_t := (prefixed => '0', prefix => (others => '0'),
+                                                    pref_ia => (others => '0'));
+
+    signal pr, pr_in : prefix_state_t;
+
     signal fetch_failed : std_ulogic;
 
     -- If we have an FPU, then it is used for integer divisions,
@@ -266,6 +276,22 @@ architecture behaviour of decode1 is
         INSN_orc         =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
         INSN_ori         =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_oris        =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_paddi       =>  (ALU,  NONE, OP_ADD,       RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plbz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pld         =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plfd        =>  (LDST, FPU,  OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, FRT,  '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plfs        =>  (LDST, FPU,  OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, FRT,  '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
+        INSN_plha        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plhz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plwa        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plwz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pnop        =>  (ALU,  NONE, OP_NOP,       NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstb        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstd        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstfd       =>  (LDST, FPU,  OP_STORE,     RA0_OR_CIA, CONST_PSI,   FRS,  NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstfs       =>  (LDST, FPU,  OP_STORE,     RA0_OR_CIA, CONST_PSI,   FRS,  NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
+        INSN_psth        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstw        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_popcntb     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_popcntd     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_popcntw     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@@ -434,12 +460,17 @@ begin
             if rst = '1' then
                 r <= Decode1ToDecode2Init;
                 fetch_failed <= '0';
+                pr <= prefix_state_init;
             elsif flush_in = '1' then
                 r.valid <= '0';
                 fetch_failed <= '0';
+                pr <= prefix_state_init;
             elsif stall_in = '0' then
                 r <= rin;
                 fetch_failed <= f_in.fetch_failed;
+                if f_in.valid = '1' then
+                    pr <= pr_in;
+                end if;
             end if;
             if rst = '1' then
                 br.br_nia <= (others => '0');
@@ -471,12 +502,18 @@ begin
         variable icode : insn_code;
         variable sprn : spr_num_t;
         variable maybe_rb : std_ulogic;
+        variable pv : prefix_state_t;
+        variable icode_bits : std_ulogic_vector(9 downto 0);
+        variable valid_suffix : std_ulogic;
     begin
         v := Decode1ToDecode2Init;
+        pv := pr;
 
         v.valid := f_in.valid;
         v.nia  := f_in.nia;
         v.insn := f_in.insn;
+        v.prefix := pr.prefix;
+        v.prefixed := pr.prefixed;
         v.stop_mark := f_in.stop_mark;
         v.big_endian := f_in.big_endian;
 
@@ -490,17 +527,59 @@ begin
         end if;
 
         icode := f_in.icode;
+        icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(icode), 10));
 
         if f_in.fetch_failed = '1' then
-            icode := INSN_fetch_fail;
+            icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_fetch_fail), 10));
             -- Only send down a single OP_FETCH_FAILED
             v.valid := not fetch_failed;
+            pv := prefix_state_init;
+
+        elsif pr.prefixed = '1' then
+            -- Check suffix value and convert to the prefixed instruction code
+            if pr.prefix(24) = '1' then
+                -- either pnop or illegal
+                icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_pnop), 10));
+            else
+                -- various load/store instructions
+                icode_bits(0) := '1';
+            end if;
+            valid_suffix := '0';
+            case pr.prefix(25 downto 23) is
+                when "000" =>    -- 8LS
+                    if icode >= INSN_first_8ls and icode < INSN_first_rb then
+                        valid_suffix := '1';
+                    end if;
+                when "100" =>   -- MLS
+                    if icode >= INSN_first_mls and icode < INSN_first_8ls then
+                        valid_suffix := '1';
+                    elsif icode >= INSN_first_fp_mls and icode < INSN_first_fp_nonmls then
+                        valid_suffix := '1';
+                    end if;
+                when "110" =>   -- MRR, i.e. pnop
+                    if pr.prefix(22 downto 20) = "000" then
+                        valid_suffix := '1';
+                    end if;
+                when others =>
+            end case;
+            v.nia(5 downto 2) := pr.pref_ia;
+            v.prefixed := '1';
+            v.prefix := pr.prefix;
+            v.illegal_suffix := not valid_suffix;
+            pv := prefix_state_init;
+
+        elsif icode = INSN_prefix then
+            pv.prefixed := '1';
+            pv.pref_ia := f_in.nia(5 downto 2);
+            pv.prefix := f_in.insn(25 downto 0);
+            v.valid := '0';
+
         end if;
-        decode_rom_addr <= icode;
+        decode_rom_addr <= insn_code'val(to_integer(unsigned(icode_bits)));
 
         if f_in.valid = '1' then
-            report "Decode " & insn_code'image(icode) & " " & to_hstring(f_in.insn) &
-                " at " & to_hstring(f_in.nia);
+            report "Decode " & insn_code'image(insn_code'val(to_integer(unsigned(icode_bits)))) & " " &
+                to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
         end if;
 
         -- Branch predictor
@@ -533,6 +612,8 @@ begin
         br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);
 
         -- Work out GPR/FPR read addresses
+        -- Note that for prefixed instructions we are working this out based
+        -- only on the suffix.
         maybe_rb := '0';
         vr.reg_1_addr := '0' & insn_ra(f_in.insn);
         vr.reg_2_addr := '0' & insn_rb(f_in.insn);
@@ -568,6 +649,7 @@ begin
         -- Update registers
         rin <= v;
         br_in <= bv;
+        pr_in <= pv;
 
         -- Update outputs
         d_out <= r;
diff --git a/decode2.vhdl b/decode2.vhdl
index f58bd9b..fa3b54d 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -83,12 +83,13 @@ architecture behaviour of decode2 is
     constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));
 
     function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
+                                 prefix : std_ulogic_vector(25 downto 0);
                                  instr_addr : std_ulogic_vector(63 downto 0))
         return decode_input_reg_t is
     begin
-        if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
+        if t = RA or ((t = RA_OR_ZERO or t = RA0_OR_CIA) and insn_ra(insn_in) /= "00000") then
             return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
-        elsif t = CIA then
+        elsif t = CIA or (t = RA0_OR_CIA and insn_prefix_r(prefix) = '1') then
             return ('0', (others => '0'), instr_addr);
         elsif HAS_FPU and t = FRA then
             return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
@@ -97,7 +98,8 @@ architecture behaviour of decode2 is
         end if;
     end;
 
-    function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0))
+    function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
+                                 prefix : std_ulogic_vector(25 downto 0))
         return decode_input_reg_t is
         variable ret : decode_input_reg_t;
     begin
@@ -114,6 +116,8 @@ architecture behaviour of decode2 is
                 ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
             when CONST_SI =>
                 ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)), 64)));
+            when CONST_PSI =>
+                ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_prefixed_si(prefix, insn_in)), 64)));
             when CONST_SI_HI =>
                 ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64)));
             when CONST_UI_HI =>
@@ -373,8 +377,8 @@ begin
         decoded_reg_c <= decode_input_reg_init;
         decoded_reg_o <= decode_output_reg_init;
         if d_in.valid = '1' then
-            decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.nia);
-            decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn);
+            decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.prefix, d_in.nia);
+            decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
             decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
             decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
         end if;
diff --git a/decode_types.vhdl b/decode_types.vhdl
index e9f6e70..428d943 100644
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@@ -34,15 +34,16 @@ package decode_types is
         -- The following instructions don't have an RB operand or access FPRs
         INSN_illegal, -- 0
         INSN_fetch_fail,
-        INSN_addi,
+        INSN_prefix,
+        INSN_pnop,
         INSN_addic,
         INSN_addic_dot,
         INSN_addis,
         INSN_addme,
         INSN_addpcis,
         INSN_addze,
-        INSN_andi_dot,
-        INSN_andis_dot, -- 10
+        INSN_andi_dot, -- 10
+        INSN_andis_dot,
         INSN_attn,
         INSN_b,
         INSN_bc,
@@ -51,8 +52,8 @@ package decode_types is
         INSN_bctar,
         INSN_cbcdtd,
         INSN_cdtbcd,
-        INSN_cmpi,
-        INSN_cmpli, -- 20
+        INSN_cmpi, -- 20
+        INSN_cmpli,
         INSN_cntlzw,
         INSN_cntlzd,
         INSN_cnttzw,
@@ -61,8 +62,8 @@ package decode_types is
         INSN_crandc,
         INSN_creqv,
         INSN_crnand,
-        INSN_crnor,
-        INSN_cror, -- 30
+        INSN_crnor, -- 30
+        INSN_cror,
         INSN_crorc,
         INSN_crxor,
         INSN_darn,
@@ -71,182 +72,203 @@ package decode_types is
         INSN_extsh,
         INSN_extsw,
         INSN_extswsli,
-        INSN_isync,
-        INSN_lbz, -- 40
+        INSN_isync, -- 40
         INSN_lbzu,
         INSN_ld,
         INSN_ldu,
-        INSN_lha,
         INSN_lhau,
-        INSN_lhz,
-        INSN_lhzu,
         INSN_lwa,
-        INSN_lwz,
-        INSN_lwzu, -- 50
+        INSN_lwzu,
         INSN_mcrf,
         INSN_mcrxrx,
         INSN_mfcr,
-        INSN_mfmsr,
+        INSN_mfmsr, -- 50
         INSN_mfspr,
         INSN_mtcrf,
         INSN_mtmsr,
         INSN_mtmsrd,
         INSN_mtspr,
-        INSN_mulli, -- 60
+        INSN_mulli,
         INSN_neg,
         INSN_nop,
         INSN_ori,
-        INSN_oris,
+        INSN_oris, -- 60
         INSN_popcntb,
         INSN_popcntw,
         INSN_popcntd,
         INSN_prtyw,
         INSN_prtyd,
-        INSN_rfid, -- 70
+        INSN_rfid,
         INSN_rldic,
         INSN_rldicl,
         INSN_rldicr,
-        INSN_rldimi,
+        INSN_rldimi, -- 70
         INSN_rlwimi,
         INSN_rlwinm,
         INSN_sc,
         INSN_setb,
         INSN_slbia,
-        INSN_sradi, -- 80
+        INSN_sradi,
         INSN_srawi,
-        INSN_stb,
         INSN_stbu,
         INSN_std,
-        INSN_stdu,
-        INSN_sth,
+        INSN_stdu, -- 80
         INSN_sthu,
-        INSN_stw,
         INSN_stwu,
-        INSN_subfic, -- 90
+        INSN_subfic,
         INSN_subfme,
         INSN_subfze,
         INSN_sync,
         INSN_tdi,
         INSN_tlbsync,
         INSN_twi,
-        INSN_wait,
+        INSN_wait, -- 90
         INSN_xori,
         INSN_xoris,
+        INSN_93, -- padding
+        INSN_94,
+        INSN_95,
+
+        -- Non-prefixed instructions that have a MLS:D prefixed form and
+        -- their corresponding prefixed instructions.
+        -- The non-prefixed versions have even indexes so that we can
+        -- convert them to the prefixed version by setting bit 0
+        INSN_addi, -- 96
+        INSN_paddi,
+        INSN_lbz,
+        INSN_plbz,
+        INSN_lha, -- 100
+        INSN_plha,
+        INSN_lhz,
+        INSN_plhz,
+        INSN_lwz,
+        INSN_plwz,
+        INSN_stb,
+        INSN_pstb,
+        INSN_sth,
+        INSN_psth,
+        INSN_stw, -- 110
+        INSN_pstw,
 
-        -- pad to 112 to simplify comparison logic
-        INSN_100, INSN_101, INSN_102, INSN_103,
-        INSN_104, INSN_105, INSN_106, INSN_107,
-        INSN_108, INSN_109, INSN_110, INSN_111,
+        -- Slots for non-prefixed opcodes that are 8LS:D when prefixed
+        INSN_lhzu, -- 112
+        INSN_plwa,
+        INSN_op57,
+        INSN_pld,
+        INSN_op61,
+        INSN_pstd,
+
+        -- pad to 128 to simplify comparison logic
+        INSN_076, INSN_077,
+        INSN_078, INSN_079, INSN_07a, INSN_07b, INSN_07c, INSN_07d, INSN_07e, INSN_07f,
 
         -- The following instructions have an RB operand but don't access FPRs
         INSN_add,
         INSN_addc,
-        INSN_adde,
+        INSN_adde, -- 130
         INSN_addex,
         INSN_addg6s,
         INSN_and,
         INSN_andc,
         INSN_bperm,
-        INSN_cmp, -- 120
+        INSN_cmp,
         INSN_cmpb,
         INSN_cmpeqb,
         INSN_cmpl,
-        INSN_cmprb,
+        INSN_cmprb, -- 140
         INSN_dcbf,
         INSN_dcbst,
         INSN_dcbt,
         INSN_dcbtst,
         INSN_dcbz,
-        INSN_divd, -- 130
+        INSN_divd,
         INSN_divdu,
         INSN_divde,
         INSN_divdeu,
-        INSN_divw,
+        INSN_divw, -- 150
         INSN_divwu,
         INSN_divwe,
         INSN_divweu,
         INSN_eqv,
         INSN_icbi,
-        INSN_icbt, -- 140
+        INSN_icbt,
         INSN_isel,
         INSN_lbarx,
         INSN_lbzcix,
-        INSN_lbzux,
+        INSN_lbzux, -- 160
         INSN_lbzx,
         INSN_ldarx,
         INSN_ldbrx,
         INSN_ldcix,
         INSN_ldx,
-        INSN_ldux, -- 150
+        INSN_ldux,
         INSN_lharx,
         INSN_lhax,
         INSN_lhaux,
-        INSN_lhbrx,
+        INSN_lhbrx, -- 170
         INSN_lhzcix,
         INSN_lhzx,
         INSN_lhzux,
         INSN_lwarx,
         INSN_lwax,
-        INSN_lwaux, -- 160
+        INSN_lwaux,
         INSN_lwbrx,
         INSN_lwzcix,
         INSN_lwzx,
-        INSN_lwzux,
+        INSN_lwzux, -- 180
         INSN_modsd,
         INSN_modsw,
         INSN_moduw,
         INSN_modud,
         INSN_mulhw,
-        INSN_mulhwu, -- 170
+        INSN_mulhwu,
         INSN_mulhd,
         INSN_mulhdu,
         INSN_mullw,
-        INSN_mulld,
+        INSN_mulld, -- 190
         INSN_nand,
         INSN_nor,
         INSN_or,
         INSN_orc,
         INSN_rldcl,
-        INSN_rldcr, -- 180
+        INSN_rldcr,
         INSN_rlwnm,
         INSN_slw,
         INSN_sld,
-        INSN_sraw,
+        INSN_sraw, -- 200
         INSN_srad,
         INSN_srw,
         INSN_srd,
         INSN_stbcix,
         INSN_stbcx,
-        INSN_stbx, -- 190
+        INSN_stbx,
         INSN_stbux,
         INSN_stdbrx,
         INSN_stdcix,
-        INSN_stdcx,
+        INSN_stdcx, -- 210
         INSN_stdx,
         INSN_stdux,
         INSN_sthbrx,
         INSN_sthcix,
         INSN_sthcx,
-        INSN_sthx, -- 200
+        INSN_sthx,
         INSN_sthux,
         INSN_stwbrx,
         INSN_stwcix,
-        INSN_stwcx,
+        INSN_stwcx, -- 220
         INSN_stwx,
         INSN_stwux,
         INSN_subf,
         INSN_subfc,
         INSN_subfe,
-        INSN_td, -- 210
+        INSN_td,
         INSN_tlbie,
         INSN_tlbiel,
         INSN_tw,
-        INSN_xor,
+        INSN_xor, -- 230
 
-        -- pad to 224 to simplify comparison logic
-        INSN_215,
-        INSN_216, INSN_217, INSN_218, INSN_219,
-        INSN_220, INSN_221, INSN_222, INSN_223,
+        -- pad to 232 to simplify comparison logic
+        INSN_231,
 
         -- The following instructions have a third input addressed by RC
         INSN_maddld,
@@ -254,9 +276,7 @@ package decode_types is
         INSN_maddhdu,
 
         -- pad to 256 to simplify comparison logic
-        INSN_227,
-        INSN_228, INSN_229, INSN_230, INSN_231,
-        INSN_232, INSN_233, INSN_234, INSN_235,
+        INSN_235,
         INSN_236, INSN_237, INSN_238, INSN_239,
         INSN_240, INSN_241, INSN_242, INSN_243,
         INSN_244, INSN_245, INSN_246, INSN_247,
@@ -264,39 +284,52 @@ package decode_types is
         INSN_252, INSN_253, INSN_254, INSN_255,
 
         -- The following instructions access floating-point registers
-        -- These ones have an FRS operand, but RA/RB are GPRs
-        INSN_stfd,
-        INSN_stfdu,
+        -- They have an FRS operand, but RA/RB are GPRs
+
+        -- Non-prefixed floating-point loads and stores that have a MLS:D
+        -- prefixed form, and their corresponding prefixed instructions.
+        INSN_stfd, -- 256
+        INSN_pstfd,
         INSN_stfs,
+        INSN_pstfs,
+        INSN_lfd, -- 260
+        INSN_plfd,
+        INSN_lfs,
+        INSN_plfs,
+
+        -- opcodes that can't have a prefix
+        INSN_stfdu, -- 264
         INSN_stfsu,
-        INSN_stfdux, -- 260
+        INSN_stfdux,
         INSN_stfdx,
         INSN_stfiwx,
         INSN_stfsux,
-        INSN_stfsx,
+        INSN_stfsx, -- 270
         -- These ones don't actually have an FRS operand (rather an FRT destination)
         -- but are here so that all FP instructions are >= INST_first_frs.
-        INSN_lfd,
         INSN_lfdu,
-        INSN_lfs,
         INSN_lfsu,
         INSN_lfdx,
-        INSN_lfdux, -- 270
+        INSN_lfdux,
         INSN_lfiwax,
         INSN_lfiwzx,
         INSN_lfsx,
         INSN_lfsux,
         -- These are here in order to keep the FP instructions together
         INSN_mcrfs,
-        INSN_mtfsb,
+        INSN_mtfsb, -- 280
         INSN_mtfsfi,
-        INSN_278, -- padding
-        INSN_279,
+        INSN_282, -- padding
+        INSN_283,
+        INSN_284,
+        INSN_285,
+        INSN_286,
+        INSN_287,
 
         -- The following instructions access FRA and/or FRB operands
-        INSN_fabs, -- 280
+        INSN_fabs, -- 288
         INSN_fadd,
-        INSN_fadds,
+        INSN_fadds, -- 290
         INSN_fcfid,
         INSN_fcfids,
         INSN_fcfidu,
@@ -304,9 +337,9 @@ package decode_types is
         INSN_fcmpo,
         INSN_fcmpu,
         INSN_fcpsgn,
-        INSN_fctid, -- 290
+        INSN_fctid,
         INSN_fctidz,
-        INSN_fctidu,
+        INSN_fctidu, -- 300
         INSN_fctiduz,
         INSN_fctiw,
         INSN_fctiwz,
@@ -314,9 +347,9 @@ package decode_types is
         INSN_fctiwuz,
         INSN_fdiv,
         INSN_fdivs,
-        INSN_fmr, -- 300
+        INSN_fmr,
         INSN_fmrgew,
-        INSN_fmrgow,
+        INSN_fmrgow, -- 310
         INSN_fnabs,
         INSN_fneg,
         INSN_fre,
@@ -324,9 +357,9 @@ package decode_types is
         INSN_frim,
         INSN_frin,
         INSN_frip,
-        INSN_friz, -- 310
+        INSN_friz,
         INSN_frsp,
-        INSN_frsqrte,
+        INSN_frsqrte, -- 320
         INSN_frsqrtes,
         INSN_fsqrt,
         INSN_fsqrts,
@@ -334,18 +367,18 @@ package decode_types is
         INSN_fsubs,
         INSN_ftdiv,
         INSN_ftsqrt,
-        INSN_mffs, -- 320
+        INSN_mffs,
         INSN_mtfsf,
 
-        -- pad to 328
-        INSN_322, INSN_323, INSN_324, INSN_325, INSN_326, INSN_327,
+        -- pad to 336
+        INSN_330, INSN_331, INSN_332, INSN_333, INSN_334, INSN_335,
 
         -- The following instructions access FRA, FRB (possibly) and FRC operands
-        INSN_fmul,
+        INSN_fmul, -- 336
         INSN_fmuls,
-        INSN_fmadd, -- 330
+        INSN_fmadd,
         INSN_fmadds,
-        INSN_fmsub,
+        INSN_fmsub, -- 340
         INSN_fmsubs,
         INSN_fnmadd,
         INSN_fnmadds,
@@ -359,10 +392,14 @@ package decode_types is
     constant INSN_first_frs : insn_code := INSN_stfd;
     constant INSN_first_frab : insn_code := INSN_fabs;
     constant INSN_first_frabc : insn_code := INSN_fmul;
+    constant INSN_first_mls : insn_code := INSN_addi;
+    constant INSN_first_8ls : insn_code := INSN_lhzu;
+    constant INSN_first_fp_mls : insn_code := INSN_stfd;
+    constant INSN_first_fp_nonmls : insn_code := INSN_stfdu;
 
-    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA);
+    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA);
     type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
-                           CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB);
+                           CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI, FRB);
     type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
     type output_reg_a_t is (NONE, RT, RA, FRT);
     type rc_t is (NONE, ONE, RC, RCOE);
diff --git a/insn_helpers.vhdl b/insn_helpers.vhdl
index 2ddcadb..acd75e9 100644
--- a/insn_helpers.vhdl
+++ b/insn_helpers.vhdl
@@ -43,6 +43,9 @@ package insn_helpers is
     function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
     function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
     function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
+    function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic;
+    function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
+        return std_ulogic_vector;
 end package insn_helpers;
 
 package body insn_helpers is
@@ -250,4 +253,16 @@ package body insn_helpers is
     begin
         return insn_in(15 downto 12);
     end;
+
+    function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic is  -- extract a single flag bit from a prefix word
+    begin
+        return prefix(20);  -- presumably the R (PC-relative) bit of MLS:D / 8LS:D prefixes; confirm against the ISA
+    end;
+
+    function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
+        return std_ulogic_vector is  -- build the immediate field of a prefixed instruction
+    begin
+        return prefix(17 downto 0) & suffix(15 downto 0);  -- 18 prefix bits (high part) & 16 suffix bits (low part) = 34 bits
+    end;
+
 end package body insn_helpers;
diff --git a/predecode.vhdl b/predecode.vhdl
index 7e1149b..41b26ad 100644
--- a/predecode.vhdl
+++ b/predecode.vhdl
@@ -158,6 +158,11 @@ architecture behaviour of predecoder is
         2#111111_11010# to 2#111111_11011# =>  INSN_fmadd,
         2#111111_11100# to 2#111111_11101# =>  INSN_fnmsub,
         2#111111_11110# to 2#111111_11111# =>  INSN_fnmadd,
+        -- prefix word, PO1
+        2#000001_00000# to 2#000001_11111# =>  INSN_prefix,
+        -- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed
+        2#111001_00000# to 2#111001_11111# =>  INSN_op57,
+        2#111101_00000# to 2#111101_11111# =>  INSN_op61,
         others                             =>  INSN_illegal
         );
 

From c4492c843a6d4c61df7e0134c3b13342599b1102 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 3 Jul 2023 18:19:38 +1000
Subject: [PATCH 4/7] Implement interrupts for prefixed instructions

This arranges to generate an illegal instruction type program
interrupt for illegal prefixed instructions, that is, those where the
suffix is not a legal value given the prefix, or the prefix has a
reserved value in the subtype field.  This implementation doesn't
generate an interrupt for the invalid 8LS:D and MLS:D instruction
forms where R = 1 and RA != 0.  (In those cases it uses (RA) as the
addend, i.e. it ignores the R bit.)

This detects the case where the address of an instruction prefix is
congruent to 60 modulo 64 (i.e. the prefix word is the last word of a
64-byte block), and generates an alignment interrupt in that case.

This also arranges to set bit 34 of SRR1 when an interrupt occurs due
to a prefixed instruction, for those interrupts where that is required
(i.e. trace, alignment, floating-point unavailable, data storage, data
segment, and most cases of program interrupt).

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 common.vhdl     | 10 ++++++++--
 decode1.vhdl    |  8 +++++++-
 decode2.vhdl    | 33 +++++++++++++++++++++------------
 execute1.vhdl   | 28 ++++++++++++++++++++++++++--
 loadstore1.vhdl |  7 ++++++-
 5 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/common.vhdl b/common.vhdl
index 838179b..59c855e 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -266,6 +266,7 @@ package common is
         prefixed: std_ulogic;
         prefix: std_ulogic_vector(25 downto 0);
         illegal_suffix: std_ulogic;
+        misaligned_prefix: std_ulogic;
 	insn: std_ulogic_vector(31 downto 0);
 	decode: decode_rom_t;
         br_pred: std_ulogic; -- Branch was predicted to be taken
@@ -279,7 +280,7 @@ package common is
     constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
         (valid => '0', stop_mark => '0', nia => (others => '0'),
          prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
-         illegal_suffix => '0',
+         illegal_suffix => '0', misaligned_prefix => '0',
          decode => decode_rom_init, br_pred => '0', big_endian => '0',
          spr_info => spr_id_init, ram_spr => ram_spr_info_init,
          reg_a => (others => '0'), reg_b => (others => '0'), reg_c => (others => '0'));
@@ -364,6 +365,9 @@ package common is
         ramspr_write_odd   : std_ulogic;
         dbg_spr_access : std_ulogic;
         dec_ctr : std_ulogic;
+        prefixed : std_ulogic;
+        illegal_suffix : std_ulogic;
+        misaligned_prefix : std_ulogic;
     end record;
     constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
 	(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
@@ -383,6 +387,7 @@ package common is
          ramspr_wraddr => (others => '0'), ramspr_write_even => '0', ramspr_write_odd => '0',
          dbg_spr_access => '0',
          dec_ctr => '0',
+         prefixed => '0', illegal_suffix => '0', misaligned_prefix => '0',
          others => (others => '0'));
 
     type MultiplyInputType is record
@@ -505,6 +510,7 @@ package common is
         priv_mode : std_ulogic;                         -- privileged mode (MSR[PR] = 0)
         mode_32bit : std_ulogic;                        -- trim addresses to 32 bits
         is_32bit : std_ulogic;
+        prefixed : std_ulogic;
         repeat : std_ulogic;
         second : std_ulogic;
         e2stall : std_ulogic;
@@ -519,7 +525,7 @@ package common is
          addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
          write_reg => (others => '0'),
          length => (others => '0'),
-         mode_32bit => '0', is_32bit => '0',
+         mode_32bit => '0', is_32bit => '0', prefixed => '0',
          repeat => '0', second => '0', e2stall => '0',
          msr => (others => '0'));
 
diff --git a/decode1.vhdl b/decode1.vhdl
index 138e483..0aa2fee 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -572,7 +572,13 @@ begin
             pv.prefixed := '1';
             pv.pref_ia := f_in.nia(5 downto 2);
             pv.prefix := f_in.insn(25 downto 0);
-            v.valid := '0';
+            -- Check if the address of the prefix mod 64 is 60;
+            -- if so we need to arrange to generate an alignment interrupt
+            if f_in.nia(5 downto 2) = "1111" then
+                v.misaligned_prefix := '1';
+            else
+                v.valid := '0';
+            end if;
 
         end if;
         decode_rom_addr <= insn_code'val(to_integer(unsigned(icode_bits)));
diff --git a/decode2.vhdl b/decode2.vhdl
index fa3b54d..338a80a 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -371,21 +371,27 @@ begin
     c_out.read <= d_in.decode.input_cr;
 
     decode2_addrs: process(all)
+        variable dec_a, dec_b, dec_c : decode_input_reg_t;
+        variable dec_o : decode_output_reg_t;
     begin
-        decoded_reg_a <= decode_input_reg_init;
-        decoded_reg_b <= decode_input_reg_init;
-        decoded_reg_c <= decode_input_reg_init;
-        decoded_reg_o <= decode_output_reg_init;
-        if d_in.valid = '1' then
-            decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.prefix, d_in.nia);
-            decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
-            decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
-            decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
+        dec_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.prefix, d_in.nia);
+        dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
+        dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
+        dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
+        if d_in.valid = '0' or d_in.illegal_suffix = '1' then
+            dec_a.reg_valid := '0';
+            dec_b.reg_valid := '0';
+            dec_c.reg_valid := '0';
+            dec_o.reg_valid := '0';
         end if;
 
-        r_out.read1_enable <= decoded_reg_a.reg_valid;
-        r_out.read2_enable <= decoded_reg_b.reg_valid;
-        r_out.read3_enable <= decoded_reg_c.reg_valid;
+        decoded_reg_a <= dec_a;
+        decoded_reg_b <= dec_b;
+        decoded_reg_c <= dec_c;
+        decoded_reg_o <= dec_o;
+        r_out.read1_enable <= dec_a.reg_valid;
+        r_out.read2_enable <= dec_b.reg_valid;
+        r_out.read3_enable <= dec_c.reg_valid;
 
     end process;
 
@@ -592,6 +598,9 @@ begin
                     v.e.result_sel := "001";        -- logical_result
                 end if;
             end if;
+            v.e.prefixed := d_in.prefixed;
+            v.e.illegal_suffix := d_in.illegal_suffix;
+            v.e.misaligned_prefix := d_in.misaligned_prefix;
 
         elsif dc2.e.valid = '1' then
             -- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.
diff --git a/execute1.vhdl b/execute1.vhdl
index db1159d..e6cfd3e 100644
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -118,6 +118,7 @@ architecture behaviour of execute1 is
         fp_exception_next : std_ulogic;
         trace_next : std_ulogic;
         prev_op : insn_type_t;
+        prev_prefixed : std_ulogic;
         oe : std_ulogic;
         mul_select : std_ulogic_vector(1 downto 0);
         res2_sel : std_ulogic_vector(1 downto 0);
@@ -141,6 +142,7 @@ architecture behaviour of execute1 is
         (e => Execute1ToWritebackInit, se => side_effect_init,
          busy => '0',
          fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
+         prev_prefixed => '0',
          oe => '0', mul_select => "00", res2_sel => "00",
          spr_select => spr_id_init, pmu_spr_num => 5x"0",
          mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
@@ -978,6 +980,7 @@ begin
 	variable bo, bi : std_ulogic_vector(4 downto 0);
         variable illegal : std_ulogic;
         variable privileged : std_ulogic;
+        variable misaligned : std_ulogic;
         variable slow_op : std_ulogic;
         variable owait : std_ulogic;
         variable srr1 : std_ulogic_vector(63 downto 0);
@@ -1021,10 +1024,13 @@ begin
 
         illegal := '0';
         privileged := '0';
+        misaligned := e_in.misaligned_prefix;
         slow_op := '0';
         owait := '0';
 
-        if ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
+        if e_in.illegal_suffix = '1' then
+            illegal := '1';
+        elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
             privileged := '1';
         end if;
 
@@ -1315,9 +1321,22 @@ begin
                 end if;
         end case;
 
-        if privileged = '1' then
+        if misaligned = '1' then
+            -- generate an alignment interrupt
+            -- This is higher priority than illegal because a misaligned
+            -- prefix will come down as an OP_ILLEGAL instruction.
+            v.exception := '1';
+            v.e.intr_vec := 16#600#;
+            v.e.srr1(47 - 35) := '1';
+            v.e.srr1(47 - 34) := '1';
+            if e_in.valid = '1' then
+                report "misaligned prefixed instruction interrupt";
+            end if;
+
+        elsif privileged = '1' then
             -- generate a program interrupt
             v.exception := '1';
+            v.e.srr1(47 - 34) := e_in.prefixed;
             -- set bit 45 to indicate privileged instruction type interrupt
             v.e.srr1(47 - 45) := '1';
             if e_in.valid = '1' then
@@ -1326,6 +1345,7 @@ begin
 
         elsif illegal = '1' then
             v.exception := '1';
+            v.e.srr1(47 - 34) := e_in.prefixed;
             -- Since we aren't doing Hypervisor emulation assist (0xe40) we
             -- set bit 44 to indicate we have an illegal
             v.e.srr1(47 - 44) := '1';
@@ -1336,6 +1356,7 @@ begin
         elsif HAS_FPU and ex1.msr(MSR_FP) = '0' and e_in.fac = FPU then
             -- generate a floating-point unavailable interrupt
             v.exception := '1';
+            v.e.srr1(47 - 34) := e_in.prefixed;
             v.e.intr_vec := 16#800#;
             if e_in.valid = '1' then
                 report "FP unavailable interrupt";
@@ -1401,6 +1422,7 @@ begin
 
         if valid_in = '1' then
             v.prev_op := e_in.insn_type;
+            v.prev_prefixed := e_in.prefixed;
         end if;
 
         -- Determine if there is any interrupt to be taken
@@ -1422,6 +1444,7 @@ begin
                 v.e.intr_vec := 16#d00#;
                 v.e.srr1 := (others => '0');
                 v.e.srr1(47 - 33) := '1';
+                v.e.srr1(47 - 34) := ex1.prev_prefixed;
                 if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or
                     ex1.prev_op = OP_DCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then
                     v.e.srr1(47 - 35) := '1';
@@ -1584,6 +1607,7 @@ begin
         lv.priv_mode := not ex1.msr(MSR_PR);
         lv.mode_32bit := not ex1.msr(MSR_SF);
         lv.is_32bit := e_in.is_32bit;
+        lv.prefixed := e_in.prefixed;
         lv.repeat := e_in.repeat;
         lv.second := e_in.second;
         lv.e2stall := fp_in.f2stall;
diff --git a/loadstore1.vhdl b/loadstore1.vhdl
index 01babc3..fc8c158 100644
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -69,6 +69,7 @@ architecture behave of loadstore1 is
         instr_fault  : std_ulogic;
         do_update    : std_ulogic;
         mode_32bit   : std_ulogic;
+        prefixed     : std_ulogic;
 	addr         : std_ulogic_vector(63 downto 0);
         byte_sel     : std_ulogic_vector(7 downto 0);
         second_bytes : std_ulogic_vector(7 downto 0);
@@ -99,7 +100,8 @@ architecture behave of loadstore1 is
     constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
                                           dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
                                           instr_fault => '0', do_update => '0',
-                                          mode_32bit => '0', addr => (others => '0'),
+                                          mode_32bit => '0', prefixed => '0',
+                                          addr => (others => '0'),
                                           byte_sel => x"00", second_bytes => x"00",
                                           store_data => (others => '0'), instr_tag => instr_tag_init,
                                           write_reg => 6x"00", length => x"0",
@@ -411,6 +413,7 @@ begin
         v.valid := l_in.valid;
         v.instr_tag := l_in.instr_tag;
         v.mode_32bit := l_in.mode_32bit;
+        v.prefixed := l_in.prefixed;
         v.write_reg := l_in.write_reg;
         v.length := l_in.length;
         v.elt_length := l_in.length;
@@ -906,8 +909,10 @@ begin
         if exception = '1' then
             if r2.req.align_intr = '1' then
                 v.intr_vec := 16#600#;
+                v.srr1(47 - 34) := r2.req.prefixed;
                 v.dar := r2.req.addr;
             elsif r2.req.instr_fault = '0' then
+                v.srr1(47 - 34) := r2.req.prefixed;
                 v.dar := r2.req.addr;
                 if m_in.segerr = '0' then
                     v.intr_vec := 16#300#;

From 7c5a2bcaf4dc72bed27099a7270e4e4991815ca9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Wed, 5 Jul 2023 19:25:28 +1000
Subject: [PATCH 5/7] tests: Add a test for prefixed instructions

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 tests/prefix/Makefile         |   3 +
 tests/prefix/head.S           | 247 ++++++++++++++++++++++++++++++++++
 tests/prefix/powerpc.lds      |  27 ++++
 tests/prefix/prefix.c         | 214 +++++++++++++++++++++++++++++
 tests/test_prefix.bin         | Bin 0 -> 12320 bytes
 tests/test_prefix.console_out |   3 +
 tests/test_prefix.metavalue   |   1 +
 7 files changed, 495 insertions(+)
 create mode 100644 tests/prefix/Makefile
 create mode 100644 tests/prefix/head.S
 create mode 100644 tests/prefix/powerpc.lds
 create mode 100644 tests/prefix/prefix.c
 create mode 100755 tests/test_prefix.bin
 create mode 100644 tests/test_prefix.console_out
 create mode 100644 tests/test_prefix.metavalue

diff --git a/tests/prefix/Makefile b/tests/prefix/Makefile
new file mode 100644
index 0000000..038ec2a
--- /dev/null
+++ b/tests/prefix/Makefile
@@ -0,0 +1,3 @@
+TEST=prefix
+
+include ../Makefile.test
diff --git a/tests/prefix/head.S b/tests/prefix/head.S
new file mode 100644
index 0000000..961c2a9
--- /dev/null
+++ b/tests/prefix/head.S
@@ -0,0 +1,247 @@
+/* Copyright 2013-2014 IBM Corp.
+ * Copyright 2023 Paul Mackerras <paulus@ozlabs.org>.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Load an immediate 64-bit value into a register */
+#define LOAD_IMM64(r, e)			\
+	lis     r,(e)@highest;			\
+	ori     r,r,(e)@higher;			\
+	rldicr  r,r, 32, 31;			\
+	oris    r,r, (e)@h;			\
+	ori     r,r, (e)@l;
+
+	.section ".head","ax"
+
+	/*
+	 * Microwatt currently enters in LE mode at 0x0, so we don't need to
+	 * do any endian fix ups
+	 */
+	. = 0
+.global _start
+_start:	/* entry point: zero .bss, set up the stack, call main, then stop */
+	LOAD_IMM64(%r10,__bss_start)
+	LOAD_IMM64(%r11,__bss_end)
+	subf	%r11,%r10,%r11	/* r11 = __bss_end - __bss_start (bytes) */
+	addi	%r11,%r11,63
+	srdi.	%r11,%r11,6	/* round up to a count of 64-byte blocks; sets CR0 */
+	beq	2f	/* count is zero: nothing to clear */
+	mtctr	%r11
+1:	dcbz	0,%r10	/* zero the block at r10 (loop advances in 64-byte steps) */
+	addi	%r10,%r10,64
+	bdnz	1b
+
+2:	LOAD_IMM64(%r1,__stack_top)
+	li	%r0,0
+	stdu	%r0,-16(%r1)	/* r1 -= 16 and store 0 at the new stack pointer */
+	LOAD_IMM64(%r10, die)
+	mtsprg0	%r10	/* SPRG0 = address the exception stubs branch to (die by default) */
+	LOAD_IMM64(%r12, main)
+	mtctr	%r12
+	bctrl	/* call main */
+die:	attn // terminate on exit
+	b .
+
+.global trapit
+trapit:	/* long trapit(long arg, long (*func)(long)): call func(arg); if an exception stub runs, return its vector number instead */
+	mflr	%r0
+	std	%r0,16(%r1)	/* save LR in the caller's frame */
+	stdu	%r1,-256(%r1)	/* allocate a 256-byte frame */
+	mtsprg1	%r1	/* keep the stack pointer in SPRG1 so it can be recovered at ret */
+	r = 14
+	.rept	18	/* save r14..r31 at offsets r*8 */
+	std	r,r*8(%r1)
+	r = r + 1
+	.endr
+	mfcr	%r0
+	stw	%r0,13*8(%r1)	/* save CR just below the r14 slot */
+	LOAD_IMM64(%r10, ret)
+	mtsprg0	%r10	/* exception stubs now branch to ret (with r3 = vector number) */
+	mr	%r12,%r4	/* r12 also gets the target address -- presumably for the ELFv2 entry convention; confirm */
+	mtctr	%r4
+	bctrl	/* call func; r3 (arg) is passed through untouched */
+ret:	/* reached on normal return from func and via SPRG0 from an exception stub */
+	mfsprg1	%r1	/* recover the stack pointer saved above */
+	LOAD_IMM64(%r10, die)
+	mtsprg0	%r10	/* restore the default exception target */
+	r = 14
+	.rept	18	/* restore r14..r31 */
+	ld	r,r*8(%r1)
+	r = r + 1
+	.endr
+	lwz	%r0,13*8(%r1)
+	mtcr	%r0	/* restore CR */
+	ld	%r0,256+16(%r1)	/* reload the saved LR from the caller's frame */
+	addi	%r1,%r1,256
+	mtlr	%r0
+	blr
+
+#define EXCEPTION(nr)		/* stub placed at address nr: set r3 = nr and branch to the address held in SPRG0 */\
+	.= nr			;\
+	mfsprg0	%r0		;\
+	mtctr	%r0		;\
+	li	%r3,nr		;\
+	bctr
+
+	EXCEPTION(0x300)
+	EXCEPTION(0x380)
+	EXCEPTION(0x400)
+	EXCEPTION(0x480)
+	EXCEPTION(0x500)
+	EXCEPTION(0x600)
+	EXCEPTION(0x700)
+	EXCEPTION(0x800)
+	EXCEPTION(0x900)
+	EXCEPTION(0x980)
+	EXCEPTION(0xa00)
+	EXCEPTION(0xb00)
+	EXCEPTION(0xc00)
+	EXCEPTION(0xd00)
+	EXCEPTION(0xe00)
+	EXCEPTION(0xe20)
+	EXCEPTION(0xe40)
+	EXCEPTION(0xe60)
+	EXCEPTION(0xe80)
+	EXCEPTION(0xf00)
+	EXCEPTION(0xf20)
+	EXCEPTION(0xf40)
+	EXCEPTION(0xf60)
+	EXCEPTION(0xf80)
+
+	. = 0x1000	/* fixed origin: the test routines below depend on their absolute addresses */
+	.globl	test_paddi
+test_paddi:	/* returns r3 + 0x123456789 */
+	nop
+	nop
+	.machine "power10"
+	paddi	%r3,%r3,0x123456789,0	/* final operand 0: add the immediate to r3 */
+	blr
+
+	.globl	test_paddi_r
+test_paddi_r:	/* the C test expects this to return 0x123456789 regardless of the argument */
+	nop
+	nop
+	paddi	%r3,0,0x123456789 - 0x101c,1	/* final operand 1 (PC-relative form); 0x101c should be this instruction's own address -- re-check if code above changes */
+	blr
+
+	.globl	test_paddi_neg
+test_paddi_neg:	/* returns r3 - 0x123456789 (negative 34-bit immediate) */
+	nop
+	nop
+	paddi	%r3,%r3,-0x123456789,0
+	blr
+
+	.globl	test_pld
+test_pld:	/* load a doubleword from lvar with pld, store it at the address in r3, return 0 */
+	nop
+	nop
+	pld	%r4,lvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	.globl	test_plfd
+test_plfd:	/* load fpvar into f0 with plfd and store it at the address in r3; r3 is returned unchanged */
+	nop
+	nop
+	plfd	%f0,fpvar(0)
+	stfd	%f0,0(%r3)
+	blr
+
+	. = 0x1074	/* with the two nops below, the prefix word lands at 0x107c, i.e. 60 mod 64 */
+	.globl	test_paddi_mis
+test_paddi_mis:	/* deliberately misplaced prefixed instruction, emitted as raw words */
+	nop
+	nop
+	.long	0x06012345	/* prefix word (primary opcode 1) */
+	.long	0x38636789	/* suffix word (primary opcode 14, the addi encoding) */
+	blr
+
+	.globl	test_pstd
+test_pstd:	/* store r3 as a doubleword to lvar with pstd, return 0 */
+	nop
+	nop
+	pstd	%r3,lvar(0)
+	li	%r3,0
+	blr
+
+	.globl	test_plbz
+test_plbz:	/* load a byte from bvar with plbz, store the result as a doubleword at the address in r3, return 0 */
+	nop
+	nop
+	plbz	%r4,bvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	.globl	test_pstb
+test_pstb:	/* store the low byte of r3 to bvar with pstb, return 0 */
+	nop
+	nop
+	pstb	%r3,bvar(0)
+	li	%r3,0
+	blr
+
+	.globl	test_plha
+test_plha:	/* load a halfword from hvar with plha, store the result as a doubleword at the address in r3, return 0 */
+	nop
+	nop
+	plha	%r4,hvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	.globl	test_plhz
+test_plhz:	/* load a halfword from hvar with plhz, store the result as a doubleword at the address in r3, return 0 */
+	nop
+	nop
+	plhz	%r4,hvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	.globl	test_psth
+test_psth:	/* store the low halfword of r3 to hvar with psth, return 0 */
+	nop
+	nop
+	psth	%r3,hvar(0)
+	li	%r3,0
+	blr
+
+	.globl	test_plwa
+test_plwa:	/* load a word from wvar with plwa, store the result as a doubleword at the address in r3, return 0 */
+	nop
+	nop
+	plwa	%r4,wvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	.globl	test_plwz
+test_plwz:	/* load a word from wvar with plwz, store the result as a doubleword at the address in r3, return 0 */
+	nop
+	nop
+	plwz	%r4,wvar(0)
+	std	%r4,0(%r3)
+	li	%r3,0
+	blr
+
+	.globl	test_pstw
+test_pstw:	/* store the low word of r3 to wvar with pstw, return 0 */
+	nop
+	nop
+	pstw	%r3,wvar(0)
+	li	%r3,0
+	blr
diff --git a/tests/prefix/powerpc.lds b/tests/prefix/powerpc.lds
new file mode 100644
index 0000000..99611ab
--- /dev/null
+++ b/tests/prefix/powerpc.lds
@@ -0,0 +1,27 @@
+SECTIONS
+{
+	. = 0;	/* image is linked at address 0 */
+	_start = .;
+	.head : {
+		KEEP(*(.head))	/* entry code, exception stubs and fixed-address test routines from head.S */
+	}
+	. = ALIGN(0x1000);
+	.text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) }	/* code and read-only data share one output section */
+	. = ALIGN(0x1000);
+	.data : { *(.data) *(.data.*) *(.got) *(.toc) }
+	. = ALIGN(0x80);
+	__bss_start = .;	/* head.S zeroes __bss_start..__bss_end in 64-byte blocks */
+	.bss : {
+		*(.dynsbss)
+		*(.sbss)
+		*(.scommon)
+		*(.dynbss)
+		*(.bss)
+		*(.common)
+		*(.bss.*)
+	}
+	. = ALIGN(0x80);
+	__bss_end = .;
+	. = . + 0x4000;	/* 0x4000 bytes (16 KiB) reserved for the stack */
+	__stack_top = .;	/* initial stack pointer loaded by head.S */
+}
diff --git a/tests/prefix/prefix.c b/tests/prefix/prefix.c
new file mode 100644
index 0000000..94ac500
--- /dev/null
+++ b/tests/prefix/prefix.c
@@ -0,0 +1,214 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "console.h"
+
+#define MSR_LE	0x1
+#define MSR_DR	0x10
+#define MSR_IR	0x20
+#define MSR_SF	0x8000000000000000ul
+
+#define DSISR	18
+#define DAR	19
+#define SRR0	26
+#define SRR1	27
+#define PID	48
+#define PTCR	464
+
+extern long trapit(long arg, long (*func)(long));
+extern long test_paddi(long arg);
+extern long test_paddi_r(long arg);
+extern long test_paddi_neg(long arg);
+extern long test_paddi_mis(long arg);
+extern long test_plbz(long arg);
+extern long test_pld(long arg);
+extern long test_plha(long arg);
+extern long test_plhz(long arg);
+extern long test_plwa(long arg);
+extern long test_plwz(long arg);
+extern long test_pstb(long arg);
+extern long test_pstd(long arg);
+extern long test_psth(long arg);
+extern long test_pstw(long arg);
+extern long test_plfd(long arg);
+
+static inline unsigned long mfspr(int sprnum)	/* read special-purpose register sprnum and return its value */
+{
+	long val;
+
+	__asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (sprnum));	/* "i": sprnum must be a compile-time constant */
+	return val;
+}
+
+static inline void mtspr(int sprnum, unsigned long val)	/* write val to special-purpose register sprnum */
+{
+	__asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val));	/* "i": sprnum must be a compile-time constant */
+}
+
+void print_string(const char *str)	/* write the NUL-terminated string str one character at a time via putchar */
+{
+	for (; *str; ++str)
+		putchar(*str);
+}
+
+void print_hex(unsigned long val, int ndigits, const char *str)	/* print the low ndigits hex digits of val, then str */
+{
+	int i, x;
+
+	for (i = (ndigits - 1) * 4; i >= 0; i -= 4) {	/* most significant requested nibble first */
+		x = (val >> i) & 0xf;
+		if (x >= 10)
+			putchar(x + 'a' - 10);	/* 10..15 are printed as lowercase 'a'..'f' */
+		else
+			putchar(x + '0');
+	}
+	print_string(str);	/* trailing text, e.g. a separator or line ending */
+}
+
+// Print "test NN:" where NN is i as two decimal digits; only correct for 0 <= i < 100
+void print_test_number(int i)
+{
+	print_string("test ");
+	putchar(48 + i/10);	/* tens digit; 48 is ASCII '0' */
+	putchar(48 + i%10);	/* units digit */
+	putchar(':');
+}
+
+long int prefix_test_1(void)	/* paddi checks; returns 0 on success, otherwise the unexpected trapit() result */
+{
+	long int ret;
+
+	ret = trapit(0x321, test_paddi);
+	if (ret != 0x123456789 + 0x321)	/* expected result: argument plus 0x123456789 */
+		return ret;
+	ret = trapit(0x322, test_paddi_r);
+	if (ret != 0x123456789)	/* expected result does not depend on the argument */
+		return ret;
+	ret = trapit(0x323, test_paddi_neg);
+	if (ret != 0x323 - 0x123456789)	/* expected result: argument minus 0x123456789 */
+		return ret;
+	return 0;
+}
+
+double fpvar = 123.456;
+
+long int prefix_test_2(void)	/* checks that two prefixed instructions report the expected trap state; 0 on success */
+{
+	long int ret;
+	double x;
+
+	ret = trapit(0x123, test_paddi_mis);	/* the assembly places this routine's hand-encoded words at 0x107c/0x1080 */
+	if (ret != 0x600)	/* presumably the alignment interrupt vector as reported by trapit - confirm against trapit */
+		return 1;
+	if (mfspr(SRR0) != (unsigned long)&test_paddi_mis + 8)	/* +8 skips the routine's two leading nops */
+		return 2;
+	if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 35)) | (1ul << (63 - 34))))	/* bits 34 and 35, counting bit 0 as the MSB */
+		return 3;
+
+	ret = trapit((long)&x, test_plfd);
+	if (ret != 0x800)	/* presumably the FP-unavailable vector - confirm; NOTE(review): failure returns raw ret, not a numbered code */
+		return ret;
+	if (mfspr(SRR0) != (unsigned long)&test_plfd + 8)	/* +8 skips the routine's two leading nops */
+		return 6;
+	if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 34))))	/* only bit 34 (bit 0 = MSB) expected here */
+		return 7;
+	return 0;
+}
+
+unsigned char bvar = 0x63;
+long lvar = 0xfedcba987654;
+unsigned short hvar = 0xffee;
+unsigned int wvar = 0x80457788;
+
+long int prefix_test_3(void)	/* prefixed load/store checks against bvar, hvar, wvar and lvar; 0 on success */
+{
+	long int ret;
+	long int x;
+
+	ret = trapit((long)&x, test_pld);	/* doubleword load: routine stores the loaded value to x */
+	if (ret)
+		return ret | 1;	/* nonzero ret is OR-ed with a step code to identify the failing call */
+	if (x != lvar)
+		return 2;
+	ret = trapit(1234, test_pstd);	/* doubleword store: routine writes its argument to lvar */
+	if (ret)
+		return ret | 2;
+	if (lvar != 1234)
+		return 3;
+
+	ret = trapit((long)&x, test_plbz);	/* byte load */
+	if (ret)
+		return ret | 0x10;
+	if (x != bvar)
+		return 0x11;
+	ret = trapit(0xaa, test_pstb);	/* byte store */
+	if (ret)
+		return ret | 0x12;
+	if (bvar != 0xaa)
+		return 0x13;
+
+	ret = trapit((long)&x, test_plhz);	/* halfword load, compared with the unsigned value */
+	if (ret)
+		return ret | 0x20;
+	if (x != hvar)
+		return 0x21;
+	ret = trapit((long)&x, test_plha);	/* halfword load, compared with the sign-extended value */
+	if (ret)
+		return ret | 0x22;
+	if (x != (signed short)hvar)
+		return 0x23;
+	ret = trapit(0x23aa, test_psth);	/* halfword store */
+	if (ret)
+		return ret | 0x24;
+	if (hvar != 0x23aa)
+		return 0x25;
+
+	ret = trapit((long)&x, test_plwz);	/* word load, compared with the unsigned value */
+	if (ret)
+		return ret | 0x30;
+	if (x != wvar)
+		return 0x31;
+	ret = trapit((long)&x, test_plwa);	/* word load, compared with the sign-extended value */
+	if (ret)
+		return ret | 0x32;
+	if (x != (signed int)wvar)
+		return 0x33;
+	ret = trapit(0x23aaf44f, test_pstw);	/* word store */
+	if (ret)
+		return ret | 0x34;
+	if (wvar != 0x23aaf44f)
+		return 0x35;
+	return 0;
+}
+
+int fail = 0;
+
+void do_test(int num, long int (*test)(void))	/* run one test and print "test NN:PASS" or a FAIL line */
+{
+	long int ret;
+
+	print_test_number(num);
+	ret = test();	/* 0 means the test passed */
+	if (ret == 0) {
+		print_string("PASS\r\n");
+	} else {
+		fail = 1;	/* remembered globally; main() returns this value */
+		print_string("FAIL ");
+		print_hex(ret, 16, " SRR0=");	/* failure code as 16 hex digits */
+		print_hex(mfspr(SRR0), 16, " SRR1=");	/* current SRR0/SRR1 are printed to help diagnose the failure */
+		print_hex(mfspr(SRR1), 16, "\r\n");
+	}
+}
+
+int main(void)	/* run the three prefix tests in order; returns 0 if all passed, 1 otherwise */
+{
+	console_init();
+	//init_mmu();
+
+	do_test(1, prefix_test_1);	/* paddi results */
+	do_test(2, prefix_test_2);	/* expected traps from prefixed instructions */
+	do_test(3, prefix_test_3);	/* prefixed loads and stores */
+
+	return fail;
+}
diff --git a/tests/test_prefix.bin b/tests/test_prefix.bin
new file mode 100755
index 0000000000000000000000000000000000000000..a5f9ff7c9b6deb6cc20d68be124c5ca137c0cfbf
GIT binary patch
literal 12320
zcmeHNZEREL6+YKCwi7~XHYQ~YYOmw0!pzdlMyDGR!@VX6A)|!>8%RUbH7RN#s{|_=
z4!f=|A(N6S)3T~R=sp^lz-psu;s-wvnyRpBh@ViQsXzEg3O{UzPE=!(69ae8xgUvR
zlI0gfT70FOYoGT#=RD_m&-*&|y+m@1s55cJ*V%*X0X^E8AW~uLhOHa6?gO;0JF#Q$
zq0}boZaR6ryJr>kDjD~|)crlA$<*0YFepwWwh#r?E52Y7*VpMtP$eSiD?=zyS&mc4
zm)X1{MWiFoAo2_%&)|XeFApX%?txU$qrXNjpouy%F25Eap&%H+LT2+i?J!-QOw#zI
zPQp|_1*e8cnL0&@snc{~>LWTcb)GIyU8M1;Q4)$*DOkKlO7U|_6vya9F-vEP59o5S
zK;y-iiL^IK7%J!1C|KN*p+OB?H0F|tuhYu?oEpF<2R=FQ$%9WGeDdIv2cJCn<iRHo
zK6&t&0G|o)nE;;&@R<Og3GkT!p9yf$#jjq_0)mhwAt+_seoZ9kP&uBd0-1OK^IWBo
zTa`rayXSm3`SnH4Po71tf06Ukvc;+YIk($W>74!}?-H-yx5VrBFY)@T7N>q^{ppLG
zpQ;z9{--vi=WKBFr{zn${+cCT|B5AEf9>4$JL^MSU|e2cTwP#XpJ)78ZwF3&^uhx5
zFEB1IFs?2zuFo_6tm{X7mCv@2;2n6QOR~Rn+P0P6xAs66>wBnayL8j#J$7ogAMhT3
zRKG`b65sV_`(w;o7p1F2?$1lU&)V^y&S}L`AI=pg5Y_wV%MpmGr{=<VqWVYkVK8=j
z{xS9Z9PvC9)z8j{!I(b(n2O&TrnNztm7}6SMS+R}6$L5^R1~NvP*I?wKt+Ly0u==+
z3Oqjr1d?0md|cxD?1s_X(xUcBe82rV?zfASkWP#lKLi_<C={u~eSd+v)F=hnC+pm!
z7D>smk6&-dx$qsAI@W48jb54K^&{SAxVP6-5~l`nT!?eguwpaL4cup*aOA9wI}IG?
z7s_w};B*@o(BlIl1#>IyoNwCLzX3Y}+duBiaOa#~1GaS<+c5BT_>53hBx>Ux*g1bw
zh7*CSv2h+feui@f?VJyvSasg8u|F!qHUN9=iNzxX4DcT_o;A8?V`~!N8&aijlwliz
zec#4<^oBFvX1<}Sh;HL#;8>$uw`bQG1#X{>BRzhC^R2V_w%XW68+)b<`#oSq8~c4=
zTzWj`@b%l+eHP!A?GL-uL>YcP@MDi{KFB>15@XvRh7;6LtzCWGtGt4kqt-ZD1`j>(
zpRyh#Xc@%)aPGr#^x@-MRy={kSX(HZxOXz#GjfPyhBTEd{Z|3g0L=9{d<WjJeAnB)
zYv3D#?>XD|i6O==-L&HV{tuR~AHLS_HM4siboqFScflAh8pao!j;cm;NKmC@@m`ww
zUk3k}ohMm;P!o6_yy3_Y;%8hNQhqX;tu?br(@fh>Q>2SAUJqA%;U0>H4+zAZ)@u7q
z{$Fg`iyS=foMZf^t+hWCX(@^6<vfM3wx%Se&xt9{#B}PEv8LB}slzG{^$D}*#j5Xa
zv$Hn{lpe%ydJ(KEnfHVH%c$dmuS@&VWyVm)to34PRTj%I`lNI4XKj-8DJ}V(PqYnz
z;oU?pMH)$$nM?BDpR#`0tbu1_3_K%aj8E`>a};rZ@aW_8Mql{HRam>L>4dz8!c7-^
zq0}S8XnxDie*u0`*t4ZP+_P&!+#=VNtbYf4-~N_r8f$*g(>1;SRRR0Y;*)fg(o!`U
zFL8Y<?D#t#q&2KFiEWdDRr3bu`=$MwHM!v_(G$-zmG-%rH#BTM+r)eA5#-9abLnL9
ze!7m70zY?zYqqC8jpu+YjhSP?;+J*wcpYmM>!Jxlt2|D}yChw2_}#0p8^)ho0;Eg+
z<LA&TchkRyE#o?lS}l73_9Le4SEwZ@vAqs9#<2EIj&1OnuP?YCc#STzb>Tgqd%W)#
zebP5k6Tj#FZh5;;Vm<ji_ck5wlO_=tdyB-}d-eTU&gp!nOy|Q3M>qBx(i;Z&+ONk^
zb5%sbzD~3@w+eIp7Y}J8uZbzFmm~KLwzJ><tZ9Ey)i!#&cJq8$zeBIh1#C=y1|vHd
zKjxNkdEl;sH|*2L|Kby!gO50iPeab+vt6B&hu`v*H+m0ZJ(zWx{`oy(s^s@`lZ(TH
zKfAe~xStK9bdBEmEzygSfZaa`C&`$nYS?()uE6^kYP;b3H2n&$*JkPzbo49lSB&{<
z-PROQbp(C*Dn9?E2Qz-Nc+jXAL#+v%J*IZhl}7P=p*P&)$1{H-bz1hH$f%|Bh&d)&
zb{Txgs&^94+L`@4q{ly4&Cd?P=e=EFGgh7LZy45mPwO&DF`4|x;kR<MQ|hkI-{Zub
ziL*6z+U^ZrH|VDi2-q6YpU@4l$-$3o5mWv6zr>m=*?jH#duNZw>9g&Y%?++KWj3d;
zMr<D+d@!$8f5JB3JTh0SzpEk^?}i-qZ}c(tc7NaQWawQD`!)Utdsi*xVa02V@EHGJ
z1^vxj`RN`*H2XMu;O(O#UhhzL?OIVy)UIsZ#_z>lJ9kE7)^#1OJchm3>?!UoUgw;L
z_u|sMfo%cc`4EopBu`Qi>T3jAjrVmpPyYwIJ$i|@)J9h-?`)~ArMOp7mh(6-d{4nJ
zF5*fZwIaq<a~N+$oO^Np5#wXHQfrk$!$Nzl46j4Aq}=f;jWBCC_f8tdwi%zzW~NqT
zVyERqEaqAEg<rqpr~JwNp=($`G^JK<hjY7ExpVs#`*YkIe8QjOQyX%#bGsRN2M50G
zRm`}LFuZ2UFs`zn#nH;Y6(QRYQdz#!icO{_5rJqF{)gGW&8&HQEwv&V(Of#Rtq$iS
zV1{tzgKb<>606;;2d~%QlyTmNO~uvZJS=?Zno%Rpq*0!U|2eKAJ}?=_2glTLu7^zq
zM?Tp0K2G(qVX%MYs3=fT;Qvd3{$p+WzYL<?zdUv6<`beWyS!tc41eppw|*x77fHDR
A$^ZZW

literal 0
HcmV?d00001

diff --git a/tests/test_prefix.console_out b/tests/test_prefix.console_out
new file mode 100644
index 0000000..623335d
--- /dev/null
+++ b/tests/test_prefix.console_out
@@ -0,0 +1,3 @@
+test 01:PASS
+test 02:PASS
+test 03:PASS
diff --git a/tests/test_prefix.metavalue b/tests/test_prefix.metavalue
new file mode 100644
index 0000000..64bb6b7
--- /dev/null
+++ b/tests/test_prefix.metavalue
@@ -0,0 +1 @@
+30

From fd8c0000c03c00be91aee83f578ccf18ccbcb5bb Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 27 Jul 2023 14:58:09 +1000
Subject: [PATCH 6/7] Implement set[n]bc[r] instructions

This implements the setbc, setnbc, setbcr and setnbcr instructions.
Because the insn_type_t type already has 64 elements, this uses the
existing OP_SETB for the new instructions, and has execute1 compute
different results depending on bits 6-9 of the instruction.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 execute1.vhdl  | 27 ++++++++++++++++++++-------
 predecode.vhdl |  4 ++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/execute1.vhdl b/execute1.vhdl
index e6cfd3e..e537048 100644
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -836,14 +836,27 @@ begin
 		end if;
                 misc_result <= mfcr_result;
             when "110" =>
-                -- setb
-                bfa := insn_bfa(e_in.insn);
-                crbit := to_integer(unsigned(bfa)) * 4;
+                -- setb and set[n]bc[r]
                 setb_result := (others => '0');
-                if cr_in(31 - crbit) = '1' then
-                    setb_result := (others => '1');
-                elsif cr_in(30 - crbit) = '1' then
-                    setb_result(0) := '1';
+                if e_in.insn(9) = '0' then
+                    -- setb
+                    bfa := insn_bfa(e_in.insn);
+                    crbit := to_integer(unsigned(bfa)) * 4;
+                    if cr_in(31 - crbit) = '1' then
+                        setb_result := (others => '1');
+                    elsif cr_in(30 - crbit) = '1' then
+                        setb_result(0) := '1';
+                    end if;
+                else
+                    -- set[n]bc[r]
+                    crbit := to_integer(unsigned(insn_bi(e_in.insn)));
+                    if (cr_in(31 - crbit) xor e_in.insn(6)) = '1' then
+                        if e_in.insn(7) = '0' then
+                            setb_result(0) := '1';
+                        else
+                            setb_result := (others => '1');
+                        end if;
+                    end if;
                 end if;
                 misc_result <= setb_result;
             when others =>
diff --git a/predecode.vhdl b/predecode.vhdl
index 41b26ad..58b17e3 100644
--- a/predecode.vhdl
+++ b/predecode.vhdl
@@ -336,6 +336,10 @@ architecture behaviour of predecoder is
         2#0_00101_11010#  =>  INSN_prtyd,
         2#0_00100_11010#  =>  INSN_prtyw,
         2#0_00100_00000#  =>  INSN_setb,
+        2#0_01100_00000#  =>  INSN_setb, -- setbc
+        2#0_01101_00000#  =>  INSN_setb, -- setbcr
+        2#0_01110_00000#  =>  INSN_setb, -- setnbc
+        2#0_01111_00000#  =>  INSN_setb, -- setnbcr
         2#0_01111_10010#  =>  INSN_slbia,
         2#0_00000_11011#  =>  INSN_sld,
         2#0_00000_11000#  =>  INSN_slw,

From b50170cd1d158ed6ae19e98d9b4cca022b6b2c2f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Sat, 9 Sep 2023 22:14:03 +1000
Subject: [PATCH 7/7] Implement byte reversal instructions

This implements the byte-reverse halfword, word and doubleword
instructions: brh, brw, and brd.  These instructions were added to the
ISA in version 3.1.  They use a new OP_BREV insn_type value.  The
logic for these instructions is implemented in logical.vhdl.

In order to avoid going over 64 insn_type values, OP_AND and OP_OR
were combined into OP_LOGIC, which is like OP_AND except that the RS
input can be inverted as well as the RB input.  The various forms of
OR instruction are then implemented using the identity

    a OR b = NOT (NOT a AND NOT b)

The 'is_signed' field of the instruction decode table is used to
indicate that RS should be inverted.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 decode1.vhdl      | 23 +++++++++++++----------
 decode2.vhdl      |  4 ++--
 decode_types.vhdl | 44 +++++++++++++++++++++++---------------------
 execute1.vhdl     |  5 +++--
 logical.vhdl      | 37 +++++++++++++++++++++++++++----------
 predecode.vhdl    |  3 +++
 6 files changed, 71 insertions(+), 45 deletions(-)

diff --git a/decode1.vhdl b/decode1.vhdl
index 0aa2fee..e090d66 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -89,10 +89,10 @@ architecture behaviour of decode1 is
         INSN_addme       =>  (ALU,  NONE, OP_ADD,       RA,         CONST_M1,    NONE, RT,   '0', '0', '0', '0', CA,   '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
         INSN_addpcis     =>  (ALU,  NONE, OP_ADD,       CIA,        CONST_DXHI4, NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_addze       =>  (ALU,  NONE, OP_ADD,       RA,         NONE,        NONE, RT,   '0', '0', '0', '0', CA,   '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
-        INSN_and         =>  (ALU,  NONE, OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_andc        =>  (ALU,  NONE, OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_andi_dot    =>  (ALU,  NONE, OP_AND,       NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
-        INSN_andis_dot   =>  (ALU,  NONE, OP_AND,       NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
+        INSN_and         =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
+        INSN_andc        =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
+        INSN_andi_dot    =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
+        INSN_andis_dot   =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0', NONE),
         INSN_attn        =>  (ALU,  NONE, OP_ATTN,      NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
         INSN_b           =>  (ALU,  NONE, OP_B,         NONE,       CONST_LI,    NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
         INSN_bc          =>  (ALU,  NONE, OP_BC,        NONE,       CONST_BD,    NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
@@ -100,6 +100,9 @@ architecture behaviour of decode1 is
         INSN_bclr        =>  (ALU,  NONE, OP_BCREG,     NONE,       NONE,        NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
         INSN_bctar       =>  (ALU,  NONE, OP_BCREG,     NONE,       NONE,        NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
         INSN_bperm       =>  (ALU,  NONE, OP_BPERM,     NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_brh         =>  (ALU,  NONE, OP_BREV,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_brw         =>  (ALU,  NONE, OP_BREV,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_brd         =>  (ALU,  NONE, OP_BREV,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_cbcdtd      =>  (ALU,  NONE, OP_BCD,       NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_cdtbcd      =>  (ALU,  NONE, OP_BCD,       NONE,       NONE,        RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_cmp         =>  (ALU,  NONE, OP_CMP,       RA,         RB,          NONE, NONE, '0', '1', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
@@ -268,14 +271,14 @@ architecture behaviour of decode1 is
         INSN_mulld       =>  (ALU,  NONE, OP_MUL_L64,   RA,         RB,          NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
         INSN_mulli       =>  (ALU,  NONE, OP_MUL_L64,   RA,         CONST_SI,    NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
         INSN_mullw       =>  (ALU,  NONE, OP_MUL_L64,   RA,         RB,          NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RCOE, '0', '0', NONE),
-        INSN_nand        =>  (ALU,  NONE, OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
+        INSN_nand        =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
         INSN_neg         =>  (ALU,  NONE, OP_ADD,       RA,         NONE,        NONE, RT,   '0', '0', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
         INSN_nop         =>  (ALU,  NONE, OP_NOP,       NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-        INSN_nor         =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_or          =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_orc         =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
-        INSN_ori         =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-        INSN_oris        =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_nor         =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC,   '0', '0', NONE),
+        INSN_or          =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC,   '0', '0', NONE),
+        INSN_orc         =>  (ALU,  NONE, OP_LOGIC,     NONE,       RB,          RS,   RA,   '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC,   '0', '0', NONE),
+        INSN_ori         =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI,    RS,   RA,   '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
+        INSN_oris        =>  (ALU,  NONE, OP_LOGIC,     NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
         INSN_paddi       =>  (ALU,  NONE, OP_ADD,       RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_plbz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_pld         =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
diff --git a/decode2.vhdl b/decode2.vhdl
index 338a80a..1f3e7ff 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -205,13 +205,13 @@ architecture behaviour of decode2 is
     type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);
 
     constant result_select : mux_select_array_t := (
-        OP_AND      => "001",           -- logical_result
-        OP_OR       => "001",
+        OP_LOGIC    => "001",           -- logical_result
         OP_XOR      => "001",
         OP_PRTY     => "001",
         OP_CMPB     => "001",
         OP_EXTS     => "001",
         OP_BPERM    => "001",
+        OP_BREV     => "001",
         OP_BCD      => "001",
         OP_MTSPR    => "001",
         OP_RLC      => "010",           -- rotator_result
diff --git a/decode_types.vhdl b/decode_types.vhdl
index 428d943..9e7ef84 100644
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@@ -3,8 +3,9 @@ use ieee.std_logic_1164.all;
 
 package decode_types is
     type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
-			 OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
-			 OP_BCD, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
+			 OP_ATTN, OP_B, OP_BC, OP_BCREG,
+			 OP_BCD, OP_BPERM, OP_BREV,
+                         OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
 			 OP_CNTZ, OP_CROP,
 			 OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
 			 OP_DCBZ, OP_ICBI, OP_ICBT,
@@ -12,10 +13,11 @@ package decode_types is
                          OP_DIV, OP_DIVE, OP_MOD,
                          OP_EXTS, OP_EXTSWSLI,
                          OP_ISEL, OP_ISYNC,
+                         OP_LOGIC,
 			 OP_LOAD, OP_STORE,
 			 OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
 			 OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
-			 OP_MUL_H64, OP_MUL_H32, OP_OR,
+			 OP_MUL_H64, OP_MUL_H32,
 			 OP_POPCNT, OP_PRTY, OP_RFID,
 			 OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
 			 OP_SHL, OP_SHR,
@@ -50,84 +52,84 @@ package decode_types is
         INSN_bcctr,
         INSN_bclr,
         INSN_bctar,
+        INSN_brh,
+        INSN_brw,
+        INSN_brd, -- 20
         INSN_cbcdtd,
         INSN_cdtbcd,
-        INSN_cmpi, -- 20
+        INSN_cmpi,
         INSN_cmpli,
         INSN_cntlzw,
         INSN_cntlzd,
         INSN_cnttzw,
         INSN_cnttzd,
         INSN_crand,
-        INSN_crandc,
+        INSN_crandc, -- 30
         INSN_creqv,
         INSN_crnand,
-        INSN_crnor, -- 30
+        INSN_crnor,
         INSN_cror,
         INSN_crorc,
         INSN_crxor,
         INSN_darn,
         INSN_eieio,
         INSN_extsb,
-        INSN_extsh,
+        INSN_extsh, -- 40
         INSN_extsw,
         INSN_extswsli,
-        INSN_isync, -- 40
+        INSN_isync,
         INSN_lbzu,
         INSN_ld,
         INSN_ldu,
         INSN_lhau,
         INSN_lwa,
         INSN_lwzu,
-        INSN_mcrf,
+        INSN_mcrf, -- 50
         INSN_mcrxrx,
         INSN_mfcr,
-        INSN_mfmsr, -- 50
+        INSN_mfmsr,
         INSN_mfspr,
         INSN_mtcrf,
         INSN_mtmsr,
         INSN_mtmsrd,
         INSN_mtspr,
         INSN_mulli,
-        INSN_neg,
+        INSN_neg, -- 60
         INSN_nop,
         INSN_ori,
-        INSN_oris, -- 60
+        INSN_oris,
         INSN_popcntb,
         INSN_popcntw,
         INSN_popcntd,
         INSN_prtyw,
         INSN_prtyd,
         INSN_rfid,
-        INSN_rldic,
+        INSN_rldic, -- 70
         INSN_rldicl,
         INSN_rldicr,
-        INSN_rldimi, -- 70
+        INSN_rldimi,
         INSN_rlwimi,
         INSN_rlwinm,
         INSN_sc,
         INSN_setb,
         INSN_slbia,
         INSN_sradi,
-        INSN_srawi,
+        INSN_srawi, -- 80
         INSN_stbu,
         INSN_std,
-        INSN_stdu, -- 80
+        INSN_stdu,
         INSN_sthu,
         INSN_stwu,
         INSN_subfic,
         INSN_subfme,
         INSN_subfze,
         INSN_sync,
-        INSN_tdi,
+        INSN_tdi, -- 90
         INSN_tlbsync,
         INSN_twi,
-        INSN_wait, -- 90
+        INSN_wait,
         INSN_xori,
         INSN_xoris,
-        INSN_93, -- padding
-        INSN_94,
-        INSN_95,
 
         -- Non-prefixed instructions that have a MLS:D prefixed form and
         -- their corresponding prefixed instructions.
diff --git a/execute1.vhdl b/execute1.vhdl
index e537048..7c1ff8f 100644
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -392,6 +392,7 @@ begin
 	    op => e_in.insn_type,
 	    invert_in => e_in.invert_a,
 	    invert_out => e_in.invert_out,
+            is_signed => e_in.is_signed,
 	    result => logical_result,
             datalen => e_in.data_len
 	    );
@@ -1105,8 +1106,8 @@ begin
             when OP_ADDG6S =>
             when OP_CMPRB =>
             when OP_CMPEQB =>
-            when OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
-                OP_BPERM | OP_BCD =>
+            when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
+                OP_BPERM | OP_BREV | OP_BCD =>
 
 	    when OP_B =>
                 v.take_branch := '1';
diff --git a/logical.vhdl b/logical.vhdl
index 77ef29c..2d139f8 100644
--- a/logical.vhdl
+++ b/logical.vhdl
@@ -13,6 +13,7 @@ entity logical is
         op         : in insn_type_t;
         invert_in  : in std_ulogic;
         invert_out : in std_ulogic;
+        is_signed  : in std_ulogic;
         result     : out std_ulogic_vector(63 downto 0);
         datalen    : in std_logic_vector(3 downto 0)
         );
@@ -92,7 +93,8 @@ architecture behaviour of logical is
 
 begin
     logical_0: process(all)
-        variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
+        variable rb_adj, rs_adj : std_ulogic_vector(63 downto 0);
+        variable tmp : std_ulogic_vector(63 downto 0);
         variable negative : std_ulogic;
         variable j : integer;
     begin
@@ -123,19 +125,34 @@ begin
         end if;
 
         case op is
-            when OP_AND | OP_OR | OP_XOR =>
-                case op is
-                    when OP_AND =>
-                        tmp := rs and rb_adj;
-                    when OP_OR =>
-                        tmp := rs or rb_adj;
-                    when others =>
-                        tmp := rs xor rb_adj;
-                end case;
+            when OP_LOGIC =>
+                -- for now, abuse the 'is_signed' field to indicate inversion of RS
+                rs_adj := rs;
+                if is_signed = '1' then
+                    rs_adj := not rs;
+                end if;
+                tmp := rs_adj and rb_adj;
+                if invert_out = '1' then
+                    tmp := not tmp;
+                end if;
+            when OP_XOR =>
+                tmp := rs xor rb;
                 if invert_out = '1' then
                     tmp := not tmp;
                 end if;
 
+            when OP_BREV =>
+                if datalen(3) = '1' then
+                    tmp := rs( 7 downto  0) & rs(15 downto  8) & rs(23 downto 16) & rs(31 downto 24) & 
+                           rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56);
+                elsif datalen(2) = '1' then
+                    tmp := rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56) &
+                           rs( 7 downto  0) & rs(15 downto  8) & rs(23 downto 16) & rs(31 downto 24);
+                else
+                    tmp := rs(55 downto 48) & rs(63 downto 56) & rs(39 downto 32) & rs(47 downto 40) &
+                           rs(23 downto 16) & rs(31 downto 24) & rs( 7 downto  0) & rs(15 downto  8);
+                end if;
+
             when OP_PRTY =>
                 tmp := parity;
             when OP_CMPB =>
diff --git a/predecode.vhdl b/predecode.vhdl
index 58b17e3..27f80e1 100644
--- a/predecode.vhdl
+++ b/predecode.vhdl
@@ -184,6 +184,9 @@ architecture behaviour of predecoder is
         2#0_00000_11100#  =>  INSN_and,
         2#0_00001_11100#  =>  INSN_andc,
         2#0_00111_11100#  =>  INSN_bperm,
+        2#0_00110_11011#  =>  INSN_brh,
+        2#0_00100_11011#  =>  INSN_brw,
+        2#0_00101_11011#  =>  INSN_brd,
         2#0_01001_11010#  =>  INSN_cbcdtd,
         2#0_01000_11010#  =>  INSN_cdtbcd,
         2#0_00000_00000#  =>  INSN_cmp,