From 39ca675ce399c494aad7c27a1caa8ceff62c23af Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 3 Jul 2023 12:32:43 +1000
Subject: [PATCH] Decode prefixed instructions

This adds logic to do basic decoding of the prefixed instructions
defined in PowerISA v3.1B which are in the SFFS (Scalar Fixed plus
Floating-Point Subset) compliancy subset.  In PowerISA v3.1B SFFS,
there are 14 prefixed load/store instructions plus the prefixed add
immediate instruction (paddi) and the prefixed no-op instruction
(pnop).  The prefixed load/store instructions and paddi all use an
extended version of D-form, which has an extra 18 bits of
displacement/immediate in the prefix, plus an 'R' bit which enables
PC-relative addressing.

When decode1 sees an instruction word where the insn_code is
INSN_prefix (i.e. the primary opcode was 1), it stores the prefix word
and sends nothing down to decode2 in that cycle.  When the next valid
instruction word arrives, it is interpreted as a suffix, meaning that
its insn_code gets modified before being used to look up the decode
table.

The insn_code values are rearranged so that the values for
instructions which are the suffix of a valid prefixed instruction are
all at even indexes, and the corresponding prefixed instructions
follow immediately, so that an insn_code value can be converted to the
corresponding prefixed value by setting the LSB of the insn_code
value.  There are two prefixed instructions, pld and pstd, for which
the suffix is not a valid SFFS instruction by itself, so their suffix
words (primary opcodes 57 and 61) have been given dummy insn_code
values, INSN_op57 and INSN_op61, which decode as illegal when they
are not preceded by a prefix.

For a prefixed instruction, decode1 examines the type and subtype
fields of the prefix and checks that the suffix is valid for the type
and subtype.  This check doesn't affect which entry of the decode
table is used; the result is passed down to decode2, and will in
future be acted upon in execute1.

The instruction address passed down to decode2 is the address of the
prefix.  To enable this, part of the instruction address is saved when
the prefix is seen, and then the instruction address received from
icache is partly overlaid by the saved prefix address.  Because
prefixed instructions are not permitted to cross 64-byte boundaries,
we only need to save bits 5:2 of the instruction address to do this.
If the alignment restriction ever gets relaxed, we will then need to
save more bits of the address.

Decode2 has been extended to handle the R bit of the prefix (in 8LS
and MLS forms) and to be able to generate the 34-bit immediate value
from the prefix and suffix.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 common.vhdl       |   7 +-
 decode1.vhdl      |  90 +++++++++++++++++++-
 decode2.vhdl      |  14 ++--
 decode_types.vhdl | 207 +++++++++++++++++++++++++++-------------------
 insn_helpers.vhdl |  15 ++++
 predecode.vhdl    |   5 ++
 6 files changed, 243 insertions(+), 95 deletions(-)

diff --git a/common.vhdl b/common.vhdl
index 44302c7..838179b 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -263,6 +263,9 @@ package common is
 	valid: std_ulogic;
 	stop_mark : std_ulogic;
 	nia: std_ulogic_vector(63 downto 0);
+        prefixed: std_ulogic;
+        prefix: std_ulogic_vector(25 downto 0);
+        illegal_suffix: std_ulogic;
 	insn: std_ulogic_vector(31 downto 0);
 	decode: decode_rom_t;
         br_pred: std_ulogic; -- Branch was predicted to be taken
@@ -274,7 +277,9 @@ package common is
         reg_c : gspr_index_t;
     end record;
     constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
-        (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
+        (valid => '0', stop_mark => '0', nia => (others => '0'),
+         prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
+         illegal_suffix => '0',
          decode => decode_rom_init, br_pred => '0', big_endian => '0',
          spr_info => spr_id_init, ram_spr => ram_spr_info_init,
          reg_a => (others => '0'), reg_b => (others => '0'), reg_c => (others => '0'));
diff --git a/decode1.vhdl b/decode1.vhdl
index c987bec..138e483 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -45,6 +45,16 @@ architecture behaviour of decode1 is
     signal decode_rom_addr : insn_code;
     signal decode : decode_rom_t;
 
+    type prefix_state_t is record
+        prefixed : std_ulogic;
+        prefix   : std_ulogic_vector(25 downto 0);
+        pref_ia  : std_ulogic_vector(3 downto 0);
+    end record;
+    constant prefix_state_init : prefix_state_t := (prefixed => '0', prefix => (others => '0'),
+                                                    pref_ia => (others => '0'));
+
+    signal pr, pr_in : prefix_state_t;
+
     signal fetch_failed : std_ulogic;
 
     -- If we have an FPU, then it is used for integer divisions,
@@ -266,6 +276,22 @@ architecture behaviour of decode1 is
         INSN_orc         =>  (ALU,  NONE, OP_OR,        NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0', NONE),
         INSN_ori         =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI,    RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_oris        =>  (ALU,  NONE, OP_OR,        NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_paddi       =>  (ALU,  NONE, OP_ADD,       RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plbz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pld         =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plfd        =>  (LDST, FPU,  OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, FRT,  '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plfs        =>  (LDST, FPU,  OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, FRT,  '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
+        INSN_plha        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plhz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plwa        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_plwz        =>  (LDST, NONE, OP_LOAD,      RA0_OR_CIA, CONST_PSI,   NONE, RT,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pnop        =>  (ALU,  NONE, OP_NOP,       NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstb        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstd        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstfd       =>  (LDST, FPU,  OP_STORE,     RA0_OR_CIA, CONST_PSI,   FRS,  NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstfs       =>  (LDST, FPU,  OP_STORE,     RA0_OR_CIA, CONST_PSI,   FRS,  NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
+        INSN_psth        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
+        INSN_pstw        =>  (LDST, NONE, OP_STORE,     RA0_OR_CIA, CONST_PSI,   RS,   NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_popcntb     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_popcntd     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
         INSN_popcntw     =>  (ALU,  NONE, OP_POPCNT,    NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@@ -434,12 +460,17 @@ begin
             if rst = '1' then
                 r <= Decode1ToDecode2Init;
                 fetch_failed <= '0';
+                pr <= prefix_state_init;
             elsif flush_in = '1' then
                 r.valid <= '0';
                 fetch_failed <= '0';
+                pr <= prefix_state_init;
             elsif stall_in = '0' then
                 r <= rin;
                 fetch_failed <= f_in.fetch_failed;
+                if f_in.valid = '1' then
+                    pr <= pr_in;
+                end if;
             end if;
             if rst = '1' then
                 br.br_nia <= (others => '0');
@@ -471,12 +502,18 @@ begin
         variable icode : insn_code;
         variable sprn : spr_num_t;
         variable maybe_rb : std_ulogic;
+        variable pv : prefix_state_t;
+        variable icode_bits : std_ulogic_vector(9 downto 0);
+        variable valid_suffix : std_ulogic;
     begin
         v := Decode1ToDecode2Init;
+        pv := pr;
 
         v.valid := f_in.valid;
         v.nia  := f_in.nia;
         v.insn := f_in.insn;
+        v.prefix := pr.prefix;
+        v.prefixed := pr.prefixed;
         v.stop_mark := f_in.stop_mark;
         v.big_endian := f_in.big_endian;
 
@@ -490,17 +527,59 @@ begin
         end if;
 
         icode := f_in.icode;
+        icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(icode), 10));
 
         if f_in.fetch_failed = '1' then
-            icode := INSN_fetch_fail;
+            icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_fetch_fail), 10));
             -- Only send down a single OP_FETCH_FAILED
             v.valid := not fetch_failed;
+            pv := prefix_state_init;
+
+        elsif pr.prefixed = '1' then
+            -- Check suffix value and convert to the prefixed instruction code
+            if pr.prefix(24) = '1' then
+                -- either pnop or illegal
+                icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_pnop), 10));
+            else
+                -- various load/store instructions
+                icode_bits(0) := '1';
+            end if;
+            valid_suffix := '0';
+            case pr.prefix(25 downto 23) is
+                when "000" =>    -- 8LS
+                    if icode >= INSN_first_8ls and icode < INSN_first_rb then
+                        valid_suffix := '1';
+                    end if;
+                when "100" =>   -- MLS
+                    if icode >= INSN_first_mls and icode < INSN_first_8ls then
+                        valid_suffix := '1';
+                    elsif icode >= INSN_first_fp_mls and icode < INSN_first_fp_nonmls then
+                        valid_suffix := '1';
+                    end if;
+                when "110" =>   -- MRR, i.e. pnop
+                    if pr.prefix(22 downto 20) = "000" then
+                        valid_suffix := '1';
+                    end if;
+                when others =>
+            end case;
+            v.nia(5 downto 2) := pr.pref_ia;
+            v.prefixed := '1';
+            v.prefix := pr.prefix;
+            v.illegal_suffix := not valid_suffix;
+            pv := prefix_state_init;
+
+        elsif icode = INSN_prefix then
+            pv.prefixed := '1';
+            pv.pref_ia := f_in.nia(5 downto 2);
+            pv.prefix := f_in.insn(25 downto 0);
+            v.valid := '0';
+
         end if;
-        decode_rom_addr <= icode;
+        decode_rom_addr <= insn_code'val(to_integer(unsigned(icode_bits)));
 
         if f_in.valid = '1' then
-            report "Decode " & insn_code'image(icode) & " " & to_hstring(f_in.insn) &
-                " at " & to_hstring(f_in.nia);
+            report "Decode " & insn_code'image(insn_code'val(to_integer(unsigned(icode_bits)))) & " " &
+                to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
         end if;
 
         -- Branch predictor
@@ -533,6 +612,8 @@ begin
         br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);
 
         -- Work out GPR/FPR read addresses
+        -- Note that for prefixed instructions we are working this out based
+        -- only on the suffix.
         maybe_rb := '0';
         vr.reg_1_addr := '0' & insn_ra(f_in.insn);
         vr.reg_2_addr := '0' & insn_rb(f_in.insn);
@@ -568,6 +649,7 @@ begin
         -- Update registers
         rin <= v;
         br_in <= bv;
+        pr_in <= pv;
 
         -- Update outputs
         d_out <= r;
diff --git a/decode2.vhdl b/decode2.vhdl
index f58bd9b..fa3b54d 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -83,12 +83,13 @@ architecture behaviour of decode2 is
     constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));
 
     function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
+                                 prefix : std_ulogic_vector(25 downto 0);
                                  instr_addr : std_ulogic_vector(63 downto 0))
         return decode_input_reg_t is
     begin
-        if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
+        if t = RA or ((t = RA_OR_ZERO or t = RA0_OR_CIA) and insn_ra(insn_in) /= "00000") then
             return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
-        elsif t = CIA then
+        elsif t = CIA or (t = RA0_OR_CIA and insn_prefix_r(prefix) = '1') then
             return ('0', (others => '0'), instr_addr);
         elsif HAS_FPU and t = FRA then
             return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
@@ -97,7 +98,8 @@ architecture behaviour of decode2 is
         end if;
     end;
 
-    function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0))
+    function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
+                                 prefix : std_ulogic_vector(25 downto 0))
         return decode_input_reg_t is
         variable ret : decode_input_reg_t;
     begin
@@ -114,6 +116,8 @@ architecture behaviour of decode2 is
                 ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
             when CONST_SI =>
                 ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)), 64)));
+            when CONST_PSI =>
+                ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_prefixed_si(prefix, insn_in)), 64)));
             when CONST_SI_HI =>
                 ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64)));
             when CONST_UI_HI =>
@@ -373,8 +377,8 @@ begin
         decoded_reg_c <= decode_input_reg_init;
         decoded_reg_o <= decode_output_reg_init;
         if d_in.valid = '1' then
-            decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.nia);
-            decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn);
+            decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.prefix, d_in.nia);
+            decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
             decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
             decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
         end if;
diff --git a/decode_types.vhdl b/decode_types.vhdl
index e9f6e70..428d943 100644
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@@ -34,15 +34,16 @@ package decode_types is
         -- The following instructions don't have an RB operand or access FPRs
         INSN_illegal, -- 0
         INSN_fetch_fail,
-        INSN_addi,
+        INSN_prefix,
+        INSN_pnop,
         INSN_addic,
         INSN_addic_dot,
         INSN_addis,
         INSN_addme,
         INSN_addpcis,
         INSN_addze,
-        INSN_andi_dot,
-        INSN_andis_dot, -- 10
+        INSN_andi_dot, -- 10
+        INSN_andis_dot,
         INSN_attn,
         INSN_b,
         INSN_bc,
@@ -51,8 +52,8 @@ package decode_types is
         INSN_bctar,
         INSN_cbcdtd,
         INSN_cdtbcd,
-        INSN_cmpi,
-        INSN_cmpli, -- 20
+        INSN_cmpi, -- 20
+        INSN_cmpli,
         INSN_cntlzw,
         INSN_cntlzd,
         INSN_cnttzw,
@@ -61,8 +62,8 @@ package decode_types is
         INSN_crandc,
         INSN_creqv,
         INSN_crnand,
-        INSN_crnor,
-        INSN_cror, -- 30
+        INSN_crnor, -- 30
+        INSN_cror,
         INSN_crorc,
         INSN_crxor,
         INSN_darn,
@@ -71,182 +72,203 @@ package decode_types is
         INSN_extsh,
         INSN_extsw,
         INSN_extswsli,
-        INSN_isync,
-        INSN_lbz, -- 40
+        INSN_isync, -- 40
         INSN_lbzu,
         INSN_ld,
         INSN_ldu,
-        INSN_lha,
         INSN_lhau,
-        INSN_lhz,
-        INSN_lhzu,
         INSN_lwa,
-        INSN_lwz,
-        INSN_lwzu, -- 50
+        INSN_lwzu,
         INSN_mcrf,
         INSN_mcrxrx,
         INSN_mfcr,
-        INSN_mfmsr,
+        INSN_mfmsr, -- 50
         INSN_mfspr,
         INSN_mtcrf,
         INSN_mtmsr,
         INSN_mtmsrd,
         INSN_mtspr,
-        INSN_mulli, -- 60
+        INSN_mulli,
         INSN_neg,
         INSN_nop,
         INSN_ori,
-        INSN_oris,
+        INSN_oris, -- 60
         INSN_popcntb,
         INSN_popcntw,
         INSN_popcntd,
         INSN_prtyw,
         INSN_prtyd,
-        INSN_rfid, -- 70
+        INSN_rfid,
         INSN_rldic,
         INSN_rldicl,
         INSN_rldicr,
-        INSN_rldimi,
+        INSN_rldimi, -- 70
         INSN_rlwimi,
         INSN_rlwinm,
         INSN_sc,
         INSN_setb,
         INSN_slbia,
-        INSN_sradi, -- 80
+        INSN_sradi,
         INSN_srawi,
-        INSN_stb,
         INSN_stbu,
         INSN_std,
-        INSN_stdu,
-        INSN_sth,
+        INSN_stdu, -- 80
         INSN_sthu,
-        INSN_stw,
         INSN_stwu,
-        INSN_subfic, -- 90
+        INSN_subfic,
         INSN_subfme,
         INSN_subfze,
         INSN_sync,
         INSN_tdi,
         INSN_tlbsync,
         INSN_twi,
-        INSN_wait,
+        INSN_wait, -- 90
         INSN_xori,
         INSN_xoris,
+        INSN_93, -- padding
+        INSN_94,
+        INSN_95,
+
+        -- Non-prefixed instructions that have a MLS:D prefixed form and
+        -- their corresponding prefixed instructions.
+        -- The non-prefixed versions have even indexes so that we can
+        -- convert them to the prefixed version by setting bit 0
+        INSN_addi, -- 96
+        INSN_paddi,
+        INSN_lbz,
+        INSN_plbz,
+        INSN_lha, -- 100
+        INSN_plha,
+        INSN_lhz,
+        INSN_plhz,
+        INSN_lwz,
+        INSN_plwz,
+        INSN_stb,
+        INSN_pstb,
+        INSN_sth,
+        INSN_psth,
+        INSN_stw, -- 110
+        INSN_pstw,
 
-        -- pad to 112 to simplify comparison logic
-        INSN_100, INSN_101, INSN_102, INSN_103,
-        INSN_104, INSN_105, INSN_106, INSN_107,
-        INSN_108, INSN_109, INSN_110, INSN_111,
+        -- Slots for non-prefixed opcodes that are 8LS:D when prefixed
+        INSN_lhzu, -- 112
+        INSN_plwa,
+        INSN_op57,
+        INSN_pld,
+        INSN_op61,
+        INSN_pstd,
+
+        -- pad to 128 to simplify comparison logic
+        INSN_076, INSN_077,
+        INSN_078, INSN_079, INSN_07a, INSN_07b, INSN_07c, INSN_07d, INSN_07e, INSN_07f,
 
         -- The following instructions have an RB operand but don't access FPRs
         INSN_add,
         INSN_addc,
-        INSN_adde,
+        INSN_adde, -- 130
         INSN_addex,
         INSN_addg6s,
         INSN_and,
         INSN_andc,
         INSN_bperm,
-        INSN_cmp, -- 120
+        INSN_cmp,
         INSN_cmpb,
         INSN_cmpeqb,
         INSN_cmpl,
-        INSN_cmprb,
+        INSN_cmprb, -- 140
         INSN_dcbf,
         INSN_dcbst,
         INSN_dcbt,
         INSN_dcbtst,
         INSN_dcbz,
-        INSN_divd, -- 130
+        INSN_divd,
         INSN_divdu,
         INSN_divde,
         INSN_divdeu,
-        INSN_divw,
+        INSN_divw, -- 150
         INSN_divwu,
         INSN_divwe,
         INSN_divweu,
         INSN_eqv,
         INSN_icbi,
-        INSN_icbt, -- 140
+        INSN_icbt,
         INSN_isel,
         INSN_lbarx,
         INSN_lbzcix,
-        INSN_lbzux,
+        INSN_lbzux, -- 160
         INSN_lbzx,
         INSN_ldarx,
         INSN_ldbrx,
         INSN_ldcix,
         INSN_ldx,
-        INSN_ldux, -- 150
+        INSN_ldux,
         INSN_lharx,
         INSN_lhax,
         INSN_lhaux,
-        INSN_lhbrx,
+        INSN_lhbrx, -- 170
         INSN_lhzcix,
         INSN_lhzx,
         INSN_lhzux,
         INSN_lwarx,
         INSN_lwax,
-        INSN_lwaux, -- 160
+        INSN_lwaux,
         INSN_lwbrx,
         INSN_lwzcix,
         INSN_lwzx,
-        INSN_lwzux,
+        INSN_lwzux, -- 180
         INSN_modsd,
         INSN_modsw,
         INSN_moduw,
         INSN_modud,
         INSN_mulhw,
-        INSN_mulhwu, -- 170
+        INSN_mulhwu,
         INSN_mulhd,
         INSN_mulhdu,
         INSN_mullw,
-        INSN_mulld,
+        INSN_mulld, -- 190
         INSN_nand,
         INSN_nor,
         INSN_or,
         INSN_orc,
         INSN_rldcl,
-        INSN_rldcr, -- 180
+        INSN_rldcr,
         INSN_rlwnm,
         INSN_slw,
         INSN_sld,
-        INSN_sraw,
+        INSN_sraw, -- 200
         INSN_srad,
         INSN_srw,
         INSN_srd,
         INSN_stbcix,
         INSN_stbcx,
-        INSN_stbx, -- 190
+        INSN_stbx,
         INSN_stbux,
         INSN_stdbrx,
         INSN_stdcix,
-        INSN_stdcx,
+        INSN_stdcx, -- 210
         INSN_stdx,
         INSN_stdux,
         INSN_sthbrx,
         INSN_sthcix,
         INSN_sthcx,
-        INSN_sthx, -- 200
+        INSN_sthx,
         INSN_sthux,
         INSN_stwbrx,
         INSN_stwcix,
-        INSN_stwcx,
+        INSN_stwcx, -- 220
         INSN_stwx,
         INSN_stwux,
         INSN_subf,
         INSN_subfc,
         INSN_subfe,
-        INSN_td, -- 210
+        INSN_td,
         INSN_tlbie,
         INSN_tlbiel,
         INSN_tw,
-        INSN_xor,
+        INSN_xor, -- 230
 
-        -- pad to 224 to simplify comparison logic
-        INSN_215,
-        INSN_216, INSN_217, INSN_218, INSN_219,
-        INSN_220, INSN_221, INSN_222, INSN_223,
+        -- pad to 232 to simplify comparison logic
+        INSN_231,
 
         -- The following instructions have a third input addressed by RC
         INSN_maddld,
@@ -254,9 +276,7 @@ package decode_types is
         INSN_maddhdu,
 
         -- pad to 256 to simplify comparison logic
-        INSN_227,
-        INSN_228, INSN_229, INSN_230, INSN_231,
-        INSN_232, INSN_233, INSN_234, INSN_235,
+        INSN_235,
         INSN_236, INSN_237, INSN_238, INSN_239,
         INSN_240, INSN_241, INSN_242, INSN_243,
         INSN_244, INSN_245, INSN_246, INSN_247,
@@ -264,39 +284,52 @@ package decode_types is
         INSN_252, INSN_253, INSN_254, INSN_255,
 
         -- The following instructions access floating-point registers
-        -- These ones have an FRS operand, but RA/RB are GPRs
-        INSN_stfd,
-        INSN_stfdu,
+        -- They have an FRS operand, but RA/RB are GPRs
+
+        -- Non-prefixed floating-point loads and stores that have a MLS:D
+        -- prefixed form, and their corresponding prefixed instructions.
+        INSN_stfd, -- 256
+        INSN_pstfd,
         INSN_stfs,
+        INSN_pstfs,
+        INSN_lfd, -- 260
+        INSN_plfd,
+        INSN_lfs,
+        INSN_plfs,
+
+        -- opcodes that can't have a prefix
+        INSN_stfdu, -- 264
         INSN_stfsu,
-        INSN_stfdux, -- 260
+        INSN_stfdux,
         INSN_stfdx,
         INSN_stfiwx,
         INSN_stfsux,
-        INSN_stfsx,
+        INSN_stfsx, -- 270
         -- These ones don't actually have an FRS operand (rather an FRT destination)
         -- but are here so that all FP instructions are >= INST_first_frs.
-        INSN_lfd,
         INSN_lfdu,
-        INSN_lfs,
         INSN_lfsu,
         INSN_lfdx,
-        INSN_lfdux, -- 270
+        INSN_lfdux,
         INSN_lfiwax,
         INSN_lfiwzx,
         INSN_lfsx,
         INSN_lfsux,
         -- These are here in order to keep the FP instructions together
         INSN_mcrfs,
-        INSN_mtfsb,
+        INSN_mtfsb, -- 280
         INSN_mtfsfi,
-        INSN_278, -- padding
-        INSN_279,
+        INSN_282, -- padding
+        INSN_283,
+        INSN_284,
+        INSN_285,
+        INSN_286,
+        INSN_287,
 
         -- The following instructions access FRA and/or FRB operands
-        INSN_fabs, -- 280
+        INSN_fabs, -- 288
         INSN_fadd,
-        INSN_fadds,
+        INSN_fadds, -- 290
         INSN_fcfid,
         INSN_fcfids,
         INSN_fcfidu,
@@ -304,9 +337,9 @@ package decode_types is
         INSN_fcmpo,
         INSN_fcmpu,
         INSN_fcpsgn,
-        INSN_fctid, -- 290
+        INSN_fctid,
         INSN_fctidz,
-        INSN_fctidu,
+        INSN_fctidu, -- 300
         INSN_fctiduz,
         INSN_fctiw,
         INSN_fctiwz,
@@ -314,9 +347,9 @@ package decode_types is
         INSN_fctiwuz,
         INSN_fdiv,
         INSN_fdivs,
-        INSN_fmr, -- 300
+        INSN_fmr,
         INSN_fmrgew,
-        INSN_fmrgow,
+        INSN_fmrgow, -- 310
         INSN_fnabs,
         INSN_fneg,
         INSN_fre,
@@ -324,9 +357,9 @@ package decode_types is
         INSN_frim,
         INSN_frin,
         INSN_frip,
-        INSN_friz, -- 310
+        INSN_friz,
         INSN_frsp,
-        INSN_frsqrte,
+        INSN_frsqrte, -- 320
         INSN_frsqrtes,
         INSN_fsqrt,
         INSN_fsqrts,
@@ -334,18 +367,18 @@ package decode_types is
         INSN_fsubs,
         INSN_ftdiv,
         INSN_ftsqrt,
-        INSN_mffs, -- 320
+        INSN_mffs,
         INSN_mtfsf,
 
-        -- pad to 328
-        INSN_322, INSN_323, INSN_324, INSN_325, INSN_326, INSN_327,
+        -- pad to 336
+        INSN_330, INSN_331, INSN_332, INSN_333, INSN_334, INSN_335,
 
         -- The following instructions access FRA, FRB (possibly) and FRC operands
-        INSN_fmul,
+        INSN_fmul, -- 336
         INSN_fmuls,
-        INSN_fmadd, -- 330
+        INSN_fmadd,
         INSN_fmadds,
-        INSN_fmsub,
+        INSN_fmsub, -- 340
         INSN_fmsubs,
         INSN_fnmadd,
         INSN_fnmadds,
@@ -359,10 +392,14 @@ package decode_types is
     constant INSN_first_frs : insn_code := INSN_stfd;
     constant INSN_first_frab : insn_code := INSN_fabs;
     constant INSN_first_frabc : insn_code := INSN_fmul;
+    constant INSN_first_mls : insn_code := INSN_addi;
+    constant INSN_first_8ls : insn_code := INSN_lhzu;
+    constant INSN_first_fp_mls : insn_code := INSN_stfd;
+    constant INSN_first_fp_nonmls : insn_code := INSN_stfdu;
 
-    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA);
+    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA);
     type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
-                           CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB);
+                           CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI, FRB);
     type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
     type output_reg_a_t is (NONE, RT, RA, FRT);
     type rc_t is (NONE, ONE, RC, RCOE);
diff --git a/insn_helpers.vhdl b/insn_helpers.vhdl
index 2ddcadb..acd75e9 100644
--- a/insn_helpers.vhdl
+++ b/insn_helpers.vhdl
@@ -43,6 +43,9 @@ package insn_helpers is
     function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
     function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
     function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
+    function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic;
+    function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
+        return std_ulogic_vector;
 end package insn_helpers;
 
 package body insn_helpers is
@@ -250,4 +253,16 @@ package body insn_helpers is
     begin
         return insn_in(15 downto 12);
     end;
+
+    function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic is
+    begin
+        return prefix(20);
+    end;
+
+    function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
+        return std_ulogic_vector is
+    begin
+        return prefix(17 downto 0) & suffix(15 downto 0);
+    end;
+
 end package body insn_helpers;
diff --git a/predecode.vhdl b/predecode.vhdl
index 7e1149b..41b26ad 100644
--- a/predecode.vhdl
+++ b/predecode.vhdl
@@ -158,6 +158,11 @@ architecture behaviour of predecoder is
         2#111111_11010# to 2#111111_11011# =>  INSN_fmadd,
         2#111111_11100# to 2#111111_11101# =>  INSN_fnmsub,
         2#111111_11110# to 2#111111_11111# =>  INSN_fnmadd,
+        -- prefix word, PO1
+        2#000001_00000# to 2#000001_11111# =>  INSN_prefix,
+        -- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed
+        2#111001_00000# to 2#111001_11111# =>  INSN_op57,
+        2#111101_00000# to 2#111101_11111# =>  INSN_op61,
         others                             =>  INSN_illegal
         );