core: Implement the maddhd, maddhdu and maddld instructions

These instructions use major opcode 4 and have a third GPR input operand, so we need a decode table for major opcode 4 and some plumbing to get the RC register operand read. The multiply-add instructions use the same insn_type_t values as the regular multiply instructions, and we distinguish in execute1 by looking at the major opcode. This turns out to be convenient because we don't have to add any cases in the code that handles the output of the multiplier, and it frees up some insn_type_t values. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago · 290b05f97d
parent 8edfbf638b
commit 290b05f97d
6 changed files with 61 additions and 9 deletions
--- a/decode1.vhdl
+++ b/decode1.vhdl
@ -34,6 +34,8 @@ architecture behaviour of decode1 is
    subtype major_opcode_t is unsigned(5 downto 0);
    type major_rom_array_t is array(0 to 63) of decode_rom_t;
    type minor_valid_array_t is array(0 to 1023) of std_ulogic;
+    type minor_valid_array_2t is array(0 to 2047) of std_ulogic;
+    type op_4_subop_array_t is array(0 to 63) of decode_rom_t;
    type op_19_subop_array_t is array(0 to 7) of decode_rom_t;
    type op_30_subop_array_t is array(0 to 15) of decode_rom_t;
    type op_31_subop_array_t is array(0 to 1023) of decode_rom_t;
@ -85,6 +87,24 @@ architecture behaviour of decode1 is
        others   => illegal_inst
        );

+    -- indexed by insn(5 downto 0) & insn(10 downto 6): the 6-bit minor opcode forms the upper index bits and the third-register field the lower 5, so each valid opcode spans 32 consecutive entries
+    constant decode_op_4_valid : minor_valid_array_2t := (
+        2#11000000000# to 2#11000011111# => '1',        -- maddhd
+        2#11000100000# to 2#11000111111# => '1',        -- maddhdu
+        2#11001100000# to 2#11001111111# => '1',        -- maddld
+        others => '0'
+        );
+
+    -- indexed by bits 5..0 of instruction word
+    -- These are VA-form instructions: bit 0 is part of the 6-bit minor opcode,
+    -- not a record (Rc) bit, so the rc column must be NONE.  Using RC would
+    -- treat bit 0 as the record bit, and that bit is set in the encodings of
+    -- maddhdu (2#110001#) and maddld (2#110011#).
+    -- Entries not listed are never selected for a legal instruction because
+    -- decode_op_4_valid is checked first.
+    constant decode_op_4_array : op_4_subop_array_t := (
+        --                   unit    internal      in1         in2          in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl
+        --                                op                                            in   out   A   out  in    out  len        ext                                 pipe
+        2#110000#  =>       (ALU,    OP_MUL_H64,   RA,         RB,          RCR,  RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- maddhd
+        2#110001#  =>       (ALU,    OP_MUL_H64,   RA,         RB,          RCR,  RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- maddhdu
+        2#110011#  =>       (ALU,    OP_MUL_L64,   RA,         RB,          RCR,  RT,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- maddld
+        others   => decode_rom_init
+        );
+
    -- indexed by bits 10..1 of instruction word
    constant decode_op_19_valid : minor_valid_array_t := (
        -- addpcis, 5 upper bits are part of constant
@ -390,6 +410,7 @@ begin
        variable v : Decode1ToDecode2Type;
        variable f : Decode1ToFetch1Type;
        variable majorop : major_opcode_t;
+        variable minor4op : std_ulogic_vector(10 downto 0);
        variable op_19_bits: std_ulogic_vector(2 downto 0);
        variable sprn : spr_num_t;
        variable br_nia    : std_ulogic_vector(61 downto 0);
@ -418,6 +439,15 @@ begin
            end if;
            v.decode := fetch_fail_inst;

+        elsif majorop = "000100" then
+            -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*)
+            minor4op := f_in.insn(5 downto 0) & f_in.insn(10 downto 6);
+            if decode_op_4_valid(to_integer(unsigned(minor4op))) = '1' then
+                v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0))));
+            else
+                v.decode := illegal_inst;
+            end if;
+
        elsif majorop = "011111" then
            -- major opcode 31, lots of things
            v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
--- a/decode2.vhdl
+++ b/decode2.vhdl
@ -135,6 +135,8 @@ architecture behaviour of decode2 is
        case t is
            when RS =>
                return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
+            when RCR =>
+                return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
            when NONE =>
                return ('0', (others => '0'), (others => '0'));
        end case;
@ -282,7 +284,8 @@ begin
                       else gpr_to_gspr(insn_ra(d_in.insn));
    r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
                       else gpr_to_gspr(insn_rb(d_in.insn));
-    r_out.read3_reg <= insn_rs(d_in.insn);
+    r_out.read3_reg <= insn_rcreg(d_in.insn) when d_in.decode.input_reg_c = RCR
+                       else insn_rs(d_in.insn);

    c_out.read <= d_in.decode.input_cr;

--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@ -9,7 +9,7 @@ package decode_types is
 			 OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
 			 OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
 			 OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
-			 OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD,
+			 OP_LOAD, OP_STORE,
 			 OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
 			 OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
 			 OP_MUL_H64, OP_MUL_H32, OP_OR,
@ -23,7 +23,7 @@ package decode_types is
    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
    type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
                           CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);
-    type input_reg_c_t is (NONE, RS);
+    type input_reg_c_t is (NONE, RS, RCR);
    type output_reg_a_t is (NONE, RT, RA, SPR);
    type rc_t is (NONE, ONE, RC);
    type carry_in_t is (ZERO, CA, ONE);
--- a/execute1.vhdl
+++ b/execute1.vhdl
@ -309,6 +309,7 @@ begin
        variable taken_branch : std_ulogic;
        variable abs_branch : std_ulogic;
        variable spr_val : std_ulogic_vector(63 downto 0);
+        variable addend : std_ulogic_vector(127 downto 0);
    begin
 	result := (others => '0');
 	result_with_carry := (others => '0');
@ -408,8 +409,20 @@ begin
            x_to_divider.is_modulus <= '1';
        end if;

+        addend := (others => '0');
+        if e_in.insn(26) = '0' then
+            -- integer multiply-add, major op 4 (if it is a multiply)
+            addend(63 downto 0) := c_in;
+            if e_in.is_signed = '1' then
+                addend(127 downto 64) := (others => c_in(63));
+            end if;
+        end if;
+        if (sign1 xor sign2) = '1' then
+            addend := not addend;
+        end if;
+
        x_to_multiply.not_result <= sign1 xor sign2;
-        x_to_multiply.addend <= (others => sign1 xor sign2);
+        x_to_multiply.addend <= addend;
        x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
        if e_in.is_32bit = '0' then
            -- 64-bit forms
--- a/insn_helpers.vhdl
+++ b/insn_helpers.vhdl
@ -6,6 +6,7 @@ package insn_helpers is
    function insn_rt (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_ra (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_rb (insn_in : std_ulogic_vector) return std_ulogic_vector;
+    function insn_rcreg (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_ui (insn_in : std_ulogic_vector) return std_ulogic_vector;
    function insn_l (insn_in : std_ulogic_vector) return std_ulogic;
@ -59,6 +60,11 @@ package body insn_helpers is
        return insn_in(15 downto 11);
    end;

+    function insn_rcreg (insn_in : std_ulogic_vector) return std_ulogic_vector is  -- returns the 5-bit field at bits 10..6, the third GPR operand (RC) number of the multiply-add instructions
+    begin
+        return insn_in(10 downto 6);
+    end;
+
    function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector is
    begin
        return insn_in(15 downto 0);
--- a/scripts/fmt_log/fmt_log.c
+++ b/scripts/fmt_log/fmt_log.c
@ -90,11 +90,11 @@ const char *ops[64] =
 	"illegal", "nop    ", "add    ", "and    ", "attn   ", "b      ", "bc     ", "bcreg  ",
 	"bperm  ", "cmp    ", "cmpb   ", "cmpeqb ", "cmprb  ", "cntz   ", "crop   ", "darn   ",
 	"dcbf   ", "dcbst  ", "dcbt   ", "dcbtst ", "dcbz   ", "div    ", "dive   ", "exts   ",
-	"extswsl", "icbi   ", "icbt   ", "isel   ", "isync  ", "ld     ", "st     ", "maddhd ",
-	"maddhdu", "maddld ", "mcrxrx ", "mfcr   ", "mfmsr  ", "mfspr  ", "mod    ", "mtcrf  ",
-	"mtmsr  ", "mtspr  ", "mull64 ", "mulh64 ", "mulh32 ", "or     ", "popcnt ", "prty   ",
-	"rfid   ", "rlc    ", "rlcl   ", "rlcr   ", "sc     ", "setb   ", "shl    ", "shr    ",
-	"sync   ", "tlbie  ", "trap   ", "xor    ", "ffail  ", "?61    ", "?62    ", "?63    "
+	"extswsl", "icbi   ", "icbt   ", "isel   ", "isync  ", "ld     ", "st     ", "mcrxrx ",
+	"mfcr   ", "mfmsr  ", "mfspr  ", "mod    ", "mtcrf  ", "mtmsr  ", "mtspr  ", "mull64 ",
+	"mulh64 ", "mulh32 ", "or     ", "popcnt ", "prty   ", "rfid   ", "rlc    ", "rlcl   ",
+	"rlcr   ", "sc     ", "setb   ", "shl    ", "shr    ", "sync   ", "tlbie  ", "trap   ",
+	"xor    ", "ffail  ", "?58    ", "?59    ", "?60    ", "?61    ", "?62    ", "?63    "
 };

 const char *spr_names[13] =