multiply: Move data formatting out of decode2

At present, decode2 does some formatting of the input data for the multiply unit: truncation to 32 bits for 32-bit operations, followed by sign or zero extension to 65 bits. This would prevent forwarding of results within the execute pipeline in the future, so we move the formatting to the first cycle of the multiply pipeline.

It turns out that we have a wasted cycle at the front of the multiply pipe, because decode2 has a register at its output and multiply has a register at its input. For now we use this cycle to do the data formatting.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
6 years ago · 9568e5f848
parent ac7df6fc04
commit 9568e5f848
4 changed files with 60 additions and 47 deletions
--- a/common.vhdl
+++ b/common.vhdl
@@ -86,11 +86,15 @@ package common is
 	valid: std_ulogic;
 	insn_type: insn_type_t;
 	write_reg: std_ulogic_vector(4 downto 0);
-	data1: std_ulogic_vector(64 downto 0);
-	data2: std_ulogic_vector(64 downto 0);
+	data1: std_ulogic_vector(63 downto 0);
+	data2: std_ulogic_vector(63 downto 0);
 	rc: std_ulogic;
+	is_32bit: std_ulogic;
+	is_signed: std_ulogic;
    end record;
-    constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0'));
+    constant Decode2ToMultiplyInit : Decode2ToMultiplyType :=
+	    (valid => '0', insn_type => OP_ILLEGAL, rc => '0', is_32bit => '0', is_signed => '0',
+	     others => (others => '0'));

    type Decode2ToDividerType is record
 	valid: std_ulogic;
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -270,30 +270,12 @@ begin

 		-- multiply unit
 		v.m.insn_type := d_in.decode.insn_type;
-		mul_a := decoded_reg_a.data;
-		mul_b := decoded_reg_b.data;
+		v.m.data1 := decoded_reg_a.data;
+		v.m.data2 := decoded_reg_b.data;
 		v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
 		v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
-
-		if d_in.decode.is_32bit = '1' then
-			if d_in.decode.is_signed = '1' then
-				v.m.data1 := (others => mul_a(31));
-				v.m.data1(31 downto 0) := mul_a(31 downto 0);
-				v.m.data2 := (others => mul_b(31));
-				v.m.data2(31 downto 0) := mul_b(31 downto 0);
-			else
-				v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
-				v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
-			end if;
-		else
-			if d_in.decode.is_signed = '1' then
-				v.m.data1 := mul_a(63) & mul_a;
-				v.m.data2 := mul_b(63) & mul_b;
-			else
-				v.m.data1 := '0' & mul_a;
-				v.m.data2 := '0' & mul_b;
-			end if;
-		end if;
+                v.m.is_32bit := d_in.decode.is_32bit;
+                v.m.is_signed := d_in.decode.is_signed;

                -- divide unit
                -- PPC divide and modulus instruction words have these bits in
--- a/multiply.vhdl
+++ b/multiply.vhdl
@@ -38,12 +38,17 @@ architecture behaviour of multiply is
    end record;

    signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit);
+
+    signal data1, data1_in : std_ulogic_vector(64 downto 0);
+    signal data2, data2_in : std_ulogic_vector(64 downto 0);
 begin
    multiply_0: process(clk)
    begin
        if rising_edge(clk) then
            m <= m_in;
            r <= rin;
+	    data1 <= data1_in;
+	    data2 <= data2_in;
        end if;
    end process;

@@ -56,9 +61,30 @@ begin

        m_out <= MultiplyToWritebackInit;

+	if m_in.is_32bit = '1' then
+	    if m_in.is_signed = '1' then
+		data1_in <= (others => m_in.data1(31));
+		data1_in(31 downto 0) <= m_in.data1(31 downto 0);
+		data2_in <= (others => m_in.data2(31));
+		data2_in(31 downto 0) <= m_in.data2(31 downto 0);
+	    else
+		data1_in <= '0' & x"00000000" & m_in.data1(31 downto 0);
+		data2_in <= '0' & x"00000000" & m_in.data2(31 downto 0);
+	    end if;
+	else
+	    if m_in.is_signed = '1' then
+		data1_in <= m_in.data1(63) & m_in.data1;
+		data2_in <= m_in.data2(63) & m_in.data2;
+	    else
+		data1_in <= '0' & m_in.data1;
+		data2_in <= '0' & m_in.data2;
+	    end if;
+	end if;
+
+
        v.multiply_pipeline(0).valid := m.valid;
        v.multiply_pipeline(0).insn_type := m.insn_type;
-        v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
+        v.multiply_pipeline(0).data := signed(data1) * signed(data2);
        v.multiply_pipeline(0).write_reg := m.write_reg;
        v.multiply_pipeline(0).rc := m.rc;

--- a/multiply_tb.vhdl
+++ b/multiply_tb.vhdl
@@ -41,8 +41,10 @@ begin
        m1.valid <= '1';
        m1.insn_type <= OP_MUL_L64;
        m1.write_reg <= "10001";
-        m1.data1 <= '0' & x"0000000000001000";
-        m1.data2 <= '0' & x"0000000000001111";
+        m1.data1 <= x"0000000000001000";
+        m1.data2 <= x"0000000000001111";
+        m1.is_32bit <= '0';
+        m1.is_signed <= '0';
        m1.rc <= '0';

        wait for clk_period;
@@ -88,8 +90,8 @@ begin

            behave_rt := ppc_mulld(ra, rb);

-            m1.data1 <= '0' & ra;
-            m1.data2 <= '0' & rb;
+            m1.data1 <= ra;
+            m1.data2 <= rb;
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_L64;

@@ -112,8 +114,8 @@ begin

            behave_rt := ppc_mulhdu(ra, rb);

-            m1.data1 <= '0' & ra;
-            m1.data2 <= '0' & rb;
+            m1.data1 <= ra;
+            m1.data2 <= rb;
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_H64;

@@ -136,8 +138,9 @@ begin

            behave_rt := ppc_mulhd(ra, rb);

-            m1.data1 <= ra(63) & ra;
-            m1.data2 <= rb(63) & rb;
+            m1.data1 <= ra;
+            m1.data2 <= rb;
+            m1.is_signed <= '1';
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_H64;

@@ -160,12 +163,11 @@ begin

            behave_rt := ppc_mullw(ra, rb);

-            m1.data1 <= (others => ra(31));
-            m1.data1(31 downto 0) <= ra(31 downto 0);
-            m1.data2 <= (others => rb(31));
-            m1.data2(31 downto 0) <= rb(31 downto 0);
+            m1.data1 <= ra;
+            m1.data2 <= rb;
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_L64;
+            m1.is_32bit <= '1';

            wait for clk_period;

@@ -186,10 +188,8 @@ begin

            behave_rt := ppc_mulhw(ra, rb);

-            m1.data1 <= (others => ra(31));
-            m1.data1(31 downto 0) <= ra(31 downto 0);
-            m1.data2 <= (others => rb(31));
-            m1.data2(31 downto 0) <= rb(31 downto 0);
+            m1.data1 <= ra;
+            m1.data2 <= rb;
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_H32;

@@ -212,10 +212,9 @@ begin

            behave_rt := ppc_mulhwu(ra, rb);

-            m1.data1 <= (others => '0');
-            m1.data1(31 downto 0) <= ra(31 downto 0);
-            m1.data2 <= (others => '0');
-            m1.data2(31 downto 0) <= rb(31 downto 0);
+            m1.data1 <= ra;
+            m1.data2 <= rb;
+            m1.is_signed <= '0';
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_H32;

@@ -238,9 +237,11 @@ begin

            behave_rt := ppc_mulli(ra, si);

-            m1.data1 <= ra(63) & ra;
+            m1.data1 <= ra;
            m1.data2 <= (others => si(15));
            m1.data2(15 downto 0) <= si;
+            m1.is_signed <= '1';
+            m1.is_32bit <= '0';
            m1.valid <= '1';
            m1.insn_type <= OP_MUL_L64;