From e02d8060edaa0300f14a695895f4a0b622e0351a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 8 Aug 2022 22:26:39 +1000 Subject: [PATCH] Change the multiplier interface to support signed multipliers This adds an 'is_signed' signal to MultiplyInputType to indicate whether the data1 and data2 fields are to be interpreted as signed or unsigned numbers. The 'not_result' field is replaced by a 'subtract' field which provides a more intuitive interface for requesting that the product be subtracted from the addend rather than added, i.e. subtract = 1 gives C - A * B, whereas subtract = 0 gives C + A * B. (Previously the users of the multipliers got the same effect by complementing the addend and setting not_result = 1.) The is_32bit field is removed because it is no longer used now that we have a separate 32-bit multiplier. The default value of the PIPELINE_DEPTH generic of the multiplier in multiply.vhdl is also changed from 4 to 3. Signed-off-by: Paul Mackerras --- common.vhdl | 9 ++++----- execute1.vhdl | 44 ++++++++++++++++++++------------------------ fpu.vhdl | 10 +++------- multiply-32s.vhdl | 3 ++- multiply.vhdl | 29 ++++++++++++----------------- xilinx-mult-32s.vhdl | 8 +++++--- xilinx-mult.vhdl | 22 +++++++++++++--------- 7 files changed, 59 insertions(+), 66 deletions(-) diff --git a/common.vhdl b/common.vhdl index 6287be5..7c7a8d5 100644 --- a/common.vhdl +++ b/common.vhdl @@ -385,12 +385,11 @@ package common is data1: std_ulogic_vector(63 downto 0); data2: std_ulogic_vector(63 downto 0); addend: std_ulogic_vector(127 downto 0); - is_32bit: std_ulogic; - not_result: std_ulogic; + is_signed: std_ulogic; + subtract: std_ulogic; -- 0 => addend + data1 * data2, 1 => addend - data1 * data2 end record; - constant MultiplyInputInit : MultiplyInputType := (valid => '0', - is_32bit => '0', not_result => '0', - others => (others => '0')); + constant MultiplyInputInit : MultiplyInputType := (data1 => 64x"0", data2 => 64x"0", + addend => 128x"0", others => '0'); type MultiplyOutputType is record valid: std_ulogic; diff --git a/execute1.vhdl b/execute1.vhdl index 
948bdd6..721d493 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -695,7 +695,22 @@ begin overflow_32 <= calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31)); overflow_64 <= calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63)); - -- signals to multiply and divide units + -- signals to multiplier + addend := (others => '0'); + if e_in.reg_valid3 = '1' then + -- integer multiply-add, major op 4 (if it is a multiply) + addend(63 downto 0) := c_in; + if e_in.is_signed = '1' then + addend(127 downto 64) := (others => c_in(63)); + end if; + end if; + x_to_multiply.data1 <= std_ulogic_vector(a_in); + x_to_multiply.data2 <= std_ulogic_vector(b_in); + x_to_multiply.is_signed <= e_in.is_signed; + x_to_multiply.subtract <= '0'; + x_to_multiply.addend <= addend; + + -- Interface to divide unit sign1 := '0'; sign2 := '0'; if e_in.is_signed = '1' then @@ -719,7 +734,6 @@ begin abs2 := - signed(b_in); end if; - -- Interface to multiply and divide units x_to_divider.is_signed <= e_in.is_signed; x_to_divider.is_32bit <= e_in.is_32bit; x_to_divider.is_extended <= '0'; @@ -728,24 +742,6 @@ begin x_to_divider.is_modulus <= '1'; end if; x_to_divider.flush <= flush_in; - - addend := (others => '0'); - if e_in.reg_valid3 = '1' then - -- integer multiply-add, major op 4 (if it is a multiply) - addend(63 downto 0) := c_in; - if e_in.is_signed = '1' then - addend(127 downto 64) := (others => c_in(63)); - end if; - end if; - if (sign1 xor sign2) = '1' then - addend := not addend; - end if; - - x_to_multiply.data1 <= std_ulogic_vector(abs1); - x_to_multiply.data2 <= std_ulogic_vector(abs2); - x_to_multiply.is_32bit <= e_in.is_32bit; - x_to_multiply.not_result <= sign1 xor sign2; - x_to_multiply.addend <= addend; x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); if e_in.is_32bit = '0' then -- 64-bit forms @@ -766,11 +762,11 @@ begin end if; -- signals to 32-bit multiplier - x_to_mult_32s.data1 <= 31x"0" & (a_in(31) and e_in.is_signed) & a_in(31 downto 
0); - x_to_mult_32s.data2 <= 31x"0" & (b_in(31) and e_in.is_signed) & b_in(31 downto 0); + x_to_mult_32s.data1 <= 32x"0" & a_in(31 downto 0); + x_to_mult_32s.data2 <= 32x"0" & b_in(31 downto 0); + x_to_mult_32s.is_signed <= e_in.is_signed; -- The following are unused, but set here to avoid X states - x_to_mult_32s.is_32bit <= '1'; - x_to_mult_32s.not_result <= '0'; + x_to_mult_32s.subtract <= '0'; x_to_mult_32s.addend <= (others => '0'); shortmul_result <= std_ulogic_vector(resize(signed(mshort_p), 64)); diff --git a/fpu.vhdl b/fpu.vhdl index 417a318..eaa4cf2 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1071,7 +1071,7 @@ begin set_b_mant := '0'; set_c := '0'; set_s := '0'; - f_to_multiply.is_32bit <= '0'; + f_to_multiply.is_signed <= '0'; f_to_multiply.valid <= '0'; msel_1 <= MUL1_A; msel_2 <= MUL2_C; @@ -3227,12 +3227,8 @@ begin maddend(UNIT_BIT - 1 downto 0) := r.s; when others => end case; - if msel_inv = '1' then - f_to_multiply.addend <= not maddend; - else - f_to_multiply.addend <= maddend; - end if; - f_to_multiply.not_result <= msel_inv; + f_to_multiply.addend <= maddend; + f_to_multiply.subtract <= msel_inv; if set_y = '1' then v.y := f_to_multiply.data2; end if; diff --git a/multiply-32s.vhdl b/multiply-32s.vhdl index 0639dbf..ea3e2a8 100644 --- a/multiply-32s.vhdl +++ b/multiply-32s.vhdl @@ -40,7 +40,8 @@ begin variable ov : std_ulogic; begin v.valid := m_in.valid; - v.data := signed(m_in.data1(32 downto 0)) * signed(m_in.data2(32 downto 0)); + v.data := signed((m_in.is_signed and m_in.data1(31)) & m_in.data1(31 downto 0)) * + signed((m_in.is_signed and m_in.data2(31)) & m_in.data2(31 downto 0)); d := std_ulogic_vector(r.data(63 downto 0)); diff --git a/multiply.vhdl b/multiply.vhdl index c09fc22..615ceea 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -7,7 +7,7 @@ use work.common.all; entity multiply is generic ( - PIPELINE_DEPTH : natural := 4 + PIPELINE_DEPTH : natural := 3 ); port ( clk : in std_logic; @@ -23,11 +23,8 @@ architecture behaviour of 
multiply is type multiply_pipeline_stage is record valid : std_ulogic; data : unsigned(127 downto 0); - is_32bit : std_ulogic; - not_res : std_ulogic; end record; constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', - is_32bit => '0', not_res => '0', data => (others => '0')); type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; @@ -52,31 +49,29 @@ begin multiply_1: process(all) variable v : reg_type; + variable a, b : std_ulogic_vector(64 downto 0); + variable prod : std_ulogic_vector(129 downto 0); variable d : std_ulogic_vector(127 downto 0); variable d2 : std_ulogic_vector(63 downto 0); variable ov : std_ulogic; begin v := r; + a := (m.is_signed and m.data1(63)) & m.data1; + b := (m.is_signed and m.data2(63)) & m.data2; + prod := std_ulogic_vector(signed(a) * signed(b)); v.multiply_pipeline(0).valid := m.valid; - v.multiply_pipeline(0).data := (unsigned(m.data1) * unsigned(m.data2)) + unsigned(m.addend); - v.multiply_pipeline(0).is_32bit := m.is_32bit; - v.multiply_pipeline(0).not_res := m.not_result; + if m.subtract = '1' then + v.multiply_pipeline(0).data := unsigned(m.addend) - unsigned(prod(127 downto 0)); + else + v.multiply_pipeline(0).data := unsigned(m.addend) + unsigned(prod(127 downto 0)); + end if; loop_0: for i in 1 to PIPELINE_DEPTH-1 loop v.multiply_pipeline(i) := r.multiply_pipeline(i-1); end loop; d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); - if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then - d := not d; - end if; - - ov := '0'; - if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then - ov := (or d(63 downto 31)) and not (and d(63 downto 31)); - else - ov := (or d(127 downto 63)) and not (and d(127 downto 63)); - end if; + ov := (or d(127 downto 63)) and not (and d(127 downto 63)); ovf_in <= ov; m_out.result <= d; diff --git a/xilinx-mult-32s.vhdl b/xilinx-mult-32s.vhdl index fde19ae..cacc22d 100644 --- a/xilinx-mult-32s.vhdl +++ 
b/xilinx-mult-32s.vhdl @@ -33,9 +33,11 @@ architecture behaviour of multiply_32s is signal product_lo : std_ulogic_vector(22 downto 0); begin - -- sign extend - data1 <= std_ulogic_vector(resize(signed(m_in.data1(32 downto 0)), 53)); - data2 <= std_ulogic_vector(resize(signed(m_in.data2(32 downto 0)), 35)); + -- sign extend if signed + data1(31 downto 0) <= m_in.data1(31 downto 0); + data1(52 downto 32) <= (others => m_in.is_signed and m_in.data1(31)); + data2(31 downto 0) <= m_in.data2(31 downto 0); + data2(34 downto 32) <= (others => m_in.is_signed and m_in.data2(31)); clocken <= m_in.valid and not stall; diff --git a/xilinx-mult.vhdl b/xilinx-mult.vhdl index 608810e..26ba5d7 100644 --- a/xilinx-mult.vhdl +++ b/xilinx-mult.vhdl @@ -18,6 +18,8 @@ entity multiply is end entity multiply; architecture behaviour of multiply is + signal d1sign : std_ulogic_vector(13 downto 0); + signal d2sign : std_ulogic_vector(4 downto 0); signal m00_p, m01_p, m02_p, m03_p : std_ulogic_vector(47 downto 0); signal m00_pc, m02_pc : std_ulogic_vector(47 downto 0); signal m10_p, m11_p, m12_p, m13_p : std_ulogic_vector(47 downto 0); @@ -39,7 +41,9 @@ architecture behaviour of multiply is signal overflow : std_ulogic; begin - addend <= m_in.addend; + addend <= m_in.addend when m_in.subtract = '0' else not m_in.addend; + d1sign <= (others => m_in.data1(63) and m_in.is_signed); + d2sign <= (others => m_in.data2(63) and m_in.is_signed); m00: DSP48E1 generic map ( @@ -233,7 +237,7 @@ begin A => 6x"0" & m_in.data1(23 downto 0), ACIN => (others => '0'), ALUMODE => "0000", - B => "00000" & m_in.data2(63 downto 51), + B => d2sign & m_in.data2(63 downto 51), BCIN => (others => '0'), C => (others => '0'), CARRYCASCIN => '0', @@ -463,7 +467,7 @@ begin A => 6x"0" & m_in.data1(47 downto 24), ACIN => (others => '0'), ALUMODE => "0000", - B => "00000" & m_in.data2(63 downto 51), + B => d2sign & m_in.data2(63 downto 51), BCIN => (others => '0'), C => (others => '0'), CARRYCASCIN => '0', @@ -517,7 +521,7 
@@ begin PREG => 1 ) port map ( - A => 14x"0" & m_in.data1(63 downto 48), + A => d1sign & m_in.data1(63 downto 48), ACIN => (others => '0'), ALUMODE => "0000", B => '0' & m_in.data2(16 downto 0), @@ -575,7 +579,7 @@ begin PREG => 0 ) port map ( - A => 14x"0" & m_in.data1(63 downto 48), + A => d1sign & m_in.data1(63 downto 48), ACIN => (others => '0'), ALUMODE => "0000", B => '0' & m_in.data2(33 downto 17), @@ -632,7 +636,7 @@ begin PREG => 1 ) port map ( - A => 14x"0" & m_in.data1(63 downto 48), + A => d1sign & m_in.data1(63 downto 48), ACIN => (others => '0'), ALUMODE => "0000", B => '0' & m_in.data2(50 downto 34), @@ -690,10 +694,10 @@ begin PREG => 0 ) port map ( - A => 14x"0" & m_in.data1(63 downto 48), + A => d1sign & m_in.data1(63 downto 48), ACIN => (others => '0'), ALUMODE => "0000", - B => "00000" & m_in.data2(63 downto 51), + B => d2sign & m_in.data2(63 downto 51), BCIN => (others => '0'), C => (others => '0'), CARRYCASCIN => '0', @@ -996,7 +1000,7 @@ begin end if; m_out.valid <= valid_1; valid_1 <= m_in.valid; - rnot_1 <= m_in.not_result; + rnot_1 <= m_in.subtract; overflow <= not ((p1_pat and p0_pat) or (p1_patb and p0_patb)); end if; end process;