From 9568e5f848894d402f20710b3b2f0abf8037c081 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 5 Dec 2019 09:32:13 +1100 Subject: [PATCH] multiply: Move data formatting out of decode2 At present, decode2 does some formatting of the input data for the multiply unit - truncation to 32 bits for 32-bit operations and then sign or zero extension to 65 bits. This is going to prevent forwarding of results within the execute pipeline in future, so we move the formatting to the first cycle of the multiply pipeline. It turns out that we have a wasted cycle at the front of the multiply pipe, because decode2 has a register at its output and multiply has a register at its input. For now we use this cycle to do the data formatting. Signed-off-by: Paul Mackerras --- common.vhdl | 10 +++++++--- decode2.vhdl | 26 ++++---------------------- multiply.vhdl | 28 +++++++++++++++++++++++++++- multiply_tb.vhdl | 43 ++++++++++++++++++++++--------------------- 4 files changed, 60 insertions(+), 47 deletions(-) diff --git a/common.vhdl b/common.vhdl index 44198b0..bf9cc92 100644 --- a/common.vhdl +++ b/common.vhdl @@ -86,11 +86,15 @@ package common is valid: std_ulogic; insn_type: insn_type_t; write_reg: std_ulogic_vector(4 downto 0); - data1: std_ulogic_vector(64 downto 0); - data2: std_ulogic_vector(64 downto 0); + data1: std_ulogic_vector(63 downto 0); + data2: std_ulogic_vector(63 downto 0); rc: std_ulogic; + is_32bit: std_ulogic; + is_signed: std_ulogic; end record; - constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0')); + constant Decode2ToMultiplyInit : Decode2ToMultiplyType := + (valid => '0', insn_type => OP_ILLEGAL, rc => '0', is_32bit => '0', is_signed => '0', + others => (others => '0')); type Decode2ToDividerType is record valid: std_ulogic; diff --git a/decode2.vhdl b/decode2.vhdl index 1307e7d..e441b02 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -270,30 +270,12 @@ begin -- multiply unit v.m.insn_type := d_in.decode.insn_type; - mul_a := decoded_reg_a.data; - mul_b := decoded_reg_b.data; + v.m.data1 := decoded_reg_a.data; + v.m.data2 := decoded_reg_b.data; v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.m.rc := decode_rc(d_in.decode.rc, d_in.insn); - - if d_in.decode.is_32bit = '1' then - if d_in.decode.is_signed = '1' then - v.m.data1 := (others => mul_a(31)); - v.m.data1(31 downto 0) := mul_a(31 downto 0); - v.m.data2 := (others => mul_b(31)); - v.m.data2(31 downto 0) := mul_b(31 downto 0); - else - v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0); - v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0); - end if; - else - if d_in.decode.is_signed = '1' then - v.m.data1 := mul_a(63) & mul_a; - v.m.data2 := mul_b(63) & mul_b; - else - v.m.data1 := '0' & mul_a; - v.m.data2 := '0' & mul_b; - end if; - end if; + v.m.is_32bit := d_in.decode.is_32bit; + v.m.is_signed := d_in.decode.is_signed; -- divide unit -- PPC divide and modulus instruction words have these bits in diff --git a/multiply.vhdl b/multiply.vhdl index 94fa792..92f4201 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -38,12 +38,17 @@ architecture behaviour of multiply is end record; signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit); + + signal data1, data1_in : std_ulogic_vector(64 downto 0); + signal data2, data2_in : std_ulogic_vector(64 downto 0); begin multiply_0: process(clk) begin if rising_edge(clk) then m <= m_in; r <= rin; + data1 <= data1_in; + data2 <= data2_in; end if; end process; @@ -56,9 +61,30 @@ begin m_out <= MultiplyToWritebackInit; + if m_in.is_32bit = '1' then + if m_in.is_signed = '1' then + data1_in <= (others => m_in.data1(31)); + data1_in(31 downto 0) <= m_in.data1(31 downto 0); + data2_in <= (others => m_in.data2(31)); + data2_in(31 downto 0) <= m_in.data2(31 downto 0); + else + data1_in <= '0' & x"00000000" & m_in.data1(31 downto 0); + data2_in <= '0' & x"00000000" & m_in.data2(31 downto 0); + end if; + else + if m_in.is_signed = '1' then + data1_in <= m_in.data1(63) & m_in.data1; + data2_in <= m_in.data2(63) & m_in.data2; + else + data1_in <= '0' & m_in.data1; + data2_in <= '0' & m_in.data2; + end if; + end if; + + v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).insn_type := m.insn_type; - v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2); + v.multiply_pipeline(0).data := signed(data1) * signed(data2); v.multiply_pipeline(0).write_reg := m.write_reg; v.multiply_pipeline(0).rc := m.rc; diff --git a/multiply_tb.vhdl b/multiply_tb.vhdl index 48f83ab..552efcf 100644 --- a/multiply_tb.vhdl +++ b/multiply_tb.vhdl @@ -41,8 +41,10 @@ begin m1.valid <= '1'; m1.insn_type <= OP_MUL_L64; m1.write_reg <= "10001"; - m1.data1 <= '0' & x"0000000000001000"; - m1.data2 <= '0' & x"0000000000001111"; + m1.data1 <= x"0000000000001000"; + m1.data2 <= x"0000000000001111"; + m1.is_32bit <= '0'; + m1.is_signed <= '0'; m1.rc <= '0'; wait for clk_period; @@ -88,8 +90,8 @@ begin behave_rt := ppc_mulld(ra, rb); - m1.data1 <= '0' & ra; - m1.data2 <= '0' & rb; + m1.data1 <= ra; + m1.data2 <= rb; m1.valid <= '1'; m1.insn_type <= OP_MUL_L64; @@ -112,8 +114,8 @@ begin behave_rt := ppc_mulhdu(ra, rb); - m1.data1 <= '0' & ra; - m1.data2 <= '0' & rb; + m1.data1 <= ra; + m1.data2 <= rb; m1.valid <= '1'; m1.insn_type <= OP_MUL_H64; @@ -136,8 +138,9 @@ begin behave_rt := ppc_mulhd(ra, rb); - m1.data1 <= ra(63) & ra; - m1.data2 <= rb(63) & rb; + m1.data1 <= ra; + m1.data2 <= rb; + m1.is_signed <= '1'; m1.valid <= '1'; m1.insn_type <= OP_MUL_H64; @@ -160,12 +163,11 @@ begin behave_rt := ppc_mullw(ra, rb); - m1.data1 <= (others => ra(31)); - m1.data1(31 downto 0) <= ra(31 downto 0); - m1.data2 <= (others => rb(31)); - m1.data2(31 downto 0) <= rb(31 downto 0); + m1.data1 <= ra; + m1.data2 <= rb; m1.valid <= '1'; m1.insn_type <= OP_MUL_L64; + m1.is_32bit <= '1'; wait for clk_period; @@ -186,10 +188,8 @@ begin behave_rt := ppc_mulhw(ra, rb); - m1.data1 <= (others => ra(31)); - m1.data1(31 downto 0) <= ra(31 downto 0); - m1.data2 <= (others => rb(31)); - m1.data2(31 downto 0) <= rb(31 downto 0); + m1.data1 <= ra; + m1.data2 <= rb; m1.valid <= '1'; m1.insn_type <= OP_MUL_H32; @@ -212,10 +212,9 @@ begin behave_rt := ppc_mulhwu(ra, rb); - m1.data1 <= (others => '0'); - m1.data1(31 downto 0) <= ra(31 downto 0); - m1.data2 <= (others => '0'); - m1.data2(31 downto 0) <= rb(31 downto 0); + m1.data1 <= ra; + m1.data2 <= rb; + m1.is_signed <= '0'; m1.valid <= '1'; m1.insn_type <= OP_MUL_H32; @@ -238,9 +237,11 @@ begin behave_rt := ppc_mulli(ra, si); - m1.data1 <= ra(63) & ra; + m1.data1 <= ra; m1.data2 <= (others => si(15)); m1.data2(15 downto 0) <= si; + m1.is_signed <= '1'; + m1.is_32bit <= '0'; m1.valid <= '1'; m1.insn_type <= OP_MUL_L64;