multiply: Move data formatting out of decode2

At present, decode2 does some formatting of the input data for the
multiply unit: truncation to 32 bits for 32-bit operations, followed by
sign or zero extension to 65 bits.  This would prevent forwarding of
results within the execute pipeline in future, because a forwarded
result would bypass the formatting done in decode2.  So we move the
formatting to the first cycle of the multiply pipeline.

It turns out that we have a wasted cycle at the front of the multiply
pipeline, because decode2 has a register at its output and multiply has
a register at its input, with no logic in between.  For now we use this
cycle to do the data formatting.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/121/head
Paul Mackerras 5 years ago
parent ac7df6fc04
commit 9568e5f848

@ -86,11 +86,15 @@ package common is
valid: std_ulogic;
insn_type: insn_type_t;
write_reg: std_ulogic_vector(4 downto 0);
data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0);
data1: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(63 downto 0);
rc: std_ulogic;
is_32bit: std_ulogic;
is_signed: std_ulogic;
end record;
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0'));
constant Decode2ToMultiplyInit : Decode2ToMultiplyType :=
(valid => '0', insn_type => OP_ILLEGAL, rc => '0', is_32bit => '0', is_signed => '0',
others => (others => '0'));

type Decode2ToDividerType is record
valid: std_ulogic;

@ -270,30 +270,12 @@ begin

-- multiply unit
v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data;
v.m.data1 := decoded_reg_a.data;
v.m.data2 := decoded_reg_b.data;
v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);

if d_in.decode.is_32bit = '1' then
if d_in.decode.is_signed = '1' then
v.m.data1 := (others => mul_a(31));
v.m.data1(31 downto 0) := mul_a(31 downto 0);
v.m.data2 := (others => mul_b(31));
v.m.data2(31 downto 0) := mul_b(31 downto 0);
else
v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
end if;
else
if d_in.decode.is_signed = '1' then
v.m.data1 := mul_a(63) & mul_a;
v.m.data2 := mul_b(63) & mul_b;
else
v.m.data1 := '0' & mul_a;
v.m.data2 := '0' & mul_b;
end if;
end if;
v.m.is_32bit := d_in.decode.is_32bit;
v.m.is_signed := d_in.decode.is_signed;

-- divide unit
-- PPC divide and modulus instruction words have these bits in

@ -38,12 +38,17 @@ architecture behaviour of multiply is
end record;

signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit);

signal data1, data1_in : std_ulogic_vector(64 downto 0);
signal data2, data2_in : std_ulogic_vector(64 downto 0);
begin
multiply_0: process(clk)
begin
if rising_edge(clk) then
m <= m_in;
r <= rin;
data1 <= data1_in;
data2 <= data2_in;
end if;
end process;

@ -56,9 +61,30 @@ begin

m_out <= MultiplyToWritebackInit;

if m_in.is_32bit = '1' then
if m_in.is_signed = '1' then
data1_in <= (others => m_in.data1(31));
data1_in(31 downto 0) <= m_in.data1(31 downto 0);
data2_in <= (others => m_in.data2(31));
data2_in(31 downto 0) <= m_in.data2(31 downto 0);
else
data1_in <= '0' & x"00000000" & m_in.data1(31 downto 0);
data2_in <= '0' & x"00000000" & m_in.data2(31 downto 0);
end if;
else
if m_in.is_signed = '1' then
data1_in <= m_in.data1(63) & m_in.data1;
data2_in <= m_in.data2(63) & m_in.data2;
else
data1_in <= '0' & m_in.data1;
data2_in <= '0' & m_in.data2;
end if;
end if;


v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).insn_type := m.insn_type;
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).data := signed(data1) * signed(data2);
v.multiply_pipeline(0).write_reg := m.write_reg;
v.multiply_pipeline(0).rc := m.rc;


@ -41,8 +41,10 @@ begin
m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;
m1.write_reg <= "10001";
m1.data1 <= '0' & x"0000000000001000";
m1.data2 <= '0' & x"0000000000001111";
m1.data1 <= x"0000000000001000";
m1.data2 <= x"0000000000001111";
m1.is_32bit <= '0';
m1.is_signed <= '0';
m1.rc <= '0';

wait for clk_period;
@ -88,8 +90,8 @@ begin

behave_rt := ppc_mulld(ra, rb);

m1.data1 <= '0' & ra;
m1.data2 <= '0' & rb;
m1.data1 <= ra;
m1.data2 <= rb;
m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;

@ -112,8 +114,8 @@ begin

behave_rt := ppc_mulhdu(ra, rb);

m1.data1 <= '0' & ra;
m1.data2 <= '0' & rb;
m1.data1 <= ra;
m1.data2 <= rb;
m1.valid <= '1';
m1.insn_type <= OP_MUL_H64;

@ -136,8 +138,9 @@ begin

behave_rt := ppc_mulhd(ra, rb);

m1.data1 <= ra(63) & ra;
m1.data2 <= rb(63) & rb;
m1.data1 <= ra;
m1.data2 <= rb;
m1.is_signed <= '1';
m1.valid <= '1';
m1.insn_type <= OP_MUL_H64;

@ -160,12 +163,11 @@ begin

behave_rt := ppc_mullw(ra, rb);

m1.data1 <= (others => ra(31));
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => rb(31));
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.data1 <= ra;
m1.data2 <= rb;
m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;
m1.is_32bit <= '1';

wait for clk_period;

@ -186,10 +188,8 @@ begin

behave_rt := ppc_mulhw(ra, rb);

m1.data1 <= (others => ra(31));
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => rb(31));
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.data1 <= ra;
m1.data2 <= rb;
m1.valid <= '1';
m1.insn_type <= OP_MUL_H32;

@ -212,10 +212,9 @@ begin

behave_rt := ppc_mulhwu(ra, rb);

m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.data1 <= ra;
m1.data2 <= rb;
m1.is_signed <= '0';
m1.valid <= '1';
m1.insn_type <= OP_MUL_H32;

@ -238,9 +237,11 @@ begin

behave_rt := ppc_mulli(ra, si);

m1.data1 <= ra(63) & ra;
m1.data1 <= ra;
m1.data2 <= (others => si(15));
m1.data2(15 downto 0) <= si;
m1.is_signed <= '1';
m1.is_32bit <= '0';
m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


Loading…
Cancel
Save