execute1: Remember dest GPR, RC, OE, XER for slow operations

For multiply and divide operations (the "slow" operations), execute1
now records the destination GPR number, the RC and OE bits from the
instruction, and the common XER bits (xerc) at the time the operation
is issued.  As a result, the multiply and divide units no longer need
to record those values and send them back to execute1, which makes the
interface to those units a bit simpler.  They simply report an
overflow signal along with the result value, and execute1 takes care
of updating XER (OV, OV32 and SO) when the instruction's OE bit is set.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/134/head
Paul Mackerras 4 years ago
parent 39d18d2738
commit c9a2076dd3

@ -133,21 +133,16 @@ package common is
type Execute1ToMultiplyType is record type Execute1ToMultiplyType is record
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; insn_type: insn_type_t;
write_reg: gpr_index_t;
data1: std_ulogic_vector(64 downto 0); data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0); data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic;
oe: std_ulogic;
is_32bit: std_ulogic; is_32bit: std_ulogic;
xerc: xer_common_t;
end record; end record;
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
oe => '0', is_32bit => '0', xerc => xerc_init, is_32bit => '0',
others => (others => '0')); others => (others => '0'));


type Execute1ToDividerType is record type Execute1ToDividerType is record
valid: std_ulogic; valid: std_ulogic;
write_reg: gpr_index_t;
dividend: std_ulogic_vector(63 downto 0); dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0); divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic; is_signed: std_ulogic;
@ -155,13 +150,9 @@ package common is
is_extended: std_ulogic; is_extended: std_ulogic;
is_modulus: std_ulogic; is_modulus: std_ulogic;
neg_result: std_ulogic; neg_result: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
xerc: xer_common_t;
end record; end record;
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0', is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
neg_result => '0', others => (others => '0')); neg_result => '0', others => (others => '0'));


type Decode2ToRegisterFileType is record type Decode2ToRegisterFileType is record
@ -264,30 +255,18 @@ package common is


type MultiplyToExecute1Type is record type MultiplyToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;

write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic; overflow : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
end record; end record;
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0')); others => (others => '0'));


type DividerToExecute1Type is record type DividerToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;

write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic; overflow : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
end record; end record;
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0')); others => (others => '0'));


type WritebackToRegisterFileType is record type WritebackToRegisterFileType is record

@ -300,7 +300,9 @@ begin
v.e.read_data3 := decoded_reg_c.data; v.e.read_data3 := decoded_reg_c.data;
v.e.write_reg := decoded_reg_o.reg; v.e.write_reg := decoded_reg_o.reg;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.e.cr := c_in.read_cr_data; v.e.cr := c_in.read_cr_data;
v.e.xerc := c_in.read_xerc_data; v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a; v.e.invert_a := d_in.decode.invert_a;

@ -29,13 +29,9 @@ architecture behaviour of divider is
signal is_32bit : std_ulogic; signal is_32bit : std_ulogic;
signal extended : std_ulogic; signal extended : std_ulogic;
signal is_signed : std_ulogic; signal is_signed : std_ulogic;
signal rc : std_ulogic;
signal write_reg : std_ulogic_vector(4 downto 0);
signal overflow : std_ulogic; signal overflow : std_ulogic;
signal ovf32 : std_ulogic; signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic; signal did_ovf : std_ulogic;
signal oe : std_ulogic;
signal xerc : xer_common_t;
begin begin
divider_0: process(clk) divider_0: process(clk)
begin begin
@ -54,15 +50,11 @@ begin
end if; end if;
div <= unsigned(d_in.divisor); div <= unsigned(d_in.divisor);
quot <= (others => '0'); quot <= (others => '0');
write_reg <= d_in.write_reg;
neg_result <= d_in.neg_result; neg_result <= d_in.neg_result;
is_modulus <= d_in.is_modulus; is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended; extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit; is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed; is_signed <= d_in.is_signed;
rc <= d_in.rc;
oe <= d_in.oe;
xerc <= d_in.xerc;
count <= "1111111"; count <= "1111111";
running <= '1'; running <= '1';
overflow <= '0'; overflow <= '0';
@ -98,9 +90,6 @@ begin


divider_1: process(all) divider_1: process(all)
begin begin
d_out.write_reg_nr <= write_reg;
d_out.rc <= rc;

if is_modulus = '1' then if is_modulus = '1' then
result <= dend(128 downto 65); result <= dend(128 downto 65);
else else
@ -136,21 +125,9 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
d_out.valid <= '0'; d_out.valid <= '0';
d_out.write_reg_data <= oresult; d_out.write_reg_data <= oresult;
d_out.write_xerc_enable <= '0'; d_out.overflow <= did_ovf;
d_out.xerc <= xerc;
if count = "1000000" then if count = "1000000" then
d_out.valid <= '1'; d_out.valid <= '1';
d_out.write_xerc_enable <= oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if oe = '1' then
d_out.xerc.ov <= did_ovf;
d_out.xerc.ov32 <= did_ovf;
d_out.xerc.so <= xerc.so or did_ovf;
end if;
end if; end if;
end if; end if;
end process; end process;

@ -43,7 +43,6 @@ begin
rst <= '0'; rst <= '0';


d1.valid <= '1'; d1.valid <= '1';
d1.write_reg <= "10001";
d1.dividend <= x"0000000010001000"; d1.dividend <= x"0000000010001000";
d1.divisor <= x"0000000000001111"; d1.divisor <= x"0000000000001111";
d1.is_signed <= '0'; d1.is_signed <= '0';
@ -51,7 +50,6 @@ begin
d1.is_extended <= '0'; d1.is_extended <= '0';
d1.is_modulus <= '0'; d1.is_modulus <= '0';
d1.neg_result <= '0'; d1.neg_result <= '0';
d1.rc <= '0';


wait for clk_period; wait for clk_period;
assert d2.valid = '0'; assert d2.valid = '0';
@ -66,15 +64,12 @@ begin
end loop; end loop;


assert d2.valid = '1'; assert d2.valid = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '0';


wait for clk_period; wait for clk_period;
assert d2.valid = '0' report "valid"; assert d2.valid = '0' report "valid";


d1.valid <= '1'; d1.valid <= '1';
d1.rc <= '1';


wait for clk_period; wait for clk_period;
assert d2.valid = '0' report "valid"; assert d2.valid = '0' report "valid";
@ -89,9 +84,7 @@ begin
end loop; end loop;


assert d2.valid = '1'; assert d2.valid = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '1';


wait for clk_period; wait for clk_period;
assert d2.valid = '0'; assert d2.valid = '0';

@ -38,6 +38,10 @@ architecture behaviour of execute1 is
next_lr : std_ulogic_vector(63 downto 0); next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
slow_op_dest : gpr_index_t;
slow_op_rc : std_ulogic;
slow_op_oe : std_ulogic;
slow_op_xerc : xer_common_t;
end record; end record;


signal r, rin : reg_type; signal r, rin : reg_type;
@ -187,6 +191,7 @@ begin
variable carry_32, carry_64 : std_ulogic; variable carry_32, carry_64 : std_ulogic;
variable sign1, sign2 : std_ulogic; variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0); variable abs1, abs2 : signed(63 downto 0);
variable overflow : std_ulogic;
begin begin
result := (others => '0'); result := (others => '0');
result_with_carry := (others => '0'); result_with_carry := (others => '0');
@ -238,12 +243,6 @@ begin
-- signals to multiply unit -- signals to multiply unit
x_to_multiply <= Execute1ToMultiplyInit; x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.insn_type <= e_in.insn_type; x_to_multiply.insn_type <= e_in.insn_type;
x_to_multiply.write_reg <= gspr_to_gpr(e_in.write_reg);
x_to_multiply.rc <= e_in.rc;
x_to_multiply.xerc <= v.e.xerc;
if e_in.insn_type = OP_MUL_L64 then
x_to_multiply.oe <= e_in.oe;
end if;
x_to_multiply.is_32bit <= e_in.is_32bit; x_to_multiply.is_32bit <= e_in.is_32bit;


if e_in.is_32bit = '1' then if e_in.is_32bit = '1' then
@ -291,16 +290,12 @@ begin
end if; end if;


x_to_divider <= Execute1ToDividerInit; x_to_divider <= Execute1ToDividerInit;
x_to_divider.write_reg <= gspr_to_gpr(e_in.write_reg);
x_to_divider.is_signed <= e_in.is_signed; x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit; x_to_divider.is_32bit <= e_in.is_32bit;
if e_in.insn_type = OP_MOD then if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1'; x_to_divider.is_modulus <= '1';
end if; end if;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
x_to_divider.rc <= e_in.rc;
x_to_divider.oe <= e_in.oe;
x_to_divider.xerc <= v.e.xerc;
if e_in.is_32bit = '0' then if e_in.is_32bit = '0' then
-- 64-bit forms -- 64-bit forms
if e_in.insn_type = OP_DIVE then if e_in.insn_type = OP_DIVE then
@ -342,6 +337,10 @@ begin
v.e.write_reg := e_in.write_reg; v.e.write_reg := e_in.write_reg;
v.e.write_len := x"8"; v.e.write_len := x"8";
v.e.sign_extend := '0'; v.e.sign_extend := '0';
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe;
v.slow_op_xerc := v.e.xerc;


case_0: case e_in.insn_type is case_0: case e_in.insn_type is


@ -664,35 +663,36 @@ begin
v.e.write_len := x"8"; v.e.write_len := x"8";
v.e.sign_extend := '0'; v.e.sign_extend := '0';
v.e.valid := '1'; v.e.valid := '1';
elsif r.mul_in_progress = '1' then elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
if multiply_to_x.valid = '1' then if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
v.e.write_reg := gpr_to_gspr(multiply_to_x.write_reg_nr); (r.div_in_progress = '1' and divider_to_x.valid = '1') then
result := multiply_to_x.write_reg_data; if r.mul_in_progress = '1' then
result := multiply_to_x.write_reg_data;
overflow := multiply_to_x.overflow;
else
result := divider_to_x.write_reg_data;
overflow := divider_to_x.overflow;
end if;
result_en := '1'; result_en := '1';
v.e.rc := multiply_to_x.rc; v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
v.e.xerc := multiply_to_x.xerc; v.e.rc := v.slow_op_rc;
v.e.write_xerc_enable := multiply_to_x.write_xerc_enable; v.e.xerc := v.slow_op_xerc;
v.e.write_xerc_enable := v.slow_op_oe;
-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
if v.slow_op_oe = '1' then
v.e.xerc.ov := overflow;
v.e.xerc.ov32 := overflow;
v.e.xerc.so := v.slow_op_xerc.so or overflow;
end if;
v.e.valid := '1'; v.e.valid := '1';
v.e.write_len := x"8"; v.e.write_len := x"8";
v.e.sign_extend := '0'; v.e.sign_extend := '0';
else else
stall_out <= '1'; stall_out <= '1';
v.mul_in_progress := '1'; v.mul_in_progress := r.mul_in_progress;
end if; v.div_in_progress := r.div_in_progress;
elsif r.div_in_progress = '1' then
if divider_to_x.valid = '1' then
v.e.write_reg := gpr_to_gspr(divider_to_x.write_reg_nr);
result := divider_to_x.write_reg_data;
result_en := '1';
v.e.rc := divider_to_x.rc;
v.e.xerc := divider_to_x.xerc;
v.e.write_xerc_enable := divider_to_x.write_xerc_enable;
v.e.valid := '1';
v.e.write_len := x"8";
v.e.sign_extend := '0';
else
stall_out <= '1';
v.div_in_progress := '1';
end if; end if;
end if; end if;



@ -25,19 +25,12 @@ architecture behaviour of multiply is
valid : std_ulogic; valid : std_ulogic;
insn_type : insn_type_t; insn_type : insn_type_t;
data : signed(129 downto 0); data : signed(129 downto 0);
write_reg : std_ulogic_vector(4 downto 0);
rc : std_ulogic;
oe : std_ulogic;
is_32bit : std_ulogic; is_32bit : std_ulogic;
xerc : xer_common_t;
end record; end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
insn_type => OP_ILLEGAL, insn_type => OP_ILLEGAL,
rc => '0', oe => '0',
is_32bit => '0', is_32bit => '0',
xerc => xerc_init, data => (others => '0'));
data => (others => '0'),
others => (others => '0'));


type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit); constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
@ -69,11 +62,7 @@ begin
v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).insn_type := m.insn_type; v.multiply_pipeline(0).insn_type := m.insn_type;
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2); v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).write_reg := m.write_reg;
v.multiply_pipeline(0).rc := m.rc;
v.multiply_pipeline(0).oe := m.oe;
v.multiply_pipeline(0).is_32bit := m.is_32bit; v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).xerc := m.xerc;


loop_0: for i in 1 to PIPELINE_DEPTH-1 loop loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1); v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
@ -101,24 +90,10 @@ begin
end case; end case;


m_out.write_reg_data <= d2; m_out.write_reg_data <= d2;
m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg; m_out.overflow <= ov;
m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc;


-- Generate OV/OV32/SO when OE=1
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1'; m_out.valid <= '1';
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then
m_out.xerc.ov <= ov;
m_out.xerc.ov32 <= ov;
m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov;
end if;
end if; end if;


rin <= v; rin <= v;

@ -40,10 +40,8 @@ begin


m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64; m1.insn_type <= OP_MUL_L64;
m1.write_reg <= "10001";
m1.data1 <= '0' & x"0000000000001000"; m1.data1 <= '0' & x"0000000000001000";
m1.data2 <= '0' & x"0000000000001111"; m1.data2 <= '0' & x"0000000000001111";
m1.rc <= '0';


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';
@ -58,15 +56,12 @@ begin


wait for clk_period; wait for clk_period;
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_nr = "10001";
assert m2.write_reg_data = x"0000000001111000"; assert m2.write_reg_data = x"0000000001111000";
assert m2.rc = '0';


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';


m1.valid <= '1'; m1.valid <= '1';
m1.rc <= '1';


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';
@ -75,9 +70,7 @@ begin


wait for clk_period * (pipeline_depth-1); wait for clk_period * (pipeline_depth-1);
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_nr = "10001";
assert m2.write_reg_data = x"0000000001111000"; assert m2.write_reg_data = x"0000000001111000";
assert m2.rc = '1';


-- test mulld -- test mulld
mulld_loop : for i in 0 to 1000 loop mulld_loop : for i in 0 to 1000 loop

Loading…
Cancel
Save