Make multiplier hang off the side of execute1

With this, the multiplier isn't a separate pipe that decode2 issues
instructions to, but rather is a unit that execute1 sends operands
to and which sends the result back to execute1, which then sends it
to writeback.  Execute1 now sends a stall signal when it gets a
multiply instruction until it gets a valid signal back from the
multiplier.

This all means that we no longer need to mark the multiply
instructions as single-issue.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/134/head
Paul Mackerras 5 years ago
parent 969245e379
commit 2167186b5f

@ -17,7 +17,7 @@ common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o common.o control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o divider.o
core_debug.o: common.o core_debug.o: common.o
countzero.o: countzero.o:
countzero_tb.o: common.o glibc_random.o countzero.o countzero_tb.o: common.o glibc_random.o countzero.o
@ -26,7 +26,7 @@ crhelpers.o: common.o
decode1.o: common.o decode_types.o decode1.o: common.o decode_types.o
decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o
decode_types.o: decode_types.o:
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o
fetch1.o: common.o fetch1.o: common.o
fetch2.o: common.o wishbone_types.o fetch2.o: common.o wishbone_types.o
glibc_random_helpers.o: glibc_random_helpers.o:

@ -130,7 +130,7 @@ package common is
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0')); is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));


type Decode2ToMultiplyType is record type Execute1ToMultiplyType is record
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; insn_type: insn_type_t;
write_reg: gpr_index_t; write_reg: gpr_index_t;
@ -141,9 +141,9 @@ package common is
is_32bit: std_ulogic; is_32bit: std_ulogic;
xerc: xer_common_t; xerc: xer_common_t;
end record; end record;
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
oe => '0', is_32bit => '0', xerc => xerc_init, oe => '0', is_32bit => '0', xerc => xerc_init,
others => (others => '0')); others => (others => '0'));


type Decode2ToDividerType is record type Decode2ToDividerType is record
valid: std_ulogic; valid: std_ulogic;
@ -261,20 +261,19 @@ package common is
write_xerc_enable => '0', xerc => xerc_init, write_xerc_enable => '0', xerc => xerc_init,
others => (others => '0')); others => (others => '0'));


type MultiplyToWritebackType is record type MultiplyToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;


write_reg_enable : std_ulogic;
write_reg_nr: gpr_index_t; write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic; write_xerc_enable : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
rc: std_ulogic; rc: std_ulogic;
end record; end record;
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0',
rc => '0', write_xerc_enable => '0', rc => '0', write_xerc_enable => '0',
xerc => xerc_init, xerc => xerc_init,
others => (others => '0')); others => (others => '0'));


type DividerToWritebackType is record type DividerToWritebackType is record
valid: std_ulogic; valid: std_ulogic;

@ -63,10 +63,6 @@ architecture behave of core is
signal loadstore1_to_dcache: Loadstore1ToDcacheType; signal loadstore1_to_dcache: Loadstore1ToDcacheType;
signal dcache_to_writeback: DcacheToWritebackType; signal dcache_to_writeback: DcacheToWritebackType;


-- multiply signals
signal decode2_to_multiply: Decode2ToMultiplyType;
signal multiply_to_writeback: MultiplyToWritebackType;

-- divider signals -- divider signals
signal decode2_to_divider: Decode2ToDividerType; signal decode2_to_divider: Decode2ToDividerType;
signal divider_to_writeback: DividerToWritebackType; signal divider_to_writeback: DividerToWritebackType;
@ -115,7 +111,6 @@ architecture behave of core is
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
@ -197,7 +192,6 @@ begin
d_in => decode1_to_decode2, d_in => decode1_to_decode2,
e_out => decode2_to_execute1, e_out => decode2_to_execute1,
l_out => decode2_to_loadstore1, l_out => decode2_to_loadstore1,
m_out => decode2_to_multiply,
d_out => decode2_to_divider, d_out => decode2_to_divider,
r_in => register_file_to_decode2, r_in => register_file_to_decode2,
r_out => decode2_to_register_file, r_out => decode2_to_register_file,
@ -265,13 +259,6 @@ begin
wishbone_out => wishbone_data_out wishbone_out => wishbone_data_out
); );


multiply_0: entity work.multiply
port map (
clk => clk,
m_in => decode2_to_multiply,
m_out => multiply_to_writeback
);

divider_0: entity work.divider divider_0: entity work.divider
port map ( port map (
clk => clk, clk => clk,
@ -285,7 +272,6 @@ begin
clk => clk, clk => clk,
e_in => execute1_to_writeback, e_in => execute1_to_writeback,
l_in => dcache_to_writeback, l_in => dcache_to_writeback,
m_in => multiply_to_writeback,
d_in => divider_to_writeback, d_in => divider_to_writeback,
w_out => writeback_to_register_file, w_out => writeback_to_register_file,
c_out => writeback_to_cr_file, c_out => writeback_to_cr_file,

@ -54,7 +54,7 @@ architecture behaviour of decode1 is
41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lhzu 41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lhzu
32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lwz 32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lwz
33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lwzu 33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lwzu
7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli 7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli
24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori 24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori
25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris 25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris
20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi 20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi
@ -244,19 +244,19 @@ architecture behaviour of decode1 is
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw 2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf 2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr 2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd 2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu 2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw 2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
2#0000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu 2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
-- next 4 have reserved bit set -- next 4 have reserved bit set
2#1001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd 2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
2#1000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu 2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
2#1001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw 2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
2#1000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu 2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
2#0011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulld 2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld
2#1011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulldo 2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo
2#0011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullw 2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw
2#1011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullwo 2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo
2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand 2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand
2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg 2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg
2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego 2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego

@ -24,7 +24,6 @@ entity decode2 is
d_in : in Decode1ToDecode2Type; d_in : in Decode1ToDecode2Type;


e_out : out Decode2ToExecute1Type; e_out : out Decode2ToExecute1Type;
m_out : out Decode2ToMultiplyType;
d_out : out Decode2ToDividerType; d_out : out Decode2ToDividerType;
l_out : out Decode2ToLoadstore1Type; l_out : out Decode2ToLoadstore1Type;


@ -39,7 +38,6 @@ end entity decode2;
architecture behaviour of decode2 is architecture behaviour of decode2 is
type reg_type is record type reg_type is record
e : Decode2ToExecute1Type; e : Decode2ToExecute1Type;
m : Decode2ToMultiplyType;
d : Decode2ToDividerType; d : Decode2ToDividerType;
l : Decode2ToLoadstore1Type; l : Decode2ToLoadstore1Type;
end record; end record;
@ -238,7 +236,7 @@ begin
decode2_0: process(clk) decode2_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rin.e.valid = '1' or rin.l.valid = '1' or rin.m.valid = '1' or rin.d.valid = '1' then if rin.e.valid = '1' or rin.l.valid = '1' or rin.d.valid = '1' then
report "execute " & to_hstring(rin.e.nia); report "execute " & to_hstring(rin.e.nia);
end if; end if;
r <= rin; r <= rin;
@ -266,14 +264,12 @@ begin


v.e := Decode2ToExecute1Init; v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init; v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
v.d := Decode2ToDividerInit; v.d := Decode2ToDividerInit;


mul_a := (others => '0'); mul_a := (others => '0');
mul_b := (others => '0'); mul_b := (others => '0');


--v.e.input_cr := d_in.decode.input_cr; --v.e.input_cr := d_in.decode.input_cr;
--v.m.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr; --v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1); decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1);
@ -323,38 +319,6 @@ begin
v.e.insn := d_in.insn; v.e.insn := d_in.insn;
v.e.data_len := length; v.e.data_len := length;


-- multiply unit
v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data;
v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.m.xerc := c_in.read_xerc_data;
if v.m.insn_type = OP_MUL_L64 then
v.m.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.m.is_32bit := d_in.decode.is_32bit;

if d_in.decode.is_32bit = '1' then
if d_in.decode.is_signed = '1' then
v.m.data1 := (others => mul_a(31));
v.m.data1(31 downto 0) := mul_a(31 downto 0);
v.m.data2 := (others => mul_b(31));
v.m.data2(31 downto 0) := mul_b(31 downto 0);
else
v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
end if;
else
if d_in.decode.is_signed = '1' then
v.m.data1 := mul_a(63) & mul_a;
v.m.data2 := mul_b(63) & mul_b;
else
v.m.data1 := '0' & mul_a;
v.m.data2 := '0' & mul_b;
end if;
end if;

-- divide unit -- divide unit
-- PPC divide and modulus instruction words have these bits in -- PPC divide and modulus instruction words have these bits in
-- the bottom 11 bits: o1dns 010t1 r -- the bottom 11 bits: o1dns 010t1 r
@ -438,7 +402,6 @@ begin
cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);


v.e.valid := '0'; v.e.valid := '0';
v.m.valid := '0';
v.d.valid := '0'; v.d.valid := '0';
v.l.valid := '0'; v.l.valid := '0';
case d_in.decode.unit is case d_in.decode.unit is
@ -446,8 +409,6 @@ begin
v.e.valid := control_valid_out; v.e.valid := control_valid_out;
when LDST => when LDST =>
v.l.valid := control_valid_out; v.l.valid := control_valid_out;
when MUL =>
v.m.valid := control_valid_out;
when DIV => when DIV =>
v.d.valid := control_valid_out; v.d.valid := control_valid_out;
when NONE => when NONE =>
@ -458,7 +419,6 @@ begin
if rst = '1' then if rst = '1' then
v.e := Decode2ToExecute1Init; v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init; v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
v.d := Decode2ToDividerInit; v.d := Decode2ToDividerInit;
end if; end if;


@ -468,7 +428,6 @@ begin
-- Update outputs -- Update outputs
e_out <= r.e; e_out <= r.e;
l_out <= r.l; l_out <= r.l;
m_out <= r.m;
d_out <= r.d; d_out <= r.d;
end process; end process;
end architecture behaviour; end architecture behaviour;

@ -46,7 +46,7 @@ package decode_types is


constant TOO_OFFSET : integer := 0; constant TOO_OFFSET : integer := 0;


type unit_t is (NONE, ALU, LDST, MUL, DIV); type unit_t is (NONE, ALU, LDST, DIV);
type length_t is (NONE, is1B, is2B, is4B, is8B); type length_t is (NONE, is1B, is2B, is4B, is8B);


type decode_rom_t is record type decode_rom_t is record

@ -35,6 +35,7 @@ architecture behaviour of execute1 is
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
lr_update : std_ulogic; lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0); next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic;
end record; end record;


signal r, rin : reg_type; signal r, rin : reg_type;
@ -48,6 +49,10 @@ architecture behaviour of execute1 is
signal logical_result: std_ulogic_vector(63 downto 0); signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0);


-- multiply signals
signal x_to_multiply: Execute1ToMultiplyType;
signal multiply_to_x: MultiplyToExecute1Type;

procedure set_carry(e: inout Execute1ToWritebackType; procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic; carry32 : in std_ulogic;
carry : in std_ulogic) is carry : in std_ulogic) is
@ -123,6 +128,13 @@ begin
result => countzero_result result => countzero_result
); );


multiply_0: entity work.multiply
port map (
clk => clk,
m_in => x_to_multiply,
m_out => multiply_to_x
);

execute1_0: process(clk) execute1_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
@ -204,6 +216,38 @@ begin
end if; end if;


v.lr_update := '0'; v.lr_update := '0';
v.mul_in_progress := '0';

-- signals to multiply unit
x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.insn_type <= e_in.insn_type;
x_to_multiply.write_reg <= gspr_to_gpr(e_in.write_reg);
x_to_multiply.rc <= e_in.rc;
x_to_multiply.xerc <= v.e.xerc;
if e_in.insn_type = OP_MUL_L64 then
x_to_multiply.oe <= e_in.oe;
end if;
x_to_multiply.is_32bit <= e_in.is_32bit;

if e_in.is_32bit = '1' then
if e_in.is_signed = '1' then
x_to_multiply.data1 <= (others => e_in.read_data1(31));
x_to_multiply.data1(31 downto 0) <= e_in.read_data1(31 downto 0);
x_to_multiply.data2 <= (others => e_in.read_data2(31));
x_to_multiply.data2(31 downto 0) <= e_in.read_data2(31 downto 0);
else
x_to_multiply.data1 <= '0' & x"00000000" & e_in.read_data1(31 downto 0);
x_to_multiply.data2 <= '0' & x"00000000" & e_in.read_data2(31 downto 0);
end if;
else
if e_in.is_signed = '1' then
x_to_multiply.data1 <= e_in.read_data1(63) & e_in.read_data1;
x_to_multiply.data2 <= e_in.read_data2(63) & e_in.read_data2;
else
x_to_multiply.data1 <= '0' & e_in.read_data1;
x_to_multiply.data2 <= '0' & e_in.read_data2;
end if;
end if;


ctrl_tmp <= ctrl; ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq -- FIXME: run at 512MHz not core freq
@ -506,11 +550,19 @@ begin
when OP_ICBI => when OP_ICBI =>
icache_inval <= '1'; icache_inval <= '1';


when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
v.e.valid := '0';
v.mul_in_progress := '1';
stall_out <= '1';
x_to_multiply.valid <= '1';

when others => when others =>
terminate_out <= '1'; terminate_out <= '1';
report "illegal"; report "illegal";
end case; end case;


v.e.rc := e_in.rc and e_in.valid;

-- Update LR on the next cycle after a branch link -- Update LR on the next cycle after a branch link
-- --
-- WARNING: The LR update isn't tracked by our hazard tracker. This -- WARNING: The LR update isn't tracked by our hazard tracker. This
@ -536,11 +588,25 @@ begin
v.e.write_len := x"8"; v.e.write_len := x"8";
v.e.sign_extend := '0'; v.e.sign_extend := '0';
v.e.valid := '1'; v.e.valid := '1';
elsif r.mul_in_progress = '1' then
if multiply_to_x.valid = '1' then
v.e.write_reg := gpr_to_gspr(multiply_to_x.write_reg_nr);
result := multiply_to_x.write_reg_data;
result_en := '1';
v.e.rc := multiply_to_x.rc;
v.e.xerc := multiply_to_x.xerc;
v.e.write_xerc_enable := multiply_to_x.write_xerc_enable;
v.e.valid := '1';
v.e.write_len := x"8";
v.e.sign_extend := '0';
else
stall_out <= '1';
v.mul_in_progress := '1';
end if;
end if; end if;


v.e.write_data := result; v.e.write_data := result;
v.e.write_enable := result_en; v.e.write_enable := result_en;
v.e.rc := e_in.rc and e_in.valid;


-- Update registers -- Update registers
rin <= v; rin <= v;

@ -13,13 +13,13 @@ entity multiply is
port ( port (
clk : in std_logic; clk : in std_logic;


m_in : in Decode2ToMultiplyType; m_in : in Execute1ToMultiplyType;
m_out : out MultiplyToWritebackType m_out : out MultiplyToExecute1Type
); );
end entity multiply; end entity multiply;


architecture behaviour of multiply is architecture behaviour of multiply is
signal m: Decode2ToMultiplyType; signal m: Execute1ToMultiplyType;


type multiply_pipeline_stage is record type multiply_pipeline_stage is record
valid : std_ulogic; valid : std_ulogic;
@ -64,7 +64,7 @@ begin
begin begin
v := r; v := r;


m_out <= MultiplyToWritebackInit; m_out <= MultiplyToExecute1Init;


v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).insn_type := m.insn_type; v.multiply_pipeline(0).insn_type := m.insn_type;
@ -107,7 +107,6 @@ begin
-- Generate OV/OV32/SO when OE=1 -- Generate OV/OV32/SO when OE=1
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1'; m_out.valid <= '1';
m_out.write_reg_enable <= '1';
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe; m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;



@ -17,8 +17,8 @@ architecture behave of multiply_tb is


constant pipeline_depth : integer := 4; constant pipeline_depth : integer := 4;


signal m1 : Decode2ToMultiplyType; signal m1 : Execute1ToMultiplyType;
signal m2 : MultiplyToWritebackType; signal m2 : MultiplyToExecute1Type;
begin begin
multiply_0: entity work.multiply multiply_0: entity work.multiply
generic map (PIPELINE_DEPTH => pipeline_depth) generic map (PIPELINE_DEPTH => pipeline_depth)
@ -58,7 +58,6 @@ begin


wait for clk_period; wait for clk_period;
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_enable = '1';
assert m2.write_reg_nr = "10001"; assert m2.write_reg_nr = "10001";
assert m2.write_reg_data = x"0000000001111000"; assert m2.write_reg_data = x"0000000001111000";
assert m2.rc = '0'; assert m2.rc = '0';
@ -76,7 +75,6 @@ begin


wait for clk_period * (pipeline_depth-1); wait for clk_period * (pipeline_depth-1);
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_enable = '1';
assert m2.write_reg_nr = "10001"; assert m2.write_reg_nr = "10001";
assert m2.write_reg_data = x"0000000001111000"; assert m2.write_reg_data = x"0000000001111000";
assert m2.rc = '1'; assert m2.rc = '1';

@ -12,7 +12,6 @@ entity writeback is


e_in : in Execute1ToWritebackType; e_in : in Execute1ToWritebackType;
l_in : in DcacheToWritebackType; l_in : in DcacheToWritebackType;
m_in : in MultiplyToWritebackType;
d_in : in DividerToWritebackType; d_in : in DividerToWritebackType;


w_out : out WritebackToRegisterFileType; w_out : out WritebackToRegisterFileType;
@ -67,32 +66,28 @@ begin
begin begin
x := "" & e_in.valid; x := "" & e_in.valid;
y := "" & l_in.valid; y := "" & l_in.valid;
z := "" & m_in.valid; z := "" & d_in.valid;
w := "" & d_in.valid; assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure;


x := "" & e_in.write_enable; x := "" & e_in.write_enable;
y := "" & l_in.write_enable; y := "" & l_in.write_enable;
z := "" & m_in.write_reg_enable; z := "" & d_in.write_reg_enable;
w := "" & d_in.write_reg_enable; assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure;


w := "" & e_in.write_cr_enable; w := "" & e_in.write_cr_enable;
x := "" & (e_in.write_enable and e_in.rc); x := "" & (e_in.write_enable and e_in.rc);
y := "" & (m_in.valid and m_in.rc);
z := "" & (d_in.valid and d_in.rc); z := "" & (d_in.valid and d_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure;


x := "" & e_in.write_xerc_enable; x := "" & e_in.write_xerc_enable;
y := "" & m_in.write_xerc_enable;
z := "" & D_in.write_xerc_enable; z := "" & D_in.write_xerc_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; assert (to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure;


w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;


complete_out <= '0'; complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' or m_in.valid = '1' or d_in.valid = '1' then if e_in.valid = '1' or l_in.valid = '1' or d_in.valid = '1' then
complete_out <= '1'; complete_out <= '1';
end if; end if;


@ -143,19 +138,6 @@ begin
xe := l_in.xerc; xe := l_in.xerc;
end if; end if;


if m_in.write_reg_enable = '1' then
w_out.write_enable <= '1';
w_out.write_reg <= gpr_to_gspr(m_in.write_reg_nr);
data_in <= m_in.write_reg_data;
rc <= m_in.rc;
xe := m_in.xerc;
end if;

if m_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= m_in.xerc;
end if;

if d_in.write_reg_enable = '1' then if d_in.write_reg_enable = '1' then
w_out.write_enable <= '1'; w_out.write_enable <= '1';
w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr); w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);

Loading…
Cancel
Save