diff --git a/common.vhdl b/common.vhdl index 1d0bbac..639f0f7 100644 --- a/common.vhdl +++ b/common.vhdl @@ -133,21 +133,16 @@ package common is type Execute1ToMultiplyType is record valid: std_ulogic; insn_type: insn_type_t; - write_reg: gpr_index_t; data1: std_ulogic_vector(64 downto 0); data2: std_ulogic_vector(64 downto 0); - rc: std_ulogic; - oe: std_ulogic; is_32bit: std_ulogic; - xerc: xer_common_t; end record; - constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', - oe => '0', is_32bit => '0', xerc => xerc_init, + constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, + is_32bit => '0', others => (others => '0')); type Execute1ToDividerType is record valid: std_ulogic; - write_reg: gpr_index_t; dividend: std_ulogic_vector(63 downto 0); divisor: std_ulogic_vector(63 downto 0); is_signed: std_ulogic; @@ -155,13 +150,9 @@ package common is is_extended: std_ulogic; is_modulus: std_ulogic; neg_result: std_ulogic; - rc: std_ulogic; - oe: std_ulogic; - xerc: xer_common_t; end record; constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', - rc => '0', oe => '0', xerc => xerc_init, neg_result => '0', others => (others => '0')); type Decode2ToRegisterFileType is record @@ -264,30 +255,18 @@ package common is type MultiplyToExecute1Type is record valid: std_ulogic; - - write_reg_nr: gpr_index_t; write_reg_data: std_ulogic_vector(63 downto 0); - write_xerc_enable : std_ulogic; - xerc : xer_common_t; - rc: std_ulogic; + overflow : std_ulogic; end record; - constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', - rc => '0', write_xerc_enable => '0', - xerc => xerc_init, + constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0', others => (others => '0')); type DividerToExecute1Type is record valid: std_ulogic; - - write_reg_nr: gpr_index_t; write_reg_data: std_ulogic_vector(63 downto 0); - write_xerc_enable : std_ulogic; - xerc : xer_common_t; - rc: std_ulogic; + overflow : std_ulogic; end record; - constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', - rc => '0', write_xerc_enable => '0', - xerc => xerc_init, + constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0', others => (others => '0')); type WritebackToRegisterFileType is record diff --git a/decode2.vhdl b/decode2.vhdl index a95dae3..6cd4574 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -300,7 +300,9 @@ begin v.e.read_data3 := decoded_reg_c.data; v.e.write_reg := decoded_reg_o.reg; v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); - v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); + if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then + v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); + end if; v.e.cr := c_in.read_cr_data; v.e.xerc := c_in.read_xerc_data; v.e.invert_a := d_in.decode.invert_a; diff --git a/divider.vhdl b/divider.vhdl index 33d2a0d..aef65a4 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -29,13 +29,9 @@ architecture behaviour of divider is signal is_32bit : std_ulogic; signal extended : std_ulogic; signal is_signed : std_ulogic; - signal rc : std_ulogic; - signal write_reg : std_ulogic_vector(4 downto 0); signal overflow : std_ulogic; signal ovf32 : std_ulogic; signal did_ovf : std_ulogic; - signal oe : std_ulogic; - signal xerc : xer_common_t; begin divider_0: process(clk) begin @@ -54,15 +50,11 @@ begin end if; div <= unsigned(d_in.divisor); quot <= (others => '0'); - write_reg <= d_in.write_reg; neg_result <= d_in.neg_result; is_modulus <= d_in.is_modulus; extended <= d_in.is_extended; is_32bit <= d_in.is_32bit; is_signed <= d_in.is_signed; - rc <= d_in.rc; - oe <= d_in.oe; - xerc <= d_in.xerc; count <= "1111111"; running <= '1'; overflow <= '0'; @@ -98,9 +90,6 @@ begin divider_1: process(all) begin - d_out.write_reg_nr <= write_reg; - d_out.rc <= rc; - if is_modulus = '1' then result <= dend(128 downto 65); else @@ -136,21 +125,9 @@ begin if rising_edge(clk) then d_out.valid <= '0'; d_out.write_reg_data <= oresult; - d_out.write_xerc_enable <= '0'; - d_out.xerc <= xerc; + d_out.overflow <= did_ovf; if count = "1000000" then d_out.valid <= '1'; - d_out.write_xerc_enable <= oe; - - -- We must test oe because the RC update code in writeback - -- will use the xerc value to set CR0:SO so we must not clobber - -- xerc if OE wasn't set. - -- - if oe = '1' then - d_out.xerc.ov <= did_ovf; - d_out.xerc.ov32 <= did_ovf; - d_out.xerc.so <= xerc.so or did_ovf; - end if; end if; end if; end process; diff --git a/divider_tb.vhdl b/divider_tb.vhdl index 8151315..95156a3 100644 --- a/divider_tb.vhdl +++ b/divider_tb.vhdl @@ -43,7 +43,6 @@ begin rst <= '0'; d1.valid <= '1'; - d1.write_reg <= "10001"; d1.dividend <= x"0000000010001000"; d1.divisor <= x"0000000000001111"; d1.is_signed <= '0'; @@ -51,7 +50,6 @@ begin d1.is_extended <= '0'; d1.is_modulus <= '0'; d1.neg_result <= '0'; - d1.rc <= '0'; wait for clk_period; assert d2.valid = '0'; @@ -66,15 +64,12 @@ begin end loop; assert d2.valid = '1'; - assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); - assert d2.rc = '0'; wait for clk_period; assert d2.valid = '0' report "valid"; d1.valid <= '1'; - d1.rc <= '1'; wait for clk_period; assert d2.valid = '0' report "valid"; @@ -89,9 +84,7 @@ begin end loop; assert d2.valid = '1'; - assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); - assert d2.rc = '1'; wait for clk_period; assert d2.valid = '0'; diff --git a/execute1.vhdl b/execute1.vhdl index 7bcffdc..94845d8 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -38,6 +38,10 @@ architecture behaviour of execute1 is next_lr : std_ulogic_vector(63 downto 0); mul_in_progress : std_ulogic; div_in_progress : std_ulogic; + slow_op_dest : gpr_index_t; + slow_op_rc : std_ulogic; + slow_op_oe : std_ulogic; + slow_op_xerc : xer_common_t; end record; signal r, rin : reg_type; @@ -187,6 +191,7 @@ begin variable carry_32, carry_64 : std_ulogic; variable sign1, sign2 : std_ulogic; variable abs1, abs2 : signed(63 downto 0); + variable overflow : std_ulogic; begin result := (others => '0'); result_with_carry := (others => '0'); @@ -238,12 +243,6 @@ begin -- signals to multiply unit x_to_multiply <= Execute1ToMultiplyInit; x_to_multiply.insn_type <= e_in.insn_type; - x_to_multiply.write_reg <= gspr_to_gpr(e_in.write_reg); - x_to_multiply.rc <= e_in.rc; - x_to_multiply.xerc <= v.e.xerc; - if e_in.insn_type = OP_MUL_L64 then - x_to_multiply.oe <= e_in.oe; - end if; x_to_multiply.is_32bit <= e_in.is_32bit; if e_in.is_32bit = '1' then @@ -291,16 +290,12 @@ begin end if; x_to_divider <= Execute1ToDividerInit; - x_to_divider.write_reg <= gspr_to_gpr(e_in.write_reg); x_to_divider.is_signed <= e_in.is_signed; x_to_divider.is_32bit <= e_in.is_32bit; if e_in.insn_type = OP_MOD then x_to_divider.is_modulus <= '1'; end if; x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); - x_to_divider.rc <= e_in.rc; - x_to_divider.oe <= e_in.oe; - x_to_divider.xerc <= v.e.xerc; if e_in.is_32bit = '0' then -- 64-bit forms if e_in.insn_type = OP_DIVE then @@ -342,6 +337,10 @@ begin v.e.write_reg := e_in.write_reg; v.e.write_len := x"8"; v.e.sign_extend := '0'; + v.slow_op_dest := gspr_to_gpr(e_in.write_reg); + v.slow_op_rc := e_in.rc; + v.slow_op_oe := e_in.oe; + v.slow_op_xerc := v.e.xerc; case_0: case e_in.insn_type is @@ -664,35 +663,36 @@ begin v.e.write_len := x"8"; v.e.sign_extend := '0'; v.e.valid := '1'; - elsif r.mul_in_progress = '1' then - if multiply_to_x.valid = '1' then - v.e.write_reg := gpr_to_gspr(multiply_to_x.write_reg_nr); - result := multiply_to_x.write_reg_data; + elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then + if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or + (r.div_in_progress = '1' and divider_to_x.valid = '1') then + if r.mul_in_progress = '1' then + result := multiply_to_x.write_reg_data; + overflow := multiply_to_x.overflow; + else + result := divider_to_x.write_reg_data; + overflow := divider_to_x.overflow; + end if; result_en := '1'; - v.e.rc := multiply_to_x.rc; - v.e.xerc := multiply_to_x.xerc; - v.e.write_xerc_enable := multiply_to_x.write_xerc_enable; + v.e.write_reg := gpr_to_gspr(v.slow_op_dest); + v.e.rc := v.slow_op_rc; + v.e.xerc := v.slow_op_xerc; + v.e.write_xerc_enable := v.slow_op_oe; + -- We must test oe because the RC update code in writeback + -- will use the xerc value to set CR0:SO so we must not clobber + -- xerc if OE wasn't set. + if v.slow_op_oe = '1' then + v.e.xerc.ov := overflow; + v.e.xerc.ov32 := overflow; + v.e.xerc.so := v.slow_op_xerc.so or overflow; + end if; v.e.valid := '1'; v.e.write_len := x"8"; v.e.sign_extend := '0'; else stall_out <= '1'; - v.mul_in_progress := '1'; - end if; - elsif r.div_in_progress = '1' then - if divider_to_x.valid = '1' then - v.e.write_reg := gpr_to_gspr(divider_to_x.write_reg_nr); - result := divider_to_x.write_reg_data; - result_en := '1'; - v.e.rc := divider_to_x.rc; - v.e.xerc := divider_to_x.xerc; - v.e.write_xerc_enable := divider_to_x.write_xerc_enable; - v.e.valid := '1'; - v.e.write_len := x"8"; - v.e.sign_extend := '0'; - else - stall_out <= '1'; - v.div_in_progress := '1'; + v.mul_in_progress := r.mul_in_progress; + v.div_in_progress := r.div_in_progress; end if; end if; diff --git a/multiply.vhdl b/multiply.vhdl index 714b844..959c114 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -25,19 +25,12 @@ architecture behaviour of multiply is valid : std_ulogic; insn_type : insn_type_t; data : signed(129 downto 0); - write_reg : std_ulogic_vector(4 downto 0); - rc : std_ulogic; - oe : std_ulogic; is_32bit : std_ulogic; - xerc : xer_common_t; end record; constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', insn_type => OP_ILLEGAL, - rc => '0', oe => '0', is_32bit => '0', - xerc => xerc_init, - data => (others => '0'), - others => (others => '0')); + data => (others => '0')); type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit); @@ -69,11 +62,7 @@ begin v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).insn_type := m.insn_type; v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2); - v.multiply_pipeline(0).write_reg := m.write_reg; - v.multiply_pipeline(0).rc := m.rc; - v.multiply_pipeline(0).oe := m.oe; v.multiply_pipeline(0).is_32bit := m.is_32bit; - v.multiply_pipeline(0).xerc := m.xerc; loop_0: for i in 1 to PIPELINE_DEPTH-1 loop v.multiply_pipeline(i) := r.multiply_pipeline(i-1); @@ -101,24 +90,10 @@ begin end case; m_out.write_reg_data <= d2; - m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg; - m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc; + m_out.overflow <= ov; - -- Generate OV/OV32/SO when OE=1 if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then m_out.valid <= '1'; - m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; - m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe; - - -- We must test oe because the RC update code in writeback - -- will use the xerc value to set CR0:SO so we must not clobber - -- xerc if OE wasn't set. - -- - if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then - m_out.xerc.ov <= ov; - m_out.xerc.ov32 <= ov; - m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov; - end if; end if; rin <= v; diff --git a/multiply_tb.vhdl b/multiply_tb.vhdl index a76d739..8f1d795 100644 --- a/multiply_tb.vhdl +++ b/multiply_tb.vhdl @@ -40,10 +40,8 @@ begin m1.valid <= '1'; m1.insn_type <= OP_MUL_L64; - m1.write_reg <= "10001"; m1.data1 <= '0' & x"0000000000001000"; m1.data2 <= '0' & x"0000000000001111"; - m1.rc <= '0'; wait for clk_period; assert m2.valid = '0'; @@ -58,15 +56,12 @@ begin wait for clk_period; assert m2.valid = '1'; - assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; - assert m2.rc = '0'; wait for clk_period; assert m2.valid = '0'; m1.valid <= '1'; - m1.rc <= '1'; wait for clk_period; assert m2.valid = '0'; @@ -75,9 +70,7 @@ begin wait for clk_period * (pipeline_depth-1); assert m2.valid = '1'; - assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; - assert m2.rc = '1'; -- test mulld mulld_loop : for i in 0 to 1000 loop