Make divider hang off the side of execute1

With this, the divider is a unit that execute1 sends operands to and
which sends its results back to execute1, which then sends them to
writeback.  Execute1 now sends a stall signal when it gets a divide
or modulus instruction until it gets a valid signal back from the
divider.  Divide and modulus instructions are no longer marked as
single-issue.

The data formatting step that used to be done in decode2 for div
and mod instructions is now done in execute1.  We also do the
absolute value operation in that same cycle instead of taking an
extra cycle inside the divider for signed operations with a
negative operand.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/134/head
Paul Mackerras 5 years ago
parent 2167186b5f
commit 39d18d2738

@ -17,7 +17,7 @@ common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o divider.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o
core_debug.o: common.o
countzero.o:
countzero_tb.o: common.o glibc_random.o countzero.o
@ -26,7 +26,7 @@ crhelpers.o: common.o
decode1.o: common.o decode_types.o
decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o
decode_types.o:
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o divider.o
fetch1.o: common.o
fetch2.o: common.o wishbone_types.o
glibc_random_helpers.o:

@ -145,7 +145,7 @@ package common is
oe => '0', is_32bit => '0', xerc => xerc_init,
others => (others => '0'));

type Decode2ToDividerType is record
type Execute1ToDividerType is record
valid: std_ulogic;
write_reg: gpr_index_t;
dividend: std_ulogic_vector(63 downto 0);
@ -154,14 +154,15 @@ package common is
is_32bit: std_ulogic;
is_extended: std_ulogic;
is_modulus: std_ulogic;
neg_result: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
xerc: xer_common_t;
end record;
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
neg_result => '0', others => (others => '0'));

type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
@ -275,20 +276,19 @@ package common is
xerc => xerc_init,
others => (others => '0'));

type DividerToWritebackType is record
type DividerToExecute1Type is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
end record;
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;

@ -63,10 +63,6 @@ architecture behave of core is
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
signal dcache_to_writeback: DcacheToWritebackType;

-- divider signals
signal decode2_to_divider: Decode2ToDividerType;
signal divider_to_writeback: DividerToWritebackType;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
@ -111,7 +107,6 @@ architecture behave of core is
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
@ -192,7 +187,6 @@ begin
d_in => decode1_to_decode2,
e_out => decode2_to_execute1,
l_out => decode2_to_loadstore1,
d_out => decode2_to_divider,
r_in => register_file_to_decode2,
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
@ -228,6 +222,7 @@ begin
execute1_0: entity work.execute1
port map (
clk => clk,
rst => core_rst,
flush_out => flush,
stall_out => ex1_stall_out,
e_in => decode2_to_execute1,
@ -259,20 +254,11 @@ begin
wishbone_out => wishbone_data_out
);

divider_0: entity work.divider
port map (
clk => clk,
rst => core_rst,
d_in => decode2_to_divider,
d_out => divider_to_writeback
);

writeback_0: entity work.writeback
port map (
clk => clk,
e_in => execute1_to_writeback,
l_in => dcache_to_writeback,
d_in => divider_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
complete_out => complete

@ -160,22 +160,22 @@ architecture behaviour of decode1 is
2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
-- 2#1111110110# dcbz
2#0110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeu
2#1110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeuo
2#0110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweu
2#1110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweuo
2#0110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divde
2#1110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeo
2#0110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwe
2#1110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweo
2#0111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdu
2#1111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divduo
2#0111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwu
2#1111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwuo
2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd
2#1111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdo
2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw
2#1111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwo
2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu
2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo
2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu
2#1110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweuo
2#0110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divde
2#1110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdeo
2#0110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwe
2#1110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divweo
2#0111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdu
2#1111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divduo
2#0111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwu
2#1111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwuo
2#0111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divd
2#1111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdo
2#0111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divw
2#1111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwo
2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv
2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb
2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh
@ -238,10 +238,10 @@ architecture behaviour of decode1 is
-- 2#1001000000# mcrxrx
2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf
2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr
2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud
2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw
2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
2#0100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- modud
2#0100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- moduw
2#1100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- modsd
2#1100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0'), -- modsw
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd

@ -24,7 +24,6 @@ entity decode2 is
d_in : in Decode1ToDecode2Type;

e_out : out Decode2ToExecute1Type;
d_out : out Decode2ToDividerType;
l_out : out Decode2ToLoadstore1Type;

r_in : in RegisterFileToDecode2Type;
@ -38,7 +37,6 @@ end entity decode2;
architecture behaviour of decode2 is
type reg_type is record
e : Decode2ToExecute1Type;
d : Decode2ToDividerType;
l : Decode2ToLoadstore1Type;
end record;

@ -236,7 +234,7 @@ begin
decode2_0: process(clk)
begin
if rising_edge(clk) then
if rin.e.valid = '1' or rin.l.valid = '1' or rin.d.valid = '1' then
if rin.e.valid = '1' or rin.l.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
end if;
r <= rin;
@ -257,14 +255,12 @@ begin
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable signed_division: std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
begin
v := r;

v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.d := Decode2ToDividerInit;

mul_a := (others => '0');
mul_b := (others => '0');
@ -319,51 +315,6 @@ begin
v.e.insn := d_in.insn;
v.e.data_len := length;

-- divide unit
-- PPC divide and modulus instruction words have these bits in
-- the bottom 11 bits: o1dns 010t1 r
-- where o = OE for div instrs, signedness for mod instrs
-- d = 1 for div*, 0 for mod*
-- n = 1 for normal, 0 for extended (dividend << 32/64)
-- s = 1 for signed, 0 for unsigned (for div*)
-- t = 1 for 32-bit, 0 for 64-bit
-- r = RC bit (record condition code)
v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.d.is_modulus := not d_in.insn(8);
v.d.is_32bit := d_in.insn(2);
if d_in.insn(8) = '1' then
signed_division := d_in.insn(6);
else
signed_division := d_in.insn(10);
end if;
v.d.is_signed := signed_division;
if d_in.insn(2) = '0' then
-- 64-bit forms
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
v.d.is_extended := '1';
end if;
v.d.dividend := decoded_reg_a.data;
v.d.divisor := decoded_reg_b.data;
else
-- 32-bit forms
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms
v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
-- sign extend to 64 bits
v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
else
v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
end if;
if signed_division = '1' and decoded_reg_b.data(31) = '1' then
v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
else
v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
end if;
end if;
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.d.xerc := c_in.read_xerc_data;
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);

-- load/store unit
v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg);
v.l.addr1 := decoded_reg_a.data;
@ -402,15 +353,12 @@ begin
cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);

v.e.valid := '0';
v.d.valid := '0';
v.l.valid := '0';
case d_in.decode.unit is
when ALU =>
v.e.valid := control_valid_out;
when LDST =>
v.l.valid := control_valid_out;
when DIV =>
v.d.valid := control_valid_out;
when NONE =>
v.e.valid := control_valid_out;
v.e.insn_type := OP_ILLEGAL;
@ -419,7 +367,6 @@ begin
if rst = '1' then
v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.d := Decode2ToDividerInit;
end if;

-- Update registers
@ -428,6 +375,5 @@ begin
-- Update outputs
e_out <= r.e;
l_out <= r.l;
d_out <= r.d;
end process;
end architecture behaviour;

@ -8,7 +8,7 @@ package decode_types is
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
@ -46,7 +46,7 @@ package decode_types is

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, DIV);
type unit_t is (NONE, ALU, LDST);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record

@ -10,8 +10,8 @@ entity divider is
port (
clk : in std_logic;
rst : in std_logic;
d_in : in Decode2ToDividerType;
d_out : out DividerToWritebackType
d_in : in Execute1ToDividerType;
d_out : out DividerToExecute1Type
);
end entity divider;

@ -23,7 +23,6 @@ architecture behaviour of divider is
signal sresult : std_ulogic_vector(64 downto 0);
signal oresult : std_ulogic_vector(63 downto 0);
signal running : std_ulogic;
signal signcheck : std_ulogic;
signal count : unsigned(6 downto 0);
signal neg_result : std_ulogic;
signal is_modulus : std_ulogic;
@ -48,7 +47,7 @@ begin
running <= '0';
count <= "0000000";
elsif d_in.valid = '1' then
if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
if d_in.is_extended = '1' then
dend <= '0' & d_in.dividend & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & d_in.dividend;
@ -56,7 +55,7 @@ begin
div <= unsigned(d_in.divisor);
quot <= (others => '0');
write_reg <= d_in.write_reg;
neg_result <= '0';
neg_result <= d_in.neg_result;
is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit;
@ -68,20 +67,6 @@ begin
running <= '1';
overflow <= '0';
ovf32 <= '0';
signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
elsif signcheck = '1' then
signcheck <= '0';
neg_result <= dend(63) xor (div(63) and not is_modulus);
if dend(63) = '1' then
if extended = '1' then
dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
end if;
end if;
if div(63) = '1' then
div <= unsigned(- signed(div));
end if;
elsif running = '1' then
if count = "0111111" then
running <= '0';
@ -151,12 +136,10 @@ begin
if rising_edge(clk) then
d_out.valid <= '0';
d_out.write_reg_data <= oresult;
d_out.write_reg_enable <= '0';
d_out.write_xerc_enable <= '0';
d_out.xerc <= xerc;
if count = "1000000" then
d_out.valid <= '1';
d_out.write_reg_enable <= '1';
d_out.write_xerc_enable <= oe;

-- We must test oe because the RC update code in writeback

@ -16,8 +16,8 @@ architecture behave of divider_tb is
signal rst : std_ulogic;
constant clk_period : time := 10 ns;

signal d1 : Decode2ToDividerType;
signal d2 : DividerToWritebackType;
signal d1 : Execute1ToDividerType;
signal d2 : DividerToExecute1Type;
begin
divider_0: entity work.divider
port map (clk => clk, rst => rst, d_in => d1, d_out => d2);
@ -50,6 +50,7 @@ begin
d1.is_32bit <= '0';
d1.is_extended <= '0';
d1.is_modulus <= '0';
d1.neg_result <= '0';
d1.rc <= '0';

wait for clk_period;
@ -65,7 +66,6 @@ begin
end loop;

assert d2.valid = '1';
assert d2.write_reg_enable = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '0';
@ -89,7 +89,6 @@ begin
end loop;

assert d2.valid = '1';
assert d2.write_reg_enable = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '1';
@ -105,9 +104,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.valid <= '1';

wait for clk_period;
@ -142,6 +142,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.valid <= '1';

wait for clk_period;
@ -173,9 +174,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '1';
d1.valid <= '1';

@ -216,6 +218,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '1';
d1.valid <= '1';

@ -250,9 +253,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -289,6 +293,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -322,9 +327,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000";
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -365,6 +371,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -398,9 +405,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63);
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -438,6 +446,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -472,9 +481,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';
@ -517,6 +527,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';

@ -13,6 +13,7 @@ use work.ppc_fx_insns.all;
entity execute1 is
port (
clk : in std_ulogic;
rst : in std_ulogic;

-- asynchronous
flush_out : out std_ulogic;
@ -36,6 +37,7 @@ architecture behaviour of execute1 is
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic;
div_in_progress : std_ulogic;
end record;

signal r, rin : reg_type;
@ -53,6 +55,10 @@ architecture behaviour of execute1 is
signal x_to_multiply: Execute1ToMultiplyType;
signal multiply_to_x: MultiplyToExecute1Type;

-- divider signals
signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type;

procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic;
carry : in std_ulogic) is
@ -135,6 +141,14 @@ begin
m_out => multiply_to_x
);

divider_0: entity work.divider
port map (
clk => clk,
rst => rst,
d_in => x_to_divider,
d_out => divider_to_x
);

execute1_0: process(clk)
begin
if rising_edge(clk) then
@ -171,6 +185,8 @@ begin
variable l : std_ulogic;
variable next_nia : std_ulogic_vector(63 downto 0);
variable carry_32, carry_64 : std_ulogic;
variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0);
begin
result := (others => '0');
result_with_carry := (others => '0');
@ -217,6 +233,7 @@ begin

v.lr_update := '0';
v.mul_in_progress := '0';
v.div_in_progress := '0';

-- signals to multiply unit
x_to_multiply <= Execute1ToMultiplyInit;
@ -249,6 +266,59 @@ begin
end if;
end if;

-- signals to divide unit
--
-- Prepare the operands for the divider: work out the operand signs,
-- take absolute values, and format the dividend/divisor according to
-- operand width and instruction type.  The divider is handed the
-- magnitudes together with a neg_result flag.

-- Operand signs only matter for signed operations.  The sign bit is
-- bit 31 for the 32-bit forms and bit 63 for the 64-bit forms.
sign1 := '0';
sign2 := '0';
if e_in.is_signed = '1' then
    if e_in.is_32bit = '1' then
        sign1 := e_in.read_data1(31);
        sign2 := e_in.read_data2(31);
    else
        sign1 := e_in.read_data1(63);
        sign2 := e_in.read_data2(63);
    end if;
end if;
-- take absolute values
-- The negation is done on the full 64 bits; the 32-bit forms below
-- only use the low 32 bits of the result.
if sign1 = '0' then
    abs1 := signed(e_in.read_data1);
else
    abs1 := - signed(e_in.read_data1);
end if;
if sign2 = '0' then
    abs2 := signed(e_in.read_data2);
else
    abs2 := - signed(e_in.read_data2);
end if;

x_to_divider <= Execute1ToDividerInit;
x_to_divider.write_reg <= gspr_to_gpr(e_in.write_reg);
x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit;
-- neg_result is derived directly from the instruction type rather
-- than by reading x_to_divider.is_modulus back: a signal read inside
-- the process that assigns it returns the value from before this
-- evaluation, so the read-back form is only correct once the
-- process has been re-triggered by the change on x_to_divider.
if e_in.insn_type = OP_MOD then
    x_to_divider.is_modulus <= '1';
    -- modulus: result sign follows the dividend only
    x_to_divider.neg_result <= sign1;
else
    -- divide: result is negative when the operand signs differ
    x_to_divider.neg_result <= sign1 xor sign2;
end if;
x_to_divider.rc <= e_in.rc;
x_to_divider.oe <= e_in.oe;
x_to_divider.xerc <= v.e.xerc;
if e_in.is_32bit = '0' then
    -- 64-bit forms
    if e_in.insn_type = OP_DIVE then
        x_to_divider.is_extended <= '1';
    end if;
    x_to_divider.dividend <= std_ulogic_vector(abs1);
    x_to_divider.divisor <= std_ulogic_vector(abs2);
else
    -- 32-bit forms
    x_to_divider.is_extended <= '0';
    if e_in.insn_type = OP_DIVE then   -- extended forms
        -- dividend is the 32-bit magnitude shifted left by 32 bits
        x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
    else
        x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
    end if;
    x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
end if;

ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
@ -550,13 +620,19 @@ begin
when OP_ICBI =>
icache_inval <= '1';

when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
v.e.valid := '0';
v.mul_in_progress := '1';
stall_out <= '1';
x_to_multiply.valid <= '1';

when others =>
when OP_DIV | OP_DIVE | OP_MOD =>
v.e.valid := '0';
v.div_in_progress := '1';
stall_out <= '1';
x_to_divider.valid <= '1';

when others =>
terminate_out <= '1';
report "illegal";
end case;
@ -603,6 +679,21 @@ begin
stall_out <= '1';
v.mul_in_progress := '1';
end if;
elsif r.div_in_progress = '1' then
if divider_to_x.valid = '1' then
v.e.write_reg := gpr_to_gspr(divider_to_x.write_reg_nr);
result := divider_to_x.write_reg_data;
result_en := '1';
v.e.rc := divider_to_x.rc;
v.e.xerc := divider_to_x.xerc;
v.e.write_xerc_enable := divider_to_x.write_xerc_enable;
v.e.valid := '1';
v.e.write_len := x"8";
v.e.sign_extend := '0';
else
stall_out <= '1';
v.div_in_progress := '1';
end if;
end if;

v.e.write_data := result;

@ -12,7 +12,6 @@ entity writeback is

e_in : in Execute1ToWritebackType;
l_in : in DcacheToWritebackType;
d_in : in DividerToWritebackType;

w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType;
@ -66,28 +65,21 @@ begin
begin
x := "" & e_in.valid;
y := "" & l_in.valid;
z := "" & d_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

x := "" & e_in.write_enable;
y := "" & l_in.write_enable;
z := "" & d_in.write_reg_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

w := "" & e_in.write_cr_enable;
x := "" & (e_in.write_enable and e_in.rc);
z := "" & (d_in.valid and d_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure;

x := "" & e_in.write_xerc_enable;
z := "" & D_in.write_xerc_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;

w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;

complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' or d_in.valid = '1' then
if e_in.valid = '1' or l_in.valid = '1' then
complete_out <= '1';
end if;

@ -138,19 +130,6 @@ begin
xe := l_in.xerc;
end if;

if d_in.write_reg_enable = '1' then
w_out.write_enable <= '1';
w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);
data_in <= d_in.write_reg_data;
rc <= d_in.rc;
xe := d_in.xerc;
end if;

if d_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= d_in.xerc;
end if;

-- shift and byte-reverse data bytes
for i in 0 to 7 loop
k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);

Loading…
Cancel
Save