Merge pull request #134 from paulusmack/master

Add bypass from execute1 output to input
pull/145/head
Anton Blanchard 4 years ago committed by GitHub
commit f77b31a552
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -31,7 +31,7 @@ common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o
core_debug.o: common.o
countzero.o:
countzero_tb.o: common.o glibc_random.o countzero.o
@ -40,7 +40,7 @@ crhelpers.o: common.o
decode1.o: common.o decode_types.o
decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o
decode_types.o:
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o divider.o
fetch1.o: common.o
fetch2.o: common.o wishbone_types.o
glibc_random_helpers.o:

@ -109,6 +109,9 @@ package common is
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
bypass_data1: std_ulogic;
bypass_data2: std_ulogic;
bypass_data3: std_ulogic;
cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t;
lr: std_ulogic;
@ -124,44 +127,41 @@ package common is
is_signed: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
data_len: std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0',
(valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));
is_32bit => '0', is_signed => '0', xerc => xerc_init,
byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0'));

type Decode2ToMultiplyType is record
type Execute1ToMultiplyType is record
valid: std_ulogic;
insn_type: insn_type_t;
write_reg: gpr_index_t;
data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic;
oe: std_ulogic;
is_32bit: std_ulogic;
xerc: xer_common_t;
end record;
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
oe => '0', is_32bit => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
is_32bit => '0',
others => (others => '0'));

type Decode2ToDividerType is record
type Execute1ToDividerType is record
valid: std_ulogic;
write_reg: gpr_index_t;
dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic;
is_32bit: std_ulogic;
is_extended: std_ulogic;
is_modulus: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
xerc: xer_common_t;
neg_result: std_ulogic;
end record;
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
neg_result => '0', others => (others => '0'));

type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
@ -193,7 +193,7 @@ package common is
end record;
constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', others => (others => '0'));

type Decode2ToLoadstore1Type is record
type Execute1ToLoadstore1Type is record
valid : std_ulogic;
load : std_ulogic; -- is this a load or store
addr1 : std_ulogic_vector(63 downto 0);
@ -207,9 +207,9 @@ package common is
update_reg : gpr_index_t; -- if so, the register to update
xerc : xer_common_t;
end record;
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
others => (others => '0'));

type Loadstore1ToDcacheType is record
valid : std_ulogic;
@ -248,48 +248,32 @@ package common is
write_enable : std_ulogic;
write_reg: gspr_index_t;
write_data: std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
sign_extend: std_ulogic;
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0',
write_cr_enable => '0', sign_extend => '0',
write_cr_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
others => (others => '0'));

type MultiplyToWritebackType is record
type MultiplyToExecute1Type is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
overflow : std_ulogic;
end record;
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type DividerToWritebackType is record
type DividerToExecute1Type is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
overflow : std_ulogic;
end record;
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;

@ -21,6 +21,7 @@ entity control is

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;

gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;
@ -36,7 +37,11 @@ entity control is

valid_out : out std_ulogic;
stall_out : out std_ulogic;
stopped_out : out std_ulogic
stopped_out : out std_ulogic;

gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic
);
end entity control;

@ -71,10 +76,12 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,

stall_out => stall_a_out
stall_out => stall_a_out,
use_bypass => gpr_bypass_a
);

gpr_hazard1: entity work.gpr_hazard
@ -87,10 +94,12 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,

stall_out => stall_b_out
stall_out => stall_b_out,
use_bypass => gpr_bypass_b
);

gpr_c_read_in_fmt <= "0" & gpr_c_read_in;
@ -105,10 +114,12 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,

stall_out => stall_c_out
stall_out => stall_c_out,
use_bypass => gpr_bypass_c
);

cr_hazard0: entity work.cr_hazard

@ -9,7 +9,8 @@ use work.wishbone_types.all;
entity core is
generic (
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true
);
port (
clk : in std_logic;
@ -59,18 +60,10 @@ architecture behave of core is
signal execute1_to_fetch1: Execute1ToFetch1Type;

-- load store signals
signal decode2_to_loadstore1: Decode2ToLoadstore1Type;
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
signal dcache_to_writeback: DcacheToWritebackType;

-- multiply signals
signal decode2_to_multiply: Decode2ToMultiplyType;
signal multiply_to_writeback: MultiplyToWritebackType;

-- divider signals
signal decode2_to_divider: Decode2ToDividerType;
signal divider_to_writeback: DividerToWritebackType;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
@ -115,8 +108,6 @@ architecture behave of core is
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
@ -186,6 +177,9 @@ begin
decode1_stall_in <= decode2_stall_out;

decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
rst => core_rst,
@ -196,9 +190,6 @@ begin
stopped_out => dbg_core_is_stopped,
d_in => decode1_to_decode2,
e_out => decode2_to_execute1,
l_out => decode2_to_loadstore1,
m_out => decode2_to_multiply,
d_out => decode2_to_divider,
r_in => register_file_to_decode2,
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
@ -232,11 +223,16 @@ begin
);

execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
rst => core_rst,
flush_out => flush,
stall_out => ex1_stall_out,
e_in => decode2_to_execute1,
l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1,
e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval,
@ -246,7 +242,7 @@ begin
loadstore1_0: entity work.loadstore1
port map (
clk => clk,
l_in => decode2_to_loadstore1,
l_in => execute1_to_loadstore1,
l_out => loadstore1_to_dcache
);

@ -265,28 +261,11 @@ begin
wishbone_out => wishbone_data_out
);

multiply_0: entity work.multiply
port map (
clk => clk,
m_in => decode2_to_multiply,
m_out => multiply_to_writeback
);

divider_0: entity work.divider
port map (
clk => clk,
rst => core_rst,
d_in => decode2_to_divider,
d_out => divider_to_writeback
);

writeback_0: entity work.writeback
port map (
clk => clk,
e_in => execute1_to_writeback,
l_in => dcache_to_writeback,
m_in => multiply_to_writeback,
d_in => divider_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
complete_out => complete

@ -6,6 +6,7 @@ library work;

entity zero_counter is
port (
clk : in std_logic;
rs : in std_ulogic_vector(63 downto 0);
count_right : in std_ulogic;
is_32bit : in std_ulogic;
@ -14,10 +15,14 @@ entity zero_counter is
end entity zero_counter;

architecture behaviour of zero_counter is
signal y, z : std_ulogic_vector(3 downto 0);
signal v16 : std_ulogic_vector(15 downto 0);
signal v4 : std_ulogic_vector(3 downto 0);
signal sel : std_ulogic_vector(5 downto 0);
type intermediate_result is record
v16: std_ulogic_vector(15 downto 0);
sel_hi: std_ulogic_vector(1 downto 0);
is_32bit: std_ulogic;
count_right: std_ulogic;
end record;

signal r, r_in : intermediate_result;

-- Return the index of the leftmost or rightmost 1 in a set of 4 bits.
-- Assumes v is not "0000"; if it is, return (right ? "11" : "00").
@ -47,65 +52,83 @@ architecture behaviour of zero_counter is
end;

begin
zerocounter0: process(all)
zerocounter_0: process(clk)
begin
if rising_edge(clk) then
r <= r_in;
end if;
end process;

zerocounter_1: process(all)
variable v: intermediate_result;
variable y, z: std_ulogic_vector(3 downto 0);
variable sel: std_ulogic_vector(5 downto 0);
variable v4: std_ulogic_vector(3 downto 0);

begin
-- Test 4 groups of 16 bits each.
-- The top 2 groups are considered to be zero in 32-bit mode.
z(0) <= or (rs(15 downto 0));
z(1) <= or (rs(31 downto 16));
z(2) <= or (rs(47 downto 32));
z(3) <= or (rs(63 downto 48));
z(0) := or (rs(15 downto 0));
z(1) := or (rs(31 downto 16));
z(2) := or (rs(47 downto 32));
z(3) := or (rs(63 downto 48));
if is_32bit = '0' then
sel(5 downto 4) <= encoder(z, count_right);
v.sel_hi := encoder(z, count_right);
else
sel(5) <= '0';
v.sel_hi(1) := '0';
if count_right = '0' then
sel(4) <= z(1);
v.sel_hi(0) := z(1);
else
sel(4) <= not z(0);
v.sel_hi(0) := not z(0);
end if;
end if;

-- Select the leftmost/rightmost non-zero group of 16 bits
case sel(5 downto 4) is
case v.sel_hi is
when "00" =>
v16 <= rs(15 downto 0);
v.v16 := rs(15 downto 0);
when "01" =>
v16 <= rs(31 downto 16);
v.v16 := rs(31 downto 16);
when "10" =>
v16 <= rs(47 downto 32);
v.v16 := rs(47 downto 32);
when others =>
v16 <= rs(63 downto 48);
v.v16 := rs(63 downto 48);
end case;

-- Latch this and do the rest in the next cycle, for the sake of timing
v.is_32bit := is_32bit;
v.count_right := count_right;
r_in <= v;
sel(5 downto 4) := r.sel_hi;

-- Test 4 groups of 4 bits
y(0) <= or (v16(3 downto 0));
y(1) <= or (v16(7 downto 4));
y(2) <= or (v16(11 downto 8));
y(3) <= or (v16(15 downto 12));
sel(3 downto 2) <= encoder(y, count_right);
y(0) := or (r.v16(3 downto 0));
y(1) := or (r.v16(7 downto 4));
y(2) := or (r.v16(11 downto 8));
y(3) := or (r.v16(15 downto 12));
sel(3 downto 2) := encoder(y, r.count_right);

-- Select the leftmost/rightmost non-zero group of 4 bits
case sel(3 downto 2) is
when "00" =>
v4 <= v16(3 downto 0);
v4 := r.v16(3 downto 0);
when "01" =>
v4 <= v16(7 downto 4);
v4 := r.v16(7 downto 4);
when "10" =>
v4 <= v16(11 downto 8);
v4 := r.v16(11 downto 8);
when others =>
v4 <= v16(15 downto 12);
v4 := r.v16(15 downto 12);
end case;

sel(1 downto 0) <= encoder(v4, count_right);
sel(1 downto 0) := encoder(v4, r.count_right);

-- sel is now the index of the leftmost/rightmost 1 bit in rs
if v4 = "0000" then
-- operand is zero, return 32 for 32-bit, else 64
result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
elsif count_right = '0' then
result <= x"00000000000000" & '0' & not r.is_32bit & r.is_32bit & "00000";
elsif r.count_right = '0' then
-- return (63 - sel), trimmed to 5 bits in 32-bit mode
result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
result <= x"00000000000000" & "00" & (not sel(5) and not r.is_32bit) & not sel(4 downto 0);
else
result <= x"00000000000000" & "00" & sel;
end if;

@ -15,16 +15,26 @@ architecture behave of countzero_tb is
signal is_32bit, count_right: std_ulogic := '0';
signal result: std_ulogic_vector(63 downto 0);
signal randno: std_ulogic_vector(63 downto 0);
signal clk: std_ulogic;

begin
zerocounter_0: entity work.zero_counter
port map (
clk => clk,
rs => rs,
result => result,
count_right => count_right,
is_32bit => is_32bit
);

clk_process: process
begin
clk <= '0';
wait for clk_period/2;
clk <= '1';
wait for clk_period/2;
end process;

stim_process: process
variable r: std_ulogic_vector(63 downto 0);
begin

@ -44,8 +44,8 @@ architecture behaviour of decode1 is
29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis.
18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi
10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmpi
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lbzu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '1'), -- lha
@ -54,7 +54,7 @@ architecture behaviour of decode1 is
41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lhzu
32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lwz
33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lwzu
7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli
7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli
24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori
25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris
20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi
@ -66,7 +66,7 @@ architecture behaviour of decode1 is
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- stw
37 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- stwu
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- subfic
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- subfic
2 => (ALU, OP_TDI, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- tdi
--PPC_TWI 3
26 => (ALU, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- xori
@ -145,10 +145,10 @@ architecture behaviour of decode1 is
2#0000011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- and
2#0000111100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- andc
-- 2#0011111100# bperm
2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmp
2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmp
2#0111111100# => (ALU, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpb
-- 2#0011100000# cmpeqb
2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
2#0000100000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
-- 2#0011000000# cmprb
2#0000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- cntlzd
2#0000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- cntlzw
@ -160,22 +160,22 @@ architecture behaviour of decode1 is
2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
-- 2#1111110110# dcbz
2#0110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeu
2#1110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeuo
2#0110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweu
2#1110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweuo
2#0110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divde
2#1110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeo
2#0110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwe
2#1110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweo
2#0111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdu
2#1111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divduo
2#0111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwu
2#1111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwuo
2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd
2#1111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdo
2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw
2#1111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwo
2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu
2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo
2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu
2#1110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweuo
2#0110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divde
2#1110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdeo
2#0110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwe
2#1110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divweo
2#0111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdu
2#1111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divduo
2#0111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwu
2#1111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwuo
2#0111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divd
2#1111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdo
2#0111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divw
2#1111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwo
2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv
2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb
2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh
@ -238,36 +238,36 @@ architecture behaviour of decode1 is
-- 2#1001000000# mcrxrx
2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf
2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr
2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud
2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw
2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
2#0100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- modud
2#0100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- moduw
2#1100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- modsd
2#1100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0'), -- modsw
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
2#0000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
-- next 4 have reserved bit set
2#1001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
2#1000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
2#1001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
2#1000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
2#0011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulld
2#1011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulldo
2#0011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullw
2#1011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullwo
2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld
2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo
2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw
2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo
2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand
2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg
2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego
2#0001111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nor
2#0110111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- or
2#0110011100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- orc
2#0001111010# => (ALU, OP_POPCNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
2#0111111010# => (ALU, OP_POPCNTD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
2#0101111010# => (ALU, OP_POPCNTW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
2#0001111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
2#0111111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
2#0101111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
2#0010111010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
2#0010011010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
-- 2#0010000000# setb
2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- sld
2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- slw

@ -9,6 +9,9 @@ use work.helpers.all;
use work.insn_helpers.all;

entity decode2 is
generic (
EX1_BYPASS : boolean := true
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
@ -24,9 +27,6 @@ entity decode2 is
d_in : in Decode1ToDecode2Type;

e_out : out Decode2ToExecute1Type;
m_out : out Decode2ToMultiplyType;
d_out : out Decode2ToDividerType;
l_out : out Decode2ToLoadstore1Type;

r_in : in RegisterFileToDecode2Type;
r_out : out Decode2ToRegisterFileType;
@ -39,9 +39,6 @@ end entity decode2;
architecture behaviour of decode2 is
type reg_type is record
e : Decode2ToExecute1Type;
m : Decode2ToMultiplyType;
d : Decode2ToDividerType;
l : Decode2ToLoadstore1Type;
end record;

signal r, rin : reg_type;
@ -188,15 +185,19 @@ architecture behaviour of decode2 is

signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic;

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic;

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_write_valid : std_ulogic;
begin
@ -217,6 +218,7 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,

gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,
@ -232,13 +234,17 @@ begin

valid_out => control_valid_out,
stall_out => stall_out,
stopped_out => stopped_out
stopped_out => stopped_out,

gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass
);

decode2_0: process(clk)
begin
if rising_edge(clk) then
if rin.e.valid = '1' or rin.l.valid = '1' or rin.m.valid = '1' or rin.d.valid = '1' then
if rin.e.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
end if;
r <= rin;
@ -259,21 +265,16 @@ begin
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable signed_division: std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
begin
v := r;

v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
v.d := Decode2ToDividerInit;

mul_a := (others => '0');
mul_b := (others => '0');

--v.e.input_cr := d_in.decode.input_cr;
--v.m.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1);
@ -303,12 +304,17 @@ begin
v.e.insn_type := d_in.decode.insn_type;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data;
v.e.bypass_data2 := gpr_b_bypass;
v.e.read_data3 := decoded_reg_c.data;
v.e.bypass_data3 := gpr_c_bypass;
v.e.write_reg := decoded_reg_o.reg;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.e.cr := c_in.read_cr_data;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
@ -322,102 +328,9 @@ begin
end if;
v.e.insn := d_in.insn;
v.e.data_len := length;

-- multiply unit
v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data;
v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.m.xerc := c_in.read_xerc_data;
if v.m.insn_type = OP_MUL_L64 then
v.m.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.m.is_32bit := d_in.decode.is_32bit;

if d_in.decode.is_32bit = '1' then
if d_in.decode.is_signed = '1' then
v.m.data1 := (others => mul_a(31));
v.m.data1(31 downto 0) := mul_a(31 downto 0);
v.m.data2 := (others => mul_b(31));
v.m.data2(31 downto 0) := mul_b(31 downto 0);
else
v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
end if;
else
if d_in.decode.is_signed = '1' then
v.m.data1 := mul_a(63) & mul_a;
v.m.data2 := mul_b(63) & mul_b;
else
v.m.data1 := '0' & mul_a;
v.m.data2 := '0' & mul_b;
end if;
end if;

-- divide unit
-- PPC divide and modulus instruction words have these bits in
-- the bottom 11 bits: o1dns 010t1 r
-- where o = OE for div instrs, signedness for mod instrs
-- d = 1 for div*, 0 for mod*
-- n = 1 for normal, 0 for extended (dividend << 32/64)
-- s = 1 for signed, 0 for unsigned (for div*)
-- t = 1 for 32-bit, 0 for 64-bit
-- r = RC bit (record condition code)
v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.d.is_modulus := not d_in.insn(8);
v.d.is_32bit := d_in.insn(2);
if d_in.insn(8) = '1' then
signed_division := d_in.insn(6);
else
signed_division := d_in.insn(10);
end if;
v.d.is_signed := signed_division;
if d_in.insn(2) = '0' then
-- 64-bit forms
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
v.d.is_extended := '1';
end if;
v.d.dividend := decoded_reg_a.data;
v.d.divisor := decoded_reg_b.data;
else
-- 32-bit forms
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms
v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
-- sign extend to 64 bits
v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
else
v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
end if;
if signed_division = '1' and decoded_reg_b.data(31) = '1' then
v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
else
v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
end if;
end if;
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.d.xerc := c_in.read_xerc_data;
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);

-- load/store unit
v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg);
v.l.addr1 := decoded_reg_a.data;
v.l.addr2 := decoded_reg_b.data;
v.l.data := decoded_reg_c.data;
v.l.write_reg := gspr_to_gpr(decoded_reg_o.reg);

if d_in.decode.insn_type = OP_LOAD then
v.l.load := '1';
else
v.l.load := '0';
end if;

v.l.length := length;
v.l.byte_reverse := d_in.decode.byte_reverse;
v.l.sign_extend := d_in.decode.sign_extend;
v.l.update := d_in.decode.update;
v.l.xerc := c_in.read_xerc_data;
v.e.byte_reverse := d_in.decode.byte_reverse;
v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update;

-- issue control
control_valid_in <= d_in.valid;
@ -425,6 +338,10 @@ begin

gpr_write_valid <= decoded_reg_o.reg_valid;
gpr_write <= decoded_reg_o.reg;
gpr_bypassable <= '0';
if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1';
end if;

gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
@ -437,29 +354,13 @@ begin

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);

v.e.valid := '0';
v.m.valid := '0';
v.d.valid := '0';
v.l.valid := '0';
case d_in.decode.unit is
when ALU =>
v.e.valid := control_valid_out;
when LDST =>
v.l.valid := control_valid_out;
when MUL =>
v.m.valid := control_valid_out;
when DIV =>
v.d.valid := control_valid_out;
when NONE =>
v.e.valid := control_valid_out;
v.e.valid := control_valid_out;
if d_in.decode.unit = NONE then
v.e.insn_type := OP_ILLEGAL;
end case;
end if;

if rst = '1' then
v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
v.d := Decode2ToDividerInit;
end if;

-- Update registers
@ -467,8 +368,5 @@ begin

-- Update outputs
e_out <= r.e;
l_out <= r.l;
m_out <= r.m;
d_out <= r.d;
end process;
end architecture behaviour;

@ -4,18 +4,18 @@ use ieee.std_logic_1164.all;
package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_POPCNT, OP_PRTY,
OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG
@ -46,7 +46,7 @@ package decode_types is

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL, DIV);
type unit_t is (NONE, ALU, LDST);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record

@ -10,8 +10,8 @@ entity divider is
port (
clk : in std_logic;
rst : in std_logic;
d_in : in Decode2ToDividerType;
d_out : out DividerToWritebackType
d_in : in Execute1ToDividerType;
d_out : out DividerToExecute1Type
);
end entity divider;

@ -23,20 +23,15 @@ architecture behaviour of divider is
signal sresult : std_ulogic_vector(64 downto 0);
signal oresult : std_ulogic_vector(63 downto 0);
signal running : std_ulogic;
signal signcheck : std_ulogic;
signal count : unsigned(6 downto 0);
signal neg_result : std_ulogic;
signal is_modulus : std_ulogic;
signal is_32bit : std_ulogic;
signal extended : std_ulogic;
signal is_signed : std_ulogic;
signal rc : std_ulogic;
signal write_reg : std_ulogic_vector(4 downto 0);
signal overflow : std_ulogic;
signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic;
signal oe : std_ulogic;
signal xerc : xer_common_t;
begin
divider_0: process(clk)
begin
@ -48,40 +43,22 @@ begin
running <= '0';
count <= "0000000";
elsif d_in.valid = '1' then
if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
if d_in.is_extended = '1' then
dend <= '0' & d_in.dividend & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & d_in.dividend;
end if;
div <= unsigned(d_in.divisor);
quot <= (others => '0');
write_reg <= d_in.write_reg;
neg_result <= '0';
neg_result <= d_in.neg_result;
is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed;
rc <= d_in.rc;
oe <= d_in.oe;
xerc <= d_in.xerc;
count <= "1111111";
running <= '1';
overflow <= '0';
ovf32 <= '0';
signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
elsif signcheck = '1' then
signcheck <= '0';
neg_result <= dend(63) xor (div(63) and not is_modulus);
if dend(63) = '1' then
if extended = '1' then
dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
end if;
end if;
if div(63) = '1' then
div <= unsigned(- signed(div));
end if;
elsif running = '1' then
if count = "0111111" then
running <= '0';
@ -113,9 +90,6 @@ begin

divider_1: process(all)
begin
d_out.write_reg_nr <= write_reg;
d_out.rc <= rc;

if is_modulus = '1' then
result <= dend(128 downto 65);
else
@ -151,23 +125,9 @@ begin
if rising_edge(clk) then
d_out.valid <= '0';
d_out.write_reg_data <= oresult;
d_out.write_reg_enable <= '0';
d_out.write_xerc_enable <= '0';
d_out.xerc <= xerc;
d_out.overflow <= did_ovf;
if count = "1000000" then
d_out.valid <= '1';
d_out.write_reg_enable <= '1';
d_out.write_xerc_enable <= oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if oe = '1' then
d_out.xerc.ov <= did_ovf;
d_out.xerc.ov32 <= did_ovf;
d_out.xerc.so <= xerc.so or did_ovf;
end if;
end if;
end if;
end process;

@ -16,8 +16,8 @@ architecture behave of divider_tb is
signal rst : std_ulogic;
constant clk_period : time := 10 ns;

signal d1 : Decode2ToDividerType;
signal d2 : DividerToWritebackType;
signal d1 : Execute1ToDividerType;
signal d2 : DividerToExecute1Type;
begin
divider_0: entity work.divider
port map (clk => clk, rst => rst, d_in => d1, d_out => d2);
@ -43,14 +43,13 @@ begin
rst <= '0';

d1.valid <= '1';
d1.write_reg <= "10001";
d1.dividend <= x"0000000010001000";
d1.divisor <= x"0000000000001111";
d1.is_signed <= '0';
d1.is_32bit <= '0';
d1.is_extended <= '0';
d1.is_modulus <= '0';
d1.rc <= '0';
d1.neg_result <= '0';

wait for clk_period;
assert d2.valid = '0';
@ -65,16 +64,12 @@ begin
end loop;

assert d2.valid = '1';
assert d2.write_reg_enable = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '0';

wait for clk_period;
assert d2.valid = '0' report "valid";

d1.valid <= '1';
d1.rc <= '1';

wait for clk_period;
assert d2.valid = '0' report "valid";
@ -89,10 +84,7 @@ begin
end loop;

assert d2.valid = '1';
assert d2.write_reg_enable = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '1';

wait for clk_period;
assert d2.valid = '0';
@ -105,9 +97,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.valid <= '1';

wait for clk_period;
@ -142,6 +135,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.valid <= '1';

wait for clk_period;
@ -173,9 +167,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '1';
d1.valid <= '1';

@ -216,6 +211,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '1';
d1.valid <= '1';

@ -250,9 +246,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -289,6 +286,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -322,9 +320,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000";
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -365,6 +364,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -398,9 +398,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63);
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -438,6 +439,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -472,9 +474,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';
@ -517,6 +520,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';

@ -11,8 +11,12 @@ use work.insn_helpers.all;
use work.ppc_fx_insns.all;

entity execute1 is
generic (
EX1_BYPASS : boolean := true
);
port (
clk : in std_ulogic;
rst : in std_ulogic;

-- asynchronous
flush_out : out std_ulogic;
@ -21,6 +25,7 @@ entity execute1 is
e_in : in Decode2ToExecute1Type;

-- asynchronous
l_out : out Execute1ToLoadstore1Type;
f_out : out Execute1ToFetch1Type;

e_out : out Execute1ToWritebackType;
@ -35,10 +40,19 @@ architecture behaviour of execute1 is
e : Execute1ToWritebackType;
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic;
div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic;
slow_op_dest : gpr_index_t;
slow_op_rc : std_ulogic;
slow_op_oe : std_ulogic;
slow_op_xerc : xer_common_t;
end record;

signal r, rin : reg_type;

signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);

signal ctrl: ctrl_t := (others => (others => '0'));
signal ctrl_tmp: ctrl_t := (others => (others => '0'));

@ -47,6 +61,16 @@ architecture behaviour of execute1 is
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0);
signal popcnt_result: std_ulogic_vector(63 downto 0);
signal parity_result: std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply: Execute1ToMultiplyType;
signal multiply_to_x: MultiplyToExecute1Type;

-- divider signals
signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type;

procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic;
@ -92,9 +116,9 @@ begin

rotator_0: entity work.rotator
port map (
rs => e_in.read_data3,
ra => e_in.read_data1,
shift => e_in.read_data2(6 downto 0),
rs => c_in,
ra => a_in,
shift => b_in(6 downto 0),
insn => e_in.insn,
is_32bit => e_in.is_32bit,
right_shift => right_shift,
@ -107,22 +131,45 @@ begin

logical_0: entity work.logical
port map (
rs => e_in.read_data3,
rb => e_in.read_data2,
rs => c_in,
rb => b_in,
op => e_in.insn_type,
invert_in => e_in.invert_a,
invert_out => e_in.invert_out,
result => logical_result
result => logical_result,
datalen => e_in.data_len,
popcnt => popcnt_result,
parity => parity_result
);

countzero_0: entity work.zero_counter
port map (
rs => e_in.read_data3,
clk => clk,
rs => c_in,
count_right => e_in.insn(10),
is_32bit => e_in.is_32bit,
result => countzero_result
);

multiply_0: entity work.multiply
port map (
clk => clk,
m_in => x_to_multiply,
m_out => multiply_to_x
);

divider_0: entity work.divider
port map (
clk => clk,
rst => rst,
d_in => x_to_divider,
d_out => divider_to_x
);

a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;

execute1_0: process(clk)
begin
if rising_edge(clk) then
@ -159,6 +206,14 @@ begin
variable l : std_ulogic;
variable next_nia : std_ulogic_vector(63 downto 0);
variable carry_32, carry_64 : std_ulogic;
variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0);
variable overflow : std_ulogic;
variable negative : std_ulogic;
variable zerohi, zerolo : std_ulogic;
variable msb_a, msb_b : std_ulogic;
variable a_lt : std_ulogic;
variable lv : Execute1ToLoadstore1Type;
begin
result := (others => '0');
result_with_carry := (others => '0');
@ -204,6 +259,83 @@ begin
end if;

v.lr_update := '0';
v.mul_in_progress := '0';
v.div_in_progress := '0';
v.cntz_in_progress := '0';

-- signals to multiply unit
x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.insn_type <= e_in.insn_type;
x_to_multiply.is_32bit <= e_in.is_32bit;

if e_in.is_32bit = '1' then
if e_in.is_signed = '1' then
x_to_multiply.data1 <= (others => a_in(31));
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
x_to_multiply.data2 <= (others => b_in(31));
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
else
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
end if;
else
if e_in.is_signed = '1' then
x_to_multiply.data1 <= a_in(63) & a_in;
x_to_multiply.data2 <= b_in(63) & b_in;
else
x_to_multiply.data1 <= '0' & a_in;
x_to_multiply.data2 <= '0' & b_in;
end if;
end if;

-- signals to divide unit
sign1 := '0';
sign2 := '0';
if e_in.is_signed = '1' then
if e_in.is_32bit = '1' then
sign1 := a_in(31);
sign2 := b_in(31);
else
sign1 := a_in(63);
sign2 := b_in(63);
end if;
end if;
-- take absolute values
if sign1 = '0' then
abs1 := signed(a_in);
else
abs1 := - signed(a_in);
end if;
if sign2 = '0' then
abs2 := signed(b_in);
else
abs2 := - signed(b_in);
end if;

x_to_divider <= Execute1ToDividerInit;
x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit;
if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1';
end if;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then
-- 64-bit forms
if e_in.insn_type = OP_DIVE then
x_to_divider.is_extended <= '1';
end if;
x_to_divider.dividend <= std_ulogic_vector(abs1);
x_to_divider.divisor <= std_ulogic_vector(abs2);
else
-- 32-bit forms
x_to_divider.is_extended <= '0';
if e_in.insn_type = OP_DIVE then -- extended forms
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
else
x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
end if;
x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
end if;

ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
@ -226,8 +358,10 @@ begin

v.e.valid := '1';
v.e.write_reg := e_in.write_reg;
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe;
v.slow_op_xerc := v.e.xerc;

case_0: case e_in.insn_type is

@ -236,51 +370,93 @@ begin
report "illegal";
when OP_NOP =>
-- Do nothing
when OP_ADD =>
when OP_ADD | OP_CMP =>
if e_in.invert_a = '0' then
a_inv := e_in.read_data1;
a_inv := a_in;
else
a_inv := not e_in.read_data1;
a_inv := not a_in;
end if;
result_with_carry := ppc_adde(a_inv, e_in.read_data2,
result_with_carry := ppc_adde(a_inv, b_in,
decode_input_carry(e_in.input_carry, v.e.xerc));
result := result_with_carry(63 downto 0);
carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
carry_32 := result(32) xor a_inv(32) xor b_in(32);
carry_64 := result_with_carry(64);
if e_in.output_carry = '1' then
set_carry(v.e, carry_32, carry_64);
end if;
if e_in.oe = '1' then
set_ov(v.e,
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));