Merge pull request #134 from paulusmack/master

Add bypass from execute1 output to input
pull/145/head
Anton Blanchard 5 years ago committed by GitHub
commit f77b31a552
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -31,7 +31,7 @@ common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o
core_debug.o: common.o
countzero.o:
countzero_tb.o: common.o glibc_random.o countzero.o
@ -40,7 +40,7 @@ crhelpers.o: common.o
decode1.o: common.o decode_types.o
decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o
decode_types.o:
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o divider.o
fetch1.o: common.o
fetch2.o: common.o wishbone_types.o
glibc_random_helpers.o:

@ -109,6 +109,9 @@ package common is
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
bypass_data1: std_ulogic;
bypass_data2: std_ulogic;
bypass_data3: std_ulogic;
cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t;
lr: std_ulogic;
@ -124,44 +127,41 @@ package common is
is_signed: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
data_len: std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0',
(valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));
is_32bit => '0', is_signed => '0', xerc => xerc_init,
byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0'));

type Decode2ToMultiplyType is record
type Execute1ToMultiplyType is record
valid: std_ulogic;
insn_type: insn_type_t;
write_reg: gpr_index_t;
data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic;
oe: std_ulogic;
is_32bit: std_ulogic;
xerc: xer_common_t;
end record;
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
oe => '0', is_32bit => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
is_32bit => '0',
others => (others => '0'));

type Decode2ToDividerType is record
type Execute1ToDividerType is record
valid: std_ulogic;
write_reg: gpr_index_t;
dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic;
is_32bit: std_ulogic;
is_extended: std_ulogic;
is_modulus: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
xerc: xer_common_t;
neg_result: std_ulogic;
end record;
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
neg_result => '0', others => (others => '0'));

type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
@ -193,7 +193,7 @@ package common is
end record;
constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', others => (others => '0'));

type Decode2ToLoadstore1Type is record
type Execute1ToLoadstore1Type is record
valid : std_ulogic;
load : std_ulogic; -- is this a load or store
addr1 : std_ulogic_vector(63 downto 0);
@ -207,9 +207,9 @@ package common is
update_reg : gpr_index_t; -- if so, the register to update
xerc : xer_common_t;
end record;
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
others => (others => '0'));
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
others => (others => '0'));

type Loadstore1ToDcacheType is record
valid : std_ulogic;
@ -248,48 +248,32 @@ package common is
write_enable : std_ulogic;
write_reg: gspr_index_t;
write_data: std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
sign_extend: std_ulogic;
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0',
write_cr_enable => '0', sign_extend => '0',
write_cr_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
others => (others => '0'));

type MultiplyToWritebackType is record
type MultiplyToExecute1Type is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
overflow : std_ulogic;
end record;
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type DividerToWritebackType is record
type DividerToExecute1Type is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
overflow : std_ulogic;
end record;
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;

@ -21,6 +21,7 @@ entity control is

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;

gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;
@ -36,7 +37,11 @@ entity control is

valid_out : out std_ulogic;
stall_out : out std_ulogic;
stopped_out : out std_ulogic
stopped_out : out std_ulogic;

gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic
);
end entity control;

@ -71,10 +76,12 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,

stall_out => stall_a_out
stall_out => stall_a_out,
use_bypass => gpr_bypass_a
);

gpr_hazard1: entity work.gpr_hazard
@ -87,10 +94,12 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,

stall_out => stall_b_out
stall_out => stall_b_out,
use_bypass => gpr_bypass_b
);

gpr_c_read_in_fmt <= "0" & gpr_c_read_in;
@ -105,10 +114,12 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,

stall_out => stall_c_out
stall_out => stall_c_out,
use_bypass => gpr_bypass_c
);

cr_hazard0: entity work.cr_hazard

@ -9,7 +9,8 @@ use work.wishbone_types.all;
entity core is
generic (
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true
);
port (
clk : in std_logic;
@ -59,18 +60,10 @@ architecture behave of core is
signal execute1_to_fetch1: Execute1ToFetch1Type;

-- load store signals
signal decode2_to_loadstore1: Decode2ToLoadstore1Type;
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
signal dcache_to_writeback: DcacheToWritebackType;

-- multiply signals
signal decode2_to_multiply: Decode2ToMultiplyType;
signal multiply_to_writeback: MultiplyToWritebackType;

-- divider signals
signal decode2_to_divider: Decode2ToDividerType;
signal divider_to_writeback: DividerToWritebackType;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
@ -115,8 +108,6 @@ architecture behave of core is
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
@ -186,6 +177,9 @@ begin
decode1_stall_in <= decode2_stall_out;

decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
rst => core_rst,
@ -196,9 +190,6 @@ begin
stopped_out => dbg_core_is_stopped,
d_in => decode1_to_decode2,
e_out => decode2_to_execute1,
l_out => decode2_to_loadstore1,
m_out => decode2_to_multiply,
d_out => decode2_to_divider,
r_in => register_file_to_decode2,
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
@ -232,11 +223,16 @@ begin
);

execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
rst => core_rst,
flush_out => flush,
stall_out => ex1_stall_out,
e_in => decode2_to_execute1,
l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1,
e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval,
@ -246,7 +242,7 @@ begin
loadstore1_0: entity work.loadstore1
port map (
clk => clk,
l_in => decode2_to_loadstore1,
l_in => execute1_to_loadstore1,
l_out => loadstore1_to_dcache
);

@ -265,28 +261,11 @@ begin
wishbone_out => wishbone_data_out
);

multiply_0: entity work.multiply
port map (
clk => clk,
m_in => decode2_to_multiply,
m_out => multiply_to_writeback
);

divider_0: entity work.divider
port map (
clk => clk,
rst => core_rst,
d_in => decode2_to_divider,
d_out => divider_to_writeback
);

writeback_0: entity work.writeback
port map (
clk => clk,
e_in => execute1_to_writeback,
l_in => dcache_to_writeback,
m_in => multiply_to_writeback,
d_in => divider_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
complete_out => complete

@ -6,6 +6,7 @@ library work;

entity zero_counter is
port (
clk : in std_logic;
rs : in std_ulogic_vector(63 downto 0);
count_right : in std_ulogic;
is_32bit : in std_ulogic;
@ -14,10 +15,14 @@ entity zero_counter is
end entity zero_counter;

architecture behaviour of zero_counter is
signal y, z : std_ulogic_vector(3 downto 0);
signal v16 : std_ulogic_vector(15 downto 0);
signal v4 : std_ulogic_vector(3 downto 0);
signal sel : std_ulogic_vector(5 downto 0);
type intermediate_result is record
v16: std_ulogic_vector(15 downto 0);
sel_hi: std_ulogic_vector(1 downto 0);
is_32bit: std_ulogic;
count_right: std_ulogic;
end record;

signal r, r_in : intermediate_result;

-- Return the index of the leftmost or rightmost 1 in a set of 4 bits.
-- Assumes v is not "0000"; if it is, return (right ? "11" : "00").
@ -47,65 +52,83 @@ architecture behaviour of zero_counter is
end;

begin
zerocounter0: process(all)
zerocounter_0: process(clk)
begin
if rising_edge(clk) then
r <= r_in;
end if;
end process;

zerocounter_1: process(all)
variable v: intermediate_result;
variable y, z: std_ulogic_vector(3 downto 0);
variable sel: std_ulogic_vector(5 downto 0);
variable v4: std_ulogic_vector(3 downto 0);

begin
-- Test 4 groups of 16 bits each.
-- The top 2 groups are considered to be zero in 32-bit mode.
z(0) <= or (rs(15 downto 0));
z(1) <= or (rs(31 downto 16));
z(2) <= or (rs(47 downto 32));
z(3) <= or (rs(63 downto 48));
z(0) := or (rs(15 downto 0));
z(1) := or (rs(31 downto 16));
z(2) := or (rs(47 downto 32));
z(3) := or (rs(63 downto 48));
if is_32bit = '0' then
sel(5 downto 4) <= encoder(z, count_right);
v.sel_hi := encoder(z, count_right);
else
sel(5) <= '0';
v.sel_hi(1) := '0';
if count_right = '0' then
sel(4) <= z(1);
v.sel_hi(0) := z(1);
else
sel(4) <= not z(0);
v.sel_hi(0) := not z(0);
end if;
end if;

-- Select the leftmost/rightmost non-zero group of 16 bits
case sel(5 downto 4) is
case v.sel_hi is
when "00" =>
v16 <= rs(15 downto 0);
v.v16 := rs(15 downto 0);
when "01" =>
v16 <= rs(31 downto 16);
v.v16 := rs(31 downto 16);
when "10" =>
v16 <= rs(47 downto 32);
v.v16 := rs(47 downto 32);
when others =>
v16 <= rs(63 downto 48);
v.v16 := rs(63 downto 48);
end case;

-- Latch this and do the rest in the next cycle, for the sake of timing
v.is_32bit := is_32bit;
v.count_right := count_right;
r_in <= v;
sel(5 downto 4) := r.sel_hi;

-- Test 4 groups of 4 bits
y(0) <= or (v16(3 downto 0));
y(1) <= or (v16(7 downto 4));
y(2) <= or (v16(11 downto 8));
y(3) <= or (v16(15 downto 12));
sel(3 downto 2) <= encoder(y, count_right);
y(0) := or (r.v16(3 downto 0));
y(1) := or (r.v16(7 downto 4));
y(2) := or (r.v16(11 downto 8));
y(3) := or (r.v16(15 downto 12));
sel(3 downto 2) := encoder(y, r.count_right);

-- Select the leftmost/rightmost non-zero group of 4 bits
case sel(3 downto 2) is
when "00" =>
v4 <= v16(3 downto 0);
v4 := r.v16(3 downto 0);
when "01" =>
v4 <= v16(7 downto 4);
v4 := r.v16(7 downto 4);
when "10" =>
v4 <= v16(11 downto 8);
v4 := r.v16(11 downto 8);
when others =>
v4 <= v16(15 downto 12);
v4 := r.v16(15 downto 12);
end case;

sel(1 downto 0) <= encoder(v4, count_right);
sel(1 downto 0) := encoder(v4, r.count_right);

-- sel is now the index of the leftmost/rightmost 1 bit in rs
if v4 = "0000" then
-- operand is zero, return 32 for 32-bit, else 64
result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
elsif count_right = '0' then
result <= x"00000000000000" & '0' & not r.is_32bit & r.is_32bit & "00000";
elsif r.count_right = '0' then
-- return (63 - sel), trimmed to 5 bits in 32-bit mode
result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
result <= x"00000000000000" & "00" & (not sel(5) and not r.is_32bit) & not sel(4 downto 0);
else
result <= x"00000000000000" & "00" & sel;
end if;

@ -15,16 +15,26 @@ architecture behave of countzero_tb is
signal is_32bit, count_right: std_ulogic := '0';
signal result: std_ulogic_vector(63 downto 0);
signal randno: std_ulogic_vector(63 downto 0);
signal clk: std_ulogic;

begin
zerocounter_0: entity work.zero_counter
port map (
clk => clk,
rs => rs,
result => result,
count_right => count_right,
is_32bit => is_32bit
);

clk_process: process
begin
clk <= '0';
wait for clk_period/2;
clk <= '1';
wait for clk_period/2;
end process;

stim_process: process
variable r: std_ulogic_vector(63 downto 0);
begin

@ -44,8 +44,8 @@ architecture behaviour of decode1 is
29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis.
18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi
10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmpi
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lbzu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '1'), -- lha
@ -54,7 +54,7 @@ architecture behaviour of decode1 is
41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lhzu
32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lwz
33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lwzu
7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli
7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli
24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori
25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris
20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi
@ -66,7 +66,7 @@ architecture behaviour of decode1 is
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- stw
37 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- stwu
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- subfic
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- subfic
2 => (ALU, OP_TDI, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- tdi
--PPC_TWI 3
26 => (ALU, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- xori
@ -145,10 +145,10 @@ architecture behaviour of decode1 is
2#0000011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- and
2#0000111100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- andc
-- 2#0011111100# bperm
2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmp
2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmp
2#0111111100# => (ALU, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpb
-- 2#0011100000# cmpeqb
2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
2#0000100000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
-- 2#0011000000# cmprb
2#0000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- cntlzd
2#0000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- cntlzw
@ -160,22 +160,22 @@ architecture behaviour of decode1 is
2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
-- 2#1111110110# dcbz
2#0110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeu
2#1110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeuo
2#0110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweu
2#1110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweuo
2#0110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divde
2#1110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeo
2#0110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwe
2#1110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweo
2#0111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdu
2#1111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divduo
2#0111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwu
2#1111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwuo
2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd
2#1111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdo
2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw
2#1111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwo
2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu
2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo
2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu
2#1110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweuo
2#0110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divde
2#1110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdeo
2#0110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwe
2#1110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divweo
2#0111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdu
2#1111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divduo
2#0111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwu
2#1111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwuo
2#0111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divd
2#1111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdo
2#0111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divw
2#1111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwo
2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv
2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb
2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh
@ -238,36 +238,36 @@ architecture behaviour of decode1 is
-- 2#1001000000# mcrxrx
2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf
2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr
2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud
2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw
2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
2#0100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- modud
2#0100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- moduw
2#1100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- modsd
2#1100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0'), -- modsw
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
2#0000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
-- next 4 have reserved bit set
2#1001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
2#1000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
2#1001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
2#1000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
2#0011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulld
2#1011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulldo
2#0011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullw
2#1011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullwo
2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld
2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo
2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw
2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo
2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand
2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg
2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego
2#0001111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nor
2#0110111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- or
2#0110011100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- orc
2#0001111010# => (ALU, OP_POPCNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
2#0111111010# => (ALU, OP_POPCNTD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
2#0101111010# => (ALU, OP_POPCNTW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
2#0001111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
2#0111111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
2#0101111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
2#0010111010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
2#0010011010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
-- 2#0010000000# setb
2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- sld
2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- slw

@ -9,6 +9,9 @@ use work.helpers.all;
use work.insn_helpers.all;

entity decode2 is
generic (
EX1_BYPASS : boolean := true
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
@ -24,9 +27,6 @@ entity decode2 is
d_in : in Decode1ToDecode2Type;

e_out : out Decode2ToExecute1Type;
m_out : out Decode2ToMultiplyType;
d_out : out Decode2ToDividerType;
l_out : out Decode2ToLoadstore1Type;

r_in : in RegisterFileToDecode2Type;
r_out : out Decode2ToRegisterFileType;
@ -39,9 +39,6 @@ end entity decode2;
architecture behaviour of decode2 is
type reg_type is record
e : Decode2ToExecute1Type;
m : Decode2ToMultiplyType;
d : Decode2ToDividerType;
l : Decode2ToLoadstore1Type;
end record;

signal r, rin : reg_type;
@ -188,15 +185,19 @@ architecture behaviour of decode2 is

signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic;

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic;

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_write_valid : std_ulogic;
begin
@ -217,6 +218,7 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,

gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,
@ -232,13 +234,17 @@ begin

valid_out => control_valid_out,
stall_out => stall_out,
stopped_out => stopped_out
stopped_out => stopped_out,

gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass
);

decode2_0: process(clk)
begin
if rising_edge(clk) then
if rin.e.valid = '1' or rin.l.valid = '1' or rin.m.valid = '1' or rin.d.valid = '1' then
if rin.e.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
end if;
r <= rin;
@ -259,21 +265,16 @@ begin
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable signed_division: std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
begin
v := r;

v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
v.d := Decode2ToDividerInit;

mul_a := (others => '0');
mul_b := (others => '0');

--v.e.input_cr := d_in.decode.input_cr;
--v.m.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1);
@ -303,12 +304,17 @@ begin
v.e.insn_type := d_in.decode.insn_type;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data;
v.e.bypass_data2 := gpr_b_bypass;
v.e.read_data3 := decoded_reg_c.data;
v.e.bypass_data3 := gpr_c_bypass;
v.e.write_reg := decoded_reg_o.reg;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.e.cr := c_in.read_cr_data;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
@ -322,102 +328,9 @@ begin
end if;
v.e.insn := d_in.insn;
v.e.data_len := length;

-- multiply unit
v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data;
v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.m.xerc := c_in.read_xerc_data;
if v.m.insn_type = OP_MUL_L64 then
v.m.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.m.is_32bit := d_in.decode.is_32bit;

if d_in.decode.is_32bit = '1' then
if d_in.decode.is_signed = '1' then
v.m.data1 := (others => mul_a(31));
v.m.data1(31 downto 0) := mul_a(31 downto 0);
v.m.data2 := (others => mul_b(31));
v.m.data2(31 downto 0) := mul_b(31 downto 0);
else
v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
end if;
else
if d_in.decode.is_signed = '1' then
v.m.data1 := mul_a(63) & mul_a;
v.m.data2 := mul_b(63) & mul_b;
else
v.m.data1 := '0' & mul_a;
v.m.data2 := '0' & mul_b;
end if;
end if;

-- divide unit
-- PPC divide and modulus instruction words have these bits in
-- the bottom 11 bits: o1dns 010t1 r
-- where o = OE for div instrs, signedness for mod instrs
-- d = 1 for div*, 0 for mod*
-- n = 1 for normal, 0 for extended (dividend << 32/64)
-- s = 1 for signed, 0 for unsigned (for div*)
-- t = 1 for 32-bit, 0 for 64-bit
-- r = RC bit (record condition code)
v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.d.is_modulus := not d_in.insn(8);
v.d.is_32bit := d_in.insn(2);
if d_in.insn(8) = '1' then
signed_division := d_in.insn(6);
else
signed_division := d_in.insn(10);
end if;
v.d.is_signed := signed_division;
if d_in.insn(2) = '0' then
-- 64-bit forms
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
v.d.is_extended := '1';
end if;
v.d.dividend := decoded_reg_a.data;
v.d.divisor := decoded_reg_b.data;
else
-- 32-bit forms
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms
v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
-- sign extend to 64 bits
v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
else
v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
end if;
if signed_division = '1' and decoded_reg_b.data(31) = '1' then
v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
else
v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
end if;
end if;
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.d.xerc := c_in.read_xerc_data;
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);

-- load/store unit
v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg);
v.l.addr1 := decoded_reg_a.data;
v.l.addr2 := decoded_reg_b.data;
v.l.data := decoded_reg_c.data;
v.l.write_reg := gspr_to_gpr(decoded_reg_o.reg);

if d_in.decode.insn_type = OP_LOAD then
v.l.load := '1';
else
v.l.load := '0';
end if;

v.l.length := length;
v.l.byte_reverse := d_in.decode.byte_reverse;
v.l.sign_extend := d_in.decode.sign_extend;
v.l.update := d_in.decode.update;
v.l.xerc := c_in.read_xerc_data;
v.e.byte_reverse := d_in.decode.byte_reverse;
v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update;

-- issue control
control_valid_in <= d_in.valid;
@ -425,6 +338,10 @@ begin

gpr_write_valid <= decoded_reg_o.reg_valid;
gpr_write <= decoded_reg_o.reg;
gpr_bypassable <= '0';
if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1';
end if;

gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
@ -437,29 +354,13 @@ begin

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);

v.e.valid := '0';
v.m.valid := '0';
v.d.valid := '0';
v.l.valid := '0';
case d_in.decode.unit is
when ALU =>
v.e.valid := control_valid_out;
when LDST =>
v.l.valid := control_valid_out;
when MUL =>
v.m.valid := control_valid_out;
when DIV =>
v.d.valid := control_valid_out;
when NONE =>
v.e.valid := control_valid_out;
v.e.valid := control_valid_out;
if d_in.decode.unit = NONE then
v.e.insn_type := OP_ILLEGAL;
end case;
end if;

if rst = '1' then
v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
v.d := Decode2ToDividerInit;
end if;

-- Update registers
@ -467,8 +368,5 @@ begin

-- Update outputs
e_out <= r.e;
l_out <= r.l;
m_out <= r.m;
d_out <= r.d;
end process;
end architecture behaviour;

@ -4,18 +4,18 @@ use ieee.std_logic_1164.all;
package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_POPCNT, OP_PRTY,
OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG
@ -46,7 +46,7 @@ package decode_types is

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL, DIV);
type unit_t is (NONE, ALU, LDST);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record

@ -10,8 +10,8 @@ entity divider is
port (
clk : in std_logic;
rst : in std_logic;
d_in : in Decode2ToDividerType;
d_out : out DividerToWritebackType
d_in : in Execute1ToDividerType;
d_out : out DividerToExecute1Type
);
end entity divider;

@ -23,20 +23,15 @@ architecture behaviour of divider is
signal sresult : std_ulogic_vector(64 downto 0);
signal oresult : std_ulogic_vector(63 downto 0);
signal running : std_ulogic;
signal signcheck : std_ulogic;
signal count : unsigned(6 downto 0);
signal neg_result : std_ulogic;
signal is_modulus : std_ulogic;
signal is_32bit : std_ulogic;
signal extended : std_ulogic;
signal is_signed : std_ulogic;
signal rc : std_ulogic;
signal write_reg : std_ulogic_vector(4 downto 0);
signal overflow : std_ulogic;
signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic;
signal oe : std_ulogic;
signal xerc : xer_common_t;
begin
divider_0: process(clk)
begin
@ -48,40 +43,22 @@ begin
running <= '0';
count <= "0000000";
elsif d_in.valid = '1' then
if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
if d_in.is_extended = '1' then
dend <= '0' & d_in.dividend & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & d_in.dividend;
end if;
div <= unsigned(d_in.divisor);
quot <= (others => '0');
write_reg <= d_in.write_reg;
neg_result <= '0';
neg_result <= d_in.neg_result;
is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed;
rc <= d_in.rc;
oe <= d_in.oe;
xerc <= d_in.xerc;
count <= "1111111";
running <= '1';
overflow <= '0';
ovf32 <= '0';
signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
elsif signcheck = '1' then
signcheck <= '0';
neg_result <= dend(63) xor (div(63) and not is_modulus);
if dend(63) = '1' then
if extended = '1' then
dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
end if;
end if;
if div(63) = '1' then
div <= unsigned(- signed(div));
end if;
elsif running = '1' then
if count = "0111111" then
running <= '0';
@ -113,9 +90,6 @@ begin

divider_1: process(all)
begin
d_out.write_reg_nr <= write_reg;
d_out.rc <= rc;

if is_modulus = '1' then
result <= dend(128 downto 65);
else
@ -151,23 +125,9 @@ begin
if rising_edge(clk) then
d_out.valid <= '0';
d_out.write_reg_data <= oresult;
d_out.write_reg_enable <= '0';
d_out.write_xerc_enable <= '0';
d_out.xerc <= xerc;
d_out.overflow <= did_ovf;
if count = "1000000" then
d_out.valid <= '1';
d_out.write_reg_enable <= '1';
d_out.write_xerc_enable <= oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if oe = '1' then
d_out.xerc.ov <= did_ovf;
d_out.xerc.ov32 <= did_ovf;
d_out.xerc.so <= xerc.so or did_ovf;
end if;
end if;
end if;
end process;

@ -16,8 +16,8 @@ architecture behave of divider_tb is
signal rst : std_ulogic;
constant clk_period : time := 10 ns;

signal d1 : Decode2ToDividerType;
signal d2 : DividerToWritebackType;
signal d1 : Execute1ToDividerType;
signal d2 : DividerToExecute1Type;
begin
divider_0: entity work.divider
port map (clk => clk, rst => rst, d_in => d1, d_out => d2);
@ -43,14 +43,13 @@ begin
rst <= '0';

d1.valid <= '1';
d1.write_reg <= "10001";
d1.dividend <= x"0000000010001000";
d1.divisor <= x"0000000000001111";
d1.is_signed <= '0';
d1.is_32bit <= '0';
d1.is_extended <= '0';
d1.is_modulus <= '0';
d1.rc <= '0';
d1.neg_result <= '0';

wait for clk_period;
assert d2.valid = '0';
@ -65,16 +64,12 @@ begin
end loop;

assert d2.valid = '1';
assert d2.write_reg_enable = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '0';

wait for clk_period;
assert d2.valid = '0' report "valid";

d1.valid <= '1';
d1.rc <= '1';

wait for clk_period;
assert d2.valid = '0' report "valid";
@ -89,10 +84,7 @@ begin
end loop;

assert d2.valid = '1';
assert d2.write_reg_enable = '1';
assert d2.write_reg_nr = "10001";
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
assert d2.rc = '1';

wait for clk_period;
assert d2.valid = '0';
@ -105,9 +97,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.valid <= '1';

wait for clk_period;
@ -142,6 +135,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.valid <= '1';

wait for clk_period;
@ -173,9 +167,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '1';
d1.valid <= '1';

@ -216,6 +211,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '1';
d1.valid <= '1';

@ -250,9 +246,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -289,6 +286,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -322,9 +320,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000";
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63) xor rb(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -365,6 +364,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -398,9 +398,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63);
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -438,6 +439,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -472,9 +474,10 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

d1.dividend <= ra;
d1.divisor <= rb;
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
d1.is_signed <= '1';
d1.neg_result <= ra(63);
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';
@ -517,6 +520,7 @@ begin
d1.dividend <= ra;
d1.divisor <= rb;
d1.is_signed <= '0';
d1.neg_result <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';

@ -11,8 +11,12 @@ use work.insn_helpers.all;
use work.ppc_fx_insns.all;

entity execute1 is
generic (
EX1_BYPASS : boolean := true
);
port (
clk : in std_ulogic;
rst : in std_ulogic;

-- asynchronous
flush_out : out std_ulogic;
@ -21,6 +25,7 @@ entity execute1 is
e_in : in Decode2ToExecute1Type;

-- asynchronous
l_out : out Execute1ToLoadstore1Type;
f_out : out Execute1ToFetch1Type;

e_out : out Execute1ToWritebackType;
@ -35,10 +40,19 @@ architecture behaviour of execute1 is
e : Execute1ToWritebackType;
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic;
div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic;
slow_op_dest : gpr_index_t;
slow_op_rc : std_ulogic;
slow_op_oe : std_ulogic;
slow_op_xerc : xer_common_t;
end record;

signal r, rin : reg_type;

signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);

signal ctrl: ctrl_t := (others => (others => '0'));
signal ctrl_tmp: ctrl_t := (others => (others => '0'));

@ -47,6 +61,16 @@ architecture behaviour of execute1 is
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0);
signal popcnt_result: std_ulogic_vector(63 downto 0);
signal parity_result: std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply: Execute1ToMultiplyType;
signal multiply_to_x: MultiplyToExecute1Type;

-- divider signals
signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type;

procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic;
@ -92,9 +116,9 @@ begin

rotator_0: entity work.rotator
port map (
rs => e_in.read_data3,
ra => e_in.read_data1,
shift => e_in.read_data2(6 downto 0),
rs => c_in,
ra => a_in,
shift => b_in(6 downto 0),
insn => e_in.insn,
is_32bit => e_in.is_32bit,
right_shift => right_shift,
@ -107,22 +131,45 @@ begin

logical_0: entity work.logical
port map (
rs => e_in.read_data3,
rb => e_in.read_data2,
rs => c_in,
rb => b_in,
op => e_in.insn_type,
invert_in => e_in.invert_a,
invert_out => e_in.invert_out,
result => logical_result
result => logical_result,
datalen => e_in.data_len,
popcnt => popcnt_result,
parity => parity_result
);

countzero_0: entity work.zero_counter
port map (
rs => e_in.read_data3,
clk => clk,
rs => c_in,
count_right => e_in.insn(10),
is_32bit => e_in.is_32bit,
result => countzero_result
);

multiply_0: entity work.multiply
port map (
clk => clk,
m_in => x_to_multiply,
m_out => multiply_to_x
);

divider_0: entity work.divider
port map (
clk => clk,
rst => rst,
d_in => x_to_divider,
d_out => divider_to_x
);

a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;

execute1_0: process(clk)
begin
if rising_edge(clk) then
@ -159,6 +206,14 @@ begin
variable l : std_ulogic;
variable next_nia : std_ulogic_vector(63 downto 0);
variable carry_32, carry_64 : std_ulogic;
variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0);
variable overflow : std_ulogic;
variable negative : std_ulogic;
variable zerohi, zerolo : std_ulogic;
variable msb_a, msb_b : std_ulogic;
variable a_lt : std_ulogic;
variable lv : Execute1ToLoadstore1Type;
begin
result := (others => '0');
result_with_carry := (others => '0');
@ -204,6 +259,83 @@ begin
end if;

v.lr_update := '0';
v.mul_in_progress := '0';
v.div_in_progress := '0';
v.cntz_in_progress := '0';

-- signals to multiply unit
x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.insn_type <= e_in.insn_type;
x_to_multiply.is_32bit <= e_in.is_32bit;

if e_in.is_32bit = '1' then
if e_in.is_signed = '1' then
x_to_multiply.data1 <= (others => a_in(31));
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
x_to_multiply.data2 <= (others => b_in(31));
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
else
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
end if;
else
if e_in.is_signed = '1' then
x_to_multiply.data1 <= a_in(63) & a_in;
x_to_multiply.data2 <= b_in(63) & b_in;
else
x_to_multiply.data1 <= '0' & a_in;
x_to_multiply.data2 <= '0' & b_in;
end if;
end if;

-- signals to divide unit
sign1 := '0';
sign2 := '0';
if e_in.is_signed = '1' then
if e_in.is_32bit = '1' then
sign1 := a_in(31);
sign2 := b_in(31);
else
sign1 := a_in(63);
sign2 := b_in(63);
end if;
end if;
-- take absolute values
if sign1 = '0' then
abs1 := signed(a_in);
else
abs1 := - signed(a_in);
end if;
if sign2 = '0' then
abs2 := signed(b_in);
else
abs2 := - signed(b_in);
end if;

x_to_divider <= Execute1ToDividerInit;
x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit;
if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1';
end if;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then
-- 64-bit forms
if e_in.insn_type = OP_DIVE then
x_to_divider.is_extended <= '1';
end if;
x_to_divider.dividend <= std_ulogic_vector(abs1);
x_to_divider.divisor <= std_ulogic_vector(abs2);
else
-- 32-bit forms
x_to_divider.is_extended <= '0';
if e_in.insn_type = OP_DIVE then -- extended forms
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
else
x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
end if;
x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
end if;

ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
@ -226,8 +358,10 @@ begin

v.e.valid := '1';
v.e.write_reg := e_in.write_reg;
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe;
v.slow_op_xerc := v.e.xerc;

case_0: case e_in.insn_type is

@ -236,51 +370,93 @@ begin
report "illegal";
when OP_NOP =>
-- Do nothing
when OP_ADD =>
when OP_ADD | OP_CMP =>
if e_in.invert_a = '0' then
a_inv := e_in.read_data1;
a_inv := a_in;
else
a_inv := not e_in.read_data1;
a_inv := not a_in;
end if;
result_with_carry := ppc_adde(a_inv, e_in.read_data2,
result_with_carry := ppc_adde(a_inv, b_in,
decode_input_carry(e_in.input_carry, v.e.xerc));
result := result_with_carry(63 downto 0);
carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
carry_32 := result(32) xor a_inv(32) xor b_in(32);
carry_64 := result_with_carry(64);
if e_in.output_carry = '1' then
set_carry(v.e, carry_32, carry_64);
end if;
if e_in.oe = '1' then
set_ov(v.e,
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
end if;
result_en := '1';
if e_in.insn_type = OP_ADD then
if e_in.output_carry = '1' then
set_carry(v.e, carry_32, carry_64);
end if;
if e_in.oe = '1' then
set_ov(v.e,
calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
end if;
result_en := '1';
else
-- CMP and CMPL instructions
-- Note, we have done RB - RA, not RA - RB
bf := insn_bf(e_in.insn);
l := insn_l(e_in.insn);
v.e.write_cr_enable := '1';
crnum := to_integer(unsigned(bf));
v.e.write_cr_mask := num_to_fxm(crnum);
zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
if zerolo = '1' and (l = '0' or zerohi = '1') then
-- values are equal
newcrf := "001" & v.e.xerc.so;
else
if l = '1' then
-- 64-bit comparison
msb_a := a_in(63);
msb_b := b_in(63);
else
-- 32-bit comparison
msb_a := a_in(31);
msb_b := b_in(31);
end if;
if msb_a /= msb_b then
-- Subtraction might overflow, but
-- comparison is clear from MSB difference.
-- for signed, 0 is greater; for unsigned, 1 is greater
a_lt := msb_a xnor e_in.is_signed;
else
-- Subtraction cannot overflow since MSBs are equal.
-- carry = 1 indicates RA is smaller (signed or unsigned)
a_lt := (not l and carry_32) or (l and carry_64);
end if;
newcrf := a_lt & not a_lt & '0' & v.e.xerc.so;
end if;
for i in 0 to 7 loop
lo := i*4;
hi := lo + 3;
v.e.write_cr_data(hi downto lo) := newcrf;
end loop;
end if;
when OP_AND | OP_OR | OP_XOR =>
result := logical_result;
result_en := '1';
when OP_B =>
f_out.redirect <= '1';
if (insn_aa(e_in.insn)) then
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if;
when OP_BC =>
-- read_data1 is CTR
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
if bo(4-2) = '0' then
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
result := std_ulogic_vector(unsigned(a_in) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
f_out.redirect <= '1';
if (insn_aa(e_in.insn)) then
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if;
end if;
when OP_BCREG =>
@ -289,53 +465,41 @@ begin
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
if bo(4-2) = '0' and e_in.insn(10) = '0' then
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
result := std_ulogic_vector(unsigned(a_in) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
f_out.redirect <= '1';
f_out.redirect_nia <= e_in.read_data2(63 downto 2) & "00";
f_out.redirect_nia <= b_in(63 downto 2) & "00";
end if;
when OP_CMPB =>
result := ppc_cmpb(e_in.read_data3, e_in.read_data2);
result := ppc_cmpb(c_in, b_in);
result_en := '1';
when OP_CMP =>
bf := insn_bf(e_in.insn);
l := insn_l(e_in.insn);
v.e.write_cr_enable := '1';
crnum := to_integer(unsigned(bf));
v.e.write_cr_mask := num_to_fxm(crnum);
for i in 0 to 7 loop
lo := i*4;
hi := lo + 3;
v.e.write_cr_data(hi downto lo) := ppc_cmp(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
end loop;
when OP_CMPL =>
bf := insn_bf(e_in.insn);
l := insn_l(e_in.insn);
v.e.write_cr_enable := '1';
crnum := to_integer(unsigned(bf));
v.e.write_cr_mask := num_to_fxm(crnum);
for i in 0 to 7 loop
lo := i*4;
hi := lo + 3;
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
end loop;
when OP_CNTZ =>
result := countzero_result;
result_en := '1';
when OP_EXTS =>
v.e.write_len := e_in.data_len;
v.e.sign_extend := '1';
result := e_in.read_data3;
when OP_CNTZ =>
v.e.valid := '0';
v.cntz_in_progress := '1';
stall_out <= '1';
when OP_EXTS =>
-- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and c_in(7)) or
(e_in.data_len(1) and c_in(15)) or
(e_in.data_len(2) and c_in(31));
result := (others => negative);
if e_in.data_len(2) = '1' then
result(31 downto 16) := c_in(31 downto 16);
end if;
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
result(15 downto 8) := c_in(15 downto 8);
end if;
result(7 downto 0) := c_in(7 downto 0);
result_en := '1';
when OP_ISEL =>
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
if e_in.cr(31-crbit) = '1' then
result := e_in.read_data1;
result := a_in;
else
result := e_in.read_data2;
result := b_in;
end if;
result_en := '1';
when OP_MCRF =>
@ -400,7 +564,7 @@ begin
end if;
when OP_MFSPR =>
if is_fast_spr(e_in.read_reg1) then
result := e_in.read_data1;
result := a_in;
if decode_spr_num(e_in.insn) = SPR_XER then
-- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
result(63 downto 32) := (others => '0');
@ -447,19 +611,19 @@ begin
crnum := fxm_to_num(insn_fxm(e_in.insn));
v.e.write_cr_mask := num_to_fxm(crnum);
end if;
v.e.write_cr_data := e_in.read_data3(31 downto 0);
v.e.write_cr_data := c_in(31 downto 0);
when OP_MTSPR =>
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
"=" & to_hstring(e_in.read_data3);
"=" & to_hstring(c_in);
if is_fast_spr(e_in.write_reg) then
result := e_in.read_data3;
result := c_in;
result_en := '1';
if decode_spr_num(e_in.insn) = SPR_XER then
v.e.xerc.so := e_in.read_data3(63-32);
v.e.xerc.ov := e_in.read_data3(63-33);
v.e.xerc.ca := e_in.read_data3(63-34);
v.e.xerc.ov32 := e_in.read_data3(63-44);
v.e.xerc.ca32 := e_in.read_data3(63-45);
v.e.xerc.so := c_in(63-32);
v.e.xerc.ov := c_in(63-33);
v.e.xerc.ca := c_in(63-34);
v.e.xerc.ov32 := c_in(63-44);
v.e.xerc.ca32 := c_in(63-45);
v.e.write_xerc_enable := '1';
end if;
else
@ -468,20 +632,11 @@ begin
-- when others =>
-- end case;
end if;
when OP_POPCNTB =>
result := ppc_popcntb(e_in.read_data3);
when OP_POPCNT =>
result := popcnt_result;
result_en := '1';
when OP_POPCNTW =>
result := ppc_popcntw(e_in.read_data3);
result_en := '1';
when OP_POPCNTD =>
result := ppc_popcntd(e_in.read_data3);
result_en := '1';
when OP_PRTYD =>
result := ppc_prtyd(e_in.read_data3);
result_en := '1';
when OP_PRTYW =>
result := ppc_prtyw(e_in.read_data3);
when OP_PRTY =>
result := parity_result;
result_en := '1';
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
result := rotator_result;
@ -506,11 +661,29 @@ begin
when OP_ICBI =>
icache_inval <= '1';

when others =>
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
v.e.valid := '0';
v.mul_in_progress := '1';
stall_out <= '1';
x_to_multiply.valid <= '1';

when OP_DIV | OP_DIVE | OP_MOD =>
v.e.valid := '0';
v.div_in_progress := '1';
stall_out <= '1';
x_to_divider.valid <= '1';

when OP_LOAD | OP_STORE =>
-- loadstore/dcache has its own port to writeback
v.e.valid := '0';

when others =>
terminate_out <= '1';
report "illegal";
end case;

v.e.rc := e_in.rc and e_in.valid;

-- Update LR on the next cycle after a branch link
--
-- WARNING: The LR update isn't tracked by our hazard tracker. This
@ -533,20 +706,74 @@ begin
result_en := '1';
result := r.next_lr;
v.e.write_reg := fast_spr_num(SPR_LR);
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.e.valid := '1';
elsif r.cntz_in_progress = '1' then
-- cnt[lt]z always takes two cycles
result := countzero_result;
result_en := '1';
v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
v.e.rc := v.slow_op_rc;
v.e.xerc := v.slow_op_xerc;
v.e.valid := '1';
elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
(r.div_in_progress = '1' and divider_to_x.valid = '1') then
if r.mul_in_progress = '1' then
result := multiply_to_x.write_reg_data;
overflow := multiply_to_x.overflow;
else
result := divider_to_x.write_reg_data;
overflow := divider_to_x.overflow;
end if;
result_en := '1';
v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
v.e.rc := v.slow_op_rc;
v.e.xerc := v.slow_op_xerc;
v.e.write_xerc_enable := v.slow_op_oe;
-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
if v.slow_op_oe = '1' then
v.e.xerc.ov := overflow;
v.e.xerc.ov32 := overflow;
v.e.xerc.so := v.slow_op_xerc.so or overflow;
end if;
v.e.valid := '1';
else
stall_out <= '1';
v.mul_in_progress := r.mul_in_progress;
v.div_in_progress := r.div_in_progress;
end if;
end if;

v.e.write_data := result;
v.e.write_enable := result_en;
v.e.rc := e_in.rc and e_in.valid;

-- Outputs to loadstore1 (async)
lv := Execute1ToLoadstore1Init;
if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
lv.valid := '1';
end if;
if e_in.insn_type = OP_LOAD then
lv.load := '1';
end if;
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;
lv.write_reg := gspr_to_gpr(e_in.write_reg);
lv.length := e_in.data_len;
lv.byte_reverse := e_in.byte_reverse;
lv.sign_extend := e_in.sign_extend;
lv.update := e_in.update;
lv.update_reg := gspr_to_gpr(e_in.read_reg1);
lv.xerc := v.e.xerc;

-- Update registers
rin <= v;

-- update outputs
--f_out <= r.f;
l_out <= lv;
e_out <= r.e;
flush_out <= f_out.redirect;
end process;

@ -12,18 +12,21 @@ entity gpr_hazard is

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0);
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0);

stall_out : out std_ulogic
stall_out : out std_ulogic;
use_bypass : out std_ulogic
);
end entity gpr_hazard;
architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
valid : std_ulogic;
bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0'));
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));

type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
@ -33,9 +36,7 @@ begin
gpr_hazard0: process(clk)
begin
if rising_edge(clk) then
if stall_in = '0' then
r <= rin;
end if;
r <= rin;
end if;
end process;

@ -45,22 +46,49 @@ begin
v := r;

stall_out <= '0';
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
if ((r(i).valid = gpr_read_valid_in) and r(i).gpr = gpr_read_in) then
stall_out <= '1';
use_bypass <= '0';
if gpr_read_valid_in = '1' then
if r(0).valid = '1' and r(0).gpr = gpr_read_in then
if r(0).bypass = '1' and stall_in = '0' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
end loop;
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
if r(i).bypass = '1' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
end loop;
end if;

v(0).valid := gpr_write_valid_in;
v(0).gpr := gpr_write_in;
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
-- propagate to next slot
v(i+1) := r(i);
end loop;
if stall_in = '0' then
v(0).valid := gpr_write_valid_in;
v(0).bypass := bypass_avail;
v(0).gpr := gpr_write_in;
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
-- propagate to next slot
v(i).valid := r(i-1).valid;
v(i).bypass := r(i-1).bypass;
v(i).gpr := r(i-1).gpr;
end loop;

-- asynchronous output
if gpr_read_valid_in = '0' then
stall_out <= '0';
else
-- stage 0 stalled, so stage 1 becomes empty
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
-- propagate to next slot
if i = 1 then
v(i).valid := '0';
else
v(i).valid := r(i-1).valid;
v(i).bypass := r(i-1).bypass;
v(i).gpr := r(i-1).gpr;
end if;
end loop;
end if;

-- update registers

@ -13,7 +13,7 @@ entity loadstore1 is
port (
clk : in std_ulogic;

l_in : in Decode2ToLoadstore1Type;
l_in : in Execute1ToLoadstore1Type;

l_out : out Loadstore1ToDcacheType
);

@ -12,11 +12,29 @@ entity logical is
op : in insn_type_t;
invert_in : in std_ulogic;
invert_out : in std_ulogic;
result : out std_ulogic_vector(63 downto 0)
result : out std_ulogic_vector(63 downto 0);
datalen : in std_logic_vector(3 downto 0);
popcnt : out std_ulogic_vector(63 downto 0);
parity : out std_ulogic_vector(63 downto 0)
);
end entity logical;

architecture behaviour of logical is

subtype twobit is unsigned(1 downto 0);
type twobit32 is array(0 to 31) of twobit;
signal pc2 : twobit32;
subtype threebit is unsigned(2 downto 0);
type threebit16 is array(0 to 15) of threebit;
signal pc4 : threebit16;
subtype fourbit is unsigned(3 downto 0);
type fourbit8 is array(0 to 7) of fourbit;
signal pc8 : fourbit8;
subtype sixbit is unsigned(5 downto 0);
type sixbit2 is array(0 to 1) of sixbit;
signal pc32 : sixbit2;
signal par0, par1 : std_ulogic;

begin
logical_0: process(all)
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
@ -40,5 +58,45 @@ begin
result <= not tmp;
end if;

-- population counts
for i in 0 to 31 loop
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
end loop;
for i in 0 to 15 loop
pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
end loop;
for i in 0 to 7 loop
pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
end loop;
for i in 0 to 1 loop
pc32(i) <= ("00" & pc8(i * 4)) + ("00" & pc8(i * 4 + 1)) +
("00" & pc8(i * 4 + 2)) + ("00" & pc8(i * 4 + 3));
end loop;
popcnt <= (others => '0');
if datalen(3 downto 2) = "00" then
-- popcntb
for i in 0 to 7 loop
popcnt(i * 8 + 3 downto i * 8) <= std_ulogic_vector(pc8(i));
end loop;
elsif datalen(3) = '0' then
-- popcntw
for i in 0 to 1 loop
popcnt(i * 32 + 5 downto i * 32) <= std_ulogic_vector(pc32(i));
end loop;
else
popcnt(6 downto 0) <= std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
end if;

-- parity calculations
par0 <= rs(0) xor rs(8) xor rs(16) xor rs(24);
par1 <= rs(32) xor rs(40) xor rs(48) xor rs(56);
parity <= (others => '0');
if datalen(3) = '1' then
parity(0) <= par0 xor par1;
else
parity(0) <= par0;
parity(32) <= par1;
end if;

end process;
end behaviour;

@ -13,31 +13,24 @@ entity multiply is
port (
clk : in std_logic;

m_in : in Decode2ToMultiplyType;
m_out : out MultiplyToWritebackType
m_in : in Execute1ToMultiplyType;
m_out : out MultiplyToExecute1Type
);
end entity multiply;

architecture behaviour of multiply is
signal m: Decode2ToMultiplyType;
signal m: Execute1ToMultiplyType;

type multiply_pipeline_stage is record
valid : std_ulogic;
insn_type : insn_type_t;
data : signed(129 downto 0);
write_reg : std_ulogic_vector(4 downto 0);
rc : std_ulogic;
oe : std_ulogic;
is_32bit : std_ulogic;
xerc : xer_common_t;
end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
insn_type => OP_ILLEGAL,
rc => '0', oe => '0',
is_32bit => '0',
xerc => xerc_init,
data => (others => '0'),
others => (others => '0'));
data => (others => '0'));

type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
@ -64,16 +57,12 @@ begin
begin
v := r;

m_out <= MultiplyToWritebackInit;
m_out <= MultiplyToExecute1Init;

v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).insn_type := m.insn_type;
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).write_reg := m.write_reg;
v.multiply_pipeline(0).rc := m.rc;
v.multiply_pipeline(0).oe := m.oe;
v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).xerc := m.xerc;

loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
@ -101,25 +90,10 @@ begin
end case;

m_out.write_reg_data <= d2;
m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg;
m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc;
m_out.overflow <= ov;

-- Generate OV/OV32/SO when OE=1
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1';
m_out.write_reg_enable <= '1';
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then
m_out.xerc.ov <= ov;
m_out.xerc.ov32 <= ov;
m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov;
end if;
end if;

rin <= v;

@ -17,8 +17,8 @@ architecture behave of multiply_tb is

constant pipeline_depth : integer := 4;

signal m1 : Decode2ToMultiplyType;
signal m2 : MultiplyToWritebackType;
signal m1 : Execute1ToMultiplyType;
signal m2 : MultiplyToExecute1Type;
begin
multiply_0: entity work.multiply
generic map (PIPELINE_DEPTH => pipeline_depth)
@ -40,10 +40,8 @@ begin

m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;
m1.write_reg <= "10001";
m1.data1 <= '0' & x"0000000000001000";
m1.data2 <= '0' & x"0000000000001111";
m1.rc <= '0';

wait for clk_period;
assert m2.valid = '0';
@ -58,16 +56,12 @@ begin

wait for clk_period;
assert m2.valid = '1';
assert m2.write_reg_enable = '1';
assert m2.write_reg_nr = "10001";
assert m2.write_reg_data = x"0000000001111000";
assert m2.rc = '0';

wait for clk_period;
assert m2.valid = '0';

m1.valid <= '1';
m1.rc <= '1';

wait for clk_period;
assert m2.valid = '0';
@ -76,10 +70,7 @@ begin

wait for clk_period * (pipeline_depth-1);
assert m2.valid = '1';
assert m2.write_reg_enable = '1';
assert m2.write_reg_nr = "10001";
assert m2.write_reg_data = x"0000000001111000";
assert m2.rc = '1';

-- test mulld
mulld_loop : for i in 0 to 1000 loop

@ -12,8 +12,6 @@ entity writeback is

e_in : in Execute1ToWritebackType;
l_in : in DcacheToWritebackType;
m_in : in MultiplyToWritebackType;
d_in : in DividerToWritebackType;

w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType;
@ -44,7 +42,6 @@ architecture behaviour of writeback is
signal sign_extend : std_ulogic;
signal negative : std_ulogic;
signal second_word : std_ulogic;
signal zero : std_ulogic;
begin
writeback_0: process(clk)
begin
@ -64,44 +61,32 @@ begin
variable k : unsigned(3 downto 0);
variable cf: std_ulogic_vector(3 downto 0);
variable xe: xer_common_t;
variable zero : std_ulogic;
variable sign : std_ulogic;
begin
x := "" & e_in.valid;
y := "" & l_in.valid;
z := "" & m_in.valid;
w := "" & d_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

x := "" & e_in.write_enable;
y := "" & l_in.write_enable;
z := "" & m_in.write_reg_enable;
w := "" & d_in.write_reg_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

w := "" & e_in.write_cr_enable;
x := "" & (e_in.write_enable and e_in.rc);
y := "" & (m_in.valid and m_in.rc);
z := "" & (d_in.valid and d_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;

x := "" & e_in.write_xerc_enable;
y := "" & m_in.write_xerc_enable;
z := "" & D_in.write_xerc_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;

w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;

complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' or m_in.valid = '1' or d_in.valid = '1' then
if e_in.valid = '1' or l_in.valid = '1' then
complete_out <= '1';
end if;

rc <= '0';
brev_lenm1 <= "000";
byte_offset <= "000";
data_len <= x"8";
partial_write <= '0';
sign_extend <= '0';
second_word <= '0';
xe := e_in.xerc;
data_in <= (others => '0');
@ -109,9 +94,6 @@ begin
if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg;
w_out.write_enable <= '1';
data_in <= e_in.write_data;
data_len <= unsigned(e_in.write_len);
sign_extend <= e_in.sign_extend;
rc <= e_in.rc;
end if;

@ -126,12 +108,11 @@ begin
c_out.write_xerc_data <= e_in.xerc;
end if;

sign_extend <= l_in.sign_extend;
data_len <= unsigned(l_in.write_len);
byte_offset <= unsigned(l_in.write_shift);
if l_in.write_enable = '1' then
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
data_in <= l_in.write_data;
data_len <= unsigned(l_in.write_len);
byte_offset <= unsigned(l_in.write_shift);
sign_extend <= l_in.sign_extend;
if l_in.byte_reverse = '1' then
brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1;
end if;
@ -143,32 +124,6 @@ begin
xe := l_in.xerc;
end if;

if m_in.write_reg_enable = '1' then
w_out.write_enable <= '1';
w_out.write_reg <= gpr_to_gspr(m_in.write_reg_nr);
data_in <= m_in.write_reg_data;
rc <= m_in.rc;
xe := m_in.xerc;
end if;

if m_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= m_in.xerc;
end if;

if d_in.write_reg_enable = '1' then
w_out.write_enable <= '1';
w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);
data_in <= d_in.write_reg_data;
rc <= d_in.rc;
xe := d_in.xerc;
end if;

if d_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= d_in.xerc;
end if;

-- shift and byte-reverse data bytes
for i in 0 to 7 loop
k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
@ -177,7 +132,7 @@ begin
end loop;
for i in 0 to 7 loop
j := to_integer(perm(i)) * 8;
data_permuted(i * 8 + 7 downto i * 8) <= data_in(j + 7 downto j);
data_permuted(i * 8 + 7 downto i * 8) <= l_in.write_data(j + 7 downto j);
end loop;

-- If the data can arrive split over two cycles, this will be correct
@ -199,16 +154,12 @@ begin
trim_ctl(i) <= '0' & (negative and sign_extend);
end if;
end loop;
zero <= not negative;
for i in 0 to 7 loop
case trim_ctl(i) is
when "11" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then
zero <= '0';
end if;
when "01" =>
data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
when others =>
@ -217,14 +168,21 @@ begin
end loop;

-- deliver to regfile
w_out.write_data <= data_trimmed;
if l_in.write_enable = '1' then
w_out.write_data <= data_trimmed;
else
w_out.write_data <= e_in.write_data;
end if;

-- Perform CR0 update for RC forms
-- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data
if rc = '1' then
sign := e_in.write_data(63);
zero := not (or e_in.write_data);
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0);
cf(3) := negative;
cf(2) := not negative and not zero;
cf(3) := sign;
cf(2) := not sign and not zero;
cf(1) := zero;
cf(0) := xe.so;
c_out.write_cr_data(31 downto 28) <= cf;

Loading…
Cancel
Save