Merge pull request #269 from paulusmack/pipeline

Rework load/store pipeline to achieve one load/store per cycle throughput
pull/274/head
Michael Neuling 3 years ago committed by GitHub
commit 6c7689052d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -43,8 +43,8 @@ all: $(all)

core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl \
control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.decode_types.all;

package common is
@ -126,7 +127,19 @@ package common is
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

type irq_state_t is (WRITE_SRR0, WRITE_SRR1);
-- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
subtype tag_number_t is integer range 0 to TAG_COUNT - 1;
subtype tag_index_t is unsigned(TAG_NUMBER_BITS - 1 downto 0);
type instr_tag_t is record
tag : tag_number_t;
valid : std_ulogic;
end record;
constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0');
function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean;

subtype intr_vector_t is integer range 0 to 16#fff#;

-- For now, fixed 16 sources, make this either a parametric
-- package of some sort or an unconstrainted array.
@ -144,8 +157,6 @@ package common is
dec: std_ulogic_vector(63 downto 0);
msr: std_ulogic_vector(63 downto 0);
cfar: std_ulogic_vector(63 downto 0);
irq_state : irq_state_t;
srr1: std_ulogic_vector(63 downto 0);
end record;

type Fetch1ToIcacheType is record
@ -176,26 +187,40 @@ package common is
insn: std_ulogic_vector(31 downto 0);
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
ispro: gspr_index_t; -- (G)SPR written with LR or CTR
decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
big_endian: std_ulogic;
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init,
br_pred => '0', big_endian => '0');
ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'),
decode => decode_rom_init, br_pred => '0', big_endian => '0');

type Decode1ToFetch1Type is record
redirect : std_ulogic;
redirect_nia : std_ulogic_vector(63 downto 0);
end record;

type bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(63 downto 0);
end record;
constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

type cr_bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(31 downto 0);
end record;
constant cr_bypass_data_init : cr_bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

type Decode2ToExecute1Type is record
valid: std_ulogic;
unit : unit_t;
fac : facility_t;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t;
write_reg: gspr_index_t;
write_reg_enable: std_ulogic;
read_reg1: gspr_index_t;
@ -203,13 +228,10 @@ package common is
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
bypass_data1: std_ulogic;
bypass_data2: std_ulogic;
bypass_data3: std_ulogic;
cr: std_ulogic_vector(31 downto 0);
bypass_cr : std_ulogic;
xerc: xer_common_t;
lr: std_ulogic;
br_abs: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic;
@ -219,6 +241,7 @@ package common is
output_carry: std_ulogic;
input_cr: std_ulogic;
output_cr: std_ulogic;
output_xer: std_ulogic;
is_32bit: std_ulogic;
is_signed: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
@ -234,10 +257,11 @@ package common is
second : std_ulogic; -- set if this is the second op
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL,
write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0',
lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
output_cr => '0', output_xer => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
@ -289,9 +313,9 @@ package common is
end record;

type RegisterFileToDecode2Type is record
read1_data : std_ulogic_vector(63 downto 0);
read2_data : std_ulogic_vector(63 downto 0);
read3_data : std_ulogic_vector(63 downto 0);
read1_data : std_ulogic_vector(63 downto 0);
read2_data : std_ulogic_vector(63 downto 0);
read3_data : std_ulogic_vector(63 downto 0);
end record;

type Decode2ToCrFileType is record
@ -303,27 +327,12 @@ package common is
read_xerc_data : xer_common_t;
end record;

type Execute1ToFetch1Type is record
redirect: std_ulogic;
virt_mode: std_ulogic;
priv_mode: std_ulogic;
big_endian: std_ulogic;
mode_32bit: std_ulogic;
redirect_nia: std_ulogic_vector(63 downto 0);
br_nia : std_ulogic_vector(63 downto 0);
br_last : std_ulogic;
br_taken : std_ulogic;
end record;
constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
priv_mode => '0', big_endian => '0',
mode_32bit => '0', br_taken => '0',
br_last => '0', others => (others => '0'));

type Execute1ToLoadstore1Type is record
valid : std_ulogic;
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
@ -333,7 +342,6 @@ package common is
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
update_reg : gpr_index_t; -- if so, the register to update
xerc : xer_common_t;
reserve : std_ulogic; -- set for larx/stcx.
rc : std_ulogic; -- set for stcx.
@ -343,30 +351,29 @@ package common is
is_32bit : std_ulogic;
repeat : std_ulogic;
second : std_ulogic;
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0', others => (others => '0'));
msr : std_ulogic_vector(63 downto 0);
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
(valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
instr_tag => instr_tag_init,
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0',
msr => (others => '0'));

type Loadstore1ToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
alignment : std_ulogic;
invalid : std_ulogic;
perm_error : std_ulogic;
rc_error : std_ulogic;
badtree : std_ulogic;
segment_fault : std_ulogic;
instr_fault : std_ulogic;
in_progress : std_ulogic;
end record;

type Loadstore1ToDcacheType is record
valid : std_ulogic;
hold : std_ulogic;
load : std_ulogic; -- is this a load
dcbz : std_ulogic;
nc : std_ulogic;
@ -438,18 +445,28 @@ package common is

type Loadstore1ToWritebackType is record
valid : std_ulogic;
instr_tag : instr_tag_t;
write_enable: std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t;
rc : std_ulogic;
store_done : std_ulogic;
interrupt : std_ulogic;
intr_vec : intr_vector_t;
srr0: std_ulogic_vector(63 downto 0);
srr1: std_ulogic_vector(15 downto 0);
end record;
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init,
rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0'));
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, write_enable => '0',
write_reg => (others => '0'), write_data => (others => '0'),
xerc => xerc_init, rc => '0', store_done => '0',
interrupt => '0', intr_vec => 0,
srr0 => (others => '0'), srr1 => (others => '0'));

type Execute1ToWritebackType is record
valid: std_ulogic;
instr_tag : instr_tag_t;
rc : std_ulogic;
mode_32bit : std_ulogic;
write_enable : std_ulogic;
@ -460,21 +477,34 @@ package common is
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
exc_write_enable : std_ulogic;
exc_write_reg : gspr_index_t;
exc_write_data : std_ulogic_vector(63 downto 0);
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', mode_32bit => '0', write_enable => '0',
write_cr_enable => '0', exc_write_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0'));
interrupt : std_ulogic;
intr_vec : intr_vector_t;
redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0);
last_nia: std_ulogic_vector(63 downto 0);
br_offset: std_ulogic_vector(63 downto 0);
br_last: std_ulogic;
br_taken: std_ulogic;
abs_br: std_ulogic;
srr1: std_ulogic_vector(15 downto 0);
msr: std_ulogic_vector(63 downto 0);
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0',
write_enable => '0', write_cr_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
last_nia => (others => '0'), br_offset => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0'));

type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
@ -486,6 +516,7 @@ package common is
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
itag => instr_tag_init,
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
@ -494,21 +525,30 @@ package common is
type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
interrupt : std_ulogic;
illegal : std_ulogic;
end record;
constant FPUToExecute1Init : FPUToExecute1Type := (others => '0');

type FPUToWritebackType is record
valid : std_ulogic;
interrupt : std_ulogic;
instr_tag : instr_tag_t;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;
constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0'));
intr_vec : intr_vector_t;
srr0 : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0);
end record;
constant FPUToWritebackInit : FPUToWritebackType :=
(valid => '0', interrupt => '0', instr_tag => instr_tag_init,
write_enable => '0', write_reg => (others => '0'),
write_cr_enable => '0', write_cr_mask => (others => '0'),
write_cr_data => (others => '0'),
intr_vec => 0, srr1 => (others => '0'),
others => (others => '0'));

type DividerToExecute1Type is record
valid: std_ulogic;
@ -518,12 +558,29 @@ package common is
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type WritebackToFetch1Type is record
redirect: std_ulogic;
virt_mode: std_ulogic;
priv_mode: std_ulogic;
big_endian: std_ulogic;
mode_32bit: std_ulogic;
redirect_nia: std_ulogic_vector(63 downto 0);
br_nia : std_ulogic_vector(63 downto 0);
br_last : std_ulogic;
br_taken : std_ulogic;
end record;
constant WritebackToFetch1Init : WritebackToFetch1Type :=
(redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
mode_32bit => '0', redirect_nia => (others => '0'),
br_last => '0', br_taken => '0', br_nia => (others => '0'));

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic;
end record;
constant WritebackToRegisterFileInit : WritebackToRegisterFileType := (write_enable => '0', write_data => (others => '0'), others => (others => '0'));
constant WritebackToRegisterFileInit : WritebackToRegisterFileType :=
(write_enable => '0', write_data => (others => '0'), others => (others => '0'));

type WritebackToCrFileType is record
write_cr_enable : std_ulogic;
@ -553,9 +610,9 @@ package body common is
begin
case spr is
when SPR_LR =>
n := 0;
n := 0; -- N.B. decode2 relies on this specific value
when SPR_CTR =>
n:= 1;
n := 1; -- N.B. decode2 relies on this specific value
when SPR_SRR0 =>
n := 2;
when SPR_SRR1 =>
@ -616,4 +673,9 @@ package body common is
begin
return "10" & f;
end;

function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
begin
return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag;
end;
end common;

@ -6,13 +6,14 @@ use work.common.all;

entity control is
generic (
PIPELINE_DEPTH : natural := 2
EX1_BYPASS : boolean := true;
PIPELINE_DEPTH : natural := 3
);
port (
clk : in std_ulogic;
rst : in std_ulogic;

complete_in : in std_ulogic;
complete_in : in instr_tag_t;
valid_in : in std_ulogic;
repeated : in std_ulogic;
flush_in : in std_ulogic;
@ -23,10 +24,6 @@ entity control is

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;

update_gpr_write_valid : in std_ulogic;
update_gpr_write_reg : in gspr_index_t;

gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;
@ -37,9 +34,11 @@ entity control is
gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gspr_index_t;

execute_next_tag : in instr_tag_t;
execute_next_cr_tag : in instr_tag_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
cr_bypassable : in std_ulogic;

valid_out : out std_ulogic;
stall_out : out std_ulogic;
@ -48,7 +47,9 @@ entity control is
gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic;
cr_bypass : out std_ulogic
cr_bypass : out std_ulogic;

instr_tag_out : out instr_tag_t
);
end entity control;

@ -63,122 +64,165 @@ architecture rtl of control is

signal r_int, rin_int : reg_internal_type := reg_internal_init;

signal stall_a_out : std_ulogic;
signal stall_b_out : std_ulogic;
signal stall_c_out : std_ulogic;
signal cr_stall_out : std_ulogic;

signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';

begin
gpr_hazard0: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_a_out,
use_bypass => gpr_bypass_a
);

gpr_hazard1: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_b_out,
use_bypass => gpr_bypass_b
);

gpr_hazard2: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_c_out,
use_bypass => gpr_bypass_c
);

cr_hazard0: entity work.cr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,

cr_read_in => cr_read_in,
cr_write_in => cr_write_valid,
bypassable => cr_bypassable,

stall_out => cr_stall_out,
use_bypass => cr_bypass
);
type tag_register is record
wr_gpr : std_ulogic;
reg : gspr_index_t;
recent : std_ulogic;
wr_cr : std_ulogic;
end record;

type tag_regs_array is array(tag_number_t) of tag_register;
signal tag_regs : tag_regs_array;

signal instr_tag : instr_tag_t;

signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic;

signal curr_tag : tag_number_t;
signal next_tag : tag_number_t;

signal curr_cr_tag : tag_number_t;

begin
control0: process(clk)
begin
if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
for i in tag_number_t loop
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then
tag_regs(i).recent <= '0';
if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then
report "tag " & integer'image(i) & " not recent";
end if;
end if;
if instr_tag.valid = '1' and i = instr_tag.tag then
tag_regs(i).wr_gpr <= gpr_write_valid;
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid;
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if;
end if;
end if;
end loop;
if rst = '1' then
curr_tag <= 0;
curr_cr_tag <= 0;
else
curr_tag <= next_tag;
if cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag;
end if;
end if;
end if;
end process;

control_hazards : process(all)
variable gpr_stall : std_ulogic;
variable tag_a : instr_tag_t;
variable tag_b : instr_tag_t;
variable tag_c : instr_tag_t;
variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t;
variable byp_a : std_ulogic;
variable byp_b : std_ulogic;
variable byp_c : std_ulogic;
variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic;
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then
tag_a.valid := gpr_a_read_valid_in;
tag_a.tag := i;
end if;
end loop;
if tag_match(tag_a, complete_in) then
tag_a.valid := '0';
end if;
tag_b := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
tag_b.valid := gpr_b_read_valid_in;
tag_b.tag := i;
end if;
end loop;
if tag_match(tag_b, complete_in) then
tag_b.valid := '0';
end if;
tag_c := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
tag_c.valid := gpr_c_read_valid_in;
tag_c.tag := i;
end if;
end loop;
if tag_match(tag_c, complete_in) then
tag_c.valid := '0';
end if;

byp_a := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
byp_a := '1';
end if;
byp_b := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := '1';
end if;
byp_c := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := '1';
end if;

gpr_bypass_a <= byp_a;
gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c;

gpr_tag_stall <= (tag_a.valid and not byp_a) or
(tag_b.valid and not byp_b) or
(tag_c.valid and not byp_c);

incr_tag := curr_tag;
instr_tag.tag <= curr_tag;
instr_tag.valid <= valid_out and not deferred;
if instr_tag.valid = '1' then
incr_tag := (curr_tag + 1) mod TAG_COUNT;
end if;
next_tag <= incr_tag;
instr_tag_out <= instr_tag;

-- CR hazards
tag_cr.tag := curr_cr_tag;
tag_cr.valid := cr_read_in and tag_regs(curr_cr_tag).wr_cr;
if tag_match(tag_cr, complete_in) then
tag_cr.valid := '0';
end if;
byp_cr := '0';
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
byp_cr := '1';
end if;

cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr;
end process;

control1 : process(all)
variable v_int : reg_internal_type;
variable valid_tmp : std_ulogic;
@ -191,11 +235,14 @@ begin
stall_tmp := '0';

if flush_in = '1' then
-- expect to see complete_in next cycle
v_int.outstanding := 1;
elsif complete_in = '1' then
v_int.outstanding := 0;
elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
if r_int.outstanding >= PIPELINE_DEPTH + 1 then
valid_tmp := '0';
stall_tmp := '1';
end if;

if rst = '1' then
v_int := reg_internal_init;
@ -222,8 +269,8 @@ begin
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR hazards
stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out;
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;

@ -249,8 +296,8 @@ begin
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR hazards
stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out;
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
else
@ -262,15 +309,11 @@ begin
valid_tmp := '0';
end if;

if valid_tmp = '1' then
if deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;
gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in;
else
gpr_write_valid <= '0';
cr_write_valid <= '0';
gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp;

if valid_tmp = '1' and deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;

-- update outputs

@ -46,6 +46,7 @@ end core;
architecture behave of core is
-- icache signals
signal fetch1_to_icache : Fetch1ToIcacheType;
signal writeback_to_fetch1: WritebackToFetch1Type;
signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType;

@ -66,7 +67,8 @@ architecture behave of core is

-- execute signals
signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_to_fetch1: Execute1ToFetch1Type;
signal execute1_bypass: bypass_data_t;
signal execute1_cr_bypass: cr_bypass_data_t;

-- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@ -102,10 +104,11 @@ architecture behave of core is
signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic;

signal complete: std_ulogic;
signal complete: instr_tag_t;
signal terminate: std_ulogic;
signal core_rst: std_ulogic;
signal icache_inv: std_ulogic;
signal do_interrupt: std_ulogic;

-- Delayed/Latched resets and alt_reset
signal rst_fetch1 : std_ulogic := '1';
@ -117,6 +120,7 @@ architecture behave of core is
signal rst_ex1 : std_ulogic := '1';
signal rst_fpu : std_ulogic := '1';
signal rst_ls1 : std_ulogic := '1';
signal rst_wback : std_ulogic := '1';
signal rst_dbg : std_ulogic := '1';
signal alt_reset_d : std_ulogic;

@ -180,6 +184,7 @@ begin
rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst;
rst_wback <= core_rst;
rst_dbg <= rst;
alt_reset_d <= alt_reset;
end if;
@ -200,7 +205,7 @@ begin
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
stop_in => dbg_core_stop,
d_in => decode1_to_fetch1,
e_in => execute1_to_fetch1,
w_in => writeback_to_fetch1,
i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
);
@ -273,6 +278,8 @@ begin
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass,
execute_cr_bypass => execute1_cr_bypass,
log_out => log_data(119 downto 110)
);
decode2_busy_in <= ex1_busy_out;
@ -320,16 +327,18 @@ begin
port map (
clk => clk,
rst => rst_ex1,
flush_out => flush,
flush_in => flush,
busy_out => ex1_busy_out,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq,
interrupt_in => do_interrupt,
l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
bypass_data => execute1_bypass,
bypass_cr_data => execute1_cr_bypass,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
terminate_out => terminate,
@ -410,11 +419,15 @@ begin
writeback_0: entity work.writeback
port map (
clk => clk,
rst => rst_wback,
flush_out => flush,
e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
f_out => writeback_to_fetch1,
interrupt_out => do_interrupt,
complete_out => complete
);


@ -1,86 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity cr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
);
port(
clk : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
bypassable : in std_ulogic;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
);
end entity cr_hazard;
architecture behaviour of cr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0');

type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);

signal r, rin : pipeline_t := pipeline_t_init;
begin
cr_hazard0: process(clk)
begin
if rising_edge(clk) then
r <= rin;
end if;
end process;

cr_hazard1: process(all)
variable v : pipeline_t;
begin
v := r;

-- XXX assumes PIPELINE_DEPTH = 1
if complete_in = '1' then
v(1).valid := '0';
end if;

use_bypass <= '0';
stall_out <= '0';
if cr_read_in = '1' then
loop_0: for i in 0 to PIPELINE_DEPTH loop
if v(i).valid = '1' then
if r(i).bypass = '1' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
end loop;
end if;

-- XXX assumes PIPELINE_DEPTH = 1
if busy_in = '0' then
v(1) := r(0);
v(0).valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := cr_write_in;
v(0).bypass := bypassable;
end if;
if flush_in = '1' then
v(0).valid := '0';
v(1).valid := '0';
end if;

-- update registers
rin <= v;

end process;
end;

@ -275,6 +275,7 @@ architecture rtl of dcache is
doall : std_ulogic; -- with tlbie, indicates flush whole TLB
tlbld : std_ulogic; -- indicates a TLB load request (from MMU)
mmu_req : std_ulogic; -- indicates source of request
d_valid : std_ulogic; -- indicates req.data is valid now
end record;

signal r0 : reg_stage_0_t;
@ -564,17 +565,27 @@ begin
r.mmu_req := '1';
else
r.req := d_in;
r.req.data := (others => '0');
r.tlbie := '0';
r.doall := '0';
r.tlbld := '0';
r.mmu_req := '0';
end if;
r.d_valid := '0';
if rst = '1' then
r0_full <= '0';
elsif r1.full = '0' or r0_full = '0' then
elsif (r1.full = '0' and d_in.hold = '0') or r0_full = '0' then
r0 <= r;
r0_full <= r.req.valid;
end if;
-- Sample data the cycle after a request comes in from loadstore1.
-- If another request has come in already then the data will get
-- put directly into req.data below.
if r0.req.valid = '1' and r.req.valid = '0' and r0.d_valid = '0' and
r0.mmu_req = '0' then
r0.req.data <= d_in.data;
r0.d_valid <= '1';
end if;
end if;
end process;

@ -582,8 +593,8 @@ begin
m_out.stall <= '0';

-- Hold off the request in r0 when r1 has an uncompleted request
r0_stall <= r0_full and r1.full;
r0_valid <= r0_full and not r1.full;
r0_stall <= r0_full and (r1.full or d_in.hold);
r0_valid <= r0_full and not r1.full and not d_in.hold;
stall_out <= r0_stall;

-- TLB
@ -1305,10 +1316,12 @@ begin
req.dcbz := r0.req.dcbz;
req.real_addr := ra;
-- Force data to 0 for dcbz
if r0.req.dcbz = '0' then
req.data := d_in.data;
else
if r0.req.dcbz = '1' then
req.data := (others => '0');
elsif r0.d_valid = '1' then
req.data := r0.req.data;
else
req.data := d_in.data;
end if;
-- Select all bytes for dcbz and for cacheable loads
if r0.req.dcbz = '1' or (r0.req.load = '1' and r0.req.nc = '0') then
@ -1438,10 +1451,10 @@ begin
-- complete the request next cycle.
-- Compare the whole address in case the request in
-- r1.req is not the one that started this refill.
if r1.full = '1' and r1.req.same_tag = '1' and
((r1.dcbz = '1' and r1.req.dcbz = '1') or
(r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS)) and
r1.store_row = get_row(r1.req.real_addr) then
if req.valid = '1' and req.same_tag = '1' and
((r1.dcbz = '1' and req.dcbz = '1') or
(r1.dcbz = '0' and req.op = OP_LOAD_MISS)) and
r1.store_row = get_row(req.real_addr) then
r1.full <= '0';
r1.slow_valid <= '1';
if r1.mmu_req = '0' then

@ -79,23 +79,23 @@ architecture behaviour of decode1 is
28 => (ALU, NONE, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andi.
29 => (ALU, NONE, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andis.
0 => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- attn
18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
16 => (ALU, NONE, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- bc
11 => (ALU, NONE, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmpi
10 => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpli
34 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbz
35 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lbzu
35 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lbzu
50 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfd
51 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lfdu
51 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lfdu
48 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- lfs
49 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- lfsu
49 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', DUPD), -- lfsu
42 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lha
43 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhau
43 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhau
40 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhz
41 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhzu
41 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhzu
56 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DQ, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTE), -- lq
32 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwz
33 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lwzu
33 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwzu
7 => (ALU, NONE, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- mulli
24 => (ALU, NONE, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ori
25 => (ALU, NONE, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- oris
@ -104,15 +104,15 @@ architecture behaviour of decode1 is
23 => (ALU, NONE, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- rlwnm
17 => (ALU, NONE, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sc
38 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stb
39 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbu
39 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbu
54 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stfd
55 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdu
55 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdu
52 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- stfs
53 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsu
53 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsu
44 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sth
45 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthu
45 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthu
36 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stw
37 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwu
37 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwu
8 => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- subfic
2 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- tdi
3 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1', NONE), -- twi
@ -214,7 +214,7 @@ architecture behaviour of decode1 is
2#0100111010# => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cbcdtd
2#0100011010# => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cdtbcd
2#0000000000# => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmp
2#0111111100# => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpb
2#0111111100# => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpb
2#0011100000# => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpeqb
2#0000100000# => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpl
2#0011000000# => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmprb
@ -290,33 +290,33 @@ architecture behaviour of decode1 is
2#1111101111# => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- isel
2#0000110100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lbarx
2#1101010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbzcix
2#0001110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lbzux
2#0001110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lbzux
2#0001010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbzx
2#0001010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- ldarx
2#1000010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ldbrx
2#1101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ldcix
2#0000110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- ldux
2#0000110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- ldux
2#0000010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ldx
2#1001010111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfdx
2#1001110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lfdux
2#1001110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lfdux
2#1101010111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfiwax
2#1101110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfiwzx
2#1000010111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- lfsx
2#1000110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- lfsux
2#1000110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', DUPD), -- lfsux
2#0001110100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lharx
2#0101110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhaux
2#0101110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhaux
2#0101010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhax
2#1100010110# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhbrx
2#1100110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhzcix
2#0100110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhzux
2#0100110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhzux
2#0100010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhzx
2#0100010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', DRTE), -- lqarx
2#0000010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lwarx
2#0101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lwaux
2#0101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwaux
2#0101010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwax
2#1000010110# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwbrx
2#1100010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwzcix
2#0000110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lwzux
2#0000110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwzux
2#0000010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwzx
2#1001000000# => (ALU, NONE, OP_MCRXRX, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mcrxrx
2#0000010011# => (ALU, NONE, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfcr/mfocrf
@ -376,28 +376,28 @@ architecture behaviour of decode1 is
2#1000011000# => (ALU, NONE, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- srw
2#1111010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stbcix
2#1010110110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stbcx
2#0011110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbux
2#0011110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbux
2#0011010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stbx
2#1010010100# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stdbrx
2#1111110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stdcix
2#0011010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stdcx
2#0010110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdux
2#0010110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdux
2#0010010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stdx
2#1011010111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stfdx
2#1011110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdux
2#1011110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdux
2#1111010111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stfiwx
2#1010010111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- stfsx
2#1010110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsux
2#1010110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsux
2#1110010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthbrx
2#1110110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthcix
2#1011010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- sthcx
2#0110110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthux
2#0110110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthux
2#0110010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthx
2#0010110110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', DRSE), -- stqcx
2#1010010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwbrx
2#1110010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwcix
2#0010010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stwcx
2#0010110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwux
2#0010110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwux
2#0010010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwx
2#0000101000# => (ALU, NONE, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subf
2#1000101000# => (ALU, NONE, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfo
@ -424,7 +424,7 @@ architecture behaviour of decode1 is
-- unit fac internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl rpt
-- op in out A out in out len ext pipe
0 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ld
1 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- ldu
1 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- ldu
2 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwa
others => decode_rom_init
);
@ -451,7 +451,7 @@ architecture behaviour of decode1 is
-- unit fac internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl rpt
-- op in out A out in out len ext pipe
0 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- std
1 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdu
1 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdu
2 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSE), -- stq
others => decode_rom_init
);
@ -597,9 +597,10 @@ begin
-- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));

-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
-- Work out ispr1/ispro independent of v.decode since they seem to be critical path
sprn := decode_spr_num(f_in.insn);
v.ispr1 := fast_spr_num(sprn);
v.ispro := fast_spr_num(sprn);

if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr
@ -627,6 +628,9 @@ begin
-- CTR may be needed as input to bc
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
v.ispro := fast_spr_num(SPR_CTR);
elsif f_in.insn(0) = '1' then
v.ispro := fast_spr_num(SPR_LR);
end if;
-- Predict backward branches as taken, forward as untaken
v.br_pred := f_in.insn(15);
@ -636,6 +640,9 @@ begin
-- Unconditional branches are always taken
v.br_pred := '1';
br_offset := signed(f_in.insn(25 downto 2));
if f_in.insn(0) = '1' then
v.ispro := fast_spr_num(SPR_LR);
end if;

when 19 =>
vi.override := not decode_op_19_valid(to_integer(unsigned(f_in.insn(5 downto 1) & f_in.insn(10 downto 6))));
@ -648,8 +655,12 @@ begin
-- Branch uses CTR as condition when BO(2) is 0. This is
-- also used to indicate that CTR is modified (they go
-- together).
if f_in.insn(23) = '0' then
-- bcctr doesn't update CTR or use it in the branch condition
if f_in.insn(23) = '0' and (f_in.insn(10) = '0' or f_in.insn(6) = '1') then
v.ispr1 := fast_spr_num(SPR_CTR);
v.ispro := fast_spr_num(SPR_CTR);
elsif f_in.insn(0) = '1' then
v.ispro := fast_spr_num(SPR_LR);
end if;
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);

@ -19,7 +19,7 @@ entity decode2 is
clk : in std_ulogic;
rst : in std_ulogic;

complete_in : in std_ulogic;
complete_in : in instr_tag_t;
busy_in : in std_ulogic;
stall_out : out std_ulogic;

@ -37,6 +37,9 @@ entity decode2 is
c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType;

execute_bypass : in bypass_data_t;
execute_cr_bypass : in cr_bypass_data_t;

log_out : out std_ulogic_vector(9 downto 0)
);
end entity decode2;
@ -205,22 +208,6 @@ architecture behaviour of decode2 is
end case;
end;

-- For now, use "rc" in the decode table to decide whether oe exists.
-- This is not entirely correct architecturally: For mulhd and
-- mulhdu, the OE field is reserved. It remains to be seen what an
-- actual POWER9 does if we set it on those instructions, for now we
-- test that further down when assigning to the multiplier oe input.
--
function decode_oe (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
begin
case t is
when RC =>
return insn_oe(insn_in);
when OTHERS =>
return '0';
end case;
end;

-- control signals that are derived from insn_type
type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);

@ -249,6 +236,9 @@ architecture behaviour of decode2 is
OP_MOD => "011",
OP_CNTZ => "100", -- countzero_result
OP_MFSPR => "101", -- spr_result
OP_B => "110", -- next_nia
OP_BC => "110",
OP_BCREG => "110",
OP_ADDG6S => "111", -- misc_result
OP_ISEL => "111",
OP_DARN => "111",
@ -271,6 +261,12 @@ architecture behaviour of decode2 is
OP_MFMSR => "100",
OP_MFCR => "101",
OP_SETB => "110",
OP_CMP => "000", -- cr_result
OP_CMPRB => "001",
OP_CMPEQB => "010",
OP_CROP => "011",
OP_MCRXRX => "100",
OP_MTCRF => "101",
others => "000"
);

@ -282,31 +278,29 @@ architecture behaviour of decode2 is

signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic;

signal update_gpr_write_valid : std_ulogic;
signal update_gpr_write_reg : gspr_index_t;

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic;
signal gpr_a_read : gspr_index_t;
signal gpr_a_bypass : std_ulogic;

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic;
signal cr_bypass_avail : std_ulogic;

signal instr_tag : instr_tag_t;

begin
control_0: entity work.control
generic map (
PIPELINE_DEPTH => 1
EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
@ -323,10 +317,6 @@ begin

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,

update_gpr_write_valid => update_gpr_write_valid,
update_gpr_write_reg => update_gpr_write_reg,

gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,
@ -337,10 +327,12 @@ begin
gpr_c_read_valid_in => gpr_c_read_valid,
gpr_c_read_in => gpr_c_read,

cr_read_in => d_in.decode.input_cr,
execute_next_tag => execute_bypass.tag,
execute_next_cr_tag => execute_cr_bypass.tag,

cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid,
cr_bypass => cr_bypass,
cr_bypassable => cr_bypass_avail,

valid_out => control_valid_out,
stall_out => control_stall_out,
@ -348,7 +340,9 @@ begin

gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass
gpr_bypass_c => gpr_c_bypass,

instr_tag_out => instr_tag
);

deferred <= r.e.valid and busy_in;
@ -376,6 +370,7 @@ begin
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable length : std_ulogic_vector(3 downto 0);
variable op : insn_type_t;
begin
v := r;

@ -386,12 +381,35 @@ begin

--v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;

-- Work out whether XER common bits are set
v.e.output_xer := d_in.decode.output_carry;
case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
v.e.oe := '1';
v.e.output_xer := '1';
end if;
when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.e.output_xer := '1';
end if;
when others =>
end case;

decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispr1);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);

if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
-- b and bc have even major opcodes; bcreg is considered absolute
v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26);
end if;
op := d_in.decode.insn_type;

if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
@ -407,8 +425,19 @@ begin
if r.repeat = d_in.big_endian then
decoded_reg_o.reg(0) := '1';
end if;
when DUPD =>
-- update-form loads, 2nd instruction writes RA
if r.repeat = '1' then
decoded_reg_o.reg := decoded_reg_a.reg;
end if;
when others =>
end case;
elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
-- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
v.e.repeat := '1';
v.e.second := r.repeat;
-- first one does CTR, second does LR
decoded_reg_o.reg(0) := not r.repeat;
end if;

r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
@ -435,43 +464,21 @@ begin
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
v.e.insn_type := d_in.decode.insn_type;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data;
v.e.bypass_data2 := gpr_b_bypass;
v.e.read_data3 := decoded_reg_c.data;
v.e.bypass_data3 := gpr_c_bypass;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.e.cr := c_in.read_cr_data;
v.e.bypass_cr := cr_bypass;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then
-- add -1 to CTR
v.e.addm1 := '1';
if d_in.insn(23) = '1' or
(d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
-- don't write decremented CTR if BO(2) = 1 or bcctr
v.e.write_reg_enable := '0';
end if;
end if;
v.e.insn_type := op;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
v.e.is_32bit := d_in.decode.is_32bit;
v.e.is_signed := d_in.decode.is_signed;
if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
end if;
v.e.insn := d_in.insn;
v.e.data_len := length;
v.e.byte_reverse := d_in.decode.byte_reverse;
@ -479,8 +486,41 @@ begin
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;
v.e.result_sel := result_select(d_in.decode.insn_type);
v.e.sub_select := subresult_select(d_in.decode.insn_type);
v.e.result_sel := result_select(op);
v.e.sub_select := subresult_select(op);
if op = OP_BC or op = OP_BCREG then
if d_in.insn(23) = '0' and r.repeat = '0' and
not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
-- decrement CTR if BO(2) = 0 and not bcctr
v.e.addm1 := '1';
v.e.result_sel := "000"; -- select adder output
end if;
end if;

-- See if any of the operands can get their value via the bypass path.
case gpr_a_bypass is
when '1' =>
v.e.read_data1 := execute_bypass.data;
when others =>
v.e.read_data1 := decoded_reg_a.data;
end case;
case gpr_b_bypass is
when '1' =>
v.e.read_data2 := execute_bypass.data;
when others =>
v.e.read_data2 := decoded_reg_b.data;
end case;
case gpr_c_bypass is
when '1' =>
v.e.read_data3 := execute_bypass.data;
when others =>