Merge pull request #379 from paulusmack/master

Lots of improvements
pull/381/head
Michael Neuling 3 years ago committed by GitHub
commit 281a125f1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -86,35 +86,24 @@ package common is
-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR index (can hold an SPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(6 downto 0);
-- Extended GPR index (can hold a GPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0);

-- FPR indices
subtype fpr_index_t is std_ulogic_vector(4 downto 0);

-- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31.
-- FPRs are stored in the register file, using GSPR
-- numbers from 32 to 63.
--
-- The function fast_spr_num() returns the corresponding fast
-- pseudo-GPR number for a given SPR number. The result MSB
-- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file.
--
-- FPRs are also stored in the register file, using GSPR
-- numbers from 64 to 95.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Indices conversion functions
function gspr_to_gpr(i: gspr_index_t) return gpr_index_t;
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t;

-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
-- control). The rest is stored as a fast SPR.
-- control). The rest is stored in ctrl_t (effectively in execute1).
type xer_common_t is record
ca : std_ulogic;
ca32 : std_ulogic;
@ -124,6 +113,48 @@ package common is
end record;
constant xerc_init : xer_common_t := (others => '0');

-- Some SPRs are stored in a pair of small RAMs in execute1
-- Even half:
subtype ramspr_index is natural range 0 to 7;
constant RAMSPR_SRR0 : ramspr_index := 0;
constant RAMSPR_HSRR0 : ramspr_index := 1;
constant RAMSPR_SPRG0 : ramspr_index := 2;
constant RAMSPR_SPRG2 : ramspr_index := 3;
constant RAMSPR_HSPRG0 : ramspr_index := 4;
constant RAMSPR_LR : ramspr_index := 5; -- must equal RAMSPR_CTR
constant RAMSPR_TAR : ramspr_index := 6;
-- Odd half:
constant RAMSPR_SRR1 : ramspr_index := 0;
constant RAMSPR_HSRR1 : ramspr_index := 1;
constant RAMSPR_SPRG1 : ramspr_index := 2;
constant RAMSPR_SPRG3 : ramspr_index := 3;
constant RAMSPR_HSPRG1 : ramspr_index := 4;
constant RAMSPR_CTR : ramspr_index := 5; -- must equal RAMSPR_LR

type ram_spr_info is record
index : ramspr_index;
isodd : std_ulogic;
valid : std_ulogic;
end record;
constant ram_spr_info_init: ram_spr_info := (index => 0, others => '0');

subtype spr_selector is std_ulogic_vector(2 downto 0);
type spr_id is record
sel : spr_selector;
valid : std_ulogic;
ispmu : std_ulogic;
end record;
constant spr_id_init : spr_id := (sel => "000", others => '0');

constant SPRSEL_TB : spr_selector := 3x"0";
constant SPRSEL_TBU : spr_selector := 3x"1";
constant SPRSEL_DEC : spr_selector := 3x"2";
constant SPRSEL_PVR : spr_selector := 3x"3";
constant SPRSEL_LOGA : spr_selector := 3x"4";
constant SPRSEL_LOGD : spr_selector := 3x"5";
constant SPRSEL_CFAR : spr_selector := 3x"6";
constant SPRSEL_XER : spr_selector := 3x"7";

-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
constant FPSCR_FEX : integer := 63 - 33;
@ -192,7 +223,10 @@ package common is
dec: std_ulogic_vector(63 downto 0);
msr: std_ulogic_vector(63 downto 0);
cfar: std_ulogic_vector(63 downto 0);
xer_low: std_ulogic_vector(17 downto 0);
end record;
constant ctrl_t_init : ctrl_t :=
(xer_low => 18x"0", others => (others => '0'));

type Fetch1ToIcacheType is record
req: std_ulogic;
@ -226,23 +260,35 @@ package common is
stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
ispro: gspr_index_t; -- (G)SPR written with LR or CTR
decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
big_endian: std_ulogic;
spr_info : spr_id;
ram_spr : ram_spr_info;
reg_a : gspr_index_t;
reg_b : gspr_index_t;
reg_c : gspr_index_t;
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'),
decode => decode_rom_init, br_pred => '0', big_endian => '0');
decode => decode_rom_init, br_pred => '0', big_endian => '0',
spr_info => spr_id_init, ram_spr => ram_spr_info_init,
reg_a => (others => '0'), reg_b => (others => '0'), reg_c => (others => '0'));

type Decode1ToFetch1Type is record
redirect : std_ulogic;
redirect_nia : std_ulogic_vector(63 downto 0);
end record;

type Decode1ToRegisterFileType is record
reg_1_addr : gspr_index_t;
reg_2_addr : gspr_index_t;
reg_3_addr : gspr_index_t;
read_1_enable : std_ulogic;
read_2_enable : std_ulogic;
read_3_enable : std_ulogic;
end record;

type bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(63 downto 0);
@ -266,6 +312,7 @@ package common is
write_reg_enable: std_ulogic;
read_reg1: gspr_index_t;
read_reg2: gspr_index_t;
read_reg3: gspr_index_t;
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
@ -276,7 +323,6 @@ package common is
rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic;
addm1 : std_ulogic;
invert_out: std_ulogic;
input_carry: carry_in_t;
output_carry: std_ulogic;
@ -296,11 +342,21 @@ package common is
sub_select : std_ulogic_vector(2 downto 0); -- sub-result selection
repeat : std_ulogic; -- set if instruction is cracked into two ops
second : std_ulogic; -- set if this is the second op
spr_select : spr_id;
spr_is_ram : std_ulogic;
ramspr_even_rdaddr : ramspr_index;
ramspr_odd_rdaddr : ramspr_index;
ramspr_rd_odd : std_ulogic;
ramspr_wraddr : ramspr_index;
ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic;
dbg_spr_access : std_ulogic;
dec_ctr : std_ulogic;
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0',
lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
output_cr => '0', output_xer => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
@ -308,7 +364,13 @@ package common is
read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'),
result_sel => "000", sub_select => "000",
repeat => '0', second => '0', others => (others => '0'));
repeat => '0', second => '0', spr_select => spr_id_init,
spr_is_ram => '0',
ramspr_even_rdaddr => 0, ramspr_odd_rdaddr => 0, ramspr_rd_odd => '0',
ramspr_wraddr => 0, ramspr_write_even => '0', ramspr_write_odd => '0',
dbg_spr_access => '0',
dec_ctr => '0',
others => (others => '0'));

type MultiplyInputType is record
valid: std_ulogic;
@ -332,6 +394,7 @@ package common is

type Execute1ToDividerType is record
valid: std_ulogic;
flush: std_ulogic;
dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic;
@ -340,9 +403,8 @@ package common is
is_modulus: std_ulogic;
neg_result: std_ulogic;
end record;
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
neg_result => '0', others => (others => '0'));
constant Execute1ToDividerInit: Execute1ToDividerType := (
dividend => 64x"0", divisor => 64x"0", others => '0');

type PMUEventType is record
no_instr_avail : std_ulogic;
@ -391,11 +453,8 @@ package common is

type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
read1_reg : gspr_index_t;
read2_enable : std_ulogic;
read2_reg : gspr_index_t;
read3_enable : std_ulogic;
read3_reg : gspr_index_t;
end record;

type RegisterFileToDecode2Type is record
@ -437,6 +496,7 @@ package common is
is_32bit : std_ulogic;
repeat : std_ulogic;
second : std_ulogic;
e2stall : std_ulogic;
msr : std_ulogic_vector(63 downto 0);
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
@ -449,13 +509,12 @@ package common is
write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0',
repeat => '0', second => '0', e2stall => '0',
msr => (others => '0'));

type Loadstore1ToExecute1Type is record
busy : std_ulogic;
in_progress : std_ulogic;
interrupt : std_ulogic;
l2stall : std_ulogic;
end record;

type Loadstore1ToDcacheType is record
@ -498,7 +557,9 @@ package common is
iside : std_ulogic;
load : std_ulogic;
priv : std_ulogic;
sprn : std_ulogic_vector(9 downto 0);
ric : std_ulogic_vector(1 downto 0);
sprnf : std_ulogic;
sprnt : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
rs : std_ulogic_vector(63 downto 0);
end record;
@ -549,7 +610,6 @@ package common is
store_done : std_ulogic;
interrupt : std_ulogic;
intr_vec : intr_vector_t;
srr0: std_ulogic_vector(63 downto 0);
srr1: std_ulogic_vector(15 downto 0);
end record;
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
@ -557,7 +617,7 @@ package common is
write_reg => (others => '0'), write_data => (others => '0'),
xerc => xerc_init, rc => '0', store_done => '0',
interrupt => '0', intr_vec => 0,
srr0 => (others => '0'), srr1 => (others => '0'));
srr1 => (others => '0'));

type Loadstore1EventType is record
load_complete : std_ulogic;
@ -602,29 +662,37 @@ package common is
srr1 => (others => '0'), msr => (others => '0'));

type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
out_cr : std_ulogic;
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
is_signed : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
m32b : std_ulogic;
out_cr : std_ulogic;
oe : std_ulogic;
xerc : xer_common_t;
stall : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
itag => instr_tag_init,
insn => (others => '0'), fe_mode => "00", rc => '0',
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
single => '0', out_cr => '0');
single => '0', is_signed => '0', out_cr => '0',
m32b => '0', oe => '0', xerc => xerc_init,
stall => '0');

type FPUToExecute1Type is record
busy : std_ulogic;
f2stall : std_ulogic;
exception : std_ulogic;
end record;
constant FPUToExecute1Init : FPUToExecute1Type := (others => '0');
@ -639,8 +707,9 @@ package common is
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc : std_ulogic;
xerc : xer_common_t;
intr_vec : intr_vector_t;
srr0 : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0);
end record;
constant FPUToWritebackInit : FPUToWritebackType :=
@ -648,6 +717,7 @@ package common is
write_enable => '0', write_reg => (others => '0'),
write_cr_enable => '0', write_cr_mask => (others => '0'),
write_cr_data => (others => '0'),
write_xerc => '0', xerc => xerc_init,
intr_vec => 0, srr1 => (others => '0'),
others => (others => '0'));

@ -695,6 +765,11 @@ package common is
write_cr_mask => (others => '0'),
write_cr_data => (others => '0'));

type WritebackToExecute1Type is record
intr : std_ulogic;
srr1 : std_ulogic_vector(15 downto 0);
end record;

type WritebackEventType is record
instr_complete : std_ulogic;
fp_complete : std_ulogic;
@ -707,49 +782,6 @@ package body common is
begin
return to_integer(unsigned(insn(15 downto 11) & insn(20 downto 16)));
end;
function fast_spr_num(spr: spr_num_t) return gspr_index_t is
variable n : integer range 0 to 31;
-- tmp variable introduced as workaround for VCS compilation
-- simulation was failing with subtype constraint mismatch error
-- see GitHub PR #173
variable tmp : std_ulogic_vector(4 downto 0);
begin
case spr is
when SPR_LR =>
n := 0; -- N.B. decode2 relies on this specific value
when SPR_CTR =>
n := 1; -- N.B. decode2 relies on this specific value
when SPR_SRR0 =>
n := 2;
when SPR_SRR1 =>
n := 3;
when SPR_HSRR0 =>
n := 4;
when SPR_HSRR1 =>
n := 5;
when SPR_SPRG0 =>
n := 6;
when SPR_SPRG1 =>
n := 7;
when SPR_SPRG2 =>
n := 8;
when SPR_SPRG3 | SPR_SPRG3U =>
n := 9;
when SPR_HSPRG0 =>
n := 10;
when SPR_HSPRG1 =>
n := 11;
when SPR_XER =>
n := 12;
when SPR_TAR =>
n := 13;
when others =>
n := 0;
return "0000000";
end case;
tmp := std_ulogic_vector(to_unsigned(n, 5));
return "01" & tmp;
end;

function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
begin
@ -758,26 +790,12 @@ package body common is

function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
begin
return "00" & i;
end;

function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
begin
if s(5) = '1' then
return s;
else
return gpr_to_gspr(g);
end if;
end;

function is_fast_spr(s: gspr_index_t) return std_ulogic is
begin
return s(5);
return "0" & i;
end;

function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is
begin
return "10" & f;
return "1" & f;
end;

function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is

@ -15,11 +15,9 @@ entity control is

complete_in : in instr_tag_t;
valid_in : in std_ulogic;
repeated : in std_ulogic;
flush_in : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic;
serialize : in std_ulogic;
stop_mark_in : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
@ -36,42 +34,38 @@ entity control is

execute_next_tag : in instr_tag_t;
execute_next_cr_tag : in instr_tag_t;
execute2_next_tag : in instr_tag_t;
execute2_next_cr_tag : in instr_tag_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
ov_read_in : in std_ulogic;
ov_write_in : in std_ulogic;

valid_out : out std_ulogic;
stall_out : out std_ulogic;
stopped_out : out std_ulogic;

gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic;
cr_bypass : out std_ulogic;
gpr_bypass_a : out std_ulogic_vector(1 downto 0);
gpr_bypass_b : out std_ulogic_vector(1 downto 0);
gpr_bypass_c : out std_ulogic_vector(1 downto 0);
cr_bypass : out std_ulogic_vector(1 downto 0);

instr_tag_out : out instr_tag_t
);
end entity control;

architecture rtl of control is
type state_type is (IDLE, WAIT_FOR_PREV_TO_COMPLETE, WAIT_FOR_CURR_TO_COMPLETE);

type reg_internal_type is record
state : state_type;
outstanding : integer range -1 to PIPELINE_DEPTH+2;
end record;
constant reg_internal_init : reg_internal_type := (state => IDLE, outstanding => 0);

signal r_int, rin_int : reg_internal_type := reg_internal_init;

signal gpr_write_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal ov_write_valid : std_ulogic;

type tag_register is record
wr_gpr : std_ulogic;
reg : gspr_index_t;
recent : std_ulogic;
wr_cr : std_ulogic;
wr_ov : std_ulogic;
valid : std_ulogic;
end record;

type tag_regs_array is array(tag_number_t) of tag_register;
@ -81,30 +75,37 @@ architecture rtl of control is

signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic;
signal ov_tag_stall : std_ulogic;
signal serial_stall : std_ulogic;

signal curr_tag : tag_number_t;
signal next_tag : tag_number_t;

signal curr_cr_tag : tag_number_t;
signal curr_ov_tag : tag_number_t;
signal prev_tag : tag_number_t;

begin
control0: process(clk)
begin
if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
for i in tag_number_t loop
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
tag_regs(i).wr_ov <= '0';
tag_regs(i).valid <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
assert tag_regs(i).valid = '1' report "spurious completion" severity failure;
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
tag_regs(i).wr_ov <= '0';
tag_regs(i).valid <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then
if instr_tag.valid = '1' and gpr_write_valid = '1' and
tag_regs(i).reg = gpr_write_in then
tag_regs(i).recent <= '0';
if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then
report "tag " & integer'image(i) & " not recent";
@ -115,6 +116,8 @@ begin
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid;
tag_regs(i).wr_ov <= ov_write_valid;
tag_regs(i).valid <= '1';
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if;
@ -124,11 +127,19 @@ begin
if rst = '1' then
curr_tag <= 0;
curr_cr_tag <= 0;
curr_ov_tag <= 0;
prev_tag <= 0;
else
curr_tag <= next_tag;
if cr_write_valid = '1' then
if instr_tag.valid = '1' and cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag;
end if;
if instr_tag.valid = '1' and ov_write_valid = '1' then
curr_ov_tag <= instr_tag.tag;
end if;
if valid_out = '1' then
prev_tag <= instr_tag.tag;
end if;
end if;
end if;
end process;
@ -141,11 +152,13 @@ begin
variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t;
variable byp_a : std_ulogic;
variable byp_b : std_ulogic;
variable byp_c : std_ulogic;
variable byp_a : std_ulogic_vector(1 downto 0);
variable byp_b : std_ulogic_vector(1 downto 0);
variable byp_c : std_ulogic_vector(1 downto 0);
variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic;
variable byp_cr : std_ulogic_vector(1 downto 0);
variable tag_ov : instr_tag_t;
variable tag_prev : instr_tag_t;
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
@ -154,9 +167,6 @@ begin
tag_a.tag := i;
end if;
end loop;
if tag_match(tag_a, complete_in) then
tag_a.valid := '0';
end if;
tag_b := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
@ -164,9 +174,6 @@ begin
tag_b.tag := i;
end if;
end loop;
if tag_match(tag_b, complete_in) then
tag_b.valid := '0';
end if;
tag_c := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
@ -174,30 +181,39 @@ begin
tag_c.tag := i;
end if;
end loop;
if tag_match(tag_c, complete_in) then
tag_c.valid := '0';
end if;

byp_a := '0';
byp_a := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
byp_a := '1';
byp_a := "01";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
byp_a := "10";
elsif tag_match(complete_in, tag_a) then
byp_a := "11";
end if;
byp_b := '0';
byp_b := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := '1';
byp_b := "01";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
byp_b := "10";
elsif tag_match(complete_in, tag_b) then
byp_b := "11";
end if;
byp_c := '0';
byp_c := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := '1';
byp_c := "01";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
byp_c := "10";
elsif tag_match(complete_in, tag_c) then
byp_c := "11";
end if;

gpr_bypass_a <= byp_a;
gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c;

gpr_tag_stall <= (tag_a.valid and not byp_a) or
(tag_b.valid and not byp_b) or
(tag_c.valid and not byp_c);
gpr_tag_stall <= (tag_a.valid and not (or (byp_a))) or
(tag_b.valid and not (or (byp_b))) or
(tag_c.valid and not (or (byp_c)));

incr_tag := curr_tag;
instr_tag.tag <= curr_tag;
@ -214,115 +230,59 @@ begin
if tag_match(tag_cr, complete_in) then
tag_cr.valid := '0';
end if;
byp_cr := '0';
byp_cr := "00";
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
byp_cr := '1';
byp_cr := "10";
elsif EX1_BYPASS and tag_match(execute2_next_cr_tag, tag_cr) then
byp_cr := "11";
end if;

cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr(1);

-- OV hazards
tag_ov.tag := curr_ov_tag;
tag_ov.valid := ov_read_in and tag_regs(curr_ov_tag).wr_ov;
if tag_match(tag_ov, complete_in) then
tag_ov.valid := '0';
end if;
ov_tag_stall <= tag_ov.valid;

tag_prev.tag := prev_tag;
tag_prev.valid := tag_regs(prev_tag).valid;
if tag_match(tag_prev, complete_in) then
tag_prev.valid := '0';
end if;
serial_stall <= tag_prev.valid;
end process;

control1 : process(all)
variable v_int : reg_internal_type;
variable valid_tmp : std_ulogic;
variable stall_tmp : std_ulogic;
begin
v_int := r_int;

-- asynchronous
valid_tmp := valid_in and not flush_in;
stall_tmp := '0';

if flush_in = '1' then
v_int.outstanding := 0;
elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
if r_int.outstanding >= PIPELINE_DEPTH + 1 then
valid_tmp := '0';
stall_tmp := '1';
end if;

if rst = '1' then
gpr_write_valid <= '0';
cr_write_valid <= '0';
v_int := reg_internal_init;
valid_tmp := '0';
end if;

-- Handle debugger stop
stopped_out <= '0';
if stop_mark_in = '1' and v_int.outstanding = 0 then
stopped_out <= '1';
end if;

-- state machine to handle instructions that must be single
-- through the pipeline.
case r_int.state is
when IDLE =>
if valid_tmp = '1' then
if (sgl_pipe_in = '1') then
if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_tmp := '1';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;

when WAIT_FOR_PREV_TO_COMPLETE =>
if v_int.outstanding = 0 then
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
else
stall_tmp := '1';
end if;
stopped_out <= stop_mark_in and not serial_stall;

when WAIT_FOR_CURR_TO_COMPLETE =>
if v_int.outstanding = 0 then
v_int.state := IDLE;
-- XXX Don't replicate this
if valid_tmp = '1' then
if (sgl_pipe_in = '1') then
if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_tmp := '1';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
else
stall_tmp := '1';
end if;
end case;

if stall_tmp = '1' then
-- Don't let it go out if there are GPR or CR hazards
-- or we are waiting for the previous instruction to complete
if (gpr_tag_stall or cr_tag_stall or ov_tag_stall or
(serialize and serial_stall)) = '1' then
valid_tmp := '0';
end if;

gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp;

if valid_tmp = '1' and deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;
ov_write_valid <= ov_write_in and valid_tmp;

-- update outputs
valid_out <= valid_tmp;
stall_out <= stall_tmp or deferred;

-- update registers
rin_int <= v_int;
end process;
end;

@ -63,6 +63,7 @@ architecture behave of core is
-- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type;
signal decode1_to_fetch1: Decode1ToFetch1Type;
signal decode1_to_register_file: Decode1ToRegisterFileType;
signal decode2_to_execute1: Decode2ToExecute1Type;

-- register file signals
@ -79,6 +80,8 @@ architecture behave of core is
signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_bypass: bypass_data_t;
signal execute1_cr_bypass: cr_bypass_data_t;
signal execute2_bypass: bypass_data_t;
signal execute2_cr_bypass: cr_bypass_data_t;

-- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@ -98,6 +101,10 @@ architecture behave of core is
signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType;

-- Writeback signals
signal writeback_bypass: bypass_data_t;
signal wb_interrupt: WritebackToExecute1Type;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
@ -117,7 +124,6 @@ architecture behave of core is
signal complete: instr_tag_t;
signal terminate: std_ulogic;
signal core_rst: std_ulogic;
signal do_interrupt: std_ulogic;

-- Delayed/Latched resets and alt_reset
signal rst_fetch1 : std_ulogic;
@ -133,6 +139,7 @@ architecture behave of core is
signal rst_dbg : std_ulogic;
signal alt_reset_d : std_ulogic;

signal sim_ex_dump: std_ulogic;
signal sim_cr_dump: std_ulogic;

-- Debug actions
@ -144,8 +151,16 @@ architecture behave of core is
signal dbg_gpr_ack : std_ulogic;
signal dbg_gpr_addr : gspr_index_t;
signal dbg_gpr_data : std_ulogic_vector(63 downto 0);
signal dbg_spr_req : std_ulogic;
signal dbg_spr_ack : std_ulogic;
signal dbg_spr_addr : std_ulogic_vector(7 downto 0);
signal dbg_spr_data : std_ulogic_vector(63 downto 0);
signal dbg_ls_spr_req : std_ulogic;
signal dbg_ls_spr_ack : std_ulogic;
signal dbg_ls_spr_addr : std_ulogic_vector(1 downto 0);
signal dbg_ls_spr_data : std_ulogic_vector(63 downto 0);

signal msr : std_ulogic_vector(63 downto 0);
signal ctrl_debug : ctrl_t;

-- PMU event bus
signal icache_events : IcacheEventType;
@ -271,6 +286,7 @@ begin
f_in => icache_to_decode1,
d_out => decode1_to_decode2,
f_out => decode1_to_fetch1,
r_out => decode1_to_register_file,
log_out => log_data(109 downto 97)
);

@ -298,6 +314,11 @@ begin
c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass,
execute_cr_bypass => execute1_cr_bypass,
execute2_bypass => execute2_bypass,
execute2_cr_bypass => execute2_cr_bypass,
writeback_bypass => writeback_bypass,
dbg_spr_req => dbg_spr_req,
dbg_spr_addr => dbg_spr_addr,
log_out => log_data(119 downto 110)
);
decode2_busy_in <= ex1_busy_out;
@ -310,6 +331,8 @@ begin
)
port map (
clk => clk,
stall => decode2_stall_out,
d1_in => decode1_to_register_file,
d_in => decode2_to_register_file,
d_out => register_file_to_decode2,
w_in => writeback_to_register_file,
@ -318,7 +341,7 @@ begin
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump,
sim_dump_done => sim_ex_dump,
log_out => log_data(255 downto 184)
);

@ -333,11 +356,13 @@ begin
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump,
ctrl => ctrl_debug,
log_out => log_data(183 downto 171)
);

execute1_0: entity work.execute1
generic map (
SIM => SIM,
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
HAS_SHORT_MULT => HAS_SHORT_MULT,
@ -352,19 +377,27 @@ begin
l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq,
interrupt_in => do_interrupt,
interrupt_in => wb_interrupt,
l_out => execute1_to_loadstore1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
bypass_data => execute1_bypass,
bypass_cr_data => execute1_cr_bypass,
bypass2_data => execute2_bypass,
bypass2_cr_data => execute2_cr_bypass,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
dbg_ctrl_out => ctrl_debug,
wb_events => writeback_events,
ls_events => loadstore_events,
dc_events => dcache_events,
ic_events => icache_events,
terminate_out => terminate,
dbg_spr_req => dbg_spr_req,
dbg_spr_ack => dbg_spr_ack,
dbg_spr_addr => dbg_spr_addr,
dbg_spr_data => dbg_spr_data,
sim_dump => sim_ex_dump,
sim_dump_done => sim_cr_dump,
log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
@ -377,6 +410,7 @@ begin
port map (
clk => clk,
rst => rst_fpu,
flush_in => flush,
e_in => execute1_to_fpu,
e_out => fpu_to_execute1,
w_out => fpu_to_writeback
@ -406,6 +440,10 @@ begin
m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out,
events => loadstore_events,
dbg_spr_req => dbg_ls_spr_req,
dbg_spr_ack => dbg_ls_spr_ack,
dbg_spr_addr => dbg_ls_spr_addr,
dbg_spr_data => dbg_ls_spr_data,
log_out => log_data(149 downto 140)
);

@ -455,8 +493,9 @@ begin
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
f_out => writeback_to_fetch1,
wb_bypass => writeback_bypass,
events => writeback_events,
interrupt_out => do_interrupt,
interrupt_out => wb_interrupt,
complete_out => complete
);

@ -482,11 +521,19 @@ begin
terminate => terminate,
core_stopped => dbg_core_is_stopped,
nia => fetch1_to_icache.nia,
msr => msr,
msr => ctrl_debug.msr,
dbg_gpr_req => dbg_gpr_req,
dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
dbg_spr_req => dbg_spr_req,
dbg_spr_ack => dbg_spr_ack,
dbg_spr_addr => dbg_spr_addr,
dbg_spr_data => dbg_spr_data,
dbg_ls_spr_req => dbg_ls_spr_req,
dbg_ls_spr_ack => dbg_ls_spr_ack,
dbg_ls_spr_addr => dbg_ls_spr_addr,
dbg_ls_spr_data => dbg_ls_spr_data,
log_data => log_data,
log_read_addr => log_rd_addr,
log_read_data => log_rd_data,

@ -33,12 +33,24 @@ entity core_debug is
nia : in std_ulogic_vector(63 downto 0);
msr : in std_ulogic_vector(63 downto 0);

-- GSPR register read port
-- GPR/FPR register read port
dbg_gpr_req : out std_ulogic;
dbg_gpr_ack : in std_ulogic;
dbg_gpr_addr : out gspr_index_t;
dbg_gpr_data : in std_ulogic_vector(63 downto 0);

-- SPR register read port for SPRs in execute1
dbg_spr_req : out std_ulogic;
dbg_spr_ack : in std_ulogic;
dbg_spr_addr : out std_ulogic_vector(7 downto 0);
dbg_spr_data : in std_ulogic_vector(63 downto 0);

-- SPR register read port for SPRs in loadstore1 and mmu
dbg_ls_spr_req : out std_ulogic;
dbg_ls_spr_ack : in std_ulogic;
dbg_ls_spr_addr : out std_ulogic_vector(1 downto 0);
dbg_ls_spr_data : in std_ulogic_vector(63 downto 0);

-- Core logging data
log_data : in std_ulogic_vector(255 downto 0);
log_read_addr : in std_ulogic_vector(31 downto 0);
@ -105,7 +117,10 @@ architecture behave of core_debug is
signal do_icreset : std_ulogic;
signal terminated : std_ulogic;
signal do_gspr_rd : std_ulogic;
signal gspr_index : gspr_index_t;
signal gspr_index : std_ulogic_vector(7 downto 0);
signal gspr_data : std_ulogic_vector(63 downto 0);

signal spr_index_valid : std_ulogic;

signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others => '0');
signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0');
@ -119,9 +134,7 @@ architecture behave of core_debug is
begin
-- Single cycle register accesses on DMI except for GSPR data
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
else dbg_gpr_ack;
dbg_gpr_req <= dmi_req when dmi_addr = DBG_CORE_GSPR_DATA
else '0';
else dbg_gpr_ack or dbg_spr_ack or dbg_ls_spr_ack;

-- Status register read composition
stat_reg <= (2 => terminated,
@ -129,12 +142,17 @@ begin
0 => stopping,
others => '0');

gspr_data <= dbg_gpr_data when gspr_index(5) = '0' else
dbg_ls_spr_data when dbg_ls_spr_req = '1' else
dbg_spr_data when spr_index_valid = '1' else
(others => '0');

-- DMI read data mux
with dmi_addr select dmi_dout <=
stat_reg when DBG_CORE_STAT,
nia when DBG_CORE_NIA,
msr when DBG_CORE_MSR,
dbg_gpr_data when DBG_CORE_GSPR_DATA,
gspr_data when DBG_CORE_GSPR_DATA,
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
log_dmi_data when DBG_CORE_LOG_DATA,
log_dmi_trigger when DBG_CORE_LOG_TRIGGER,
@ -191,7 +209,7 @@ begin
terminated <= '0';
end if;
elsif dmi_addr = DBG_CORE_GSPR_INDEX then
gspr_index <= dmi_din(gspr_index_t'left downto 0);
gspr_index <= dmi_din(7 downto 0);
elsif dmi_addr = DBG_CORE_LOG_ADDR then
log_dmi_addr <= dmi_din(31 downto 0);
do_dmi_log_rd <= '1';
@ -226,7 +244,70 @@ begin
end if;
end process;

dbg_gpr_addr <= gspr_index;
gspr_access: process(clk)
variable valid : std_ulogic;
variable sel : spr_selector;
variable isram : std_ulogic;
variable raddr : ramspr_index;
variable odd : std_ulogic;
begin
if rising_edge(clk) then
dbg_gpr_req <= '0';
dbg_spr_req <= '0';
dbg_ls_spr_req <= '0';
if rst = '0' and dmi_req = '1' and dmi_addr = DBG_CORE_GSPR_DATA then
if gspr_index(5) = '0' then
dbg_gpr_req <= '1';
elsif gspr_index(4 downto 2) = "111" then
dbg_ls_spr_req <= '1';
else
dbg_spr_req <= '1';
end if;
end if;

-- Map 0 - 0x1f to GPRs, 0x20 - 0x3f to SPRs, and 0x40 - 0x5f to FPRs
dbg_gpr_addr <= gspr_index(6) & gspr_index(4 downto 0);
dbg_ls_spr_addr <= gspr_index(1 downto 0);

-- For SPRs, use the same mapping as when the fast SPRs were in the GPR file
valid := '1';
sel := "000";
isram := '1';
raddr := 0;
odd := '0';
case gspr_index(4 downto 0) is
when 5x"00" =>
raddr := RAMSPR_LR;
when 5x"01" =>
odd := '1';
raddr := RAMSPR_CTR;
when 5x"02" | 5x"03" =>
odd := gspr_index(0);
raddr := RAMSPR_SRR0;
when 5x"04" | 5x"05" =>
odd := gspr_index(0);
raddr := RAMSPR_HSRR0;
when 5x"06" | 5x"07" =>
odd := gspr_index(0);
raddr := RAMSPR_SPRG0;
when 5x"08" | 5x"09" =>
odd := gspr_index(0);
raddr := RAMSPR_SPRG2;
when 5x"0a" | 5x"0b" =>
odd := gspr_index(0);
raddr := RAMSPR_HSPRG0;
when 5x"0c" =>
isram := '0';
sel := SPRSEL_XER;
when 5x"0d" =>
raddr := RAMSPR_TAR;
when others =>
valid := '0';
end case;
dbg_spr_addr <= isram & sel & std_ulogic_vector(to_unsigned(raddr, 3)) & odd;
spr_index_valid <= valid;
end if;
end process;

-- Core control signals generated by the debug module
core_stop <= stopping and not do_step;

@ -9,6 +9,7 @@ entity bit_counter is
port (
clk : in std_logic;
rs : in std_ulogic_vector(63 downto 0);
stall : in std_ulogic;
count_right : in std_ulogic;
do_popcnt : in std_ulogic;
is_32bit : in std_ulogic;
@ -49,7 +50,7 @@ architecture behaviour of bit_counter is
begin
countzero_r: process(clk)
begin
if rising_edge(clk) then
if rising_edge(clk) and stall = '0' then
inp_r <= inp;
sum_r <= sum;
end if;
@ -88,7 +89,7 @@ begin

popcnt_r: process(clk)
begin
if rising_edge(clk) then
if rising_edge(clk) and stall = '0' then
for i in 0 to 7 loop
pc8_r(i) <= pc8(i);
end loop;

@ -26,6 +26,7 @@ begin
bitcounter_0: entity work.bit_counter
port map (
clk => clk,
stall => '0',
rs => rs,
result => res,
count_right => count_right,

@ -18,6 +18,7 @@ entity cr_file is
d_out : out CrFileToDecode2Type;

w_in : in WritebackToCrFileType;
ctrl : in ctrl_t;

-- debug
sim_dump : in std_ulogic;
@ -65,7 +66,11 @@ begin
crs <= crs_updated;
end if;
if w_in.write_xerc_enable = '1' then
report "Writing XERC";
report "Writing XERC SO=" & std_ulogic'image(xerc_updated.so) &
" OV=" & std_ulogic'image(xerc_updated.ov) &
" CA=" & std_ulogic'image(xerc_updated.ca) &
" OV32=" & std_ulogic'image(xerc_updated.ov32) &
" CA32=" & std_ulogic'image(xerc_updated.ca32);
xerc <= xerc_updated;
end if;
end if;
@ -84,9 +89,18 @@ begin

sim_dump_test: if SIM generate
dump_cr: process(all)
variable xer : std_ulogic_vector(31 downto 0);
begin
if sim_dump = '1' then
report "CR 00000000" & to_hstring(crs);
xer := (others => '0');
xer(31) := xerc.so;
xer(30) := xerc.ov;
xer(29) := xerc.ca;
xer(19) := xerc.ov32;
xer(18) := xerc.ca32;
xer(17 downto 0) := ctrl.xer_low;
report "XER 00000000" & to_hstring(xer);
assert false report "end of test" severity failure;
end if;
end process;

@ -588,7 +588,7 @@ begin
end if;
if rst = '1' then
r0_full <= '0';
elsif (r1.full = '0' and d_in.hold = '0') or r0_full = '0' then
elsif r1.full = '0' and d_in.hold = '0' then
r0 <= r;
r0_full <= r.req.valid;
elsif r0.d_valid = '0' then
@ -605,9 +605,9 @@ begin
m_out.stall <= '0';

-- Hold off the request in r0 when r1 has an uncompleted request
r0_stall <= r0_full and (r1.full or d_in.hold);
r0_stall <= r1.full or d_in.hold;
r0_valid <= r0_full and not r1.full and not d_in.hold;
stall_out <= r0_stall;
stall_out <= r1.full;

events <= ev;


@ -5,6 +5,7 @@ use ieee.numeric_std.all;
library work;
use work.common.all;
use work.decode_types.all;
use work.insn_helpers.all;

entity decode1 is
generic (
@ -24,18 +25,30 @@ entity decode1 is
f_in : in IcacheToDecode1Type;
f_out : out Decode1ToFetch1Type;
d_out : out Decode1ToDecode2Type;
r_out : out Decode1ToRegisterFileType;
log_out : out std_ulogic_vector(12 downto 0)
);
end entity decode1;

architecture behaviour of decode1 is
signal r, rin : Decode1ToDecode2Type;
signal s : Decode1ToDecode2Type;
signal f, fin : Decode1ToFetch1Type;

constant illegal_inst : decode_rom_t :=
(NONE, NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE);

-- If we have an FPU, then it is used for integer divisions,
-- otherwise a dedicated divider in the ALU is used.
function divider_unit(hf : boolean) return unit_t is
begin
if hf then
return FPU;
else
return ALU;
end if;
end;
constant DVU : unit_t := divider_unit(HAS_FPU);

type reg_internal_t is record
override : std_ulogic;
override_decode: decode_rom_t;
@ -46,7 +59,6 @@ architecture behaviour of decode1 is
(override => '0', override_decode => illegal_inst, override_unit => '0', force_single => '0');

signal ri, ri_in : reg_internal_t;
signal si : reg_internal_t;

type br_predictor_t is record
br_nia : std_ulogic_vector(61 downto 0);
@ -79,8 +91,8 @@ architecture behaviour of decode1 is
28 => (ALU, NONE, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andi.
29 => (ALU, NONE, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andis.
0 => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- attn
18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
16 => (ALU, NONE, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- bc
18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
16 => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- bc
11 => (ALU, NONE, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmpi
10 => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpli
34 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbz
@ -93,7 +105,6 @@ architecture behaviour of decode1 is
43 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhau
40 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhz
41 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhzu
56 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DQ, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTE), -- lq
32 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwz
33 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwzu
7 => (ALU, NONE, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- mulli
@ -168,11 +179,11 @@ architecture behaviour of decode1 is
-- addpcis
2#001# => (ALU, NONE, OP_ADD, CIA, CONST_DXHI4, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-- bclr, bcctr, bctar
2#100# => (ALU, NONE, OP_BCREG, SPR, SPR, NONE, SPR, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
2#100# => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
-- isync
2#111# => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
2#111# => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
-- rfid
2#101# => (ALU, NONE, OP_RFID, SPR, SPR, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
2#101# => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
others => illegal_inst
);

@ -223,31 +234,31 @@ architecture behaviour of decode1 is
2#1000111010# => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- cnttzd
2#1000011010# => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- cnttzw
2#1011110011# => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- darn
2#0001010110# => (ALU, NONE, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- dcbf
2#0000110110# => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- dcbst
2#0100010110# => (ALU, NONE, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- dcbt
2#0011110110# => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- dcbtst
2#0001010110# => (ALU, NONE, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbf
2#0000110110# => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbst
2#0100010110# => (ALU, NONE, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbt
2#0011110110# => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbtst
2#1111110110# => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbz
2#0110001001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeu
2#1110001001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeuo
2#0110001011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweu
2#1110001011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweuo
2#0110101001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divde
2#1110101001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdeo
2#0110101011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwe
2#1110101011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divweo
2#0111001001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdu
2#1111001001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divduo
2#0111001011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwu
2#1111001011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwuo
2#0111101001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divd
2#1111101001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdo
2#0111101011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divw
2#1111101011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwo
2#0110001001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeu
2#1110001001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeuo
2#0110001011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweu
2#1110001011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweuo
2#0110101001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divde
2#1110101001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdeo
2#0110101011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwe
2#1110101011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divweo
2#0111001001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdu
2#1111001001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divduo
2#0111001011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwu
2#1111001011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwuo
2#0111101001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divd
2#1111101001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdo
2#0111101011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divw
2#1111101011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwo
2#1100110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dss
2#0101010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dst
2#0101110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dstst
2#1101010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- eieio
2#1101010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- eieio
2#0100011100# => (ALU, NONE, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- eqv
2#1110111010# => (ALU, NONE, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- extsb
2#1110011010# => (ALU, NONE, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- extsh
@ -310,7 +321,6 @@ architecture behaviour of decode1 is
2#1100110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhzcix
2#0100110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhzux
2#0100010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhzx
2#0100010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', DRTE), -- lqarx
2#0000010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lwarx
2#0101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwaux
2#0101010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwax
@ -321,15 +331,15 @@ architecture behaviour of decode1 is
2#1001000000# => (ALU, NONE, OP_MCRXRX, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mcrxrx
2#0000010011# => (ALU, NONE, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfcr/mfocrf
2#0001010011# => (ALU, NONE, OP_MFMSR, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- mfmsr
2#0101010011# => (ALU, NONE, OP_MFSPR, SPR, NONE, RS, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfspr
2#0100001001# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- modud
2#0100001011# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- moduw
2#1100001001# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- modsd
2#1100001011# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0', NONE), -- modsw
2#0101010011# => (ALU, NONE, OP_MFSPR, NONE, NONE, RS, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfspr
2#0100001001# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- modud
2#0100001011# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- moduw
2#1100001001# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- modsd
2#1100001011# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0', NONE), -- modsw
2#0010010000# => (ALU, NONE, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtcrf/mtocrf
2#0010010010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1', NONE), -- mtmsr
2#0010110010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- mtmsrd # ignore top bits and d
2#0111010011# => (ALU, NONE, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtspr
2#0010010010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- mtmsr
2#0010110010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtmsrd # ignore top bits and d
2#0111010011# => (ALU, NONE, OP_MTSPR, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtspr
2#0001001001# => (ALU, NONE, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- mulhd
2#0000001001# => (ALU, NONE, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- mulhdu
2#0001001011# => (ALU, NONE, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- mulhw
@ -393,7 +403,6 @@ architecture behaviour of decode1 is
2#1011010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- sthcx
2#0110110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthux
2#0110010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthx
2#0010110110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', DRSE), -- stqcx
2#1010010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwbrx
2#1110010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwcix
2#0010010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stwcx
@ -409,13 +418,13 @@ architecture behaviour of decode1 is
2#1011101000# => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfmeo
2#0011001000# => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfze
2#1011001000# => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfzeo
2#1001010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- sync
2#1001010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sync
2#0001000100# => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- td
2#0000000100# => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- tw
2#0100110010# => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- tlbie
2#0100010010# => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- tlbiel
2#1000110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- tlbsync
2#0000011110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- wait
2#1000110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- tlbsync
2#0000011110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- wait
2#0100111100# => (ALU, NONE, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- xor
others => illegal_inst
);
@ -452,7 +461,6 @@ architecture behaviour of decode1 is
-- op in out A out in out len ext pipe
0 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- std
1 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdu
2 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSE), -- stq
others => decode_rom_init
);

@ -519,32 +527,95 @@ architecture behaviour of decode1 is
constant nop_instr : decode_rom_t := (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE);
constant fetch_fail_inst: decode_rom_t := (LDST, NONE, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE);

function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
variable ret : ram_spr_info;
begin
ret := (index => 0, isodd => '0', valid => '1');
case sprn is
when SPR_LR =>
ret.index := RAMSPR_LR;
when SPR_CTR =>
ret.index := RAMSPR_CTR;
ret.isodd := '1';
when SPR_TAR =>
ret.index := RAMSPR_TAR;
when SPR_SRR0 =>
ret.index := RAMSPR_SRR0;
when SPR_SRR1 =>
ret.index := RAMSPR_SRR1;
ret.isodd := '1';
when SPR_HSRR0 =>
ret.index := RAMSPR_HSRR0;
when SPR_HSRR1 =>
ret.index := RAMSPR_HSRR1;
ret.isodd := '1';
when SPR_SPRG0 =>
ret.index := RAMSPR_SPRG0;
when SPR_SPRG1 =>
ret.index := RAMSPR_SPRG1;
ret.isodd := '1';
when SPR_SPRG2 =>
ret.index := RAMSPR_SPRG2;
when SPR_SPRG3 | SPR_SPRG3U =>
ret.index := RAMSPR_SPRG3;
ret.isodd := '1';
when SPR_HSPRG0 =>
ret.index := RAMSPR_HSPRG0;
when SPR_HSPRG1 =>
ret.index := RAMSPR_HSPRG1;
ret.isodd := '1';
when others =>
ret.valid := '0';
end case;
return ret;
end;

function map_spr(sprn : spr_num_t) return spr_id is
variable i : spr_id;
begin
i.sel := "000";
i.valid := '1';
i.ispmu := '0';
case sprn is
when SPR_TB =>
i.sel := SPRSEL_TB;
when SPR_TBU =>
i.sel := SPRSEL_TBU;
when SPR_DEC =>
i.sel := SPRSEL_DEC;
when SPR_PVR =>
i.sel := SPRSEL_PVR;
when 724 => -- LOG_ADDR SPR
i.sel := SPRSEL_LOGA;
when 725 => -- LOG_DATA SPR
i.sel := SPRSEL_LOGD;
when SPR_UPMC1 | SPR_UPMC2 | SPR_UPMC3 | SPR_UPMC4 | SPR_UPMC5 | SPR_UPMC6 |
SPR_UMMCR0 | SPR_UMMCR1 | SPR_UMMCR2 | SPR_UMMCRA | SPR_USIER | SPR_USIAR | SPR_USDAR |
SPR_PMC1 | SPR_PMC2 | SPR_PMC3 | SPR_PMC4 | SPR_PMC5 | SPR_PMC6 |
SPR_MMCR0 | SPR_MMCR1 | SPR_MMCR2 | SPR_MMCRA | SPR_SIER | SPR_SIAR | SPR_SDAR =>
i.ispmu := '1';
when SPR_CFAR =>
i.sel := SPRSEL_CFAR;
when SPR_XER =>
i.sel := SPRSEL_XER;
when others =>
i.valid := '0';
end case;
return i;
end;

begin
decode1_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
r <= Decode1ToDecode2Init;
s <= Decode1ToDecode2Init;
ri <= reg_internal_t_init;
si <= reg_internal_t_init;
elsif flush_in = '1' then
r.valid <= '0';
s.valid <= '0';
elsif s.valid = '1' then
if stall_in = '0' then
r <= s;
ri <= si;
s.valid <= '0';
end if;
else
s <= rin;
si <= ri_in;
s.valid <= rin.valid and r.valid and stall_in;
if r.valid = '0' or stall_in = '0' then
r <= rin;
ri <= ri_in;
end if;
elsif stall_in = '0' then
r <= rin;
ri <= ri_in;
end if;
if rst = '1' then
br.br_nia <= (others => '0');
@ -555,10 +626,11 @@ begin
end if;
end if;
end process;
busy_out <= s.valid;
busy_out <= stall_in;

decode1_1: process(all)
variable v : Decode1ToDecode2Type;
variable vr : Decode1ToRegisterFileType;
variable vi : reg_internal_t;
variable majorop : major_opcode_t;
variable minor4op : std_ulogic_vector(10 downto 0);
@ -567,6 +639,9 @@ begin
variable br_target : std_ulogic_vector(61 downto 0);
variable br_offset : signed(23 downto 0);
variable bv : br_predictor_t;
variable fprs, fprabc : std_ulogic;
variable in3rc : std_ulogic;
variable may_read_rb : std_ulogic;
begin
v := Decode1ToDecode2Init;
vi := reg_internal_t_init;
@ -577,6 +652,11 @@ begin
v.stop_mark := f_in.stop_mark;
v.big_endian := f_in.big_endian;

fprs := '0';
fprabc := '0';
in3rc := '0';
may_read_rb := '0';

if f_in.valid = '1' then
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
end if;
@ -586,52 +666,52 @@ begin
majorop := unsigned(f_in.insn(31 downto 26));
v.decode := major_decode_rom_array(to_integer(majorop));

sprn := decode_spr_num(f_in.insn);
v.spr_info := map_spr(sprn);
v.ram_spr := decode_ram_spr(sprn);

case to_integer(unsigned(majorop)) is
when 4 =>
-- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*)
minor4op := f_in.insn(5 downto 0) & f_in.insn(10 downto 6);
vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op)));
v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0))));
in3rc := '1';
may_read_rb := '1';

when 23 =>
-- rlwnm[.]
may_read_rb := '1';

when 31 =>
-- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));

-- Work out ispr1/ispro independent of v.decode since they seem to be critical path
sprn := decode_spr_num(f_in.insn);
v.ispr1 := fast_spr_num(sprn);
v.ispro := fast_spr_num(sprn);
may_read_rb := '1';

if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr
-- Make slow SPRs single issue
if is_fast_spr(v.ispr1) = '0' then
vi.force_single := '1';
-- send MMU-related SPRs to loadstore1
case sprn is
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
vi.override_decode.unit := LDST;
vi.override_unit := '1';
when others =>
end case;
-- Make mtspr to slow SPRs single issue
if v.spr_info.valid = '1' then
vi.force_single := f_in.insn(8);
end if;
-- send MMU-related SPRs to loadstore1
case sprn is
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
vi.override_decode.unit := LDST;
vi.override_unit := '1';
-- make mtspr to loadstore SPRs single-issue
if f_in.insn(8) = '1' then
vi.force_single := '1';
end if;
when others =>
end case;
end if;
if std_match(f_in.insn(10 downto 1), "0100010100") then
-- lqarx, illegal if RA = RT or RB = RT
if f_in.insn(25 downto 21) = f_in.insn(20 downto 16) or
f_in.insn(25 downto 21) = f_in.insn(15 downto 11) then
vi.override := '1';
end if;
if HAS_FPU and std_match(f_in.insn(10 downto 1), "1----10111") then
-- lower half of column 23 has FP loads and stores
fprs := '1';
end if;

when 16 =>
-- CTR may be needed as input to bc
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
v.ispro := fast_spr_num(SPR_CTR);
elsif f_in.insn(0) = '1' then
v.ispro := fast_spr_num(SPR_LR);
end if;
-- Predict backward branches as taken, forward as untaken
v.br_pred := f_in.insn(15);
br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
@ -640,41 +720,12 @@ begin
-- Unconditional branches are always taken
v.br_pred := '1';
br_offset := signed(f_in.insn(25 downto 2));
if f_in.insn(0) = '1' then
v.ispro := fast_spr_num(SPR_LR);
end if;

when 19 =>
vi.override := not decode_op_19_valid(to_integer(unsigned(f_in.insn(5 downto 1) & f_in.insn(10 downto 6))));
op_19_bits := f_in.insn(5) & f_in.insn(3) & f_in.insn(2);
v.decode := decode_op_19_array(to_integer(unsigned(op_19_bits)));

-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
if f_in.insn(2) = '0' then
-- Could be OP_BCREG: bclr, bcctr, bctar
-- Branch uses CTR as condition when BO(2) is 0. This is
-- also used to indicate that CTR is modified (they go
-- together).
-- bcctr doesn't update CTR or use it in the branch condition
if f_in.insn(23) = '0' and (f_in.insn(10) = '0' or f_in.insn(6) = '1') then
v.ispr1 := fast_spr_num(SPR_CTR);
v.ispro := fast_spr_num(SPR_CTR);
elsif f_in.insn(0) = '1' then
v.ispro := fast_spr_num(SPR_LR);
end if;
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);
elsif f_in.insn(6) = '0' then
v.ispr2 := fast_spr_num(SPR_CTR);
else
v.ispr2 := fast_spr_num(SPR_TAR);
end if;
else
-- Could be OP_RFID
v.ispr1 := fast_spr_num(SPR_SRR1);
v.ispr2 := fast_spr_num(SPR_SRR0);
end if;

when 24 =>
-- ori, special-case the standard NOP
if std_match(f_in.insn, "01100000000000000000000000000000") then
@ -685,11 +736,12 @@ begin

when 30 =>
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));
may_read_rb := f_in.insn(4);

when 56 =>
-- lq, illegal if RA = RT
if f_in.insn(25 downto 21) = f_in.insn(20 downto 16) then
vi.override := '1';
when 52 | 53 | 54 | 55 =>
-- stfd[u] and stfs[u]
if HAS_FPU then
fprs := '1';
end if;

when 58 =>
@ -702,6 +754,10 @@ begin
if f_in.insn(5) = '0' and not std_match(f_in.insn(10 downto 1), "11-1001110") then
vi.override := '1';
end if;
in3rc := '1';
fprabc := '1';
fprs := '1';
may_read_rb := '1';
end if;

when 62 =>
@ -715,11 +771,31 @@ begin
else
v.decode := decode_op_63h_array(to_integer(unsigned(f_in.insn(4 downto 1))));
end if;
in3rc := '1';
fprabc := '1';
fprs := '1';
may_read_rb := '1';
end if;

when others =>
end case;

-- Work out GPR/FPR read addresses
vr.reg_1_addr := fprabc & insn_ra(f_in.insn);
vr.reg_2_addr := fprabc & insn_rb(f_in.insn);
if in3rc = '1' then
vr.reg_3_addr := fprabc & insn_rcreg(f_in.insn);
else
vr.reg_3_addr := fprs & insn_rs(f_in.insn);
end if;
vr.read_1_enable := f_in.valid and not f_in.fetch_failed;
vr.read_2_enable := f_in.valid and not f_in.fetch_failed and may_read_rb;
vr.read_3_enable := f_in.valid and not f_in.fetch_failed;

v.reg_a := vr.reg_1_addr;
v.reg_b := vr.reg_2_addr;
v.reg_c := vr.reg_3_addr;

if f_in.fetch_failed = '1' then
v.valid := '1';
vi.override := '1';
@ -765,6 +841,8 @@ begin
f_out.redirect <= br.predict;
f_out.redirect_nia <= br_target & "00";
flush_out <= bv.predict or br.predict;

r_out <= vr;
end process;

d1_log: if LOG_LENGTH > 0 generate

@ -39,6 +39,13 @@ entity decode2 is

execute_bypass : in bypass_data_t;
execute_cr_bypass : in cr_bypass_data_t;
execute2_bypass : in bypass_data_t;
execute2_cr_bypass : in cr_bypass_data_t;
writeback_bypass : in bypass_data_t;

-- Access to SPRs from core_debug module
dbg_spr_req : in std_ulogic;
dbg_spr_addr : in std_ulogic_vector(7 downto 0);

log_out : out std_ulogic_vector(9 downto 0)
);
@ -47,10 +54,22 @@ end entity decode2;
architecture behaviour of decode2 is
type reg_type is record
e : Decode2ToExecute1Type;
repeat : std_ulogic;
repeat : repeat_t;
busy : std_ulogic;
sgl_pipe : std_ulogic;
prev_sgl : std_ulogic;
reg_a_valid : std_ulogic;
reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic;
reg_o_valid : std_ulogic;
input_ov : std_ulogic;
output_ov : std_ulogic;
read_rspr : std_ulogic;
end record;
constant reg_type_init : reg_type :=
(e => Decode2ToExecute1Init, repeat => NONE, others => '0');

signal r, rin : reg_type;
signal dc2, dc2in : reg_type;

signal deferred : std_ulogic;

@ -59,49 +78,39 @@ architecture behaviour of decode2 is
reg : gspr_index_t;
data : std_ulogic_vector(63 downto 0);
end record;
constant decode_input_reg_init : decode_input_reg_t := ('0', (others => '0'), (others => '0'));

type decode_output_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
end record;
constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));

function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t;
instr_addr : std_ulogic_vector(63 downto 0))
return decode_input_reg_t is
begin
if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
elsif t = SPR then
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
--
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
elsif t = CIA then
return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
else
return ('0', (others => '0'), (others => '0'));
end if;
end;

function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is
function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0))
return decode_input_reg_t is
variable ret : decode_input_reg_t;
begin
case t is
when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), (others => '0'));
when FRB =>
if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), (others => '0'));
else
ret := ('0', (others => '0'), (others => '0'));
end if;
@ -129,14 +138,6 @@ architecture behaviour of decode2 is
ret := ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11));
when CONST_SH32 =>
ret := ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11));
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data);
when NONE =>
ret := ('0', (others => '0'), (others => '0'));
end case;
@ -144,23 +145,23 @@ architecture behaviour of decode2 is
return ret;
end;

function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0))
return decode_input_reg_t is
begin
case t is
when RS =>
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
return ('1', gpr_to_gspr(insn_rs(insn_in)), (others => '0'));
when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), (others => '0'));
when FRS =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
return ('1', fpr_to_gspr(insn_frt(insn_in)), (others => '0'));
else
return ('0', (others => '0'), (others => '0'));
end if;
when FRC =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data);
return ('1', fpr_to_gspr(insn_frc(insn_in)), (others => '0'));
else
return ('0', (others => '0'), (others => '0'));
end if;
@ -169,8 +170,8 @@ architecture behaviour of decode2 is
end case;
end;

function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
ispr : gspr_index_t) return decode_output_reg_t is
function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0))
return decode_output_reg_t is
begin
case t is
when RT =>
@ -181,18 +182,10 @@ architecture behaviour of decode2 is
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)));
else
return ('0', "0000000");
return ('0', "000000");
end if;
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr);
when NONE =>
return ('0', "0000000");
return ('0', "000000");
end case;
end;

@ -228,14 +221,6 @@ architecture behaviour of decode2 is
OP_SHR => "010",
OP_EXTSWSLI => "010",
OP_MUL_L64 => "011", -- muldiv_result
OP_MUL_H64 => "011",
OP_MUL_H32 => "011",
OP_DIV => "011",
OP_DIVE => "011",
OP_MOD => "011",
OP_CNTZ => "100", -- countbits_result
OP_POPCNT => "100",
OP_MFSPR => "101", -- spr_result
OP_B => "110", -- next_nia
OP_BC => "110",
OP_BCREG => "110",
@ -270,30 +255,37 @@ architecture behaviour of decode2 is
others => "000"
);

signal decoded_reg_a : decode_input_reg_t;
signal decoded_reg_b : decode_input_reg_t;
signal decoded_reg_c : decode_input_reg_t;
signal decoded_reg_o : decode_output_reg_t;

-- issue control signals
signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic;
signal control_stall_out : std_ulogic;
signal control_sgl_pipe : std_logic;
signal control_serialize : std_logic;

signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t;

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read : gspr_index_t;
signal gpr_a_bypass : std_ulogic;
signal gpr_a_bypass : std_ulogic_vector(1 downto 0);

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;
signal gpr_b_bypass : std_ulogic_vector(1 downto 0);

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;
signal gpr_c_bypass : std_ulogic_vector(1 downto 0);

signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic;
signal cr_bypass : std_ulogic_vector(1 downto 0);

signal ov_read_valid : std_ulogic;
signal ov_write_valid : std_ulogic;

signal instr_tag : instr_tag_t;

@ -308,11 +300,9 @@ begin

complete_in => complete_in,
valid_in => control_valid_in,
repeated => r.repeat,
busy_in => busy_in,
deferred => deferred,
flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe,
serialize => control_serialize,
stop_mark_in => d_in.stop_mark,

gpr_write_valid_in => gpr_write_valid,
@ -329,13 +319,17 @@ begin

execute_next_tag => execute_bypass.tag,
execute_next_cr_tag => execute_cr_bypass.tag,
execute2_next_tag => execute2_bypass.tag,
execute2_next_cr_tag => execute2_cr_bypass.tag,

cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid,
cr_bypass => cr_bypass,

ov_read_in => ov_read_valid,
ov_write_in => ov_write_valid,

valid_out => control_valid_out,
stall_out => control_stall_out,
stopped_out => stopped_out,

gpr_bypass_a => gpr_a_bypass,
@ -345,221 +339,348 @@ begin
instr_tag_out => instr_tag
);

deferred <= r.e.valid and busy_in;
deferred <= dc2.e.valid and busy_in;

decode2_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' or flush_in = '1' or deferred = '0' then
if rin.e.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
if rst = '1' or flush_in = '1' then
dc2 <= reg_type_init;
elsif deferred = '0' then
if dc2in.e.valid = '1' then
report "execute " & to_hstring(dc2in.e.nia) &
" tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid);
end if;
r <= rin;
dc2 <= dc2in;
elsif dc2.read_rspr = '0' then
-- Update debug SPR access signals even when stalled
-- if the instruction in dc2.e doesn't read any SPRs.
dc2.e.dbg_spr_access <= dc2in.e.dbg_spr_access;
dc2.e.ramspr_even_rdaddr <= dc2in.e.ramspr_even_rdaddr;
dc2.e.ramspr_odd_rdaddr <= dc2in.e.ramspr_odd_rdaddr;
dc2.e.ramspr_rd_odd <= dc2in.e.ramspr_rd_odd;
end if;
if d_in.valid = '1' then
assert decoded_reg_a.reg_valid = '0' or decoded_reg_a.reg = d_in.reg_a severity failure;
assert decoded_reg_b.reg_valid = '0' or decoded_reg_b.reg = d_in.reg_b severity failure;
assert decoded_reg_c.reg_valid = '0' or decoded_reg_c.reg = d_in.reg_c severity failure;
end if;
end if;
end process;

c_out.read <= d_in.decode.input_cr;

decode2_addrs: process(all)
begin
decoded_reg_a <= decode_input_reg_init;
decoded_reg_b <= decode_input_reg_init;
decoded_reg_c <= decode_input_reg_init;
decoded_reg_o <= decode_output_reg_init;
if d_in.valid = '1' then
decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.nia);
decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn);
decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
end if;

r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read2_enable <= decoded_reg_b.reg_valid;
r_out.read3_enable <= decoded_reg_c.reg_valid;

end process;

decode2_1: process(all)
variable v : reg_type;
variable mul_a : std_ulogic_vector(63 downto 0);
variable mul_b : std_ulogic_vector(63 downto 0);
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable length : std_ulogic_vector(3 downto 0);
variable op : insn_type_t;
variable valid_in : std_ulogic;
variable decctr : std_ulogic;
variable sprs_busy : std_ulogic;
begin
v := r;

v.e := Decode2ToExecute1Init;
v := dc2;

mul_a := (others => '0');
mul_b := (others => '0');
valid_in := d_in.valid or dc2.busy;

--v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;
if dc2.busy = '0' then
v.e := Decode2ToExecute1Init;

-- Work out whether XER common bits are set
v.e.output_xer := d_in.decode.output_carry;
case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
v.e.oe := '1';
v.e.output_xer := '1';
end if;
when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.e.output_xer := '1';
end if;
when others =>
end case;
sprs_busy := '0';

decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);
if d_in.valid = '1' then
v.prev_sgl := dc2.sgl_pipe;
v.sgl_pipe := d_in.decode.sgl_pipe;
end if;

if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
-- b and bc have even major opcodes; bcreg is considered absolute
v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26);
end if;
op := d_in.decode.insn_type;

if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
v.e.second := r.repeat;
case d_in.decode.repeat is
when DRSE =>
-- do RS|1,RS for LE; RS,RS|1 for BE
if r.repeat = d_in.big_endian then
decoded_reg_c.reg(0) := '1';
v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;

-- Work out whether XER SO/OV/OV32 bits are set
-- or used by this instruction
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.output_xer := d_in.decode.output_carry;
v.input_ov := d_in.decode.output_carry;
v.output_ov := '0';
if d_in.decode.input_carry = OV then
v.input_ov := '1';
v.output_ov := '1';
end if;
if v.e.rc = '1' and d_in.decode.facility /= FPU then
v.input_ov := '1';
end if;
case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
v.e.oe := '1';
v.e.output_xer := '1';
v.output_ov := '1';
v.input_ov := '1'; -- need SO state if setting OV to 0
end if;
when DRTE =>
-- do RT|1,RT for LE; RT,RT|1 for BE
if r.repeat = d_in.big_endian then
decoded_reg_o.reg(0) := '1';
when OP_MFSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.input_ov := '1';
end if;
when DUPD =>
-- update-form loads, 2nd instruction writes RA
if r.repeat = '1' then
decoded_reg_o.reg := decoded_reg_a.reg;
when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.e.output_xer := '1';
v.output_ov := '1';
end if;
when OP_CMP | OP_MCRXRX =>
v.input_ov := '1';
when others =>
end case;
elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
-- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
v.e.repeat := '1';
v.e.second := r.repeat;
-- first one does CTR, second does LR
decoded_reg_o.reg(0) := not r.repeat;
end if;

r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
r_out.read1_reg <= decoded_reg_a.reg;
r_out.read2_enable <= decoded_reg_b.reg_valid and d_in.valid;
r_out.read2_reg <= decoded_reg_b.reg;
r_out.read3_enable <= decoded_reg_c.reg_valid and d_in.valid;
r_out.read3_reg <= decoded_reg_c.reg;

case d_in.decode.length is
when is1B =>
length := "0001";
when is2B =>
length := "0010";
when is4B =>
length := "0100";
when is8B =>
length := "1000";
when NONE =>
length := "0000";
end case;
v.reg_a_valid := decoded_reg_a.reg_valid;
v.reg_b_valid := decoded_reg_b.reg_valid;
v.reg_c_valid := decoded_reg_c.reg_valid;
v.reg_o_valid := decoded_reg_o.reg_valid;

-- execute unit
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
v.e.insn_type := op;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
v.e.is_32bit := d_in.decode.is_32bit;
v.e.is_signed := d_in.decode.is_signed;
v.e.insn := d_in.insn;
v.e.data_len := length;
v.e.byte_reverse := d_in.decode.byte_reverse;
v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;
v.e.result_sel := result_select(op);
v.e.sub_select := subresult_select(op);
if op = OP_BC or op = OP_BCREG then
if d_in.insn(23) = '0' and r.repeat = '0' and
not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
-- decrement CTR if BO(2) = 0 and not bcctr
v.e.addm1 := '1';
v.e.result_sel := "000"; -- select adder output
if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
-- b and bc have even major opcodes; bcreg is considered absolute
v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26);
end if;
end if;
op := d_in.decode.insn_type;

-- Does this instruction decrement CTR?
-- bc, bclr, bctar with BO(2) = 0 do, but not bcctr.
decctr := '0';
if d_in.insn(23) = '0' and
(op = OP_BC or
(op = OP_BCREG and not (d_in.insn(10) = '1' and d_in.insn(6) = '0'))) then
decctr := '1';
end if;
v.e.dec_ctr := decctr;

-- See if any of the operands can get their value via the bypass path.
case gpr_a_bypass is
when '1' =>
v.e.read_data1 := execute_bypass.data;
when others =>
v.e.read_data1 := decoded_reg_a.data;
end case;
case gpr_b_bypass is
when '1' =>
v.e.read_data2 := execute_bypass.data;
when others =>
v.e.read_data2 := decoded_reg_b.data;
end case;
case gpr_c_bypass is
when '1' =>
v.e.read_data3 := execute_bypass.data;
when others =>
v.e.read_data3 := decoded_reg_c.data;
end case;
v.repeat := d_in.decode.repeat;
if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
end if;

v.e.spr_select := d_in.spr_info;

if decctr = '1' then
-- read and write CTR
v.e.ramspr_odd_rdaddr := RAMSPR_CTR;
v.e.ramspr_wraddr := RAMSPR_CTR;
v.e.ramspr_write_odd := '1';
sprs_busy := '1';
end if;
if v.e.lr = '1' then
-- write LR
v.e.ramspr_wraddr := RAMSPR_LR;
v.e.ramspr_write_even := '1';
end if;

case op is
when OP_BCREG =>
if d_in.insn(10) = '0' then
v.e.ramspr_even_rdaddr := RAMSPR_LR;
elsif d_in.insn(6) = '0' then
v.e.ramspr_odd_rdaddr := RAMSPR_CTR;
v.e.ramspr_rd_odd := '1';
else
v.e.ramspr_even_rdaddr := RAMSPR_TAR;
end if;
sprs_busy := '1';
when OP_MFSPR =>
v.e.ramspr_even_rdaddr := d_in.ram_spr.index;
v.e.ramspr_odd_rdaddr := d_in.ram_spr.index;
v.e.ramspr_rd_odd := d_in.ram_spr.isodd;
v.e.spr_is_ram := d_in.ram_spr.valid;
sprs_busy := d_in.ram_spr.valid;
when OP_MTSPR =>
v.e.ramspr_wraddr := d_in.ram_spr.index;
v.e.ramspr_write_even := d_in.ram_spr.valid and not d_in.ram_spr.isodd;
v.e.ramspr_write_odd := d_in.ram_spr.valid and d_in.ram_spr.isodd;
v.e.spr_is_ram := d_in.ram_spr.valid;
when OP_RFID =>
v.e.ramspr_even_rdaddr := RAMSPR_SRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_SRR1;
sprs_busy := '1';
when others =>
end case;
v.read_rspr := sprs_busy and d_in.valid;

case d_in.decode.length is
when is1B =>
length := "0001";
when is2B =>
length := "0010";
when is4B =>
length := "0100";
when is8B =>
length := "1000";
when NONE =>
length := "0000";
end case;

-- execute unit
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
v.e.read_reg1 := d_in.reg_a;
v.e.read_reg2 := d_in.reg_b;
v.e.read_reg3 := d_in.reg_c;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.invert_a := d_in.decode.invert_a;
v.e.insn_type := op;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
v.e.is_32bit := d_in.decode.is_32bit;
v.e.is_signed := d_in.decode.is_signed;
v.e.insn := d_in.insn;
v.e.data_len := length;
v.e.byte_reverse := d_in.decode.byte_reverse;
v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;
v.e.result_sel := result_select(op);
v.e.sub_select := subresult_select(op);
if op = OP_MFSPR then
if d_in.ram_spr.valid = '1' then
v.e.result_sel := "101"; -- ramspr_result
elsif d_in.spr_info.valid = '0' then
-- Privileged mfspr to invalid/unimplemented SPR numbers
-- writes the contents of RT back to RT (i.e. it's a no-op)
v.e.result_sel := "001"; -- logical_result
end if;
end if;

v.e.cr := c_in.read_cr_data;
if cr_bypass = '1' then
v.e.cr := execute_cr_bypass.data;
elsif dc2.e.valid = '1' then
-- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.
-- Set up for the second iteration (if deferred = 1 this will all be ignored)
v.e.second := '1';
-- DUPD is the only possibility here:
-- update-form loads, 2nd instruction writes RA
v.e.write_reg := dc2.e.read_reg1;
end if;

-- issue control
control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe;
control_valid_in <= valid_in;
control_serialize <= v.sgl_pipe or v.prev_sgl;

gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= decoded_reg_o.reg;
gpr_write_valid <= v.reg_o_valid;
gpr_write <= v.e.write_reg;

gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
gpr_a_read_valid <= v.reg_a_valid;
gpr_a_read <= v.e.read_reg1;

gpr_b_read_valid <= decoded_reg_b.reg_valid;
gpr_b_read <= decoded_reg_b.reg;
gpr_b_read_valid <= v.reg_b_valid;
gpr_b_read <= v.e.read_reg2;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= decoded_reg_c.reg;
gpr_c_read_valid <= v.reg_c_valid;
gpr_c_read <= v.e.read_reg3;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_write_valid <= v.e.output_cr or v.e.rc;
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or d_in.decode.input_cr;
cr_read_valid <= cr_write_valid or v.e.input_cr;

v.e.valid := control_valid_out;
if control_valid_out = '1' then
v.repeat := v.e.repeat and not r.repeat;
ov_read_valid <= v.input_ov;
ov_write_valid <= v.output_ov;

-- See if any of the operands can get their value via the bypass path.
if dc2.busy = '0' or gpr_a_bypass /= "00" then
case gpr_a_bypass is
when "01" =>
v.e.read_data1 := execute_bypass.data;
when "10" =>
v.e.read_data1 := execute2_bypass.data;
when "11" =>
v.e.read_data1 := writeback_bypass.data;
when others =>
if decoded_reg_a.reg_valid = '1' then
v.e.read_data1 := r_in.read1_data;
else
v.e.read_data1 := decoded_reg_a.data;
end if;
end case;
end if;
if dc2.busy = '0' or gpr_b_bypass /= "00" then
case gpr_b_bypass is
when "01" =>
v.e.read_data2 := execute_bypass.data;
when "10" =>
v.e.read_data2 := execute2_bypass.data;
when "11" =>
v.e.read_data2 := writeback_bypass.data;
when others =>
if decoded_reg_b.reg_valid = '1' then
v.e.read_data2 := r_in.read2_data;
else
v.e.read_data2 := decoded_reg_b.data;
end if;
end case;
end if;
if dc2.busy = '0' or gpr_c_bypass /= "00" then
case gpr_c_bypass is
when "01" =>
v.e.read_data3 := execute_bypass.data;
when "10" =>
v.e.read_data3 := execute2_bypass.data;
when "11" =>
v.e.read_data3 := writeback_bypass.data;
when others =>
if decoded_reg_c.reg_valid = '1' then
v.e.read_data3 := r_in.read3_data;
else
v.e.read_data3 := decoded_reg_c.data;
end if;
end case;
end if;

stall_out <= control_stall_out or v.repeat;
case cr_bypass is
when "10" =>
v.e.cr := execute_cr_bypass.data;
when "11" =>
v.e.cr := execute2_cr_bypass.data;
when others =>
v.e.cr := c_in.read_cr_data;
end case;
v.e.xerc := c_in.read_xerc_data;

if rst = '1' or flush_in = '1' then
v.e := Decode2ToExecute1Init;
v.repeat := '0';
v.e.valid := control_valid_out;
v.e.instr_tag := instr_tag;
v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second));

stall_out <= dc2.busy or deferred;

v.e.dbg_spr_access := dbg_spr_req and not v.read_rspr;
if v.e.dbg_spr_access = '1' then
v.e.ramspr_even_rdaddr := to_integer(unsigned(dbg_spr_addr(3 downto 1)));
v.e.ramspr_odd_rdaddr := to_integer(unsigned(dbg_spr_addr(3 downto 1)));
v.e.ramspr_rd_odd := dbg_spr_addr(0);
end if;

-- Update registers
rin <= v;
dc2in <= v;

-- Update outputs
e_out <= r.e;
e_out <= dc2.e;
end process;

d2_log: if LOG_LENGTH > 0 generate
@ -568,13 +689,13 @@ begin
dec2_log : process(clk)
begin
if rising_edge(clk) then
log_data <= r.e.nia(5 downto 2) &
r.e.valid &
log_data <= dc2.e.nia(5 downto 2) &
dc2.e.valid &
stopped_out &
stall_out &
gpr_a_bypass &
gpr_b_bypass &
gpr_c_bypass;
(gpr_a_bypass(1) xor gpr_a_bypass(0)) &
(gpr_b_bypass(1) xor gpr_b_bypass(0)) &
(gpr_c_bypass(1) xor gpr_c_bypass(0));
end if;
end process;
log_out <= log_data;

@ -22,11 +22,11 @@ package decode_types is
OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED
);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type output_reg_a_t is (NONE, RT, RA, FRT);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, OV, ONE);

@ -53,8 +53,6 @@ package decode_types is
type length_t is (NONE, is1B, is2B, is4B, is8B);

type repeat_t is (NONE, -- instruction is not repeated
DRSE, -- double RS, endian twist
DRTE, -- double RT, endian twist
DUPD); -- update-form load

type decode_rom_t is record

@ -36,7 +36,7 @@ begin
divider_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
if rst = '1' or d_in.flush = '1' then
dend <= (others => '0');
div <= (others => '0');
quot <= (others => '0');

File diff suppressed because it is too large Load Diff

@ -93,7 +93,7 @@ begin
end if;
-- always send the up-to-date stop mark and req
r.stop_mark <= stop_in;
r.req <= not rst;
r.req <= not rst and not stop_in;
end if;
end process;
log_out <= log_nia;

1029
fpu.vhdl

File diff suppressed because it is too large Load Diff

@ -35,6 +35,12 @@ entity loadstore1 is

events : out Loadstore1EventType;

-- Access to SPRs from core_debug module
dbg_spr_req : in std_ulogic;
dbg_spr_ack : out std_ulogic;
dbg_spr_addr : in std_ulogic_vector(1 downto 0);
dbg_spr_data : out std_ulogic_vector(63 downto 0);

log_out : out std_ulogic_vector(9 downto 0)
);
end loadstore1;
@ -43,9 +49,7 @@ architecture behave of loadstore1 is

-- State machine for unaligned loads/stores
type state_t is (IDLE, -- ready for instruction
MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
FINISH_LFS -- write back converted SP data for lfs*
MMU_WAIT -- waiting for MMU to finish doing something
);

type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
@ -63,9 +67,7 @@ architecture behave of loadstore1 is
write_spr : std_ulogic;
mmu_op : std_ulogic;
instr_fault : std_ulogic;
load_zero : std_ulogic;
do_update : std_ulogic;
noop : std_ulogic;
mode_32bit : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0);
@ -88,31 +90,32 @@ architecture behave of loadstore1 is
virt_mode : std_ulogic;
priv_mode : std_ulogic;
load_sp : std_ulogic;
sprn : std_ulogic_vector(9 downto 0);
sprsel : std_ulogic_vector(1 downto 0);
ric : std_ulogic_vector(1 downto 0);
is_slbia : std_ulogic;
align_intr : std_ulogic;
dword_index : std_ulogic;
two_dwords : std_ulogic;
nia : std_ulogic_vector(63 downto 0);
incomplete : std_ulogic;
end record;
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
instr_fault => '0', load_zero => '0', do_update => '0', noop => '0',
instr_fault => '0', do_update => '0',
mode_32bit => '0', addr => (others => '0'),
byte_sel => x"00", second_bytes => x"00",
store_data => (others => '0'), instr_tag => instr_tag_init,
write_reg => 7x"00", length => x"0",
write_reg => 6x"00", length => x"0",
elt_length => x"0", byte_reverse => '0', brev_mask => "000",
sign_extend => '0', update => '0',
xerc => xerc_init, reserve => '0',
atomic => '0', atomic_last => '0', rc => '0', nc => '0',
virt_mode => '0', priv_mode => '0', load_sp => '0',
sprn => 10x"0", is_slbia => '0', align_intr => '0',
dword_index => '0', two_dwords => '0',
nia => (others => '0'));
sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0',
dword_index => '0', two_dwords => '0', incomplete => '0');

type reg_stage1_t is record
req : request_t;
busy : std_ulogic;
issued : std_ulogic;
addr0 : std_ulogic_vector(63 downto 0);
end record;
@ -121,15 +124,20 @@ architecture behave of loadstore1 is
req : request_t;
byte_index : byte_index_t;
use_second : std_ulogic_vector(7 downto 0);
busy : std_ulogic;
wait_dc : std_ulogic;
wait_mmu : std_ulogic;
one_cycle : std_ulogic;
wr_sel : std_ulogic_vector(1 downto 0);
addr0 : std_ulogic_vector(63 downto 0);
sprsel : std_ulogic_vector(1 downto 0);
dbg_spr : std_ulogic_vector(63 downto 0);
dbg_spr_ack: std_ulogic;
end record;

type reg_stage3_t is record
state : state_t;
complete : std_ulogic;
instr_tag : instr_tag_t;
write_enable : std_ulogic;
write_reg : gspr_index_t;
@ -137,7 +145,6 @@ architecture behave of loadstore1 is
rc : std_ulogic;
xerc : xer_common_t;
store_done : std_ulogic;
convert_lfs : std_ulogic;
load_data : std_ulogic_vector(63 downto 0);
dar : std_ulogic_vector(63 downto 0);
dsisr : std_ulogic_vector(31 downto 0);
@ -147,7 +154,6 @@ architecture behave of loadstore1 is
stage1_en : std_ulogic;
interrupt : std_ulogic;
intr_vec : integer range 0 to 16#fff#;
nia : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0);
events : Loadstore1EventType;
end record;
@ -157,21 +163,18 @@ architecture behave of loadstore1 is
signal r2, r2in : reg_stage2_t;
signal r3, r3in : reg_stage3_t;

signal flush : std_ulogic;
signal busy : std_ulogic;
signal complete : std_ulogic;
signal in_progress : std_ulogic;
signal flushing : std_ulogic;

signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data : std_ulogic_vector(63 downto 0);
signal store_data : std_ulogic_vector(63 downto 0);

signal stage1_issue_enable : std_ulogic;
signal stage1_req : request_t;
signal stage1_dcreq : std_ulogic;
signal stage1_dreq : std_ulogic;
signal stage2_busy_next : std_ulogic;
signal stage3_busy_next : std_ulogic;

-- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
@ -274,22 +277,29 @@ begin
begin
if rising_edge(clk) then
if rst = '1' then
r1.busy <= '0';
r1.issued <= '0';
r1.req.valid <= '0';
r1.req.dc_req <= '0';
r1.req.incomplete <= '0';
r1.req.tlbie <= '0';
r1.req.is_slbia <= '0';
r1.req.instr_fault <= '0';
r1.req.load <= '0';
r1.req.priv_mode <= '0';
r1.req.sprn <= (others => '0');
r1.req.sprsel <= "00";
r1.req.ric <= "00";
r1.req.xerc <= xerc_init;

r2.req.valid <= '0';
r2.busy <= '0';
r2.req.tlbie <= '0';
r2.req.is_slbia <= '0';
r2.req.instr_fault <= '0';
r2.req.load <= '0';
r2.req.priv_mode <= '0';
r2.req.sprn <= (others => '0');
r2.req.sprsel <= "00";
r2.req.ric <= "00";
r2.req.xerc <= xerc_init;

r2.wait_dc <= '0';
@ -301,8 +311,8 @@ begin
r3.state <= IDLE;
r3.write_enable <= '0';
r3.interrupt <= '0';
r3.complete <= '0';
r3.stage1_en <= '1';
r3.convert_lfs <= '0';
r3.events.load_complete <= '0';
r3.events.store_complete <= '0';
flushing <= '0';
@ -311,7 +321,7 @@ begin
r2 <= r2in;
r3 <= r3in;
flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and
not r3in.interrupt;
not flush;
end if;
stage1_dreq <= stage1_dcreq;
if d_in.valid = '1' then
@ -321,7 +331,7 @@ begin
assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
end if;
if m_in.done = '1' or m_in.err = '1' then
assert r2.req.valid = '1' and (r3.state = MMU_LOOKUP or r3.state = TLBIE_WAIT) severity failure;
assert r2.req.valid = '1' and r3.state = MMU_WAIT severity failure;
end if;
end if;
end process;
@ -410,8 +420,14 @@ begin
v.nc := l_in.ci;
v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode;
v.sprn := sprn;
v.nia := l_in.nia;
v.ric := l_in.insn(19 downto 18);
if sprn(1) = '1' then
-- DSISR and DAR
v.sprsel := '1' & sprn(0);
else
-- PID and PTCR
v.sprsel := '0' & sprn(8);
end if;

lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));

@ -457,17 +473,6 @@ begin
-- check alignment for larx/stcx
misaligned := or (addr_mask and addr(2 downto 0));
v.align_intr := l_in.reserve and misaligned;
if l_in.repeat = '1' and l_in.second = '0' and l_in.update = '0' and addr(3) = '1' then
-- length is really 16 not 8
-- Make misaligned lq cause an alignment interrupt in LE mode,
-- in order to avoid the case with RA = RT + 1 where the second half
-- faults but the first doesn't (and updates RT+1, destroying RA).
-- The equivalent BE case doesn't occur because RA = RT is illegal.
misaligned := '1';
if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
v.align_intr := '1';
end if;
end if;

v.atomic := not misaligned;
v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
@ -498,7 +503,7 @@ begin
v.read_spr := '1';
when OP_MTSPR =>
v.write_spr := '1';
v.mmu_op := sprn(8) or sprn(5);
v.mmu_op := not sprn(1);
when OP_FETCH_FAILED =>
-- send it to the MMU to do the radix walk
v.instr_fault := '1';
@ -507,6 +512,7 @@ begin
when others =>
end case;
v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr;
v.incomplete := v.dc_req and v.two_dwords;

-- Work out controls for load and store formatting
brev_lenm1 := "000";
@ -518,16 +524,8 @@ begin
req_in <= v;
end process;

busy <= r1.req.valid and ((r1.req.dc_req and not r1.issued) or
(r1.issued and d_in.error) or
stage2_busy_next or
(r1.req.dc_req and r1.req.two_dwords and not r1.req.dword_index));
complete <= r2.one_cycle or (r2.wait_dc and d_in.valid) or
(r2.wait_mmu and m_in.done) or r3.convert_lfs;
in_progress <= r1.req.valid or (r2.req.valid and not complete);

stage1_issue_enable <= r3.stage1_en and not (r1.req.valid and r1.req.mmu_op) and
not (r2.req.valid and r2.req.mmu_op);
busy <= dc_stall or d_in.error or r1.busy or r2.busy;
complete <= r2.one_cycle or (r2.wait_dc and d_in.valid) or r3.complete;

-- Processing done in the first cycle of a load/store instruction
loadstore1_1: process(all)
@ -538,10 +536,11 @@ begin
begin
v := r1;
issue := '0';
dcreq := '0';

if busy = '0' then
if r1.busy = '0' then
req := req_in;
v.issued := '0';
req.valid := l_in.valid;
if flushing = '1' then
-- Make this a no-op request rather than simply invalid.
-- It will never get to stage 3 since there is a request ahead of
@ -554,37 +553,49 @@ begin
end if;
else
req := r1.req;
end if;

if r1.req.valid = '1' then
if r1.req.dc_req = '1' and r1.issued = '0' then
issue := '1';
elsif r1.issued = '1' and d_in.error = '1' then
v.issued := '0';
elsif stage2_busy_next = '0' then
-- we can change what's in r1 next cycle because the current thing
-- in r1 will go into r2
if r1.req.dc_req = '1' and r1.req.two_dwords = '1' and r1.req.dword_index = '0' then
-- construct the second request for a misaligned access
req.dword_index := '1';
req.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000";
if r1.req.mode_32bit = '1' then
req.addr(32) := '0';
end if;
req.byte_sel := r1.req.second_bytes;
issue := '1';
elsif r1.req.incomplete = '1' then
-- construct the second request for a misaligned access
req.dword_index := '1';
req.incomplete := '0';
req.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000";
if r1.req.mode_32bit = '1' then
req.addr(32) := '0';
end if;
req.byte_sel := r1.req.second_bytes;
issue := '1';
else
-- For the lfs conversion cycle, leave the request valid
-- for another cycle but with req.dc_req = 0.
-- For an MMU request last cycle, we have nothing
-- to do in this cycle, so make it invalid.
if r1.req.load_sp = '0' then
req.valid := '0';
end if;
req.dc_req := '0';
end if;
end if;
if r3in.interrupt = '1' then
req.valid := '0';
issue := '0';
end if;

v.req := req;
dcreq := issue and stage1_issue_enable and not d_in.error and not dc_stall;
if issue = '1' then
v.issued := dcreq;
if flush = '1' then
v.req.valid := '0';
v.req.dc_req := '0';
v.req.incomplete := '0';
v.issued := '0';
v.busy := '0';
elsif (dc_stall or d_in.error or r2.busy) = '0' then
-- we can change what's in r1 next cycle because the current thing
-- in r1 will go into r2
v.req := req;
dcreq := issue;
v.issued := issue;
v.busy := (issue and (req.incomplete or req.load_sp)) or (req.valid and req.mmu_op);
else
-- pipeline is stalled
if r1.issued = '1' and d_in.error = '1' then
v.issued := '0';
v.busy := '1';
end if;
end if;

stage1_req <= req;
@ -602,6 +613,10 @@ begin
variable kk : unsigned(3 downto 0);
variable idx : unsigned(2 downto 0);
variable byte_offset : unsigned(2 downto 0);
variable interrupt : std_ulogic;
variable dbg_spr_rd : std_ulogic;
variable sprsel : std_ulogic_vector(1 downto 0);
variable sprval : std_ulogic_vector(63 downto 0);
begin
v := r2;

@ -614,47 +629,91 @@ begin
store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j);
end loop;

if stage3_busy_next = '0' and
(r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0') then
v.req := r1.req;
v.addr0 := r1.addr0;
v.req.store_data := store_data;
v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
not (r1.req.two_dwords and not r1.req.dword_index);
v.wait_mmu := r1.req.valid and r1.req.mmu_op;
v.one_cycle := r1.req.valid and (r1.req.noop or r1.req.read_spr or
(r1.req.write_spr and not r1.req.mmu_op) or
r1.req.load_zero or r1.req.do_update);
if r1.req.read_spr = '1' then
v.wr_sel := "00";
elsif r1.req.do_update = '1' or r1.req.store = '1' then
v.wr_sel := "01";
elsif r1.req.load_sp = '1' then
v.wr_sel := "10";
dbg_spr_rd := dbg_spr_req and not (r1.req.valid and r1.req.read_spr);
if dbg_spr_rd = '0' then
sprsel := r1.req.sprsel;
else
sprsel := dbg_spr_addr;
end if;
if sprsel(1) = '1' then
if sprsel(0) = '0' then
sprval := x"00000000" & r3.dsisr;
else
v.wr_sel := "11";
sprval := r3.dar;
end if;
else
sprval := m_in.sprval;
end if;
if dbg_spr_req = '0' then
v.dbg_spr_ack := '0';
elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then
v.dbg_spr := sprval;
v.dbg_spr_ack := '1';
end if;

-- Work out load formatter controls for next cycle
for i in 0 to 7 loop
idx := to_unsigned(i, 3) xor r1.req.brev_mask;
kk := ('0' & idx) + ('0' & byte_offset);
v.use_second(i) := kk(3);
v.byte_index(i) := kk(2 downto 0);
end loop;
elsif stage3_busy_next = '0' then
v.req.valid := '0';
v.wait_dc := '0';
if (dc_stall or d_in.error or r2.busy or l_in.e2stall) = '0' then
if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then
v.req := r1.req;
v.addr0 := r1.addr0;
v.req.store_data := store_data;
v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
not r1.req.incomplete;
v.wait_mmu := r1.req.valid and r1.req.mmu_op;
v.busy := r1.req.valid and r1.req.mmu_op;
v.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op);
if r1.req.do_update = '1' or r1.req.store = '1' or r1.req.read_spr = '1' then
v.wr_sel := "00";
elsif r1.req.load_sp = '1' then
v.wr_sel := "01";
else
v.wr_sel := "10";
end if;
if r1.req.read_spr = '1' then
v.addr0 := sprval;
end if;

-- Work out load formatter controls for next cycle
for i in 0 to 7 loop
idx := to_unsigned(i, 3) xor r1.req.brev_mask;
kk := ('0' & idx) + ('0' & byte_offset);
v.use_second(i) := kk(3);
v.byte_index(i) := kk(2 downto 0);
end loop;
else
v.req.valid := '0';
v.wait_dc := '0';
v.wait_mmu := '0';
v.one_cycle := '0';
end if;
end if;
if r2.wait_mmu = '1' and m_in.done = '1' then
if r2.req.mmu_op = '1' then
v.req.valid := '0';
v.busy := '0';
end if;
v.wait_mmu := '0';
end if;
if r2.busy = '1' and r2.wait_mmu = '0' then
v.busy := '0';
end if;

stage2_busy_next <= r1.req.valid and stage3_busy_next;

if r3in.interrupt = '1' then
interrupt := (r2.req.valid and r2.req.align_intr) or
(d_in.error and d_in.cache_paradox) or m_in.err;
if interrupt = '1' then
v.req.valid := '0';
v.busy := '0';
v.wait_dc := '0';
v.wait_mmu := '0';
elsif d_in.error = '1' then
v.wait_mmu := '1';
v.busy := '1';
end if;

r2in <= v;

-- SPR values for core_debug
dbg_spr_data <= r2.dbg_spr;
dbg_spr_ack <= r2.dbg_spr_ack;
end process;

-- Processing done in the third cycle of a load/store instruction.
@ -671,7 +730,6 @@ begin
variable write_data : std_ulogic_vector(63 downto 0);
variable do_update : std_ulogic;
variable done : std_ulogic;
variable part_done : std_ulogic;
variable exception : std_ulogic;
variable data_permuted : std_ulogic_vector(63 downto 0);
variable data_trimmed : std_ulogic_vector(63 downto 0);
@ -687,13 +745,12 @@ begin
mmureq := '0';
mmu_mtspr := '0';
done := '0';
part_done := '0';
exception := '0';
dsisr := (others => '0');
write_enable := '0';
sprval := (others => '0');
do_update := '0';
v.convert_lfs := '0';
v.complete := '0';
v.srr1 := (others => '0');
v.events := (others => '0');

@ -755,114 +812,83 @@ begin
v.load_data := data_permuted;
end if;


if r2.req.valid = '1' then
if r2.req.read_spr = '1' then
write_enable := '1';
-- partial decode on SPR number should be adequate given
-- the restricted set that get sent down this path
if r2.req.sprn(8) = '0' and r2.req.sprn(5) = '0' then
if r2.req.sprn(0) = '0' then
sprval := x"00000000" & r3.dsisr;
else
sprval := r3.dar;
end if;
else
-- reading one of the SPRs in the MMU
sprval := m_in.sprval;
end if;
end if;
if r2.req.align_intr = '1' then
-- generate alignment interrupt
exception := '1';
end if;
if r2.req.load_zero = '1' then
write_enable := '1';
end if;
if r2.req.do_update = '1' then
do_update := '1';
end if;
end if;

case r3.state is
when IDLE =>
if d_in.valid = '1' then
if r2.req.two_dwords = '0' or r2.req.dword_index = '1' then
write_enable := r2.req.load and not r2.req.load_sp;
if HAS_FPU and r2.req.load_sp = '1' then
-- SP to DP conversion takes a cycle
v.state := FINISH_LFS;
v.convert_lfs := '1';
else
-- stores write back rA update
do_update := r2.req.update and r2.req.store;
end if;
else
part_done := '1';
end if;
if r2.req.load_sp = '1' and r2.req.dc_req = '0' then
write_enable := '1';
end if;
if d_in.error = '1' then
if d_in.cache_paradox = '1' then
-- signal an interrupt straight away
exception := '1';
dsisr(63 - 38) := not r2.req.load;
-- XXX there is no architected bit for this
-- (probably should be a machine check in fact)
dsisr(63 - 35) := d_in.cache_paradox;
if r2.req.write_spr = '1' and r2.req.mmu_op = '0' then
if r2.req.sprsel(0) = '0' then
v.dsisr := r2.req.store_data(31 downto 0);
else
-- Look up the translation for TLB miss
-- and also for permission error and RC error
-- in case the PTE has been updated.
mmureq := '1';
v.state := MMU_LOOKUP;
v.stage1_en := '0';
v.dar := r2.req.store_data;
end if;
end if;
if r2.req.valid = '1' then
if r2.req.mmu_op = '1' then
-- send request (tlbie, mtspr, itlb miss) to MMU
mmureq := not r2.req.write_spr;
mmu_mtspr := r2.req.write_spr;
if r2.req.instr_fault = '1' then
v.state := MMU_LOOKUP;
v.events.itlb_miss := '1';
else
v.state := TLBIE_WAIT;
end if;
elsif r2.req.write_spr = '1' then
if r2.req.sprn(0) = '0' then
v.dsisr := r2.req.store_data(31 downto 0);
else
v.dar := r2.req.store_data;
end if;
end if;
end if;

if r3.state = IDLE and r2.req.valid = '1' and r2.req.mmu_op = '1' then
-- send request (tlbie, mtspr, itlb miss) to MMU
mmureq := not r2.req.write_spr;
mmu_mtspr := r2.req.write_spr;
if r2.req.instr_fault = '1' then
v.events.itlb_miss := '1';
end if;
v.state := MMU_WAIT;
end if;

when MMU_LOOKUP =>
if m_in.done = '1' then
if r2.req.instr_fault = '0' then
-- retry the request now that the MMU has installed a TLB entry
req := '1';
v.stage1_en := '1';
v.state := IDLE;
end if;
if d_in.valid = '1' then
if r2.req.incomplete = '0' then
write_enable := r2.req.load and not r2.req.load_sp;
-- stores write back rA update
do_update := r2.req.update and r2.req.store;
end if;
if m_in.err = '1' then
end if;
if d_in.error = '1' then
if d_in.cache_paradox = '1' then
-- signal an interrupt straight away
exception := '1';
dsisr(63 - 33) := m_in.invalid;
dsisr(63 - 36) := m_in.perm_error;
dsisr(63 - 38) := r2.req.store or r2.req.dcbz;
dsisr(63 - 44) := m_in.badtree;
dsisr(63 - 45) := m_in.rc_error;
dsisr(63 - 38) := not r2.req.load;
-- XXX there is no architected bit for this
-- (probably should be a machine check in fact)
dsisr(63 - 35) := d_in.cache_paradox;
else
-- Look up the translation for TLB miss
-- and also for permission error and RC error
-- in case the PTE has been updated.
mmureq := '1';
v.state := MMU_WAIT;
v.stage1_en := '0';
end if;
end if;

when TLBIE_WAIT =>

when FINISH_LFS =>
write_enable := '1';

end case;
if m_in.done = '1' then
if r2.req.dc_req = '1' then
-- retry the request now that the MMU has installed a TLB entry
req := '1';
else
v.complete := '1';
end if;
end if;
if m_in.err = '1' then
exception := '1';
dsisr(63 - 33) := m_in.invalid;
dsisr(63 - 36) := m_in.perm_error;
dsisr(63 - 38) := r2.req.store or r2.req.dcbz;
dsisr(63 - 44) := m_in.badtree;
dsisr(63 - 45) := m_in.rc_error;
end if;

if complete = '1' or exception = '1' then
if (m_in.done or m_in.err) = '1' then
v.stage1_en := '1';
v.state := IDLE;
end if;
@ -874,7 +900,6 @@ begin
-- or ISI or ISegI for instruction fetch exceptions
v.interrupt := exception;
if exception = '1' then
v.nia := r2.req.nia;
if r2.req.align_intr = '1' then
v.intr_vec := 16#600#;
v.dar := r2.req.addr;
@ -901,12 +926,9 @@ begin

case r2.wr_sel is
when "00" =>
-- mfspr result
write_data := sprval;
when "01" =>
-- update reg
write_data := r2.addr0;
when "10" =>
when "01" =>
-- lfs result
write_data := load_dp_data;
when others =>
@ -915,7 +937,7 @@ begin
end case;

-- Update outputs to dcache
if stage1_issue_enable = '1' then
if r3.stage1_en = '1' then
d_out.valid <= stage1_dcreq;
d_out.load <= stage1_req.load;
d_out.dcbz <= stage1_req.dcbz;
@ -945,7 +967,7 @@ begin
else
d_out.data <= r2.req.store_data;
end if;
d_out.hold <= r2.req.valid and r2.req.load_sp and d_in.valid;
d_out.hold <= l_in.e2stall;

-- Update outputs to MMU
m_out.valid <= mmureq;
@ -953,8 +975,10 @@ begin
m_out.load <= r2.req.load;
m_out.priv <= r2.req.priv_mode;
m_out.tlbie <= r2.req.tlbie;
m_out.ric <= r2.req.ric;
m_out.mtspr <= mmu_mtspr;
m_out.sprn <= r2.req.sprn;
m_out.sprnf <= r1.req.sprsel(0);
m_out.sprnt <= r2.req.sprsel(0);
m_out.addr <= r2.req.addr;
m_out.slbia <= r2.req.is_slbia;
m_out.rs <= r2.req.store_data;
@ -970,18 +994,15 @@ begin
l_out.store_done <= d_in.store_done;
l_out.interrupt <= r3.interrupt;
l_out.intr_vec <= r3.intr_vec;
l_out.srr0 <= r3.nia;
l_out.srr1 <= r3.srr1;

-- update busy signal back to execute1
e_out.busy <= busy;
e_out.in_progress <= in_progress;
e_out.interrupt <= r3.interrupt;
e_out.l2stall <= dc_stall or d_in.error or r2.busy;

events <= r3.events;

-- Busy calculation.
stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);
flush <= exception;

-- Update registers
r3in <= v;
@ -1001,7 +1022,9 @@ begin
d_out.valid &
m_in.done &
r2.req.dword_index &
std_ulogic_vector(to_unsigned(state_t'pos(r3.state), 3));
r2.req.valid &
r2.wait_dc &
std_ulogic_vector(to_unsigned(state_t'pos(r3.state), 1));
end if;
end process;
log_out <= log_data;

@ -167,7 +167,7 @@ begin
end if;
tmp(7 downto 0) := rs(7 downto 0);
when others =>
-- e.g. OP_MTSPR
-- e.g. OP_MFSPR
tmp := rs;
end case;


@ -81,8 +81,8 @@ architecture behave of mmu is

begin
-- Multiplex internal SPR values back to loadstore1, selected
-- by l_in.sprn.
l_out.sprval <= r.ptcr when l_in.sprn(8) = '1' else x"00000000" & r.pid;
-- by l_in.sprnf.
l_out.sprval <= r.ptcr when l_in.sprnf = '1' else x"00000000" & r.pid;

mmu_0: process(clk)
begin
@ -259,9 +259,8 @@ begin
-- RB[IS] != 0 or RB[AP] != 0, or for slbia
v.inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
-- The RIC field of the tlbie instruction comes across on the
-- sprn bus as bits 2--3. RIC=2 flushes process table caches.
if l_in.sprn(3) = '1' then
-- RIC=2 or 3 flushes process table caches.
if l_in.ric(1) = '1' then
v.pt0_valid := '0';
v.pt3_valid := '0';
v.ptb_valid := '0';
@ -291,7 +290,7 @@ begin
-- Move to PID needs to invalidate L1 TLBs and cached
-- pgtbl0 value. Move to PTCR does that plus
-- invalidating the cached pgtbl3 and prtbl values as well.
if l_in.sprn(8) = '0' then
if l_in.sprnt = '0' then
v.pid := l_in.rs(31 downto 0);
else
v.ptcr := l_in.rs;

@ -14,7 +14,9 @@ entity register_file is
);
port(
clk : in std_logic;
stall : in std_ulogic;

d1_in : in Decode1ToRegisterFileType;
d_in : in Decode2ToRegisterFileType;
d_out : out RegisterFileToDecode2Type;

@ -34,84 +36,130 @@ entity register_file is
end entity register_file;

architecture behaviour of register_file is
type regfile is array(0 to 127) of std_ulogic_vector(63 downto 0);
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic;
signal dbg_gpr_done : std_ulogic;
signal addr_1_reg : gspr_index_t;
signal addr_2_reg : gspr_index_t;
signal addr_3_reg : gspr_index_t;
signal rd_2 : std_ulogic;
signal fwd_1 : std_ulogic;
signal fwd_2 : std_ulogic;
signal fwd_3 : std_ulogic;
signal data_1 : std_ulogic_vector(63 downto 0);
signal data_2 : std_ulogic_vector(63 downto 0);
signal data_3 : std_ulogic_vector(63 downto 0);
signal prev_write_data : std_ulogic_vector(63 downto 0);

begin
-- synchronous writes
-- synchronous reads and writes
register_write_0: process(clk)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
variable b_enable : std_ulogic;
begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
w_addr := w_in.write_reg;
if HAS_FPU and w_addr(6) = '1' then
if HAS_FPU and w_addr(5) = '1' then
report "Writing FPR " & to_hstring(w_addr(4 downto 0)) & " " & to_hstring(w_in.write_data);
else
w_addr(6) := '0';
if w_addr(5) = '0' then
report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
end if;
w_addr(5) := '0';
report "Writing GPR " & to_hstring(w_addr) & " " & to_hstring(w_in.write_data);
end if;
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
registers(to_integer(unsigned(w_addr))) <= w_in.write_data;
end if;

a_addr := d1_in.reg_1_addr;
b_addr := d1_in.reg_2_addr;
c_addr := d1_in.reg_3_addr;
b_enable := d1_in.read_2_enable;
if stall = '1' then
a_addr := addr_1_reg;
b_addr := addr_2_reg;
c_addr := addr_3_reg;
b_enable := rd_2;
else
addr_1_reg <= a_addr;
addr_2_reg <= b_addr;
addr_3_reg <= c_addr;
rd_2 <= b_enable;
end if;

fwd_1 <= '0';
fwd_2 <= '0';
fwd_3 <= '0';
if w_in.write_enable = '1' then
if w_addr = a_addr then
fwd_1 <= '1';
end if;
if w_addr = b_addr then
fwd_2 <= '1';
end if;
if w_addr = c_addr then
fwd_3 <= '1';
end if;
end if;

-- Do debug reads to GPRs and FPRs using the B port when it is not in use
if dbg_gpr_req = '1' then
if b_enable = '0' then
b_addr := dbg_gpr_addr(5 downto 0);
dbg_gpr_done <= '1';
end if;
else
dbg_gpr_done <= '0';
end if;

if not HAS_FPU then
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
end if;
data_1 <= registers(to_integer(unsigned(a_addr)));
data_2 <= registers(to_integer(unsigned(b_addr)));
data_3 <= registers(to_integer(unsigned(c_addr)));

prev_write_data <= w_in.write_data;
end if;
end process register_write_0;

-- asynchronous reads
-- asynchronous forwarding of write data
register_read_0: process(all)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
variable out_data_1 : std_ulogic_vector(63 downto 0);
variable out_data_2 : std_ulogic_vector(63 downto 0);
variable out_data_3 : std_ulogic_vector(63 downto 0);
begin
a_addr := d_in.read1_reg;
b_addr := d_in.read2_reg;
c_addr := d_in.read3_reg;
w_addr := w_in.write_reg;
if not HAS_FPU then
-- Make it obvious that we only want 64 GSPRs for a no-FPU implementation
a_addr(6) := '0';
b_addr(6) := '0';
c_addr(6) := '0';
w_addr(6) := '0';
out_data_1 := data_1;
out_data_2 := data_2;
out_data_3 := data_3;
if fwd_1 = '1' then
out_data_1 := prev_write_data;
end if;
if fwd_2 = '1' then
out_data_2 := prev_write_data;
end if;
if fwd_3 = '1' then
out_data_3 := prev_write_data;
end if;

if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1);
end if;
if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2);
end if;
if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
end if;
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr;
if not HAS_FPU then
b_addr(6) := '0';
end if;
end if;
rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));

-- Forward any written data
if w_in.write_enable = '1' then
if a_addr = w_addr then
d_out.read1_data <= w_in.write_data;
end if;
if b_addr = w_addr then
d_out.read2_data <= w_in.write_data;
end if;
if c_addr = w_addr then
d_out.read3_data <= w_in.write_data;
end if;
report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3);
end if;

d_out.read1_data <= out_data_1;
d_out.read2_data <= out_data_2;
d_out.read3_data <= out_data_3;
end process register_read_0;

-- Latch read data and ack if dbg read requested and B port not busy
@ -119,8 +167,8 @@ begin
begin
if rising_edge(clk) then
if dbg_gpr_req = '1' then
if d_in.read2_enable = '0' and dbg_ack = '0' then
dbg_data <= rd_port_b;
if dbg_ack = '0' and dbg_gpr_done = '1' then
dbg_data <= data_2;
dbg_ack <= '1';
end if;
else
@ -140,10 +188,6 @@ begin
loop_0: for i in 0 to 31 loop
report "GPR" & integer'image(i) & " " & to_hstring(registers(i));
end loop loop_0;

report "LR " & to_hstring(registers(to_integer(unsigned(fast_spr_num(SPR_LR)))));
report "CTR " & to_hstring(registers(to_integer(unsigned(fast_spr_num(SPR_CTR)))));
report "XER " & to_hstring(registers(to_integer(unsigned(fast_spr_num(SPR_XER)))));
sim_dump_done <= '1';
else
sim_dump_done <= '0';
@ -164,7 +208,7 @@ begin
if rising_edge(clk) then
log_data <= w_in.write_data &
w_in.write_enable &
w_in.write_reg;
'0' & w_in.write_reg;
end if;
end process;
log_out <= log_data;

@ -548,7 +548,11 @@ static const char *fast_spr_names[] =
{
"lr", "ctr", "srr0", "srr1", "hsrr0", "hsrr1",
"sprg0", "sprg1", "sprg2", "sprg3",
"hsprg0", "hsprg1", "xer"
"hsprg0", "hsprg1", "xer", "tar",
};

static const char *ldst_spr_names[] = {
"pidr", "ptcr", "dsisr", "dar"
};

static void gpr_read(uint64_t reg, uint64_t count)
@ -566,8 +570,10 @@ static void gpr_read(uint64_t reg, uint64_t count)
printf("r%"PRId64, reg);
else if ((reg - 32) < sizeof(fast_spr_names) / sizeof(fast_spr_names[0]))
printf("%s", fast_spr_names[reg - 32]);
else if (reg < 64)
else if (reg < 60)
printf("gspr%"PRId64, reg);
else if (reg < 64)
printf("%s", ldst_spr_names[reg - 60]);
else
printf("FPR%"PRId64, reg - 64);
printf(":\t%016"PRIx64"\n", data);

@ -21,9 +21,9 @@ cd $TMPDIR

cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin

${MICROWATT_DIR}/core_tb | sed 's/.*: //' | egrep '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 | grep -v XER > test.out || true
${MICROWATT_DIR}/core_tb | sed 's/.*: //' | egrep '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true

grep -v "^$" ${MICROWATT_DIR}/tests/${TEST}.out | sort | grep -v GPR31 | grep -v XER > exp.out
grep -v "^$" ${MICROWATT_DIR}/tests/${TEST}.out | sort | grep -v GPR31 > exp.out

cp test.out /tmp
cp exp.out /tmp

@ -1410,6 +1410,110 @@ int fpu_test_23(void)
return trapit(0, test23);
}

struct idiv_tests {
unsigned long denom;
unsigned long divisor;
unsigned long divd;
unsigned long divdu;
unsigned long divde;
unsigned long divdeu;
unsigned long modsd;
unsigned long modud;
} idiv_tests[] = {
{ 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x56789a, 0x1234, 0x4c0, 0x4c0, 0, 0, 0x19a, 0x19a },
{ 2, 3, 0, 0, 0, 0xaaaaaaaaaaaaaaaa, 2, 2 },
{ 31, 157, 0, 0, 0x328c3ab35cf15328, 0x328c3ab35cf15328, 31, 31 },
{ -4329874, 43879, -98, 0x17e5a119b9170, 0, 0, -29732, 39518 },
{ -4329874, -43879, 98, 0, 0, 0xffffffffffbe99d4, -29732, -4329874 },
{ 0x8000000000000000ul, -1, 0, 0, 0, 0x8000000000000000ul, 0, 0x8000000000000000ul },
};

int fpu_test_24(void)
{
long i;
unsigned long a, b, results[6];

for (i = 0; i < sizeof(idiv_tests) / sizeof(idiv_tests[0]); ++i) {
a = idiv_tests[i].denom;
b = idiv_tests[i].divisor;
asm("divd %0,%1,%2" : "=r" (results[0]) : "r" (a), "r" (b));
asm("divdu %0,%1,%2" : "=r" (results[1]) : "r" (a), "r" (b));
asm("divde %0,%1,%2" : "=r" (results[2]) : "r" (a), "r" (b));
asm("divdeu %0,%1,%2" : "=r" (results[3]) : "r" (a), "r" (b));
asm("modsd %0,%1,%2" : "=r" (results[4]) : "r" (a), "r" (b));
asm("modud %0,%1,%2" : "=r" (results[5]) : "r" (a), "r" (b));
if (results[0] != idiv_tests[i].divd ||
results[1] != idiv_tests[i].divdu ||
results[2] != idiv_tests[i].divde ||
results[3] != idiv_tests[i].divdeu ||
results[4] != idiv_tests[i].modsd ||
results[5] != idiv_tests[i].modud) {
print_hex(i, 2, " ");
print_hex(results[0], 16, " ");
print_hex(results[1], 16, " ");
print_hex(results[2], 16, " ");
print_hex(results[3], 16, " ");
print_hex(results[4], 16, " ");
print_hex(results[5], 16, "\r\n");
return i + 1;
}
}
return 0;
}

struct wdiv_tests {
unsigned int denom;
unsigned int divisor;
unsigned int divw;
unsigned int divwu;
unsigned int divwe;
unsigned int divweu;
unsigned int modsw;
unsigned int moduw;
} wdiv_tests[] = {
{ 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x56789a, 0x1234, 0x4c0, 0x4c0, 0, 0, 0x19a, 0x19a },
{ 2, 3, 0, 0, 0, 0xaaaaaaaa, 2, 2 },
{ 31, 157, 0, 0, 0x328c3ab3, 0x328c3ab3, 31, 31 },
{ -4329874, 43879, -98, 0x17df7, 0, 0, -29732, 17165 },
{ -4329874, -43879, 98, 0, 0, 0xffbe99a9, -29732, -4329874 },
{ 0x80000000u, -1, 0, 0, 0, 0x80000000u, 0, 0x80000000u },
};

int fpu_test_25(void)
{
long i;
unsigned int a, b, results[6];

for (i = 0; i < sizeof(wdiv_tests) / sizeof(wdiv_tests[0]); ++i) {
a = wdiv_tests[i].denom;
b = wdiv_tests[i].divisor;
asm("divw %0,%1,%2" : "=r" (results[0]) : "r" (a), "r" (b));
asm("divwu %0,%1,%2" : "=r" (results[1]) : "r" (a), "r" (b));
asm("divwe %0,%1,%2" : "=r" (results[2]) : "r" (a), "r" (b));
asm("divweu %0,%1,%2" : "=r" (results[3]) : "r" (a), "r" (b));
asm("modsw %0,%1,%2" : "=r" (results[4]) : "r" (a), "r" (b));
asm("moduw %0,%1,%2" : "=r" (results[5]) : "r" (a), "r" (b));
if (results[0] != wdiv_tests[i].divw ||
results[1] != wdiv_tests[i].divwu ||
results[2] != wdiv_tests[i].divwe ||
results[3] != wdiv_tests[i].divweu ||
results[4] != wdiv_tests[i].modsw ||
results[5] != wdiv_tests[i].moduw) {
print_hex(i, 2, " ");
print_hex(results[0], 8, " ");
print_hex(results[1], 8, " ");
print_hex(results[2], 8, " ");
print_hex(results[3], 8, " ");
print_hex(results[4], 8, " ");
print_hex(results[5], 8, "\r\n");
return i + 1;
}
}
return 0;
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -1458,6 +1562,8 @@ int main(void)
do_test(21, fpu_test_21);
do_test(22, fpu_test_22);
do_test(23, fpu_test_23);
do_test(24, fpu_test_24);
do_test(25, fpu_test_25);

return fail;
}

@ -230,63 +230,3 @@ restore:
ld %r0,16(%r1)
mtlr %r0
blr

.global do_lq
do_lq:
lq %r6,0(%r3)
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

.global do_lq_np /* "non-preferred" form of lq */
do_lq_np:
mr %r7,%r3
lq %r6,0(%r7)
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

.global do_lq_bad /* illegal form of lq */
do_lq_bad:
mr %r6,%r3
.long 0xe0c60000 /* lq %r6,0(%r6) */
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

.global do_stq
do_stq:
ld %r8,0(%r4)
ld %r9,8(%r4)
stq %r8,0(%r3)
li %r3,0
blr

/* big-endian versions of the above */
.global do_lq_be
do_lq_be:
.long 0x0000c3e0
.long 0x0000c4f8
.long 0x0800e4f8
.long 0x00006038
.long 0x2000804e

.global do_lq_np_be /* "non-preferred" form of lq */
do_lq_np_be:
.long 0x781b677c
.long 0x0000c7e0
.long 0x0000c4f8
.long 0x0800e4f8
.long 0x00006038
.long 0x2000804e

.global do_stq_be
do_stq_be:
.long 0x000004e9
.long 0x080024e9
.long 0x020003f9
.long 0x00006038
.long 0x2000804e

@ -12,14 +12,6 @@
extern unsigned long callit(unsigned long arg1, unsigned long arg2,
unsigned long fn, unsigned long msr);

extern void do_lq(void *src, unsigned long *regs);
extern void do_lq_np(void *src, unsigned long *regs);
extern void do_lq_bad(void *src, unsigned long *regs);
extern void do_stq(void *dst, unsigned long *regs);
extern void do_lq_be(void *src, unsigned long *regs);
extern void do_lq_np_be(void *src, unsigned long *regs);
extern void do_stq_be(void *dst, unsigned long *regs);

static inline void do_tlbie(unsigned long rb, unsigned long rs)
{
__asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory");
@ -302,167 +294,6 @@ int mode_test_6(void)
return 0;
}

int mode_test_7(void)
{
unsigned long quad[4] __attribute__((__aligned__(16)));
unsigned long regs[2];
unsigned long ret, msr;

/*
* Test lq/stq in LE mode
*/
msr = MSR_SF | MSR_LE;
quad[0] = 0x123456789abcdef0ul;
quad[1] = 0xfafa5959bcbc3434ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq, msr);
if (ret)
return ret | 1;
if (regs[0] != quad[1] || regs[1] != quad[0])
return 2;
/* unaligned may give alignment interrupt */
quad[2] = 0x0011223344556677ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq, msr);
if (ret == 0) {
if (regs[0] != quad[2] || regs[1] != quad[1])
return 3;
} else if (ret == 0x600) {
if (mfspr(SPRG0) != (unsigned long) &do_lq ||
mfspr(DAR) != (unsigned long) &quad[1])
return ret | 4;
} else
return ret | 5;

/* try stq */
regs[0] = 0x5238523852385238ul;
regs[1] = 0x5239523952395239ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_stq, msr);
if (ret)
return ret | 5;
if (quad[0] != regs[1] || quad[1] != regs[0])
return 6;
regs[0] = 0x0172686966746564ul;
regs[1] = 0xfe8d0badd00dabcdul;
ret = callit((unsigned long)quad + 1, (unsigned long)regs,
(unsigned long)&do_stq, msr);
if (ret)
return ret | 7;
if (((quad[0] >> 8) | (quad[1] << 56)) != regs[1] ||
((quad[1] >> 8) | (quad[2] << 56)) != regs[0])
return 8;

/* try lq non-preferred form */
quad[0] = 0x56789abcdef01234ul;
quad[1] = 0x5959bcbc3434fafaul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_np, msr);
if (ret)
return ret | 9;
if (regs[0] != quad[1] || regs[1] != quad[0])
return 10;
/* unaligned should give alignment interrupt in uW implementation */
quad[2] = 0x6677001122334455ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_np, msr);
if (ret == 0x600) {
if (mfspr(SPRG0) != (unsigned long) &do_lq_np + 4 ||
mfspr(DAR) != (unsigned long) &quad[1])
return ret | 11;
} else
return 12;

/* make sure lq with rt = ra causes an illegal instruction interrupt */
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_bad, msr);
if (ret != 0x700)
return 13;
if (mfspr(SPRG0) != (unsigned long)&do_lq_bad + 4 ||
!(mfspr(SPRG3) & 0x80000))
return 14;
return 0;
}

int mode_test_8(void)
{
unsigned long quad[4] __attribute__((__aligned__(16)));
unsigned long regs[2];
unsigned long ret, msr;

/*
* Test lq/stq in BE mode
*/
msr = MSR_SF;
quad[0] = 0x123456789abcdef0ul;
quad[1] = 0xfafa5959bcbc3434ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_be, msr);
if (ret)
return ret | 1;
if (regs[0] != quad[0] || regs[1] != quad[1]) {
print_hex(regs[0], 16);
print_string(" ");
print_hex(regs[1], 16);
print_string(" ");
return 2;
}
/* don't expect alignment interrupt */
quad[2] = 0x0011223344556677ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_be, msr);
if (ret == 0) {
if (regs[0] != quad[1] || regs[1] != quad[2])
return 3;
} else
return ret | 5;

/* try stq */
regs[0] = 0x5238523852385238ul;
regs[1] = 0x5239523952395239ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_stq_be, msr);
if (ret)
return ret | 5;
if (quad[0] != regs[0] || quad[1] != regs[1])
return 6;
regs[0] = 0x0172686966746564ul;
regs[1] = 0xfe8d0badd00dabcdul;
ret = callit((unsigned long)quad + 1, (unsigned long)regs,
(unsigned long)&do_stq_be, msr);
if (ret)
return ret | 7;
if (((quad[0] >> 8) | (quad[1] << 56)) != regs[0] ||
((quad[1] >> 8) | (quad[2] << 56)) != regs[1]) {
print_hex(quad[0], 16);
print_string(" ");
print_hex(quad[1], 16);
print_string(" ");
print_hex(quad[2], 16);
print_string(" ");
return 8;
}

/* try lq non-preferred form */
quad[0] = 0x56789abcdef01234ul;
quad[1] = 0x5959bcbc3434fafaul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_np_be, msr);
if (ret)
return ret | 9;
if (regs[0] != quad[0] || regs[1] != quad[1])
return 10;
/* unaligned should not give alignment interrupt in uW implementation */
quad[2] = 0x6677001122334455ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_np_be, msr);
if (ret)
return ret | 11;
if (regs[0] != quad[1] || regs[1] != quad[2])
return 12;
return 0;
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -507,8 +338,6 @@ int main(void)
do_test(4, mode_test_4);
do_test(5, mode_test_5);
do_test(6, mode_test_6);
do_test(7, mode_test_7);
do_test(8, mode_test_8);

return fail;
}

@ -155,31 +155,3 @@ call_ret:
ld %r31,248(%r1)
addi %r1,%r1,256
blr

.global do_lqarx
do_lqarx:
/* r3 = src, r4 = regs */
lqarx %r10,0,%r3
std %r10,0(%r4)
std %r11,8(%r4)
li %r3,0
blr

.global do_lqarx_bad
do_lqarx_bad:
/* r3 = src, r4 = regs */
.long 0x7d405228 /* lqarx %r10,0,%r10 */
std %r10,0(%r4)
std %r11,8(%r4)
li %r3,0
blr

.global do_stqcx
do_stqcx:
/* r3 = dest, r4 = regs, return CR */
ld %r10,0(%r4)
ld %r11,8(%r4)
stqcx. %r10,0,%r3
mfcr %r3
oris %r3,%r3,1 /* to distinguish from trap number */
blr

@ -7,10 +7,6 @@
extern unsigned long callit(unsigned long arg1, unsigned long arg2,
unsigned long (*fn)(unsigned long, unsigned long));

extern unsigned long do_lqarx(unsigned long src, unsigned long regs);
extern unsigned long do_lqarx_bad(unsigned long src, unsigned long regs);
extern unsigned long do_stqcx(unsigned long dst, unsigned long regs);

#define DSISR 18
#define DAR 19
#define SRR0 26
@ -184,63 +180,6 @@ int resv_test_2(void)
return 0;
}

/* test lqarx/stqcx */
int resv_test_3(void)
{
unsigned long x[4] __attribute__((__aligned__(16)));
unsigned long y[2], regs[2];
unsigned long ret, offset;
int count;

x[0] = 0x7766554433221100ul;
x[1] = 0xffeeddccbbaa9988ul;
y[0] = 0x0badcafef00dd00dul;
y[1] = 0xdeadbeef07070707ul;
for (count = 0; count < 1000; ++count) {
ret = callit((unsigned long)x, (unsigned long)regs, do_lqarx);
if (ret)
return ret | 1;
ret = callit((unsigned long)x, (unsigned long)y, do_stqcx);
if (ret < 0x10000)
return ret | 2;
if (ret & 0x20000000)
break;
}
if (count == 1000)
return 3;
if (x[0] != y[1] || x[1] != y[0])
return 4;
if (regs[1] != 0x7766554433221100ul || regs[0] != 0xffeeddccbbaa9988ul)
return 5;
ret = callit((unsigned long)x, (unsigned long)regs, do_stqcx);
if (ret < 0x10000 || (ret & 0x20000000))
return ret | 12;
/* test alignment interrupts */
for (offset = 0; offset < 16; ++offset) {
ret = callit((unsigned long)x + offset, (unsigned long)regs, do_lqarx);
if (ret == 0 && (offset & 15) != 0)
return 6;
if (ret == 0x600) {
if ((offset & 15) == 0)
return ret + 7;
} else if (ret)
return ret;
ret = callit((unsigned long)x + offset, (unsigned long)y, do_stqcx);
if (ret >= 0x10000 && (offset & 15) != 0)
return 8;
if (ret == 0x600) {
if ((offset & 15) == 0)
return ret + 9;
} else if (ret < 0x10000)
return ret;
}
/* test illegal interrupt for bad lqarx case */
ret = callit((unsigned long)x, (unsigned long)regs, do_lqarx_bad);
if (ret != 0x700 || !(mfspr(SRR1) & 0x80000))
return ret + 10;
return 0;
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -265,7 +204,6 @@ int main(void)

do_test(1, resv_test_1);
do_test(2, resv_test_2);
do_test(3, resv_test_3);

return fail;
}

Binary file not shown.

@ -4,5 +4,3 @@ test 03:PASS
test 04:PASS
test 05:PASS
test 06:PASS
test 07:PASS
test 08:PASS

Binary file not shown.

@ -1,3 +1,2 @@
test 01:PASS
test 02:PASS
test 03:PASS

@ -19,24 +19,18 @@ entity writeback is
c_out : out WritebackToCrFileType;
f_out : out WritebackToFetch1Type;

wb_bypass : out bypass_data_t;

-- PMU event bus
events : out WritebackEventType;

flush_out : out std_ulogic;
interrupt_out: out std_ulogic;
interrupt_out: out WritebackToExecute1Type;
complete_out : out instr_tag_t
);
end entity writeback;

architecture behaviour of writeback is
type irq_state_t is (WRITE_SRR0, WRITE_SRR1);

type reg_type is record
state : irq_state_t;
srr1 : std_ulogic_vector(63 downto 0);
end record;

signal r, rin : reg_type;

begin
writeback_0: process(clk)
@ -45,13 +39,6 @@ begin
variable w : std_ulogic_vector(0 downto 0);
begin
if rising_edge(clk) then
if rst = '1' then
r.state <= WRITE_SRR0;
r.srr1 <= (others => '0');
else
r <= rin;
end if;

-- Do consistency checks only on the clock edge
x(0) := e_in.valid;
y(0) := l_in.valid;
@ -66,11 +53,13 @@ begin
to_integer(unsigned(w))) <= 1 severity failure;

w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc);
x(0) := l_in.rc;
y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure;

assert (e_in.write_xerc_enable and fp_in.write_xerc) /= '1' severity failure;

assert not (e_in.valid = '1' and e_in.instr_tag.valid = '0') severity failure;
assert not (l_in.valid = '1' and l_in.instr_tag.valid = '0') severity failure;
assert not (fp_in.valid = '1' and fp_in.instr_tag.valid = '0') severity failure;
@ -78,11 +67,7 @@ begin
end process;

writeback_1: process(all)
variable v : reg_type;
variable f : WritebackToFetch1Type;
variable cf: std_ulogic_vector(3 downto 0);
variable zero : std_ulogic;
variable sign : std_ulogic;
variable scf : std_ulogic_vector(3 downto 0);
variable vec : integer range 0 to 16#fff#;
variable srr1 : std_ulogic_vector(15 downto 0);
@ -91,9 +76,7 @@ begin
w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;
f := WritebackToFetch1Init;
interrupt_out <= '0';
vec := 0;
v := r;

complete_out <= instr_tag_init;
if e_in.valid = '1' then
@ -107,37 +90,21 @@ begin
events.fp_complete <= fp_in.valid;

intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;
interrupt_out.intr <= intr;

if r.state = WRITE_SRR1 then
w_out.write_reg <= fast_spr_num(SPR_SRR1);
w_out.write_data <= r.srr1;
w_out.write_enable <= '1';
interrupt_out <= '1';
v.state := WRITE_SRR0;

elsif intr = '1' then
w_out.write_reg <= fast_spr_num(SPR_SRR0);
w_out.write_enable <= '1';
v.state := WRITE_SRR1;
if intr = '1' then
srr1 := (others => '0');
if e_in.interrupt = '1' then
vec := e_in.intr_vec;
w_out.write_data <= e_in.last_nia;
srr1 := e_in.srr1;
elsif l_in.interrupt = '1' then
vec := l_in.intr_vec;
w_out.write_data <= l_in.srr0;
srr1 := l_in.srr1;
elsif fp_in.interrupt = '1' then
vec := fp_in.intr_vec;
w_out.write_data <= fp_in.srr0;
srr1 := fp_in.srr1;
end if;
v.srr1(63 downto 31) := e_in.msr(63 downto 31);
v.srr1(30 downto 27) := srr1(14 downto 11);
v.srr1(26 downto 22) := e_in.msr(26 downto 22);
v.srr1(21 downto 16) := srr1(5 downto 0);
v.srr1(15 downto 0) := e_in.msr(15 downto 0);
interrupt_out.srr1 <= srr1;

else
if e_in.write_enable = '1' then
@ -169,6 +136,11 @@ begin
c_out.write_cr_data <= fp_in.write_cr_data;
end if;

if fp_in.write_xerc = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= fp_in.xerc;
end if;

if l_in.write_enable = '1' then
w_out.write_reg <= l_in.write_reg;
w_out.write_data <= l_in.write_data;
@ -186,24 +158,6 @@ begin
c_out.write_cr_data(31 downto 28) <= scf;
end if;

-- Perform CR0 update for RC forms
-- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data
if e_in.rc = '1' and e_in.write_enable = '1' then
zero := not (or e_in.write_data(31 downto 0));
if e_in.mode_32bit = '0' then
sign := e_in.write_data(63);
zero := zero and not (or e_in.write_data(63 downto 32));
else
sign := e_in.write_data(31);
end if;
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0);
cf(3) := sign;
cf(2) := not sign and not zero;
cf(1) := zero;
cf(0) := e_in.xerc.so;
c_out.write_cr_data(31 downto 28) <= cf;
end if;
end if;

-- Outputs to fetch1
@ -236,6 +190,10 @@ begin
f_out <= f;
flush_out <= f_out.redirect;

rin <= v;
-- Register write data bypass to decode2
wb_bypass.tag.tag <= complete_out.tag;
wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable;
wb_bypass.data <= w_out.write_data;

end process;
end;

Loading…
Cancel
Save