Merge pull request #419 from paulusmack/prefix

Add support for prefixed instructions
pull/421/head
Paul Mackerras 8 months ago committed by GitHub
commit f668597f67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -263,6 +263,10 @@ package common is
valid: std_ulogic;
stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
prefixed: std_ulogic;
prefix: std_ulogic_vector(25 downto 0);
illegal_suffix: std_ulogic;
misaligned_prefix: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
@ -274,7 +278,9 @@ package common is
reg_c : gspr_index_t;
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
(valid => '0', stop_mark => '0', nia => (others => '0'),
prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
illegal_suffix => '0', misaligned_prefix => '0',
decode => decode_rom_init, br_pred => '0', big_endian => '0',
spr_info => spr_id_init, ram_spr => ram_spr_info_init,
reg_a => (others => '0'), reg_b => (others => '0'), reg_c => (others => '0'));
@ -359,9 +365,12 @@ package common is
ramspr_write_odd : std_ulogic;
dbg_spr_access : std_ulogic;
dec_ctr : std_ulogic;
prefixed : std_ulogic;
illegal_suffix : std_ulogic;
misaligned_prefix : std_ulogic;
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0',
lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
@ -378,6 +387,7 @@ package common is
ramspr_wraddr => (others => '0'), ramspr_write_even => '0', ramspr_write_odd => '0',
dbg_spr_access => '0',
dec_ctr => '0',
prefixed => '0', illegal_suffix => '0', misaligned_prefix => '0',
others => (others => '0'));

type MultiplyInputType is record
@ -500,6 +510,7 @@ package common is
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
mode_32bit : std_ulogic; -- trim addresses to 32 bits
is_32bit : std_ulogic;
prefixed : std_ulogic;
repeat : std_ulogic;
second : std_ulogic;
e2stall : std_ulogic;
@ -514,7 +525,7 @@ package common is
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
mode_32bit => '0', is_32bit => '0', prefixed => '0',
repeat => '0', second => '0', e2stall => '0',
msr => (others => '0'));


@ -45,6 +45,16 @@ architecture behaviour of decode1 is
signal decode_rom_addr : insn_code;
signal decode : decode_rom_t;

type prefix_state_t is record
prefixed : std_ulogic;
prefix : std_ulogic_vector(25 downto 0);
pref_ia : std_ulogic_vector(3 downto 0);
end record;
constant prefix_state_init : prefix_state_t := (prefixed => '0', prefix => (others => '0'),
pref_ia => (others => '0'));

signal pr, pr_in : prefix_state_t;

signal fetch_failed : std_ulogic;

-- If we have an FPU, then it is used for integer divisions,
@ -64,7 +74,7 @@ architecture behaviour of decode1 is
constant decode_rom : decoder_rom_t := (
-- unit fac internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl rpt
-- op in out A out in out len ext pipe
INSN_illegal => (NONE, NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_illegal => (ALU, NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_fetch_fail => (LDST, NONE, OP_FETCH_FAILED, CIA, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),

INSN_add => (ALU, NONE, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
@ -79,10 +89,10 @@ architecture behaviour of decode1 is
INSN_addme => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_addpcis => (ALU, NONE, OP_ADD, CIA, CONST_DXHI4, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_addze => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_and => (ALU, NONE, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_andc => (ALU, NONE, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_andi_dot => (ALU, NONE, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_andis_dot => (ALU, NONE, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_and => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_andc => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_andi_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_andis_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_attn => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_b => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bc => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
@ -90,6 +100,9 @@ architecture behaviour of decode1 is
INSN_bclr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bctar => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bperm => (ALU, NONE, OP_BPERM, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_brh => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_brw => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
@ -258,14 +271,30 @@ architecture behaviour of decode1 is
INSN_mulld => (ALU, NONE, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
INSN_mulli => (ALU, NONE, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_mullw => (ALU, NONE, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RCOE, '0', '0', NONE),
INSN_nand => (ALU, NONE, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_nand => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_neg => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_nop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_nor => (ALU, NONE, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_or => (ALU, NONE, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_orc => (ALU, NONE, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_ori => (ALU, NONE, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_oris => (ALU, NONE, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_nor => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE),
INSN_or => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE),
INSN_orc => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE),
INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfs => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_plha => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plhz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plwa => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plwz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstb => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstd => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstfd => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntb => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntd => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntw => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -347,7 +376,7 @@ architecture behaviour of decode1 is
INSN_xori => (ALU, NONE, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_xoris => (ALU, NONE, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),

others => (NONE, NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE)
others => (ALU, NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE)
);

function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
@ -434,12 +463,17 @@ begin
if rst = '1' then
r <= Decode1ToDecode2Init;
fetch_failed <= '0';
pr <= prefix_state_init;
elsif flush_in = '1' then
r.valid <= '0';
fetch_failed <= '0';
pr <= prefix_state_init;
elsif stall_in = '0' then
r <= rin;
fetch_failed <= f_in.fetch_failed;
if f_in.valid = '1' then
pr <= pr_in;
end if;
end if;
if rst = '1' then
br.br_nia <= (others => '0');
@ -471,12 +505,18 @@ begin
variable icode : insn_code;
variable sprn : spr_num_t;
variable maybe_rb : std_ulogic;
variable pv : prefix_state_t;
variable icode_bits : std_ulogic_vector(9 downto 0);
variable valid_suffix : std_ulogic;
begin
v := Decode1ToDecode2Init;
pv := pr;

v.valid := f_in.valid;
v.nia := f_in.nia;
v.insn := f_in.insn;
v.prefix := pr.prefix;
v.prefixed := pr.prefixed;
v.stop_mark := f_in.stop_mark;
v.big_endian := f_in.big_endian;

@ -490,17 +530,65 @@ begin
end if;

icode := f_in.icode;
icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(icode), 10));

if f_in.fetch_failed = '1' then
icode := INSN_fetch_fail;
icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_fetch_fail), 10));
-- Only send down a single OP_FETCH_FAILED
v.valid := not fetch_failed;
pv := prefix_state_init;

elsif pr.prefixed = '1' then
-- Check suffix value and convert to the prefixed instruction code
if pr.prefix(24) = '1' then
-- either pnop or illegal
icode_bits := std_ulogic_vector(to_unsigned(insn_code'pos(INSN_pnop), 10));
else
-- various load/store instructions
icode_bits(0) := '1';
end if;
valid_suffix := '0';
case pr.prefix(25 downto 23) is
when "000" => -- 8LS
if icode >= INSN_first_8ls and icode < INSN_first_rb then
valid_suffix := '1';
end if;
when "100" => -- MLS
if icode >= INSN_first_mls and icode < INSN_first_8ls then
valid_suffix := '1';
elsif icode >= INSN_first_fp_mls and icode < INSN_first_fp_nonmls then
valid_suffix := '1';
end if;
when "110" => -- MRR, i.e. pnop
if pr.prefix(22 downto 20) = "000" then
valid_suffix := '1';
end if;
when others =>
end case;
v.nia(5 downto 2) := pr.pref_ia;
v.prefixed := '1';
v.prefix := pr.prefix;
v.illegal_suffix := not valid_suffix;
pv := prefix_state_init;

elsif icode = INSN_prefix then
pv.prefixed := '1';
pv.pref_ia := f_in.nia(5 downto 2);
pv.prefix := f_in.insn(25 downto 0);
-- Check if the address of the prefix mod 64 is 60;
-- if so we need to arrange to generate an alignment interrupt
if f_in.nia(5 downto 2) = "1111" then
v.misaligned_prefix := '1';
else
v.valid := '0';
end if;

end if;
decode_rom_addr <= icode;
decode_rom_addr <= insn_code'val(to_integer(unsigned(icode_bits)));

if f_in.valid = '1' then
report "Decode " & insn_code'image(icode) & " " & to_hstring(f_in.insn) &
" at " & to_hstring(f_in.nia);
report "Decode " & insn_code'image(insn_code'val(to_integer(unsigned(icode_bits)))) & " " &
to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
end if;

-- Branch predictor
@ -533,6 +621,8 @@ begin
br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);

-- Work out GPR/FPR read addresses
-- Note that for prefixed instructions we are working this out based
-- only on the suffix.
maybe_rb := '0';
vr.reg_1_addr := '0' & insn_ra(f_in.insn);
vr.reg_2_addr := '0' & insn_rb(f_in.insn);
@ -568,6 +658,7 @@ begin
-- Update registers
rin <= v;
br_in <= bv;
pr_in <= pv;

-- Update outputs
d_out <= r;

@ -83,12 +83,13 @@ architecture behaviour of decode2 is
constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));

function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
prefix : std_ulogic_vector(25 downto 0);
instr_addr : std_ulogic_vector(63 downto 0))
return decode_input_reg_t is
begin
if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
if t = RA or ((t = RA_OR_ZERO or t = RA0_OR_CIA) and insn_ra(insn_in) /= "00000") then
return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
elsif t = CIA then
elsif t = CIA or (t = RA0_OR_CIA and insn_prefix_r(prefix) = '1') then
return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
@ -97,7 +98,8 @@ architecture behaviour of decode2 is
end if;
end;

function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0))
function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
prefix : std_ulogic_vector(25 downto 0))
return decode_input_reg_t is
variable ret : decode_input_reg_t;
begin
@ -114,6 +116,8 @@ architecture behaviour of decode2 is
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)), 64)));
when CONST_PSI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_prefixed_si(prefix, insn_in)), 64)));
when CONST_SI_HI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64)));
when CONST_UI_HI =>
@ -201,13 +205,13 @@ architecture behaviour of decode2 is
type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);

constant result_select : mux_select_array_t := (
OP_AND => "001", -- logical_result
OP_OR => "001",
OP_LOGIC => "001", -- logical_result
OP_XOR => "001",
OP_PRTY => "001",
OP_CMPB => "001",
OP_EXTS => "001",
OP_BPERM => "001",
OP_BREV => "001",
OP_BCD => "001",
OP_MTSPR => "001",
OP_RLC => "010", -- rotator_result
@ -367,21 +371,27 @@ begin
c_out.read <= d_in.decode.input_cr;

decode2_addrs: process(all)
variable dec_a, dec_b, dec_c : decode_input_reg_t;
variable dec_o : decode_output_reg_t;
begin
decoded_reg_a <= decode_input_reg_init;
decoded_reg_b <= decode_input_reg_init;
decoded_reg_c <= decode_input_reg_init;
decoded_reg_o <= decode_output_reg_init;
if d_in.valid = '1' then
decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.nia);
decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn);
decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
dec_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.prefix, d_in.nia);
dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
if d_in.valid = '0' or d_in.illegal_suffix = '1' then
dec_a.reg_valid := '0';
dec_b.reg_valid := '0';
dec_c.reg_valid := '0';
dec_o.reg_valid := '0';
end if;

r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read2_enable <= decoded_reg_b.reg_valid;
r_out.read3_enable <= decoded_reg_c.reg_valid;
decoded_reg_a <= dec_a;
decoded_reg_b <= dec_b;
decoded_reg_c <= dec_c;
decoded_reg_o <= dec_o;
r_out.read1_enable <= dec_a.reg_valid;
r_out.read2_enable <= dec_b.reg_valid;
r_out.read3_enable <= dec_c.reg_valid;

end process;

@ -588,6 +598,9 @@ begin
v.e.result_sel := "001"; -- logical_result
end if;
end if;
v.e.prefixed := d_in.prefixed;
v.e.illegal_suffix := d_in.illegal_suffix;
v.e.misaligned_prefix := d_in.misaligned_prefix;

elsif dc2.e.valid = '1' then
-- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.

@ -3,8 +3,9 @@ use ieee.std_logic_1164.all;

package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BCD, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BCD, OP_BPERM, OP_BREV,
OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_ICBI, OP_ICBT,
@ -12,10 +13,11 @@ package decode_types is
OP_DIV, OP_DIVE, OP_MOD,
OP_EXTS, OP_EXTSWSLI,
OP_ISEL, OP_ISYNC,
OP_LOGIC,
OP_LOAD, OP_STORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_MUL_H64, OP_MUL_H32,
OP_POPCNT, OP_PRTY, OP_RFID,
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
OP_SHL, OP_SHR,
@ -34,222 +36,241 @@ package decode_types is
-- The following instructions don't have an RB operand or access FPRs
INSN_illegal, -- 0
INSN_fetch_fail,
INSN_addi,
INSN_prefix,
INSN_pnop,
INSN_addic,
INSN_addic_dot,
INSN_addis,
INSN_addme,
INSN_addpcis,
INSN_addze,
INSN_andi_dot,
INSN_andis_dot, -- 10
INSN_andi_dot, -- 10
INSN_andis_dot,
INSN_attn,
INSN_b,
INSN_bc,
INSN_bcctr,
INSN_bclr,
INSN_bctar,
INSN_brh,
INSN_brw,
INSN_brd, -- 20
INSN_cbcdtd,
INSN_cdtbcd,
INSN_cmpi,
INSN_cmpli, -- 20
INSN_cmpli,
INSN_cntlzw,
INSN_cntlzd,
INSN_cnttzw,
INSN_cnttzd,
INSN_crand,
INSN_crandc,
INSN_crandc, -- 30
INSN_creqv,
INSN_crnand,
INSN_crnor,
INSN_cror, -- 30
INSN_cror,
INSN_crorc,
INSN_crxor,
INSN_darn,
INSN_eieio,
INSN_extsb,
INSN_extsh,
INSN_extsh, -- 40
INSN_extsw,
INSN_extswsli,
INSN_isync,
INSN_lbz, -- 40
INSN_lbzu,
INSN_ld,
INSN_ldu,
INSN_lha,
INSN_lhau,
INSN_lhz,
INSN_lhzu,
INSN_lwa,
INSN_lwz,
INSN_lwzu, -- 50
INSN_mcrf,
INSN_mcrfs,
INSN_lwzu,
INSN_mcrf, -- 50
INSN_mcrxrx,
INSN_mfcr,
INSN_mfmsr,
INSN_mfspr,
INSN_mtcrf,
INSN_mtfsb,
INSN_mtfsfi,
INSN_mtmsr, -- 60
INSN_mtmsr,
INSN_mtmsrd,
INSN_mtspr,
INSN_mulli,
INSN_neg,
INSN_neg, -- 60
INSN_nop,
INSN_ori,
INSN_oris,
INSN_popcntb,
INSN_popcntw,
INSN_popcntd, -- 70
INSN_popcntd,
INSN_prtyw,
INSN_prtyd,
INSN_rfid,
INSN_rldic,
INSN_rldic, -- 70
INSN_rldicl,
INSN_rldicr,
INSN_rldimi,
INSN_rlwimi,
INSN_rlwinm,
INSN_sc, -- 80
INSN_sc,
INSN_setb,
INSN_slbia,
INSN_sradi,
INSN_srawi,
INSN_stb,
INSN_srawi, -- 80
INSN_stbu,
INSN_std,
INSN_stdu,
INSN_sth,
INSN_sthu, -- 90
INSN_stw,
INSN_sthu,
INSN_stwu,
INSN_subfic,
INSN_subfme,
INSN_subfze,
INSN_sync,
INSN_tdi,
INSN_tdi, -- 90
INSN_tlbsync,
INSN_twi,
INSN_wait, -- 100
INSN_wait,
INSN_xori,
INSN_xoris,

-- pad to 112 to simplify comparison logic
INSN_103,
INSN_104, INSN_105, INSN_106, INSN_107,
INSN_108, INSN_109, INSN_110, INSN_111,
-- Non-prefixed instructions that have a MLS:D prefixed form and
-- their corresponding prefixed instructions.
-- The non-prefixed versions have even indexes so that we can
-- convert them to the prefixed version by setting bit 0
INSN_addi, -- 96
INSN_paddi,
INSN_lbz,
INSN_plbz,
INSN_lha, -- 100
INSN_plha,
INSN_lhz,
INSN_plhz,
INSN_lwz,
INSN_plwz,
INSN_stb,
INSN_pstb,
INSN_sth,
INSN_psth,
INSN_stw, -- 110
INSN_pstw,

-- Slots for non-prefixed opcodes that are 8LS:D when prefixed
INSN_lhzu, -- 112
INSN_plwa,
INSN_op57,
INSN_pld,
INSN_op61,
INSN_pstd,

-- pad to 128 to simplify comparison logic
INSN_076, INSN_077,
INSN_078, INSN_079, INSN_07a, INSN_07b, INSN_07c, INSN_07d, INSN_07e, INSN_07f,

-- The following instructions have an RB operand but don't access FPRs
INSN_add,
INSN_addc,
INSN_adde,
INSN_adde, -- 130
INSN_addex,
INSN_addg6s,
INSN_and,
INSN_andc,
INSN_bperm,
INSN_cmp, -- 120
INSN_cmp,
INSN_cmpb,
INSN_cmpeqb,
INSN_cmpl,
INSN_cmprb,
INSN_cmprb, -- 140
INSN_dcbf,
INSN_dcbst,
INSN_dcbt,
INSN_dcbtst,
INSN_dcbz,
INSN_divd, -- 130
INSN_divd,
INSN_divdu,
INSN_divde,
INSN_divdeu,
INSN_divw,
INSN_divw, -- 150
INSN_divwu,
INSN_divwe,
INSN_divweu,
INSN_eqv,
INSN_icbi,
INSN_icbt, -- 140
INSN_icbt,
INSN_isel,
INSN_lbarx,
INSN_lbzcix,
INSN_lbzux,
INSN_lbzux, -- 160
INSN_lbzx,
INSN_ldarx,
INSN_ldbrx,
INSN_ldcix,
INSN_ldx,
INSN_ldux, -- 150
INSN_ldux,
INSN_lharx,
INSN_lhax,
INSN_lhaux,
INSN_lhbrx,
INSN_lhbrx, -- 170
INSN_lhzcix,
INSN_lhzx,
INSN_lhzux,
INSN_lwarx,
INSN_lwax,
INSN_lwaux, -- 160
INSN_lwaux,
INSN_lwbrx,
INSN_lwzcix,
INSN_lwzx,
INSN_lwzux,
INSN_lwzux, -- 180
INSN_modsd,
INSN_modsw,
INSN_moduw,
INSN_modud,
INSN_mulhw,
INSN_mulhwu, -- 170
INSN_mulhwu,
INSN_mulhd,
INSN_mulhdu,
INSN_mullw,
INSN_mulld,
INSN_mulld, -- 190
INSN_nand,
INSN_nor,
INSN_or,
INSN_orc,
INSN_rldcl,
INSN_rldcr, -- 180
INSN_rldcr,
INSN_rlwnm,
INSN_slw,
INSN_sld,
INSN_sraw,
INSN_sraw, -- 200
INSN_srad,
INSN_srw,
INSN_srd,
INSN_stbcix,
INSN_stbcx,
INSN_stbx, -- 190
INSN_stbx,
INSN_stbux,
INSN_stdbrx,
INSN_stdcix,
INSN_stdcx,
INSN_stdcx, -- 210
INSN_stdx,
INSN_stdux,
INSN_sthbrx,
INSN_sthcix,
INSN_sthcx,
INSN_sthx, -- 200
INSN_sthx,
INSN_sthux,
INSN_stwbrx,
INSN_stwcix,
INSN_stwcx,
INSN_stwcx, -- 220
INSN_stwx,
INSN_stwux,
INSN_subf,
INSN_subfc,
INSN_subfe,
INSN_td, -- 210
INSN_td,
INSN_tlbie,
INSN_tlbiel,
INSN_tw,
INSN_xor,
INSN_xor, -- 230

-- pad to 224 to simplify comparison logic
INSN_215,
INSN_216, INSN_217, INSN_218, INSN_219,
INSN_220, INSN_221, INSN_222, INSN_223,
-- pad to 232 to simplify comparison logic
INSN_231,

-- The following instructions have a third input addressed by RC
INSN_maddld,
@ -257,9 +278,7 @@ package decode_types is
INSN_maddhdu,

-- pad to 256 to simplify comparison logic
INSN_227,
INSN_228, INSN_229, INSN_230, INSN_231,
INSN_232, INSN_233, INSN_234, INSN_235,
INSN_235,
INSN_236, INSN_237, INSN_238, INSN_239,
INSN_240, INSN_241, INSN_242, INSN_243,
INSN_244, INSN_245, INSN_246, INSN_247,
@ -267,36 +286,54 @@ package decode_types is
INSN_252, INSN_253, INSN_254, INSN_255,

-- The following instructions access floating-point registers
-- These ones have an FRS operand, but RA/RB are GPRs
INSN_stfd,
INSN_stfdu,
-- They have an FRS operand, but RA/RB are GPRs

-- Non-prefixed floating-point loads and stores that have a MLS:D
-- prefixed form, and their corresponding prefixed instructions.
INSN_stfd, -- 256
INSN_pstfd,
INSN_stfs,
INSN_pstfs,
INSN_lfd, -- 260
INSN_plfd,
INSN_lfs,
INSN_plfs,

-- opcodes that can't have a prefix
INSN_stfdu, -- 264
INSN_stfsu,
INSN_stfdux, -- 260
INSN_stfdux,
INSN_stfdx,
INSN_stfiwx,
INSN_stfsux,
INSN_stfsx,
INSN_stfsx, -- 270
-- These ones don't actually have an FRS operand (rather an FRT destination)
-- but are here so that all FP instructions are >= INST_first_frs.
INSN_lfd,
INSN_lfdu,
INSN_lfs,
INSN_lfsu,
INSN_lfdx,
INSN_lfdux, -- 270
INSN_lfdux,
INSN_lfiwax,
INSN_lfiwzx,
INSN_lfsx,
INSN_lfsux,
INSN_275, -- padding
-- These are here in order to keep the FP instructions together
INSN_mcrfs,
INSN_mtfsb, -- 280
INSN_mtfsfi,
INSN_282, -- padding
INSN_283,
INSN_284,
INSN_285,
INSN_286,
INSN_287,

-- The following instructions access FRA and/or FRB operands
INSN_fabs,
INSN_fabs, -- 288
INSN_fadd,
INSN_fadds,
INSN_fadds, -- 290
INSN_fcfid,
INSN_fcfids, -- 280
INSN_fcfids,
INSN_fcfidu,
INSN_fcfidus,
INSN_fcmpo,
@ -304,9 +341,9 @@ package decode_types is
INSN_fcpsgn,
INSN_fctid,
INSN_fctidz,
INSN_fctidu,
INSN_fctidu, -- 300
INSN_fctiduz,
INSN_fctiw, -- 290
INSN_fctiw,
INSN_fctiwz,
INSN_fctiwu,
INSN_fctiwuz,
@ -314,9 +351,9 @@ package decode_types is
INSN_fdivs,
INSN_fmr,
INSN_fmrgew,
INSN_fmrgow,
INSN_fmrgow, -- 310
INSN_fnabs,
INSN_fneg, -- 300
INSN_fneg,
INSN_fre,
INSN_fres,
INSN_frim,
@ -324,9 +361,9 @@ package decode_types is
INSN_frip,
INSN_friz,
INSN_frsp,
INSN_frsqrte,
INSN_frsqrte, -- 320
INSN_frsqrtes,
INSN_fsqrt, -- 310
INSN_fsqrt,
INSN_fsqrts,
INSN_fsub,
INSN_fsubs,
@ -335,21 +372,21 @@ package decode_types is
INSN_mffs,
INSN_mtfsf,

-- pad to 320
INSN_318, INSN_319,
-- pad to 336
INSN_330, INSN_331, INSN_332, INSN_333, INSN_334, INSN_335,

-- The following instructions access FRA, FRB (possibly) and FRC operands
INSN_fmul, -- 320
INSN_fmul, -- 336
INSN_fmuls,
INSN_fmadd,
INSN_fmadds,
INSN_fmsub,
INSN_fmsub, -- 340
INSN_fmsubs,
INSN_fnmadd,
INSN_fnmadds,
INSN_fnmsub,
INSN_fnmsubs,
INSN_fsel -- 330
INSN_fsel
);

constant INSN_first_rb : insn_code := INSN_add;
@ -357,10 +394,14 @@ package decode_types is
constant INSN_first_frs : insn_code := INSN_stfd;
constant INSN_first_frab : insn_code := INSN_fabs;
constant INSN_first_frabc : insn_code := INSN_fmul;
constant INSN_first_mls : insn_code := INSN_addi;
constant INSN_first_8ls : insn_code := INSN_lhzu;
constant INSN_first_fp_mls : insn_code := INSN_stfd;
constant INSN_first_fp_nonmls : insn_code := INSN_stfdu;

type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB);
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, FRT);
type rc_t is (NONE, ONE, RC, RCOE);
@ -384,7 +425,7 @@ package decode_types is

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, FPU);
type unit_t is (ALU, LDST, FPU);
type facility_t is (NONE, FPU);
type length_t is (NONE, is1B, is2B, is4B, is8B);

@ -425,7 +466,7 @@ package decode_types is
sgl_pipe : std_ulogic;
repeat : repeat_t;
end record;
constant decode_rom_init : decode_rom_t := (unit => NONE, facility => NONE,
constant decode_rom_init : decode_rom_t := (unit => ALU, facility => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0',

@ -118,6 +118,7 @@ architecture behaviour of execute1 is
fp_exception_next : std_ulogic;
trace_next : std_ulogic;
prev_op : insn_type_t;
prev_prefixed : std_ulogic;
oe : std_ulogic;
mul_select : std_ulogic_vector(1 downto 0);
res2_sel : std_ulogic_vector(1 downto 0);
@ -141,6 +142,7 @@ architecture behaviour of execute1 is
(e => Execute1ToWritebackInit, se => side_effect_init,
busy => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
prev_prefixed => '0',
oe => '0', mul_select => "00", res2_sel => "00",
spr_select => spr_id_init, pmu_spr_num => 5x"0",
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
@ -390,6 +392,7 @@ begin
op => e_in.insn_type,
invert_in => e_in.invert_a,
invert_out => e_in.invert_out,
is_signed => e_in.is_signed,
result => logical_result,
datalen => e_in.data_len
);
@ -834,14 +837,27 @@ begin
end if;
misc_result <= mfcr_result;
when "110" =>
-- setb
bfa := insn_bfa(e_in.insn);
crbit := to_integer(unsigned(bfa)) * 4;
-- setb and set[n]bc[r]
setb_result := (others => '0');
if cr_in(31 - crbit) = '1' then
setb_result := (others => '1');
elsif cr_in(30 - crbit) = '1' then
setb_result(0) := '1';
if e_in.insn(9) = '0' then
-- setb
bfa := insn_bfa(e_in.insn);
crbit := to_integer(unsigned(bfa)) * 4;
if cr_in(31 - crbit) = '1' then
setb_result := (others => '1');
elsif cr_in(30 - crbit) = '1' then
setb_result(0) := '1';
end if;
else
-- set[n]bc[r]
crbit := to_integer(unsigned(insn_bi(e_in.insn)));
if (cr_in(31 - crbit) xor e_in.insn(6)) = '1' then
if e_in.insn(7) = '0' then
setb_result(0) := '1';
else
setb_result := (others => '1');
end if;
end if;
end if;
misc_result <= setb_result;
when others =>
@ -978,6 +994,7 @@ begin
variable bo, bi : std_ulogic_vector(4 downto 0);
variable illegal : std_ulogic;
variable privileged : std_ulogic;
variable misaligned : std_ulogic;
variable slow_op : std_ulogic;
variable owait : std_ulogic;
variable srr1 : std_ulogic_vector(63 downto 0);
@ -1021,16 +1038,14 @@ begin

illegal := '0';
privileged := '0';
misaligned := e_in.misaligned_prefix;
slow_op := '0';
owait := '0';

if ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
privileged := '1';
end if;

if (not HAS_FPU and e_in.fac = FPU) or e_in.unit = NONE then
-- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
if e_in.illegal_suffix = '1' then
illegal := '1';
elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
privileged := '1';
end if;

v.do_trace := ex1.msr(MSR_SE);
@ -1091,8 +1106,8 @@ begin
when OP_ADDG6S =>
when OP_CMPRB =>
when OP_CMPEQB =>
when OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
OP_BPERM | OP_BCD =>
when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
OP_BPERM | OP_BREV | OP_BCD =>

when OP_B =>
v.take_branch := '1';
@ -1320,9 +1335,22 @@ begin
end if;
end case;

if privileged = '1' then
if misaligned = '1' then
-- generate an alignment interrupt
-- This is higher priority than illegal because a misaligned
-- prefix will come down as an OP_ILLEGAL instruction.
v.exception := '1';
v.e.intr_vec := 16#600#;
v.e.srr1(47 - 35) := '1';
v.e.srr1(47 - 34) := '1';
if e_in.valid = '1' then
report "misaligned prefixed instruction interrupt";
end if;

elsif privileged = '1' then
-- generate a program interrupt
v.exception := '1';
v.e.srr1(47 - 34) := e_in.prefixed;
-- set bit 45 to indicate privileged instruction type interrupt
v.e.srr1(47 - 45) := '1';
if e_in.valid = '1' then
@ -1331,6 +1359,7 @@ begin

elsif illegal = '1' then
v.exception := '1';
v.e.srr1(47 - 34) := e_in.prefixed;
-- Since we aren't doing Hypervisor emulation assist (0xe40) we
-- set bit 44 to indicate we have an illegal
v.e.srr1(47 - 44) := '1';
@ -1341,6 +1370,7 @@ begin
elsif HAS_FPU and ex1.msr(MSR_FP) = '0' and e_in.fac = FPU then
-- generate a floating-point unavailable interrupt
v.exception := '1';
v.e.srr1(47 - 34) := e_in.prefixed;
v.e.intr_vec := 16#800#;
if e_in.valid = '1' then
report "FP unavailable interrupt";
@ -1406,6 +1436,7 @@ begin

if valid_in = '1' then
v.prev_op := e_in.insn_type;
v.prev_prefixed := e_in.prefixed;
end if;

-- Determine if there is any interrupt to be taken
@ -1427,6 +1458,7 @@ begin
v.e.intr_vec := 16#d00#;
v.e.srr1 := (others => '0');
v.e.srr1(47 - 33) := '1';
v.e.srr1(47 - 34) := ex1.prev_prefixed;
if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or
ex1.prev_op = OP_DCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then
v.e.srr1(47 - 35) := '1';
@ -1589,6 +1621,7 @@ begin
lv.priv_mode := not ex1.msr(MSR_PR);
lv.mode_32bit := not ex1.msr(MSR_SF);
lv.is_32bit := e_in.is_32bit;
lv.prefixed := e_in.prefixed;
lv.repeat := e_in.repeat;
lv.second := e_in.second;
lv.e2stall := fp_in.f2stall;

@ -41,7 +41,6 @@ architecture behaviour of fetch1 is
mode_32bit: std_ulogic;
rd_is_niap4: std_ulogic;
predicted_taken: std_ulogic;
pred_not_taken: std_ulogic;
predicted_nia: std_ulogic_vector(63 downto 0);
end record;
signal r, r_next : Fetch1ToIcacheType;
@ -87,7 +86,6 @@ begin
r.pred_ntaken <= r_next.pred_ntaken;
r.nia <= r_next.nia;
r_int.predicted_taken <= r_next_int.predicted_taken;
r_int.pred_not_taken <= r_next_int.pred_not_taken;
r_int.predicted_nia <= r_next_int.predicted_nia;
r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
end if;
@ -155,7 +153,6 @@ begin
v.predicted := '0';
v.pred_ntaken := '0';
v_int.predicted_taken := '0';
v_int.pred_not_taken := '0';
v_int.rd_is_niap4 := '0';

if rst = '1' then
@ -185,10 +182,8 @@ begin
end if;
elsif r_int.predicted_taken = '1' then
v.nia := r_int.predicted_nia;
v.predicted := '1';
else
elsif r.req = '1' then
v_int.rd_is_niap4 := '1';
v.pred_ntaken := r_int.pred_not_taken;
v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
if r_int.mode_32bit = '1' then
v.nia(63 downto 32) := x"00000000";
@ -198,7 +193,8 @@ begin
btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
= v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
v_int.predicted_taken := btc_rd_data(BTC_WIDTH - 1);
v_int.pred_not_taken := not btc_rd_data(BTC_WIDTH - 1);
v.predicted := btc_rd_data(BTC_WIDTH - 1);
v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
end if;
end if;
v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";

@ -192,6 +192,8 @@ architecture rtl of icache is
hit_smark : std_ulogic;
hit_valid : std_ulogic;
big_endian: std_ulogic;
predicted : std_ulogic;
pred_ntaken: std_ulogic;

-- Cache miss state (reload state machine)
state : state_t;
@ -629,8 +631,8 @@ begin
i_out.stop_mark <= r.hit_smark;
i_out.fetch_failed <= r.fetch_failed;
i_out.big_endian <= r.big_endian;
i_out.next_predicted <= i_in.predicted;
i_out.next_pred_ntaken <= i_in.pred_ntaken;
i_out.next_predicted <= r.predicted;
i_out.next_pred_ntaken <= r.pred_ntaken;

-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not (is_hit and access_ok);
@ -673,6 +675,8 @@ begin
r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia;
r.big_endian <= i_in.big_endian;
r.predicted <= i_in.predicted;
r.pred_ntaken <= i_in.pred_ntaken;
end if;
if i_out.valid = '1' then
assert not is_X(i_out.insn) severity failure;

@ -43,6 +43,9 @@ package insn_helpers is
function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic;
function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
return std_ulogic_vector;
end package insn_helpers;

package body insn_helpers is
@ -250,4 +253,16 @@ package body insn_helpers is
begin
return insn_in(15 downto 12);
end;

function insn_prefix_r(prefix : std_ulogic_vector) return std_ulogic is
begin
return prefix(20);
end;

function insn_prefixed_si(prefix : std_ulogic_vector; suffix : std_ulogic_vector)
return std_ulogic_vector is
begin
return prefix(17 downto 0) & suffix(15 downto 0);
end;

end package body insn_helpers;

@ -69,6 +69,7 @@ architecture behave of loadstore1 is
instr_fault : std_ulogic;
do_update : std_ulogic;
mode_32bit : std_ulogic;
prefixed : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0);
second_bytes : std_ulogic_vector(7 downto 0);
@ -99,7 +100,8 @@ architecture behave of loadstore1 is
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
instr_fault => '0', do_update => '0',
mode_32bit => '0', addr => (others => '0'),
mode_32bit => '0', prefixed => '0',
addr => (others => '0'),
byte_sel => x"00", second_bytes => x"00",
store_data => (others => '0'), instr_tag => instr_tag_init,
write_reg => 6x"00", length => x"0",
@ -411,6 +413,7 @@ begin
v.valid := l_in.valid;
v.instr_tag := l_in.instr_tag;
v.mode_32bit := l_in.mode_32bit;
v.prefixed := l_in.prefixed;
v.write_reg := l_in.write_reg;
v.length := l_in.length;
v.elt_length := l_in.length;
@ -906,8 +909,10 @@ begin
if exception = '1' then
if r2.req.align_intr = '1' then
v.intr_vec := 16#600#;
v.srr1(47 - 34) := r2.req.prefixed;
v.dar := r2.req.addr;
elsif r2.req.instr_fault = '0' then
v.srr1(47 - 34) := r2.req.prefixed;
v.dar := r2.req.addr;
if m_in.segerr = '0' then
v.intr_vec := 16#300#;

@ -13,6 +13,7 @@ entity logical is
op : in insn_type_t;
invert_in : in std_ulogic;
invert_out : in std_ulogic;
is_signed : in std_ulogic;
result : out std_ulogic_vector(63 downto 0);
datalen : in std_logic_vector(3 downto 0)
);
@ -92,7 +93,8 @@ architecture behaviour of logical is

begin
logical_0: process(all)
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
variable rb_adj, rs_adj : std_ulogic_vector(63 downto 0);
variable tmp : std_ulogic_vector(63 downto 0);
variable negative : std_ulogic;
variable j : integer;
begin
@ -123,19 +125,34 @@ begin
end if;

case op is
when OP_AND | OP_OR | OP_XOR =>
case op is
when OP_AND =>
tmp := rs and rb_adj;
when OP_OR =>
tmp := rs or rb_adj;
when others =>
tmp := rs xor rb_adj;
end case;
when OP_LOGIC =>
-- for now, abuse the 'is_signed' field to indicate inversion of RS
rs_adj := rs;
if is_signed = '1' then
rs_adj := not rs;
end if;
tmp := rs_adj and rb_adj;
if invert_out = '1' then
tmp := not tmp;
end if;
when OP_XOR =>
tmp := rs xor rb;
if invert_out = '1' then
tmp := not tmp;
end if;

when OP_BREV =>
if datalen(3) = '1' then
tmp := rs( 7 downto 0) & rs(15 downto 8) & rs(23 downto 16) & rs(31 downto 24) &
rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56);
elsif datalen(2) = '1' then
tmp := rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56) &
rs( 7 downto 0) & rs(15 downto 8) & rs(23 downto 16) & rs(31 downto 24);
else
tmp := rs(55 downto 48) & rs(63 downto 56) & rs(39 downto 32) & rs(47 downto 40) &
rs(23 downto 16) & rs(31 downto 24) & rs( 7 downto 0) & rs(15 downto 8);
end if;

when OP_PRTY =>
tmp := parity;
when OP_CMPB =>

@ -158,6 +158,11 @@ architecture behaviour of predecoder is
2#111111_11010# to 2#111111_11011# => INSN_fmadd,
2#111111_11100# to 2#111111_11101# => INSN_fnmsub,
2#111111_11110# to 2#111111_11111# => INSN_fnmadd,
-- prefix word, PO1
2#000001_00000# to 2#000001_11111# => INSN_prefix,
-- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed
2#111001_00000# to 2#111001_11111# => INSN_op57,
2#111101_00000# to 2#111101_11111# => INSN_op61,
others => INSN_illegal
);

@ -179,6 +184,9 @@ architecture behaviour of predecoder is
2#0_00000_11100# => INSN_and,
2#0_00001_11100# => INSN_andc,
2#0_00111_11100# => INSN_bperm,
2#0_00110_11011# => INSN_brh,
2#0_00100_11011# => INSN_brw,
2#0_00101_11011# => INSN_brd,
2#0_01001_11010# => INSN_cbcdtd,
2#0_01000_11010# => INSN_cdtbcd,
2#0_00000_00000# => INSN_cmp,
@ -331,6 +339,10 @@ architecture behaviour of predecoder is
2#0_00101_11010# => INSN_prtyd,
2#0_00100_11010# => INSN_prtyw,
2#0_00100_00000# => INSN_setb,
2#0_01100_00000# => INSN_setb, -- setbc
2#0_01101_00000# => INSN_setb, -- setbcr
2#0_01110_00000# => INSN_setb, -- setnbc
2#0_01111_00000# => INSN_setb, -- setnbcr
2#0_01111_10010# => INSN_slbia,
2#0_00000_11011# => INSN_sld,
2#0_00000_11000# => INSN_slw,

@ -0,0 +1,3 @@
TEST=prefix

include ../Makefile.test

@ -0,0 +1,247 @@
/* Copyright 2013-2014 IBM Corp.
* Copyright 2023 Paul Mackerras <paulus@ozlabs.org>.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* Load an immediate 64-bit value into a register */
#define LOAD_IMM64(r, e) \
lis r,(e)@highest; \
ori r,r,(e)@higher; \
rldicr r,r, 32, 31; \
oris r,r, (e)@h; \
ori r,r, (e)@l;

.section ".head","ax"

/*
* Microwatt currently enters in LE mode at 0x0, so we don't need to
* do any endian fix ups
*/
. = 0
.global _start
_start:
LOAD_IMM64(%r10,__bss_start)
LOAD_IMM64(%r11,__bss_end)
subf %r11,%r10,%r11
addi %r11,%r11,63
srdi. %r11,%r11,6
beq 2f
mtctr %r11
1: dcbz 0,%r10
addi %r10,%r10,64
bdnz 1b

2: LOAD_IMM64(%r1,__stack_top)
li %r0,0
stdu %r0,-16(%r1)
LOAD_IMM64(%r10, die)
mtsprg0 %r10
LOAD_IMM64(%r12, main)
mtctr %r12
bctrl
die: attn // terminate on exit
b .

.global trapit
trapit:
mflr %r0
std %r0,16(%r1)
stdu %r1,-256(%r1)
mtsprg1 %r1
r = 14
.rept 18
std r,r*8(%r1)
r = r + 1
.endr
mfcr %r0
stw %r0,13*8(%r1)
LOAD_IMM64(%r10, ret)
mtsprg0 %r10
mr %r12,%r4
mtctr %r4
bctrl
ret:
mfsprg1 %r1
LOAD_IMM64(%r10, die)
mtsprg0 %r10
r = 14
.rept 18
ld r,r*8(%r1)
r = r + 1
.endr
lwz %r0,13*8(%r1)
mtcr %r0
ld %r0,256+16(%r1)
addi %r1,%r1,256
mtlr %r0
blr

#define EXCEPTION(nr) \
.= nr ;\
mfsprg0 %r0 ;\
mtctr %r0 ;\
li %r3,nr ;\
bctr

EXCEPTION(0x300)
EXCEPTION(0x380)
EXCEPTION(0x400)
EXCEPTION(0x480)
EXCEPTION(0x500)
EXCEPTION(0x600)
EXCEPTION(0x700)
EXCEPTION(0x800)
EXCEPTION(0x900)
EXCEPTION(0x980)
EXCEPTION(0xa00)
EXCEPTION(0xb00)
EXCEPTION(0xc00)
EXCEPTION(0xd00)
EXCEPTION(0xe00)
EXCEPTION(0xe20)
EXCEPTION(0xe40)
EXCEPTION(0xe60)
EXCEPTION(0xe80)
EXCEPTION(0xf00)
EXCEPTION(0xf20)
EXCEPTION(0xf40)
EXCEPTION(0xf60)
EXCEPTION(0xf80)

. = 0x1000
.globl test_paddi
test_paddi:
nop
nop
.machine "power10"
paddi %r3,%r3,0x123456789,0
blr

.globl test_paddi_r
test_paddi_r:
nop
nop
paddi %r3,0,0x123456789 - 0x101c,1
blr

.globl test_paddi_neg
test_paddi_neg:
nop
nop
paddi %r3,%r3,-0x123456789,0
blr

.globl test_pld
test_pld:
nop
nop
pld %r4,lvar(0)
std %r4,0(%r3)
li %r3,0
blr

.globl test_plfd
test_plfd:
nop
nop
plfd %f0,fpvar(0)
stfd %f0,0(%r3)
blr

. = 0x1074
.globl test_paddi_mis
test_paddi_mis:
nop
nop
.long 0x06012345
.long 0x38636789
blr

.globl test_pstd
test_pstd:
nop
nop
pstd %r3,lvar(0)
li %r3,0
blr

.globl test_plbz
test_plbz:
nop
nop
plbz %r4,bvar(0)
std %r4,0(%r3)
li %r3,0
blr

.globl test_pstb
test_pstb:
nop
nop
pstb %r3,bvar(0)
li %r3,0
blr

.globl test_plha
test_plha:
nop
nop
plha %r4,hvar(0)
std %r4,0(%r3)
li %r3,0
blr

.globl test_plhz
test_plhz:
nop
nop
plhz %r4,hvar(0)
std %r4,0(%r3)
li %r3,0
blr

.globl test_psth
test_psth:
nop
nop
psth %r3,hvar(0)
li %r3,0
blr

.globl test_plwa
test_plwa:
nop
nop
plwa %r4,wvar(0)
std %r4,0(%r3)
li %r3,0
blr

.globl test_plwz
test_plwz:
nop
nop
plwz %r4,wvar(0)
std %r4,0(%r3)
li %r3,0
blr

.globl test_pstw
test_pstw:
nop
nop
pstw %r3,wvar(0)
li %r3,0
blr

@ -0,0 +1,27 @@
SECTIONS
{
. = 0;
_start = .;
.head : {
KEEP(*(.head))
}
. = ALIGN(0x1000);
.text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) }
. = ALIGN(0x1000);
.data : { *(.data) *(.data.*) *(.got) *(.toc) }
. = ALIGN(0x80);
__bss_start = .;
.bss : {
*(.dynsbss)
*(.sbss)
*(.scommon)