Merge pull request #434 from paulusmack/compliance

Improve architecture compliance
master
Paul Mackerras 1 week ago committed by GitHub
commit f4ec0c2043
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -74,7 +74,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl bitsort.vhdl

soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \

@ -0,0 +1,102 @@
-- Implements instructions that involve sorting bits,
-- that is, cfuged, pextd and pdepd.
--
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
-- and move the bits in RS in the same fashion to give the result
-- pextd: Like cfuged but the only use the bits of RS where the
-- corresponding bit in RB is 1
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
-- to the bit positions which have a 1 in RB

-- NB opc is bits 7-6 of the instruction:
-- 00 = pdepd, 01 = pextd, 10 = cfuged

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.helpers.all;

entity bit_sorter is
port (
clk : in std_ulogic;
rst : in std_ulogic;
rs : in std_ulogic_vector(63 downto 0);
rb : in std_ulogic_vector(63 downto 0);
go : in std_ulogic;
opc : in std_ulogic_vector(1 downto 0);
done : out std_ulogic;
result : out std_ulogic_vector(63 downto 0)
);
end entity bit_sorter;

architecture behaviour of bit_sorter is

signal val : std_ulogic_vector(63 downto 0);
signal st : std_ulogic;
signal sd : std_ulogic;
signal opr : std_ulogic_vector(1 downto 0);
signal bc : unsigned(5 downto 0);
signal jl : unsigned(5 downto 0);
signal jr : unsigned(5 downto 0);
signal sr_ml : std_ulogic_vector(63 downto 0);
signal sr_mr : std_ulogic_vector(63 downto 0);
signal sr_vl : std_ulogic_vector(63 downto 0);
signal sr_vr : std_ulogic_vector(63 downto 0);

begin
bsort_r: process(clk)
begin
if rising_edge(clk) then
sd <= '0';
if rst = '1' then
st <= '0';
opr <= "00";
val <= (others => '0');
elsif go = '1' then
st <= '1';
sr_ml <= rb;
sr_mr <= rb;
sr_vl <= rs;
sr_vr <= rs;
opr <= opc;
val <= (others => '0');
bc <= to_unsigned(0, 6);
jl <= to_unsigned(63, 6);
jr <= to_unsigned(0, 6);
elsif st = '1' then
if bc = 6x"3f" then
st <= '0';
sd <= '1';
end if;
bc <= bc + 1;
if sr_ml(63) = '0' and opr(1) = '1' then
-- cfuged
val(to_integer(jl)) <= sr_vl(63);
jl <= jl - 1;
end if;
if sr_mr(0) = '1' then
if opr = "00" then
-- pdepd
val(to_integer(bc)) <= sr_vr(0);
else
-- cfuged or pextd
val(to_integer(jr)) <= sr_vr(0);
end if;
jr <= jr + 1;
end if;
sr_vl <= sr_vl(62 downto 0) & '0';
if opr /= "00" or sr_mr(0) = '1' then
sr_vr <= '0' & sr_vr(63 downto 1);
end if;
sr_ml <= sr_ml(62 downto 0) & '0';
sr_mr <= '0' & sr_mr(63 downto 1);
end if;
end if;
end process;

done <= sd;
result <= val;

end behaviour;

@ -12,6 +12,7 @@ package common is

-- MSR bit numbers
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_HV : integer := (63 - 3); -- Hypervisor mode (always 1)
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available
@ -54,6 +55,15 @@ package common is
constant SPR_PID : spr_num_t := 48;
constant SPR_PTCR : spr_num_t := 464;
constant SPR_PVR : spr_num_t := 287;
constant SPR_FSCR : spr_num_t := 153;
constant SPR_HFSCR : spr_num_t := 190;
constant SPR_HEIR : spr_num_t := 339;
constant SPR_CTRL : spr_num_t := 136;
constant SPR_CTRLW : spr_num_t := 152;
constant SPR_UDSCR : spr_num_t := 3;
constant SPR_DSCR : spr_num_t := 17;
constant SPR_VRSAVE : spr_num_t := 256;
constant SPR_PIR : spr_num_t := 1023;

-- PMU registers
constant SPR_UPMC1 : spr_num_t := 771;
@ -131,30 +141,52 @@ package common is
constant RAMSPR_SPRG3 : ramspr_index := to_unsigned(3,3);
constant RAMSPR_HSPRG1 : ramspr_index := to_unsigned(4,3);
constant RAMSPR_CTR : ramspr_index := to_unsigned(5,3); -- must equal RAMSPR_LR
constant RAMSPR_VRSAVE : ramspr_index := to_unsigned(6,3);

type ram_spr_info is record
index : ramspr_index;
isodd : std_ulogic;
is32b : std_ulogic;
valid : std_ulogic;
end record;
constant ram_spr_info_init: ram_spr_info := (index => to_unsigned(0,3), others => '0');

subtype spr_selector is std_ulogic_vector(2 downto 0);
subtype spr_selector is std_ulogic_vector(3 downto 0);
type spr_id is record
sel : spr_selector;
valid : std_ulogic;
ispmu : std_ulogic;
end record;
constant spr_id_init : spr_id := (sel => "000", others => '0');

constant SPRSEL_TB : spr_selector := 3x"0";
constant SPRSEL_TBU : spr_selector := 3x"1";
constant SPRSEL_DEC : spr_selector := 3x"2";
constant SPRSEL_PVR : spr_selector := 3x"3";
constant SPRSEL_LOGA : spr_selector := 3x"4";
constant SPRSEL_LOGD : spr_selector := 3x"5";
constant SPRSEL_CFAR : spr_selector := 3x"6";
constant SPRSEL_XER : spr_selector := 3x"7";
ronly : std_ulogic;
wonly : std_ulogic;
end record;
constant spr_id_init : spr_id := (sel => "0000", others => '0');

constant SPRSEL_TB : spr_selector := 4x"0";
constant SPRSEL_TBU : spr_selector := 4x"1";
constant SPRSEL_DEC : spr_selector := 4x"2";
constant SPRSEL_PVR : spr_selector := 4x"3";
constant SPRSEL_LOGA : spr_selector := 4x"4";
constant SPRSEL_LOGD : spr_selector := 4x"5";
constant SPRSEL_CFAR : spr_selector := 4x"6";
constant SPRSEL_FSCR : spr_selector := 4x"7";
constant SPRSEL_HFSCR : spr_selector := 4x"8";
constant SPRSEL_HEIR : spr_selector := 4x"9";
constant SPRSEL_CTRL : spr_selector := 4x"a";
constant SPRSEL_DSCR : spr_selector := 4x"b";
constant SPRSEL_PIR : spr_selector := 4x"c";
constant SPRSEL_XER : spr_selector := 4x"f";

-- FSCR and HFSCR bit numbers
constant FSCR_PREFIX : integer := 63 - 50;
constant FSCR_SCV : integer := 63 - 51;
constant FSCR_TAR : integer := 63 - 55;
constant FSCR_DSCR : integer := 63 - 61;
constant HFSCR_PREFIX : integer := 63 - 50;
constant HFSCR_MSG : integer := 63 - 53;
constant HFSCR_TAR : integer := 63 - 55;
constant HFSCR_PMUSPR : integer := 63 - 60;
constant HFSCR_DSCR : integer := 63 - 61;
constant HFSCR_FP : integer := 63 - 63;

-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
@ -224,14 +256,32 @@ package common is

-- This needs to die...
type ctrl_t is record
wait_state: std_ulogic;
run: std_ulogic;
tb: std_ulogic_vector(63 downto 0);
dec: std_ulogic_vector(63 downto 0);
msr: std_ulogic_vector(63 downto 0);
cfar: std_ulogic_vector(63 downto 0);
xer_low: std_ulogic_vector(17 downto 0);
fscr_ic: std_ulogic_vector(3 downto 0);
fscr_pref: std_ulogic;
fscr_scv: std_ulogic;
fscr_tar: std_ulogic;
fscr_dscr: std_ulogic;
hfscr_ic: std_ulogic_vector(3 downto 0);
hfscr_pref: std_ulogic;
hfscr_tar: std_ulogic;
hfscr_dscr: std_ulogic;
hfscr_fp: std_ulogic;
heir: std_ulogic_vector(63 downto 0);
dscr: std_ulogic_vector(24 downto 0);
end record;
constant ctrl_t_init : ctrl_t :=
(xer_low => 18x"0", others => (others => '0'));
(wait_state => '0', run => '1', xer_low => 18x"0",
fscr_ic => x"0", fscr_pref => '1', fscr_scv => '1', fscr_tar => '1', fscr_dscr => '1',
hfscr_ic => x"0", hfscr_pref => '1', hfscr_tar => '1', hfscr_dscr => '1', hfscr_fp => '1',
dscr => (others => '0'),
others => (others => '0'));

type Fetch1ToIcacheType is record
req: std_ulogic;
@ -270,6 +320,7 @@ package common is
type Decode1ToDecode2Type is record
valid: std_ulogic;
stop_mark : std_ulogic;
second : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
prefixed: std_ulogic;
prefix: std_ulogic_vector(25 downto 0);
@ -286,7 +337,7 @@ package common is
reg_c : gspr_index_t;
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'),
(valid => '0', stop_mark => '0', second => '0', nia => (others => '0'),
prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
illegal_suffix => '0', misaligned_prefix => '0',
decode => decode_rom_init, br_pred => '0', big_endian => '0',
@ -371,11 +422,16 @@ package common is
ramspr_wraddr : ramspr_index;
ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic;
ramspr_32bit : std_ulogic;
dbg_spr_access : std_ulogic;
dec_ctr : std_ulogic;
prefixed : std_ulogic;
prefix : std_ulogic_vector(25 downto 0);
illegal_suffix : std_ulogic;
misaligned_prefix : std_ulogic;
illegal_form : std_ulogic;
uses_tar : std_ulogic;
uses_dscr : std_ulogic;
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
@ -393,9 +449,11 @@ package common is
spr_is_ram => '0',
ramspr_even_rdaddr => (others => '0'), ramspr_odd_rdaddr => (others => '0'), ramspr_rd_odd => '0',
ramspr_wraddr => (others => '0'), ramspr_write_even => '0', ramspr_write_odd => '0',
ramspr_32bit => '0',
dbg_spr_access => '0',
dec_ctr => '0',
prefixed => '0', illegal_suffix => '0', misaligned_prefix => '0',
prefixed => '0', prefix => (others => '0'), illegal_suffix => '0',
misaligned_prefix => '0', illegal_form => '0', uses_tar => '0', uses_dscr => '0',
others => (others => '0'));

type MultiplyInputType is record
@ -547,14 +605,23 @@ package common is
hold : std_ulogic;
load : std_ulogic; -- is this a load
dcbz : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
sync : std_ulogic;
nc : std_ulogic;
reserve : std_ulogic;
atomic_qw : std_ulogic; -- part of a quadword atomic op
atomic_first : std_ulogic;
atomic_last : std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1
byte_sel : std_ulogic_vector(7 downto 0);
end record;
constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType :=
(addr => (others => '0'), data => (others => '0'), byte_sel => x"00",
others => '0');

type DcacheToLoadstore1Type is record
valid : std_ulogic;
@ -562,6 +629,7 @@ package common is
store_done : std_ulogic;
error : std_ulogic;
cache_paradox : std_ulogic;
reserve_nc : std_ulogic;
end record;

type DcacheEventType is record
@ -662,6 +730,8 @@ package common is
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
interrupt : std_ulogic;
hv_intr : std_ulogic;
is_scv : std_ulogic;
intr_vec : intr_vector_t;
redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0);
@ -678,7 +748,8 @@ package common is
write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
interrupt => '0', hv_intr => '0', is_scv => '0', intr_vec => 0,
redirect => '0', redir_mode => "0000",
last_nia => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0'));
@ -766,13 +837,13 @@ package common is
br_last : std_ulogic;
br_taken : std_ulogic;
interrupt : std_ulogic;
intr_vec : std_ulogic_vector(11 downto 0);
intr_vec : std_ulogic_vector(16 downto 0);
end record;
constant WritebackToFetch1Init : WritebackToFetch1Type :=
(redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
mode_32bit => '0', redirect_nia => (others => '0'),
br_last => '0', br_taken => '0', br_nia => (others => '0'),
interrupt => '0', intr_vec => x"000");
interrupt => '0', intr_vec => 17x"0");

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;
@ -795,8 +866,10 @@ package common is
write_cr_data => (others => '0'));

type WritebackToExecute1Type is record
intr : std_ulogic;
srr1 : std_ulogic_vector(15 downto 0);
intr : std_ulogic;
hv_intr : std_ulogic;
scv_int : std_ulogic;
srr1 : std_ulogic_vector(15 downto 0);
end record;

type WritebackEventType is record

@ -9,6 +9,7 @@ use work.wishbone_types.all;
entity core is
generic (
SIM : boolean := false;
CPU_INDEX : natural := 0;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
@ -48,6 +49,7 @@ entity core is

ext_irq : in std_ulogic;

run_out : out std_ulogic;
terminated_out : out std_logic
);
end core;
@ -363,6 +365,7 @@ begin
execute1_0: entity work.execute1
generic map (
SIM => SIM,
CPU_INDEX => CPU_INDEX,
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
@ -390,6 +393,7 @@ begin
ls_events => loadstore_events,
dc_events => dcache_events,
ic_events => icache_events,
run_out => run_out,
terminate_out => terminate,
dbg_spr_req => dbg_spr_req,
dbg_spr_ack => dbg_spr_ack,

@ -294,7 +294,7 @@ begin

-- For SPRs, use the same mapping as when the fast SPRs were in the GPR file
valid := '1';
sel := "000";
sel := "0000";
isram := '1';
raddr := (others => '0');
odd := '0';
@ -324,10 +324,26 @@ begin
sel := SPRSEL_XER;
when 5x"0d" =>
raddr := RAMSPR_TAR;
when 5x"0e" =>
isram := '0';
sel := SPRSEL_FSCR;
when 5x"0f" =>
isram := '0';
sel := SPRSEL_HFSCR;
when 5x"10" =>
isram := '0';
sel := SPRSEL_HEIR;
when 5x"11" =>
isram := '0';
sel := SPRSEL_CFAR;
when others =>
valid := '0';
end case;
dbg_spr_addr <= isram & sel & std_ulogic_vector(raddr) & odd;
if isram = '1' then
dbg_spr_addr <= "1000" & std_ulogic_vector(raddr) & odd;
else
dbg_spr_addr <= "0000" & sel;
end if;
spr_index_valid <= valid;
end if;
end process;

@ -181,22 +181,13 @@ architecture rtl of dcache is

constant real_mode_perm_attr : perm_attr_t := (nocache => '0', others => '1');

-- Type of operation on a "valid" input
type op_t is (OP_NONE,
OP_BAD, -- NC cache hit, TLB miss, prot/RC failure
OP_STCX_FAIL, -- conditional store w/o reservation
OP_LOAD_HIT, -- Cache hit on load
OP_LOAD_MISS, -- Load missing cache
OP_LOAD_NC, -- Non-cachable load
OP_STORE_HIT, -- Store hitting cache
OP_STORE_MISS); -- Store missing cache
-- Cache state machine
type state_t is (IDLE, -- Normal load hit processing
RELOAD_WAIT_ACK, -- Cache reload wait ack
STORE_WAIT_ACK, -- Store wait ack
NC_LOAD_WAIT_ACK);-- Non-cachable load wait ack

NC_LOAD_WAIT_ACK, -- Non-cachable load wait ack
DO_STCX, -- Check for stcx. validity
FLUSH_CYCLE); -- Cycle for invalidating cache line

--
-- Dcache operations:
@ -230,8 +221,9 @@ architecture rtl of dcache is
-- Clock edge between cycle 1 and cycle 2:
-- Request is stored in r1 (assuming r1.full was 0)
-- The state machine transitions out of IDLE state for a load miss,
-- a store, a dcbz, or a non-cacheable load. r1.full is set to 1
-- for a load miss, dcbz or non-cacheable load but not a store.
-- a store, a dcbz, a flush (dcbf) or a non-cacheable load.
-- r1.full is set to 1 for a load miss, dcbz, flush or
-- non-cacheable load but not a store.
--
-- Cycle 2: Completion signals are asserted for a load hit,
-- a store (excluding dcbz), a TLB operation, a conditional
@ -272,6 +264,23 @@ architecture rtl of dcache is
-- subsequent load requests to the same line can be completed as
-- soon as the necessary data comes in from memory, without
-- waiting for the whole line to be read.
--
-- Aligned loads and stores of a doubleword or less are atomic
-- because they are done in a single wishbone operation.
-- For quadword atomic loads and stores we rely on the wishbone
-- arbiter not interrupting access to a target once it has first
-- given access; i.e. once we have the main wishbone, no other
-- master gets access until we drop cyc.
--
-- Note on loads potentially hitting the victim line that is
-- currently being replaced: the new tag is available starting
-- with the 3rd cycle of RELOAD_WAIT_ACK state. As long as the
-- first read on the wishbone takes at least one cycle (i.e. the
-- ack doesn't arrive in the same cycle as stb was asserted),
-- r1.full will be true at least until that 3rd cycle and so a load
-- following a load miss can't hit on the old tag of the victim
-- line. As long as ack is not generated combinationally from
-- stb, this will be fine.

-- Stage 0 register, basically contains just the latched request
type reg_stage_0_t is record
@ -287,12 +296,23 @@ architecture rtl of dcache is
signal r0_full : std_ulogic;

type mem_access_request_t is record
op : op_t;
op_lmiss : std_ulogic;
op_store : std_ulogic;
op_flush : std_ulogic;
op_sync : std_ulogic;
nc : std_ulogic;
valid : std_ulogic;
dcbz : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
sync : std_ulogic;
reserve : std_ulogic;
first_dw : std_ulogic;
last_dw : std_ulogic;
real_addr : real_addr_t;
data : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0);
is_hit : std_ulogic;
hit_way : way_t;
same_tag : std_ulogic;
mmu_req : std_ulogic;
@ -306,12 +326,16 @@ architecture rtl of dcache is
full : std_ulogic; -- have uncompleted request
mmu_req : std_ulogic; -- request is from MMU
req : mem_access_request_t;
atomic_more : std_ulogic; -- atomic request isn't finished

-- Cache hit state
hit_way : way_t;
hit_load_valid : std_ulogic;
hit_index : index_t;
cache_hit : std_ulogic;
prev_hit : std_ulogic;
prev_way : way_t;
prev_hit_reload : std_ulogic;

-- TLB hit state
tlb_hit : std_ulogic;
@ -352,6 +376,7 @@ architecture rtl of dcache is
mmu_done : std_ulogic;
mmu_error : std_ulogic;
cache_paradox : std_ulogic;
reserve_nc : std_ulogic;

-- Signal to complete a failed stcx.
stcx_fail : std_ulogic;
@ -365,27 +390,34 @@ architecture rtl of dcache is
--
type reservation_t is record
valid : std_ulogic;
addr : std_ulogic_vector(63 downto LINE_OFF_BITS);
addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
end record;

signal reservation : reservation_t;
signal kill_rsrv : std_ulogic;
signal kill_rsrv2 : std_ulogic;

-- Async signals on incoming request
signal req_index : index_t;
signal req_hit_way : way_t;
signal req_tag : cache_tag_t;
signal req_op : op_t;
signal req_data : std_ulogic_vector(63 downto 0);
signal req_same_tag : std_ulogic;
signal req_go : std_ulogic;
signal req_index : index_t;
signal req_hit_way : way_t;
signal req_is_hit : std_ulogic;
signal req_tag : cache_tag_t;
signal req_op_load_hit : std_ulogic;
signal req_op_load_miss : std_ulogic;
signal req_op_store : std_ulogic;
signal req_op_flush : std_ulogic;
signal req_op_sync : std_ulogic;
signal req_op_bad : std_ulogic;
signal req_op_nop : std_ulogic;
signal req_data : std_ulogic_vector(63 downto 0);
signal req_same_tag : std_ulogic;
signal req_go : std_ulogic;
signal req_nc : std_ulogic;
signal req_hit_reload : std_ulogic;

signal early_req_row : row_t;
signal early_rd_valid : std_ulogic;

signal cancel_store : std_ulogic;
signal set_rsrv : std_ulogic;
signal clear_rsrv : std_ulogic;

signal r0_valid : std_ulogic;
signal r0_stall : std_ulogic;

@ -427,10 +459,13 @@ architecture rtl of dcache is
-- TLB PLRU output interface
signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);

signal snoop_active : std_ulogic;
signal snoop_tag_set : cache_tags_set_t;
signal snoop_valid : std_ulogic;
signal snoop_wrtag : cache_tag_t;
signal snoop_index : index_t;
signal snoop_paddr : real_addr_t;
signal snoop_addr : real_addr_t;
signal snoop_hits : cache_way_valids_t;
signal req_snoop_hit : std_ulogic;

--
-- Helper functions to decode incoming requests
@ -565,12 +600,9 @@ begin
assert (d_in.valid and m_in.valid) = '0' report
"request collision loadstore vs MMU";
if m_in.valid = '1' then
r.req := Loadstore1ToDcacheInit;
r.req.valid := '1';
r.req.load := not (m_in.tlbie or m_in.tlbld);
r.req.dcbz := '0';
r.req.nc := '0';
r.req.reserve := '0';
r.req.virt_mode := '0';
r.req.priv_mode := '1';
r.req.addr := m_in.addr;
r.req.data := m_in.pte;
@ -861,34 +893,51 @@ begin
end if;
end process;

-- Snoop logic
-- Don't snoop our own cycles
snoop_addr <= addr_to_real(wb_to_addr(snoop_in.adr));
snoop_active <= snoop_in.cyc and snoop_in.stb and snoop_in.we and
not (r1.wb.cyc and not wishbone_in.stall);
kill_rsrv <= '1' when (snoop_active = '1' and reservation.valid = '1' and
snoop_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) = reservation.addr)
else '0';

-- Cache tag RAM second read port, for snooping
cache_tag_read_2 : process(clk)
variable addr : real_addr_t;
begin
if rising_edge(clk) then
-- Don't snoop our own cycles
snoop_valid <= '0';
if not (r1.wb.cyc = '1' and wishbone_in.stall = '0') then
if (snoop_in.cyc and snoop_in.stb and snoop_in.we) = '1' then
snoop_valid <= '1';
addr := addr_to_real(wb_to_addr(snoop_in.adr));
assert not is_X(addr);
snoop_tag_set <= cache_tags(to_integer(get_index(addr)));
snoop_wrtag <= get_tag(addr);
snoop_index <= get_index(addr);
end if;
if is_X(snoop_addr) then
snoop_tag_set <= (others => 'X');
else
snoop_tag_set <= cache_tags(to_integer(get_index(snoop_addr)));
end if;
snoop_paddr <= snoop_addr;
snoop_valid <= snoop_active;
end if;
end process;

-- Compare the previous cycle's snooped store address to the reservation,
-- to catch the case where a write happens on cycle 1 of a cached larx
kill_rsrv2 <= '1' when (snoop_valid = '1' and reservation.valid = '1' and
snoop_paddr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) = reservation.addr)
else '0';

snoop_tag_match : process(all)
begin
snoop_hits <= (others => '0');
for i in 0 to NUM_WAYS-1 loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = get_tag(snoop_paddr) then
snoop_hits(i) <= '1';
end if;
end loop;
end process;

-- Cache request parsing and hit detection
dcache_request : process(all)
variable req_row : row_t;
variable rindex : index_t;
variable is_hit : std_ulogic;
variable hit_way : way_t;
variable op : op_t;
variable opsel : std_ulogic_vector(2 downto 0);
variable go : std_ulogic;
variable nc : std_ulogic;
variable s_hit : std_ulogic;
@ -901,6 +950,9 @@ begin
variable rel_match : std_ulogic;
variable fwd_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable fwd_match : std_ulogic;
variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable snoop_match : std_ulogic;
variable hit_reload : std_ulogic;
begin
-- Extract line, row and tag from request
rindex := get_index(r0.req.addr);
@ -924,9 +976,11 @@ begin
is_hit := '0';
rel_match := '0';
fwd_match := '0';
snoop_match := '0';
if r0.req.virt_mode = '1' then
rel_matches := (others => '0');
fwd_matches := (others => '0');
snp_matches := (others => '0');
for j in tlb_way_t loop
hit_way_set(j) := to_unsigned(0, WAY_BITS);
s_hit := '0';
@ -943,6 +997,9 @@ begin
tlb_valid_way(j) = '1' then
hit_way_set(j) := to_unsigned(i, WAY_BITS);
s_hit := '1';
if snoop_hits(i) = '1' then
snp_matches(j) := '1';
end if;
end if;
end loop;
hit_set(j) := s_hit;
@ -959,6 +1016,7 @@ begin
hit_way := hit_way_set(to_integer(tlb_hit_way));
rel_match := rel_matches(to_integer(tlb_hit_way));
fwd_match := fwd_matches(to_integer(tlb_hit_way));
snoop_match := snp_matches(to_integer(tlb_hit_way));
end if;
else
s_tag := get_tag(r0.req.addr);
@ -970,6 +1028,9 @@ begin
read_tag(i, cache_tag_set) = s_tag then
hit_way := to_unsigned(i, WAY_BITS);
is_hit := '1';
if snoop_hits(i) = '1' then
snoop_match := '1';
end if;
end if;
end loop;
if go = '1' and not is_X(r1.reload_tag) and s_tag = r1.reload_tag then
@ -982,6 +1043,13 @@ begin
req_same_tag <= rel_match;
fwd_same_tag <= fwd_match;

-- This is 1 if the snooped write from the previous cycle hits the same
-- cache line that is being accessed in this cycle.
req_snoop_hit <= '0';
if go = '1' and snoop_match = '1' and get_index(snoop_paddr) = rindex then
req_snoop_hit <= '1';
end if;

-- Whether to use forwarded data for a load or not
use_forward_st <= '0';
use_forward_rl <= '0';
@ -1029,6 +1097,7 @@ begin
assert not is_X(rindex);
assert not is_X(r1.store_index);
end if;
hit_reload := '0';
if r1.state = RELOAD_WAIT_ACK and rel_match = '1' and
rindex = r1.store_index then
-- Ignore is_hit from above, because a load miss writes the new tag
@ -1037,13 +1106,29 @@ begin
-- since it will be by the time we perform the store.
-- For a load, check the appropriate row valid bit; but also,
-- if use_forward_rl is 1 then we can consider this a hit.
is_hit := not r0.req.load or r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or
-- For a touch, since the line we want is being reloaded already,
-- consider this a hit.
is_hit := not r0.req.load or r0.req.touch or
r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or
use_forward_rl;
hit_way := replace_way;
hit_reload := is_hit;
elsif r0.req.load = '1' and r0.req.atomic_qw = '1' and r0.req.atomic_first = '0' and
r0.req.nc = '0' and perm_attr.nocache = '0' and r1.prev_hit = '1' then
-- For the second half of an atomic quadword load, just use the
-- same way as the first half, without considering whether the line
-- is valid; it is as if we had read the second dword at the same
-- time as the first dword, and the line was valid back then.
-- (Cases where the line is currently being reloaded are handled above.)
-- NB lq to noncacheable isn't required to be atomic per the ISA.
is_hit := '1';
hit_way := r1.prev_way;
end if;

-- The way that matched on a hit
req_hit_way <= hit_way;
req_is_hit <= is_hit;
req_hit_reload <= hit_reload;

-- work out whether we have permission for this access
-- NB we don't yet implement AMR, thus no KUAP
@ -1056,29 +1141,44 @@ begin
-- operation needs to be done
--
nc := r0.req.nc or perm_attr.nocache;
op := OP_NONE;
req_op_bad <= '0';
req_op_load_hit <= '0';
req_op_load_miss <= '0';
req_op_store <= '0';
req_op_nop <= '0';
req_op_flush <= '0';
req_op_sync <= '0';
if go = '1' then
if access_ok = '0' then
op := OP_BAD;
elsif cancel_store = '1' then
op := OP_STCX_FAIL;
if r0.req.sync = '1' then
req_op_sync <= '1';
elsif r0.req.touch = '1' then
if access_ok = '1' and is_hit = '0' and nc = '0' then
req_op_load_miss <= '1';
elsif access_ok = '1' and is_hit = '1' and nc = '0' then
-- Make this OP_LOAD_HIT so the PLRU gets updated
req_op_load_hit <= '1';
else
req_op_nop <= '1';
end if;
elsif access_ok = '0' then
req_op_bad <= '1';
elsif r0.req.flush = '1' then
if is_hit = '0' then
req_op_nop <= '1';
else
req_op_flush <= '1';
end if;
elsif nc = '1' and (is_hit = '1' or r0.req.reserve = '1') then
req_op_bad <= '1';
elsif r0.req.load = '0' then
req_op_store <= '1'; -- includes dcbz
else
opsel := r0.req.load & nc & is_hit;
case opsel is
when "101" => op := OP_LOAD_HIT;
when "100" => op := OP_LOAD_MISS;
when "110" => op := OP_LOAD_NC;
when "001" => op := OP_STORE_HIT;
when "000" => op := OP_STORE_MISS;
when "010" => op := OP_STORE_MISS;
when "011" => op := OP_BAD;
when "111" => op := OP_BAD;
when others => op := OP_NONE;
end case;
req_op_load_hit <= is_hit;
req_op_load_miss <= not is_hit; -- includes non-cacheable loads
end if;
end if;
req_op <= op;
req_go <= go;
req_nc <= nc;

-- Version of the row number that is valid one cycle earlier
-- in the cases where we need to read the cache data BRAM.
@ -1101,45 +1201,6 @@ begin
-- Wire up wishbone request latch out of stage 1
wishbone_out <= r1.wb;

-- Handle load-with-reservation and store-conditional instructions
reservation_comb: process(all)
begin
cancel_store <= '0';
set_rsrv <= '0';
clear_rsrv <= '0';
if r0_valid = '1' and r0.req.reserve = '1' then
-- XXX generate alignment interrupt if address is not aligned
-- XXX or if r0.req.nc = '1'
if r0.req.load = '1' then
-- load with reservation
set_rsrv <= '1';
else
-- store conditional
clear_rsrv <= '1';
if reservation.valid = '0' or
r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
cancel_store <= '1';
end if;
end if;
end if;
end process;

reservation_reg: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
reservation.valid <= '0';
elsif r0_valid = '1' and access_ok = '1' then
if clear_rsrv = '1' then
reservation.valid <= '0';
elsif set_rsrv = '1' then
reservation.valid <= '1';
reservation.addr <= r0.req.addr(63 downto LINE_OFF_BITS);
end if;
end if;
end if;
end process;

-- Return data for loads & completion control logic
--
writeback_control: process(all)
@ -1149,6 +1210,7 @@ begin
d_out.store_done <= not r1.stcx_fail;
d_out.error <= r1.ls_error;
d_out.cache_paradox <= r1.cache_paradox;
d_out.reserve_nc <= r1.reserve_nc;

-- Outputs to MMU
m_out.done <= r1.mmu_done;
@ -1185,7 +1247,7 @@ begin
report "completing ld/st with error";
end if;

-- Slow ops (load miss, NC, stores)
-- Slow ops (load miss, NC, stores, sync)
if r1.slow_valid = '1' then
report "completing store or load miss data=" & to_hstring(r1.data_out);
end if;
@ -1288,14 +1350,6 @@ begin
variable data_out : std_ulogic_vector(63 downto 0);
begin
if rising_edge(clk) then
if req_op /= OP_NONE then
report "op:" & op_t'image(req_op) &
" addr:" & to_hstring(r0.req.addr) &
" nc:" & std_ulogic'image(r0.req.nc) &
" idx:" & to_hstring(req_index) &
" tag:" & to_hstring(req_tag) &
" way: " & to_hstring(req_hit_way);
end if;
if r0_valid = '1' then
r1.mmu_req <= r0.mmu_req;
end if;
@ -1341,36 +1395,19 @@ begin
r1.forward_valid <= '1';
end if;

-- Fast path for load/store hits. Set signals for the writeback controls.
if req_op = OP_LOAD_HIT then
r1.hit_load_valid <= '1';
else
r1.hit_load_valid <= '0';
end if;
r1.hit_load_valid <= req_op_load_hit;
r1.cache_hit <= req_op_load_hit or (req_op_store and req_is_hit); -- causes PLRU update

-- The cache hit indication is used for PLRU updates
if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
r1.cache_hit <= '1';
else
r1.cache_hit <= '0';
end if;

if req_op = OP_BAD then
r1.cache_paradox <= access_ok and req_nc and req_is_hit;
r1.reserve_nc <= access_ok and r0.req.reserve and req_nc;
if req_op_bad = '1' then
report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) &
" rc_ok=" & std_ulogic'image(rc_ok) & " perm_ok=" & std_ulogic'image(perm_ok);
r1.ls_error <= not r0.mmu_req;
r1.mmu_error <= r0.mmu_req;
r1.cache_paradox <= access_ok;
else
r1.ls_error <= '0';
r1.mmu_error <= '0';
r1.cache_paradox <= '0';
end if;

if req_op = OP_STCX_FAIL then
r1.stcx_fail <= '1';
else
r1.stcx_fail <= '0';
end if;

-- Record TLB hit information for updating TLB PLRU
@ -1423,6 +1460,10 @@ begin
r1.acks_pending <= to_unsigned(0, 3);
r1.stalled <= '0';
r1.dec_acks <= '0';
r1.prev_hit <= '0';
r1.prev_hit_reload <= '0';
reservation.valid <= '0';
reservation.addr <= (others => '0');

-- Not useful normally but helps avoiding tons of sim warnings
r1.wb.adr <= (others => '0');
@ -1430,27 +1471,33 @@ begin
-- One cycle pulses reset
r1.slow_valid <= '0';
r1.write_bram <= '0';
r1.stcx_fail <= '0';

r1.ls_valid <= '0';
r1.ls_valid <= (req_op_load_hit or req_op_nop) and not r0.mmu_req;
-- complete tlbies and TLB loads in the third cycle
r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
if r0.mmu_req = '0' then
r1.ls_valid <= '1';
else
r1.mmu_done <= '1';
end if;
r1.mmu_done <= (r0_valid and (r0.tlbie or r0.tlbld)) or
(req_op_load_hit and r0.mmu_req);

-- The kill_rsrv2 term covers the case where the reservation
-- address was set at the beginning of this cycle, and a store
-- to that address happened in the previous cycle.
if kill_rsrv = '1' or kill_rsrv2 = '1' then
reservation.valid <= '0';
end if;
if req_go = '1' and access_ok = '1' and r0.req.load = '1' and
r0.req.reserve = '1' and r0.req.atomic_first = '1' then
reservation.addr <= ra(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
reservation.valid <= req_is_hit and not req_snoop_hit;
end if;

-- Do invalidations from snooped stores to memory
if snoop_valid = '1' then
assert not is_X(snoop_tag_set);
assert not is_X(snoop_wrtag);
assert not is_X(snoop_paddr);
assert not is_X(snoop_hits);
end if;
for i in 0 to NUM_WAYS-1 loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = snoop_wrtag then
assert not is_X(snoop_index);
cache_valids(to_integer(snoop_index))(i) <= '0';
if snoop_hits(i) = '1' then
cache_valids(to_integer(get_index(snoop_paddr)))(i) <= '0';
end if;
end loop;

@ -1469,14 +1516,24 @@ begin
end if;

-- Take request from r1.req if there is one there,
-- else from req_op, ra, etc.
-- else from req_op_*, ra, etc.
if r1.full = '1' then
req := r1.req;
else
req.op := req_op;
req.op_lmiss := req_op_load_miss;
req.op_store := req_op_store;
req.op_flush := req_op_flush;
req.op_sync := req_op_sync;
req.nc := req_nc;
req.valid := req_go;
req.mmu_req := r0.mmu_req;
req.dcbz := r0.req.dcbz;
req.flush := r0.req.flush;
req.touch := r0.req.touch;
req.sync := r0.req.sync;
req.reserve := r0.req.reserve;
req.first_dw := not r0.req.atomic_qw or r0.req.atomic_first;
req.last_dw := not r0.req.atomic_qw or r0.req.atomic_last;
req.real_addr := ra;
-- Force data to 0 for dcbz
if r0.req.dcbz = '1' then
@ -1493,14 +1550,16 @@ begin
req.byte_sel := r0.req.byte_sel;
end if;
req.hit_way := req_hit_way;
req.is_hit := req_is_hit;
req.same_tag := req_same_tag;

-- Store the incoming request from r0, if it is a slow request
-- Note that r1.full = 1 implies req_op = OP_NONE
if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC or
req_op = OP_STORE_MISS or req_op = OP_STORE_HIT then
-- Note that r1.full = 1 implies none of the req_op_* are 1.
-- For the sake of timing we put any valid request in r1.req,
-- but only set r1.full if it is a slow request.
if req_go = '1' then
r1.req <= req;
r1.full <= '1';
r1.full <= req_op_load_miss or req_op_store or req_op_flush or req_op_sync;
end if;
end if;

@ -1512,9 +1571,14 @@ begin
r1.victim_way <= plru_victim;
report "victim way:" & to_hstring(plru_victim);
end if;
if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then
if req_op_load_miss = '1' or (r0.req.dcbz = '1' and req_is_hit = '0') then
r1.choose_victim <= '1';
end if;
if req_go = '1' then
r1.prev_hit <= req_is_hit;
r1.prev_way <= req_hit_way;
r1.prev_hit_reload <= req_hit_reload;
end if;

-- Update count of pending acks
acks := r1.acks_pending;
@ -1536,6 +1600,7 @@ begin
r1.wb.sel <= req.byte_sel;
r1.wb.dat <= req.data;
r1.dcbz <= req.dcbz;
r1.atomic_more <= not req.last_dw;

-- Keep track of our index and way for subsequent stores.
r1.store_index <= get_index(req.real_addr);
@ -1544,44 +1609,52 @@ begin
r1.reload_tag <= get_tag(req.real_addr);
r1.req.same_tag <= '1';

if req.op = OP_STORE_HIT then
if req.is_hit = '1' then
r1.store_way <= req.hit_way;
end if;

-- Reset per-row valid bits, ready for handling OP_LOAD_MISS
-- Reset per-row valid bits, ready for handling the next load miss
for i in 0 to ROW_PER_LINE - 1 loop
r1.rows_valid(i) <= '0';
end loop;

case req.op is
when OP_LOAD_HIT =>
-- stay in IDLE state

when OP_LOAD_MISS =>
if req.op_lmiss = '1' then
-- Normal load cache miss, start the reload machine
--
report "cache miss real addr:" & to_hstring(req.real_addr) &
" idx:" & to_hstring(get_index(req.real_addr)) &
" tag:" & to_hstring(get_tag(req.real_addr));
-- Or non-cacheable load
if req.nc = '0' then
report "cache miss real addr:" & to_hstring(req.real_addr) &
" idx:" & to_hstring(get_index(req.real_addr)) &
" tag:" & to_hstring(get_tag(req.real_addr));
end if;

-- Start the wishbone cycle
r1.wb.we <= '0';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';

-- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK;
r1.write_tag <= '1';
ev.load_miss <= '1';
if req.nc = '0' then
-- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK;
r1.write_tag <= '1';
ev.load_miss <= '1';

when OP_LOAD_NC =>
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
r1.wb.we <= '0';
r1.state <= NC_LOAD_WAIT_ACK;
-- If this is a touch, complete the instruction
if req.touch = '1' then
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
end if;
else
r1.state <= NC_LOAD_WAIT_ACK;
end if;
end if;

when OP_STORE_HIT | OP_STORE_MISS =>
if req.dcbz = '0' then
if req.op_store = '1' then
if req.reserve = '1' then
-- stcx needs to wait until next cycle
-- for the reservation address check
r1.state <= DO_STCX;
elsif req.dcbz = '0' then
r1.state <= STORE_WAIT_ACK;
r1.full <= '0';
r1.slow_valid <= '1';
@ -1590,30 +1663,33 @@ begin
else
r1.mmu_done <= '1';
end if;
if req.op = OP_STORE_HIT then
r1.write_bram <= '1';
end if;
r1.write_bram <= req.is_hit;
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
else
-- dcbz is handled much like a load miss except
-- that we are writing to memory instead of reading
r1.state <= RELOAD_WAIT_ACK;
if req.op = OP_STORE_MISS then
r1.write_tag <= '1';
end if;
end if;
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
if req.op = OP_STORE_MISS then
ev.store_miss <= '1';
r1.write_tag <= not req.is_hit;
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
end if;
ev.store_miss <= not req.is_hit;
end if;

-- OP_NONE and OP_BAD do nothing
-- OP_BAD & OP_STCX_FAIL were handled above already
when OP_NONE =>
when OP_BAD =>
when OP_STCX_FAIL =>
end case;
if req.op_flush = '1' then
r1.state <= FLUSH_CYCLE;
end if;

if req.op_sync = '1' then
-- sync/lwsync can complete now that the state machine
-- is idle.
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
end if;

when RELOAD_WAIT_ACK =>
-- If we are still sending requests, was one accepted ?
@ -1643,7 +1719,7 @@ begin
assert not is_X(r1.req.real_addr);
end if;
if r1.full = '1' and r1.req.same_tag = '1' and
((r1.dcbz = '1' and req.dcbz = '1') or r1.req.op = OP_LOAD_MISS) and
((r1.dcbz = '1' and r1.req.dcbz = '1') or r1.req.op_lmiss = '1') and
r1.store_row = get_row(r1.req.real_addr) then
r1.full <= '0';
r1.slow_valid <= '1';
@ -1652,6 +1728,10 @@ begin
else
r1.mmu_done <= '1';
end if;
-- NB: for lqarx, set the reservation on the first dword
if r1.req.reserve = '1' and r1.req.first_dw = '1' then
reservation.valid <= '1';
end if;
end if;

-- Check for completion
@ -1667,6 +1747,10 @@ begin
cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '1';

ev.dcache_refill <= not r1.dcbz;
-- Second half of a lq/lqarx can assume a hit on this line now
-- if the first half hit this line.
r1.prev_hit <= r1.prev_hit_reload;
r1.prev_way <= r1.store_way;
r1.state <= IDLE;
end if;

@ -1680,6 +1764,10 @@ begin
if wishbone_in.stall = '0' then
-- See if there is another store waiting to be done
-- which is in the same real page.
-- This could be either in r1.req or in r0.
-- Ignore store-conditionals, they have to go through
-- DO_STCX state, unless they are the second half of a
-- successful stqcx, which is handled here.
if req.valid = '1' then
r1.wb.adr(SET_SIZE_BITS - ROW_OFF_BITS - 1 downto 0) <=
req.real_addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS);
@ -1687,30 +1775,33 @@ begin
r1.wb.sel <= req.byte_sel;
end if;
assert not is_X(acks);
if acks < 7 and req.same_tag = '1' and req.dcbz = '0' and
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then
r1.wb.stb <= '1';
stbs_done := false;
r1.store_way <= req.hit_way;
r1.store_row <= get_row(req.real_addr);
if req.op = OP_STORE_HIT then
r1.write_bram <= '1';
r1.wb.stb <= '0';
if req.op_store = '1' and req.same_tag = '1' and req.dcbz = '0' and
(req.reserve = '0' or r1.atomic_more = '1') then
if acks < 7 then
r1.wb.stb <= '1';
stbs_done := false;
r1.store_way <= req.hit_way;
r1.store_row <= get_row(req.real_addr);
r1.write_bram <= req.is_hit;
r1.atomic_more <= not req.last_dw;
r1.full <= '0';
r1.slow_valid <= '1';
-- Store requests never come from the MMU
r1.ls_valid <= '1';
end if;
r1.full <= '0';
r1.slow_valid <= '1';
-- Store requests never come from the MMU
r1.ls_valid <= '1';
stbs_done := false;
else
r1.wb.stb <= '0';
stbs_done := true;
if req.valid = '1' then
r1.atomic_more <= '0';
end if;
end if;
end if;

-- Got ack ? See if complete.
if wishbone_in.ack = '1' then
if stbs_done and r1.atomic_more = '0' then
assert not is_X(acks);
if stbs_done and acks = 1 then
if acks = 0 or (wishbone_in.ack = '1' and acks = 1) then
r1.state <= IDLE;
r1.wb.cyc <= '0';
r1.wb.stb <= '0';
@ -1736,6 +1827,51 @@ begin
r1.wb.cyc <= '0';
r1.wb.stb <= '0';
end if;

when DO_STCX =>
if reservation.valid = '0' or kill_rsrv = '1' or
r1.req.real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) /= reservation.addr then
-- Wrong address, didn't have reservation, or lost reservation
-- Abandon the wishbone cycle if started and fail the stcx.
r1.stcx_fail <= '1';
r1.full <= '0';
r1.ls_valid <= '1';
r1.state <= IDLE;
r1.wb.cyc <= '0';
r1.wb.stb <= '0';
reservation.valid <= '0';
-- If this is the first half of a stqcx., the second half
-- will fail also because the reservation is not valid.
r1.state <= IDLE;
elsif r1.wb.cyc = '0' then
-- Right address and have reservation, so start the
-- wishbone cycle
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
elsif r1.wb.stb = '1' and wishbone_in.stall = '0' then
-- Store has been accepted, so now we can write the
-- cache data RAM and complete the request
r1.write_bram <= r1.req.is_hit;
r1.wb.stb <= '0';
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
reservation.valid <= '0';
-- For a stqcx, STORE_WAIT_ACK will issue the second half
-- without checking the reservation, which is what we want
-- given that the first half has gone out.
-- With r1.atomic_more set, STORE_WAIT_ACK won't exit to
-- IDLE state until it sees the second half.
r1.state <= STORE_WAIT_ACK;
end if;

when FLUSH_CYCLE =>
cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '0';
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
r1.state <= IDLE;
end case;
end if;
end if;
@ -1753,7 +1889,7 @@ begin
r1.wb.stb & r1.wb.cyc &
d_out.error &
d_out.valid &
std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) &
req_op_load_miss & req_op_store & req_op_bad &
stall_out &
std_ulogic_vector(resize(tlb_hit_way, 3)) &
valid_ra &

@ -44,6 +44,8 @@ architecture behaviour of decode1 is
signal decode_rom_addr : insn_code;
signal decode : decode_rom_t;

signal double : std_ulogic;

type prefix_state_t is record
prefixed : std_ulogic;
prefix : std_ulogic_vector(25 downto 0);
@ -106,6 +108,7 @@ architecture behaviour of decode1 is
INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cfuged => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_cmpb => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmpeqb => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -113,10 +116,10 @@ architecture behaviour of decode1 is
INSN_cmpl => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmpli => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmprb => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cntlzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cntlzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_cnttzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cnttzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_cntlzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cntlzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_cnttzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cnttzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_crand => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_crandc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_creqv => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -126,10 +129,10 @@ architecture behaviour of decode1 is
INSN_crorc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_crxor => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_darn => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbf => (ALU, NONE, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbf => (LDST, NONE, OP_DCBF, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbst => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbt => (ALU, NONE, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbtst => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbt => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbtst => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbz => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_divd => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
INSN_divde => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
@ -197,7 +200,7 @@ architecture behaviour of decode1 is
INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isync => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lbarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE),
@ -234,6 +237,8 @@ architecture behaviour of decode1 is
INSN_lhzu => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD),
INSN_lhzux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD),
INSN_lhzx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lq => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DQ, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTP),
INSN_lqarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', DRTP),
INSN_lwa => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lwarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE),
INSN_lwaux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD),
@ -281,12 +286,15 @@ architecture behaviour of decode1 is
INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pdepd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pextd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfs => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_plha => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plhz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plq => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTP),
INSN_plwa => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plwz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -295,13 +303,15 @@ architecture behaviour of decode1 is
INSN_pstfd => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstq => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSP),
INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntb => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntd => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntw => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntb => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_prtyd => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_prtyw => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rfid => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rfscv => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rldcl => (ALU, NONE, OP_RLCL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_rldcr => (ALU, NONE, OP_RLCR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_rldic => (ALU, NONE, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
@ -352,6 +362,8 @@ architecture behaviour of decode1 is
INSN_sthu => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sthux => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sthx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_stq => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSP),
INSN_stqcx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', DRSP),
INSN_stw => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_stwbrx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_stwcix => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '1', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -365,7 +377,7 @@ architecture behaviour of decode1 is
INSN_subfic => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_subfme => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_subfze => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_sync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sync => (LDST, NONE, OP_SYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_td => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_tdi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_tlbie => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -373,7 +385,7 @@ architecture behaviour of decode1 is
INSN_tlbsync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_tw => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_twi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_wait => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_wait => (ALU, NONE, OP_WAIT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_xor => (ALU, NONE, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_xori => (ALU, NONE, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_xoris => (ALU, NONE, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -384,7 +396,7 @@ architecture behaviour of decode1 is
function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
variable ret : ram_spr_info;
begin
ret := (index => (others => '0'), isodd => '0', valid => '1');
ret := (index => (others => '0'), isodd => '0', is32b => '0', valid => '1');
case sprn is
when SPR_LR =>
ret.index := RAMSPR_LR;
@ -418,6 +430,10 @@ architecture behaviour of decode1 is
when SPR_HSPRG1 =>
ret.index := RAMSPR_HSPRG1;
ret.isodd := '1';
when SPR_VRSAVE =>
ret.index := RAMSPR_VRSAVE;
ret.isodd := '1';
ret.is32b := '1';
when others =>
ret.valid := '0';
end case;
@ -427,9 +443,11 @@ architecture behaviour of decode1 is
function map_spr(sprn : spr_num_t) return spr_id is
variable i : spr_id;
begin
i.sel := "000";
i.sel := "0000";
i.valid := '1';
i.ispmu := '0';
i.ronly := '0';
i.wonly := '0';
case sprn is
when SPR_TB =>
i.sel := SPRSEL_TB;
@ -452,6 +470,24 @@ architecture behaviour of decode1 is
i.sel := SPRSEL_CFAR;
when SPR_XER =>
i.sel := SPRSEL_XER;
when SPR_FSCR =>
i.sel := SPRSEL_FSCR;
when SPR_HFSCR =>
i.sel := SPRSEL_HFSCR;
when SPR_HEIR =>
i.sel := SPRSEL_HEIR;
when SPR_CTRL =>
i.sel := SPRSEL_CTRL;
i.ronly := '1';
when SPR_CTRLW =>
i.sel := SPRSEL_CTRL;
i.wonly := '1';
when SPR_UDSCR =>
i.sel := SPRSEL_DSCR;
when SPR_DSCR =>
i.sel := SPRSEL_DSCR;
when SPR_PIR =>
i.sel := SPRSEL_PIR;
when others =>
i.valid := '0';
end case;
@ -459,6 +495,8 @@ architecture behaviour of decode1 is
end;

begin
double <= not r.second when (r.valid = '1' and decode.repeat /= NONE) else '0';

decode1_0: process(clk)
begin
if rising_edge(clk) then
@ -471,10 +509,15 @@ begin
fetch_failed <= '0';
pr <= prefix_state_init;
elsif stall_in = '0' then
r <= rin;
fetch_failed <= f_in.fetch_failed;
if f_in.valid = '1' then
pr <= pr_in;
if double = '0' then
r <= rin;
fetch_failed <= f_in.fetch_failed;
if f_in.valid = '1' then
pr <= pr_in;
end if;
else
r.second <= '1';
r.reg_c <= rin.reg_c;
end if;
end if;
if rst = '1' then
@ -485,12 +528,12 @@ begin
end if;
end process;

busy_out <= stall_in;
busy_out <= stall_in or double;

decode1_rom: process(clk)
begin
if rising_edge(clk) then
if stall_in = '0' then
if stall_in = '0' and double = '0' then
decode <= decode_rom(decode_rom_addr);
end if;
end if;
@ -521,7 +564,7 @@ begin
v.big_endian := f_in.big_endian;

if is_X(f_in.insn) then
v.spr_info := (sel => "XXX", others => 'X');
v.spr_info := (sel => "XXXX", others => 'X');
v.ram_spr := (index => (others => 'X'), others => 'X');
else
sprn := decode_spr_num(f_in.insn);
@ -620,33 +663,55 @@ begin
-- Work out GPR/FPR read addresses
-- Note that for prefixed instructions we are working this out based
-- only on the suffix.
maybe_rb := '0';
vr.reg_1_addr := '0' & insn_ra(f_in.insn);
vr.reg_2_addr := '0' & insn_rb(f_in.insn);
vr.reg_3_addr := '0' & insn_rs(f_in.insn);
if icode >= INSN_first_rb then
maybe_rb := '1';
if icode < INSN_first_frs then
if icode >= INSN_first_rc then
vr.reg_3_addr := '0' & insn_rcreg(f_in.insn);
end if;
else
-- access FRS operand
vr.reg_3_addr(5) := '1';
if icode >= INSN_first_frab then
-- access FRA and/or FRB operands
vr.reg_1_addr(5) := '1';
vr.reg_2_addr(5) := '1';
end if;
if icode >= INSN_first_frabc then
-- access FRC operand
vr.reg_3_addr := '1' & insn_rcreg(f_in.insn);
if double = '0' then
maybe_rb := '0';
vr.reg_1_addr := '0' & insn_ra(f_in.insn);
vr.reg_2_addr := '0' & insn_rb(f_in.insn);
vr.reg_3_addr := '0' & insn_rs(f_in.insn);
if icode >= INSN_first_rb then
maybe_rb := '1';
if icode < INSN_first_frs then
if icode >= INSN_first_rc then
vr.reg_3_addr := '0' & insn_rcreg(f_in.insn);
end if;
else
-- access FRS operand
vr.reg_3_addr(5) := '1';
if icode >= INSN_first_frab then
-- access FRA and/or FRB operands
vr.reg_1_addr(5) := '1';
vr.reg_2_addr(5) := '1';
end if;
if icode >= INSN_first_frabc then
-- access FRC operand
vr.reg_3_addr := '1' & insn_rcreg(f_in.insn);
end if;
end if;
end if;
-- See if this is an instruction where repeat_t = DRSP and we need
-- to read RS|1 followed by RS, i.e. stq or stqcx. in LE mode
-- (note we don't have access to the decode for the current instruction)
if (icode = INSN_stq or icode = INSN_stqcx) and f_in.big_endian = '0' then
vr.reg_3_addr(0) := '1';
end if;
vr.read_1_enable := f_in.valid;
vr.read_2_enable := f_in.valid and maybe_rb;
vr.read_3_enable := f_in.valid;
else
-- second instance of a doubled instruction
vr.reg_1_addr := r.reg_a;
vr.reg_2_addr := r.reg_b;
vr.reg_3_addr := r.reg_c;
vr.read_1_enable := '0'; -- (not actually used)
vr.read_2_enable := '0';
vr.read_3_enable := '1'; -- (not actually used)
-- For pstq, and for stq and stqcx in BE mode,
-- we need to read register RS|1 in the cycle after we read RS;
-- stq and stqcx in LE mode read RS.
if decode.repeat = DRSP then
vr.reg_3_addr(0) := r.prefixed or f_in.big_endian;
end if;
end if;
vr.read_1_enable := f_in.valid;
vr.read_2_enable := f_in.valid and maybe_rb;
vr.read_3_enable := f_in.valid;

v.reg_a := vr.reg_1_addr;
v.reg_b := vr.reg_2_addr;

@ -232,12 +232,13 @@ architecture behaviour of decode2 is
);

constant subresult_select : mux_select_array_t := (
OP_MUL_L64 => "000", -- muldiv_result
OP_MUL_H64 => "001",
OP_MUL_H32 => "010",
OP_DIV => "011",
OP_DIVE => "011",
OP_MOD => "011",
OP_MUL_L64 => "000", -- multicyc_result
OP_MUL_H64 => "010",
OP_MUL_H32 => "001",
OP_DIV => "101",
OP_DIVE => "101",
OP_MOD => "101",
OP_BSORT => "100",
OP_ADDG6S => "001", -- misc_result
OP_ISEL => "010",
OP_DARN => "011",
@ -347,7 +348,8 @@ begin
elsif deferred = '0' then
if dc2in.e.valid = '1' then
report "execute " & to_hstring(dc2in.e.nia) &
" tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid);
" tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid) &
" rpt=" & std_ulogic'image(dc2in.e.repeat) & " 2nd=" & std_ulogic'image(dc2in.e.second) & " wr=" & to_hstring(dc2in.e.write_reg);
end if;
dc2 <= dc2in;
elsif dc2.read_rspr = '0' then
@ -376,6 +378,31 @@ begin
dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
case d_in.decode.repeat is
when DUPD =>
if d_in.second = '1' then
-- update-form loads, 2nd instruction writes RA
dec_o.reg := dec_a.reg;
end if;
when DRSP =>
-- non-prefixed stq, stqcx do RS|1, RS in LE mode; others do RS, RS|1
if d_in.second = (d_in.big_endian or d_in.prefixed) then
dec_c.reg(0) := '1'; -- do RS, RS|1
end if;
when DRTP =>
-- non-prefixed lq, lqarx do RT|1, RT in LE mode; others do RT, RT|1
if d_in.second = (d_in.big_endian or d_in.prefixed) then
dec_o.reg(0) := '1';
end if;
when others =>
end case;
-- For the second instance of a doubled instruction, we ignore the RA
-- and RB operands, in order to avoid false dependencies on the output
-- of the first instance.
if d_in.second = '1' then
dec_a.reg_valid := '0';
dec_b.reg_valid := '0';
end if;
if d_in.valid = '0' or d_in.illegal_suffix = '1' then
dec_a.reg_valid := '0';
dec_b.reg_valid := '0';
@ -420,6 +447,8 @@ begin
v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;

v.e.spr_select := d_in.spr_info;

-- Work out whether XER SO/OV/OV32 bits are set
-- or used by this instruction
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
@ -450,8 +479,15 @@ begin
v.input_ov := '1';
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
unit := LDST;
when SPR_TAR =>
v.e.uses_tar := '1';
when SPR_UDSCR =>
v.e.uses_dscr := '1';
when others =>
end case;
if d_in.spr_info.wonly = '1' then
v.e.spr_select.valid := '0';
end if;
end if;
when OP_MTSPR =>
if is_X(d_in.insn) then
@ -468,9 +504,15 @@ begin
if d_in.valid = '1' then
v.sgl_pipe := '1';
end if;
when SPR_TAR =>
v.e.uses_tar := '1';
when SPR_UDSCR =>
v.e.uses_dscr := '1';
when others =>
end case;
if d_in.spr_info.valid = '1' and d_in.valid = '1' then
if d_in.spr_info.ronly = '1' then
v.e.spr_select.valid := '0';
elsif d_in.spr_info.valid = '1' and d_in.valid = '1' then
v.sgl_pipe := '1';
end if;
end if;
@ -496,12 +538,10 @@ begin
end if;
v.e.dec_ctr := decctr;

v.repeat := d_in.decode.repeat;
if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
end if;

v.e.spr_select := d_in.spr_info;
v.e.second := d_in.second;

if decctr = '1' then
-- read and write CTR
@ -525,12 +565,14 @@ begin
v.e.ramspr_rd_odd := '1';
else
v.e.ramspr_even_rdaddr := RAMSPR_TAR;
v.e.uses_tar := '1';
end if;
sprs_busy := '1';
when OP_MFSPR =>
v.e.ramspr_even_rdaddr := d_in.ram_spr.index;
v.e.ramspr_odd_rdaddr := d_in.ram_spr.index;
v.e.ramspr_rd_odd := d_in.ram_spr.isodd;
v.e.ramspr_32bit := d_in.ram_spr.is32b;
v.e.spr_is_ram := d_in.ram_spr.valid;
sprs_busy := d_in.ram_spr.valid;
when OP_MTSPR =>
@ -539,8 +581,19 @@ begin
v.e.ramspr_write_odd := d_in.ram_spr.valid and d_in.ram_spr.isodd;
v.e.spr_is_ram := d_in.ram_spr.valid;
when OP_RFID =>
v.e.ramspr_even_rdaddr := RAMSPR_SRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_SRR1;
if d_in.insn(7) = '1' then
-- rfscv
v.e.ramspr_even_rdaddr := RAMSPR_LR;
v.e.ramspr_odd_rdaddr := RAMSPR_CTR;
elsif d_in.insn(9) = '0' then
-- rfid
v.e.ramspr_even_rdaddr := RAMSPR_SRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_SRR1;
else
-- hrfid
v.e.ramspr_even_rdaddr := RAMSPR_HSRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_HSRR1;
end if;
sprs_busy := '1';
when others =>
end case;
@ -590,23 +643,28 @@ begin
if op = OP_MFSPR then
if d_in.ram_spr.valid = '1' then
v.e.result_sel := "101"; -- ramspr_result
elsif d_in.spr_info.valid = '0' then
elsif d_in.spr_info.valid = '0' or d_in.spr_info.wonly = '1' then
-- Privileged mfspr to invalid/unimplemented SPR numbers
-- writes the contents of RT back to RT (i.e. it's a no-op)
v.e.result_sel := "001"; -- logical_result
end if;
end if;
v.e.prefixed := d_in.prefixed;
v.e.prefix := d_in.prefix;
v.e.illegal_suffix := d_in.illegal_suffix;
v.e.misaligned_prefix := d_in.misaligned_prefix;

elsif dc2.e.valid = '1' then
-- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.
-- Set up for the second iteration (if deferred = 1 this will all be ignored)
v.e.second := '1';
-- DUPD is the only possibility here:
-- update-form loads, 2nd instruction writes RA
v.e.write_reg := dc2.e.read_reg1;
-- check for invalid forms that cause an illegal instruction interrupt
-- Does RA = RT for a load quadword instr, or RB = RT for lqarx?
if d_in.decode.repeat = DRTP and
(insn_ra(d_in.insn) = insn_rt(d_in.insn) or
(d_in.decode.reserve = '1' and insn_rb(d_in.insn) = insn_rt(d_in.insn))) then
v.e.illegal_form := '1';
end if;
-- Is RS/RT odd for a load/store quadword instruction?
if (d_in.decode.repeat = DRSP or d_in.decode.repeat = DRTP) and d_in.insn(21) = '1' then
v.e.illegal_form := '1';
end if;
end if;

-- issue control
@ -695,7 +753,7 @@ begin

v.e.valid := control_valid_out;
v.e.instr_tag := instr_tag;
v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second));
v.busy := valid_in and not control_valid_out;

stall_out <= dc2.busy or deferred;


@ -6,9 +6,10 @@ package decode_types is
OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BCD, OP_BPERM, OP_BREV,
OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_ICBI, OP_ICBT,
OP_COUNTB, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBZ,
OP_SPARE,
OP_ICBI, OP_ICBT,
OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC,
OP_DIV, OP_DIVE, OP_MOD,
OP_EXTS, OP_EXTSWSLI,
@ -18,12 +19,14 @@ package decode_types is
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32,
OP_POPCNT, OP_PRTY, OP_RFID,
OP_BSORT,
OP_PRTY, OP_RFID,
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TLBIE, OP_TRAP,
OP_XOR,
OP_ADDG6S,
OP_WAIT,
OP_FETCH_FAILED
);

@ -106,6 +109,7 @@ package decode_types is
INSN_prtyw,
INSN_prtyd, -- 70
INSN_rfid,
INSN_rfscv,
INSN_rldic,
INSN_rldicl,
INSN_rldicr,
@ -113,8 +117,8 @@ package decode_types is
INSN_rlwimi,
INSN_rlwinm,
INSN_rnop,
INSN_sc,
INSN_setb, -- 80
INSN_sc, -- 80
INSN_setb,
INSN_slbia,
INSN_sradi,
INSN_srawi,
@ -122,9 +126,10 @@ package decode_types is
INSN_std,
INSN_stdu,
INSN_sthu,
INSN_stwu,
INSN_stq,
INSN_stwu, -- 90
INSN_subfic,
INSN_subfme, -- 90
INSN_subfme,
INSN_subfze,
INSN_sync,
INSN_tdi,
@ -132,23 +137,23 @@ package decode_types is
INSN_twi,
INSN_wait,
INSN_xori,
INSN_xoris,
-- pad to 104
INSN_063, INSN_064, INSN_065, INSN_066, INSN_067,
INSN_xoris, -- 100
-- pad to 102
INSN_065,

-- Non-prefixed instructions that have a MLS:D prefixed form and
-- their corresponding prefixed instructions.
-- The non-prefixed versions have even indexes so that we can
-- convert them to the prefixed version by setting bit 0
INSN_addi, -- 104
INSN_addi, -- 102
INSN_paddi,
INSN_lbz,
INSN_plbz,
INSN_lha,
INSN_plha,
INSN_lhz, -- 110
INSN_lhz,
INSN_plhz,
INSN_lwz,
INSN_lwz, -- 110
INSN_plwz,
INSN_stb,
INSN_pstb,
@ -158,15 +163,18 @@ package decode_types is
INSN_pstw,

-- Slots for non-prefixed opcodes that are 8LS:D when prefixed
INSN_lhzu, -- 120
INSN_lhzu,
INSN_plwa,
INSN_lq, -- 120
INSN_plq,
INSN_op57,
INSN_pld,
INSN_op60,
INSN_pstq,
INSN_op61,
INSN_pstd,

-- pad to 128 to simplify comparison logic
INSN_07e, INSN_07f,

-- The following instructions have an RB operand but don't access FPRs
INSN_add,
@ -177,11 +185,12 @@ package decode_types is
INSN_and,
INSN_andc,
INSN_bperm,
INSN_cfuged,
INSN_cmp,
INSN_cmpb,
INSN_cmpeqb,
INSN_cmpl,
INSN_cmprb, -- 140
INSN_cmpl, -- 140
INSN_cmprb,
INSN_dcbf,
INSN_dcbst,
INSN_dcbt,
@ -190,8 +199,8 @@ package decode_types is
INSN_divd,
INSN_divdu,
INSN_divde,
INSN_divdeu,
INSN_divw, -- 150
INSN_divdeu, -- 150
INSN_divw,
INSN_divwu,
INSN_divwe,
INSN_divweu,
@ -200,8 +209,8 @@ package decode_types is
INSN_icbt,
INSN_isel,
INSN_lbarx,
INSN_lbzcix,
INSN_lbzux, -- 160
INSN_lbzcix, -- 160
INSN_lbzux,
INSN_lbzx,
INSN_ldarx,
INSN_ldbrx,
@ -210,18 +219,19 @@ package decode_types is
INSN_ldux,
INSN_lharx,
INSN_lhax,
INSN_lhaux,
INSN_lhbrx, -- 170
INSN_lhaux, -- 170
INSN_lhbrx,
INSN_lhzcix,
INSN_lhzx,
INSN_lhzux,
INSN_lqarx,
INSN_lwarx,
INSN_lwax,
INSN_lwaux,
INSN_lwbrx,
INSN_lwzcix,
INSN_lwzcix, -- 180
INSN_lwzx,
INSN_lwzux, -- 180
INSN_lwzux,
INSN_modsd,
INSN_modsw,
INSN_moduw,
@ -229,52 +239,55 @@ package decode_types is
INSN_mulhw,
INSN_mulhwu,
INSN_mulhd,
INSN_mulhdu,
INSN_mulhdu, -- 190
INSN_mullw,
INSN_mulld, -- 190
INSN_mulld,
INSN_nand,
INSN_nor,
INSN_or,
INSN_orc,
INSN_pdepd,
INSN_pextd,
INSN_rldcl,
INSN_rldcr,
INSN_rldcr, -- 200
INSN_rlwnm,
INSN_slw,
INSN_sld,
INSN_sraw, -- 200
INSN_sraw,
INSN_srad,
INSN_srw,
INSN_srd,
INSN_stbcix,
INSN_stbcx,
INSN_stbx,
INSN_stbx, -- 210
INSN_stbux,
INSN_stdbrx,
INSN_stdcix,
INSN_stdcx, -- 210
INSN_stdcx,
INSN_stdx,
INSN_stdux,
INSN_sthbrx,
INSN_sthcix,
INSN_sthcx,
INSN_sthx,
INSN_sthx, -- 220
INSN_sthux,
INSN_stqcx,
INSN_stwbrx,
INSN_stwcix,
INSN_stwcx, -- 220
INSN_stwcx,
INSN_stwx,
INSN_stwux,
INSN_subf,
INSN_subfc,
INSN_subfe,
INSN_subfe, -- 230
INSN_td,
INSN_tlbie,
INSN_tlbiel,
INSN_tw,
INSN_xor, -- 230
INSN_xor,

-- pad to 232 to simplify comparison logic
INSN_231,
-- pad to 240 to simplify comparison logic
INSN_236, INSN_237, INSN_238, INSN_239,

-- The following instructions have a third input addressed by RC
INSN_maddld,
@ -282,9 +295,7 @@ package decode_types is
INSN_maddhdu,

-- pad to 256 to simplify comparison logic
INSN_235,
INSN_236, INSN_237, INSN_238, INSN_239,
INSN_240, INSN_241, INSN_242, INSN_243,
INSN_243,
INSN_244, INSN_245, INSN_246, INSN_247,
INSN_248, INSN_249, INSN_250, INSN_251,
INSN_252, INSN_253, INSN_254, INSN_255,
@ -434,7 +445,9 @@ package decode_types is
type length_t is (NONE, is1B, is2B, is4B, is8B);

type repeat_t is (NONE, -- instruction is not repeated
DUPD); -- update-form load
DUPD, -- update-form load
DRSP, -- double RS (RS, RS+1)
DRTP); -- double RT (RT, RT+1, or RT+1, RT)

type decode_rom_t is record
unit : unit_t;
@ -518,6 +531,7 @@ package body decode_types is
when INSN_lhau => return "101011";
when INSN_lhz => return "101000";
when INSN_lhzu => return "101001";
when INSN_lq => return "111000";
when INSN_lwz => return "100000";
when INSN_lwzu => return "100001";
when INSN_mulli => return "000111";
@ -537,6 +551,7 @@ package body decode_types is
when INSN_sth => return "101100";
when INSN_sthu => return "101101";
when INSN_stw => return "100100";
when INSN_stq => return "111110";
when INSN_stwu => return "100101";
when INSN_subfic => return "001000";
when INSN_tdi => return "000010";
@ -582,6 +597,7 @@ package body decode_types is
when INSN_fnmadd => return "111111";
when INSN_prefix => return "000001";
when INSN_op57 => return "111001";
when INSN_op60 => return "111100";
when INSN_op61 => return "111101";
when INSN_add => return "011111";
when INSN_addc => return "011111";
@ -649,6 +665,7 @@ package body decode_types is
when INSN_lhzcix => return "011111";
when INSN_lhzux => return "011111";
when INSN_lhzx => return "011111";
when INSN_lqarx => return "011111";
when INSN_lwarx => return "011111";
when INSN_lwaux => return "011111";
when INSN_lwax => return "011111";
@ -714,6 +731,7 @@ package body decode_types is
when INSN_sthcx => return "011111";
when INSN_sthux => return "011111";
when INSN_sthx => return "011111";
when INSN_stqcx => return "011111";
when INSN_stwbrx => return "011111";
when INSN_stwcix => return "011111";
when INSN_stwcx => return "011111";

@ -15,6 +15,7 @@ entity execute1 is
SIM : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
CPU_INDEX : natural;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -45,6 +46,7 @@ entity execute1 is

dbg_ctrl_out : out ctrl_t;

run_out : out std_ulogic;
icache_inval : out std_ulogic;
terminate_out : out std_ulogic;

@ -79,12 +81,23 @@ architecture behaviour of execute1 is
write_xerlow : std_ulogic;
write_dec : std_ulogic;
write_cfar : std_ulogic;
set_cfar : std_ulogic;
write_loga : std_ulogic;
inc_loga : std_ulogic;
write_pmuspr : std_ulogic;
ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic;
mult_32s : std_ulogic;
write_fscr : std_ulogic;
write_ic : std_ulogic;
write_hfscr : std_ulogic;
write_hic : std_ulogic;
write_heir : std_ulogic;
set_heir : std_ulogic;
write_ctrl : std_ulogic;
write_dscr : std_ulogic;
enter_wait : std_ulogic;
scv_trap : std_ulogic;
end record;
constant side_effect_init : side_effect_type := (others => '0');

@ -101,16 +114,18 @@ architecture behaviour of execute1 is
direct_branch : std_ulogic;
start_mul : std_ulogic;
start_div : std_ulogic;
start_bsort : std_ulogic;
do_trace : std_ulogic;
fp_intr : std_ulogic;
res2_sel : std_ulogic_vector(1 downto 0);
bypass_valid : std_ulogic;
ramspr_odd_data : std_ulogic_vector(63 downto 0);
ic : std_ulogic_vector(3 downto 0);
end record;
constant actions_type_init : actions_type :=
(e => Execute1ToWritebackInit, se => side_effect_init,
new_msr => (others => '0'), res2_sel => "00",
ramspr_odd_data => 64x"0", others => '0');
ramspr_odd_data => 64x"0", ic => x"0", others => '0');

type reg_stage1_type is record
e : Execute1ToWritebackType;
@ -121,7 +136,7 @@ architecture behaviour of execute1 is
prev_op : insn_type_t;
prev_prefixed : std_ulogic;
oe : std_ulogic;
mul_select : std_ulogic_vector(1 downto 0);
mul_select : std_ulogic_vector(2 downto 0);
res2_sel : std_ulogic_vector(1 downto 0);
spr_select : spr_id;
pmu_spr_num : std_ulogic_vector(4 downto 0);
@ -131,6 +146,7 @@ architecture behaviour of execute1 is
mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic;
bsort_in_progress : std_ulogic;
no_instr_avail : std_ulogic;
instr_dispatch : std_ulogic;
ext_interrupt : std_ulogic;
@ -141,21 +157,28 @@ architecture behaviour of execute1 is
xerc_valid : std_ulogic;
ramspr_wraddr : ramspr_index;
ramspr_odd_data : std_ulogic_vector(63 downto 0);
ic : std_ulogic_vector(3 downto 0);
prefixed : std_ulogic;
insn : std_ulogic_vector(31 downto 0);
prefix : std_ulogic_vector(25 downto 0);
end record;
constant reg_stage1_type_init : reg_stage1_type :=
(e => Execute1ToWritebackInit, se => side_effect_init,
busy => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
prev_prefixed => '0',
oe => '0', mul_select => "00", res2_sel => "00",
oe => '0', mul_select => "000", res2_sel => "00",
spr_select => spr_id_init, pmu_spr_num => 5x"0",
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
bsort_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0',
msr => 64x"0",
xerc => xerc_init, xerc_valid => '0',
ramspr_wraddr => (others => '0'), ramspr_odd_data => 64x"0");
ramspr_wraddr => (others => '0'), ramspr_odd_data => 64x"0",
ic => x"0",
prefixed => '0', insn => 32x"0", prefix => 26x"0");

type reg_stage2_type is record
e : Execute1ToWritebackType;
@ -190,7 +213,8 @@ architecture behaviour of execute1 is
signal alu_result: std_ulogic_vector(63 downto 0);
signal adder_result: std_ulogic_vector(63 downto 0);
signal misc_result: std_ulogic_vector(63 downto 0);
signal muldiv_result: std_ulogic_vector(63 downto 0);
signal multicyc_result: std_ulogic_vector(63 downto 0);
signal bsort_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0);
signal next_nia : std_ulogic_vector(63 downto 0);
signal s1_sel : std_ulogic_vector(2 downto 0);
@ -215,6 +239,10 @@ architecture behaviour of execute1 is
signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init;

-- bit-sort unit signals
signal bsort_start : std_ulogic;
signal bsort_done : std_ulogic;

-- random number generator signals
signal random_raw : std_ulogic_vector(63 downto 0);
signal random_cond : std_ulogic_vector(63 downto 0);
@ -322,6 +350,7 @@ architecture behaviour of execute1 is
-- 48:63, and partial function MSR bits lie in the range
-- 33:36 and 42:47. (Note this is IBM bit numbering).
msr_out := (others => '0');
msr_out(MSR_HV) := '1'; -- HV is always set
msr_out(63 downto 31) := msr(63 downto 31);
msr_out(26 downto 22) := msr(26 downto 22);
msr_out(15 downto 0) := msr(15 downto 0);
@ -332,6 +361,9 @@ architecture behaviour of execute1 is
return std_ulogic_vector is
variable srr1: std_ulogic_vector(63 downto 0);
begin
srr1(63 downto 61) := msr(63 downto 61);
srr1(MSR_HV) := '1';
srr1(59 downto 31) := msr(59 downto 31);
srr1(63 downto 31) := msr(63 downto 31);
srr1(30 downto 27) := flags(14 downto 11);
srr1(26 downto 22) := msr(26 downto 22);
@ -365,6 +397,39 @@ architecture behaviour of execute1 is
xerc.ov32 & xerc.ca32 & xer_low(17 downto 0);
end;

function assemble_fscr(c: ctrl_t) return std_ulogic_vector is
variable ret : std_ulogic_vector(63 downto 0);
begin
ret := (others => '0');
ret(59 downto 56) := c.fscr_ic;
ret(FSCR_PREFIX) := c.fscr_pref;
ret(FSCR_SCV) := c.fscr_scv;
ret(FSCR_TAR) := c.fscr_tar;
ret(FSCR_DSCR) := c.fscr_dscr;
return ret;
end;

function assemble_hfscr(c: ctrl_t) return std_ulogic_vector is
variable ret : std_ulogic_vector(63 downto 0);
begin
ret := (others => '0');
ret(59 downto 56) := c.hfscr_ic;
ret(HFSCR_PREFIX) := c.hfscr_pref;
ret(HFSCR_TAR) := c.hfscr_tar;
ret(HFSCR_DSCR) := c.hfscr_dscr;
ret(HFSCR_FP) := c.hfscr_fp;
return ret;
end;

function assemble_ctrl(c: ctrl_t; msrpr: std_ulogic) return std_ulogic_vector is
variable ret : std_ulogic_vector(63 downto 0);
begin
ret := (others => '0');
ret(0) := c.run;
ret(15) := c.run and not msrpr;
return ret;
end;

-- Tell vivado to keep the hierarchy for the random module so that the
-- net names in the xdc file match.
attribute keep_hierarchy : string;
@ -437,6 +502,18 @@ begin
);
end generate;

bsort_0: entity work.bit_sorter
port map (
clk => clk,
rst => rst,
rs => c_in,
rb => b_in,
go => bsort_start,
opc => e_in.insn(7 downto 6),
done => bsort_done,
result => bsort_result
);

random_0: entity work.random
port map (
clk => clk,
@ -484,7 +561,7 @@ begin
x_to_pmu.addr_v <= '0';
x_to_pmu.spr_num <= ex1.pmu_spr_num;
x_to_pmu.spr_val <= ex1.e.write_data;
x_to_pmu.run <= '1';
x_to_pmu.run <= ctrl.run;

-- XER forwarding. The CA and CA32 bits are only modified by instructions
-- that are handled here, so for them we can just use the result most
@ -501,7 +578,7 @@ begin

-- N.B. the busy signal from each source includes the
-- stage2 stall from that source in it.
busy_out <= l_in.busy or ex1.busy or fp_in.busy;
busy_out <= l_in.busy or ex1.busy or fp_in.busy or ctrl.wait_state;

valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt);

@ -533,7 +610,13 @@ begin
even_wr_enab := (ex1.se.ramspr_write_even and doit) or interrupt_in.intr;
odd_wr_enab := (ex1.se.ramspr_write_odd and doit) or interrupt_in.intr;
if interrupt_in.intr = '1' then
wr_addr := RAMSPR_SRR0;
if interrupt_in.hv_intr = '1' then
wr_addr := RAMSPR_HSRR0;
elsif interrupt_in.scv_int = '1' then
wr_addr := RAMSPR_LR;
else
wr_addr := RAMSPR_SRR0;
end if;
else
wr_addr := ex1.ramspr_wraddr;
end if;
@ -573,6 +656,9 @@ begin
else
ramspr_result <= ramspr_odd;
end if;
if e_in.ramspr_32bit = '1' then
ramspr_result(63 downto 32) <= 32x"0";
end if;
end process;

ramspr_write: process(clk)
@ -599,7 +685,7 @@ begin
adder_result when "000",
logical_result when "001",
rotator_result when "010",
muldiv_result when "100",
multicyc_result when "100",
ramspr_result when "101",
misc_result when others;

@ -610,16 +696,18 @@ begin
ex1 <= reg_stage1_type_init;
ex2 <= reg_stage2_type_init;
ctrl <= ctrl_t_init;
ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0');
ex1.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0');
ctrl.msr <= (MSR_SF => '1', MSR_HV => '1', MSR_LE => '1', others => '0');
ex1.msr <= (MSR_SF => '1', MSR_HV => '1', MSR_LE => '1', others => '0');
else
ex1 <= ex1in;
ex2 <= ex2in;
ctrl <= ctrl_tmp;
if valid_in = '1' then
report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) &
report "CPU " & natural'image(CPU_INDEX) & " execute " & to_hstring(e_in.nia) &
" op=" & insn_type_t'image(e_in.insn_type) &
" wr=" & to_hstring(ex1in.e.write_reg) & " we=" & std_ulogic'image(ex1in.e.write_enable) &
" tag=" & integer'image(ex1in.e.instr_tag.tag) & std_ulogic'image(ex1in.e.instr_tag.valid);
" tag=" & integer'image(ex1in.e.instr_tag.tag) & std_ulogic'image(ex1in.e.instr_tag.valid) &
" 2nd=" & std_ulogic'image(e_in.second);
end if;
-- We mustn't get stalled on a cycle where execute2 is
-- completing an instruction or generating an interrupt
@ -638,7 +726,18 @@ begin
if dbg_spr_addr(7) = '1' then
dbg_spr_data <= ramspr_result;
else
dbg_spr_data <= assemble_xer(xerc_in, ctrl.xer_low);
case dbg_spr_addr(3 downto 0) is
when SPRSEL_FSCR =>
dbg_spr_data <= assemble_fscr(ctrl);
when SPRSEL_HFSCR =>
dbg_spr_data <= assemble_hfscr(ctrl);
when SPRSEL_HEIR =>
dbg_spr_data <= ctrl.heir;
when SPRSEL_CFAR =>
dbg_spr_data <= ctrl.cfar;
when others =>
dbg_spr_data <= assemble_xer(xerc_in, ctrl.xer_low);
end case;
end if;
dbg_spr_ack <= '1';
end if;
@ -769,17 +868,21 @@ begin
x_to_mult_32s.subtract <= '0';
x_to_mult_32s.addend <= (others => '0');

case ex1.mul_select is
when "00" =>
muldiv_result <= multiply_to_x.result(63 downto 0);
when "01" =>
muldiv_result <= multiply_to_x.result(127 downto 64);
when "10" =>
muldiv_result <= multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32);
when others =>
muldiv_result <= divider_to_x.write_reg_data;
end case;
if ex1.mul_select(2) = '0' then
case ex1.mul_select(1 downto 0) is
when "00" =>
multicyc_result <= multiply_to_x.result(63 downto 0);
when "01" =>
multicyc_result <= multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32);
when others =>
multicyc_result <= multiply_to_x.result(127 downto 64);
end case;
elsif ex1.mul_select(0) = '1' and not HAS_FPU then
multicyc_result <= divider_to_x.write_reg_data;
else
multicyc_result <= bsort_result;
end if;

-- Compute misc_result
case e_in.sub_select is
@ -1047,7 +1150,7 @@ begin
slow_op := '0';
owait := '0';

if e_in.illegal_suffix = '1' then
if e_in.illegal_suffix = '1' or e_in.illegal_form = '1' then
illegal := '1';
elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
privileged := '1';
@ -1058,18 +1161,20 @@ begin
when OP_ILLEGAL =>
illegal := '1';
when OP_SC =>
-- check bit 1 of the instruction is 1 so we know this is sc;
-- 0 would mean scv, so generate an illegal instruction interrupt
-- check bit 1 of the instruction to distinguish sc from scv
if e_in.insn(1) = '1' then
v.trap := '1';
v.advance_nia := '1';
-- sc
v.e.intr_vec := 16#C00#;
if e_in.valid = '1' then
report "sc";
end if;
else
illegal := '1';
-- scv
v.se.scv_trap := '1';
v.e.intr_vec := to_integer(unsigned(e_in.insn(11 downto 5))) * 32;
end if;
v.trap := '1';
v.advance_nia := '1';
when OP_ATTN =>
-- check bits 1-10 of the instruction to make sure it's attn
-- if not then it is illegal
@ -1081,8 +1186,8 @@ begin
else
illegal := '1';
end if;
when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT =>
-- Do nothing
when OP_NOP | OP_DCBST | OP_ICBT =>
-- Do nothing
when OP_ADD =>
if e_in.output_carry = '1' then
if e_in.input_carry /= OV then
@ -1126,7 +1231,7 @@ begin
if ex1.msr(MSR_BE) = '1' then
v.do_trace := '1';
end if;
v.se.write_cfar := '1';
v.se.set_cfar := '1';
when OP_BC =>
-- If CTR is being decremented, it is in ramspr_odd.
bo := insn_bo(e_in.insn);
@ -1145,7 +1250,7 @@ begin
if ex1.msr(MSR_BE) = '1' then
v.do_trace := '1';
end if;
v.se.write_cfar := v.take_branch;
v.se.set_cfar := v.take_branch;
when OP_BCREG =>
-- If CTR is being decremented, it is in ramspr_odd.
-- The target address is in ramspr_result (LR, CTR or TAR).
@ -1158,15 +1263,20 @@ begin
if ex1.msr(MSR_BE) = '1' then
v.do_trace := '1';
end if;
v.se.write_cfar := v.take_branch;
v.se.set_cfar := v.take_branch;

when OP_RFID =>
-- rfid, hrfid and rfscv.
-- These all act the same given that we don't have
-- privileged non-hypervisor mode or ultravisor mode.
srr1 := ramspr_odd;
v.e.redir_mode := (srr1(MSR_IR) or srr1(MSR_PR)) & not srr1(MSR_PR) &
not srr1(MSR_LE) & not srr1(MSR_SF);
-- Can't use msr_copy here because the partial function MSR
-- bits should be left unchanged, not zeroed.
v.new_msr(63 downto 31) := srr1(63 downto 31);
v.new_msr(63 downto 61) := srr1(63 downto 61);
v.new_msr(MSR_HV) := '1';
v.new_msr(59 downto 31) := srr1(59 downto 31);
v.new_msr(26 downto 22) := srr1(26 downto 22);
v.new_msr(15 downto 0) := srr1(15 downto 0);
if srr1(MSR_PR) = '1' then
@ -1176,14 +1286,14 @@ begin
end if;
v.se.write_msr := '1';
v.e.redirect := '1';
v.se.write_cfar := '1';
v.se.set_cfar := '1';
if HAS_FPU then
v.fp_intr := fp_in.exception and
(srr1(MSR_FE0) or srr1(MSR_FE1));
end if;
v.do_trace := '0';

when OP_CNTZ | OP_POPCNT =>
when OP_COUNTB =>
v.res2_sel := "01";
slow_op := '1';
when OP_ISEL =>
@ -1270,6 +1380,18 @@ begin
v.se.write_dec := '1';
when SPRSEL_LOGA =>
v.se.write_loga := '1';
when SPRSEL_CFAR =>
v.se.write_cfar := '1';
when SPRSEL_FSCR =>
v.se.write_fscr := '1';
when SPRSEL_HFSCR =>
v.se.write_hfscr := '1';
when SPRSEL_HEIR =>
v.se.write_heir := '1';
when SPRSEL_CTRL =>
v.se.write_ctrl := '1';
when SPRSEL_DSCR =>
v.se.write_dscr := '1';
when others =>
end case;
end if;
@ -1293,6 +1415,11 @@ begin
when OP_ICBI =>
v.se.icache_inval := '1';

when OP_BSORT =>
v.start_bsort := '1';
slow_op := '1';
owait := '1';

when OP_MUL_L64 =>
if e_in.is_32bit = '1' then
v.se.mult_32s := '1';
@ -1321,6 +1448,11 @@ begin
owait := '1';
end if;

when OP_WAIT =>
if e_in.insn(22 downto 21) = "00" then
v.se.enter_wait := '1';
end if;

when OP_FETCH_FAILED =>
-- Handling an ITLB miss doesn't count as having executed an instruction
v.do_trace := '0';
@ -1331,7 +1463,25 @@ begin
end if;
end case;

if misaligned = '1' then
if ex1.msr(MSR_PR) = '1' and e_in.prefixed = '1' and
(ctrl.hfscr_pref = '0' or ctrl.fscr_pref = '0') then
-- [Hypervisor] facility unavailable for prefixed instructions,
-- which has higher priority than the alignment interrupt for
-- misaligned prefixed instructions, which has higher priority than
-- other [hypervisor] facility unavailable interrupts (e.g. for
-- plfs with HFSCR[FP] = 0).
v.exception := '1';
v.ic := x"b";
if ctrl.hfscr_pref = '0' then
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';
else
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';
end if;

elsif misaligned = '1' then
-- generate an alignment interrupt
-- This is higher priority than illegal because a misaligned
-- prefix will come down as an OP_ILLEGAL instruction.
@ -1354,15 +1504,62 @@ begin
end if;

elsif illegal = '1' then
-- generate hypervisor emulation assistance interrupt (HEAI)
-- and write the offending instruction into HEIR
v.exception := '1';
v.e.srr1(47 - 34) := e_in.prefixed;
-- Since we aren't doing Hypervisor emulation assist (0xe40) we
-- set bit 44 to indicate we have an illegal
v.e.srr1(47 - 44) := '1';
v.e.intr_vec := 16#e40#;
v.e.hv_intr := '1';
v.se.set_heir := '1';
if e_in.valid = '1' then
report "illegal instruction";
end if;

elsif ex1.msr(MSR_PR) = '1' and v.se.scv_trap = '1' and
ctrl.fscr_scv = '0' then
-- Facility unavailable for scv instruction
v.exception := '1';
v.ic := x"c";
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';

elsif ex1.msr(MSR_PR) = '1' and e_in.uses_tar = '1' and
(ctrl.hfscr_tar = '0' or ctrl.fscr_tar = '0') then
-- [Hypervisor] facility unavailable for TAR access
v.exception := '1';
v.ic := x"8";
if ctrl.hfscr_tar = '0' then
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';
else
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';
end if;

elsif ex1.msr(MSR_PR) = '1' and e_in.uses_dscr = '1' and
(ctrl.hfscr_dscr = '0' or ctrl.fscr_dscr = '0') then
-- [Hypervisor] facility unavailable for DSCR access
v.exception := '1';
v.ic := x"2";
if ctrl.hfscr_dscr = '0' then
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';
else
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';
end if;

elsif HAS_FPU and ex1.msr(MSR_PR) = '1' and e_in.fac = FPU and
ctrl.hfscr_fp = '0' then
-- Hypervisor facility unavailable for FP instructions
v.exception := '1';
v.ic := x"0";
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';

elsif HAS_FPU and ex1.msr(MSR_FP) = '0' and e_in.fac = FPU then
-- generate a floating-point unavailable interrupt
v.exception := '1';
@ -1391,19 +1588,24 @@ begin
variable fv : Execute1ToFPUType;
variable go : std_ulogic;
variable bypass_valid : std_ulogic;
variable is_scv : std_ulogic;
begin
v := ex1;
if (ex1.busy or l_in.busy or fp_in.busy) = '0' then
if busy_out = '0' then
v.e := actions.e;
v.e.valid := '0';
v.oe := e_in.oe;
v.spr_select := e_in.spr_select;
v.pmu_spr_num := e_in.insn(20 downto 16);
v.mul_select := e_in.sub_select(1 downto 0);
v.mul_select := e_in.sub_select;
v.se := side_effect_init;
v.ramspr_wraddr := e_in.ramspr_wraddr;
v.lr_from_next := e_in.lr;
v.ramspr_odd_data := actions.ramspr_odd_data;
v.ic := actions.ic;
v.prefixed := e_in.prefixed;
v.insn := e_in.insn;
v.prefix := e_in.prefix;
end if;

lv := Execute1ToLoadstore1Init;
@ -1426,7 +1628,7 @@ begin
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';

do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0';
do_popcnt <= '1' when e_in.insn_type = OP_COUNTB and e_in.insn(7 downto 6) = "11" else '0';

if valid_in = '1' then
v.prev_op := e_in.insn_type;
@ -1454,10 +1656,9 @@ begin
v.e.srr1(47 - 33) := '1';
v.e.srr1(47 - 34) := ex1.prev_prefixed;
if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or
ex1.prev_op = OP_DCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then
ex1.prev_op = OP_DCBF then
v.e.srr1(47 - 35) := '1';
elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ or
ex1.prev_op = OP_DCBTST then
elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ then
v.e.srr1(47 - 36) := '1';
end if;

@ -1474,6 +1675,7 @@ begin
v.e.intr_vec := 16#500#;
report "IRQ valid: External";
v.ext_interrupt := '1';
v.e.hv_intr := '1';
end if;
v.e.srr1 := (others => '0');
exception := '1';
@ -1500,6 +1702,7 @@ begin
v.mul_in_progress := actions.start_mul;
x_to_divider.valid <= actions.start_div;
v.div_in_progress := actions.start_div;
v.bsort_in_progress := actions.start_bsort;
v.br_mispredict := v.e.redirect and actions.direct_branch;
v.advance_nia := actions.advance_nia;
v.redir_to_next := actions.redir_to_next;
@ -1510,7 +1713,7 @@ begin
-- multiply is happening in order to stop following
-- instructions from using the wrong XER value
-- (and for simplicity in the OE=0 case).
v.busy := actions.start_div or actions.start_mul;
v.busy := actions.start_div or actions.start_mul or actions.start_bsort;

-- instruction for other units, i.e. LDST
if e_in.unit = LDST then
@ -1520,6 +1723,8 @@ begin
fv.valid := '1';
end if;
end if;
is_scv := go and actions.se.scv_trap;
bsort_start <= go and actions.start_bsort;

if not HAS_FPU and ex1.div_in_progress = '1' then
v.div_in_progress := not divider_to_x.valid;
@ -1552,6 +1757,13 @@ begin
end if;
v.e.valid := '1';
end if;
if ex1.bsort_in_progress = '1' then
v.bsort_in_progress := not bsort_done;
v.e.valid := bsort_done;
v.busy := not bsort_done;
v.e.write_data := alu_result;
bypass_valid := bsort_done;
end if;

if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
v.xerc := v.e.xerc;
@ -1560,6 +1772,7 @@ begin

if (ex1.busy or l_in.busy or fp_in.busy) = '0' then
v.e.interrupt := exception;
v.e.is_scv := is_scv;
end if;
if v.e.valid = '0' then
v.e.redirect := '0';
@ -1658,6 +1871,12 @@ begin
log_wr_addr & ex2.log_addr_spr when SPRSEL_LOGA,
log_rd_data when SPRSEL_LOGD,
ctrl.cfar when SPRSEL_CFAR,
assemble_fscr(ctrl) when SPRSEL_FSCR,
assemble_hfscr(ctrl) when SPRSEL_HFSCR,
ctrl.heir when SPRSEL_HEIR,
assemble_ctrl(ctrl, ex1.msr(MSR_PR)) when SPRSEL_CTRL,
39x"0" & ctrl.dscr when SPRSEL_DSCR,
56x"0" & std_ulogic_vector(to_unsigned(CPU_INDEX, 8)) when SPRSEL_PIR,
assemble_xer(ex1.e.xerc, ctrl.xer_low) when others;

stage2_stall <= l_in.l2stall or fp_in.f2stall;
@ -1673,6 +1892,7 @@ begin
variable cr_mask : std_ulogic_vector(7 downto 0);
variable sign, zero : std_ulogic;
variable rcnz_hi, rcnz_lo : std_ulogic;
variable irq_exc : std_ulogic;
begin
-- Next insn adder used in a couple of places
next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4);
@ -1792,6 +2012,8 @@ begin
ctrl_tmp.dec <= ex1.e.write_data;
end if;
if ex1.se.write_cfar = '1' then
ctrl_tmp.cfar <= ex1.e.write_data;
elsif ex1.se.set_cfar = '1' then
ctrl_tmp.cfar <= ex1.e.last_nia;
end if;
if ex1.se.write_loga = '1' then
@ -1800,11 +2022,56 @@ begin
v.log_addr_spr := std_ulogic_vector(unsigned(ex2.log_addr_spr) + 1);
end if;
x_to_pmu.mtspr <= ex1.se.write_pmuspr;
if ex1.se.write_hfscr = '1' then
ctrl_tmp.hfscr_ic <= ex1.e.write_data(59 downto 56);
ctrl_tmp.hfscr_pref <= ex1.e.write_data(HFSCR_PREFIX);
ctrl_tmp.hfscr_tar <= ex1.e.write_data(HFSCR_TAR);
ctrl_tmp.hfscr_dscr <= ex1.e.write_data(HFSCR_DSCR);
ctrl_tmp.hfscr_fp <= ex1.e.write_data(HFSCR_FP);
elsif ex1.se.write_hic = '1' then
ctrl_tmp.hfscr_ic <= ex1.ic;
end if;
if ex1.se.write_fscr = '1' then
ctrl_tmp.fscr_ic <= ex1.e.write_data(59 downto 56);
ctrl_tmp.fscr_pref <= ex1.e.write_data(FSCR_PREFIX);
ctrl_tmp.fscr_scv <= ex1.e.write_data(FSCR_SCV);
ctrl_tmp.fscr_tar <= ex1.e.write_data(FSCR_TAR);
ctrl_tmp.fscr_dscr <= ex1.e.write_data(FSCR_DSCR);
elsif ex1.se.write_ic = '1' then
ctrl_tmp.fscr_ic <= ex1.ic;
end if;
if ex1.se.write_heir = '1' then
ctrl_tmp.heir <= ex1.e.write_data;
elsif ex1.se.set_heir = '1' then
ctrl_tmp.heir(31 downto 0) <= ex1.insn;
if ex1.prefixed = '1' then
ctrl_tmp.heir(63 downto 58) <= 6x"01";
ctrl_tmp.heir(57 downto 32) <= ex1.prefix;
else
ctrl_tmp.heir(63 downto 32) <= (others => '0');
end if;
end if;
if ex1.se.write_ctrl = '1' then
ctrl_tmp.run <= ex1.e.write_data(0);
end if;
if ex1.se.write_dscr = '1' then
ctrl_tmp.dscr <= ex1.e.write_data(24 downto 0);
end if;
if ex1.se.enter_wait = '1' then
ctrl_tmp.wait_state <= '1';
end if;
end if;

-- pending exceptions clear any wait state
-- ex1.fp_exception_next is not tested because it is not possible to
-- get into wait state with a pending FP exception.
irq_exc := pmu_to_x.intr or ctrl.dec(63) or ext_irq_in;
if ex1.trace_next = '1' or irq_exc = '1' or interrupt_in.intr = '1' then
ctrl_tmp.wait_state <= '0';
end if;

if interrupt_in.intr = '1' then
ctrl_tmp.msr(MSR_SF) <= '1';
ctrl_tmp.msr(MSR_EE) <= '0';
ctrl_tmp.msr(MSR_PR) <= '0';
ctrl_tmp.msr(MSR_SE) <= '0';
ctrl_tmp.msr(MSR_BE) <= '0';
@ -1813,8 +2080,11 @@ begin
ctrl_tmp.msr(MSR_FE1) <= '0';
ctrl_tmp.msr(MSR_IR) <= '0';
ctrl_tmp.msr(MSR_DR) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
ctrl_tmp.msr(MSR_LE) <= '1';
if interrupt_in.scv_int = '0' then
ctrl_tmp.msr(MSR_EE) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
end if;
end if;

bypass_valid := ex1.e.valid;
@ -1838,6 +2108,7 @@ begin
e_out <= ex2.e;
e_out.msr <= msr_copy(ctrl.msr);

run_out <= ctrl.run;
terminate_out <= ex2.se.terminate;
icache_inval <= ex2.se.icache_inval;


@ -391,7 +391,7 @@ begin
v_int.next_nia := RESET_ADDRESS;
end if;
elsif w_in.interrupt = '1' then
v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00";
v_int.next_nia := 47x"0" & w_in.intr_vec(16 downto 2) & "00";
end if;
if rst /= '0' or w_in.interrupt = '1' then
v.req := '0';

@ -142,6 +142,9 @@ end entity toplevel;

architecture behaviour of toplevel is

-- Status
signal run_out : std_ulogic;

-- Reset signals:
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;
@ -263,6 +266,7 @@ begin
system_clk => system_clk,
rst => soc_rst,
sw_soc_reset => sw_rst,
run_out => run_out,

-- UART signals
uart0_txd => uart_main_tx,
@ -742,6 +746,7 @@ begin
led4 <= system_clk_locked;
led5 <= eth_clk_locked;
led6 <= not soc_rst;
led7 <= run_out;

-- GPIO
gpio_in(10) <= btn0;

@ -61,6 +61,9 @@ architecture behave of loadstore1 is
dc_req : std_ulogic;
load : std_ulogic;
store : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
sync : std_ulogic;
tlbie : std_ulogic;
dcbz : std_ulogic;
read_spr : std_ulogic;
@ -84,6 +87,9 @@ architecture behave of loadstore1 is
update : std_ulogic;
xerc : xer_common_t;
reserve : std_ulogic;
atomic_qw : std_ulogic;
atomic_first : std_ulogic;
atomic_last : std_ulogic;
rc : std_ulogic;
nc : std_ulogic; -- non-cacheable access
virt_mode : std_ulogic;
@ -97,7 +103,8 @@ architecture behave of loadstore1 is
two_dwords : std_ulogic;
incomplete : std_ulogic;
end record;
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0',
flush => '0', touch => '0', sync => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
instr_fault => '0', do_update => '0',
mode_32bit => '0', prefixed => '0',
@ -108,6 +115,7 @@ architecture behave of loadstore1 is
elt_length => x"0", byte_reverse => '0', brev_mask => "000",
sign_extend => '0', update => '0',
xerc => xerc_init, reserve => '0',
atomic_qw => '0', atomic_first => '0', atomic_last => '0',
rc => '0', nc => '0',
virt_mode => '0', priv_mode => '0', load_sp => '0',
sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0',
@ -447,7 +455,10 @@ begin
if l_in.second = '1' then
-- for an update-form load, use the previous address
-- as the value to write back to RA.
addr := r1.addr0;
-- for a quadword load or store, use with the previous
-- address + 8.
addr := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + not l_in.update) &
r1.addr0(2 downto 0);
end if;
if l_in.mode_32bit = '1' then
addr(63 downto 32) := (others => '0');
@ -463,7 +474,7 @@ begin
addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1);

-- Do length_to_sel and work out if we are doing 2 dwords
long_sel := xfer_data_sel(v.length, addr(2 downto 0));
long_sel := xfer_data_sel(l_in.length, addr(2 downto 0));
v.byte_sel := long_sel(7 downto 0);
v.second_bytes := long_sel(15 downto 8);
if long_sel(15 downto 8) /= "00000000" then
@ -472,23 +483,54 @@ begin

-- check alignment for larx/stcx
misaligned := or (addr_mask and addr(2 downto 0));
if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then
misaligned := '1';
end if;
v.align_intr := l_in.reserve and misaligned;

v.atomic_first := not misaligned and not l_in.second;
v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);

-- is this a quadword load or store? i.e. lq plq stq pstq lqarx stqcx.
if l_in.repeat = '1' and l_in.update = '0' then
if misaligned = '0' then
-- Since the access is aligned we have to do it atomically
v.atomic_qw := '1';
else
-- We require non-prefixed lq in LE mode to be aligned in order
-- to avoid the case where RA = RT+1 and the second access faults
-- after the first has overwritten RA.
if l_in.op = OP_LOAD and l_in.byte_reverse = '0' and l_in.prefixed = '0' then
v.align_intr := '1';
end if;
end if;
end if;

case l_in.op is
when OP_SYNC =>
v.sync := '1';
when OP_STORE =>
v.store := '1';
if l_in.length = "0000" then
v.touch := '1';
end if;
when OP_LOAD =>
-- Note: only RA updates have l_in.second = 1
if l_in.second = '0' then
if l_in.update = '0' or l_in.second = '0' then
v.load := '1';
if HAS_FPU and l_in.is_32bit = '1' then
-- Allow an extra cycle for SP->DP precision conversion
v.load_sp := '1';
end if;
if l_in.length = "0000" then
v.touch := '1';
end if;
else
-- write back address to RA
v.do_update := '1';
end if;
when OP_DCBF =>
v.load := '1';
v.flush := '1';
when OP_DCBZ =>
v.dcbz := '1';
v.align_intr := v.nc;
@ -508,13 +550,13 @@ begin
v.mmu_op := '1';
when others =>
end case;
v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr;
v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr;
v.incomplete := v.dc_req and v.two_dwords;

-- Work out controls for load and store formatting
brev_lenm1 := "000";
if v.byte_reverse = '1' then
brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if;
v.brev_mask := brev_lenm1;

@ -699,7 +741,8 @@ begin
end if;

interrupt := (r2.req.valid and r2.req.align_intr) or
(d_in.error and d_in.cache_paradox) or m_in.err;
(d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or
m_in.err;
if interrupt = '1' then
v.req.valid := '0';
v.busy := '0';
@ -855,7 +898,8 @@ begin

if d_in.valid = '1' then
if r2.req.incomplete = '0' then
write_enable := r2.req.load and not r2.req.load_sp;
write_enable := r2.req.load and not r2.req.load_sp and
not r2.req.flush and not r2.req.touch;
-- stores write back rA update
do_update := r2.req.update and r2.req.store;
end if;
@ -865,6 +909,7 @@ begin
-- signal an interrupt straight away
exception := '1';
dsisr(63 - 38) := not r2.req.load;
dsisr(63 - 37) := d_in.reserve_nc;
-- XXX there is no architected bit for this
-- (probably should be a machine check in fact)
dsisr(63 - 35) := d_in.cache_paradox;
@ -950,8 +995,14 @@ begin
d_out.valid <= stage1_dcreq;
d_out.load <= stage1_req.load;
d_out.dcbz <= stage1_req.dcbz;
d_out.flush <= stage1_req.flush;
d_out.touch <= stage1_req.touch;
d_out.sync <= stage1_req.sync;
d_out.nc <= stage1_req.nc;
d_out.reserve <= stage1_req.reserve;
d_out.atomic_qw <= stage1_req.atomic_qw;
d_out.atomic_first <= stage1_req.atomic_first;
d_out.atomic_last <= stage1_req.atomic_last;
d_out.addr <= stage1_req.addr;
d_out.byte_sel <= stage1_req.byte_sel;
d_out.virt_mode <= stage1_req.virt_mode;
@ -960,8 +1011,14 @@ begin
d_out.valid <= req;
d_out.load <= r2.req.load;
d_out.dcbz <= r2.req.dcbz;
d_out.flush <= r2.req.flush;
d_out.touch <= r2.req.touch;
d_out.sync <= r2.req.sync;
d_out.nc <= r2.req.nc;
d_out.reserve <= r2.req.reserve;
d_out.atomic_qw <= r2.req.atomic_qw;
d_out.atomic_first <= r2.req.atomic_first;
d_out.atomic_last <= r2.req.atomic_last;
d_out.addr <= r2.req.addr;
d_out.byte_sel <= r2.req.byte_sel;
d_out.virt_mode <= r2.req.virt_mode;

@ -20,6 +20,7 @@ filesets:
- sim_console.vhdl
- logical.vhdl
- countbits.vhdl
- bitsort.vhdl
- control.vhdl
- execute1.vhdl
- fpu.vhdl

@ -121,6 +121,8 @@ architecture behaviour of predecoder is
2#011110_01110# to 2#011110_01111# => INSN_rldimi,
2#011110_10000# to 2#011110_10001# => INSN_rldcl,
2#011110_10010# to 2#011110_10011# => INSN_rldcr,
-- major opcode 56
2#111000_00000# to 2#111000_11111# => INSN_lq,
-- major opcode 58
2#111010_00000# => INSN_ld,
2#111010_00001# => INSN_ldu,
@ -161,20 +163,28 @@ architecture behaviour of predecoder is
-- major opcode 62
2#111110_00000# => INSN_std,
2#111110_00001# => INSN_stdu,
2#111110_00010# => INSN_stq,
2#111110_00100# => INSN_std,
2#111110_00101# => INSN_stdu,
2#111110_00110# => INSN_stq,
2#111110_01000# => INSN_std,
2#111110_01001# => INSN_stdu,
2#111110_01010# => INSN_stq,
2#111110_01100# => INSN_std,
2#111110_01101# => INSN_stdu,
2#111110_01110# => INSN_stq,
2#111110_10000# => INSN_std,
2#111110_10001# => INSN_stdu,
2#111110_10010# => INSN_stq,
2#111110_10100# => INSN_std,
2#111110_10101# => INSN_stdu,
2#111110_10110# => INSN_stq,
2#111110_11000# => INSN_std,
2#111110_11001# => INSN_stdu,
2#111110_11010# => INSN_stq,
2#111110_11100# => INSN_std,
2#111110_11101# => INSN_stdu,
2#111110_11110# => INSN_stq,
-- major opcode 63
2#111111_00100# to 2#111111_00101# => INSN_fdiv,
2#111111_01000# to 2#111111_01001# => INSN_fsub,
@ -190,8 +200,9 @@ architecture behaviour of predecoder is
2#111111_11110# to 2#111111_11111# => INSN_fnmadd,
-- prefix word, PO1
2#000001_00000# to 2#000001_11111# => INSN_prefix,
-- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed
-- Major opcodes 57, 60 and 61 are SFFS load/store instructions when prefixed
2#111001_00000# to 2#111001_11111# => INSN_op57,
2#111100_00000# to 2#111100_11111# => INSN_op60,
2#111101_00000# to 2#111101_11111# => INSN_op61,
others => INSN_illegal
);
@ -219,6 +230,7 @@ architecture behaviour of predecoder is
2#0_00101_11011# => INSN_brd,
2#0_01001_11010# => INSN_cbcdtd,
2#0_01000_11010# => INSN_cdtbcd,
2#0_00110_11100# => INSN_cfuged,
2#0_00000_00000# => INSN_cmp,
2#0_01111_11100# => INSN_cmpb,
2#0_00111_00000# => INSN_cmpeqb,
@ -316,6 +328,7 @@ architecture behaviour of predecoder is
2#0_11001_10101# => INSN_lhzcix,
2#0_01001_10111# => INSN_lhzux,
2#0_01000_10111# => INSN_lhzx,
2#0_01000_10100# => INSN_lqarx,
2#0_00000_10100# => INSN_lwarx,
2#0_01011_10101# => INSN_lwaux,
2#0_01010_10101# => INSN_lwax,
@ -363,6 +376,8 @@ architecture behaviour of predecoder is
2#0_00011_11100# => INSN_nor,
2#0_01101_11100# => INSN_or,
2#0_01100_11100# => INSN_orc,
2#0_00100_11100# => INSN_pdepd,
2#0_00101_11100# => INSN_pextd,
2#0_00011_11010# => INSN_popcntb,
2#0_01111_11010# => INSN_popcntd,
2#0_01011_11010# => INSN_popcntw,
@ -402,6 +417,7 @@ architecture behaviour of predecoder is
2#0_10110_10110# => INSN_sthcx,
2#0_01101_10111# => INSN_sthux,
2#0_01100_10111# => INSN_sthx,
2#0_00101_10110# => INSN_stqcx,
2#0_10100_10110# => INSN_stwbrx,
2#0_11100_10101# => INSN_stwcix,
2#0_00100_10110# => INSN_stwcx,
@ -447,6 +463,8 @@ architecture behaviour of predecoder is
2#1_00100_11110# => INSN_isync,
2#1_00000_10000# => INSN_mcrf,
2#1_00000_11010# => INSN_rfid,
2#1_00010_11010# => INSN_rfscv,
2#1_01000_11010# => INSN_rfid, -- hrfid

-- Major opcode 59
-- Address bits are 1, insn(10..6), 1, 0, insn(3..1)

@ -87,13 +87,13 @@ const char *units[4] = { "al", "ls", "fp", "3?" };
const char *ops[64] =
{
"illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ",
"bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ",
"darn ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "icbi ", "icbt ",
"fpcmp ", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ",
"extswsl", "isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ",
"mfmsr ", "mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ",
"popcnt ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ",
"shl ", "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "ffail ",
"bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "countb ", "crop ",
"darn ", "dcbf ", "dcbst ", "xcbt ", "dcbtst ", "dcbz ", "icbi ", "fpcmp ",
"fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "extswsl",
"isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ",
"mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "bsort ",
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "wait ", "ffail ",
};

const char *spr_names[13] =

@ -550,6 +550,7 @@ static const char *fast_spr_names[] =
"lr", "ctr", "srr0", "srr1", "hsrr0", "hsrr1",
"sprg0", "sprg1", "sprg2", "sprg3",
"hsprg0", "hsprg1", "xer", "tar",
"fscr", "hfscr", "heir", "cfar",
};

static const char *ldst_spr_names[] = {

@ -99,6 +99,8 @@ entity soc is
rst : in std_ulogic;
system_clk : in std_ulogic;

run_out : out std_ulogic;

-- "Large" (64-bit) DRAM wishbone
wb_dram_in : out wishbone_master_out;
wb_dram_out : in wishbone_slave_out := wishbone_slave_out_init;
@ -349,6 +351,7 @@ begin
processor: entity work.core
generic map(
SIM => SIM,
CPU_INDEX => 0,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
@ -366,6 +369,7 @@ begin
clk => system_clk,
rst => rst_core,
alt_reset => alt_reset_d,
run_out => run_out,
wishbone_insn_in => wishbone_icore_in,
wishbone_insn_out => wishbone_icore_out,
wishbone_data_in => wishbone_dcore_in,

@ -74,25 +74,15 @@ ill_test_1:
EXCEPTION(0x500)
EXCEPTION(0x600)

// We shouldn't get a Program interrupt at 700, so fail
. = 0x700
mtsprg0 %r3
mtsprg1 %r4

// test for bit 44 being set for ILL
mfsrr1 %r3
li %r4, 1
sldi %r4, %r4, (63-44)
and. %r4, %r4, %r3
li %r4, 8 // PASS so skip 2 instructions
bne 1f
li %r4, 4 // FAIL so only skip 1 instruction. Return will catch
1:

mfsrr0 %r3
add %r3, %r3, %r4 // skip some instructions
addi %r3, %r3, 4 // skip one instruction, causing a fail
mtsrr0 %r3

mfsprg0 %r3
mfsprg1 %r4
rfid

EXCEPTION(0x800)
@ -104,7 +94,18 @@ ill_test_1:
EXCEPTION(0xd00)
EXCEPTION(0xe00)
EXCEPTION(0xe20)
EXCEPTION(0xe40)

// We now expect a HEAI at e40 for illegal instructions
. = 0xe40
mthsprg0 %r3

mfhsrr0 %r3
addi %r3, %r3, 8 // skip one instruction, causing success
mthsrr0 %r3

mfhsprg0 %r3
hrfid

EXCEPTION(0xe60)
EXCEPTION(0xe80)
EXCEPTION(0xf00)

@ -7,6 +7,7 @@
#define MSR_LE 0x1
#define MSR_DR 0x10
#define MSR_IR 0x20
#define MSR_HV 0x1000000000000000ul
#define MSR_SF 0x8000000000000000ul

extern int test_read(long *addr, long *ret, long init);
@ -450,11 +451,11 @@ int mmu_test_11(void)
unsigned long ptr = 0x523000;

/* this should fail */
if (test_exec(0, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(0, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x40000000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x40000000 | MSR_IR | MSR_LE))
return 2;
return 0;
}
@ -468,12 +469,12 @@ int mmu_test_12(void)
/* create PTE */
map((void *)ptr, (void *)mem, PERM_EX | REF);
/* this should succeed and be a cache miss */
if (!test_exec(0, ptr, MSR_SF | MSR_IR | MSR_LE))
if (!test_exec(0, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* create a second PTE */
map((void *)ptr2, (void *)mem, PERM_EX | REF);
/* this should succeed and be a cache hit */
if (!test_exec(0, ptr2, MSR_SF | MSR_IR | MSR_LE))
if (!test_exec(0, ptr2, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 2;
return 0;
}
@ -487,18 +488,18 @@ int mmu_test_13(void)
/* create a PTE */
map((void *)ptr, (void *)mem, PERM_EX | REF);
/* this should succeed */
if (!test_exec(1, ptr, MSR_SF | MSR_IR | MSR_LE))
if (!test_exec(1, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* invalidate the PTE */
unmap((void *)ptr);
/* install a second PTE */
map((void *)ptr2, (void *)mem, PERM_EX | REF);
/* this should fail */
if (test_exec(1, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(1, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 2;
/* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x40000000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x40000000 | MSR_IR | MSR_LE))
return 3;
return 0;
}
@ -513,16 +514,16 @@ int mmu_test_14(void)
/* create a PTE */
map((void *)ptr, (void *)mem, PERM_EX | REF);
/* this should fail due to second page not being mapped */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != ptr2 ||
mfspr(SRR1) != (MSR_SF | 0x40000000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x40000000 | MSR_IR | MSR_LE))
return 2;
/* create a PTE for the second page */
map((void *)ptr2, (void *)mem2, PERM_EX | REF);
/* this should succeed */
if (!test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE))
if (!test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 3;
return 0;
}
@ -535,11 +536,11 @@ int mmu_test_15(void)
/* create a PTE without execute permission */
map((void *)ptr, (void *)mem, DFLT_PERM);
/* this should fail */
if (test_exec(0, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(0, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != ptr ||
mfspr(SRR1) != (MSR_SF | 0x10000000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x10000000 | MSR_IR | MSR_LE))
return 2;
return 0;
}
@ -556,16 +557,16 @@ int mmu_test_16(void)
/* create a PTE for the second page without execute permission */
map((void *)ptr2, (void *)mem2, PERM_RD | REF);
/* this should fail due to second page being no-execute */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != ptr2 ||
mfspr(SRR1) != (MSR_SF | 0x10000000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x10000000 | MSR_IR | MSR_LE))
return 2;
/* create a PTE for the second page with execute permission */
map((void *)ptr2, (void *)mem2, PERM_RD | PERM_EX | REF);
/* this should succeed */
if (!test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE))
if (!test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 3;
return 0;
}
@ -578,22 +579,22 @@ int mmu_test_17(void)
/* create a PTE without the ref bit set */
map((void *)ptr, (void *)mem, PERM_EX);
/* this should fail */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x00040000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x00040000 | MSR_IR | MSR_LE))
return 2;
/* create a PTE without ref or execute permission */
unmap((void *)ptr);
map((void *)ptr, (void *)mem, 0);
/* this should fail */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE))
if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1;
/* SRR0 and SRR1 should be set correctly */
/* RC update fail bit should not be set */
if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x10000000 | MSR_IR | MSR_LE))
mfspr(SRR1) != (MSR_SF | MSR_HV | 0x10000000 | MSR_IR | MSR_LE))
return 2;
return 0;
}

@ -230,3 +230,63 @@ restore:
ld %r0,16(%r1)
mtlr %r0
blr

.global do_lq
do_lq:
lq %r6,0(%r3)
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

.global do_lq_np /* "non-preferred" form of lq */
do_lq_np:
mr %r7,%r3
lq %r6,0(%r7)
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

.global do_lq_bad /* illegal form of lq */
do_lq_bad:
mr %r6,%r3
.long 0xe0c60000 /* lq %r6,0(%r6) */
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

.global do_stq
do_stq:
ld %r8,0(%r4)
ld %r9,8(%r4)
stq %r8,0(%r3)
li %r3,0
blr

/* big-endian versions of the above */
.global do_lq_be
do_lq_be:
.long 0x0000c3e0
.long 0x0000c4f8
.long 0x0800e4f8
.long 0x00006038
.long 0x2000804e

.global do_lq_np_be /* "non-preferred" form of lq */
do_lq_np_be:
.long 0x781b677c
.long 0x0000c7e0
.long 0x0000c4f8
.long 0x0800e4f8
.long 0x00006038
.long 0x2000804e

.global do_stq_be
do_stq_be:
.long 0x000004e9
.long 0x080024e9
.long 0x020003f9
.long 0x00006038
.long 0x2000804e

@ -12,6 +12,14 @@
extern unsigned long callit(unsigned long arg1, unsigned long arg2,
unsigned long fn, unsigned long msr);

extern void do_lq(void *src, unsigned long *regs);
extern void do_lq_np(void *src, unsigned long *regs);
extern void do_lq_bad(void *src, unsigned long *regs);
extern void do_stq(void *dst, unsigned long *regs);
extern void do_lq_be(void *src, unsigned long *regs);
extern void do_lq_np_be(void *src, unsigned long *regs);
extern void do_stq_be(void *dst, unsigned long *regs);

static inline void do_tlbie(unsigned long rb, unsigned long rs)
{
__asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory");
@ -25,6 +33,8 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs)
#define SPRG0 272
#define SPRG1 273
#define SPRG3 275
#define HSRR0 314
#define HSRR1 315
#define PTCR 464

static inline unsigned long mfspr(int sprnum)
@ -294,6 +304,166 @@ int mode_test_6(void)
return 0;
}

int mode_test_7(void)
{
unsigned long quad[4] __attribute__((__aligned__(16)));
unsigned long regs[2];
unsigned long ret, msr;

/*
* Test lq/stq in LE mode
*/
msr = MSR_SF | MSR_LE;
quad[0] = 0x123456789abcdef0ul;
quad[1] = 0xfafa5959bcbc3434ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq, msr);
if (ret)
return ret | 1;
if (regs[0] != quad[1] || regs[1] != quad[0])
return 2;
/* unaligned may give alignment interrupt */
quad[2] = 0x0011223344556677ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq, msr);
if (ret == 0) {
if (regs[0] != quad[2] || regs[1] != quad[1])
return 3;
} else if (ret == 0x600) {
if (mfspr(SPRG0) != (unsigned long) &do_lq ||
mfspr(DAR) != (unsigned long) &quad[1])
return ret | 4;
} else
return ret | 5;

/* try stq */
regs[0] = 0x5238523852385238ul;
regs[1] = 0x5239523952395239ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_stq, msr);
if (ret)
return ret | 5;
if (quad[0] != regs[1] || quad[1] != regs[0])
return 6;
regs[0] = 0x0172686966746564ul;
regs[1] = 0xfe8d0badd00dabcdul;
ret = callit((unsigned long)quad + 1, (unsigned long)regs,
(unsigned long)&do_stq, msr);
if (ret)
return ret | 7;
if (((quad[0] >> 8) | (quad[1] << 56)) != regs[1] ||
((quad[1] >> 8) | (quad[2] << 56)) != regs[0])
return 8;

/* try lq non-preferred form */
quad[0] = 0x56789abcdef01234ul;
quad[1] = 0x5959bcbc3434fafaul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_np, msr);
if (ret)
return ret | 9;
if (regs[0] != quad[1] || regs[1] != quad[0])
return 10;
/* unaligned should give alignment interrupt in uW implementation */
quad[2] = 0x6677001122334455ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_np, msr);
if (ret == 0x600) {
if (mfspr(SPRG0) != (unsigned long) &do_lq_np + 4 ||
mfspr(DAR) != (unsigned long) &quad[1])
return ret | 11;
} else
return 12;

/* make sure lq with rt = ra causes a HEAI interrupt */
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_bad, msr);
if (ret != 0xe40)
return 13;
if (mfspr(HSRR0) != (unsigned long)&do_lq_bad + 4)
return 14;
return 0;
}

int mode_test_8(void)
{
unsigned long quad[4] __attribute__((__aligned__(16)));
unsigned long regs[2];
unsigned long ret, msr;

/*
* Test lq/stq in BE mode
*/
msr = MSR_SF;
quad[0] = 0x123456789abcdef0ul;
quad[1] = 0xfafa5959bcbc3434ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_be, msr);
if (ret)
return ret | 1;
if (regs[0] != quad[0] || regs[1] != quad[1]) {
print_hex(regs[0], 16);
print_string(" ");
print_hex(regs[1], 16);
print_string(" ");
return 2;
}
/* don't expect alignment interrupt */
quad[2] = 0x0011223344556677ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_be, msr);
if (ret == 0) {
if (regs[0] != quad[1] || regs[1] != quad[2])
return 3;
} else
return ret | 5;

/* try stq */
regs[0] = 0x5238523852385238ul;
regs[1] = 0x5239523952395239ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_stq_be, msr);
if (ret)
return ret | 5;
if (quad[0] != regs[0] || quad[1] != regs[1])
return 6;
regs[0] = 0x0172686966746564ul;
regs[1] = 0xfe8d0badd00dabcdul;
ret = callit((unsigned long)quad + 1, (unsigned long)regs,
(unsigned long)&do_stq_be, msr);
if (ret)
return ret | 7;
if (((quad[0] >> 8) | (quad[1] << 56)) != regs[0] ||
((quad[1] >> 8) | (quad[2] << 56)) != regs[1]) {
print_hex(quad[0], 16);
print_string(" ");
print_hex(quad[1], 16);
print_string(" ");
print_hex(quad[2], 16);
print_string(" ");
return 8;
}

/* try lq non-preferred form */
quad[0] = 0x56789abcdef01234ul;
quad[1] = 0x5959bcbc3434fafaul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_np_be, msr);
if (ret)
return ret | 9;
if (regs[0] != quad[0] || regs[1] != quad[1])
return 10;
/* unaligned should not give alignment interrupt in uW implementation */
quad[2] = 0x6677001122334455ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_np_be, msr);
if (ret)
return ret | 11;
if (regs[0] != quad[1] || regs[1] != quad[2])
return 12;
return 0;
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -338,6 +508,8 @@ int main(void)
do_test(4, mode_test_4);
do_test(5, mode_test_5);
do_test(6, mode_test_6);
do_test(7, mode_test_7);
do_test(8, mode_test_8);

return fail;
}

@ -245,3 +245,23 @@ test_pstw:
pstw %r3,wvar(0)
li %r3,0
blr

.globl test_plq
test_plq:
nop
nop
plq %r4,qvar(0)
std %r4,0(%r3)
std %r5,8(%r3)
li %r3,0
blr

.globl test_pstq
test_pstq:
nop
nop
ld %r4,0(%r3)
ld %r5,8(%r3)
pstq %r4,qvar(0)
li %r3,0
blr

@ -7,6 +7,7 @@
#define MSR_LE 0x1
#define MSR_DR 0x10
#define MSR_IR 0x20
#define MSR_HV 0x1000000000000000ul
#define MSR_SF 0x8000000000000000ul

#define DSISR 18
@ -32,6 +33,8 @@ extern long test_pstd(long arg);
extern long test_psth(long arg);
extern long test_pstw(long arg);
extern long test_plfd(long arg);
extern long test_plq(long arg);
extern long test_pstq(long arg);

static inline unsigned long mfspr(int sprnum)
{
@ -103,7 +106,7 @@ long int prefix_test_2(void)
return 1;
if (mfspr(SRR0) != (unsigned long)&test_paddi_mis + 8)
return 2;
if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 35)) | (1ul << (63 - 34))))
if (mfspr(SRR1) != (MSR_SF | MSR_HV | MSR_LE | (1ul << (63 - 35)) | (1ul << (63 - 34))))
return 3;

ret = trapit((long)&x, test_plfd);
@ -111,7 +114,7 @@ long int prefix_test_2(void)
return ret;
if (mfspr(SRR0) != (unsigned long)&test_plfd + 8)
return 6;
if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 34))))
if (mfspr(SRR1) != (MSR_SF | MSR_HV | MSR_LE | (1ul << (63 - 34))))
return 7;
return 0;
}
@ -182,6 +185,39 @@ long int prefix_test_3(void)
return 0;
}

unsigned long qvar[2] __attribute__((__aligned__(16)));
#define V1 0x678912345a5a2b2bull
#define V2 0xa0549922bbccddeeull

/* test plq and pstq */
long int prefix_test_4(void)
{
long int ret;
unsigned long x[2];

qvar[0] = V1;
qvar[1] = V2;
ret = trapit((long)&x, test_plq);
if (ret)
return ret | 1;
if (x[0] != V1 || x[1] != V2) {
print_hex(x[0], 16, " ");
print_hex(x[1], 16, " ");
return 2;
}
x[0] = ~V2;
x[1] = ~V1;
ret = trapit((long)&x, test_pstq);
if (ret)
return ret | 3;
if (qvar[0] != ~V2 || qvar[1] != ~V1) {
print_hex(qvar[0], 16, " ");
print_hex(qvar[1], 16, " ");
return 4;
}
return 0;
}

int fail = 0;

void do_test(int num, long int (*test)(void))
@ -209,6 +245,7 @@ int main(void)
do_test(1, prefix_test_1);
do_test(2, prefix_test_2);
do_test(3, prefix_test_3);
do_test(4, prefix_test_4);

return fail;
}

@ -155,3 +155,31 @@ call_ret:
ld %r31,248(%r1)
addi %r1,%r1,256
blr

.global do_lqarx
do_lqarx:
/* r3 = src, r4 = regs */
lqarx %r10,0,%r3
std %r10,0(%r4)
std %r11,8(%r4)
li %r3,0
blr

.global do_lqarx_bad
do_lqarx_bad:
/* r3 = src, r4 = regs */
.long 0x7d405228 /* lqarx %r10,0,%r10 */
std %r10,0(%r4)
std %r11,8(%r4)
li %r3,0
blr

.global do_stqcx
do_stqcx:
/* r3 = dest, r4 = regs, return CR */
ld %r10,0(%r4)
ld %r11,8(%r4)
stqcx. %r10,0,%r3
mfcr %r3
oris %r3,%r3,1 /* to distinguish from trap number */
blr

@ -7,6 +7,10 @@
extern unsigned long callit(unsigned long arg1, unsigned long arg2,
unsigned long (*fn)(unsigned long, unsigned long));

extern unsigned long do_lqarx(unsigned long src, unsigned long regs);
extern unsigned long do_lqarx_bad(unsigned long src, unsigned long regs);
extern unsigned long do_stqcx(unsigned long dst, unsigned long regs);

#define DSISR 18
#define DAR 19
#define SRR0 26
@ -180,6 +184,63 @@ int resv_test_2(void)
return 0;
}

/* test lqarx/stqcx */
int resv_test_3(void)
{
unsigned long x[4] __attribute__((__aligned__(16)));
unsigned long y[2], regs[2];
unsigned long ret, offset;
int count;

x[0] = 0x7766554433221100ul;
x[1] = 0xffeeddccbbaa9988ul;
y[0] = 0x0badcafef00dd00dul;
y[1] = 0xdeadbeef07070707ul;
for (count = 0; count < 1000; ++count) {
ret = callit((unsigned long)x, (unsigned long)regs, do_lqarx);
if (ret)
return ret | 1;
ret = callit((unsigned long)x, (unsigned long)y, do_stqcx);
if (ret < 0x10000)
return ret | 2;
if (ret & 0x20000000)
break;
}
if (count == 1000)
return 3;
if (x[0] != y[1] || x[1] != y[0])
return 4;
if (regs[1] != 0x7766554433221100ul || regs[0] != 0xffeeddccbbaa9988ul)
return 5;
ret = callit((unsigned long)x, (unsigned long)regs, do_stqcx);
if (ret < 0x10000 || (ret & 0x20000000))
return ret | 12;
/* test alignment interrupts */
for (offset = 0; offset < 16; ++offset) {
ret = callit((unsigned long)x + offset, (unsigned long)regs, do_lqarx);
if (ret == 0 && (offset & 15) != 0)
return 6;
if (ret == 0x600) {
if ((offset & 15) == 0)
return ret + 7;
} else if (ret)
return ret;
ret = callit((unsigned long)x + offset, (unsigned long)y, do_stqcx);
if (ret >= 0x10000 && (offset & 15) != 0)
return 8;
if (ret == 0x600) {
if ((offset & 15) == 0)
return ret + 9;
} else if (ret < 0x10000)
return ret;
}
/* test illegal interrupt for bad lqarx case */
ret = callit((unsigned long)x, (unsigned long)regs, do_lqarx_bad);
if (ret != 0xe40)
return ret + 10;
return 0;
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -204,6 +265,7 @@ int main(void)

do_test(1, resv_test_1);
do_test(2, resv_test_2);
do_test(3, resv_test_3);

return fail;
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -4,3 +4,5 @@ test 03:PASS
test 04:PASS
test 05:PASS
test 06:PASS
test 07:PASS
test 08:PASS

Binary file not shown.

@ -1,3 +1,4 @@
test 01:PASS
test 02:PASS
test 03:PASS
test 04:PASS

Binary file not shown.

@ -1,2 +1,3 @@
test 01:PASS
test 02:PASS
test 03:PASS

Binary file not shown.

@ -115,7 +115,7 @@ __isr:
std %r29, 29*8(%r1)
std %r30, 30*8(%r1)
std %r31, 31*8(%r1)
mfsrr0 %r0
mfhsrr0 %r0
std %r0, SAVE_NIA*8(%r1)
mflr %r0
std %r0, SAVE_LR*8(%r1)
@ -123,7 +123,7 @@ __isr:
std %r0, SAVE_CTR*8(%r1)
mfcr %r0
std %r0, SAVE_CR*8(%r1)
mfsrr1 %r0
mfhsrr1 %r0
std %r0, SAVE_SRR1*8(%r1)

stdu %r1,-STACK_FRAME_C_MINIMAL(%r1)

@ -72,11 +72,15 @@ begin
variable vec : integer range 0 to 16#fff#;
variable srr1 : std_ulogic_vector(15 downto 0);
variable intr : std_ulogic;
variable hvi : std_ulogic;
variable scv : std_ulogic;
variable intr_page : std_ulogic_vector(4 downto 0);
begin
w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;
f := WritebackToFetch1Init;
vec := 0;
hvi := '0';

complete_out <= instr_tag_init;
if e_in.valid = '1' then
@ -93,9 +97,16 @@ begin
interrupt_out.intr <= intr;

srr1 := (others => '0');
intr_page := 5x"0";
scv := '0';
if e_in.interrupt = '1' then
vec := e_in.intr_vec;
srr1 := e_in.srr1;
hvi := e_in.hv_intr;
scv := e_in.is_scv;
if e_in.is_scv = '1' then
intr_page := 5x"17";
end if;
elsif l_in.interrupt = '1' then
vec := l_in.intr_vec;
srr1 := l_in.srr1;
@ -103,7 +114,9 @@ begin
vec := fp_in.intr_vec;
srr1 := fp_in.srr1;
end if;
interrupt_out.hv_intr <= hvi;
interrupt_out.srr1 <= srr1;
interrupt_out.scv_int <= scv;

if intr = '0' then
if e_in.write_enable = '1' then
@ -161,7 +174,7 @@ begin

-- Outputs to fetch1
f.interrupt := intr;
f.intr_vec := std_ulogic_vector(to_unsigned(vec, 12));
f.intr_vec := intr_page & std_ulogic_vector(to_unsigned(vec, 12));
f.redirect := e_in.redirect;
f.redirect_nia := e_in.write_data;
f.br_nia := e_in.last_nia;

Loading…
Cancel
Save