|
|
|
library ieee;
|
|
|
|
use ieee.std_logic_1164.all;
|
|
|
|
use ieee.numeric_std.all;
|
|
|
|
|
|
|
|
library work;
|
|
|
|
use work.decode_types.all;
|
|
|
|
use work.common.all;
|
|
|
|
use work.insn_helpers.all;
|
|
|
|
use work.helpers.all;
|
|
|
|
|
|
|
|
-- 2 cycle LSU
|
|
|
|
-- We calculate the address in the first cycle
|
|
|
|
|
|
|
|
entity loadstore1 is
    generic (
        -- When true, the SP<->DP floating-point conversion logic
        -- (the ls_fp_conv generate block) is instantiated.
        HAS_FPU    : boolean := true;
        -- Non-zero to enable log data collection
        LOG_LENGTH : natural := 0
        );
    port (
        -- Clock and reset (reset is sampled on the rising clock edge)
        clk      : in  std_ulogic;
        rst      : in  std_ulogic;

        -- Incoming instruction, and outputs named for execute1 and writeback
        l_in     : in  Execute1ToLoadstore1Type;
        e_out    : out Loadstore1ToExecute1Type;
        l_out    : out Loadstore1ToWritebackType;

        -- Data cache request / response
        d_out    : out Loadstore1ToDcacheType;
        d_in     : in  DcacheToLoadstore1Type;

        -- MMU request / response
        m_out    : out Loadstore1ToMmuType;
        m_in     : in  MmuToLoadstore1Type;

        -- Stall input; it is one of the terms of the internal 'busy' signal
        dc_stall : in  std_ulogic;

        events   : out Loadstore1EventType;

        log_out  : out std_ulogic_vector(9 downto 0)
        );
end loadstore1;
|
|
|
|
|
|
|
|
architecture behave of loadstore1 is
|
|
|
|
|
|
|
|
-- State of the stage-3 state machine (held in r3.state)
type state_t is (
    IDLE,       -- ready for instruction
    MMU_WAIT    -- waiting for MMU to finish doing something
    );

-- One 3-bit byte index per byte lane of a doubleword
-- (used for the load formatter controls in reg_stage2_t.byte_index)
type byte_index_t is array(0 to 7) of unsigned(2 downto 0);

-- Per-byte 2-bit trim control, and the set of eight of them
subtype byte_trim_t is std_ulogic_vector(1 downto 0);
type trim_ctl_t is array(0 to 7) of byte_trim_t;
|
|
|
|
|
|
|
|
-- Internal request format: one decoded load/store/MMU/SPR operation as it
-- travels down the loadstore1 pipeline (r1.req, r2.req).
type request_t is record
    valid        : std_ulogic;
    dc_req       : std_ulogic;      -- load, store or dcbz with no alignment interrupt
    load         : std_ulogic;
    store        : std_ulogic;
    tlbie        : std_ulogic;
    dcbz         : std_ulogic;
    read_spr     : std_ulogic;
    write_spr    : std_ulogic;
    mmu_op       : std_ulogic;      -- tlbie, MMU SPR write or instruction fetch fault
    instr_fault  : std_ulogic;
    do_update    : std_ulogic;      -- write back address to RA
    mode_32bit   : std_ulogic;
    addr         : std_ulogic_vector(63 downto 0);
    byte_sel     : std_ulogic_vector(7 downto 0);   -- byte enables, first doubleword
    second_bytes : std_ulogic_vector(7 downto 0);   -- byte enables, second doubleword
    store_data   : std_ulogic_vector(63 downto 0);
    instr_tag    : instr_tag_t;
    write_reg    : gspr_index_t;
    length       : std_ulogic_vector(3 downto 0);
    elt_length   : std_ulogic_vector(3 downto 0);
    byte_reverse : std_ulogic;
    brev_mask    : unsigned(2 downto 0);
    sign_extend  : std_ulogic;
    update       : std_ulogic;
    xerc         : xer_common_t;
    reserve      : std_ulogic;
    -- Quadword loads and stores (lq, stq, lqarx, stqcx.):
    -- These instructions all access two consecutive GPRs; for example
    -- "lq %r6,0(%r3)" loads the doubleword at the address in R3 into R7
    -- and the doubleword at address R3 + 8 into R6.  The instruction is
    -- repeated at the decode2 stage, so two instructions get sent out to
    -- execute1.  Decode2 passes a 'repeat' flag and a 'second' flag to
    -- execute1, and execute1 passes them on to loadstore1.  'repeat' is
    -- set for both iterations of a repeated instruction, and 'second' is
    -- set on the second iteration.
    -- Loadstore1 uses the next address for the second iteration of a
    -- repeated load/store so that we access the second doubleword of the
    -- memory operand; thus the doublewords are accessed in increasing
    -- memory order.  For 16-byte loads this means the first iteration
    -- writes GPR RT|1.  It is possible that RA = RT|1 (legal but
    -- non-preferred), meaning that if the memory operand was misaligned,
    -- the first iteration would overwrite RA but then the second
    -- iteration might take a page fault, leading to corrupted state.
    -- To avoid that, 16-byte loads in LE mode take an alignment interrupt
    -- if the operand is not 16-byte aligned.  (This is the case anyway
    -- for lqarx, and it is enforced for lq as well.)
    atomic       : std_ulogic;
    atomic_last  : std_ulogic;
    rc           : std_ulogic;
    nc           : std_ulogic;      -- non-cacheable access
    virt_mode    : std_ulogic;
    priv_mode    : std_ulogic;
    load_sp      : std_ulogic;      -- load needing SP->DP conversion
    sprn         : std_ulogic_vector(9 downto 0);
    is_slbia     : std_ulogic;
    align_intr   : std_ulogic;
    dword_index  : std_ulogic;      -- set for the second request of a misaligned access
    two_dwords   : std_ulogic;      -- access spans two doublewords
    incomplete   : std_ulogic;      -- second doubleword request still to be sent
    nia          : std_ulogic_vector(63 downto 0);
end record;
|
|
|
|
-- All-idle request value; loadstore1_in starts from this each cycle.
constant request_init : request_t := (
    valid        => '0',
    dc_req       => '0',
    load         => '0',
    store        => '0',
    tlbie        => '0',
    dcbz         => '0',
    read_spr     => '0',
    write_spr    => '0',
    mmu_op       => '0',
    instr_fault  => '0',
    do_update    => '0',
    mode_32bit   => '0',
    addr         => (others => '0'),
    byte_sel     => x"00",
    second_bytes => x"00",
    store_data   => (others => '0'),
    instr_tag    => instr_tag_init,
    write_reg    => 7x"00",
    length       => x"0",
    elt_length   => x"0",
    byte_reverse => '0',
    brev_mask    => "000",
    sign_extend  => '0',
    update       => '0',
    xerc         => xerc_init,
    reserve      => '0',
    atomic       => '0',
    atomic_last  => '0',
    rc           => '0',
    nc           => '0',
    virt_mode    => '0',
    priv_mode    => '0',
    load_sp      => '0',
    sprn         => 10x"0",
    is_slbia     => '0',
    align_intr   => '0',
    dword_index  => '0',
    two_dwords   => '0',
    incomplete   => '0',
    nia          => (others => '0')
    );
|
|
|
|
|
|
|
|
-- Stage 1 pipeline register
type reg_stage1_t is record
    req    : request_t;                         -- request currently held in stage 1
    busy   : std_ulogic;                        -- stage 1 cannot accept a new instruction
    issued : std_ulogic;                        -- request was issued (see stage1_dcreq)
    addr0  : std_ulogic_vector(63 downto 0);    -- address of the last valid instruction
end record;
|
|
|
|
|
|
|
|
-- Stage 2 pipeline register
type reg_stage2_t is record
    req        : request_t;
    byte_index : byte_index_t;                      -- load formatter: source byte per lane
    use_second : std_ulogic_vector(7 downto 0);     -- load formatter: carry out of index sum
    busy       : std_ulogic;
    wait_dc    : std_ulogic;                        -- waiting for the dcache response
    wait_mmu   : std_ulogic;                        -- waiting for the MMU
    one_cycle  : std_ulogic;                        -- valid request needing neither dcache nor MMU
    wr_sel     : std_ulogic_vector(1 downto 0);
    addr0      : std_ulogic_vector(63 downto 0);
end record;
|
|
|
|
|
|
|
|
-- Stage 3 pipeline register
type reg_stage3_t is record
    state        : state_t;
    complete     : std_ulogic;
    instr_tag    : instr_tag_t;
    write_enable : std_ulogic;
    write_reg    : gspr_index_t;
    write_data   : std_ulogic_vector(63 downto 0);
    rc           : std_ulogic;
    xerc         : xer_common_t;
    store_done   : std_ulogic;
    load_data    : std_ulogic_vector(63 downto 0);
    dar          : std_ulogic_vector(63 downto 0);
    dsisr        : std_ulogic_vector(31 downto 0);
    -- Inputs to the SP -> DP conversion (sp_to_dp process)
    ld_sp_data   : std_ulogic_vector(31 downto 0);
    ld_sp_nz     : std_ulogic;
    ld_sp_lz     : std_ulogic_vector(5 downto 0);
    stage1_en    : std_ulogic;
    interrupt    : std_ulogic;
    intr_vec     : integer range 0 to 16#fff#;
    nia          : std_ulogic_vector(63 downto 0);
    srr1         : std_ulogic_vector(15 downto 0);
    events       : Loadstore1EventType;
end record;
|
|
|
|
|
|
|
|
-- Decoded request for the instruction currently on l_in
signal req_in        : request_t;

-- Pipeline registers and their next-state values
signal r1            : reg_stage1_t;
signal r1in          : reg_stage1_t;
signal r2            : reg_stage2_t;
signal r2in          : reg_stage2_t;
signal r3            : reg_stage3_t;
signal r3in          : reg_stage3_t;

signal flush         : std_ulogic;
signal busy          : std_ulogic;
signal complete      : std_ulogic;
signal flushing      : std_ulogic;

-- Floating-point conversion results and formatted store data
signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data  : std_ulogic_vector(63 downto 0);
signal store_data    : std_ulogic_vector(63 downto 0);

-- Stage 1 outputs; stage1_dreq is stage1_dcreq delayed by one clock
signal stage1_req    : request_t;
signal stage1_dcreq  : std_ulogic;
signal stage1_dreq   : std_ulogic;
|
|
|
|
|
|
|
|
-- Generate byte enables from sizes.
-- A length of 1, 2, 4 or 8 yields that many low-order enables set;
-- any other length value yields no enables.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    if length = "0001" then
        return "00000001";
    elsif length = "0010" then
        return "00000011";
    elsif length = "0100" then
        return "00001111";
    elsif length = "1000" then
        return "11111111";
    else
        return "00000000";
    end if;
end function length_to_sel;
|
|
|
|
|
|
|
|
-- Calculate byte enables.
-- The result is 16 bits wide: the select signals for two transfers, so
-- that an unaligned load or store spilling into the next doubleword is
-- covered.  The enables for 'size' are shifted left by the byte offset
-- given in 'address'.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable wide_sel : unsigned(15 downto 0);
    variable offset   : natural;
begin
    -- zero-extend the 8 enables to 16 bits
    wide_sel := resize(unsigned(length_to_sel(size)), 16);
    offset := to_integer(unsigned(address));
    return std_ulogic_vector(shift_left(wide_sel, offset));
end function xfer_data_sel;
|
|
|
|
|
|
|
|
-- 23-bit right shifter for DP -> SP float conversions.
-- Two stages: shift(1 downto 0) selects 0..3 places, then
-- shift(4 downto 2) selects 0, 4, 8, 12 or 16 places; any higher value
-- of shift(4 downto 2) shifts by 20.  Vacated bits are filled with '0'.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine   : std_ulogic_vector(22 downto 0);
    variable coarse : std_ulogic_vector(22 downto 0);
begin
    -- fine stage: 0 to 3 places
    case shift(1 downto 0) is
        when "00" =>
            fine := frac;
        when "01" =>
            fine := std_ulogic_vector(shift_right(unsigned(frac), 1));
        when "10" =>
            fine := std_ulogic_vector(shift_right(unsigned(frac), 2));
        when others =>
            fine := std_ulogic_vector(shift_right(unsigned(frac), 3));
    end case;
    -- coarse stage: multiples of 4 places
    case shift(4 downto 2) is
        when "000" =>
            coarse := fine;
        when "001" =>
            coarse := std_ulogic_vector(shift_right(unsigned(fine), 4));
        when "010" =>
            coarse := std_ulogic_vector(shift_right(unsigned(fine), 8));
        when "011" =>
            coarse := std_ulogic_vector(shift_right(unsigned(fine), 12));
        when "100" =>
            coarse := std_ulogic_vector(shift_right(unsigned(fine), 16));
        when others =>
            coarse := std_ulogic_vector(shift_right(unsigned(fine), 20));
    end case;
    return coarse;
end;
|
|
|
|
|
|
|
|
-- 23-bit left shifter for SP -> DP float conversions.
-- Two stages: shift(1 downto 0) selects 0..3 places, then
-- shift(4 downto 2) selects 0, 4, 8, 12 or 16 places; any higher value
-- of shift(4 downto 2) shifts by 20.  Vacated bits are filled with '0'.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine   : std_ulogic_vector(22 downto 0);
    variable coarse : std_ulogic_vector(22 downto 0);
begin
    -- fine stage: 0 to 3 places
    case shift(1 downto 0) is
        when "00" =>
            fine := frac;
        when "01" =>
            fine := std_ulogic_vector(shift_left(unsigned(frac), 1));
        when "10" =>
            fine := std_ulogic_vector(shift_left(unsigned(frac), 2));
        when others =>
            fine := std_ulogic_vector(shift_left(unsigned(frac), 3));
    end case;
    -- coarse stage: multiples of 4 places
    case shift(4 downto 2) is
        when "000" =>
            coarse := fine;
        when "001" =>
            coarse := std_ulogic_vector(shift_left(unsigned(fine), 4));
        when "010" =>
            coarse := std_ulogic_vector(shift_left(unsigned(fine), 8));
        when "011" =>
            coarse := std_ulogic_vector(shift_left(unsigned(fine), 12));
        when "100" =>
            coarse := std_ulogic_vector(shift_left(unsigned(fine), 16));
        when others =>
            coarse := std_ulogic_vector(shift_left(unsigned(fine), 20));
    end case;
    return coarse;
end;
|
|
|
|
|
|
|
|
begin
|
|
|
|
-- Clocked process holding the three pipeline registers (r1, r2, r3), the
-- 'flushing' flag and the one-cycle-delayed stage1_dreq.
-- Reset is synchronous and clears only the control fields listed below;
-- the remaining fields of r1/r2/r3 are not reset.
-- The assertions at the end are simulation-time checks that the dcache
-- and MMU only respond when this unit is actually waiting for them.
loadstore1_reg: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            r1.busy <= '0';
            r1.issued <= '0';
            r1.req.valid <= '0';
            r1.req.dc_req <= '0';
            r1.req.incomplete <= '0';
            r1.req.tlbie <= '0';
            r1.req.is_slbia <= '0';
            r1.req.instr_fault <= '0';
            r1.req.load <= '0';
            r1.req.priv_mode <= '0';
            r1.req.sprn <= (others => '0');
            r1.req.xerc <= xerc_init;

            r2.req.valid <= '0';
            r2.busy <= '0';
            r2.req.tlbie <= '0';
            r2.req.is_slbia <= '0';
            r2.req.instr_fault <= '0';
            r2.req.load <= '0';
            r2.req.priv_mode <= '0';
            r2.req.sprn <= (others => '0');
            r2.req.xerc <= xerc_init;

            r2.wait_dc <= '0';
            r2.wait_mmu <= '0';
            r2.one_cycle <= '0';

            r3.dar <= (others => '0');
            r3.dsisr <= (others => '0');
            r3.state <= IDLE;
            r3.write_enable <= '0';
            r3.interrupt <= '0';
            r3.complete <= '0';
            r3.stage1_en <= '1';
            r3.events.load_complete <= '0';
            r3.events.store_complete <= '0';
            flushing <= '0';
        else
            r1 <= r1in;
            r2 <= r2in;
            r3 <= r3in;
            -- 'flushing' is set once a valid request with align_intr enters
            -- stage 1 and stays set until 'flush' is asserted.
            flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and
                        not flush;
        end if;

        -- registered regardless of reset
        stage1_dreq <= stage1_dcreq;

        if d_in.valid = '1' then
            assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE
                report "loadstore1: d_in.valid seen without a valid dcache request in r2 or with r3.state not IDLE"
                severity failure;
        end if;
        if d_in.error = '1' then
            assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE
                report "loadstore1: d_in.error seen without a valid dcache request in r2 or with r3.state not IDLE"
                severity failure;
        end if;
        if m_in.done = '1' or m_in.err = '1' then
            assert r2.req.valid = '1' and r3.state = MMU_WAIT
                report "loadstore1: m_in.done/m_in.err seen without a valid request in r2 or with r3.state not MMU_WAIT"
                severity failure;
        end if;
    end if;
end process;
|
|
|
|
|
|
|
|
-- Single <-> double precision conversion logic, present only with an FPU.
ls_fp_conv: if HAS_FPU generate
    -- Convert DP data to SP for stfs.
    -- Input is l_in.data (DP format); output is store_sp_data (SP format).
    dp_to_sp: process(all)
        variable dp_exp  : unsigned(10 downto 0);
        variable mant    : std_ulogic_vector(22 downto 0);
        variable rshift  : unsigned(4 downto 0);
        variable sp_word : std_ulogic_vector(31 downto 0);
    begin
        dp_exp := unsigned(l_in.data(62 downto 52));

        -- default: sign bit only, everything else zero
        sp_word := (others => '0');
        sp_word(31) := l_in.data(63);

        if dp_exp > 896 then
            -- top exponent bit plus the low 7 exponent bits and the
            -- upper 23 fraction bits are copied across unchanged
            sp_word(30) := l_in.data(62);
            sp_word(29 downto 0) := l_in.data(58 downto 29);
        elsif dp_exp >= 874 then
            -- denormalization required: restore the implicit leading 1
            -- and shift right by the negated low 5 exponent bits
            mant := '1' & l_in.data(51 downto 30);
            rshift := 0 - dp_exp(4 downto 0);
            sp_word(22 downto 0) := shifter_23r(mant, rshift);
        end if;

        store_sp_data <= sp_word;
    end process;

    -- Convert SP data to DP for lfs.
    -- Inputs are r3.ld_sp_data, r3.ld_sp_nz and r3.ld_sp_lz; output is
    -- load_dp_data (DP format).
    sp_to_dp: process(all)
        variable sp_exp  : unsigned(7 downto 0);
        variable dp_exp  : unsigned(10 downto 0);
        variable exp_any : std_ulogic;      -- some SP exponent bit is set
        variable exp_all : std_ulogic;      -- all SP exponent bits are set
        variable mant    : std_ulogic_vector(22 downto 0);
        variable lshift  : unsigned(4 downto 0);
        variable dp_word : std_ulogic_vector(63 downto 0);
    begin
        mant := r3.ld_sp_data(22 downto 0);
        sp_exp := unsigned(r3.ld_sp_data(30 downto 23));
        exp_any := or (r3.ld_sp_data(30 downto 23));
        exp_all := and (r3.ld_sp_data(30 downto 23));
        lshift := (others => '0');

        if exp_all = '1' then
            dp_exp := to_unsigned(2047, 11);        -- infinity or NaN
        elsif exp_any = '1' then
            dp_exp := 896 + resize(sp_exp, 11);     -- finite normalized value
        elsif r3.ld_sp_nz = '0' then
            dp_exp := to_unsigned(0, 11);           -- zero
        else
            -- denormalized SP operand, need to normalize
            dp_exp := 896 - resize(unsigned(r3.ld_sp_lz), 11);
            lshift := unsigned(r3.ld_sp_lz(4 downto 0)) + 1;
        end if;

        dp_word(63) := r3.ld_sp_data(31);
        dp_word(62 downto 52) := std_ulogic_vector(dp_exp);
        dp_word(51 downto 29) := shifter_23l(mant, lshift);
        dp_word(28 downto 0) := (others => '0');

        load_dp_data <= dp_word;
    end process;
end generate;
|
|
|
|
|
|
|
|
-- Translate a load/store instruction into the internal request format.
-- The result is driven onto req_in.
-- XXX this should only depend on l_in, but actually depends on
-- r1.addr0 as well (in the l_in.second = 1 case).
loadstore1_in: process(all)
    variable rq        : request_t;
    variable ea        : std_ulogic_vector(63 downto 0);
    variable sel16     : std_ulogic_vector(15 downto 0);
    variable spr_num   : std_ulogic_vector(9 downto 0);
    variable len_mask  : std_ulogic_vector(2 downto 0);
    variable unaligned : std_ulogic;
begin
    rq := request_init;
    spr_num := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));

    -- Fields copied straight from the incoming instruction
    rq.valid        := l_in.valid;
    rq.instr_tag    := l_in.instr_tag;
    rq.mode_32bit   := l_in.mode_32bit;
    rq.write_reg    := l_in.write_reg;
    rq.length       := l_in.length;
    rq.elt_length   := l_in.length;
    rq.byte_reverse := l_in.byte_reverse;
    rq.sign_extend  := l_in.sign_extend;
    rq.update       := l_in.update;
    rq.xerc         := l_in.xerc;
    rq.reserve      := l_in.reserve;
    rq.rc           := l_in.rc;
    rq.nc           := l_in.ci;
    rq.virt_mode    := l_in.virt_mode;
    rq.priv_mode    := l_in.priv_mode;
    rq.sprn         := spr_num;
    rq.nia          := l_in.nia;

    -- Store data: single-precision stores use the converted value
    if HAS_FPU and l_in.is_32bit = '1' then
        rq.store_data := x"00000000" & store_sp_data;
    else
        rq.store_data := l_in.data;
    end if;

    -- Effective address: normally the sum of the two address operands
    ea := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
    if l_in.second = '1' then
        if l_in.update = '0' then
            -- for the second half of a 16-byte transfer,
            -- use the previous address plus 8.
            ea := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + 1) & r1.addr0(2 downto 0);
        else
            -- for an update-form load, use the previous address
            -- as the value to write back to RA.
            ea := r1.addr0;
        end if;
    end if;
    if l_in.mode_32bit = '1' then
        -- 32-bit mode: upper half of the address is forced to zero
        ea(63 downto 32) := (others => '0');
    end if;
    rq.addr := ea;

    -- XXX Temporary hack. Mark the op as non-cachable if the address
    -- is the form 0xc------- for a real-mode access.
    if ea(31 downto 28) = "1100" and l_in.virt_mode = '0' then
        rq.nc := '1';
    end if;

    -- length - 1 (low 3 bits) is the mask of address bits that must be
    -- zero for the access to be naturally aligned
    len_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1);

    -- Do length_to_sel and work out if we are doing 2 dwords
    sel16 := xfer_data_sel(rq.length, ea(2 downto 0));
    rq.byte_sel := sel16(7 downto 0);
    rq.second_bytes := sel16(15 downto 8);
    if sel16(15 downto 8) /= "00000000" then
        rq.two_dwords := '1';
    end if;

    -- check alignment for larx/stcx
    unaligned := or (len_mask and ea(2 downto 0));
    rq.align_intr := l_in.reserve and unaligned;

    rq.atomic := not unaligned;
    rq.atomic_last := not unaligned and (l_in.second or not l_in.repeat);

    -- Operation-specific flags
    case l_in.op is
        when OP_STORE =>
            rq.store := '1';
        when OP_LOAD =>
            if l_in.update = '0' or l_in.second = '0' then
                rq.load := '1';
                if HAS_FPU and l_in.is_32bit = '1' then
                    -- Allow an extra cycle for SP->DP precision conversion
                    rq.load_sp := '1';
                end if;
            else
                -- write back address to RA
                rq.do_update := '1';
            end if;
        when OP_DCBZ =>
            rq.dcbz := '1';
            -- dcbz to a non-cacheable address raises an alignment interrupt
            rq.align_intr := rq.nc;
        when OP_TLBIE =>
            rq.tlbie := '1';
            rq.addr := l_in.addr2;    -- address from RB for tlbie
            rq.is_slbia := l_in.insn(7);
            rq.mmu_op := '1';
        when OP_MFSPR =>
            rq.read_spr := '1';
        when OP_MTSPR =>
            rq.write_spr := '1';
            rq.mmu_op := spr_num(8) or spr_num(5);
        when OP_FETCH_FAILED =>
            -- send it to the MMU to do the radix walk
            rq.instr_fault := '1';
            rq.addr := l_in.nia;
            rq.mmu_op := '1';
        when others =>
    end case;

    -- A dcache request is needed for a valid load, store or dcbz that
    -- does not take an alignment interrupt; it is incomplete while a
    -- second doubleword still has to be accessed.
    rq.dc_req := l_in.valid and (rq.load or rq.store or rq.dcbz) and not rq.align_intr;
    rq.incomplete := rq.dc_req and rq.two_dwords;

    -- Work out controls for load and store formatting
    if rq.byte_reverse = '1' then
        rq.brev_mask := unsigned(rq.length(2 downto 0)) - 1;
    else
        rq.brev_mask := "000";
    end if;

    req_in <= rq;
end process;
|
|
|
|
|
|
|
|
-- Busy while either of the first two stages is busy, or while dc_stall
-- or a dcache error is asserted
busy <= r1.busy or r2.busy or dc_stall or d_in.error;
-- Complete when stage 3 says so, when a one-cycle request is in stage 2,
-- or when the awaited dcache response arrives
complete <= r3.complete or r2.one_cycle or (r2.wait_dc and d_in.valid);
|
|
|
|
|
|
|
|
-- Processing done in the first cycle of a load/store instruction.
-- Selects the request for this cycle (a new one from req_in, or a
-- continuation of the one held in r1), decides whether it is issued
-- (stage1_dcreq), and computes the next value of r1.
loadstore1_1: process(all)
    variable nxt      : reg_stage1_t;
    variable cur      : request_t;
    variable send_dc  : std_ulogic;
    variable do_issue : std_ulogic;
begin
    nxt := r1;
    do_issue := '0';
    send_dc := '0';

    if r1.busy = '0' then
        -- Stage 1 is free: take whatever is on l_in
        cur := req_in;
        cur.valid := l_in.valid;
        if flushing = '1' then
            -- Make this a no-op request rather than simply invalid.
            -- It will never get to stage 3 since there is a request ahead of
            -- it with align_intr = 1.
            cur.dc_req := '0';
        end if;
        do_issue := l_in.valid and cur.dc_req;
        if l_in.valid = '1' then
            -- remember the address for a following 'second' iteration
            nxt.addr0 := cur.addr;
        end if;
    else
        -- Stage 1 still owns a request from an earlier cycle
        cur := r1.req;
        if r1.req.dc_req = '1' and r1.issued = '0' then
            -- not issued yet: issue it now
            do_issue := '1';
        elsif r1.req.incomplete = '1' then
            -- construct the second request for a misaligned access
            cur.dword_index := '1';
            cur.incomplete := '0';
            cur.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000";
            if r1.req.mode_32bit = '1' then
                -- discard a carry out of the low 32 bits
                cur.addr(32) := '0';
            end if;
            cur.byte_sel := r1.req.second_bytes;
            do_issue := '1';
        else
            -- For the lfs conversion cycle, leave the request valid
            -- for another cycle but with req.dc_req = 0.
            -- For an MMU request last cycle, we have nothing
            -- to do in this cycle, so make it invalid.
            if r1.req.load_sp = '0' then
                cur.valid := '0';
            end if;
            cur.dc_req := '0';
        end if;
    end if;

    if flush = '1' then
        -- drop whatever stage 1 holds
        nxt.req.valid := '0';
        nxt.req.dc_req := '0';
        nxt.req.incomplete := '0';
        nxt.issued := '0';
        nxt.busy := '0';
    elsif (dc_stall or d_in.error or r2.busy) = '0' then
        -- we can change what's in r1 next cycle because the current thing
        -- in r1 will go into r2
        nxt.req := cur;
        send_dc := do_issue;
        nxt.issued := do_issue;
        -- stay busy for a second doubleword, an lfs conversion cycle,
        -- or an MMU operation
        nxt.busy := (do_issue and (cur.incomplete or cur.load_sp)) or (cur.valid and cur.mmu_op);
    else
        -- pipeline is stalled
        if r1.issued = '1' and d_in.error = '1' then
            -- the issued request got an error; mark it as not issued
            -- so that it is issued again
            nxt.issued := '0';
            nxt.busy := '1';
        end if;
    end if;

    stage1_req <= cur;
    stage1_dcreq <= send_dc;
    r1in <= nxt;
end process;
|
|
|
|
|
|
|
|
-- Processing done in the second cycle of a load/store instruction.
-- Store data is formatted here and sent to the dcache.
-- The request in r1 is sent to stage 3 if stage 3 will not be busy next cycle.
loadstore1_2: process(all)
    variable nxt      : reg_stage2_t;
    variable src_lo   : integer;
    variable src_byte : unsigned(2 downto 0);
    variable lane     : unsigned(2 downto 0);
    variable lane_sum : unsigned(3 downto 0);
    variable offset   : unsigned(2 downto 0);
    variable intr     : std_ulogic;
begin
    nxt := r2;

    -- Byte reversing and rotating for stores.
    -- Done in the second cycle (the cycle after l_in.valid = 1).
    offset := unsigned(r1.addr0(2 downto 0));
    for i in 0 to 7 loop
        -- source byte for output lane i: rotate by the byte offset,
        -- then apply the byte-reverse mask
        src_byte := (to_unsigned(i, 3) - offset) xor r1.req.brev_mask;
        src_lo := to_integer(src_byte) * 8;
        store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(src_lo + 7 downto src_lo);
    end loop;

    if (dc_stall or d_in.error or r2.busy or l_in.e2stall) = '0' then
        if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then
            -- accept the request from stage 1
            nxt.req := r1.req;
            nxt.addr0 := r1.addr0;
            nxt.req.store_data := store_data;
            nxt.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
                           not r1.req.incomplete;
            nxt.wait_mmu := r1.req.valid and r1.req.mmu_op;
            nxt.busy := r1.req.valid and r1.req.mmu_op;
            nxt.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op);

            -- writeback data source select
            if r1.req.read_spr = '1' then
                nxt.wr_sel := "00";
            elsif r1.req.do_update = '1' or r1.req.store = '1' then
                nxt.wr_sel := "01";
            elsif r1.req.load_sp = '1' then
                nxt.wr_sel := "10";
            else
                nxt.wr_sel := "11";
            end if;

            -- Work out load formatter controls for next cycle
            for i in 0 to 7 loop
                lane := to_unsigned(i, 3) xor r1.req.brev_mask;
                lane_sum := resize(lane, 4) + resize(offset, 4);
                nxt.use_second(i) := lane_sum(3);
                nxt.byte_index(i) := lane_sum(2 downto 0);
            end loop;
        else
            -- stage 1 holds a dcache request that has not been issued yet
            nxt.req.valid := '0';
            nxt.wait_dc := '0';
            nxt.wait_mmu := '0';
            nxt.one_cycle := '0';
        end if;
    end if;

    -- MMU finished what we were waiting for
    if r2.wait_mmu = '1' and m_in.done = '1' then
        if r2.req.mmu_op = '1' then
            nxt.req.valid := '0';
            nxt.busy := '0';
        end if;
        nxt.wait_mmu := '0';
    end if;
    -- busy without waiting for the MMU lasts a single cycle
    if r2.busy = '1' and r2.wait_mmu = '0' then
        nxt.busy := '0';
    end if;

    -- Interrupt sources: alignment interrupt on the stage-2 request,
    -- dcache error with cache_paradox, or MMU error
    intr := (r2.req.valid and r2.req.align_intr) or
            (d_in.error and d_in.cache_paradox) or m_in.err;
    if intr = '1' then
        nxt.req.valid := '0';
        nxt.busy := '0';
        nxt.wait_dc := '0';
        nxt.wait_mmu := '0';
    elsif d_in.error = '1' then
        -- other dcache errors: hold the request and wait for the MMU
        nxt.wait_mmu := '1';
        nxt.busy := '1';
    end if;

    r2in <= nxt;
end process;
|
|
|
|
|
|
|
|
-- Processing done in the third cycle of a load/store instruction.
|
|
|
|
-- At this stage we can do things that have side effects without
|
|
|
|
-- fear of the instruction getting flushed. This is the point at
|
|
|
|
-- which requests get sent to the MMU.
|
|
|
|
loadstore1_3: process(all)
|
|
|
|
variable v : reg_stage3_t;
|
|
|
|
variable j : integer;
|
|
|
|
variable req : std_ulogic;
|
|
|
|
variable mmureq : std_ulogic;
|
|
|
|
variable mmu_mtspr : std_ulogic;
|
|
|
|
variable write_enable : std_ulogic;
|
|
|
|
variable write_data : std_ulogic_vector(63 downto 0);
|
|
|
|
variable do_update : std_ulogic;
|
|
|
|
variable done : std_ulogic;
|
|
|
|
variable exception : std_ulogic;
|
|
|
|
variable data_permuted : std_ulogic_vector(63 downto 0);
|
|
|
|
variable data_trimmed : std_ulogic_vector(63 downto 0);
|
|
|
|
variable sprval : std_ulogic_vector(63 downto 0);
|
|
|
|
var |