|
|
|
library ieee;
|
|
|
|
use ieee.std_logic_1164.all;
|
|
|
|
use ieee.numeric_std.all;
|
|
|
|
|
|
|
|
library work;
|
|
|
|
use work.decode_types.all;
|
|
|
|
use work.common.all;
|
|
|
|
use work.insn_helpers.all;
|
|
|
|
use work.helpers.all;
|
|
|
|
|
|
|
|
-- 2 cycle LSU
|
|
|
|
-- We calculate the address in the first cycle
|
|
|
|
|
|
|
|
entity loadstore1 is
    generic (
        -- When true, the ls_fp_conv block (DP<->SP conversion for
        -- single-precision FP loads and stores) is generated.
        HAS_FPU    : boolean := true;
        -- Non-zero to enable log data collection
        LOG_LENGTH : natural := 0
        );
    port (
        clk          : in  std_ulogic;
        rst          : in  std_ulogic;   -- synchronous reset, sampled on rising clk

        -- Incoming requests, and results going back out
        -- (peer units named per the record types; declared in other packages)
        l_in         : in  Execute1ToLoadstore1Type;
        e_out        : out Loadstore1ToExecute1Type;
        l_out        : out Loadstore1ToWritebackType;

        -- Data cache interface
        d_out        : out Loadstore1ToDcacheType;
        d_in         : in  DcacheToLoadstore1Type;

        -- MMU interface
        m_out        : out Loadstore1ToMmuType;
        m_in         : in  MmuToLoadstore1Type;

        -- Stall input; holds stages 1 and 2 while asserted
        dc_stall     : in  std_ulogic;

        events       : out Loadstore1EventType;

        -- Access to SPRs from core_debug module
        dbg_spr_req  : in  std_ulogic;
        dbg_spr_ack  : out std_ulogic;
        dbg_spr_addr : in  std_ulogic_vector(1 downto 0);
        dbg_spr_data : out std_ulogic_vector(63 downto 0);

        log_out      : out std_ulogic_vector(9 downto 0)
        );
end entity loadstore1;
|
|
|
|
|
|
|
|
architecture behave of loadstore1 is
|
|
|
|
|
|
|
|
-- States of the stage-3 state machine (held in r3.state)
type state_t is (
    IDLE,       -- ready for instruction
    MMU_WAIT    -- waiting for MMU to finish doing something
    );

-- Per-destination-byte source index (0..7) used by the load formatter
type byte_index_t is array(0 to 7) of unsigned(2 downto 0);

-- Per-byte 2-bit trim control and the array of eight of them
subtype byte_trim_t is std_ulogic_vector(1 downto 0);
type trim_ctl_t is array(0 to 7) of byte_trim_t;
|
|
|
|
|
|
|
|
-- Internal request format.  Built from l_in by the loadstore1_in process
-- and then carried through the r1/r2 pipeline registers.
type request_t is record
    valid        : std_ulogic;                      -- copy of l_in.valid
    dc_req       : std_ulogic;                      -- valid and (load or store or sync or dcbz) and not align_intr
    load         : std_ulogic;                      -- OP_LOAD (except the update half) and OP_DCBF
    store        : std_ulogic;                      -- OP_STORE
    flush        : std_ulogic;                      -- OP_DCBF
    touch        : std_ulogic;                      -- load or store with length = 0
    sync         : std_ulogic;                      -- OP_SYNC
    tlbie        : std_ulogic;                      -- OP_TLBIE
    dcbz         : std_ulogic;                      -- OP_DCBZ
    read_spr     : std_ulogic;                      -- OP_MFSPR
    write_spr    : std_ulogic;                      -- OP_MTSPR
    mmu_op       : std_ulogic;                      -- tlbie, fetch-failed, or mtspr with sprn(1) = 0
    instr_fault  : std_ulogic;                      -- OP_FETCH_FAILED
    do_update    : std_ulogic;                      -- second half of an update-form load: write address to RA
    mode_32bit   : std_ulogic;                      -- from l_in.mode_32bit
    prefixed     : std_ulogic;                      -- from l_in.prefixed
    addr         : std_ulogic_vector(63 downto 0);  -- effective address (RB value for tlbie)
    byte_sel     : std_ulogic_vector(7 downto 0);   -- byte enables for the first doubleword
    second_bytes : std_ulogic_vector(7 downto 0);   -- byte enables for the following doubleword
    store_data   : std_ulogic_vector(63 downto 0);
    instr_tag    : instr_tag_t;
    write_reg    : gspr_index_t;
    length       : std_ulogic_vector(3 downto 0);   -- from l_in.length
    elt_length   : std_ulogic_vector(3 downto 0);   -- also loaded from l_in.length
    byte_reverse : std_ulogic;
    brev_mask    : unsigned(2 downto 0);            -- length - 1 when byte_reverse = 1, else 0
    sign_extend  : std_ulogic;
    update       : std_ulogic;
    xerc         : xer_common_t;
    reserve      : std_ulogic;
    atomic_qw    : std_ulogic;                      -- aligned quadword access, done atomically
    atomic_first : std_ulogic;                      -- not misaligned and not l_in.second
    atomic_last  : std_ulogic;                      -- not misaligned and (l_in.second or not l_in.repeat)
    rc           : std_ulogic;
    nc           : std_ulogic;                      -- non-cacheable access
    virt_mode    : std_ulogic;
    priv_mode    : std_ulogic;
    load_sp      : std_ulogic;                      -- FP single load; needs the SP->DP conversion cycle
    sprsel       : std_ulogic_vector(1 downto 0);   -- "10" DSISR, "11" DAR, "0x" PID/PTCR (read from the MMU)
    ric          : std_ulogic_vector(1 downto 0);   -- l_in.insn(19 downto 18)
    is_slbia     : std_ulogic;                      -- l_in.insn(7), set for OP_TLBIE only
    align_intr   : std_ulogic;                      -- request must raise an alignment interrupt
    dword_index  : std_ulogic;                      -- 1 for the second request of a misaligned access
    two_dwords   : std_ulogic;                      -- second_bytes is non-zero
    incomplete   : std_ulogic;                      -- dc_req and two_dwords; second request still to issue
end record;
|
|
|
|
-- All-inactive request value; loadstore1_in starts from this and then
-- fills in the fields that apply to the incoming instruction.
constant request_init : request_t := (
    valid        => '0',
    dc_req       => '0',
    load         => '0',
    store        => '0',
    flush        => '0',
    touch        => '0',
    sync         => '0',
    tlbie        => '0',
    dcbz         => '0',
    read_spr     => '0',
    write_spr    => '0',
    mmu_op       => '0',
    instr_fault  => '0',
    do_update    => '0',
    mode_32bit   => '0',
    prefixed     => '0',
    addr         => (others => '0'),
    byte_sel     => x"00",
    second_bytes => x"00",
    store_data   => (others => '0'),
    instr_tag    => instr_tag_init,
    write_reg    => 6x"00",
    length       => x"0",
    elt_length   => x"0",
    byte_reverse => '0',
    brev_mask    => "000",
    sign_extend  => '0',
    update       => '0',
    xerc         => xerc_init,
    reserve      => '0',
    atomic_qw    => '0',
    atomic_first => '0',
    atomic_last  => '0',
    rc           => '0',
    nc           => '0',
    virt_mode    => '0',
    priv_mode    => '0',
    load_sp      => '0',
    sprsel       => "00",
    ric          => "00",
    is_slbia     => '0',
    align_intr   => '0',
    dword_index  => '0',
    two_dwords   => '0',
    incomplete   => '0'
    );
|
|
|
|
|
|
|
|
-- Stage 1 pipeline register
type reg_stage1_t is record
    req           : request_t;                      -- request currently held in stage 1
    busy, issued  : std_ulogic;                     -- busy: not accepting from l_in; issued: dcache request sent
    addr0         : std_ulogic_vector(63 downto 0); -- address of the most recent valid l_in request
end record reg_stage1_t;
|
|
|
|
|
|
|
|
-- Stage 2 pipeline register
type reg_stage2_t is record
    req         : request_t;
    byte_index  : byte_index_t;                     -- load formatter: source byte for each result byte
    use_second  : std_ulogic_vector(7 downto 0);    -- load formatter: carry out of (byte index + offset)
    busy        : std_ulogic;
    wait_dc     : std_ulogic;                       -- waiting for d_in.valid
    wait_mmu    : std_ulogic;                       -- waiting for m_in.done
    one_cycle   : std_ulogic;                       -- valid request needing neither dcache nor MMU
    wr_sel      : std_ulogic_vector(1 downto 0);    -- "00" update/store/mfspr, "01" load_sp, "10" otherwise
    addr0       : std_ulogic_vector(63 downto 0);   -- r1.addr0, or the SPR value for mfspr
    sprsel      : std_ulogic_vector(1 downto 0);
    dbg_spr     : std_ulogic_vector(63 downto 0);   -- SPR value captured for core_debug
    dbg_spr_ack : std_ulogic;
end record reg_stage2_t;
|
|
|
|
|
|
|
|
-- Stage 3 pipeline register
type reg_stage3_t is record
    state        : state_t;
    complete     : std_ulogic;
    instr_tag    : instr_tag_t;
    write_enable : std_ulogic;
    write_reg    : gspr_index_t;
    write_data   : std_ulogic_vector(63 downto 0);
    rc           : std_ulogic;
    xerc         : xer_common_t;
    store_done   : std_ulogic;
    load_data    : std_ulogic_vector(63 downto 0);
    dar          : std_ulogic_vector(63 downto 0);  -- readable as SPR via sprsel = "11"
    dsisr        : std_ulogic_vector(31 downto 0);  -- readable as SPR via sprsel = "10"
    ld_sp_data   : std_ulogic_vector(31 downto 0);  -- raw single-precision word for sp_to_dp
    ld_sp_nz     : std_ulogic;                      -- 0 selects the "zero" case in sp_to_dp
    ld_sp_lz     : std_ulogic_vector(5 downto 0);   -- normalisation shift count used by sp_to_dp
    stage1_en    : std_ulogic;
    interrupt    : std_ulogic;
    intr_vec     : integer range 0 to 16#fff#;
    srr1         : std_ulogic_vector(15 downto 0);
    events       : Loadstore1EventType;
end record reg_stage3_t;
|
|
|
|
|
|
|
|
-- Request decoded from l_in by loadstore1_in
signal req_in        : request_t;

-- Pipeline registers (rN) and their next-state values (rNin)
signal r1            : reg_stage1_t;
signal r1in          : reg_stage1_t;
signal r2            : reg_stage2_t;
signal r2in          : reg_stage2_t;
signal r3            : reg_stage3_t;
signal r3in          : reg_stage3_t;

signal flush         : std_ulogic;
signal busy          : std_ulogic;   -- dc_stall or d_in.error or r1.busy or r2.busy
signal complete      : std_ulogic;
signal flushing      : std_ulogic;   -- set by an alignment-interrupt request in r1in, cleared by flush

-- Floating-point conversion results (driven only when HAS_FPU is true)
signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data  : std_ulogic_vector(63 downto 0);

-- Store data after byte reversal/rotation (driven by loadstore1_2)
signal store_data    : std_ulogic_vector(63 downto 0);

-- Stage 1 outputs (driven by loadstore1_1); stage1_dreq is stage1_dcreq delayed by one clock
signal stage1_req    : request_t;
signal stage1_dcreq  : std_ulogic;
signal stage1_dreq   : std_ulogic;
|
|
|
|
|
|
|
|
-- Map an access size in bytes (1, 2, 4 or 8) to a right-justified
-- 8-bit byte-enable mask.  Any other size yields no bytes enabled.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
    -- Ascending range so the result has the same bounds as a bare string literal
    variable sel : std_ulogic_vector(0 to 7) := "00000000";
begin
    case length is
        when "0001" => sel := "00000001";
        when "0010" => sel := "00000011";
        when "0100" => sel := "00001111";
        when "1000" => sel := "11111111";
        when others => null;
    end case;
    return sel;
end function length_to_sel;
|
|
|
|
|
|
|
|
-- Byte enables for an access of `size` bytes starting at byte offset
-- `address` within a doubleword.  The result is 16 bits wide: bits 7..0
-- select bytes of the first doubleword and bits 15..8 select bytes of the
-- next one, which covers accesses that straddle a doubleword boundary.
-- If `address` contains metavalues the whole result is 'X'.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable longsel : std_ulogic_vector(15 downto 0) := (others => 'X');
begin
    if is_X(address) then
        return longsel;
    end if;
    -- Right-justified mask, then slide it up to the starting byte
    longsel := "00000000" & length_to_sel(size);
    return std_ulogic_vector(shift_left(unsigned(longsel),
                                        to_integer(unsigned(address))));
end function xfer_data_sel;
|
|
|
|
|
|
|
|
-- 23-bit right shifter for DP -> SP float conversions.
-- Two stages: shift(1 downto 0) shifts by 0..3 bits, then
-- shift(4 downto 2) shifts by a further 0, 4, 8, 12, 16 or 20 bits.
-- Coarse codes above "100" all shift by 20, so the total never exceeds 23.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    constant zeros  : std_ulogic_vector(22 downto 0) := (others => '0');
    variable fine   : std_ulogic_vector(22 downto 0);
    variable coarse : std_ulogic_vector(22 downto 0);
begin
    -- fine stage: 0 to 3 bits
    case shift(1 downto 0) is
        when "00"   => fine := frac;
        when "01"   => fine := zeros(0 downto 0) & frac(22 downto 1);
        when "10"   => fine := zeros(1 downto 0) & frac(22 downto 2);
        when others => fine := zeros(2 downto 0) & frac(22 downto 3);
    end case;
    -- coarse stage: multiples of 4 bits
    case shift(4 downto 2) is
        when "000"  => coarse := fine;
        when "001"  => coarse := zeros(3 downto 0) & fine(22 downto 4);
        when "010"  => coarse := zeros(7 downto 0) & fine(22 downto 8);
        when "011"  => coarse := zeros(11 downto 0) & fine(22 downto 12);
        when "100"  => coarse := zeros(15 downto 0) & fine(22 downto 16);
        when others => coarse := zeros(19 downto 0) & fine(22 downto 20);
    end case;
    return coarse;
end;
|
|
|
|
|
|
|
|
-- 23-bit left shifter for SP -> DP float conversions.
-- Two stages: shift(1 downto 0) shifts by 0..3 bits, then
-- shift(4 downto 2) shifts by a further 0, 4, 8, 12, 16 or 20 bits.
-- Coarse codes above "100" all shift by 20.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    constant zeros  : std_ulogic_vector(22 downto 0) := (others => '0');
    variable fine   : std_ulogic_vector(22 downto 0);
    variable coarse : std_ulogic_vector(22 downto 0);
begin
    -- fine stage: 0 to 3 bits
    case shift(1 downto 0) is
        when "00"   => fine := frac;
        when "01"   => fine := frac(21 downto 0) & zeros(0 downto 0);
        when "10"   => fine := frac(20 downto 0) & zeros(1 downto 0);
        when others => fine := frac(19 downto 0) & zeros(2 downto 0);
    end case;
    -- coarse stage: multiples of 4 bits
    case shift(4 downto 2) is
        when "000"  => coarse := fine;
        when "001"  => coarse := fine(18 downto 0) & zeros(3 downto 0);
        when "010"  => coarse := fine(14 downto 0) & zeros(7 downto 0);
        when "011"  => coarse := fine(10 downto 0) & zeros(11 downto 0);
        when "100"  => coarse := fine(6 downto 0) & zeros(15 downto 0);
        when others => coarse := fine(2 downto 0) & zeros(19 downto 0);
    end case;
    return coarse;
end;
|
|
|
|
|
|
|
|
begin
|
|
|
|
-- Clocked process: pipeline registers r1/r2/r3, the `flushing` flag and
-- the registered copy of stage1_dcreq, plus simulation-time sanity checks.
loadstore1_reg: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            -- Synchronous reset.  Only the fields listed here are reset.

            -- stage 1
            r1.busy            <= '0';
            r1.issued          <= '0';
            r1.req.valid       <= '0';
            r1.req.dc_req      <= '0';
            r1.req.incomplete  <= '0';
            r1.req.tlbie       <= '0';
            r1.req.is_slbia    <= '0';
            r1.req.instr_fault <= '0';
            r1.req.load        <= '0';
            r1.req.priv_mode   <= '0';
            r1.req.sprsel      <= "00";
            r1.req.ric         <= "00";
            r1.req.xerc        <= xerc_init;

            -- stage 2
            r2.req.valid       <= '0';
            r2.busy            <= '0';
            r2.req.tlbie       <= '0';
            r2.req.is_slbia    <= '0';
            r2.req.instr_fault <= '0';
            r2.req.load        <= '0';
            r2.req.priv_mode   <= '0';
            r2.req.sprsel      <= "00";
            r2.req.ric         <= "00";
            r2.req.xerc        <= xerc_init;

            r2.wait_dc         <= '0';
            r2.wait_mmu        <= '0';
            r2.one_cycle       <= '0';

            -- stage 3
            r3.dar             <= (others => '0');
            r3.dsisr           <= (others => '0');
            r3.state           <= IDLE;
            r3.write_enable    <= '0';
            r3.interrupt       <= '0';
            r3.complete        <= '0';
            r3.stage1_en       <= '1';
            r3.events.load_complete  <= '0';
            r3.events.store_complete <= '0';

            flushing           <= '0';
        else
            r1 <= r1in;
            r2 <= r2in;
            r3 <= r3in;
            -- Set once a valid request with align_intr enters stage 1;
            -- held until flush is asserted.
            flushing <= not flush and
                        (flushing or (r1in.req.valid and r1in.req.align_intr));
        end if;

        -- Registered regardless of reset
        stage1_dreq <= stage1_dcreq;

        -- Sanity checks: a dcache response must correspond to a valid
        -- dcache request in stage 2 while stage 3 is idle ...
        if d_in.valid = '1' then
            assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
        end if;
        if d_in.error = '1' then
            assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
        end if;
        -- ... and an MMU response must arrive while stage 3 is in MMU_WAIT.
        if m_in.done = '1' or m_in.err = '1' then
            assert r2.req.valid = '1' and r3.state = MMU_WAIT severity failure;
        end if;
    end if;
end process;
|
|
|
|
|
|
|
|
ls_fp_conv: if HAS_FPU generate
    -- Convert DP data to SP for stfs.
    -- Source is l_in.data; result is store_sp_data.
    dp_to_sp: process(all)
        variable dp_exp  : unsigned(10 downto 0);
        variable mant    : std_ulogic_vector(22 downto 0);
        variable rshift  : unsigned(4 downto 0);
        variable sp_word : std_ulogic_vector(31 downto 0);
    begin
        -- Default: signed zero
        sp_word := (others => '0');
        sp_word(31) := l_in.data(63);

        dp_exp := unsigned(l_in.data(62 downto 52));
        if dp_exp > 896 then
            -- Representable as a normalized SP value: keep the top
            -- exponent bit, then the low exponent bits and the fraction.
            sp_word(30) := l_in.data(62);
            sp_word(29 downto 0) := l_in.data(58 downto 29);
        elsif dp_exp >= 874 then
            -- denormalization required: make the implicit 1 explicit and
            -- shift right by the two's complement of the low exponent bits
            mant := '1' & l_in.data(51 downto 30);
            rshift := 0 - dp_exp(4 downto 0);
            sp_word(22 downto 0) := shifter_23r(mant, rshift);
        end if;

        store_sp_data <= sp_word;
    end process;

    -- Convert SP data to DP for lfs.
    -- Source is r3.ld_sp_data (with r3.ld_sp_nz / r3.ld_sp_lz);
    -- result is load_dp_data.
    sp_to_dp: process(all)
        variable sp_exp  : unsigned(7 downto 0);
        variable dp_exp  : unsigned(10 downto 0);
        variable any_set : std_ulogic;
        variable all_set : std_ulogic;
        variable mant    : std_ulogic_vector(22 downto 0);
        variable lshift  : unsigned(4 downto 0);
        variable dp_word : std_ulogic_vector(63 downto 0);
    begin
        mant    := r3.ld_sp_data(22 downto 0);
        sp_exp  := unsigned(r3.ld_sp_data(30 downto 23));
        any_set := or (r3.ld_sp_data(30 downto 23));
        all_set := and (r3.ld_sp_data(30 downto 23));
        lshift  := (others => '0');

        if all_set = '1' then
            dp_exp := to_unsigned(2047, 11);    -- infinity or NaN
        elsif any_set = '1' then
            dp_exp := 896 + resize(sp_exp, 11); -- finite normalized value
        elsif r3.ld_sp_nz = '0' then
            dp_exp := to_unsigned(0, 11);       -- zero
        else
            -- denormalized SP operand, need to normalize
            dp_exp := 896 - resize(unsigned(r3.ld_sp_lz), 11);
            lshift := unsigned(r3.ld_sp_lz(4 downto 0)) + 1;
        end if;

        dp_word(63)           := r3.ld_sp_data(31);
        dp_word(62 downto 52) := std_ulogic_vector(dp_exp);
        dp_word(51 downto 29) := shifter_23l(mant, lshift);
        dp_word(28 downto 0)  := (others => '0');

        load_dp_data <= dp_word;
    end process;
end generate;
|
|
|
|
|
|
|
|
-- Translate a load/store instruction into the internal request format.
-- Combinational: reads l_in (plus store_sp_data when HAS_FPU) and drives req_in.
-- XXX this should only depend on l_in, but actually depends on
-- r1.addr0 as well (in the l_in.second = 1 case).
loadstore1_in: process(all)
    variable v          : request_t;
    variable lsu_sum    : std_ulogic_vector(63 downto 0);
    variable len_m1     : unsigned(2 downto 0);
    variable brev_lenm1 : unsigned(2 downto 0);
    variable long_sel   : std_ulogic_vector(15 downto 0);
    variable addr       : std_ulogic_vector(63 downto 0);
    variable sprn       : std_ulogic_vector(9 downto 0);
    variable misaligned : std_ulogic;
    variable addr_mask  : std_ulogic_vector(2 downto 0);
begin
    v := request_init;
    -- The two 5-bit halves of the SPR number field are swapped in the instruction
    sprn := l_in.insn(15 downto 11) & l_in.insn(20 downto 16);

    -- Fields copied straight from the incoming instruction
    v.valid := l_in.valid;
    v.instr_tag := l_in.instr_tag;
    v.mode_32bit := l_in.mode_32bit;
    v.prefixed := l_in.prefixed;
    v.write_reg := l_in.write_reg;
    v.length := l_in.length;
    v.elt_length := l_in.length;
    v.byte_reverse := l_in.byte_reverse;
    v.sign_extend := l_in.sign_extend;
    v.update := l_in.update;
    v.xerc := l_in.xerc;
    v.reserve := l_in.reserve;
    v.rc := l_in.rc;
    v.nc := l_in.ci;
    v.virt_mode := l_in.virt_mode;
    v.priv_mode := l_in.priv_mode;
    v.ric := l_in.insn(19 downto 18);
    if sprn(1) = '1' then
        -- DSISR and DAR
        v.sprsel := '1' & sprn(0);
    else
        -- PID and PTCR
        v.sprsel := '0' & sprn(8);
    end if;

    lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));

    -- Single-precision FP stores use the converted value in the low word
    if HAS_FPU and l_in.is_32bit = '1' then
        v.store_data := x"00000000" & store_sp_data;
    else
        v.store_data := l_in.data;
    end if;

    addr := lsu_sum;
    if l_in.second = '1' then
        -- for an update-form load, use the previous address
        -- as the value to write back to RA.
        -- for a quadword load or store, use the previous
        -- address + 8.
        addr := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + not l_in.update) &
                r1.addr0(2 downto 0);
    end if;
    if l_in.mode_32bit = '1' then
        addr(63 downto 32) := (others => '0');
    end if;
    v.addr := addr;

    -- XXX Temporary hack.  Mark the op as non-cachable if the address
    -- is the form 0xc------- for a real-mode access.
    if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
        v.nc := '1';
    end if;

    -- length - 1 (mod 8): used both as the alignment mask and, for
    -- byte-reversed accesses, as the byte-index XOR mask
    len_m1 := unsigned(l_in.length(2 downto 0)) - 1;
    addr_mask := std_ulogic_vector(len_m1);

    -- Do length_to_sel and work out if we are doing 2 dwords
    long_sel := xfer_data_sel(l_in.length, addr(2 downto 0));
    v.byte_sel := long_sel(7 downto 0);
    v.second_bytes := long_sel(15 downto 8);
    if long_sel(15 downto 8) /= "00000000" then
        v.two_dwords := '1';
    end if;

    -- check alignment for larx/stcx
    misaligned := or (addr_mask and addr(2 downto 0));
    if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then
        -- quadword access whose halves are not at offsets 0 and 8 of a 16-byte block
        misaligned := '1';
    end if;
    v.align_intr := l_in.reserve and misaligned;

    v.atomic_first := not misaligned and not l_in.second;
    v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);

    -- is this a quadword load or store? i.e. lq plq stq pstq lqarx stqcx.
    if l_in.repeat = '1' and l_in.update = '0' then
        if misaligned = '0' then
            -- Since the access is aligned we have to do it atomically
            v.atomic_qw := '1';
        else
            -- We require non-prefixed lq in LE mode to be aligned in order
            -- to avoid the case where RA = RT+1 and the second access faults
            -- after the first has overwritten RA.
            if l_in.op = OP_LOAD and l_in.byte_reverse = '0' and l_in.prefixed = '0' then
                v.align_intr := '1';
            end if;
        end if;
    end if;

    case l_in.op is
        when OP_SYNC =>
            v.sync := '1';
        when OP_STORE =>
            v.store := '1';
            if l_in.length = "0000" then
                v.touch := '1';
            end if;
        when OP_LOAD =>
            if l_in.update = '0' or l_in.second = '0' then
                v.load := '1';
                if HAS_FPU and l_in.is_32bit = '1' then
                    -- Allow an extra cycle for SP->DP precision conversion
                    v.load_sp := '1';
                end if;
                if l_in.length = "0000" then
                    v.touch := '1';
                end if;
            else
                -- write back address to RA
                v.do_update := '1';
            end if;
        when OP_DCBF =>
            v.load := '1';
            v.flush := '1';
        when OP_DCBZ =>
            v.dcbz := '1';
            -- overrides any earlier align_intr: dcbz faults only when non-cacheable
            v.align_intr := v.nc;
        when OP_TLBIE =>
            v.tlbie := '1';
            v.addr := l_in.addr2;    -- address from RB for tlbie
            v.is_slbia := l_in.insn(7);
            v.mmu_op := '1';
        when OP_MFSPR =>
            v.read_spr := '1';
        when OP_MTSPR =>
            v.write_spr := '1';
            -- SPRs with sprn(1) = 0 (PID, PTCR) are written via the MMU
            v.mmu_op := not sprn(1);
        when OP_FETCH_FAILED =>
            -- send it to the MMU to do the radix walk
            v.instr_fault := '1';
            v.mmu_op := '1';
        when others =>
    end case;
    v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr;
    v.incomplete := v.dc_req and v.two_dwords;

    -- Work out controls for load and store formatting
    brev_lenm1 := "000";
    if v.byte_reverse = '1' then
        brev_lenm1 := len_m1;
    end if;
    v.brev_mask := brev_lenm1;

    req_in <= v;
end process;
|
|
|
|
|
|
|
|
-- The unit is busy when either of the first two stages is busy, or when
-- the dcache is stalling or has reported an error this cycle.
busy <= r1.busy or r2.busy or dc_stall or d_in.error;

-- Completion sources: a request needing no dcache/MMU access, a dcache
-- response for the request waiting in stage 2, or stage 3 itself.
complete <= r3.complete or r2.one_cycle or (r2.wait_dc and d_in.valid);
|
|
|
|
|
|
|
|
-- Processing done in the first cycle of a load/store instruction.
-- Chooses the request to present this cycle (a new one from req_in, a
-- re-issue of the one held in r1, or the second half of a misaligned
-- access) and computes the next value of the stage 1 register.
loadstore1_1: process(all)
    variable nxt      : reg_stage1_t;
    variable rq       : request_t;
    variable send_dc  : std_ulogic;
    variable do_issue : std_ulogic;
begin
    nxt := r1;
    do_issue := '0';
    send_dc := '0';

    if r1.busy = '0' then
        -- Stage 1 is free: take the incoming request
        rq := req_in;
        rq.valid := l_in.valid;
        if flushing = '1' then
            -- Make this a no-op request rather than simply invalid.
            -- It will never get to stage 3 since there is a request ahead of
            -- it with align_intr = 1.
            rq.dc_req := '0';
        end if;
        do_issue := l_in.valid and rq.dc_req;
        if l_in.valid = '1' then
            nxt.addr0 := rq.addr;
        end if;
    else
        -- Stage 1 is still working on the request it already holds
        rq := r1.req;
        if r1.req.dc_req = '1' and r1.issued = '0' then
            -- not yet issued (or issue was cancelled): try again
            do_issue := '1';
        elsif r1.req.incomplete = '1' then
            -- construct the second request for a misaligned access
            rq.dword_index := '1';
            rq.incomplete := '0';
            rq.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000";
            if r1.req.mode_32bit = '1' then
                -- drop a carry out of the low 32 bits
                rq.addr(32) := '0';
            end if;
            rq.byte_sel := r1.req.second_bytes;
            do_issue := '1';
        else
            -- For the lfs conversion cycle, leave the request valid
            -- for another cycle but with req.dc_req = 0.
            -- For an MMU request last cycle, we have nothing
            -- to do in this cycle, so make it invalid.
            if r1.req.load_sp = '0' then
                rq.valid := '0';
            end if;
            rq.dc_req := '0';
        end if;
    end if;

    if flush = '1' then
        -- Discard whatever stage 1 holds
        nxt.req.valid := '0';
        nxt.req.dc_req := '0';
        nxt.req.incomplete := '0';
        nxt.issued := '0';
        nxt.busy := '0';
    elsif (dc_stall or d_in.error or r2.busy) = '0' then
        -- we can change what's in r1 next cycle because the current thing
        -- in r1 will go into r2
        nxt.req := rq;
        send_dc := do_issue;
        nxt.issued := do_issue;
        -- Stay busy when a follow-up cycle is needed (second dword or lfs
        -- conversion) or while an MMU operation is in progress
        nxt.busy := (do_issue and (rq.incomplete or rq.load_sp)) or (rq.valid and rq.mmu_op);
    elsif r1.issued = '1' and d_in.error = '1' then
        -- pipeline is stalled, and the request we already issued hit a
        -- dcache error: mark it un-issued so it gets sent again
        nxt.issued := '0';
        nxt.busy := '1';
    end if;

    stage1_req <= rq;
    stage1_dcreq <= send_dc;
    r1in <= nxt;
end process;
|
|
|
|
|
|
|
|
-- Processing done in the second cycle of a load/store instruction.
-- Store data is formatted here and sent to the dcache.
-- The request in r1 is sent to stage 3 if stage 3 will not be busy next cycle.
-- This process also selects the SPR value for mfspr and services SPR
-- reads from core_debug.
loadstore1_2: process(all)
    variable nxt        : reg_stage2_t;
    variable lo_bit     : integer;
    variable src_byte   : unsigned(2 downto 0);
    variable sum4       : unsigned(3 downto 0);
    variable dst_byte   : unsigned(2 downto 0);
    variable offset     : unsigned(2 downto 0);
    variable take_intr  : std_ulogic;
    variable dbg_rd     : std_ulogic;
    variable spr_pick   : std_ulogic_vector(1 downto 0);
    variable spr_value  : std_ulogic_vector(63 downto 0);
begin
    nxt := r2;

    -- Byte reversing and rotating for stores.
    -- Done in the second cycle (the cycle after l_in.valid = 1).
    offset := unsigned(r1.addr0(2 downto 0));
    for i in 0 to 7 loop
        src_byte := (to_unsigned(i, 3) - offset) xor r1.req.brev_mask;
        if is_X(src_byte) then
            store_data(i * 8 + 7 downto i * 8) <= (others => 'X');
        else
            lo_bit := to_integer(src_byte) * 8;
            store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(lo_bit + 7 downto lo_bit);
        end if;
    end loop;

    -- A debug SPR read is serviced unless an mfspr is using the read path
    dbg_rd := dbg_spr_req and not (r1.req.valid and r1.req.read_spr);
    if dbg_rd = '0' then
        spr_pick := r1.req.sprsel;
    else
        spr_pick := dbg_spr_addr;
    end if;
    if spr_pick(1) = '1' then
        -- DSISR / DAR are held in stage 3
        if spr_pick(0) = '0' then
            spr_value := x"00000000" & r3.dsisr;
        else
            spr_value := r3.dar;
        end if;
    else
        -- everything else comes from the MMU
        spr_value := m_in.sprval;
    end if;
    if dbg_spr_req = '0' then
        nxt.dbg_spr_ack := '0';
    elsif dbg_rd = '1' and r2.dbg_spr_ack = '0' then
        nxt.dbg_spr := spr_value;
        nxt.dbg_spr_ack := '1';
    end if;

    if (dc_stall or d_in.error or r2.busy or l_in.e2stall) = '0' then
        if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then
            -- Take the request from stage 1
            nxt.req := r1.req;
            nxt.addr0 := r1.addr0;
            nxt.req.store_data := store_data;
            nxt.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
                           not r1.req.incomplete;
            nxt.wait_mmu := r1.req.valid and r1.req.mmu_op;
            if r1.req.valid = '1' and r1.req.align_intr = '1' then
                nxt.busy := '1';
                nxt.one_cycle := '0';
            else
                nxt.busy := r1.req.valid and r1.req.mmu_op;
                nxt.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op);
            end if;
            -- Select what stage 3 writes back
            if r1.req.do_update = '1' or r1.req.store = '1' or r1.req.read_spr = '1' then
                nxt.wr_sel := "00";
            elsif r1.req.load_sp = '1' then
                nxt.wr_sel := "01";
            else
                nxt.wr_sel := "10";
            end if;
            if r1.req.read_spr = '1' then
                -- mfspr: carry the SPR value in the addr0 slot
                nxt.addr0 := spr_value;
            end if;

            -- Work out load formatter controls for next cycle
            for i in 0 to 7 loop
                dst_byte := to_unsigned(i, 3) xor r1.req.brev_mask;
                sum4 := ('0' & dst_byte) + ('0' & offset);
                nxt.use_second(i) := sum4(3);
                nxt.byte_index(i) := sum4(2 downto 0);
            end loop;
        else
            -- Stage 1 holds a dcache request that has not been issued yet
            nxt.req.valid := '0';
            nxt.wait_dc := '0';
            nxt.wait_mmu := '0';
            nxt.one_cycle := '0';
        end if;
    end if;
    if r2.wait_mmu = '1' and m_in.done = '1' then
        if r2.req.mmu_op = '1' then
            nxt.req.valid := '0';
            nxt.busy := '0';
        end if;
        nxt.wait_mmu := '0';
    end if;
    if r2.busy = '1' and r2.wait_mmu = '0' then
        nxt.busy := '0';
    end if;

    take_intr := (r2.req.valid and r2.req.align_intr) or
                 (d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or
                 m_in.err;
    if take_intr = '1' then
        -- The request in stage 2 ends here
        nxt.req.valid := '0';
        nxt.busy := '0';
        nxt.wait_dc := '0';
        nxt.wait_mmu := '0';
    elsif d_in.error = '1' then
        -- Any other dcache error: hold the request and wait for the MMU
        nxt.wait_mmu := '1';
        nxt.busy := '1';
    end if;

    r2in <= nxt;

    -- SPR values for core_debug
    dbg_spr_data <= r2.dbg_spr;
    dbg_spr_ack <= r2.dbg_spr_ack;
end process;
|
|
|
|
|
|
|
|
-- Processing done in the third cycle of a load/store instruction.
|
|
|
|
-- At this stage we can do things that have side effects without
|
|
|
|
-- fear of the instruction getting flushed. This is the point at
|
|
|
|
-- which requests get sent to the MMU.
|
|
|
|
loadstore1_3: process(all)
|
|
|
|
variable v : reg_stage3_t;
|
|
|
|
variable j : integer;
|
|
|
|
variable req : std_ulogic;
|
|
|
|
variable mmureq : std_ulogic;
|
|
|
|
variable mmu_mtspr : std_ulogic;
|
|
|
|
variable write_enable : std_ulogic;
|
|
|
|
variable write_data : std_ulogic_vector(63 downto 0);
|
|
|
|
variable do_update : std_ulogic;
|
|
|
|
variable done : std_ulogic;
|
|
|
|
variable exception : std_ulogic;
|
|
|
|
variable data_permuted : std_ulogic_vector(63 downto 0);
|
|
|
|
variable data_trimmed : std_ulogic_vector(63 downto 0);
|
|
|
|
variable sprval : std_ulogic_vector(63 downto 0);
|
|
|
|
variable negative : std_ulogic;
|
|
|
|
variable dsisr : std_ulogic_vector(31 downto 0);
|
|
|
|
variable itlb_fault : std_ulogic;
|
|
|
|
variable trim_ctl : trim_ctl_t;
|
|
|
|
begin
|
|
|
|
v := r3;
|
|
|
|
|
|
|
|
req := '0';
|
|
|
|
mmureq := '0';
|
-- (Revision-history note attached to the following line.)
-- MMU: Implement radix page table machinery
-- This adds the necessary machinery to the MMU for it to do radix page
-- table walks.  The core elements are a shifter that can shift the
-- address right by between 0 and 47 bits, a mask generator that can
-- generate a mask of between 5 and 16 bits, a final mask generator,
-- and new states in the state machine.
-- (The final mask generator is used for transferring bits of the
-- original address into the resulting TLB entry when the leaf PTE
-- corresponds to a page size larger than 4kB.)
-- The hardware does not implement a partition table or a process table.
-- Software is expected to load the appropriate process table entry
-- into a new SPR called PGTBL0, SPR 720.  The contents should be
-- formatted as described in Book III section 5.7.6.2 of the Power ISA
-- v3.0B.  PGTBL0 is set to 0 on hard reset.  At present, the top two bits
-- of the address (the quadrant) are ignored.
-- There is currently no caching of any step in the translation process
-- or of the final result, other than the entry created in the dTLB.
-- That entry is a 4k page entry even if the leaf PTE found in the walk
-- corresponds to a larger page size.
-- This implementation can handle almost any page table layout and any
-- page size.  The RTS field (in PGTBL0) can have any value between 0
-- and 31, corresponding to a total address space size between 2^31
-- and 2^62 bytes.  The RPDS field of PGTBL0 can be any value between
-- 5 and 16, except that a value of 0 is taken to disable radix page
-- table walking (for use when one is using software loading of TLB
-- entries).  The NLS field of the page directory entries can have any
-- value between 5 and 16.  The minimum page size is 4kB, meaning that
-- the sum of RPDS and the NLS values of the PDEs found on the path to
-- a leaf PTE must be less than or equal to RTS + 31 - 12.
-- The PGTBL0 SPR is in the mmu module; thus this adds a path for
-- loadstore1 to read and write SPRs in mmu.  This adds code in dcache
-- to service doubleword read requests from the MMU, as well as requests
-- to write dTLB entries.
-- Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
-- (5 years ago)
|
|
|
mmu_mtspr := '0';
|
|
|
|
done := '0';
|
|
|
|
exception := '0';
|
|
|
|
dsisr := (others => '0');
|
|
|
|
write_enable := '0';
|
|
|
|
sprval := (others => '0');
|
|
|
|
do_update := '0';
|
|
|
|
v.complete := '0';
|
|
|
|
v.srr1 := (others => '0');
|
|
|
|
v.events := (others => '0');
|
|
|
|
|
|
|
|
-- load data formatting
|
|
|
|
-- shift and byte-reverse data bytes
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
if is_X(r2.byte_index(i)) then
|
|
|
|
data_permuted(i * 8 + 7 downto i * 8) := (others => 'X');
|
|
|
|
else
|
|
|
|
j := to_integer(r2.byte_index(i)) * 8;
|
|
|
|
data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
|
|
|
|
-- Work out the sign bit for sign extension.
|
|
|
|
-- For unaligned loads crossing two dwords, the sign bit is in the
|
|
|
|
-- first dword for big-endian (byte_reverse = 1), or the second dword
|
|
|
|
-- for little-endian.
|
|
|
|
if r2.req.dword_index = '1' and r2.req.byte_reverse = '1' then
|
|
|
|
negative := (r2.req.length(3) and r3.load_data(63)) or
|
|
|
|
(r2.req.length(2) and r3.load_data(31)) or
|
|
|
|
(r2.req.length(1) and r3.load_data(15)) or
|
|
|
|
(r2.req.length(0) and r3.load_data(7));
|
|
|
|
else
|
|
|
|
negative := (r2.req.length(3) and data_permuted(63)) or
|
|
|
|
(r2.req.length(2) and data_permuted(31)) or
|
|
|
|
(r2.req.length(1) and data_permuted(15)) or
|
|
|
|
(r2.req.length(0) and data_permuted(7));
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- trim and sign-extend
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
if is_X(r2.req.length) then
|
|
|
|
trim_ctl(i) := "XX";
|
|
|
|
elsif i < to_integer(unsigned(r2.req.length)) then
|
|
|
|
if r2.req.dword_index = '1' then
|
|
|
|
trim_ctl(i) := '1' & not r2.use_second(i);
|
|
|
|
else
|
|
|
|
trim_ctl(i) := "10";
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
trim_ctl(i) := "00";
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
case trim_ctl(i) is
|
|
|
|
when "11" =>
|
|
|
|
data_trimmed(i * 8 + 7 downto i * 8) := r3.load_data(i * 8 + 7 downto i * 8);
|
|
|
|
when "10" =>
|
|
|
|
data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
|
|
|
|
when others =>
|
|
|
|
data_trimmed(i * 8 + 7 downto i * 8) := (others => negative and r2.req.sign_extend);
|
|
|
|
end case;
|
|
|
|
end loop;
|
|
|
|
|
|
|
|
if HAS_FPU then
|
|
|
|
-- Single-precision FP conversion for loads
|
|
|
|
v.ld_sp_data := data_trimmed(31 downto 0);
|
|
|
|
v.ld_sp_nz := or (data_trimmed(22 downto 0));
|
|
|
|
v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if d_in.valid = '1' and r2.req.load = '1' then
|
|
|
|
v.load_data := data_permuted;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
if r2.req.valid = '1' then
|
|
|
|
if r2.req.read_spr = '1' then
|
|
|
|
write_enable := '1';
|
|
|
|
end if;
|
|
|
|
if r2.req.align_intr = '1' then
|
|
|
|
-- generate alignment interrupt
|
|
|
|
exception := '1';
|
|
|
|
end if;
|
|
|
|
if r2.req.do_update = '1' then
|
|
|
|
do_update := '1';
|
|
|
|
end if;
|
|
|
|
if r2.req.load_sp = '1' and r2.req.dc_req = '0' then
|
|
|
|
write_enable := '1';
|
|
|
|
end if;
|
|
|
|
if r2.req.write_spr = '1' and r2.req.mmu_op = '0' then
|
|
|
|
if r2.req.sprsel(0) = '0' then
|
|
|
|
v.dsisr := r2.req.store_data(31 downto 0);
|
|
|
|
else
|
|
|
|
v.dar := r2.req.store_data;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if r3.state = IDLE and r2.req.valid = '1' and r2.req.mmu_op = '1' then
|
|
|
|
-- send request (tlbie, mtspr, itlb miss) to MMU
|
|
|
|
mmureq := not r2.req.write_spr;
|
|
|
|
mmu_mtspr := r2.req.write_spr;
|
|
|
|
if r2.req.instr_fault = '1' then
|
|
|
|
v.events.itlb_miss := '1';
|
|
|
|
end if;
|
|
|
|
v.state := MMU_WAIT;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if d_in.valid = '1' then
|
|
|
|
if r2.req.incomplete = '0' then
|
|
|
|
write_enable := r2.req.load and not r2.req.load_sp and
|
|
|
|
not r2.req.flush and not r2.req.touch;
|
|
|
|
-- stores write back rA update
|
|
|
|
do_update := r2.req.update and r2.req.store;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
if d_in.error = '1' then
|
|
|
|
if d_in.cache_paradox = '1' then
|
|
|
|
-- signal an interrupt straight away
|
|
|
|
exception := '1';
|
|
|
|
dsisr(63 - 38) := not r2.req.load;
|
|
|
|
dsisr(63 - 37) := d_in.reserve_nc;
|
|
|
|
-- XXX there is no architected bit for this
|
|
|
|
-- (probably should be a machine check in fact)
|
|
|
|
dsisr(63 - 35) := d_in.cache_paradox;
|
|
|
|
else
|
|
|
|
-- Look up the translation for TLB miss
|
|
|
|
-- and also for permission error and RC error
|
|
|
|
-- in case the PTE has been updated.
|
|
|
|
mmureq := '1';
|
|
|
|
v.state := MMU_WAIT;
|
|
|
|
v.stage1_en := '0';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if m_in.done = '1' then
|
|
|
|
if r2.req.dc_req = '1' then
|
|
|
|
-- retry the request now that the MMU has installed a TLB entry
|
|
|
|
req := '1';
|
|
|
|
else
|
|
|
|
v.complete := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
if m_in.err = '1' then
|
|
|
|
exception := '1';
|
|
|
|
dsisr(63 - 33) := m_in.invalid;
|
|
|
|
dsisr(63 - 36) := m_in.perm_error;
|
|
|
|
dsisr(63 - 38) := r2.req.store or r2.req.dcbz;
|
|
|
|
dsisr(63 - 44) := m_in.badtree;
|
|
|
|
dsisr(63 - 45) := m_in.rc_error;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if (m_in.done or m_in.err) = '1' then
|
|
|
|
v.stage1_en := '1';
|
|
|
|
v.state := IDLE;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
v.events.load_complete := r2.req.load and complete;
|
|
|
|
v.events.store_complete := (r2.req.store or r2.req.dcbz) and complete;
|
|
|
|
|
|
|
|
-- generate DSI or DSegI for load/store exceptions
|
|
|
|
-- or ISI or ISegI for instruction fetch exceptions
|
|
|
|
v.interrupt := exception;
|
|
|
|
if exception = '1' then
|
|
|
|
if r2.req.align_intr = '1' then
|
|
|
|
v.intr_vec := 16#600#;
|
-- Implement interrupts for prefixed instructions
-- This arranges to generate an illegal instruction type program
-- interrupt for illegal prefixed instructions, that is, those where the
-- suffix is not a legal value given the prefix, or the prefix has a
-- reserved value in the subtype field. This implementation doesn't
-- generate an interrupt for the invalid 8LS:D and MLS:D instruction
-- forms where R = 1 and RA != 0. (In those cases it uses (RA) as the
-- addend, i.e. it ignores the R bit.)
-- This detects the case where the address of an instruction prefix is
-- equal mod 64 to 60, and generates an alignment interrupt in that case.
-- This also arranges to set bit 34 of SRR1 when an interrupt occurs due
-- to a prefixed instruction, for those interrupts where that is required
-- (i.e. trace, alignment, floating-point unavailable, data storage, data
-- segment, and most cases of program interrupt).
-- Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
-- 2 years ago
|
|
|
v.srr1(47 - 34) := r2.req.prefixed;
|
|
|
|
v.dar := r2.req.addr;
|
|
|
|
elsif r2.req.instr_fault = '0' then
|
-- Implement interrupts for prefixed instructions
-- This arranges to generate an illegal instruction type program
-- interrupt for illegal prefixed instructions, that is, those where the
-- suffix is not a legal value given the prefix, or the prefix has a
-- reserved value in the subtype field. This implementation doesn't
-- generate an interrupt for the invalid 8LS:D and MLS:D instruction
-- forms where R = 1 and RA != 0. (In those cases it uses (RA) as the
-- addend, i.e. it ignores the R bit.)
-- This detects the case where the address of an instruction prefix is
-- equal mod 64 to 60, and generates an alignment interrupt in that case.
-- This also arranges to set bit 34 of SRR1 when an interrupt occurs due
-- to a prefixed instruction, for those interrupts where that is required
-- (i.e. trace, alignment, floating-point unavailable, data storage, data
-- segment, and most cases of program interrupt).
-- Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
-- 2 years ago
|
|
|
v.srr1(47 - 34) := r2.req.prefixed;
|
|
|
|
v.dar := r2.req.addr;
|
|
|
|
if m_in.segerr = '0' then
|
|
|
|
v.intr_vec := 16#300#;
|
|
|
|
v.dsisr := dsisr;
|
|
|
|
else
|
|
|
|
v.intr_vec := 16#380#;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
if m_in.segerr = '0' then
|
|
|
|
v.srr1(47 - 33) := m_in.invalid;
|
|
|
|
v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
|
|
|
|
v.srr1(47 - 44) := m_in.badtree;
|
|
|
|
v.srr1(47 - 45) := m_in.rc_error;
|
|
|
|
v.intr_vec := 16#400#;
|
|
|
|
else
|
|
|
|
v.intr_vec := 16#480#;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
case r2.wr_sel is
|
|
|
|
when "00" =>
|
|
|
|
-- update reg
|
|
|
|
write_data := r2.addr0;
|
|
|
|
when "01" =>
|
|
|
|
-- lfs result
|
|
|
|
write_data := load_dp_data;
|
|
|
|
when others =>
|
|
|
|
-- load data
|
|
|
|
write_data := data_trimmed;
|
|
|
|
end case;
|
|
|
|
|
|
|
|
-- Update outputs to dcache
|
|
|
|
if r3.stage1_en = '1' then
|
|
|
|
d_out.valid <= stage1_dcreq;
|
|
|
|
d_out.load <= stage1_req.load;
|
|
|
|
d_out.dcbz <= stage1_req.dcbz;
|
|
|
|
d_out.flush <= stage1_req.flush;
|
|
|
|
d_out.touch <= stage1_req.touch;
|
|
|
|
d_out.sync <= stage1_req.sync;
|
|
|
|
d_out.nc <= stage1_req.nc;
|
|
|
|
d_out.reserve <= stage1_req.reserve;
|
|
|
|
d_out.atomic_qw <= stage1_req.atomic_qw;
|
|
|
|
d_out.atomic_first <= stage1_req.atomic_first;
|
|
|
|
d_out.atomic_last <= stage1_req.atomic_last;
|
|
|
|
d_out.addr <= stage1_req.addr;
|
|
|
|
d_out.byte_sel <= stage1_req.byte_sel;
|
|
|
|
d_out.virt_mode <= stage1_req.virt_mode;
|
|
|
|
d_out.priv_mode <= stage1_req.priv_mode;
|
|
|
|
else
|
|
|
|
d_out.valid <= req;
|
|
|
|
d_out.load <= r2.req.load;
|
|
|
|
d_out.dcbz <= r2.req.dcbz;
|
|
|
|
d_out.flush <= r2.req.flush;
|
|
|
|
d_out.touch <= r2.req.touch;
|
|
|
|
d_out.sync <= r2.req.sync;
|
|
|
|
d_out.nc <= r2.req.nc;
|
|
|
|
d_out.reserve <= r2.req.reserve;
|
|
|
|
d_out.atomic_qw <= r2.req.atomic_qw;
|
|
|
|
d_out.atomic_first <= r2.req.atomic_first;
|
|
|
|
d_out.atomic_last <= r2.req.atomic_last;
|
|
|
|
d_out.addr <= r2.req.addr;
|
|
|
|
d_out.byte_sel <= r2.req.byte_sel;
|
|
|
|
d_out.virt_mode <= r2.req.virt_mode;
|
|
|
|
d_out.priv_mode <= r2.req.priv_mode;
|
|
|
|
end if;
|
|
|
|
if stage1_dreq = '1' then
|
|
|
|
d_out.data <= store_data;
|
|
|
|
else
|
|
|
|
d_out.data <= r2.req.store_data;
|
|
|
|
end if;
|
|
|
|
d_out.hold <= l_in.e2stall;
|
|
|
|
|
|
|
|
-- Update outputs to MMU
|
|
|
|
m_out.valid <= mmureq;
|
|
|
|
m_out.iside <= r2.req.instr_fault;
|
|
|
|
m_out.load <= r2.req.load;
|
|
|
|
m_out.priv <= r2.req.priv_mode;
|
|
|
|
m_out.tlbie <= r2.req.tlbie;
|
|
|
|
m_out.ric <= r2.req.ric;
|
-- MMU: Implement radix page table machinery
-- This adds the necessary machinery to the MMU for it to do radix page
-- table walks. The core elements are a shifter that can shift the
-- address right by between 0 and 47 bits, a mask generator that can
-- generate a mask of between 5 and 16 bits, a final mask generator,
-- and new states in the state machine.
-- (The final mask generator is used for transferring bits of the
-- original address into the resulting TLB entry when the leaf PTE
-- corresponds to a page size larger than 4kB.)
-- The hardware does not implement a partition table or a process table.
-- Software is expected to load the appropriate process table entry
-- into a new SPR called PGTBL0, SPR 720. The contents should be
-- formatted as described in Book III section 5.7.6.2 of the Power ISA
-- v3.0B. PGTBL0 is set to 0 on hard reset. At present, the top two bits
-- of the address (the quadrant) are ignored.
-- There is currently no caching of any step in the translation process
-- or of the final result, other than the entry created in the dTLB.
-- That entry is a 4k page entry even if the leaf PTE found in the walk
-- corresponds to a larger page size.
-- This implementation can handle almost any page table layout and any
-- page size. The RTS field (in PGTBL0) can have any value between 0
-- and 31, corresponding to a total address space size between 2^31
-- and 2^62 bytes. The RPDS field of PGTBL0 can be any value between
-- 5 and 16, except that a value of 0 is taken to disable radix page
-- table walking (for use when one is using software loading of TLB
-- entries). The NLS field of the page directory entries can have any
-- value between 5 and 16. The minimum page size is 4kB, meaning that
-- the sum of RPDS and the NLS values of the PDEs found on the path to
-- a leaf PTE must be less than or equal to RTS + 31 - 12.
-- The PGTBL0 SPR is in the mmu module; thus this adds a path for
-- loadstore1 to read and write SPRs in mmu. This adds code in dcache
-- to service doubleword read requests from the MMU, as well as requests
-- to write dTLB entries.
-- Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
-- 5 years ago
|
|
|
m_out.mtspr <= mmu_mtspr;
|
|
|
|
m_out.sprnf <= r1.req.sprsel(0);
|
|
|
|
m_out.sprnt <= r2.req.sprsel(0);
|
|
|
|
m_out.addr <= r2.req.addr;
|
|
|
|
m_out.slbia <= r2.req.is_slbia;
|
|
|
|
m_out.rs <= r2.req.store_data;
|
|
|
|
|
|
|
|
-- Update outputs to writeback
|
|
|
|
l_out.valid <= complete;
|
|
|
|
l_out.instr_tag <= r2.req.instr_tag;
|
|
|
|
l_out.write_enable <= write_enable or do_update;
|
|
|
|
l_out.write_reg <= r2.req.write_reg;
|
|
|
|
l_out.write_data <= write_data;
|
|
|
|
l_out.xerc <= r2.req.xerc;
|
|
|
|
l_out.rc <= r2.req.rc and complete;
|
|
|
|
l_out.store_done <= d_in.store_done;
|
|
|
|
l_out.interrupt <= r3.interrupt;
|
|
|
|
l_out.intr_vec <= r3.intr_vec;
|
|
|
|
l_out.srr1 <= r3.srr1;
|
|
|
|
|
|
|
|
-- update busy signal back to execute1
|
|
|
|
e_out.busy <= busy;
|
|
|
|
e_out.l2stall <= dc_stall or d_in.error or r2.busy;
|
|
|
|
|
|
|
|
events <= r3.events;
|
|
|
|
|
|
|
|
flush <= exception;
|
|
|
|
|
|
|
|
-- Update registers
|
|
|
|
r3in <= v;
|
|
|
|
|
|
|
|
end process;
|
|
|
|
|
|
|
|
l1_log: if LOG_LENGTH > 0 generate
    -- Optional logging path, elaborated only when LOG_LENGTH is non-zero.
    -- Holds a clocked snapshot of ten status bits that is exported on log_out.
    signal log_data : std_ulogic_vector(9 downto 0);
begin
    -- Capture the status bits on every rising edge of clk.
    -- Bit positions match the original concatenation order (MSB first).
    ls1_log: process(clk)
    begin
        if rising_edge(clk) then
            -- handshake / status outputs of this unit
            log_data(9) <= e_out.busy;
            log_data(8) <= l_out.interrupt;
            log_data(7) <= l_out.valid;
            log_data(6) <= m_out.valid;
            log_data(5) <= d_out.valid;
            -- MMU completion strobe
            log_data(4) <= m_in.done;
            -- fields of the second-stage register
            log_data(3) <= r2.req.dword_index;
            log_data(2) <= r2.req.valid;
            log_data(1) <= r2.wait_dc;
            -- position of r3.state within state_t, truncated to one bit
            -- (state_t declares IDLE first, then MMU_WAIT)
            log_data(0 downto 0) <=
                std_ulogic_vector(to_unsigned(state_t'pos(r3.state), 1));
        end if;
    end process;

    -- Drive the log port from the registered snapshot.
    log_out <= log_data;
end generate;
|
|
|
|
|
|
|
|
end;
|