@ -30,7 +30,6 @@ architecture behave of mmu is
DO_TLBIE,
PART_TBL_READ,
PART_TBL_WAIT,
PART_TBL_DONE,
PROC_TBL_READ,
PROC_TBL_WAIT,
SEGMENT_CHECK,
@ -71,9 +70,13 @@ architecture behave of mmu is
segerror : std_ulogic;
perm_err : std_ulogic;
rc_error : std_ulogic;
wr_tlbram : std_ulogic;
tlbie_req : std_ulogic;
is_mtspr : std_ulogic;
rereadpte : std_ulogic;
-- communication with TLB and PWC
wr_tlbram : std_ulogic;
wr_pwcram : std_ulogic;
pwc_level : std_ulogic_vector(1 downto 0);
end record;
signal r, rin : reg_stage_t;
@ -151,6 +154,112 @@ architecture behave of mmu is
signal tlb_plru_upd : std_ulogic_vector(2 downto 0);
signal tlb_plru_victim : std_ulogic_vector(1 downto 0);
-- Page walk cache, 256 entries, 4-way set associative
-- (also stores large page PTEs).
-- This is implemented using a 512 x 64 bit RAM, divided
-- into 64 blocks of 8 words, each block containing a set of
-- 4 entries. It caches PDEs and PTEs at the 2MB, 1GB
-- and 512GB levels for a 52-bit address space, giving
-- 31-6, 22-6, and 13-6 bits of address tag respectively
-- (the -6 is because of the 6-bit index).
-- In each block, word 0 contains four 16-bit fields, one per entry,
-- each holding a valid bit, a leaf (PTE) vs. PDE bit, a 2-bit size
-- field, and the 12-bit PID.
-- (This allows us to do invalidate-all or invalidate-by-PID
-- in 64 cycles instead of 256.)
-- For 2MB entries, word 1 contains two 32-bit fields containing
-- address tags (25 bits) for entries 0 and 1, and word 2 has
-- the tags for entries 2 and 3.
-- For 1GB entries, word 3 contains four 16-bit fields containing
-- address tags (16 bits) for entries 0 - 3. For 512GB entries,
-- word 3 is used similarly but there are only 7 bits per tag.
-- Words 4 to 7 contain the PTE/PDE value for entries 0 to 3.
-- Words 1 - 3 are arranged in this way so that any entry can be
-- written in 3 cycles without disturbing other entries.
-- EAs are expected to be in a 4PB (52-bit) space per PID
-- (ignoring the quadrant bits); anything outside that
-- doesn't get cached.
-- Geometry of the page walk cache RAM.
-- PWC_WIDTH is the RAM word size in bits and PWC_DEPTH the number of
-- cached entries.
constant PWC_WIDTH : natural := 64;
constant PWC_DEPTH : natural := 256;
-- Number of index bits selecting one block (set) of the cache.
constant PWC_HASH_BITS : natural := 6;
-- A RAM word address is the block index followed by a 3-bit
-- word-within-block number.
constant PWC_ADDR_BITS : natural := PWC_HASH_BITS + 3;
subtype pwc_word_t is std_ulogic_vector(PWC_WIDTH - 1 downto 0);
-- Backing store: 2 * PWC_DEPTH words of PWC_WIDTH bits.
type pwc_t is array(0 to 2 * PWC_DEPTH - 1) of pwc_word_t;
signal pwc : pwc_t;
-- Range of block indices; also used to index the PLRU array.
subtype pwc_index_t is integer range 0 to 2**PWC_HASH_BITS - 1;
-- Read port.  When pwc_doread is set, the word at pwc_rdaddr is loaded
-- into pwc_rddata on the next clock edge.  When pwc_rdren is set,
-- pwc_rddata is copied into pwc_rdreg on the next clock edge.
signal pwc_doread : std_ulogic;
signal pwc_rdren : std_ulogic;
signal pwc_rdaddr : std_ulogic_vector(PWC_ADDR_BITS - 1 downto 0);
signal pwc_rddata : std_ulogic_vector(PWC_WIDTH - 1 downto 0);
signal pwc_rdreg : std_ulogic_vector(PWC_WIDTH - 1 downto 0);
-- Write port.  Bit i of pwc_wren enables writing bits i*16+15 downto
-- i*16 of pwc_wrdata into the word at pwc_wraddr.
signal pwc_wren : std_ulogic_vector(3 downto 0);
signal pwc_wraddr : std_ulogic_vector(PWC_ADDR_BITS - 1 downto 0);
signal pwc_wrdata : std_ulogic_vector(PWC_WIDTH - 1 downto 0);
-- States of the PWC state machine implemented in mmu_pwc_1.
type pwc_state_t is (IDLE,
-- Lookup.  SEARCH_<size>_<n> examines word n of the block for that
-- size, where HT is the 512GB ("half TB") level.
SEARCH1,
SEARCH_2M_0, SEARCH_2M_1, SEARCH_2M_2,
SEARCH_1G_0, SEARCH_1G_3,
SEARCH_HT_0, SEARCH_HT_3,
-- RDPDE: the value word of the entry that hit is being fetched.
RDPDE,
-- WAITW waits for a write request or completion from the main MMU
-- state machine; WRPTE1_* write the address tag and WRPTE2 writes
-- the entry's 16-bit field in word 0.
WAITW, WRPTE1_2M, WRPTE1_W3, WRPTE2,
-- INVAL1/INVAL2 sweep every block; INVAL_2M* invalidate one 2MB page.
INVAL1, INVAL2,
INVAL_2M, INVAL_2M_0, INVAL_2M_1, INVAL_2M_2);
-- Register state of the PWC state machine.
type mmu_pwc_reg_t is record
-- Current state, and the state to enter after the one currently being
-- entered (the search states are pipelined behind the RAM reads).
state : pwc_state_t;
next_state : pwc_state_t;
-- Bits 51 downto 21 of the effective address being looked up.
addr : std_ulogic_vector(30 downto 0);
-- PID used for matching: from RS for tlbie, r.pid when EA bit 63 is 0,
-- otherwise zero.
pid : std_ulogic_vector(11 downto 0);
-- Set when the EA lies outside the cacheable 52-bit space; the search
-- then reports a miss straight away.
bad_ea : std_ulogic;
-- Block indices for the three cached levels.
hash_2M : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
hash_1G : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
hash_512G : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
-- Copy of l_in.tlbie taken in IDLE.
is_tlbie : std_ulogic;
-- Per-way flags: the way is still a candidate for a hit at that level.
may_hit_2M : std_ulogic_vector(3 downto 0);
may_hit_1G : std_ulogic_vector(3 downto 0);
may_hit_512G : std_ulogic_vector(3 downto 0);
-- Set once no hit is possible at that level.
missed_2M : std_ulogic;
missed_1G : std_ulogic;
missed_512G : std_ulogic;
-- Search result flags; hit_size is "00" for 2M, "01" for 1G and "10"
-- for 512G.
hit : std_ulogic;
miss : std_ulogic;
hit_size : std_ulogic_vector(1 downto 0);
-- Way that hit or is being written; drives the PLRU access input.
sel_way : std_ulogic_vector(1 downto 0);
-- Way chosen for replacement in the block for each level.
repl_way_2M : std_ulogic_vector(1 downto 0);
repl_way_1G : std_ulogic_vector(1 downto 0);
repl_way_HT : std_ulogic_vector(1 downto 0);
-- Leaf bit and level of the entry being written, captured in WAITW.
wr_leaf : std_ulogic;
wr_level : std_ulogic_vector(1 downto 0);
-- Request to write the updated PLRU state for block rd_hash.
update_plru : std_ulogic;
-- Set when a tlbie has been fully processed by the PWC.
tlbie_done : std_ulogic;
-- Invalidation mode: whole cache, PDEs as well as PTEs, match on PID.
inval_all : std_ulogic;
inval_pdes : std_ulogic;
inval_pid : std_ulogic;
-- Block index used to form the RAM read address and to index the PLRU
-- array.
rd_hash : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
-- Copy of rd_hash captured whenever pwc_rdren is asserted; used as the
-- write block index during the invalidation sweep.
reg_hash : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
end record;
-- Reset value of the PWC registers.  The machine starts in INVAL2 with
-- inval_all set and both hashes at zero, so the first thing it does
-- after reset is clear word 0 of every block before going to IDLE.
constant mmu_pwc_reg_init : mmu_pwc_reg_t := (
state => INVAL2, next_state => IDLE, inval_all => '1',
addr => 31x"0", pid => 12x"0",
hash_2M => (others => '0'), hash_1G => (others => '0'),
hash_512G => (others => '0'),
rd_hash => (others => '0'), reg_hash => (others => '0'),
may_hit_2M => "0000", may_hit_1G => "0000", may_hit_512G => "0000",
sel_way => "00", hit_size => "00",
repl_way_2M => "00", repl_way_1G => "00", repl_way_HT => "00",
wr_level => "00",
others => '0');
-- pr is the registered state, prin the combinational next state.
signal pr, prin : mmu_pwc_reg_t;
-- PWC PLRU array
-- One 3-bit PLRU tree per block (set) of the PWC.
type pwc_plru_array is array(pwc_index_t) of std_ulogic_vector(2 downto 0);
signal pwc_plru_ram : pwc_plru_array;
-- Tree state read for block pr.rd_hash, the tree after accounting for
-- an access to pr.sel_way, and the way the PLRU function selects as
-- victim.
signal pwc_plru_cur : std_ulogic_vector(2 downto 0);
signal pwc_plru_upd : std_ulogic_vector(2 downto 0);
signal pwc_plru_victim : std_ulogic_vector(1 downto 0);
function addr_hash_4k(ea: std_ulogic_vector(63 downto 0);
pid: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
variable h : std_ulogic_vector(TLB_HASH_BITS - 1 downto 0);
@ -161,24 +270,56 @@ architecture behave of mmu is
return h;
end;
function find_first_zero(x: std_ulogic_vector(3 downto 0)) return std_ulogic_vector is
-- Block index for a 2MB-level PWC entry: folds EA bits 26..21, 32..27
-- and 51..46 together with both halves of the PID and a per-level
-- constant.
function addr_hash_2M(ea: std_ulogic_vector(63 downto 0);
                      pid: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
    variable h : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
begin
    -- address contribution
    h := ea(26 downto 21) xor ea(32 downto 27);
    h := h xor ea(51 downto 46);
    -- PID contribution
    h := h xor pid(5 downto 0) xor pid(11 downto 6);
    -- per-level constant
    h := h xor 6x"09";
    return h;
end;
-- Block index for a 1GB-level PWC entry: folds EA bits 35..30, 41..36
-- and 51..46 together with both halves of the PID and a per-level
-- constant.
function addr_hash_1G(ea: std_ulogic_vector(63 downto 0);
                      pid: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
    variable h : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
begin
    -- address contribution
    h := ea(35 downto 30) xor ea(41 downto 36);
    h := h xor ea(51 downto 46);
    -- PID contribution
    h := h xor pid(5 downto 0) xor pid(11 downto 6);
    -- per-level constant
    h := h xor 6x"12";
    return h;
end;
-- Block index for a 512GB-level PWC entry: folds EA bits 44..39 and
-- 51..46 together with both halves of the PID and a per-level constant.
function addr_hash_512G(ea: std_ulogic_vector(63 downto 0);
                        pid: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
    variable h : std_ulogic_vector(PWC_HASH_BITS - 1 downto 0);
begin
    -- address contribution
    h := ea(44 downto 39) xor ea(51 downto 46);
    -- PID contribution
    h := h xor pid(5 downto 0) xor pid(11 downto 6);
    -- per-level constant
    h := h xor 6x"24";
    return h;
end;
-- Returns the 2-bit index of the lowest-numbered bit of x that is '1'.
-- Only bits 0 to 2 are tested; if none of them is set the result is
-- "11" regardless of bit 3.
-- NOTE(review): this span still carries the superseded "= '0'" test
-- from the old find_first_zero alongside the new "= '1'" test; only
-- the latter belongs to find_first_one.
function find_first_one(x: std_ulogic_vector(3 downto 0)) return std_ulogic_vector is
begin
for i in 0 to 2 loop
if x(i) = '0' then
if x(i) = '1' then
return std_ulogic_vector(to_unsigned(i, 2));
end if;
end loop;
return "11";
end;
function check_perm(pte: std_ulogic_vector(63 downto 0); priv: std_ulogic;
iside: std_ulogic; store: std_ulogic) return std_ulogic is
function check_perm_c(pte: std_ulogic_vector(63 downto 0); priv: std_ulogic;
iside: std_ulogic; store: std_ulogic; cbit : std_ulogic)
return std_ulogic is
variable ok: std_ulogic;
begin
ok := '0';
if priv = '1' or pte(3) = '0' then
if iside = '0' then
ok := pte(1) or (pte(2) and not store);
if store = '0' then
ok := pte(1) or pte(2); -- loads need R or W permission
else
ok := pte(1) and cbit; -- stores need W and C
end if;
else
-- no IAMR, so no KUEP support for now
-- deny execute permission if cache inhibited
@ -344,7 +485,7 @@ begin
if valids = "1111" then
tv.repl_way := tlb_plru_victim;
else
tv.repl_way := find_first_zero(valids);
tv.repl_way := find_first_one(not valids);
end if;
-- next read word 2 of group
idx := "010";
@ -479,6 +620,520 @@ begin
trin <= tv;
end process;
-- Synchronous reads and writes to PWC array
-- Clocked read and write ports of the PWC array.
-- Reads are two-stage: pwc_doread loads pwc_rddata from the array, and
-- pwc_rdren moves pwc_rddata on into pwc_rdreg.  Both stages sample
-- their source before this edge's updates, so pwc_rdreg receives the
-- word fetched by an earlier pwc_doread.
mmu_pwc_ram: process(clk)
begin
if rising_edge(clk) then
-- second read stage: capture the previously fetched word
if pwc_rdren = '1' then
pwc_rdreg <= pwc_rddata;
end if;
-- first read stage: fetch the addressed word
if pwc_doread = '1' then
pwc_rddata <= pwc(to_integer(unsigned(pwc_rdaddr)));
end if;
-- write port with one enable per 16-bit lane of the word
if pwc_wren /= "0000" then
for i in 0 to 3 loop
if pwc_wren(i) = '1' then
pwc(to_integer(unsigned(pwc_wraddr)))(i*16 + 15 downto i*16) <=
pwc_wrdata(i*16 + 15 downto i*16);
end if;
end loop;
end if;
end if;
end process;
-- PWC PLRU
-- PLRU function for the PWC's 4 ways (BITS => 2).
-- It is given the way being accessed (pr.sel_way) and the current tree
-- for the block, and produces the updated tree and the victim way.
pwc_plru : entity work.plrufn
generic map (
BITS => 2
)
port map (
acc => pr.sel_way,
tree_in => pwc_plru_cur,
tree_out => pwc_plru_upd,
lru => pwc_plru_victim
);
-- Clocked access to the per-block PLRU state.  Every cycle the tree for
-- block pr.rd_hash is fetched into pwc_plru_cur; when pr.update_plru is
-- set the updated tree is stored back to the same block.
process(clk)
begin
    if rising_edge(clk) then
        -- store the updated tree for the block that was just accessed
        if pr.update_plru = '1' then
            assert not is_X(pr.rd_hash) severity failure;
            pwc_plru_ram(to_integer(unsigned(pr.rd_hash))) <= pwc_plru_upd;
        end if;
        -- fetch the tree for the block currently addressed; propagate X
        -- rather than indexing the array with an unknown value
        if not is_X(pr.rd_hash) then
            pwc_plru_cur <= pwc_plru_ram(to_integer(unsigned(pr.rd_hash)));
        else
            pwc_plru_cur <= (others => 'X');
        end if;
    end if;
end process;
-- State machine for doing PWC searches, updates and invalidations
-- State register for the PWC state machine: loads the combinational
-- next state each clock, or the initial value while rst is asserted.
mmu_pwc_0: process(clk)
begin
    if rising_edge(clk) then
        pr <= prin;
        -- synchronous reset overrides the normal update
        if rst = '1' then
            pr <= mmu_pwc_reg_init;
        end if;
    end if;
end process;
-- Combinational next-state and RAM-control logic for the page walk cache.
--
-- Starting from the registered state pr, this process computes the next
-- state (driven onto prin) and the PWC RAM controls for the current
-- cycle: pwc_doread/pwc_rdaddr issue a read, pwc_rdren moves the
-- previously fetched word into pwc_rdreg, and pwc_wren/pwc_wraddr/
-- pwc_wrdata perform a write.  Both RAM addresses are a block index
-- (pv.rd_hash for reads, wr_hash for writes) followed by the 3-bit
-- word-in-block number idx.
--
-- Searches are pipelined: while one state examines the word in
-- pwc_rdreg, the read for a later state is already being issued, and
-- pr.next_state records which state will consume the following word.
mmu_pwc_1: process(all)
    variable pv : mmu_pwc_reg_t;
    variable isf : std_ulogic_vector(1 downto 0);
    variable ap : std_ulogic_vector(2 downto 0);
    variable is_hit : std_ulogic;
    variable valids : std_ulogic_vector(3 downto 0);
    variable idx : std_ulogic_vector(2 downto 0);
    variable wdat : std_ulogic_vector(15 downto 0);
    variable rway : std_ulogic_vector(1 downto 0);
    variable wr_hash : std_ulogic_vector(5 downto 0);
begin
    -- defaults: hold state, no RAM activity, address word 0 of block 0
    pv := pr;
    pwc_doread <= '0';
    pwc_rdren <= '0';
    pwc_wren <= "0000";
    pwc_wrdata <= (others => '0');
    is_hit := '0';
    idx := "000";
    wr_hash := (others => '0');
    pv.update_plru := '0';

    case pr.state is
    when IDLE =>
        pv.state := IDLE;
        pv.next_state := IDLE;
        pv.addr := l_in.addr(51 downto 21);
        pv.pid := (others => '0');
        if l_in.tlbie = '1' then
            -- PID for tlbie comes from RS
            pv.pid := l_in.rs(43 downto 32);
        elsif l_in.addr(63) = '0' then
            -- we currently only implement quadrants 0 and 3
            pv.pid := r.pid;
        end if;
        pv.bad_ea := (or (l_in.addr(61 downto 52)) or (l_in.addr(63) xor l_in.addr(62)))
                     and not l_in.tlbie;
        pv.hash_2M := addr_hash_2M(l_in.addr, pv.pid);
        pv.hash_1G := addr_hash_1G(l_in.addr, pv.pid);
        pv.hash_512G := addr_hash_512G(l_in.addr, pv.pid);
        pv.rd_hash := pv.hash_2M;
        pv.is_tlbie := l_in.tlbie;
        pv.missed_2M := '0';
        pv.missed_1G := '0';
        pv.missed_512G := '0';
        if l_in.valid = '1' then
            pv.hit := '0';
            pv.miss := '0';
            pv.tlbie_done := '0';
            pv.inval_all := '0';
            pv.inval_pdes := '0';
            pv.inval_pid := '0';
            if l_in.tlbie = '1' then
                -- decode what type of tlbie this is
                isf := l_in.addr(11 downto 10);
                pv.inval_pdes := (l_in.ric(0) or l_in.ric(1));
                if l_in.slbia = '1' then
                    -- no effect on this PWC (flushes L1 TLBs below)
                    pv.tlbie_done := '1';
                elsif isf(1) = '1' and pv.inval_pdes = '1' then
                    -- invalidate everything in this cache
                    pv.inval_all := '1';
                    pv.rd_hash := (others => '0');
                    pv.reg_hash := (others => '0');
                    pv.state := INVAL2;
                elsif isf(1) = '1' or isf(0) = '1' then
                    -- invalidate PTEs but not PDEs, or invalidate by PID
                    -- in these cases we need to read word 0 of each group
                    pv.inval_pid := not isf(1);
                    pv.rd_hash := (others => '0');
                    pwc_doread <= '1';
                    pv.state := INVAL1;
                else
                    -- invalidate single page
                    ap := l_in.addr(7 downto 5);    -- actual page size
                    if ap = "001" then              -- 2MB page
                        pwc_doread <= '1';
                        pv.state := INVAL_2M;
                    else
                        -- 4k, 64k, 1G or unrecognized
                        pv.tlbie_done := '1';
                    end if;
                end if;
            else
                -- first read word 0 of 2M group
                pwc_doread <= '1';
                pv.state := SEARCH1;
                pv.next_state := SEARCH_2M_0;
            end if;
        end if;

    when SEARCH1 =>
        -- next read word 0 of 1G group
        pv.rd_hash := pr.hash_1G;
        pwc_doread <= '1';
        pwc_rdren <= '1';
        if pr.bad_ea = '0' then
            pv.state := SEARCH_2M_0;
        else
            pv.miss := '1';
            pv.state := IDLE;
        end if;

    when SEARCH_2M_0 =>
        -- pwc_rdreg contains 2M group word 0, check for hits/misses
        pv.may_hit_2M := "0000";
        valids := "0000";
        for i in 0 to 3 loop
            valids(i) := pwc_rdreg(i*16 + 15);
            if pwc_rdreg(i*16 + 15) = '1' and
                pwc_rdreg(i*16 + 11 downto i*16) = pr.pid and
                pwc_rdreg(i*16 + 13 downto i*16 + 12) = "00" then
                pv.may_hit_2M(i) := '1';
            end if;
        end loop;
        if valids = "1111" then
            pv.repl_way_2M := pwc_plru_victim;
        else
            pv.repl_way_2M := find_first_one(not valids);
        end if;
        -- if any 2M hits are possible, read word 1 of 2M group next
        if pv.may_hit_2M /= "0000" then
            pv.rd_hash := pr.hash_2M;
            idx := "001";
            pv.next_state := SEARCH_2M_1;
        else
            -- otherwise read word 0 of 512G group next
            pv.missed_2M := '1';
            pv.rd_hash := pr.hash_512G;
            pv.next_state := SEARCH_HT_0;
        end if;
        pv.state := SEARCH_1G_0;
        pwc_doread <= '1';
        pwc_rdren <= '1';

    when SEARCH_2M_1 =>
        -- pwc_rdreg contains 2M group word 1
        for i in 0 to 1 loop
            if pwc_rdreg(i*32 + 31 downto i*32 + 7) /= pr.addr(30 downto 6) then
                pv.may_hit_2M(i) := '0';
            end if;
        end loop;
        if pv.may_hit_2M = "0000" then
            pv.missed_2M := '1';
        end if;
        -- decide what to read next based on whether 1G hits are still possible
        if pr.missed_1G = '0' then
            pv.rd_hash := pr.hash_1G;
            idx := "011";
            pv.next_state := SEARCH_1G_3;
        else
            pv.rd_hash := pr.hash_512G;
            pv.next_state := SEARCH_HT_0;
        end if;
        pv.state := pr.next_state;      -- will be SEARCH_2M_2
        pwc_doread <= '1';
        pwc_rdren <= '1';

    when SEARCH_2M_2 =>
        -- pwc_rdreg contains 2M group word 2
        for i in 0 to 1 loop
            if pwc_rdreg(i*32 + 31 downto i*32 + 7) /= pr.addr(30 downto 6) then
                pv.may_hit_2M(i+2) := '0';
            end if;
        end loop;
        -- Can now decide hit/miss for 2M entries
        if pv.may_hit_2M /= "0000" then
            pv.sel_way := find_first_one(pv.may_hit_2M);
            pv.hit_size := "00";
            pv.rd_hash := pr.hash_2M;
            idx := '1' & pv.sel_way;
            pv.state := RDPDE;
        else
            pv.missed_2M := '1';
            pv.rd_hash := pr.hash_512G;
            if pr.missed_1G = '0' then
                pv.next_state := SEARCH_HT_0;
            else
                idx := "011";
                pv.next_state := SEARCH_HT_3;
            end if;
            pv.state := pr.next_state;
            pwc_rdren <= '1';
        end if;
        pwc_doread <= '1';

    when SEARCH_1G_0 =>
        -- pwc_rdreg contains 1G group word 0, check for hits/misses
        pv.may_hit_1G := "0000";
        valids := "0000";
        for i in 0 to 3 loop
            valids(i) := pwc_rdreg(i*16 + 15);
            if pwc_rdreg(i*16 + 15) = '1' and
                pwc_rdreg(i*16 + 11 downto i*16) = pr.pid and
                pwc_rdreg(i*16 + 13 downto i*16 + 12) = "01" then
                pv.may_hit_1G(i) := '1';
            end if;
        end loop;
        if valids = "1111" then
            pv.repl_way_1G := pwc_plru_victim;
        else
            pv.repl_way_1G := find_first_one(not valids);
        end if;
        if pv.may_hit_1G = "0000" then
            pv.missed_1G := '1';
        end if;
        if pr.missed_2M = '0' then
            -- If 2M hits are still possible, read word 2 of 2M group next
            pv.rd_hash := pr.hash_2M;
            idx := "010";
            pv.next_state := SEARCH_2M_2;
        elsif pv.missed_1G = '0' then
            -- otherwise, if any 1G hits are possible, read word 3 of 1G group next
            pv.rd_hash := pr.hash_1G;
            idx := "011";
            pv.next_state := SEARCH_1G_3;
        else
            -- otherwise read word 0 of 512G group
            pv.rd_hash := pr.hash_512G;
            pv.next_state := SEARCH_HT_0;
        end if;
        pv.state := pr.next_state;
        pwc_doread <= '1';
        pwc_rdren <= '1';

    when SEARCH_1G_3 =>
        -- pwc_rdreg contains 1G group word 3
        for i in 0 to 3 loop
            if pwc_rdreg(i*16 + 15 downto i*16) /= pr.addr(30 downto 15) then
                pv.may_hit_1G(i) := '0';
            end if;
        end loop;
        -- Can now decide hit/miss for 1G entries
        if pv.may_hit_1G /= "0000" then
            pv.sel_way := find_first_one(pv.may_hit_1G);
            pv.hit_size := "01";
            pv.rd_hash := pr.hash_1G;
            idx := '1' & pv.sel_way;
            pv.state := RDPDE;
            pwc_doread <= '1';
        else
            pv.missed_1G := '1';
            if pr.missed_512G = '0' then
                pv.state := pr.next_state;
                pwc_rdren <= '1';
            else
                pv.miss := '1';
                pv.state := WAITW;
            end if;
        end if;

    when SEARCH_HT_0 =>
        -- pwc_rdreg contains 512G group (half TB) word 0, check for hits/misses
        pv.may_hit_512G := "0000";
        valids := "0000";
        for i in 0 to 3 loop
            valids(i) := pwc_rdreg(i*16 + 15);
            if pwc_rdreg(i*16 + 15) = '1' and
                pwc_rdreg(i*16 + 11 downto i*16) = pr.pid and
                pwc_rdreg(i*16 + 13 downto i*16 + 12) = "10" then
                pv.may_hit_512G(i) := '1';
            end if;
        end loop;
        if valids = "1111" then
            pv.repl_way_HT := pwc_plru_victim;
        else
            pv.repl_way_HT := find_first_one(not valids);
        end if;
        -- if any 512G hits are possible, read word 3 of 512G group next
        if pv.may_hit_512G /= "0000" then
            pv.rd_hash := pr.hash_512G;
            idx := "011";
            pv.next_state := SEARCH_HT_3;
            pwc_doread <= '1';
        else
            pv.missed_512G := '1';
        end if;
        if pv.missed_512G = '1' and pr.missed_1G = '1' then
            pv.miss := '1';
            pv.state := WAITW;
        else
            pv.state := pr.next_state;
            pwc_rdren <= '1';
        end if;

    when SEARCH_HT_3 =>
        -- pwc_rdreg contains 512G group word 3
        for i in 0 to 3 loop
            if pwc_rdreg(i*16 + 15 downto i*16 + 9) /= pr.addr(30 downto 24) then
                pv.may_hit_512G(i) := '0';
            end if;
        end loop;
        -- Can now decide hit/miss for 512G entries
        if pv.may_hit_512G /= "0000" then
            pv.sel_way := find_first_one(pv.may_hit_512G);
            pv.hit_size := "10";
            pv.rd_hash := pr.hash_512G;
            idx := '1' & pv.sel_way;
            pv.state := RDPDE;
            pwc_doread <= '1';
        else
            pv.miss := '1';
            pv.state := WAITW;
        end if;

    when RDPDE =>
        -- move the value word of the hit entry into pwc_rdreg and
        -- record the access in the PLRU state
        pwc_rdren <= '1';
        pv.hit := '1';
        pv.update_plru := '1';
        pv.state := WAITW;

    when WAITW =>
        -- wait for the main state machine to ask for an entry to be
        -- written, or to finish the translation
        pwc_wrdata <= r.pde;
        pv.wr_leaf := r.pde(62);
        pv.wr_level := r.pwc_level;
        rway := "00";
        if r.rereadpte = '1' then
            -- rewriting a 2M PTE with changed permissions
            rway := pr.sel_way;
            wr_hash := pr.hash_2M;
        else
            -- choose way according to which group is to be written
            case r.pwc_level is
                when "00" =>        -- 2M
                    rway := pr.repl_way_2M;
                    wr_hash := pr.hash_2M;
                when "01" =>
                    rway := pr.repl_way_1G;
                    wr_hash := pr.hash_1G;
                when others =>
                    rway := pr.repl_way_HT;
                    wr_hash := pr.hash_512G;
            end case;
        end if;
        if r.wr_pwcram = '1' then
            -- write PDE to one of words 4-7
            pwc_wren <= "1111";
            idx := '1' & rway;
            pv.rd_hash := wr_hash;
            pv.sel_way := rway;
            pv.update_plru := '1';
            if r.pwc_level = "00" then
                pv.state := WRPTE1_2M;
            else
                pv.state := WRPTE1_W3;
            end if;
        elsif r.done = '1' or r.err = '1' then
            pv.state := IDLE;
        end if;

    when WRPTE1_2M =>
        -- write the 2M address tag into the 32-bit half of word 1 or 2
        -- belonging to the selected way
        pwc_wrdata <= pr.addr & '0' & pr.addr & '0';
        wr_hash := pr.rd_hash;
        if pr.sel_way(0) = '1' then
            pwc_wren <= "1100";
        else
            pwc_wren <= "0011";
        end if;
        idx := '0' & pr.sel_way(1) & not pr.sel_way(1);
        pv.state := WRPTE2;

    when WRPTE1_W3 =>
        -- write the 1G/512G address tag into the selected way's 16-bit
        -- lane of word 3
        pwc_wrdata <= pr.addr(30 downto 15) & pr.addr(30 downto 15) &
                      pr.addr(30 downto 15) & pr.addr(30 downto 15);
        wr_hash := pr.rd_hash;
        pwc_wren(to_integer(unsigned(pr.sel_way))) <= '1';
        idx := "011";
        pv.state := WRPTE2;

    when WRPTE2 =>
        -- word 0 gets valid, leaf bit, page size, PID
        wdat := '1' & pr.wr_leaf & pr.wr_level & pr.pid;
        pwc_wrdata <= wdat & wdat & wdat & wdat;
        -- write one 16b section of word 0
        wr_hash := pr.rd_hash;
        pwc_wren(to_integer(unsigned(pr.sel_way))) <= '1';
        if pr.wr_leaf = '1' then
            pv.state := IDLE;
        else
            pv.state := WAITW;
        end if;

    when INVAL1 =>
        -- word 0 of block 0 is on its way; start reading block 1
        pv.rd_hash := 6x"01";
        pwc_doread <= '1';
        pwc_rdren <= '1';
        pv.state := INVAL2;

    when INVAL2 =>
        if pr.inval_all = '1' then
            -- clear all four fields of word 0 without reading them
            pwc_wren <= "1111";
            pv.reg_hash := pr.rd_hash;
        else
            -- clear only those fields that match the tlbie criteria
            valids := "0000";
            for i in 0 to 3 loop
                if pwc_rdreg(i*16 + 15) = '1' and
                    (pwc_rdreg(i*16 + 14) = '1' or pr.inval_pdes = '1') and
                    (pwc_rdreg(i*16 + 11 downto i*16) = pr.pid or pr.inval_pid = '0') then
                    valids(i) := '1';
                end if;
            end loop;
            pwc_wren <= valids;
            pwc_doread <= '1';
            pwc_rdren <= '1';
        end if;
        wr_hash := pr.reg_hash;
        pv.rd_hash := std_ulogic_vector(unsigned(pv.rd_hash) + 1);
        if pr.reg_hash = 6x"3f" then
            pv.tlbie_done := '1';
            pv.state := IDLE;
        end if;

    when INVAL_2M =>
        -- next read word 1 of 2M group
        idx := "001";
        pwc_doread <= '1';
        pwc_rdren <= '1';
        pv.state := INVAL_2M_0;

    when INVAL_2M_0 =>
        -- pwc_rdreg contains 2M group word 0
        pv.may_hit_2M := "0000";
        for i in 0 to 3 loop
            if pwc_rdreg(i*16 + 15 downto i*16 + 12) = "1100" and
                pwc_rdreg(i*16 + 11 downto i*16) = pr.pid then
                pv.may_hit_2M(i) := '1';
            end if;
        end loop;
        -- next read word 2 of 2M group
        idx := "010";
        pwc_doread <= '1';
        pwc_rdren <= '1';
        pv.state := INVAL_2M_1;

    when INVAL_2M_1 =>
        -- pwc_rdreg contains 2M group word 1
        for i in 0 to 1 loop
            if pwc_rdreg(i*32 + 31 downto i*32 + 7) /= pr.addr(30 downto 6) then
                pv.may_hit_2M(i) := '0';
            end if;
        end loop;
        pwc_rdren <= '1';
        pv.state := INVAL_2M_2;

    when INVAL_2M_2 =>
        -- pwc_rdreg contains 2M group word 2
        -- Compare against the tag held in pr.addr (EA bits 51..21), the
        -- same value used for ways 0 and 1 in INVAL_2M_1 and for the
        -- tags written by WRPTE1_2M.
        for i in 0 to 1 loop
            if pwc_rdreg(i*32 + 31 downto i*32 + 7) /= pr.addr(30 downto 6) then
                pv.may_hit_2M(i+2) := '0';
            end if;
        end loop;
        -- clear the word 0 field of every way that matched
        wr_hash := pr.hash_2M;
        pwc_wren <= pv.may_hit_2M;
        pv.tlbie_done := '1';
        pv.state := IDLE;
    end case;

    -- completion or error in the main state machine returns the PWC to IDLE
    if r.done = '1' or r.err = '1' then
        pv.state := IDLE;
    end if;

    -- remember which block the word entering pwc_rdreg was read from
    if pwc_rdren = '1' then
        pv.reg_hash := pr.rd_hash;
    end if;

    pwc_rdaddr <= pv.rd_hash & idx;
    pwc_wraddr <= wr_hash & idx;

    prin <= pv;
end process;
-- Multiplex internal SPR values back to loadstore1, selected
-- by l_in.sprnf.
l_out.sprval <= r.ptcr when l_in.sprnf = '1' else x"0000000000000" & r.pid;
@ -514,6 +1169,9 @@ begin
report "send load addr=" & to_hstring(d_out.addr) &
" addrsh=" & to_hstring(addrsh) & " mask=" & to_hstring(mask);
end if;
if l_in.valid = '1' or l_in.mtspr = '1' then
assert r.state = IDLE severity failure;
end if;
r <= rin;
end if;
end if;
@ -612,6 +1270,7 @@ begin
variable rc_ok : std_ulogic;
variable addr : std_ulogic_vector(63 downto 0);
variable data : std_ulogic_vector(63 downto 0);
variable tlbdone, pwcdone : std_ulogic;
begin
v := r;
v.valid := '0';
@ -623,6 +1282,7 @@ begin
v.segerror := '0';
v.perm_err := '0';
v.rc_error := '0';
v.wr_pwcram := '0';
tlb_load := '0';
v.tlbie_req := '0';
v.inval_all := '0';
@ -635,24 +1295,17 @@ begin
data(i * 8 + 7 downto i * 8) := d_in.data((7 - i) * 8 + 7 downto (7 - i) * 8);
end loop;
if r.addr(63) = '0' then
pgtbl := r.pgtbl0;
pt_valid := r.pt0_valid;
else
pgtbl := r.pgtbl3;
pt_valid := r.pt3_valid;
end if;
case r.state is
when IDLE =>
if l_in.addr(63) = '0' then
pgtbl := r.pgtbl0;
pt_valid := r.pt0_valid;
else
pgtbl := r.pgtbl3;
pt_valid := r.pt3_valid;
end if;
-- rts == radix tree size, # address bits being translated
six := '0' & pgtbl(62 downto 61) & pgtbl(7 downto 5);
rts := unsigned(six);
-- mbits == # address bits to index top level of tree
mbits := unsigned('0' & pgtbl(4 downto 0));
-- set v.shift to rts so that we can use finalmask for the segment check
v.shift := rts;
v.mask_size := mbits(4 downto 0);
v.pgbase := pgtbl(55 downto 8) & x"00";
v.rereadpte := '0';
if l_in.valid = '1' then
v.addr := l_in.addr;
@ -677,18 +1330,9 @@ begin
if r.ptb_valid = '0' then
-- need to fetch process table base from partition table
v.state := PART_TBL_READ;
elsif pt_valid = '0' then
-- need to fetch process table entry
-- set v.shift so we can use finalmask for generating
-- the process table entry address
v.shift := unsigned('0' & r.prtbl(4 downto 0));
v.state := PROC_TBL_READ;
elsif mbits = 0 then
-- Use RPDS = 0 to disable radix tree walks
v.state := RADIX_FINISH;
v.invalid := '1';
else
v.state := SEGMENT_CHECK;
-- wait for TLB and PWC to do their stuff
v.state := TLBWAIT;
end if;
end if;
end if;
@ -711,7 +1355,7 @@ begin
end if;
when DO_TLBIE =>
if r.is_mtspr = '1' or tr.tlbie_done = '1' then
if r.is_mtspr = '1' or (tr.tlbie_done = '1' and pr.tlbie_done = '1') then
v.state := RADIX_FINISH;
end if;
@ -724,12 +1368,61 @@ begin
if d_in.done = '1' then
v.prtbl := data;
v.ptb_valid := '1';
v.state := PART_TBL_DONE;
v.state := TLBWAIT;
end if;
when PART_TBL_DONE =>
v.shift := unsigned('0' & r.prtbl(4 downto 0));
v.state := PROC_TBL_READ;
when TLBWAIT =>
-- If we have a TLB hit, or a PWC hit that is a
-- large-page PTE, check permissions;
-- if the access is not permitted, we will need to reread
-- the PTE from memory to verify, because increasing
-- permission on a PTE doesn't require tlbie.
-- (Note that R must be set in the PTE, otherwise it
-- wouldn't have been written to the TLB.)
tlbdone := tr.hit or tr.miss;
pwcdone := pr.hit or pr.miss;
if tr.hit = '1' and r.rereadpte = '0' then
v.pde := tlb_rdreg;
if check_perm_c(tlb_rdreg, r.priv, r.iside, r.store, tlb_rdreg(7)) = '1' then
v.shift := to_unsigned(0, 6);
v.state := RADIX_LOAD_TLB;
else
v.rereadpte := '1';
end if;
elsif pr.hit = '1' and pr.hit_size = "00" and pwc_rdreg(62) = '1' and r.rereadpte = '0' then
v.pde := pwc_rdreg;
if check_perm_c(pwc_rdreg, r.priv, r.iside, r.store, pwc_rdreg(7)) = '1' then
-- Large-page (2M) PTE from PWC is in pwc_rdreg
v.shift := to_unsigned(9, 6);
v.state := RADIX_LOAD_TLB;
else
v.rereadpte := '1';
end if;
elsif pr.hit = '1' and pwc_rdreg(62) = '0' and tlbdone = '1' then
v.pde := pwc_rdreg;
-- PDE from PWC is in pwc_rdreg
-- multiply pr.hit_size by 9 to get shift
six := '0' & pr.hit_size & '0' & pr.hit_size;
v.shift := unsigned(six);
v.mask_size := to_unsigned(9, 5);
v.pgbase := pwc_rdreg(55 downto 8) & x"00";
v.state := RADIX_LOOKUP;
elsif tlbdone = '1' and pwcdone = '1' then
if pt_valid = '0' then
-- need to fetch process table entry
-- set v.shift so we can use finalmask for generating
-- the process table entry address
v.shift := unsigned('0' & r.prtbl(4 downto 0));
v.state := PROC_TBL_READ;
else
-- rts == radix tree size, # address bits being translated
six := '0' & pgtbl(62 downto 61) & pgtbl(7 downto 5);
rts := unsigned(six);
-- set v.shift to rts so that we can use finalmask for the segment check
v.shift := rts;
v.state := SEGMENT_CHECK;
end if;
end if;
when PROC_TBL_READ =>
dcreq := '1';
@ -748,18 +1441,9 @@ begin
-- rts == radix tree size, # address bits being translated
six := '0' & data(62 downto 61) & data(7 downto 5);
rts := unsigned(six);
-- mbits == # address bits to index top level of tree
mbits := unsigned('0' & data(4 downto 0));
-- set v.shift to rts so that we can use finalmask for the segment check
v.shift := rts;
v.mask_size := mbits(4 downto 0);
v.pgbase := data(55 downto 8) & x"00";
if mbits = 0 then
v.state := RADIX_FINISH;
v.invalid := '1';
else
v.state := SEGMENT_CHECK;
end if;
v.state := SEGMENT_CHECK;
end if;
if d_in.err = '1' then
v.state := RADIX_FINISH;
@ -767,39 +1451,22 @@ begin
end if;
when SEGMENT_CHECK =>
mbits := '0' & r.mask_size;
mbits := unsigned('0' & pgtbl(4 downto 0));
v.mask_size := unsigned(pgtbl(4 downto 0));
v.pgbase := pgtbl(55 downto 8) & x"00";
v.shift := r.shift + (31 - 12) - mbits;
nonzero := or(r.addr(61 downto 31) and not finalmask(30 downto 0));
if r.addr(63) /= r.addr(62) or nonzero = '1' then
if mbits = 0 then
-- Use RPDS = 0 to disable radix tree walks
v.state := RADIX_FINISH;
v.invalid := '1';
elsif r.addr(63) /= r.addr(62) or nonzero = '1' then
v.state := RADIX_FINISH;
v.segerror := '1';
elsif mbits < 5 or mbits > 16 or mbits > (r.shift + (31 - 12)) then
v.state := RADIX_FINISH;
v.badtree := '1';
elsif tr.miss = '1' then
v.state := RADIX_LOOKUP;
else
v.state := TLBWAIT;
end if;
when TLBWAIT =>
v.pde := tlb_rdreg;
if tr.hit = '1' then
-- PTE from the TLB entry is in tlb_rdreg
-- Check permissions; if the access is not permitted,
-- reread the PTE from memory to verify, because increasing
-- permission on a PTE doesn't require tlbie.
-- Note that R must be set in the PTE, otherwise it
-- wouldn't have been written to the TLB.
perm_ok := check_perm(tlb_rdreg, r.priv, r.iside, r.store);
rc_ok := tlb_rdreg(7) or not r.store;
if perm_ok = '1' and rc_ok = '1' then
v.shift := to_unsigned(0, 6);
v.state := RADIX_LOAD_TLB;
else
v.state := RADIX_LOOKUP;
end if;
elsif tr.miss = '1' then
v.state := RADIX_LOOKUP;
end if;
@ -815,7 +1482,7 @@ begin
-- test leaf bit
if data(62) = '1' then
-- check permissions and RC bits
perm_ok := check_perm(data, r.priv, r.iside, r.store);
perm_ok := check_perm_c(data, r.priv, r.iside, r.store, '1');
rc_ok := data(8) and (data(7) or not r.store);
if perm_ok = '1' and rc_ok = '1' then
v.state := RADIX_LOAD_TLB;
@ -824,6 +1491,11 @@ begin
if r.shift = 0 then
v.wr_tlbram := '1';
end if;
-- 2M PTEs can be cached in the PWC
if r.shift = 9 then
v.pwc_level := "00";
v.wr_pwcram := '1';
end if;
else
v.state := RADIX_FINISH;
v.perm_err := not perm_ok;
@ -836,10 +1508,17 @@ begin
v.state := RADIX_FINISH;
v.badtree := '1';
else
v.shift := v.shift - mbits;
v.shift := r.shift - mbits;
v.mask_size := mbits(4 downto 0);
v.pgbase := data(55 downto 8) & x"00";
v.state := RADIX_LOOKUP;
-- Write entry to PWC if it is one of the supported sizes
-- i.e. 2M, 1G or 512G
if (r.shift = 9 or r.shift = 18 or r.shift = 27) and
mbits = 9 and r.rereadpte = '0' then
v.wr_pwcram := '1';
v.pwc_level := std_ulogic_vector(r.shift(4 downto 3) - 1);
end if;
end if;
end if;
else