@ -184,7 +184,6 @@ architecture rtl of dcache is
-- Type of operation on a "valid" input
type op_t is (OP_NONE,
OP_BAD, -- NC cache hit, TLB miss, prot/RC failure
OP_STCX_FAIL, -- conditional store w/o reservation
OP_LOAD_HIT, -- Cache hit on load
OP_LOAD_MISS, -- Load missing cache
OP_LOAD_NC, -- Non-cachable load
@ -195,8 +194,8 @@ architecture rtl of dcache is
type state_t is (IDLE, -- Normal load hit processing
RELOAD_WAIT_ACK, -- Cache reload wait ack
STORE_WAIT_ACK, -- Store wait ack
NC_LOAD_WAIT_ACK);-- Non-cachable load wait ack
NC_LOAD_WAIT_ACK, -- Non-cachable load wait ack
DO_STCX); -- Check for stcx. validity
--
-- Dcache operations:
@ -290,6 +289,9 @@ architecture rtl of dcache is
op : op_t;
valid : std_ulogic;
dcbz : std_ulogic;
reserve : std_ulogic;
first_dw : std_ulogic;
last_dw : std_ulogic;
real_addr : real_addr_t;
data : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0);
@ -365,10 +367,12 @@ architecture rtl of dcache is
--
type reservation_t is record
valid : std_ulogic;
addr : std_ulogic_vector(63 downto LINE_OFF_BITS);
addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
end record;
signal reservation : reservation_t;
signal kill_rsrv : std_ulogic;
signal kill_rsrv2 : std_ulogic;
-- Async signals on incoming request
signal req_index : index_t;
@ -382,10 +386,6 @@ architecture rtl of dcache is
signal early_req_row : row_t;
signal early_rd_valid : std_ulogic;
signal cancel_store : std_ulogic;
signal set_rsrv : std_ulogic;
signal clear_rsrv : std_ulogic;
signal r0_valid : std_ulogic;
signal r0_stall : std_ulogic;
@ -427,10 +427,13 @@ architecture rtl of dcache is
-- TLB PLRU output interface
signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
signal snoop_active : std_ulogic;
signal snoop_tag_set : cache_tags_set_t;
signal snoop_valid : std_ulogic;
signal snoop_wrtag : cache_tag_t;
signal snoop_index : index_t;
signal snoop_paddr : real_addr_t;
signal snoop_addr : real_addr_t;
signal snoop_hits : cache_way_valids_t;
signal req_snoop_hit : std_ulogic;
--
-- Helper functions to decode incoming requests
@ -861,26 +864,45 @@ begin
end if;
end process;
-- Snoop logic
-- Don't snoop our own cycles
snoop_addr <= addr_to_real(wb_to_addr(snoop_in.adr));
snoop_active <= snoop_in.cyc and snoop_in.stb and snoop_in.we and
not (r1.wb.cyc and not wishbone_in.stall);
kill_rsrv <= '1' when (snoop_active = '1' and reservation.valid = '1' and
snoop_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) = reservation.addr)
else '0';
-- Cache tag RAM second read port, for snooping
cache_tag_read_2 : process(clk)
variable addr : real_addr_t;
begin
if rising_edge(clk) then
-- Don't snoop our own cycles
snoop_valid <= '0';
if not (r1.wb.cyc = '1' and wishbone_in.stall = '0') then
if (snoop_in.cyc and snoop_in.stb and snoop_in.we) = '1' then
snoop_valid <= '1';
addr := addr_to_real(wb_to_addr(snoop_in.adr));
assert not is_X(addr);
snoop_tag_set <= cache_tags(to_integer(get_index(addr)));
snoop_wrtag <= get_tag(addr);
snoop_index <= get_index(addr);
end if;
if is_X(snoop_addr) then
snoop_tag_set <= (others => 'X');
else
snoop_tag_set <= cache_tags(to_integer(get_index(snoop_addr)));
end if;
snoop_paddr <= snoop_addr;
snoop_valid <= snoop_active;
end if;
end process;
-- Compare the previous cycle's snooped store address to the reservation,
-- to catch the case where a write happens on cycle 1 of a cached larx
kill_rsrv2 <= '1' when (snoop_valid = '1' and reservation.valid = '1' and
snoop_paddr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) = reservation.addr)
else '0';
snoop_tag_match : process(all)
begin
snoop_hits <= (others => '0');
for i in 0 to NUM_WAYS-1 loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = get_tag(snoop_paddr) then
snoop_hits(i) <= '1';
end if;
end loop;
end process;
-- Cache request parsing and hit detection
dcache_request : process(all)
variable req_row : row_t;
@ -901,6 +923,8 @@ begin
variable rel_match : std_ulogic;
variable fwd_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable fwd_match : std_ulogic;
variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable snoop_match : std_ulogic;
begin
-- Extract line, row and tag from request
rindex := get_index(r0.req.addr);
@ -924,9 +948,11 @@ begin
is_hit := '0';
rel_match := '0';
fwd_match := '0';
snoop_match := '0';
if r0.req.virt_mode = '1' then
rel_matches := (others => '0');
fwd_matches := (others => '0');
snp_matches := (others => '0');
for j in tlb_way_t loop
hit_way_set(j) := to_unsigned(0, WAY_BITS);
s_hit := '0';
@ -943,6 +969,9 @@ begin
tlb_valid_way(j) = '1' then
hit_way_set(j) := to_unsigned(i, WAY_BITS);
s_hit := '1';
if snoop_hits(i) = '1' then
snp_matches(j) := '1';
end if;
end if;
end loop;
hit_set(j) := s_hit;
@ -959,6 +988,7 @@ begin
hit_way := hit_way_set(to_integer(tlb_hit_way));
rel_match := rel_matches(to_integer(tlb_hit_way));
fwd_match := fwd_matches(to_integer(tlb_hit_way));
snoop_match := snp_matches(to_integer(tlb_hit_way));
end if;
else
s_tag := get_tag(r0.req.addr);
@ -970,6 +1000,9 @@ begin
read_tag(i, cache_tag_set) = s_tag then
hit_way := to_unsigned(i, WAY_BITS);
is_hit := '1';
if snoop_hits(i) = '1' then
snoop_match := '1';
end if;
end if;
end loop;
if go = '1' and not is_X(r1.reload_tag) and s_tag = r1.reload_tag then
@ -982,6 +1015,13 @@ begin
req_same_tag <= rel_match;
fwd_same_tag <= fwd_match;
-- This is 1 if the snooped write from the previous cycle hits the same
-- cache line that is being accessed in this cycle.
req_snoop_hit <= '0';
if go = '1' and snoop_match = '1' and get_index(snoop_paddr) = rindex then
req_snoop_hit <= '1';
end if;
-- Whether to use forwarded data for a load or not
use_forward_st <= '0';
use_forward_rl <= '0';
@ -1060,8 +1100,6 @@ begin
if go = '1' then
if access_ok = '0' then
op := OP_BAD;
elsif cancel_store = '1' then
op := OP_STCX_FAIL;
else
opsel := r0.req.load & nc & is_hit;
case opsel is
@ -1101,45 +1139,6 @@ begin
-- Wire up wishbone request latch out of stage 1
wishbone_out <= r1.wb;
-- Handle load-with-reservation and store-conditional instructions
reservation_comb: process(all)
begin
cancel_store <= '0';
set_rsrv <= '0';
clear_rsrv <= '0';
if r0_valid = '1' and r0.req.reserve = '1' then
-- XXX generate alignment interrupt if address is not aligned
-- XXX or if r0.req.nc = '1'
if r0.req.load = '1' then
-- load with reservation
set_rsrv <= not r0.req.atomic_qw or r0.req.atomic_last;
else
-- store conditional
clear_rsrv <= not r0.req.atomic_qw or r0.req.atomic_last;
if reservation.valid = '0' or
r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
cancel_store <= '1';
end if;
end if;
end if;
end process;
reservation_reg: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
reservation.valid <= '0';
elsif r0_valid = '1' and access_ok = '1' then
if clear_rsrv = '1' then
reservation.valid <= '0';
elsif set_rsrv = '1' then
reservation.valid <= '1';
reservation.addr <= r0.req.addr(63 downto LINE_OFF_BITS);
end if;
end if;
end if;
end process;
-- Return data for loads & completion control logic
--
writeback_control: process(all)
@ -1367,12 +1366,6 @@ begin
r1.cache_paradox <= '0';
end if;
if req_op = OP_STCX_FAIL then
r1.stcx_fail <= '1';
else
r1.stcx_fail <= '0';
end if;
-- Record TLB hit information for updating TLB PLRU
r1.tlb_hit <= tlb_hit;
r1.tlb_hit_way <= tlb_hit_way;
@ -1423,6 +1416,8 @@ begin
r1.acks_pending <= to_unsigned(0, 3);
r1.stalled <= '0';
r1.dec_acks <= '0';
reservation.valid <= '0';
reservation.addr <= (others => '0');
-- Not useful normally but helps avoiding tons of sim warnings
r1.wb.adr <= (others => '0');
@ -1430,27 +1425,40 @@ begin
-- One cycle pulses reset
r1.slow_valid <= '0';
r1.write_bram <= '0';
r1.stcx_fail <= '0';
r1.ls_valid <= '0';
-- complete tlbies and TLB loads in the third cycle
r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
if req_op = OP_LOAD_HIT then
if r0.mmu_req = '0' then
r1.ls_valid <= '1';
else
r1.mmu_done <= '1';
end if;
end if;
-- The kill_rsrv2 term covers the case where the reservation
-- address was set at the beginning of this cycle, and a store
-- to that address happened in the previous cycle.
if kill_rsrv = '1' or kill_rsrv2 = '1' then
reservation.valid <= '0';
end if;
if req_go = '1' and access_ok = '1' and r0.req.load = '1' and
r0.req.reserve = '1' and r0.req.atomic_first = '1' then
reservation.addr <= ra(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
if req_op = OP_LOAD_HIT then
reservation.valid <= not req_snoop_hit;
end if;
end if;
-- Do invalidations from snooped stores to memory
if snoop_valid = '1' then
assert not is_X(snoop_tag_set);
assert not is_X(snoop_wrtag);
assert not is_X(snoop_paddr);
assert not is_X(snoop_hits);
end if;
for i in 0 to NUM_WAYS-1 loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = snoop_wrtag then
assert not is_X(snoop_index);
cache_valids(to_integer(snoop_index))(i) <= '0';
if snoop_hits(i) = '1' then
cache_valids(to_integer(get_index(snoop_paddr)))(i) <= '0';
end if;
end loop;
@ -1477,6 +1485,9 @@ begin
req.valid := req_go;
req.mmu_req := r0.mmu_req;
req.dcbz := r0.req.dcbz;
req.reserve := r0.req.reserve;
req.first_dw := r0.req.atomic_first;
req.last_dw := r0.req.atomic_last;
req.real_addr := ra;
-- Force data to 0 for dcbz
if r0.req.dcbz = '1' then
@ -1581,7 +1592,11 @@ begin
r1.state <= NC_LOAD_WAIT_ACK;
when OP_STORE_HIT | OP_STORE_MISS =>
if req.dcbz = '0' then
if req.reserve = '1' then
-- stcx needs to wait until next cycle
-- for the reservation address check
r1.state <= DO_STCX;
elsif req.dcbz = '0' then
r1.state <= STORE_WAIT_ACK;
r1.full <= '0';
r1.slow_valid <= '1';
@ -1593,6 +1608,9 @@ begin
if req.op = OP_STORE_HIT then
r1.write_bram <= '1';
end if;
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
else
-- dcbz is handled much like a load miss except
-- that we are writing to memory instead of reading
@ -1600,19 +1618,18 @@ begin
if req.op = OP_STORE_MISS then
r1.write_tag <= '1';
end if;
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
end if;
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
if req.op = OP_STORE_MISS then
ev.store_miss <= '1';
end if;
-- OP_NONE and OP_BAD do nothing
-- OP_BAD & OP_STCX_FAIL were handled above already
-- OP_BAD was handled above already
when OP_NONE =>
when OP_BAD =>
when OP_STCX_FAIL =>
end case;
when RELOAD_WAIT_ACK =>
@ -1652,6 +1669,13 @@ begin
else
r1.mmu_done <= '1';
end if;
-- NB: for lqarx, set the reservation on the first
-- dword so that a snooped store between the two
-- dwords will kill the reservation.
if req.reserve = '1' and req.first_dw = '1' then
reservation.valid <= '1';
reservation.addr <= req.real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
end if;
end if;
-- Check for completion
@ -1736,6 +1760,48 @@ begin
r1.wb.cyc <= '0';
r1.wb.stb <= '0';
end if;
when DO_STCX =>
if reservation.valid = '0' or kill_rsrv = '1' or
r1.req.real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) /= reservation.addr then
-- Wrong address, didn't have reservation, or lost reservation
-- Abandon the wishbone cycle if started and fail the stcx.
r1.stcx_fail <= '1';
r1.full <= '0';
r1.ls_valid <= '1';
r1.state <= IDLE;
r1.wb.cyc <= '0';
r1.wb.stb <= '0';
reservation.valid <= '0';
elsif r1.wb.cyc = '0' then
-- Right address and have reservation, so start the
-- wishbone cycle
r1.wb.we <= '1';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
else
if wishbone_in.stall = '0' then
-- Store has been accepted, so now we can write the
-- cache data RAM
if r1.req.op = OP_STORE_HIT then
r1.write_bram <= '1';
end if;
r1.wb.stb <= '0';
end if;
if wishbone_in.ack = '1' then
r1.state <= IDLE;
r1.wb.cyc <= '0';
r1.wb.stb <= '0';
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
-- For stqcx., kill the reservation on the last dword
if r1.req.last_dw = '1' then
reservation.valid <= '0';
end if;
end if;
end if;
end case;
end if;
end if;