diff --git a/dcache.vhdl b/dcache.vhdl index 705393a..34dbda2 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -1537,7 +1537,7 @@ begin r1.wb.dat <= req.data; r1.wb.sel <= req.byte_sel; end if; - if acks < 7 and req.same_tag = '1' and + if acks < 7 and req.same_tag = '1' and req.dcbz = '0' and (req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then r1.wb.stb <= '1'; stbs_done := false; diff --git a/decode1.vhdl b/decode1.vhdl index 0774250..baf4347 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -114,8 +114,8 @@ architecture behaviour of decode1 is 36 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stw 37 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwu 8 => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- subfic - 2 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- tdi - 3 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1', NONE), -- twi + 2 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- tdi + 3 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- twi 26 => (ALU, NONE, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- xori 27 => (ALU, NONE, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- xoris others => illegal_inst @@ -410,8 +410,8 @@ architecture behaviour of decode1 is 2#0011001000# => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfze 2#1011001000# => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfzeo 2#1001010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- sync - 2#0001000100# => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- td - 2#0000000100# => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1', NONE), -- tw + 2#0001000100# => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- td + 2#0000000100# => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- tw 2#0100110010# => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- tlbie 2#0100010010# => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- tlbiel 2#1000110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- tlbsync diff --git a/icache.vhdl b/icache.vhdl index 7937ff6..298ee47 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -171,7 +171,7 @@ architecture rtl of icache is signal eaa_priv : std_ulogic; -- Cache reload state machine - type state_t is (IDLE, CLR_TAG, WAIT_ACK); + type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK); type reg_internal_t is record -- Cache hit state (Latches for 1 cycle BRAM access) @@ -546,7 +546,7 @@ begin end loop; -- Generate the "hit" and "miss" signals for the synchronous blocks - if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then + if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' and use_previous = '0' then req_is_hit <= is_hit; req_is_miss <= not is_hit; else @@ -580,7 +580,7 @@ begin i_out.next_pred_ntaken <= i_in.pred_ntaken; -- Stall fetch1 if we have a miss on cache or TLB or a protection fault - stall_out <= not (is_hit and access_ok); + stall_out <= not (is_hit and access_ok) and not use_previous; -- Wishbone requests output (from the cache miss reload machine) wishbone_out <= r.wb; @@ -757,9 +757,15 @@ begin r.wb.adr <= next_row_addr(r.wb.adr); end if; + -- Abort reload if we get an invalidation + if inval_in = '1' then + r.wb.stb <= '0'; + r.state <= STOP_RELOAD; + end if; + -- Incoming acks processing if wishbone_in.ack = '1' then - r.rows_valid(r.store_row mod ROW_PER_LINE) <= '1'; + r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in; -- Check for completion if is_last_row(r.store_row, r.end_row_ix) then -- Complete wishbone cycle @@ -775,6 +781,18 @@ begin -- Increment store row counter r.store_row <= next_row(r.store_row); end if; + + when STOP_RELOAD => + -- Wait for all outstanding requests to be satisfied, then + -- go to IDLE state. + if get_row_of_line(r.store_row) = get_row_of_line(get_row(r.wb.adr)) then + r.wb.cyc <= '0'; + r.state <= IDLE; + end if; + if wishbone_in.ack = '1' then + -- Increment store row counter + r.store_row <= next_row(r.store_row); + end if; end case; end if;