@ -317,6 +317,7 @@ architecture rtl of dcache is
 
		
	
		
			
				        tlb_hit          : std_ulogic;
 
		
	
		
			
				        tlb_hit_way      : tlb_way_sig_t;
 
		
	
		
			
				        tlb_hit_index    : tlb_index_sig_t;
 
		
	
		
			
				        tlb_victim       : tlb_way_sig_t;
 
		
	
		
			
				 
		
	
		
			
					-- data buffer for data forwarded from writes to reads
 
		
	
		
			
					forward_data     : std_ulogic_vector(63 downto 0);
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -342,6 +343,8 @@ architecture rtl of dcache is
 
		
	
		
			
				        acks_pending     : unsigned(2 downto 0);
 
		
	
		
			
				        inc_acks         : std_ulogic;
 
		
	
		
			
				        dec_acks         : std_ulogic;
 
		
	
		
			
				        choose_victim    : std_ulogic;
 
		
	
		
			
				        victim_way       : way_t;
 
		
	
		
			
				 
		
	
		
			
				        -- Signals to complete (possibly with error)
 
		
	
		
			
				        ls_valid         : std_ulogic;
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -398,8 +401,7 @@ architecture rtl of dcache is
 
		
	
		
			
				    signal ram_wr_select : std_ulogic_vector(ROW_SIZE - 1 downto 0);
 
		
	
		
			
				 
		
	
		
			
				    -- PLRU output interface
 
		
	
		
			
				    type plru_out_t is array(0 to NUM_LINES-1) of std_ulogic_vector(WAY_BITS-1 downto 0);
 
		
	
		
			
				    signal plru_victim : plru_out_t;
 
		
	
		
			
				    signal plru_victim : way_t;
 
		
	
		
			
				    signal replace_way : way_t;
 
		
	
		
			
				 
		
	
		
			
				    -- Wishbone read/write/cache write formatting signals
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -423,8 +425,7 @@ architecture rtl of dcache is
 
		
	
		
			
				    signal tlb_miss : std_ulogic;
 
		
	
		
			
				 
		
	
		
			
				    -- TLB PLRU output interface
 
		
	
		
			
				    type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
 
		
	
		
			
				    signal tlb_plru_victim : tlb_plru_out_t;
 
		
	
		
			
				    signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
 
		
	
		
			
				 
		
	
		
			
				    signal snoop_tag_set : cache_tags_set_t;
 
		
	
		
			
				    signal snoop_valid   : std_ulogic;
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -650,39 +651,49 @@ begin
 
		
	
		
			
				    end process;
 
		
	
		
			
				 
		
	
		
			
				    -- Generate TLB PLRUs
 
		
	
		
			
				    maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
 
		
	
		
			
				    maybe_tlb_plrus : if TLB_NUM_WAYS > 1 generate
 
		
	
		
			
				        type tlb_plru_array is array(tlb_index_t) of std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
 
		
	
		
			
				        signal tlb_plru_ram    : tlb_plru_array;
 
		
	
		
			
				        signal tlb_plru_cur    : std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
 
		
	
		
			
				        signal tlb_plru_upd    : std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
 
		
	
		
			
				        signal tlb_plru_acc    : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
 
		
	
		
			
				        signal tlb_plru_out    : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
 
		
	
		
			
				    begin
 
		
	
		
			
					-- One clocked PLRU tracker is elaborated per TLB set (index i).
					tlb_plrus: for i in 0 to TLB_SET_SIZE - 1 generate
					    -- Per-set PLRU interface signals
					    signal tlb_plru_acc, tlb_plru_out : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
					    signal tlb_plru_acc_en            : std_ulogic;
					begin
					    tlb_plru : entity work.plru
					        generic map (BITS => TLB_WAY_BITS)
					        port map (
					            clk    => clk,
					            rst    => rst,
					            acc_en => tlb_plru_acc_en,
					            acc    => tlb_plru_acc,
					            lru    => tlb_plru_out
					        );

					    process(all)
					    begin
					        -- Publish this set's LRU way and feed the last hit way to the tracker.
					        tlb_plru_victim(i) <= tlb_plru_out;
					        tlb_plru_acc <= std_ulogic_vector(r1.tlb_hit_way);

					        -- The access enable stays low unless the registered hit index
					        -- is fully defined and selects this set; then it follows r1.tlb_hit.
					        tlb_plru_acc_en <= '0';
					        if not is_X(r1.tlb_hit_index) and r1.tlb_hit_index = i then
					            tlb_plru_acc_en <= r1.tlb_hit;
					            assert not is_X(r1.tlb_hit_way);
					        end if;
					    end process;
					end generate;
 
		
	
		
			
				        -- Single plrufn instance for the TLB. Only tree state, the accessed
				        -- way and the LRU result are connected here; no clock or reset is mapped.
				        tlb_plru : entity work.plrufn
				            generic map (BITS => TLB_WAY_BITS)
				            port map (
				                tree_in  => tlb_plru_cur,
				                acc      => tlb_plru_acc,
				                lru      => tlb_plru_out,
				                tree_out => tlb_plru_upd
				            );
 
		
	
		
			
				 
		
	
		
			
				        process(all)
				        begin
				            -- PLRU interface: export the LRU way and pass in the last hit way
				            tlb_plru_victim <= tlb_plru_out;
				            tlb_plru_acc <= std_ulogic_vector(r1.tlb_hit_way);

				            -- Fetch the PLRU tree bits for the set named by r1.tlb_hit_index;
				            -- an undefined index propagates 'X' instead of indexing the array.
				            if not is_X(r1.tlb_hit_index) then
				                tlb_plru_cur <= tlb_plru_ram(to_integer(r1.tlb_hit_index));
				            else
				                tlb_plru_cur <= (others => 'X');
				            end if;
				        end process;
 
		
	
		
			
				 
		
	
		
			
				        -- synchronous writes to TLB PLRU array
 
		
	
		
			
				        -- Clocked write port of tlb_plru_ram: stores tlb_plru_upd into the
				        -- entry selected by r1.tlb_hit_index on each rising clock edge where
				        -- r1.tlb_hit is set. No reset branch is present in this process.
				        process(clk)
				        begin
				            if rising_edge(clk) then
				                -- Only a registered TLB hit modifies the stored PLRU bits
				                if r1.tlb_hit = '1' then
				                    -- The index must be fully defined before it is converted
				                    -- to an integer; otherwise a failure-severity assertion fires.
				                    assert not is_X(r1.tlb_hit_index) severity failure;
				                    tlb_plru_ram(to_integer(r1.tlb_hit_index)) <= tlb_plru_upd;
				                end if;
				            end if;
				        end process;
 
		
	
		
			
				    end generate;
 
		
	
		
			
				 
		
	
		
			
				    tlb_search : process(all)
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -753,7 +764,7 @@ begin
 
		
	
		
			
				                    if tlb_hit = '1' then
 
		
	
		
			
				                        repl_way := tlb_hit_way;
 
		
	
		
			
				                    else
 
		
	
		
			
				                        repl_way := unsigned(tlb_plru_victim(to_integer(tlb_req_index)));
 
		
	
		
			
				                        repl_way := unsigned(r1.tlb_victim);
 
		
	
		
			
				                    end if;
 
		
	
		
			
				                    assert not is_X(repl_way);
 
		
	
		
			
				                end if;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -770,39 +781,49 @@ begin
 
		
	
		
			
				    end process;
 
		
	
		
			
				 
		
	
		
			
				    -- Generate PLRUs
 
		
	
		
			
				    maybe_plrus: if NUM_WAYS > 1 generate
 
		
	
		
			
				    maybe_plrus : if NUM_WAYS > 1 generate
 
		
	
		
			
				        type plru_array is array(0 to NUM_LINES-1) of std_ulogic_vector(NUM_WAYS - 2 downto 0);
 
		
	
		
			
				        signal plru_ram    : plru_array;
 
		
	
		
			
				        signal plru_cur    : std_ulogic_vector(NUM_WAYS - 2 downto 0);
 
		
	
		
			
				        signal plru_upd    : std_ulogic_vector(NUM_WAYS - 2 downto 0);
 
		
	
		
			
				        signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
 
		
	
		
			
				        signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
 
		
	
		
			
				    begin
 
		
	
		
			
					-- One clocked PLRU tracker is elaborated per cache line index i.
					plrus: for i in 0 to NUM_LINES-1 generate
					    -- Per-line PLRU interface signals
					    signal plru_acc, plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
					    signal plru_acc_en        : std_ulogic;
					begin
					    plru : entity work.plru
					        generic map (BITS => WAY_BITS)
					        port map (
					            clk    => clk,
					            rst    => rst,
					            acc_en => plru_acc_en,
					            acc    => plru_acc,
					            lru    => plru_out
					        );

					    process(all)
					    begin
					        -- Publish this line's LRU way and feed the last hit way to the tracker.
					        plru_victim(i) <= plru_out;
					        plru_acc <= std_ulogic_vector(r1.hit_way);

					        -- The access enable stays low unless the registered hit index
					        -- is fully defined and selects this line; then it follows r1.cache_hit.
					        plru_acc_en <= '0';
					        if not is_X(r1.hit_index) and r1.hit_index = to_unsigned(i, INDEX_BITS) then
					            plru_acc_en <= r1.cache_hit;
					        end if;
					    end process;
					end generate;
 
		
	
		
			
				        -- Single plrufn instance for the data cache. Only tree state, the
				        -- accessed way and the LRU result are connected here; no clock or
				        -- reset is mapped.
				        plru : entity work.plrufn
				            generic map (BITS => WAY_BITS)
				            port map (
				                tree_in  => plru_cur,
				                acc      => plru_acc,
				                lru      => plru_out,
				                tree_out => plru_upd
				            );
 
		
	
		
			
				 
		
	
		
			
				        process(all)
				        begin
				            -- PLRU interface: export the LRU way and pass in the last hit way
				            plru_victim <= unsigned(plru_out);
				            plru_acc <= std_ulogic_vector(r1.hit_way);

				            -- Fetch the PLRU tree bits for the line named by r1.hit_index;
				            -- an undefined index propagates 'X' instead of indexing the array.
				            if not is_X(r1.hit_index) then
				                plru_cur <= plru_ram(to_integer(r1.hit_index));
				            else
				                plru_cur <= (others => 'X');
				            end if;
				        end process;
 
		
	
		
			
				 
		
	
		
			
				        -- synchronous writes to PLRU array
 
		
	
		
			
				        -- Clocked write port of plru_ram: stores plru_upd into the entry
				        -- selected by r1.hit_index on each rising clock edge where
				        -- r1.cache_hit is set. No reset branch is present in this process.
				        process(clk)
				        begin
				            if rising_edge(clk) then
				                -- Only a registered cache hit modifies the stored PLRU bits
				                if r1.cache_hit = '1' then
				                    -- The index must be fully defined before it is converted
				                    -- to an integer; otherwise a failure-severity assertion fires.
				                    assert not is_X(r1.hit_index) severity failure;
				                    plru_ram(to_integer(r1.hit_index)) <= plru_upd;
				                end if;
				            end if;
				        end process;
 
		
	
		
			
				    end generate;
 
		
	
		
			
				 
		
	
		
			
				    -- Cache tag RAM read port
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -980,8 +1001,13 @@ begin
 
		
	
		
			
				        replace_way <= to_unsigned(0, WAY_BITS);
 
		
	
		
			
				        if NUM_WAYS > 1 then
 
		
	
		
			
				            if r1.write_tag = '1' then
 
		
	
		
			
				                assert not is_X(r1.store_index);
 
		
	
		
			
				                replace_way <= unsigned(plru_victim(to_integer(r1.store_index)));
 
		
	
		
			
				                if r1.choose_victim = '1' then
 
		
	
		
			
				                    replace_way <= plru_victim;
 
		
	
		
			
				                else
 
		
	
		
			
				                    -- Cache victim way was chosen earlier,
 
		
	
		
			
				                    -- in the cycle after the miss was detected.
 
		
	
		
			
				                    replace_way <= r1.victim_way;
 
		
	
		
			
				                end if;
 
		
	
		
			
				            else
 
		
	
		
			
				                replace_way <= r1.store_way;
 
		
	
		
			
				            end if;
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -1305,8 +1331,6 @@ begin
 
		
	
		
			
				            end if;
 
		
	
		
			
				 
		
	
		
			
				            -- Fast path for load/store hits. Set signals for the writeback controls.
 
		
	
		
			
				            r1.hit_way <= req_hit_way;
 
		
	
		
			
				            r1.hit_index <= req_index;
 
		
	
		
			
					    if req_op = OP_LOAD_HIT then
 
		
	
		
			
						r1.hit_load_valid <= '1';
 
		
	
		
			
					    else
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -1340,6 +1364,11 @@ begin
 
		
	
		
			
				            r1.tlb_hit <= tlb_hit;
 
		
	
		
			
				            r1.tlb_hit_way <= tlb_hit_way;
 
		
	
		
			
				            r1.tlb_hit_index <= tlb_req_index;
 
		
	
		
			
				            -- determine victim way in the TLB in the cycle after
 
		
	
		
			
				            -- we detect the TLB miss
 
		
	
		
			
				            if r1.ls_error = '1' then
 
		
	
		
			
				                r1.tlb_victim <= unsigned(tlb_plru_victim);
 
		
	
		
			
				            end if;
 
		
	
		
			
				 
		
	
		
			
					end if;
 
		
	
		
			
				    end process;
 
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
			
			@ -1364,6 +1393,7 @@ begin
 
		
	
		
			
				            ev.load_miss <= '0';
 
		
	
		
			
				            ev.store_miss <= '0';
 
		
	
		
			
				            ev.dtlb_miss <= tlb_miss;
 
		
	
		
			
				            r1.choose_victim <= '0';
 
		
	
		
			
				 
		
	
		
			
					    -- On reset, clear all valid bits to force misses
 
		
	
		
			
				            if rst = '1' then
 
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
			
			@ -1460,6 +1490,17 @@ begin
 
		
	
		
			
				                    end if;
 
		
	
		
			
				                end if;
 
		
	
		
			
				 
		
	
		
			
				                -- Signals for PLRU update and victim selection
 
		
	
		
			
				                r1.hit_way <= req_hit_way;
 
		
	
		
			
				                r1.hit_index <= req_index;
 
		
	
		
			
				                -- Record victim way in the cycle after we see a load or dcbz miss
 
		
	
		
			
				                if r1.choose_victim = '1' then
 
		
	
		
			
				                    r1.victim_way <= plru_victim;
 
		
	
		
			
				                end if;
 
		
	
		
			
				                if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then
 
		
	
		
			
				                    r1.choose_victim <= '1';
 
		
	
		
			
				                end if;
 
		
	
		
			
				 
		
	
		
			
						-- Main state machine
 
		
	
		
			
						case r1.state is
 
		
	
		
			
				                when IDLE =>