You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			992 lines
		
	
	
		
			37 KiB
		
	
	
	
		
			VHDL
		
	
			
		
		
	
	
			992 lines
		
	
	
		
			37 KiB
		
	
	
	
		
			VHDL
		
	
| library ieee;
 | |
| use ieee.std_logic_1164.all;
 | |
| use ieee.numeric_std.all;
 | |
| 
 | |
| library work;
 | |
| use work.decode_types.all;
 | |
| use work.common.all;
 | |
| use work.insn_helpers.all;
 | |
| use work.helpers.all;
 | |
| 
 | |
| -- 2 cycle LSU
 | |
| -- We calculate the address in the first cycle
 | |
| 
 | |
| entity loadstore1 is  -- Load/store unit: takes requests on l_in and exchanges data with the dcache (d_*) and MMU (m_*) ports
 | |
|     generic (
 | |
|         HAS_FPU : boolean := true;  -- when true, the ls_fp_conv generate block (SP <-> DP conversion) is elaborated
 | |
|         -- Non-zero to enable log data collection
 | |
|         LOG_LENGTH : natural := 0
 | |
|         );
 | |
|     port (
 | |
|         clk   : in std_ulogic;  -- all registers update on the rising edge
 | |
|         rst   : in std_ulogic;  -- synchronous reset, active high
 | |
| 
 | |
|         l_in  : in Execute1ToLoadstore1Type;  -- incoming instruction: valid, op, addr1/addr2, data, length, ...
 | |
|         e_out : out Loadstore1ToExecute1Type;  -- NOTE(review): driver is outside the visible part of the architecture
 | |
|         l_out : out Loadstore1ToWritebackType;  -- NOTE(review): driver is outside the visible part of the architecture
 | |
| 
 | |
|         d_out : out Loadstore1ToDcacheType;  -- request to the dcache
 | |
|         d_in  : in DcacheToLoadstore1Type;  -- dcache response; the fields valid, error and data are read here
 | |
| 
 | |
|         m_out : out Loadstore1ToMmuType;  -- request to the MMU
 | |
|         m_in  : in MmuToLoadstore1Type;  -- MMU response; the fields done and err are read here
 | |
| 
 | |
|         dc_stall  : in std_ulogic;  -- while '1', stage 1 does not issue a dcache request
 | |
| 
 | |
|         events  : out Loadstore1EventType;
 | |
| 
 | |
|         log_out : out std_ulogic_vector(9 downto 0)
 | |
|         );
 | |
| end loadstore1;
 | |
| 
 | |
| architecture behave of loadstore1 is
 | |
| 
 | |
|     -- State machine for unaligned loads/stores; the current state is held in r3.state
 | |
|     type state_t is (IDLE,              -- ready for instruction (value loaded into r3.state on reset)
 | |
|                      MMU_LOOKUP,        -- waiting for MMU to look up translation
 | |
|                      TLBIE_WAIT,        -- waiting for MMU to finish doing a tlbie
 | |
|                      FINISH_LFS         -- write back converted SP data for lfs*
 | |
|                      );
 | |
| 
 | |
|     type byte_index_t is array(0 to 7) of unsigned(2 downto 0);  -- for each result byte, the index of the source byte within d_in.data
 | |
|     subtype byte_trim_t is std_ulogic_vector(1 downto 0);  -- per-byte select: "11" = r3.load_data, "10" = data_permuted, others = sign/zero fill
 | |
|     type trim_ctl_t is array(0 to 7) of byte_trim_t;  -- one trim select per byte of the 64-bit load result
 | |
| 
 | |
|     type request_t is record  -- internal form of one load/store/MMU request, built by loadstore1_in
 | |
|         valid        : std_ulogic;  -- copied from l_in.valid
 | |
|         dc_req       : std_ulogic;  -- needs a dcache access: valid and (load or store or dcbz) and not align_intr
 | |
|         load         : std_ulogic;  -- OP_LOAD, except the address-update half of an update-form load
 | |
|         store        : std_ulogic;  -- OP_STORE
 | |
|         tlbie        : std_ulogic;  -- OP_TLBIE
 | |
|         dcbz         : std_ulogic;  -- OP_DCBZ
 | |
|         read_spr     : std_ulogic;  -- OP_MFSPR
 | |
|         write_spr    : std_ulogic;  -- OP_MTSPR
 | |
|         mmu_op       : std_ulogic;  -- set for tlbie, fetch failures, and mtspr when sprn(8) or sprn(5) is set
 | |
|         instr_fault  : std_ulogic;  -- OP_FETCH_FAILED
 | |
|         load_zero    : std_ulogic;  -- not set by the visible request decode; completes in one cycle (see one_cycle)
 | |
|         do_update    : std_ulogic;  -- second half of an update-form load: write the address back to RA
 | |
|         noop         : std_ulogic;  -- not set by the visible request decode; completes in one cycle (see one_cycle)
 | |
|         mode_32bit   : std_ulogic;  -- from l_in; address bits 63..32 are forced to zero
 | |
| 	addr         : std_ulogic_vector(63 downto 0);  -- address of the access (or RB / NIA for tlbie / fetch failure)
 | |
|         byte_sel     : std_ulogic_vector(7 downto 0);  -- byte enables for the current doubleword
 | |
|         second_bytes : std_ulogic_vector(7 downto 0);  -- byte enables for the following doubleword of an unaligned access
 | |
| 	store_data   : std_ulogic_vector(63 downto 0);  -- data to store (SP-converted when HAS_FPU and l_in.is_32bit = '1')
 | |
|         instr_tag    : instr_tag_t;
 | |
| 	write_reg    : gspr_index_t;
 | |
| 	length       : std_ulogic_vector(3 downto 0);  -- from l_in.length; 1, 2, 4 or 8 produce byte enables
 | |
|         elt_length   : std_ulogic_vector(3 downto 0);  -- also loaded from l_in.length
 | |
| 	byte_reverse : std_ulogic;
 | |
|         brev_mask    : unsigned(2 downto 0);  -- length - 1 when byte_reverse = '1', else "000"; XORed into byte indices
 | |
| 	sign_extend  : std_ulogic;
 | |
| 	update       : std_ulogic;
 | |
| 	xerc         : xer_common_t;
 | |
|         reserve      : std_ulogic;  -- from l_in.reserve (larx/stcx per the alignment-check comment)
 | |
|         atomic       : std_ulogic;  -- not misaligned
 | |
|         atomic_last  : std_ulogic;  -- not misaligned, and either the second half or not a repeated op
 | |
|         rc           : std_ulogic;
 | |
|         nc           : std_ulogic;              -- non-cacheable access (l_in.ci, or real-mode address of form 0xc-------)
 | |
|         virt_mode    : std_ulogic;
 | |
|         priv_mode    : std_ulogic;
 | |
|         load_sp      : std_ulogic;  -- load with l_in.is_32bit = '1' and HAS_FPU: needs SP -> DP conversion
 | |
|         sprn         : std_ulogic_vector(9 downto 0);  -- decode_spr_num(l_in.insn) as a 10-bit vector
 | |
|         is_slbia     : std_ulogic;  -- l_in.insn(7), only assigned for OP_TLBIE
 | |
|         align_intr   : std_ulogic;  -- request must raise an alignment interrupt instead of accessing the dcache
 | |
|         dword_index  : std_ulogic;  -- '1' for the second doubleword request of an unaligned access
 | |
|         two_dwords   : std_ulogic;  -- the byte enables spill into a second doubleword
 | |
|         nia          : std_ulogic_vector(63 downto 0);
 | |
|     end record;
 | |
|     constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',  -- default request: flags '0', vectors zero
 | |
|                                           dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
 | |
|                                           instr_fault => '0', load_zero => '0', do_update => '0', noop => '0',
 | |
|                                           mode_32bit => '0', addr => (others => '0'),
 | |
|                                           byte_sel => x"00", second_bytes => x"00",
 | |
|                                           store_data => (others => '0'), instr_tag => instr_tag_init,  -- instr_tag takes its own init constant
 | |
|                                           write_reg => 7x"00", length => x"0",
 | |
|                                           elt_length => x"0", byte_reverse => '0', brev_mask => "000",
 | |
|                                           sign_extend => '0', update => '0',
 | |
|                                           xerc => xerc_init, reserve => '0',  -- xerc takes its own init constant
 | |
|                                           atomic => '0', atomic_last => '0', rc => '0', nc => '0',
 | |
|                                           virt_mode => '0', priv_mode => '0', load_sp => '0',
 | |
|                                           sprn => 10x"0", is_slbia => '0', align_intr => '0',
 | |
|                                           dword_index => '0', two_dwords => '0',
 | |
|                                           nia => (others => '0'));
 | |
| 
 | |
|     type reg_stage1_t is record  -- stage 1 pipeline register (signal r1)
 | |
|         req : request_t;  -- request currently held in stage 1
 | |
|         issued : std_ulogic;  -- '1' once the dcache request for req has actually been sent
 | |
|         addr0 : std_ulogic_vector(63 downto 0);  -- req.addr captured when l_in.valid = '1' and stage 1 was not busy
 | |
|     end record;
 | |
| 
 | |
|     type reg_stage2_t is record  -- stage 2 pipeline register (signal r2)
 | |
|         req        : request_t;  -- request copied from r1, with store_data replaced by the byte-rotated data
 | |
|         byte_index : byte_index_t;  -- load formatter: source byte index for each result byte
 | |
|         use_second : std_ulogic_vector(7 downto 0);  -- load formatter: carry out of (index + offset), i.e. byte lies in the second doubleword
 | |
|         wait_dc    : std_ulogic;  -- completion comes from d_in.valid
 | |
|         wait_mmu   : std_ulogic;  -- completion comes from m_in.done
 | |
|         one_cycle  : std_ulogic;  -- completes without waiting (noop, read_spr, non-MMU write_spr, load_zero, do_update)
 | |
|         wr_sel     : std_ulogic_vector(1 downto 0);  -- "00" read_spr, "01" do_update or store, "10" load_sp, "11" otherwise
 | |
|         addr0      : std_ulogic_vector(63 downto 0);  -- copy of r1.addr0
 | |
|     end record;
 | |
| 
 | |
|     type reg_stage3_t is record  -- stage 3 pipeline register (signal r3)
 | |
|         state        : state_t;  -- IDLE after reset
 | |
|         instr_tag    : instr_tag_t;
 | |
|         write_enable : std_ulogic;  -- cleared on reset
 | |
| 	write_reg    : gspr_index_t;
 | |
|         write_data   : std_ulogic_vector(63 downto 0);
 | |
|         rc           : std_ulogic;
 | |
|         xerc         : xer_common_t;
 | |
|         store_done   : std_ulogic;
 | |
|         convert_lfs  : std_ulogic;  -- cleared on reset; when '1' it counts as request completion
 | |
|         load_data    : std_ulogic_vector(63 downto 0);  -- supplies the first-doubleword bytes when a two-doubleword load is merged
 | |
|         dar          : std_ulogic_vector(63 downto 0);  -- cleared on reset
 | |
|         dsisr        : std_ulogic_vector(31 downto 0);  -- cleared on reset
 | |
|         ld_sp_data   : std_ulogic_vector(31 downto 0);  -- single-precision value fed to sp_to_dp
 | |
|         ld_sp_nz     : std_ulogic;  -- sp_to_dp: with a zero exponent, '0' selects the zero result and '1' the denormal path
 | |
|         ld_sp_lz     : std_ulogic_vector(5 downto 0);  -- sp_to_dp: normalization amount (presumably a leading-zero count -- confirm)
 | |
|         stage1_en    : std_ulogic;  -- '1' after reset; must be '1' for stage 1 to issue dcache requests
 | |
|         interrupt    : std_ulogic;  -- cleared on reset
 | |
|         intr_vec     : integer range 0 to 16#fff#;
 | |
|         nia          : std_ulogic_vector(63 downto 0);
 | |
|         srr1         : std_ulogic_vector(15 downto 0);  -- defaults to zero each cycle in loadstore1_3
 | |
|         events       : Loadstore1EventType;  -- defaults to all '0' each cycle in loadstore1_3
 | |
|     end record;
 | |
| 
 | |
|     signal req_in   : request_t;  -- decoded request, driven by loadstore1_in
 | |
|     signal r1, r1in : reg_stage1_t;  -- stage 1 register and its next value
 | |
|     signal r2, r2in : reg_stage2_t;  -- stage 2 register and its next value
 | |
|     signal r3, r3in : reg_stage3_t;  -- stage 3 register and its next value
 | |
| 
 | |
|     signal busy     : std_ulogic;  -- stage 1 holds a request that cannot be replaced this cycle
 | |
|     signal complete : std_ulogic;  -- the request in r2 finishes this cycle
 | |
|     signal in_progress : std_ulogic;  -- a request is in r1, or in r2 and not yet complete
 | |
|     signal flushing : std_ulogic;  -- set once an align_intr request enters r1, cleared when r3in.interrupt = '1'
 | |
| 
 | |
|     signal store_sp_data : std_ulogic_vector(31 downto 0);  -- l_in.data converted to single precision (dp_to_sp)
 | |
|     signal load_dp_data  : std_ulogic_vector(63 downto 0);  -- r3.ld_sp_data converted to double precision (sp_to_dp)
 | |
|     signal store_data    : std_ulogic_vector(63 downto 0);  -- r1.req.store_data after byte rotation/reversal (loadstore1_2)
 | |
| 
 | |
|     signal stage1_issue_enable : std_ulogic;  -- stage 1 may issue: r3.stage1_en and no MMU op in r1 or r2
 | |
|     signal stage1_req          : request_t;  -- request selected by loadstore1_1 this cycle
 | |
|     signal stage1_dcreq        : std_ulogic;  -- a dcache request is issued this cycle
 | |
|     signal stage1_dreq         : std_ulogic;  -- stage1_dcreq delayed by one clock
 | |
|     signal stage2_busy_next    : std_ulogic;  -- r1.req.valid and stage3_busy_next (driven in loadstore1_2)
 | |
|     signal stage3_busy_next    : std_ulogic;  -- NOTE(review): driver is outside the visible part of the architecture
 | |
| 
 | |
|     -- Map a transfer size in bytes (1, 2, 4 or 8) to a right-justified
 | |
|     -- 8-bit byte-enable mask.  Any other size enables no bytes.
 | |
|     function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
 | |
|     begin
 | |
|         if length = "0001" then
 | |
|             return "00000001";      -- byte
 | |
|         elsif length = "0010" then
 | |
|             return "00000011";      -- halfword
 | |
|         elsif length = "0100" then
 | |
|             return "00001111";      -- word
 | |
|         elsif length = "1000" then
 | |
|             return "11111111";      -- doubleword
 | |
|         end if;
 | |
|         -- unsupported size: no byte selected
 | |
|         return "00000000";
 | |
|     end function length_to_sel;
 | |
| 
 | |
|     -- Compute the byte enables for an access of `size` bytes that starts
 | |
|     -- at byte offset `address` within a doubleword.
 | |
|     -- The result is 16 bits wide so that it covers two transfers:
 | |
|     -- bits 7..0 apply to the first doubleword and bits 15..8 to the next
 | |
|     -- one, which is only non-zero for unaligned loads or stores.
 | |
|     function xfer_data_sel(size : in std_logic_vector(3 downto 0);
 | |
|                            address : in std_logic_vector(2 downto 0))
 | |
|         return std_ulogic_vector is
 | |
|         variable byte_offset : natural range 0 to 7;
 | |
|         variable wide_sel    : unsigned(15 downto 0);
 | |
|     begin
 | |
|         byte_offset := to_integer(unsigned(address));
 | |
|         -- zero-extend the 8-bit mask to 16 bits, then slide it up by the offset
 | |
|         wide_sel := resize(unsigned(length_to_sel(size)), 16);
 | |
|         return std_ulogic_vector(shift_left(wide_sel, byte_offset));
 | |
|     end function xfer_data_sel;
 | |
| 
 | |
|     -- 23-bit logical right shifter for DP -> SP float conversions.
 | |
|     -- The shift is applied in two steps: bits 1..0 of `shift` give 0 to 3
 | |
|     -- places, then bits 4..2 give 0, 4, 8, 12 or 16 places; every larger
 | |
|     -- value of bits 4..2 shifts by 20 places.
 | |
|     function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
 | |
|         return std_ulogic_vector is
 | |
|         variable fine   : unsigned(22 downto 0);
 | |
|         variable coarse : unsigned(22 downto 0);
 | |
|     begin
 | |
|         -- fine step: 0 to 3 bit positions
 | |
|         case shift(1 downto 0) is
 | |
|             when "00"   => fine := unsigned(frac);
 | |
|             when "01"   => fine := shift_right(unsigned(frac), 1);
 | |
|             when "10"   => fine := shift_right(unsigned(frac), 2);
 | |
|             when others => fine := shift_right(unsigned(frac), 3);
 | |
|         end case;
 | |
|         -- coarse step: multiples of 4 bit positions, capped at 20
 | |
|         case shift(4 downto 2) is
 | |
|             when "000"  => coarse := fine;
 | |
|             when "001"  => coarse := shift_right(fine, 4);
 | |
|             when "010"  => coarse := shift_right(fine, 8);
 | |
|             when "011"  => coarse := shift_right(fine, 12);
 | |
|             when "100"  => coarse := shift_right(fine, 16);
 | |
|             when others => coarse := shift_right(fine, 20);
 | |
|         end case;
 | |
|         return std_ulogic_vector(coarse);
 | |
|     end;
 | |
| 
 | |
|     -- 23-bit logical left shifter for SP -> DP float conversions.
 | |
|     -- The shift is applied in two steps: bits 1..0 of `shift` give 0 to 3
 | |
|     -- places, then bits 4..2 give 0, 4, 8, 12 or 16 places; every larger
 | |
|     -- value of bits 4..2 shifts by 20 places.
 | |
|     function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
 | |
|         return std_ulogic_vector is
 | |
|         variable fine   : unsigned(22 downto 0);
 | |
|         variable coarse : unsigned(22 downto 0);
 | |
|     begin
 | |
|         -- fine step: 0 to 3 bit positions
 | |
|         case shift(1 downto 0) is
 | |
|             when "00"   => fine := unsigned(frac);
 | |
|             when "01"   => fine := shift_left(unsigned(frac), 1);
 | |
|             when "10"   => fine := shift_left(unsigned(frac), 2);
 | |
|             when others => fine := shift_left(unsigned(frac), 3);
 | |
|         end case;
 | |
|         -- coarse step: multiples of 4 bit positions, capped at 20
 | |
|         case shift(4 downto 2) is
 | |
|             when "000"  => coarse := fine;
 | |
|             when "001"  => coarse := shift_left(fine, 4);
 | |
|             when "010"  => coarse := shift_left(fine, 8);
 | |
|             when "011"  => coarse := shift_left(fine, 12);
 | |
|             when "100"  => coarse := shift_left(fine, 16);
 | |
|             when others => coarse := shift_left(fine, 20);
 | |
|         end case;
 | |
|         return std_ulogic_vector(coarse);
 | |
|     end;
 | |
| 
 | |
| begin
 | |
|     loadstore1_reg: process(clk)  -- pipeline registers r1/r2/r3, the flushing flag, and simulation-time protocol checks
 | |
|     begin
 | |
|         if rising_edge(clk) then
 | |
|             if rst = '1' then  -- synchronous reset: only the control fields listed below are initialized
 | |
|                 r1.req.valid <= '0';
 | |
|                 r2.req.valid <= '0';
 | |
|                 r2.wait_dc <= '0';
 | |
|                 r2.wait_mmu <= '0';
 | |
|                 r2.one_cycle <= '0';
 | |
|                 r3.dar <= (others => '0');
 | |
|                 r3.dsisr <= (others => '0');
 | |
|                 r3.state <= IDLE;
 | |
|                 r3.write_enable <= '0';
 | |
|                 r3.interrupt <= '0';
 | |
|                 r3.stage1_en <= '1';  -- stage 1 is allowed to issue after reset
 | |
|                 r3.convert_lfs <= '0';
 | |
|                 flushing <= '0';
 | |
|             else
 | |
|                 r1 <= r1in;
 | |
|                 r2 <= r2in;
 | |
|                 r3 <= r3in;
 | |
|                 flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and  -- set when an align_intr request enters r1 ...
 | |
|                             not r3in.interrupt;  -- ... and cleared in the cycle stage 3 raises the interrupt
 | |
|             end if;
 | |
|             stage1_dreq <= stage1_dcreq;  -- one-cycle delayed copy; updated regardless of rst
 | |
|             if d_in.valid = '1' then  -- check: dcache data only arrives for a dcache request in r2 with stage 3 IDLE
 | |
|                 assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
 | |
|             end if;
 | |
|             if d_in.error = '1' then  -- check: same condition for dcache errors
 | |
|                 assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
 | |
|             end if;
 | |
|             if m_in.done = '1' or m_in.err = '1' then  -- check: MMU responses only while waiting in MMU_LOOKUP or TLBIE_WAIT
 | |
|                 assert r2.req.valid = '1' and (r3.state = MMU_LOOKUP or r3.state = TLBIE_WAIT) severity failure;
 | |
|             end if;
 | |
|         end if;
 | |
|     end process;
 | |
| 
 | |
|     ls_fp_conv: if HAS_FPU generate  -- combinational SP <-> DP converters; only elaborated when HAS_FPU is true
 | |
|         -- Convert DP data to SP for stfs
 | |
|         dp_to_sp: process(all)
 | |
|             variable exp   : unsigned(10 downto 0);
 | |
|             variable frac  : std_ulogic_vector(22 downto 0);
 | |
|             variable shift : unsigned(4 downto 0);
 | |
|         begin
 | |
|             store_sp_data(31) <= l_in.data(63);  -- sign bit passes straight through
 | |
|             store_sp_data(30 downto 0) <= (others => '0');  -- default result is zero; overridden by the branches below
 | |
|             exp := unsigned(l_in.data(62 downto 52));  -- 11-bit DP exponent field
 | |
|             if exp > 896 then
 | |
|                 store_sp_data(30) <= l_in.data(62);  -- top exponent bit
 | |
|                 store_sp_data(29 downto 0) <= l_in.data(58 downto 29);  -- low 7 exponent bits and top 23 fraction bits
 | |
|             elsif exp >= 874 then
 | |
|                 -- denormalization required
 | |
|                 frac := '1' & l_in.data(51 downto 30);  -- explicit leading one followed by the top 22 fraction bits
 | |
|                 shift := 0 - exp(4 downto 0);  -- (-exp) mod 32: 0 for exp = 896 up to 22 for exp = 874
 | |
|                 store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
 | |
|             end if;
 | |
|         end process;
 | |
| 
 | |
|         -- Convert SP data to DP for lfs
 | |
|         sp_to_dp: process(all)
 | |
|             variable exp     : unsigned(7 downto 0);
 | |
|             variable exp_dp  : unsigned(10 downto 0);
 | |
|             variable exp_nz  : std_ulogic;
 | |
|             variable exp_ao  : std_ulogic;
 | |
|             variable frac    : std_ulogic_vector(22 downto 0);
 | |
|             variable frac_shift : unsigned(4 downto 0);
 | |
|         begin
 | |
|             frac := r3.ld_sp_data(22 downto 0);
 | |
|             exp := unsigned(r3.ld_sp_data(30 downto 23));
 | |
|             exp_nz := or (r3.ld_sp_data(30 downto 23));  -- exponent field is non-zero
 | |
|             exp_ao := and (r3.ld_sp_data(30 downto 23));  -- exponent field is all ones
 | |
|             frac_shift := (others => '0');  -- fraction is only shifted in the denormal case
 | |
|             if exp_ao = '1' then
 | |
|                 exp_dp := to_unsigned(2047, 11);    -- infinity or NaN
 | |
|             elsif exp_nz = '1' then
 | |
|                 exp_dp := 896 + resize(exp, 11);    -- finite normalized value
 | |
|             elsif r3.ld_sp_nz = '0' then
 | |
|                 exp_dp := to_unsigned(0, 11);       -- zero
 | |
|             else
 | |
|                 -- denormalized SP operand, need to normalize
 | |
|                 exp_dp := 896 - resize(unsigned(r3.ld_sp_lz), 11);
 | |
|                 frac_shift := unsigned(r3.ld_sp_lz(4 downto 0)) + 1;  -- one extra place beyond ld_sp_lz
 | |
|             end if;
 | |
|             load_dp_data(63) <= r3.ld_sp_data(31);  -- sign bit
 | |
|             load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
 | |
|             load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);  -- 23 SP fraction bits go to the top of the DP fraction
 | |
|             load_dp_data(28 downto 0) <= (others => '0');
 | |
|         end process;
 | |
|     end generate;
 | |
| 
 | |
|     -- Translate a load/store instruction into the internal request format
 | |
|     -- XXX this should only depend on l_in, but actually depends on
 | |
|     -- r1.addr0 as well (in the l_in.second = 1 case).
 | |
|     loadstore1_in: process(all)  -- combinational; result is driven onto req_in
 | |
|         variable v : request_t;
 | |
|         variable lsu_sum : std_ulogic_vector(63 downto 0);
 | |
|         variable brev_lenm1 : unsigned(2 downto 0);
 | |
|         variable long_sel : std_ulogic_vector(15 downto 0);
 | |
|         variable addr : std_ulogic_vector(63 downto 0);
 | |
|         variable sprn : std_ulogic_vector(9 downto 0);
 | |
|         variable misaligned : std_ulogic;
 | |
|         variable addr_mask : std_ulogic_vector(2 downto 0);
 | |
|     begin
 | |
|         v := request_init;  -- start from the inactive defaults, then fill in fields
 | |
|         sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
 | |
| 
 | |
|         v.valid := l_in.valid;
 | |
|         v.instr_tag := l_in.instr_tag;
 | |
|         v.mode_32bit := l_in.mode_32bit;
 | |
|         v.write_reg := l_in.write_reg;
 | |
|         v.length := l_in.length;
 | |
|         v.elt_length := l_in.length;
 | |
|         v.byte_reverse := l_in.byte_reverse;
 | |
|         v.sign_extend := l_in.sign_extend;
 | |
|         v.update := l_in.update;
 | |
|         v.xerc := l_in.xerc;
 | |
|         v.reserve := l_in.reserve;
 | |
|         v.rc := l_in.rc;
 | |
|         v.nc := l_in.ci;
 | |
|         v.virt_mode := l_in.virt_mode;
 | |
|         v.priv_mode := l_in.priv_mode;
 | |
|         v.sprn := sprn;
 | |
|         v.nia := l_in.nia;
 | |
| 
 | |
|         lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));  -- address = addr1 + addr2
 | |
| 
 | |
|         if HAS_FPU and l_in.is_32bit = '1' then
 | |
|             v.store_data := x"00000000" & store_sp_data;  -- converted SP value in the low word
 | |
|         else
 | |
|             v.store_data := l_in.data;
 | |
|         end if;
 | |
| 
 | |
|         addr := lsu_sum;
 | |
|         if l_in.second = '1' then
 | |
|             if l_in.update = '0' then
 | |
|                 -- for the second half of a 16-byte transfer,
 | |
|                 -- use the previous address plus 8.
 | |
|                 addr := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + 1) & r1.addr0(2 downto 0);
 | |
|             else
 | |
|                 -- for an update-form load, use the previous address
 | |
|                 -- as the value to write back to RA.
 | |
|                 addr := r1.addr0;
 | |
|             end if;
 | |
|         end if;
 | |
|         if l_in.mode_32bit = '1' then
 | |
|             addr(63 downto 32) := (others => '0');  -- 32-bit mode: discard the upper address half
 | |
|         end if;
 | |
|         v.addr := addr;
 | |
| 
 | |
|         -- XXX Temporary hack. Mark the op as non-cachable if the address
 | |
|         -- is the form 0xc------- for a real-mode access.
 | |
|         if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
 | |
|             v.nc := '1';
 | |
|         end if;
 | |
| 
 | |
|         addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1);  -- low 3 bits of length - 1 ("111" for length 8)
 | |
| 
 | |
|         -- Do length_to_sel and work out if we are doing 2 dwords
 | |
|         long_sel := xfer_data_sel(v.length, addr(2 downto 0));
 | |
|         v.byte_sel := long_sel(7 downto 0);
 | |
|         v.second_bytes := long_sel(15 downto 8);
 | |
|         if long_sel(15 downto 8) /= "00000000" then
 | |
|             v.two_dwords := '1';
 | |
|         end if;
 | |
| 
 | |
|         -- check alignment for larx/stcx
 | |
|         misaligned := or (addr_mask and addr(2 downto 0));  -- any address bit set below the access size
 | |
|         v.align_intr := l_in.reserve and misaligned;
 | |
|         if l_in.repeat = '1' and l_in.second = '0' and l_in.update = '0' and addr(3) = '1' then
 | |
|             -- length is really 16 not 8
 | |
|             -- Make misaligned lq cause an alignment interrupt in LE mode,
 | |
|             -- in order to avoid the case with RA = RT + 1 where the second half
 | |
|             -- faults but the first doesn't (and updates RT+1, destroying RA).
 | |
|             -- The equivalent BE case doesn't occur because RA = RT is illegal.
 | |
|             misaligned := '1';
 | |
|             if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
 | |
|                 v.align_intr := '1';
 | |
|             end if;
 | |
|         end if;
 | |
| 
 | |
|         v.atomic := not misaligned;
 | |
|         v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
 | |
| 
 | |
|         case l_in.op is  -- set the operation flag(s); unlisted ops leave every flag at '0'
 | |
|             when OP_STORE =>
 | |
|                 v.store := '1';
 | |
|             when OP_LOAD =>
 | |
|                 if l_in.update = '0' or l_in.second = '0' then
 | |
|                     v.load := '1';
 | |
|                     if HAS_FPU and l_in.is_32bit = '1' then
 | |
|                         -- Allow an extra cycle for SP->DP precision conversion
 | |
|                         v.load_sp := '1';
 | |
|                     end if;
 | |
|                 else
 | |
|                     -- write back address to RA
 | |
|                     v.do_update := '1';
 | |
|                 end if;
 | |
|             when OP_DCBZ =>
 | |
|                 v.dcbz := '1';
 | |
|                 v.align_intr := v.nc;  -- replaces the earlier value: alignment interrupt iff the access is non-cacheable
 | |
|             when OP_TLBIE =>
 | |
|                 v.tlbie := '1';
 | |
|                 v.addr := l_in.addr2;    -- address from RB for tlbie
 | |
|                 v.is_slbia := l_in.insn(7);
 | |
|                 v.mmu_op := '1';
 | |
|             when OP_MFSPR =>
 | |
|                 v.read_spr := '1';
 | |
|             when OP_MTSPR =>
 | |
|                 v.write_spr := '1';
 | |
|                 v.mmu_op := sprn(8) or sprn(5);  -- treated as an MMU operation when SPR number bit 8 or bit 5 is set
 | |
|             when OP_FETCH_FAILED =>
 | |
|                 -- send it to the MMU to do the radix walk
 | |
|                 v.instr_fault := '1';
 | |
|                 v.addr := l_in.nia;
 | |
|                 v.mmu_op := '1';
 | |
|             when others =>
 | |
|         end case;
 | |
|         v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr;  -- only these ops access the dcache
 | |
| 
 | |
|         -- Work out controls for load and store formatting
 | |
|         brev_lenm1 := "000";
 | |
|         if v.byte_reverse = '1' then
 | |
|             brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
 | |
|         end if;
 | |
|         v.brev_mask := brev_lenm1;
 | |
| 
 | |
|         req_in <= v;
 | |
|     end process;
 | |
| 
 | |
|     busy <= r1.req.valid and ((r1.req.dc_req and not r1.issued) or  -- r1 must be kept: dcache request not yet issued,
 | |
|                               (r1.issued and d_in.error) or  -- ... or it was issued and the dcache reports an error,
 | |
|                               stage2_busy_next or  -- ... or stage 2 cannot take it next cycle,
 | |
|                               (r1.req.dc_req and r1.req.two_dwords and not r1.req.dword_index));  -- ... or the second doubleword request is still to come
 | |
|     complete <= r2.one_cycle or (r2.wait_dc and d_in.valid) or  -- the r2 request finishes immediately, on dcache data,
 | |
|                 (r2.wait_mmu and m_in.done) or r3.convert_lfs;  -- ... on MMU done, or when r3.convert_lfs is set
 | |
|     in_progress <= r1.req.valid or (r2.req.valid and not complete);
 | |
| 
 | |
|     stage1_issue_enable <= r3.stage1_en and not (r1.req.valid and r1.req.mmu_op) and  -- no dcache issue while an MMU op is in r1 ...
 | |
|                            not (r2.req.valid and r2.req.mmu_op);  -- ... or in r2
 | |
| 
 | |
|     -- Processing done in the first cycle of a load/store instruction
 | |
|     loadstore1_1: process(all)  -- combinational; drives r1in, stage1_req and stage1_dcreq
 | |
|         variable v     : reg_stage1_t;
 | |
|         variable req   : request_t;
 | |
|         variable dcreq : std_ulogic;
 | |
|         variable issue : std_ulogic;
 | |
|     begin
 | |
|         v := r1;
 | |
|         issue := '0';
 | |
| 
 | |
|         if busy = '0' then  -- stage 1 is free: take the newly decoded request
 | |
|             req := req_in;
 | |
|             v.issued := '0';
 | |
|             if flushing = '1' then
 | |
|                 -- Make this a no-op request rather than simply invalid.
 | |
|                 -- It will never get to stage 3 since there is a request ahead of
 | |
|                 -- it with align_intr = 1.
 | |
|                 req.dc_req := '0';
 | |
|             end if;
 | |
|             issue := l_in.valid and req.dc_req;
 | |
|             if l_in.valid = '1' then
 | |
|                 v.addr0 := req.addr;  -- remember the address for later cycles of this instruction
 | |
|             end if;
 | |
|         else
 | |
|             req := r1.req;  -- keep the current request
 | |
|         end if;
 | |
| 
 | |
|         if r1.req.valid = '1' then
 | |
|             if r1.req.dc_req = '1' and r1.issued = '0' then
 | |
|                 issue := '1';  -- retry a dcache request that has not been sent yet
 | |
|             elsif r1.issued = '1' and d_in.error = '1' then
 | |
|                 v.issued := '0';  -- dcache error: mark the request as not issued again
 | |
|             elsif stage2_busy_next = '0' then
 | |
|                 -- we can change what's in r1 next cycle because the current thing
 | |
|                 -- in r1 will go into r2
 | |
|                 if r1.req.dc_req = '1' and r1.req.two_dwords = '1' and r1.req.dword_index = '0' then
 | |
|                     -- construct the second request for a misaligned access
 | |
|                     req.dword_index := '1';
 | |
|                     req.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000";  -- start of the next doubleword
 | |
|                     if r1.req.mode_32bit = '1' then
 | |
|                         req.addr(32) := '0';  -- drop the carry out of bit 31 in 32-bit mode
 | |
|                     end if;
 | |
|                     req.byte_sel := r1.req.second_bytes;
 | |
|                     issue := '1';
 | |
|                 end if;
 | |
|             end if;
 | |
|         end if;
 | |
|         if r3in.interrupt = '1' then  -- stage 3 is taking an interrupt: cancel whatever would enter r1
 | |
|             req.valid := '0';
 | |
|             issue := '0';
 | |
|         end if;
 | |
| 
 | |
|         v.req := req;
 | |
|         dcreq := issue and stage1_issue_enable and not d_in.error and not dc_stall;  -- the request is actually sent only if nothing blocks it
 | |
|         if issue = '1' then
 | |
|             v.issued := dcreq;  -- record whether the request was really sent this cycle
 | |
|         end if;
 | |
| 
 | |
|         stage1_req <= req;
 | |
|         stage1_dcreq <= dcreq;
 | |
|         r1in <= v;
 | |
|     end process;
 | |
| 
 | |
|     -- Processing done in the second cycle of a load/store instruction.
 | |
|     -- Store data is formatted here and sent to the dcache.
 | |
|     -- The request in r1 is sent to stage 3 if stage 3 will not be busy next cycle.
 | |
|     loadstore1_2: process(all)  -- combinational; drives store_data, stage2_busy_next and r2in
 | |
|         variable v : reg_stage2_t;
 | |
|         variable j : integer;
 | |
|         variable k : unsigned(2 downto 0);
 | |
|         variable kk : unsigned(3 downto 0);
 | |
|         variable idx : unsigned(2 downto 0);
 | |
|         variable byte_offset : unsigned(2 downto 0);
 | |
|     begin
 | |
|         v := r2;
 | |
| 
 | |
|         -- Byte reversing and rotating for stores.
 | |
|         -- Done in the second cycle (the cycle after l_in.valid = 1).
 | |
|         byte_offset := unsigned(r1.addr0(2 downto 0));  -- byte position of the access within its doubleword
 | |
|         for i in 0 to 7 loop
 | |
|             k := (to_unsigned(i, 3) - byte_offset) xor r1.req.brev_mask;  -- source byte for output byte i: rotate, then reverse via XOR
 | |
|             j := to_integer(k) * 8;
 | |
|             store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j);
 | |
|         end loop;
 | |
| 
 | |
|         if stage3_busy_next = '0' and  -- advance r1 into r2 when stage 3 is free and r1 is
 | |
|             (r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0') then  -- empty, already issued, or needs no dcache access
 | |
|             v.req := r1.req;
 | |
|             v.addr0 := r1.addr0;
 | |
|             v.req.store_data := store_data;
 | |
|             v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and  -- not set for load_sp requests, nor for
 | |
|                          not (r1.req.two_dwords and not r1.req.dword_index);  -- the first half of a two-doubleword access
 | |
|             v.wait_mmu := r1.req.valid and r1.req.mmu_op;
 | |
|             v.one_cycle := r1.req.valid and (r1.req.noop or r1.req.read_spr or
 | |
|                                              (r1.req.write_spr and not r1.req.mmu_op) or
 | |
|                                              r1.req.load_zero or r1.req.do_update);
 | |
|             if r1.req.read_spr = '1' then
 | |
|                 v.wr_sel := "00";
 | |
|             elsif r1.req.do_update = '1' or r1.req.store = '1' then
 | |
|                 v.wr_sel := "01";
 | |
|             elsif r1.req.load_sp = '1' then
 | |
|                 v.wr_sel := "10";
 | |
|             else
 | |
|                 v.wr_sel := "11";
 | |
|             end if;
 | |
| 
 | |
|             -- Work out load formatter controls for next cycle
 | |
|             for i in 0 to 7 loop
 | |
|                 idx := to_unsigned(i, 3) xor r1.req.brev_mask;
 | |
|                 kk := ('0' & idx) + ('0' & byte_offset);  -- 4-bit sum so the carry is kept
 | |
|                 v.use_second(i) := kk(3);  -- carry: the byte comes from the second doubleword
 | |
|                 v.byte_index(i) := kk(2 downto 0);
 | |
|             end loop;
 | |
|         elsif stage3_busy_next = '0' then  -- stage 3 is free but r1 is not ready: put a bubble in r2
 | |
|             v.req.valid := '0';
 | |
|             v.wait_dc := '0';
 | |
|             v.wait_mmu := '0';
 | |
|         end if;
 | |
| 
 | |
|         stage2_busy_next <= r1.req.valid and stage3_busy_next;
 | |
| 
 | |
|         if r3in.interrupt = '1' then  -- stage 3 is taking an interrupt: invalidate the request entering r2
 | |
|             v.req.valid := '0';
 | |
|         end if;
 | |
| 
 | |
|         r2in <= v;
 | |
|     end process;
 | |
| 
 | |
|     -- Processing done in the third cycle of a load/store instruction.
 | |
|     -- At this stage we can do things that have side effects without
 | |
|     -- fear of the instruction getting flushed.  This is the point at
 | |
|     -- which requests get sent to the MMU.
 | |
|     loadstore1_3: process(all)  -- combinational: computes r3in and the dcache/MMU/writeback/execute1 outputs
 | |
|         variable v             : reg_stage3_t;  -- next value for r3; starts as a copy of r3 and ends up in r3in
 | |
|         variable j             : integer;  -- bit offset into d_in.data of the byte being selected
 | |
|         variable req           : std_ulogic;  -- d_out.valid for a request retried after an MMU lookup
 | |
|         variable mmureq        : std_ulogic;  -- drives m_out.valid
 | |
|         variable mmu_mtspr     : std_ulogic;  -- drives m_out.mtspr
 | |
|         variable write_enable  : std_ulogic;  -- result register write (ORed with do_update for l_out.write_enable)
 | |
|         variable write_data    : std_ulogic_vector(63 downto 0);  -- value for l_out.write_data, chosen by r2.wr_sel
 | |
|         variable do_update     : std_ulogic;  -- rA update write-back (ORed with write_enable for l_out.write_enable)
 | |
|         variable done          : std_ulogic;  -- NOTE(review): cleared below but never read in this process
 | |
|         variable part_done     : std_ulogic;  -- dcache data arrived for the dword_index = 0 half of a two-dword access
 | |
|         variable exception     : std_ulogic;  -- an interrupt is being raised this cycle
 | |
|         variable data_permuted : std_ulogic_vector(63 downto 0);  -- d_in.data with its bytes reordered per r2.byte_index
 | |
|         variable data_trimmed  : std_ulogic_vector(63 downto 0);  -- load result after trimming and sign/zero fill
 | |
|         variable sprval        : std_ulogic_vector(63 downto 0);  -- mfspr result
 | |
|         variable negative      : std_ulogic;  -- sign bit of the loaded value
 | |
|         variable dsisr         : std_ulogic_vector(31 downto 0);  -- DSISR value built for data-side faults
 | |
|         variable itlb_fault    : std_ulogic;  -- NOTE(review): declared but not referenced in this process
 | |
|         variable trim_ctl      : trim_ctl_t;  -- per-byte 2-bit source select for data_trimmed
 | |
|     begin
 | |
|         v := r3;
 | |
|         -- Per-cycle defaults; the logic below overrides them where needed.
 | |
|         req := '0';
 | |
|         mmureq := '0';
 | |
|         mmu_mtspr := '0';
 | |
|         done := '0';
 | |
|         part_done := '0';
 | |
|         exception := '0';
 | |
|         dsisr := (others => '0');
 | |
|         write_enable := '0';
 | |
|         sprval := (others => '0');
 | |
|         do_update := '0';
 | |
|         v.convert_lfs := '0';  -- this and the next two v fields are rebuilt every cycle rather than held
 | |
|         v.srr1 := (others => '0');
 | |
|         v.events := (others => '0');
 | |
| 
 | |
|         -- load data formatting
 | |
|         -- shift and byte-reverse data bytes: result byte i is source byte r2.byte_index(i) of d_in.data
 | |
|         for i in 0 to 7 loop
 | |
|             j := to_integer(r2.byte_index(i)) * 8;
 | |
|             data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
 | |
|         end loop;
 | |
| 
 | |
|         -- Work out the sign bit for sign extension.
 | |
|         -- For unaligned loads crossing two dwords, the sign bit is in the
 | |
|         -- first dword for big-endian (byte_reverse = 1), or the second dword
 | |
|         -- for little-endian.
 | |
|         if r2.req.dword_index = '1' and r2.req.byte_reverse = '1' then  -- use the dword saved in r3.load_data
 | |
|             negative := (r2.req.length(3) and r3.load_data(63)) or  -- length bit n set -> top bit of a 2**n-byte value
 | |
|                         (r2.req.length(2) and r3.load_data(31)) or
 | |
|                         (r2.req.length(1) and r3.load_data(15)) or
 | |
|                         (r2.req.length(0) and r3.load_data(7));
 | |
|         else  -- otherwise use the data arriving from the dcache this cycle
 | |
|             negative := (r2.req.length(3) and data_permuted(63)) or
 | |
|                         (r2.req.length(2) and data_permuted(31)) or
 | |
|                         (r2.req.length(1) and data_permuted(15)) or
 | |
|                         (r2.req.length(0) and data_permuted(7));
 | |
|         end if;
 | |
| 
 | |
|         -- trim and sign-extend: first choose a source for each result byte
 | |
|         for i in 0 to 7 loop
 | |
|             if i < to_integer(unsigned(r2.req.length)) then  -- byte lies within the access length
 | |
|                 if r2.req.dword_index = '1' then
 | |
|                     trim_ctl(i) := '1' & not r2.use_second(i);  -- "10" when r2.use_second(i) is set, otherwise "11"
 | |
|                 else
 | |
|                     trim_ctl(i) := "10";
 | |
|                 end if;
 | |
|             else
 | |
|                 trim_ctl(i) := "00";  -- byte is beyond the access length
 | |
|             end if;
 | |
|         end loop;
 | |
| 
 | |
|         for i in 0 to 7 loop  -- then assemble the result one byte at a time
 | |
|             case trim_ctl(i) is
 | |
|                 when "11" =>  -- byte from the dword saved earlier in r3.load_data
 | |
|                     data_trimmed(i * 8 + 7 downto i * 8) := r3.load_data(i * 8 + 7 downto i * 8);
 | |
|                 when "10" =>  -- byte from the data arriving this cycle
 | |
|                     data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
 | |
|                 when others =>  -- fill byte: the sign bit if sign_extend is set, otherwise zero
 | |
|                     data_trimmed(i * 8 + 7 downto i * 8) := (others => negative and r2.req.sign_extend);
 | |
|             end case;
 | |
|         end loop;
 | |
| 
 | |
|         if HAS_FPU then
 | |
|             -- Single-precision FP conversion for loads: capture the inputs in v for later use
 | |
|             v.ld_sp_data := data_trimmed(31 downto 0);
 | |
|             v.ld_sp_nz := or (data_trimmed(22 downto 0));  -- '1' if any of bits 22..0 is set
 | |
|             v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
 | |
|         end if;
 | |
| 
 | |
|         if d_in.valid = '1' and r2.req.load = '1' then
 | |
|             v.load_data := data_permuted;  -- saved so a following dword can be merged with it (trim_ctl "11")
 | |
|         end if;
 | |
| 
 | |
|         if r2.req.valid = '1' then  -- actions that do not depend on r3.state
 | |
|             if r2.req.read_spr = '1' then
 | |
|                 write_enable := '1';
 | |
|                 -- partial decode on SPR number should be adequate given
 | |
|                 -- the restricted set that get sent down this path
 | |
|                 if r2.req.sprn(8) = '0' and r2.req.sprn(5) = '0' then
 | |
|                     if r2.req.sprn(0) = '0' then
 | |
|                         sprval := x"00000000" & r3.dsisr;  -- DSISR, zero-extended to 64 bits
 | |
|                     else
 | |
|                         sprval := r3.dar;
 | |
|                     end if;
 | |
|                 else
 | |
|                     -- reading one of the SPRs in the MMU
 | |
|                     sprval := m_in.sprval;
 | |
|                 end if;
 | |
|             end if;
 | |
|             if r2.req.align_intr = '1' then
 | |
|                 -- generate alignment interrupt
 | |
|                 exception := '1';
 | |
|             end if;
 | |
|             if r2.req.load_zero = '1' then
 | |
|                 write_enable := '1';
 | |
|             end if;
 | |
|             if r2.req.do_update = '1' then
 | |
|                 do_update := '1';
 | |
|             end if;
 | |
|         end if;
 | |
| 
 | |
|         case r3.state is
 | |
|         when IDLE =>
 | |
|             if d_in.valid = '1' then  -- note: write_enable/do_update set above may be overwritten in here
 | |
|                 if r2.req.two_dwords = '0' or r2.req.dword_index = '1' then  -- single dword, or the dword_index = 1 half
 | |
|                     write_enable := r2.req.load and not r2.req.load_sp;
 | |
|                     if HAS_FPU and r2.req.load_sp = '1' then
 | |
|                         -- SP to DP conversion takes a cycle
 | |
|                         v.state := FINISH_LFS;
 | |
|                         v.convert_lfs := '1';
 | |
|                     else
 | |
|                         -- stores write back rA update
 | |
|                         do_update := r2.req.update and r2.req.store;
 | |
|                     end if;
 | |
|                 else
 | |
|                     part_done := '1';  -- only the dword_index = 0 half has been done so far
 | |
|                 end if;
 | |
|             end if;
 | |
|             if d_in.error = '1' then
 | |
|                 if d_in.cache_paradox = '1' then
 | |
|                     -- signal an interrupt straight away
 | |
|                     exception := '1';
 | |
|                     dsisr(63 - 38) := not r2.req.load;  -- NOTE(review): 63 - n looks like MSB-0 bit n mapped to a VHDL index; confirm
 | |
|                     -- XXX there is no architected bit for this
 | |
|                     -- (probably should be a machine check in fact)
 | |
|                     dsisr(63 - 35) := d_in.cache_paradox;
 | |
|                 else
 | |
|                     -- Look up the translation for TLB miss
 | |
|                     -- and also for permission error and RC error
 | |
|                     -- in case the PTE has been updated.
 | |
|                     mmureq := '1';
 | |
|                     v.state := MMU_LOOKUP;
 | |
|                     v.stage1_en := '0';  -- set back to '1' below on retry, completion or exception
 | |
|                 end if;
 | |
|             end if;
 | |
|             if r2.req.valid = '1' then
 | |
|                 if r2.req.mmu_op = '1' then
 | |
|                     -- send request (tlbie, mtspr, itlb miss) to MMU
 | |
|                     mmureq := not r2.req.write_spr;
 | |
|                     mmu_mtspr := r2.req.write_spr;
 | |
|                     if r2.req.instr_fault = '1' then
 | |
|                         v.state := MMU_LOOKUP;
 | |
|                         v.events.itlb_miss := '1';
 | |
|                     else
 | |
|                         v.state := TLBIE_WAIT;
 | |
|                     end if;
 | |
|                 elsif r2.req.write_spr = '1' then  -- mtspr handled locally: sprn(0) selects DSISR ('0') or DAR ('1')
 | |
|                     if r2.req.sprn(0) = '0' then
 | |
|                         v.dsisr := r2.req.store_data(31 downto 0);
 | |
|                     else
 | |
|                         v.dar := r2.req.store_data;
 | |
|                     end if;
 | |
|                 end if;
 | |
|             end if;
 | |
| 
 | |
|         when MMU_LOOKUP =>
 | |
|             if m_in.done = '1' then
 | |
|                 if r2.req.instr_fault = '0' then
 | |
|                     -- retry the request now that the MMU has installed a TLB entry
 | |
|                     req := '1';
 | |
|                     v.stage1_en := '1';
 | |
|                     v.state := IDLE;
 | |
|                 end if;
 | |
|             end if;
 | |
|             if m_in.err = '1' then  -- MMU reported a fault: raise an exception and record the cause bits
 | |
|                 exception := '1';
 | |
|                 dsisr(63 - 33) := m_in.invalid;
 | |
|                 dsisr(63 - 36) := m_in.perm_error;
 | |
|                 dsisr(63 - 38) := r2.req.store or r2.req.dcbz;
 | |
|                 dsisr(63 - 44) := m_in.badtree;
 | |
|                 dsisr(63 - 45) := m_in.rc_error;
 | |
|             end if;
 | |
| 
 | |
|         when TLBIE_WAIT =>  -- no action here; the state is left via the complete/exception check below
 | |
| 
 | |
|         when FINISH_LFS =>  -- write back the converted lfs result
 | |
|             write_enable := '1';
 | |
| 
 | |
|         end case;
 | |
| 
 | |
|         if complete = '1' or exception = '1' then  -- complete is a signal driven outside this process
 | |
|             v.stage1_en := '1';
 | |
|             v.state := IDLE;
 | |
|         end if;
 | |
| 
 | |
|         v.events.load_complete := r2.req.load and complete;
 | |
|         v.events.store_complete := (r2.req.store or r2.req.dcbz) and complete;
 | |
| 
 | |
|         -- generate DSI or DSegI for load/store exceptions
 | |
|         -- or ISI or ISegI for instruction fetch exceptions
 | |
|         v.interrupt := exception;
 | |
|         if exception = '1' then
 | |
|             v.nia := r2.req.nia;
 | |
|             if r2.req.align_intr = '1' then
 | |
|                 v.intr_vec := 16#600#;  -- alignment interrupt
 | |
|                 v.dar := r2.req.addr;
 | |
|             elsif r2.req.instr_fault = '0' then
 | |
|                 v.dar := r2.req.addr;
 | |
|                 if m_in.segerr = '0' then
 | |
|                     v.intr_vec := 16#300#;  -- DSI
 | |
|                     v.dsisr := dsisr;
 | |
|                 else
 | |
|                     v.intr_vec := 16#380#;  -- DSegI
 | |
|                 end if;
 | |
|             else
 | |
|                 if m_in.segerr = '0' then
 | |
|                     v.srr1(47 - 33) := m_in.invalid;  -- NOTE(review): 47 - n presumably maps ISA bit n onto the srr1 vector; confirm its range
 | |
|                     v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
 | |
|                     v.srr1(47 - 44) := m_in.badtree;
 | |
|                     v.srr1(47 - 45) := m_in.rc_error;
 | |
|                     v.intr_vec := 16#400#;  -- ISI
 | |
|                 else
 | |
|                     v.intr_vec := 16#480#;  -- ISegI
 | |
|                 end if;
 | |
|             end if;
 | |
|         end if;
 | |
| 
 | |
|         case r2.wr_sel is  -- choose what goes out on l_out.write_data
 | |
|         when "00" =>
 | |
|             -- mfspr result
 | |
|             write_data := sprval;
 | |
|         when "01" =>
 | |
|             -- update reg
 | |
|             write_data := r2.addr0;
 | |
|         when "10" =>
 | |
|             -- lfs result
 | |
|             write_data := load_dp_data;
 | |
|         when others =>
 | |
|             -- load data
 | |
|             write_data := data_trimmed;
 | |
|         end case;
 | |
| 
 | |
|         -- Update outputs to dcache
 | |
|         if stage1_issue_enable = '1' then  -- forward the stage-1 request
 | |
|             d_out.valid <= stage1_dcreq;
 | |
|             d_out.load <= stage1_req.load;
 | |
|             d_out.dcbz <= stage1_req.dcbz;
 | |
|             d_out.nc <= stage1_req.nc;
 | |
|             d_out.reserve <= stage1_req.reserve;
 | |
|             d_out.atomic <= stage1_req.atomic;
 | |
|             d_out.atomic_last <= stage1_req.atomic_last;
 | |
|             d_out.addr <= stage1_req.addr;
 | |
|             d_out.byte_sel <= stage1_req.byte_sel;
 | |
|             d_out.virt_mode <= stage1_req.virt_mode;
 | |
|             d_out.priv_mode <= stage1_req.priv_mode;
 | |
|         else  -- otherwise re-present r2.req; it is valid only when retrying after an MMU lookup (req)
 | |
|             d_out.valid <= req;
 | |
|             d_out.load <= r2.req.load;
 | |
|             d_out.dcbz <= r2.req.dcbz;
 | |
|             d_out.nc <= r2.req.nc;
 | |
|             d_out.reserve <= r2.req.reserve;
 | |
|             d_out.atomic <= r2.req.atomic;
 | |
|             d_out.atomic_last <= r2.req.atomic_last;
 | |
|             d_out.addr <= r2.req.addr;
 | |
|             d_out.byte_sel <= r2.req.byte_sel;
 | |
|             d_out.virt_mode <= r2.req.virt_mode;
 | |
|             d_out.priv_mode <= r2.req.priv_mode;
 | |
|         end if;
 | |
|         if stage1_dreq = '1' then  -- the store data source is selected separately from the request fields
 | |
|             d_out.data <= store_data;
 | |
|         else
 | |
|             d_out.data <= r2.req.store_data;
 | |
|         end if;
 | |
|         d_out.hold <= r2.req.valid and r2.req.load_sp and d_in.valid;  -- asserted when dcache data arrives for a valid load_sp request
 | |
| 
 | |
|         -- Update outputs to MMU
 | |
|         m_out.valid <= mmureq;
 | |
|         m_out.iside <= r2.req.instr_fault;
 | |
|         m_out.load <= r2.req.load;
 | |
|         m_out.priv <= r2.req.priv_mode;
 | |
|         m_out.tlbie <= r2.req.tlbie;
 | |
|         m_out.mtspr <= mmu_mtspr;
 | |
|         m_out.sprn <= r2.req.sprn;
 | |
|         m_out.addr <= r2.req.addr;
 | |
|         m_out.slbia <= r2.req.is_slbia;
 | |
|         m_out.rs <= r2.req.store_data;
 | |
| 
 | |
|         -- Update outputs to writeback
 | |
|         l_out.valid <= complete;
 | |
|         l_out.instr_tag <= r2.req.instr_tag;
 | |
|         l_out.write_enable <= write_enable or do_update;
 | |
|         l_out.write_reg <= r2.req.write_reg;
 | |
|         l_out.write_data <= write_data;
 | |
|         l_out.xerc <= r2.req.xerc;
 | |
|         l_out.rc <= r2.req.rc and complete;
 | |
|         l_out.store_done <= d_in.store_done;
 | |
|         l_out.interrupt <= r3.interrupt;  -- the interrupt fields are taken from r3, not from this cycle's v
 | |
|         l_out.intr_vec <= r3.intr_vec;
 | |
|         l_out.srr0 <= r3.nia;
 | |
|         l_out.srr1 <= r3.srr1;
 | |
| 
 | |
|         -- update busy signal back to execute1
 | |
|         e_out.busy <= busy;
 | |
|         e_out.in_progress <= in_progress;
 | |
|         e_out.interrupt <= r3.interrupt;
 | |
| 
 | |
|         events <= r3.events;
 | |
| 
 | |
|         -- Busy calculation: a valid r2.req that has not completed, partly completed or faulted this cycle.
 | |
|         stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);
 | |
| 
 | |
|         -- Update registers
 | |
|         r3in <= v;
 | |
| 
 | |
|     end process;
 | |
| 
 | |
|     l1_log: if LOG_LENGTH > 0 generate
 | |
|         -- Registered snapshot of ten status bits, exported on log_out.
 | |
|         -- Only elaborated when logging is enabled (LOG_LENGTH > 0).
 | |
|         signal log_data : std_ulogic_vector(9 downto 0);
 | |
|     begin
 | |
|         log_out <= log_data;
 | |
| 
 | |
|         ls1_log: process(clk)
 | |
|             -- position of r3.state within state_t, as a 3-bit number
 | |
|             variable state_bits : unsigned(2 downto 0);
 | |
|         begin
 | |
|             if rising_edge(clk) then
 | |
|                 state_bits := to_unsigned(state_t'pos(r3.state), 3);
 | |
|                 -- Bit layout, most significant first.
 | |
|                 log_data(9) <= e_out.busy;
 | |
|                 log_data(8) <= l_out.interrupt;
 | |
|                 log_data(7) <= l_out.valid;
 | |
|                 log_data(6) <= m_out.valid;
 | |
|                 log_data(5) <= d_out.valid;
 | |
|                 log_data(4) <= m_in.done;
 | |
|                 log_data(3) <= r2.req.dword_index;
 | |
|                 log_data(2 downto 0) <= std_ulogic_vector(state_bits);
 | |
|             end if;
 | |
|         end process;
 | |
|     end generate;
 | |
| 
 | |
| end;
 |