diff --git a/Makefile b/Makefile index a830a5f..ed74176 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o insn_helpers.o: -loadstore1.o: common.o helpers.o +loadstore1.o: common.o helpers.o decode_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o multiply.o: common.o decode_types.o @@ -131,7 +131,7 @@ dmi_dtm_tb: dmi_dtm_tb.o sim_vhpi_c.o sim_bram_helpers_c.o tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) tests_console = $(sort $(patsubst tests/%.console_out,%,$(wildcard tests/*.console_out))) -check: $(tests) $(test_console) test_micropython test_micropython_long +check: $(tests) $(tests_console) test_micropython test_micropython_long check_light: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 test_micropython test_micropython_long $(tests_console) diff --git a/README.md b/README.md index 8bf4622..98f2140 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,10 @@ You can try out Microwatt/Micropython without hardware by using the ghdl simulat - Build micropython. If you aren't building on a ppc64le box you will need a cross compiler. If it isn't available on your distro - grab the powerpc64le-power8 toolchain from https://toolchains.bootlin.com + grab the powerpc64le-power8 toolchain from https://toolchains.bootlin.com. + You may need to set the CROSS_COMPILE environment variable + to the prefix used for your cross compilers. The default is + powerpc64le-linux-gnu-. 
``` git clone https://github.com/micropython/micropython.git diff --git a/common.vhdl b/common.vhdl index d10d857..ed97e0c 100644 --- a/common.vhdl +++ b/common.vhdl @@ -7,6 +7,15 @@ use work.decode_types.all; package common is + -- MSR bit numbers + constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode + constant MSR_EE : integer := (63 - 48); -- External interrupt Enable + constant MSR_PR : integer := (63 - 49); -- PRoblem state + constant MSR_IR : integer := (63 - 58); -- Instruction Relocation + constant MSR_DR : integer := (63 - 59); -- Data Relocation + constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt + constant MSR_LE : integer := (63 - 63); -- Little Endian + -- SPR numbers subtype spr_num_t is integer range 0 to 1023; @@ -109,6 +118,7 @@ package common is type Decode2ToExecute1Type is record valid: std_ulogic; + unit : unit_t; insn_type: insn_type_t; nia: std_ulogic_vector(63 downto 0); write_reg: gspr_index_t; @@ -141,7 +151,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := - (valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + (valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', @@ -204,7 +214,7 @@ package common is type Execute1ToLoadstore1Type is record valid : std_ulogic; - load : std_ulogic; -- is this a load or store + op : insn_type_t; -- what ld/st op to do addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -219,13 +229,14 @@ package common is reserve : std_ulogic; -- set for larx/stcx. 
rc : std_ulogic; -- set for stcx. end record; - constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', ci => '0', byte_reverse => '0', + constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', rc => '0', others => (others => '0')); type Loadstore1ToDcacheType is record valid : std_ulogic; load : std_ulogic; + dcbz : std_ulogic; nc : std_ulogic; reserve : std_ulogic; addr : std_ulogic_vector(63 downto 0); diff --git a/dcache.vhdl b/dcache.vhdl index 7e553bf..7d61a85 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -581,8 +581,12 @@ begin wr_data <= r0.data; wr_sel <= r0.byte_sel; else - -- Otherwise, we might be doing a reload - wr_data <= wishbone_in.dat; + -- Otherwise, we might be doing a reload or a DCBZ + if r1.req.dcbz = '1' then + wr_data <= (others => '0'); + else + wr_data <= wishbone_in.dat; + end if; wr_sel <= (others => '1'); wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS)); end if; @@ -593,7 +597,8 @@ begin if reloading and wishbone_in.ack = '1' and r1.store_way = i then do_write <= '1'; end if; - if req_op = OP_STORE_HIT and req_hit_way = i and cancel_store = '0' then + if req_op = OP_STORE_HIT and req_hit_way = i and cancel_store = '0' and + r1.req.dcbz = '0' then assert not reloading report "Store hit while in state:" & state_t'image(r1.state) severity FAILURE; @@ -718,18 +723,54 @@ begin r1.wb.we <= '0'; r1.state <= NC_LOAD_WAIT_ACK; - when OP_STORE_HIT | OP_STORE_MISS => - r1.wb.sel <= r0.byte_sel; - r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; - r1.wb.dat <= r0.data; - if cancel_store = '0' then + when OP_STORE_HIT | OP_STORE_MISS => + if r0.dcbz = '0' then + r1.wb.sel <= r0.byte_sel; + r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; + r1.wb.dat <= r0.data; + if cancel_store = '0' then + r1.wb.cyc <= '1'; + r1.wb.stb <= '1'; + r1.wb.we <= 
'1'; + r1.state <= STORE_WAIT_ACK; + else + r1.stcx_fail <= '1'; + r1.state <= IDLE; + end if; + else + -- dcbz is handled much like a load miss except + -- that we are writing to memory instead of reading + r1.store_index <= req_index; + r1.store_row <= get_row(req_laddr); + + if req_op = OP_STORE_HIT then + r1.store_way <= req_hit_way; + else + r1.store_way <= replace_way; + + -- Force misses on the victim way while zeroing + cache_valids(req_index)(replace_way) <= '0'; + + -- Store new tag in selected way + for i in 0 to NUM_WAYS-1 loop + if i = replace_way then + tagset := cache_tags(req_index); + write_tag(i, tagset, req_tag); + cache_tags(req_index) <= tagset; + end if; + end loop; + end if; + + -- Set up for wishbone writes + r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0); + r1.wb.sel <= (others => '1'); + r1.wb.we <= '1'; + r1.wb.dat <= (others => '0'); r1.wb.cyc <= '1'; r1.wb.stb <= '1'; - r1.wb.we <= '1'; - r1.state <= STORE_WAIT_ACK; - else - r1.stcx_fail <= '1'; - r1.state <= IDLE; + + -- Handle the rest like a load miss + r1.state <= RELOAD_WAIT_ACK; end if; -- OP_NONE and OP_BAD do nothing @@ -766,7 +807,7 @@ begin -- not idle, which we don't currently know how to deal -- with. 
-- - if r1.store_row = get_row(r1.req.addr) then + if r1.store_row = get_row(r1.req.addr) and r1.req.dcbz = '0' then r1.slow_data <= wishbone_in.dat; end if; diff --git a/decode1.vhdl b/decode1.vhdl index 8c7d5f2..785b669 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -164,7 +164,7 @@ architecture behaviour of decode1 is 2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst 2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt 2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst - -- 2#1111110110# dcbz + 2#1111110110# => (LDST, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- dcbz 2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu 2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo 2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu diff --git a/decode2.vhdl b/decode2.vhdl index ff773aa..edcc50c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -304,6 +304,7 @@ begin -- execute unit v.e.nia := d_in.nia; + v.e.unit := d_in.decode.unit; v.e.insn_type := d_in.decode.insn_type; v.e.read_reg1 := decoded_reg_a.reg; v.e.read_data1 := decoded_reg_a.data; diff --git a/execute1.vhdl b/execute1.vhdl index e32285d..8286d30 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -50,6 +50,11 @@ architecture behaviour of execute1 is slow_op_oe : std_ulogic; slow_op_xerc : xer_common_t; end record; + constant reg_type_init : reg_type := + (e => Execute1ToWritebackInit, 
lr_update => '0', + mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', + slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, + others => (others => '0')); signal r, rin : reg_type; @@ -73,6 +78,28 @@ architecture behaviour of execute1 is signal x_to_divider: Execute1ToDividerType; signal divider_to_x: DividerToExecute1Type; + type privilege_level is (USER, SUPER); + type op_privilege_array is array(insn_type_t) of privilege_level; + constant op_privilege: op_privilege_array := ( + OP_ATTN => SUPER, + OP_MFMSR => SUPER, + OP_MTMSRD => SUPER, + OP_RFID => SUPER, + others => USER + ); + + function instr_is_privileged(op: insn_type_t; insn: std_ulogic_vector(31 downto 0)) + return boolean is + begin + if op_privilege(op) = SUPER then + return true; + elsif op = OP_MFSPR or op = OP_MTSPR then + return insn(20) = '1'; + else + return false; + end if; + end; + procedure set_carry(e: inout Execute1ToWritebackType; carry32 : in std_ulogic; carry : in std_ulogic) is @@ -126,11 +153,11 @@ architecture behaviour of execute1 is -- tion MSR bits are not saved or restored. -- Full function MSR bits lie in the range 0:32, 37:41, and -- 48:63, and partial function MSR bits lie in the range - -- 33:36 and 42:47. + -- 33:36 and 42:47. (Note this is IBM bit numbering). 
msr_out := (others => '0'); - msr_out(32 downto 0) := msr(32 downto 0); - msr_out(41 downto 37) := msr(41 downto 37); - msr_out(63 downto 48) := msr(63 downto 48); + msr_out(63 downto 31) := msr(63 downto 31); + msr_out(26 downto 22) := msr(26 downto 22); + msr_out(15 downto 0) := msr(15 downto 0); return msr_out; end; @@ -195,14 +222,20 @@ begin execute1_0: process(clk) begin if rising_edge(clk) then - r <= rin; - ctrl <= ctrl_tmp; - assert not (r.lr_update = '1' and e_in.valid = '1') - report "LR update collision with valid in EX1" - severity failure; - if r.lr_update = '1' then - report "LR update to " & to_hstring(r.next_lr); - end if; + if rst = '1' then + r <= reg_type_init; + ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0'); + ctrl.irq_state <= WRITE_SRR0; + else + r <= rin; + ctrl <= ctrl_tmp; + assert not (r.lr_update = '1' and e_in.valid = '1') + report "LR update collision with valid in EX1" + severity failure; + if r.lr_update = '1' then + report "LR update to " & to_hstring(r.next_lr); + end if; + end if; end if; end process; @@ -372,7 +405,7 @@ begin ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1); irq_valid := '0'; - if ctrl.msr(63 - 48) = '1' then + if ctrl.msr(MSR_EE) = '1' then if ctrl.dec(63) = '1' then ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#900#, 64)); report "IRQ valid: DEC"; @@ -409,21 +442,37 @@ begin v.e.exc_write_reg := fast_spr_num(SPR_SRR1); v.e.exc_write_data := ctrl.srr1; v.e.exc_write_enable := '1'; - ctrl_tmp.msr(63 - 48) <= '0'; -- clear EE + ctrl_tmp.msr(MSR_SF) <= '1'; + ctrl_tmp.msr(MSR_EE) <= '0'; + ctrl_tmp.msr(MSR_PR) <= '0'; + ctrl_tmp.msr(MSR_IR) <= '0'; + ctrl_tmp.msr(MSR_DR) <= '0'; + ctrl_tmp.msr(MSR_RI) <= '0'; + ctrl_tmp.msr(MSR_LE) <= '1'; f_out.redirect <= '1'; f_out.redirect_nia <= ctrl.irq_nia; v.e.valid := e_in.valid; report "Writing SRR1: " & to_hstring(ctrl.srr1); - elsif irq_valid = '1' then + elsif irq_valid = '1' and e_in.valid = '1' then -- we need two cycles to write srr0 and 1 
-- will need more when we have to write DSISR, DAR and HIER -- Don't deliver the interrupt until we have a valid instruction -- coming in, so we have a valid NIA to put in SRR0. - exception := e_in.valid; + exception := '1'; ctrl_tmp.srr1 <= msr_copy(ctrl.msr); - elsif e_in.valid = '1' then + elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and + instr_is_privileged(e_in.insn_type, e_in.insn) then + -- generate a program interrupt + exception := '1'; + ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64)); + ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + -- set bit 45 to indicate privileged instruction type interrupt + ctrl_tmp.srr1(63 - 45) <= '1'; + report "privileged instruction"; + + elsif e_in.valid = '1' and e_in.unit = ALU then report "execute nia " & to_hstring(e_in.nia); @@ -555,7 +604,7 @@ begin when OP_B => f_out.redirect <= '1'; if (insn_aa(e_in.insn)) then - f_out.redirect_nia <= std_ulogic_vector(signed(b_in)); + f_out.redirect_nia <= b_in; else f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in)); end if; @@ -571,7 +620,7 @@ begin if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then f_out.redirect <= '1'; if (insn_aa(e_in.insn)) then - f_out.redirect_nia <= std_ulogic_vector(signed(b_in)); + f_out.redirect_nia <= b_in; else f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in)); end if; @@ -594,7 +643,17 @@ begin when OP_RFID => f_out.redirect <= '1'; f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0 - ctrl_tmp.msr <= msr_copy(std_ulogic_vector(signed(b_in))); -- srr1 + -- Can't use msr_copy here because the partial function MSR + -- bits should be left unchanged, not zeroed. 
+ ctrl_tmp.msr(63 downto 31) <= b_in(63 downto 31); + ctrl_tmp.msr(26 downto 22) <= b_in(26 downto 22); + ctrl_tmp.msr(15 downto 0) <= b_in(15 downto 0); + if b_in(MSR_PR) = '1' then + ctrl_tmp.msr(MSR_EE) <= '1'; + ctrl_tmp.msr(MSR_IR) <= '1'; + ctrl_tmp.msr(MSR_DR) <= '1'; + end if; + when OP_CMPB => result := ppc_cmpb(c_in, b_in); result_en := '1'; @@ -668,7 +727,7 @@ begin end loop; end if; when OP_MFMSR => - result := msr_copy(ctrl.msr); + result := ctrl.msr; result_en := '1'; when OP_MFSPR => report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & @@ -724,9 +783,23 @@ begin v.e.write_cr_mask := num_to_fxm(crnum); end if; v.e.write_cr_data := c_in(31 downto 0); - when OP_MTMSRD => - -- FIXME handle just the bits we need to. - ctrl_tmp.msr <= msr_copy(c_in); + when OP_MTMSRD => + if e_in.insn(16) = '1' then + -- just update EE and RI + ctrl_tmp.msr(MSR_EE) <= c_in(MSR_EE); + ctrl_tmp.msr(MSR_RI) <= c_in(MSR_RI); + else + -- Architecture says to leave out bits 3 (HV), 51 (ME) + -- and 63 (LE) (IBM bit numbering) + ctrl_tmp.msr(63 downto 61) <= c_in(63 downto 61); + ctrl_tmp.msr(59 downto 13) <= c_in(59 downto 13); + ctrl_tmp.msr(11 downto 1) <= c_in(11 downto 1); + if c_in(MSR_PR) = '1' then + ctrl_tmp.msr(MSR_EE) <= '1'; + ctrl_tmp.msr(MSR_IR) <= '1'; + ctrl_tmp.msr(MSR_DR) <= '1'; + end if; + end if; when OP_MTSPR => report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(c_in); @@ -781,11 +854,6 @@ begin stall_out <= '1'; x_to_divider.valid <= '1'; - when OP_LOAD | OP_STORE => - -- loadstore/dcache has its own port to writeback - v.e.valid := '0'; - lv.valid := '1'; - when others => terminate_out <= '1'; report "illegal"; @@ -811,6 +879,14 @@ begin report "Delayed LR update to " & to_hstring(next_nia); stall_out <= '1'; end if; + + elsif e_in.valid = '1' then + -- instruction for other units, i.e. 
LDST + v.e.valid := '0'; + if e_in.unit = LDST then + lv.valid := '1'; + end if; + elsif r.lr_update = '1' then result_en := '1'; result := r.next_lr; @@ -877,9 +953,7 @@ begin v.e.write_enable := result_en; -- Outputs to loadstore1 (async) - if e_in.insn_type = OP_LOAD then - lv.load := '1'; - end if; + lv.op := e_in.insn_type; lv.addr1 := a_in; lv.addr2 := b_in; lv.data := c_in; diff --git a/hello_world/Makefile b/hello_world/Makefile index 674095e..a609199 100644 --- a/hello_world/Makefile +++ b/hello_world/Makefile @@ -1,7 +1,7 @@ ARCH = $(shell uname -m) ifneq ("$(ARCH)", "ppc64") ifneq ("$(ARCH)", "ppc64le") - CROSS_COMPILE ?= powerpc64le-linux- + CROSS_COMPILE ?= powerpc64le-linux-gnu- endif endif diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 518feee..90650db 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -3,6 +3,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.decode_types.all; use work.common.all; use work.helpers.all; @@ -41,7 +42,8 @@ architecture behave of loadstore1 is type reg_stage_t is record -- latch most of the input request - load : std_ulogic; + load : std_ulogic; + dcbz : std_ulogic; addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); @@ -146,59 +148,63 @@ begin two_dwords := or (r.second_bytes); -- load data formatting - if r.load = '1' then - byte_offset := unsigned(r.addr(2 downto 0)); - brev_lenm1 := "000"; - if r.byte_reverse = '1' then - brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; - end if; + byte_offset := unsigned(r.addr(2 downto 0)); + brev_lenm1 := "000"; + if r.byte_reverse = '1' then + brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; + end if; - -- shift and byte-reverse data bytes - for i in 0 to 7 loop - kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); - use_second(i) := kk(3); - j := to_integer(kk(2 downto 0)) * 8; - data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 
downto j); - end loop; - - -- Work out the sign bit for sign extension. - -- Assumes we are not doing both sign extension and byte reversal, - -- in that for unaligned loads crossing two dwords we end up - -- using a bit from the second dword, whereas for a byte-reversed - -- (i.e. big-endian) load the sign bit would be in the first dword. - negative := (r.length(3) and data_permuted(63)) or - (r.length(2) and data_permuted(31)) or - (r.length(1) and data_permuted(15)) or - (r.length(0) and data_permuted(7)); - - -- trim and sign-extend - for i in 0 to 7 loop - if i < to_integer(unsigned(r.length)) then - if two_dwords = '1' then - trim_ctl(i) := '1' & not use_second(i); - else - trim_ctl(i) := not use_second(i) & '0'; - end if; + -- shift and byte-reverse data bytes + for i in 0 to 7 loop + kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); + use_second(i) := kk(3); + j := to_integer(kk(2 downto 0)) * 8; + data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j); + end loop; + + -- Work out the sign bit for sign extension. + -- Assumes we are not doing both sign extension and byte reversal, + -- in that for unaligned loads crossing two dwords we end up + -- using a bit from the second dword, whereas for a byte-reversed + -- (i.e. big-endian) load the sign bit would be in the first dword. 
+ negative := (r.length(3) and data_permuted(63)) or + (r.length(2) and data_permuted(31)) or + (r.length(1) and data_permuted(15)) or + (r.length(0) and data_permuted(7)); + + -- trim and sign-extend + for i in 0 to 7 loop + if i < to_integer(unsigned(r.length)) then + if two_dwords = '1' then + trim_ctl(i) := '1' & not use_second(i); else - trim_ctl(i) := '0' & (negative and r.sign_extend); + trim_ctl(i) := not use_second(i) & '0'; end if; - case trim_ctl(i) is - when "11" => - data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); - when "10" => - data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); - when "01" => - data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; - when others => - data_trimmed(i * 8 + 7 downto i * 8) := x"00"; - end case; - end loop; - end if; + else + trim_ctl(i) := '0' & (negative and r.sign_extend); + end if; + case trim_ctl(i) is + when "11" => + data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); + when "10" => + data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); + when "01" => + data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; + when others => + data_trimmed(i * 8 + 7 downto i * 8) := x"00"; + end case; + end loop; case r.state is when IDLE => if l_in.valid = '1' then - v.load := l_in.load; + v.load := '0'; + v.dcbz := '0'; + if l_in.op = OP_LOAD then + v.load := '1'; + elsif l_in.op = OP_DCBZ then + v.dcbz := '1'; + end if; v.addr := lsu_sum; v.write_reg := l_in.write_reg; v.length := l_in.length; @@ -229,18 +235,16 @@ begin v.addr := lsu_sum; -- Do byte reversing and rotating for stores in the first cycle - if v.load = '0' then - byte_offset := unsigned(lsu_sum(2 downto 0)); - brev_lenm1 := "000"; - if l_in.byte_reverse = '1' then - brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; - end if; - for i in 0 to 7 loop - k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; - j := to_integer(k) * 8; - v.store_data(j + 7 downto j) := 
l_in.data(i * 8 + 7 downto i * 8); - end loop; + byte_offset := unsigned(lsu_sum(2 downto 0)); + brev_lenm1 := "000"; + if l_in.byte_reverse = '1' then + brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; end if; + for i in 0 to 7 loop + k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; + j := to_integer(k) * 8; + v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); + end loop; req := '1'; stall := '1'; @@ -293,6 +297,7 @@ begin -- Update outputs to dcache d_out.valid <= req; d_out.load <= v.load; + d_out.dcbz <= v.dcbz; d_out.nc <= v.nc; d_out.reserve <= v.reserve; d_out.addr <= addr; diff --git a/rust_lib_demo/Makefile b/rust_lib_demo/Makefile index 26aebf8..fdbb18b 100644 --- a/rust_lib_demo/Makefile +++ b/rust_lib_demo/Makefile @@ -1,7 +1,7 @@ ARCH = $(shell uname -m) ifneq ("$(ARCH)", "ppc64") ifneq ("$(ARCH)", "ppc64le") - CROSS_COMPILE ?= powerpc64le-linux- + CROSS_COMPILE ?= powerpc64le-linux-gnu- endif endif diff --git a/tests/Makefile.test b/tests/Makefile.test index 9676370..250135d 100644 --- a/tests/Makefile.test +++ b/tests/Makefile.test @@ -1,7 +1,7 @@ ARCH = $(shell uname -m) ifneq ("$(ARCH)", "ppc64") ifneq ("$(ARCH)", "ppc64le") - CROSS_COMPILE ?= powerpc64le-linux- + CROSS_COMPILE ?= powerpc64le-linux-gnu- endif endif diff --git a/tests/privileged/Makefile b/tests/privileged/Makefile new file mode 100644 index 0000000..7c24998 --- /dev/null +++ b/tests/privileged/Makefile @@ -0,0 +1,3 @@ +TEST=privileged + +include ../Makefile.test diff --git a/tests/privileged/head.S b/tests/privileged/head.S new file mode 100644 index 0000000..9b76234 --- /dev/null +++ b/tests/privileged/head.S @@ -0,0 +1,91 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define STACK_TOP 0x8000 + +/* Load an immediate 64-bit value into a register */ +#define LOAD_IMM64(r, e) \ + lis r,(e)@highest; \ + ori r,r,(e)@higher; \ + rldicr r,r, 32, 31; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + + .section ".head","ax" + + /* + * Microwatt currently enters in LE mode at 0x0, so we don't need to + * do any endian fix ups + */ + . = 0 +.global _start +_start: + b boot_entry + +.global boot_entry +boot_entry: + /* setup stack */ + LOAD_IMM64(%r1, STACK_TOP - 0x100) + LOAD_IMM64(%r12, main) + mtctr %r12 + bctrl + attn // terminate on exit + b . + + /* Call a function with a specified MSR value */ + .global call_with_msr +call_with_msr: + mtsrr0 %r4 + mr %r12,%r4 + mtsrr1 %r5 + rfid + +#define EXCEPTION(nr) \ + .= nr ;\ + li %r3,nr ;\ + blr + + EXCEPTION(0x300) + EXCEPTION(0x380) + EXCEPTION(0x400) + EXCEPTION(0x480) + EXCEPTION(0x500) + EXCEPTION(0x600) + EXCEPTION(0x700) + EXCEPTION(0x800) + EXCEPTION(0x900) + EXCEPTION(0x980) + EXCEPTION(0xa00) + EXCEPTION(0xb00) + + /* + * System call - used to exit from tests where MSR[PR] + * may have been set. + */ + . = 0xc00 + blr + + EXCEPTION(0xd00) + EXCEPTION(0xe00) + EXCEPTION(0xe20) + EXCEPTION(0xe40) + EXCEPTION(0xe60) + EXCEPTION(0xe80) + EXCEPTION(0xf00) + EXCEPTION(0xf20) + EXCEPTION(0xf40) + EXCEPTION(0xf60) + EXCEPTION(0xf80) diff --git a/tests/privileged/powerpc.lds b/tests/privileged/powerpc.lds new file mode 100644 index 0000000..8c8c65b --- /dev/null +++ b/tests/privileged/powerpc.lds @@ -0,0 +1,13 @@ +SECTIONS +{ + _start = .; + . 
= 0; + .head : { + KEEP(*(.head)) + } + . = 0x2000; + .text : { *(.text) } + . = 0x4000; + .data : { *(.data) } + .bss : { *(.bss) } +} diff --git a/tests/privileged/privileged.c b/tests/privileged/privileged.c new file mode 100644 index 0000000..073dc07 --- /dev/null +++ b/tests/privileged/privileged.c @@ -0,0 +1,152 @@ +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> + +#include "console.h" + +#define MSR_EE 0x8000 +#define MSR_PR 0x4000 +#define MSR_IR 0x0020 +#define MSR_DR 0x0010 + +extern int call_with_msr(unsigned long arg, int (*fn)(unsigned long), unsigned long msr); + +#define SRR0 26 +#define SRR1 27 + +static inline unsigned long mfspr(int sprnum) +{ + long val; + + __asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (sprnum)); + return val; +} + +static inline void mtspr(int sprnum, unsigned long val) +{ + __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val)); +} + +void print_string(const char *str) +{ + for (; *str; ++str) + putchar(*str); +} + +void print_hex(unsigned long val, int ndigits) +{ + int i, x; + + for (i = (ndigits - 1) * 4; i >= 0; i -= 4) { + x = (val >> i) & 0xf; + if (x >= 10) + putchar(x + 'a' - 10); + else + putchar(x + '0'); + } +} + +// i < 100 +void print_test_number(int i) +{ + print_string("test "); + putchar(48 + i/10); + putchar(48 + i%10); + putchar(':'); +} + +int priv_fn_1(unsigned long x) +{ + __asm__ volatile("attn"); + __asm__ volatile("li 3,0; sc"); + return 0; +} + +int priv_fn_2(unsigned long x) +{ + __asm__ volatile("mfmsr 3"); + __asm__ volatile("sc"); + return 0; +} + +int priv_fn_3(unsigned long x) +{ + __asm__ volatile("mtmsrd 3"); + __asm__ volatile("li 3,0; sc"); + return 0; +} + +int priv_fn_4(unsigned long x) +{ + __asm__ volatile("rfid"); + __asm__ volatile("li 3,0; sc"); + return 0; +} + +int priv_fn_5(unsigned long x) +{ + __asm__ volatile("mfsrr0 3"); + __asm__ volatile("sc"); + return 0; +} + +int priv_fn_6(unsigned long x) +{ + __asm__ volatile("mtsrr0 3"); + __asm__ volatile("sc"); + return 0; +} + +int 
priv_test(int (*fn)(unsigned long)) +{ + unsigned long msr; + int vec; + + __asm__ volatile ("mtdec %0" : : "r" (0x7fffffff)); + __asm__ volatile ("mfmsr %0" : "=r" (msr)); + /* this should fail */ + vec = call_with_msr(0, fn, msr | MSR_PR); + if (vec != 0x700) + return vec | 1; + /* SRR1 should be set correctly */ + msr |= MSR_PR | MSR_EE | MSR_IR | MSR_DR; + if (mfspr(SRR1) != (msr | 0x40000)) + return 2; + return 0; +} + +int fail = 0; + +void do_test(int num, int (*fn)(unsigned long)) +{ + int ret; + + print_test_number(num); + ret = priv_test(fn); + if (ret == 0) { + print_string("PASS\r\n"); + } else { + fail = 1; + print_string("FAIL "); + print_hex(ret, 4); + print_string(" SRR0="); + print_hex(mfspr(SRR0), 16); + print_string(" SRR1="); + print_hex(mfspr(SRR1), 16); + print_string("\r\n"); + } +} + +int main(void) +{ + potato_uart_init(); + + do_test(1, priv_fn_1); + do_test(2, priv_fn_2); + do_test(3, priv_fn_3); + do_test(4, priv_fn_4); + do_test(5, priv_fn_5); + do_test(6, priv_fn_6); + + return fail; +} diff --git a/tests/test_privileged.bin b/tests/test_privileged.bin new file mode 100755 index 0000000..5b8ce63 Binary files /dev/null and b/tests/test_privileged.bin differ diff --git a/tests/test_privileged.console_out b/tests/test_privileged.console_out new file mode 100644 index 0000000..a49bb9b --- /dev/null +++ b/tests/test_privileged.console_out @@ -0,0 +1,6 @@ +test 01:PASS +test 02:PASS +test 03:PASS +test 04:PASS +test 05:PASS +test 06:PASS diff --git a/tests/update_console_tests b/tests/update_console_tests index 11306bb..94e74d1 100755 --- a/tests/update_console_tests +++ b/tests/update_console_tests @@ -3,7 +3,7 @@ # Script to update console related tests from source # -for i in sc illegal decrementer xics ; do +for i in sc illegal decrementer xics privileged ; do cd $i make cd - diff --git a/wishbone_debug_master.vhdl b/wishbone_debug_master.vhdl index 11b9ee3..ddf6923 100644 --- a/wishbone_debug_master.vhdl +++ b/wishbone_debug_master.vhdl 
@@ -49,6 +49,7 @@ architecture behaviour of wishbone_debug_master is type state_t is (IDLE, WB_CYCLE, DMI_WAIT); signal state : state_t; + signal do_inc : std_ulogic; begin @@ -84,16 +85,16 @@ begin reg_addr <= (others => '0'); reg_ctrl <= (others => '0'); else -- Standard register writes - if dmi_req and dmi_wr then + if do_inc = '1' then + -- Address register auto-increment + reg_addr <= std_ulogic_vector(unsigned(reg_addr) + + decode_autoinc(reg_ctrl(10 downto 9))); + elsif dmi_req and dmi_wr then if dmi_addr = DBG_WB_ADDR then reg_addr <= dmi_din; elsif dmi_addr = DBG_WB_CTRL then reg_ctrl <= dmi_din(10 downto 0); end if; - elsif state = WB_CYCLE and (wb_in.ack and reg_ctrl(8))= '1' then - -- Address register auto-increment - reg_addr <= std_ulogic_vector(unsigned(reg_addr) + - decode_autoinc(reg_ctrl(10 downto 9))); end if; end if; end if; @@ -145,6 +146,7 @@ begin if (rst) then state <= IDLE; wb_out.stb <= '0'; + do_inc <= '0'; else case state is when IDLE => @@ -162,11 +164,13 @@ begin -- wb_out.stb <= '0'; state <= DMI_WAIT; + do_inc <= reg_ctrl(8); end if; when DMI_WAIT => if dmi_req = '0' then state <= IDLE; end if; + do_inc <= '0'; end case; end if; end if;