diff --git a/Makefile b/Makefile index af9c91d..5525c1e 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ common.o: decode_types.o control.o: gpr_hazard.o cr_hazard.o sim_jtag.o: sim_jtag_socket.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o -core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o +core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o core_debug.o: common.o countzero.o: countzero_tb.o: common.o glibc_random.o countzero.o @@ -27,7 +27,6 @@ decode1.o: common.o decode_types.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o decode_types.o: execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o -execute2.o: common.o crhelpers.o ppc_fx_insns.o fetch1.o: common.o fetch2.o: common.o wishbone_types.o glibc_random_helpers.o: @@ -43,9 +42,9 @@ loadstore1.o: common.o helpers.o loadstore2.o: common.o helpers.o wishbone_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o -multiply.o: common.o decode_types.o ppc_fx_insns.o crhelpers.o +multiply.o: common.o decode_types.o divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o -divider.o: common.o decode_types.o crhelpers.o +divider.o: common.o decode_types.o ppc_fx_insns.o: helpers.o register_file.o: common.o rotator.o: common.o @@ -58,7 +57,7 @@ sim_uart.o: wishbone_types.o sim_console.o soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o wishbone_arbiter.o: wishbone_types.o wishbone_types.o: -writeback.o: common.o +writeback.o: common.o crhelpers.o dmi_dtm_tb.o: dmi_dtm_xilinx.o wishbone_debug_master.o dmi_dtm_xilinx.o: wishbone_types.o sim-unisim/unisim_vcomponents.o wishbone_debug_master.o: wishbone_types.o diff --git a/common.vhdl b/common.vhdl index ae61342..93bd598 100644 --- a/common.vhdl +++ b/common.vhdl @@ -64,6 +64,7 @@ package common is is_32bit: std_ulogic; is_signed: std_ulogic; insn: std_ulogic_vector(31 downto 0); + data_len: std_ulogic_vector(3 downto 0); end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', invert_a => '0', @@ -155,31 +156,27 @@ package common is write_enable: std_ulogic; write_reg : std_ulogic_vector(4 downto 0); write_data : std_ulogic_vector(63 downto 0); + write_len : std_ulogic_vector(3 downto 0); + write_shift : std_ulogic_vector(2 downto 0); + sign_extend : std_ulogic; + byte_reverse : std_ulogic; + second_word : std_ulogic; end record; - constant Loadstore2ToWritebackInit : Loadstore2ToWritebackType := (valid => '0', write_enable => '0', others => (others => '0')); + constant Loadstore2ToWritebackInit : Loadstore2ToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', others => (others => '0')); - type Execute1ToExecute2Type is record + type Execute1ToWritebackType is record valid: std_ulogic; - write_enable : std_ulogic; - write_reg: std_ulogic_vector(4 downto 0); - write_data: std_ulogic_vector(63 downto 0); - write_cr_enable : std_ulogic; - write_cr_mask : std_ulogic_vector(7 downto 0); - write_cr_data : std_ulogic_vector(31 downto 0); rc : std_ulogic; - end record; - constant Execute1ToExecute2Init : Execute1ToExecute2Type := (valid => '0', write_enable => '0', write_cr_enable => '0', rc => '0', others => (others => '0')); - - type Execute2ToWritebackType is record - valid: std_ulogic; write_enable : std_ulogic; write_reg: std_ulogic_vector(4 downto 0); write_data: std_ulogic_vector(63 downto 0); + write_len : std_ulogic_vector(3 downto 0); write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); + sign_extend: std_ulogic; end record; - constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0')); type MultiplyToWritebackType is record valid: std_ulogic; @@ -187,11 +184,9 @@ package common is write_reg_enable : std_ulogic; write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_data: std_ulogic_vector(63 downto 0); - write_cr_enable: std_ulogic; - write_cr_mask: std_ulogic_vector(7 downto 0); - write_cr_data: std_ulogic_vector(31 downto 0); + rc: std_ulogic; end record; - constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); type DividerToWritebackType is record valid: std_ulogic; @@ -199,11 +194,9 @@ package common is write_reg_enable : std_ulogic; write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_data: std_ulogic_vector(63 downto 0); - write_cr_enable: std_ulogic; - write_cr_mask: std_ulogic_vector(7 downto 0); - write_cr_data: std_ulogic_vector(31 downto 0); + rc: std_ulogic; end record; - constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); type WritebackToRegisterFileType is record write_reg : std_ulogic_vector(4 downto 0); diff --git a/core.vhdl b/core.vhdl index aa5e87a..5a269a2 100644 --- a/core.vhdl +++ b/core.vhdl @@ -54,8 +54,7 @@ architecture behave of core is signal writeback_to_cr_file: WritebackToCrFileType; -- execute signals - signal execute1_to_execute2: Execute1ToExecute2Type; - signal execute2_to_writeback: Execute2ToWritebackType; + signal execute1_to_writeback: Execute1ToWritebackType; signal execute1_to_fetch1: Execute1ToFetch1Type; -- load store signals @@ -204,17 +203,10 @@ begin flush_out => flush, e_in => decode2_to_execute1, f_out => execute1_to_fetch1, - e_out => execute1_to_execute2, + e_out => execute1_to_writeback, terminate_out => terminate ); - execute2_0: entity work.execute2 - port map ( - clk => clk, - e_in => execute1_to_execute2, - e_out => execute2_to_writeback - ); - loadstore1_0: entity work.loadstore1 port map ( clk => clk, @@ -249,7 +241,7 @@ begin writeback_0: entity work.writeback port map ( clk => clk, - e_in => execute2_to_writeback, + e_in => execute1_to_writeback, l_in => loadstore2_to_writeback, m_in => multiply_to_writeback, d_in => divider_to_writeback, diff --git a/decode1.vhdl b/decode1.vhdl index 4515522..747411b 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -164,9 +164,9 @@ architecture behaviour of decode1 is 2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd 2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw 2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv - 2#1110111010# => (ALU, OP_EXTSB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb - 2#1110011010# => (ALU, OP_EXTSH, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh - 2#1111011010# => (ALU, OP_EXTSW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsw + 2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb + 2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh + 2#1111011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsw -- 2#110111101-# extswsli 2#1111010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbi 2#0000010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbt diff --git a/decode2.vhdl b/decode2.vhdl index 524943c..c8dee48 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -208,6 +208,7 @@ begin variable decoded_reg_b : decode_input_reg_t; variable decoded_reg_c : decode_input_reg_t; variable signed_division: std_ulogic; + variable length : std_ulogic_vector(3 downto 0); begin v := r; @@ -231,6 +232,19 @@ begin r_out.read2_enable <= decoded_reg_b.reg_valid; r_out.read3_enable <= decoded_reg_c.reg_valid; + case d_in.decode.length is + when is1B => + length := "0001"; + when is2B => + length := "0010"; + when is4B => + length := "0100"; + when is8B => + length := "1000"; + when NONE => + length := "0000"; + end case; + -- execute unit v.e.nia := d_in.nia; v.e.insn_type := d_in.decode.insn_type; @@ -252,6 +266,7 @@ begin v.e.lr := insn_lk(d_in.insn); end if; v.e.insn := d_in.insn; + v.e.data_len := length; -- multiply unit v.m.insn_type := d_in.decode.insn_type; @@ -336,19 +351,7 @@ begin v.l.load := '0'; end if; - case d_in.decode.length is - when is1B => - v.l.length := "0001"; - when is2B => - v.l.length := "0010"; - when is4B => - v.l.length := "0100"; - when is8B => - v.l.length := "1000"; - when NONE => - v.l.length := "0000"; - end case; - + v.l.length := length; v.l.byte_reverse := d_in.decode.byte_reverse; v.l.sign_extend := d_in.decode.sign_extend; v.l.update := d_in.decode.update; diff --git a/decode_types.vhdl b/decode_types.vhdl index a60053f..2d85b27 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -8,7 +8,7 @@ package decode_types is OP_CNTZ, OP_CRAND, OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, - OP_DCBZ, OP_DIV, OP_EXTSB, OP_EXTSH, OP_EXTSW, + OP_DCBZ, OP_DIV, OP_EXTS, OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF, OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD, diff --git a/divider.vhdl b/divider.vhdl index cfadc51..20d4600 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -5,7 +5,6 @@ use ieee.numeric_std.all; library work; use work.common.all; use work.decode_types.all; -use work.crhelpers.all; entity divider is port ( @@ -37,7 +36,6 @@ architecture behaviour of divider is signal overflow : std_ulogic; signal ovf32 : std_ulogic; signal did_ovf : std_ulogic; - signal cr_data : std_ulogic_vector(2 downto 0); begin divider_0: process(clk) @@ -114,7 +112,7 @@ begin divider_1: process(all) begin d_out.write_reg_nr <= write_reg; - d_out.write_cr_mask <= num_to_fxm(0); + d_out.rc <= rc; if is_modulus = '1' then result <= dend(128 downto 65); @@ -144,29 +142,18 @@ begin else oresult <= sresult; end if; - - if (did_ovf = '1') or (or (sresult) = '0') then - cr_data <= "001"; - elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then - cr_data <= "100"; - else - cr_data <= "010"; - end if; end process; divider_out: process(clk) begin if rising_edge(clk) then d_out.write_reg_data <= oresult; - d_out.write_cr_data <= cr_data & '0' & x"0000000"; if count = "1000000" then d_out.valid <= '1'; d_out.write_reg_enable <= '1'; - d_out.write_cr_enable <= rc; else d_out.valid <= '0'; d_out.write_reg_enable <= '0'; - d_out.write_cr_enable <= '0'; end if; end if; end process; diff --git a/divider_tb.vhdl b/divider_tb.vhdl index fdc8da5..5f809bb 100644 --- a/divider_tb.vhdl +++ b/divider_tb.vhdl @@ -68,7 +68,7 @@ begin assert d2.write_reg_enable = '1'; assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); - assert d2.write_cr_enable = '0'; + assert d2.rc = '0'; wait for clk_period; assert d2.valid = '0' report "valid"; @@ -92,9 +92,7 @@ begin assert d2.write_reg_enable = '1'; assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); - assert d2.write_cr_enable = '1'; - assert d2.write_cr_mask = "10000000"; - assert d2.write_cr_data = x"40000000" report "cr data is " & to_hstring(d2.write_cr_data); + assert d2.rc = '1'; wait for clk_period; assert d2.valid = '0'; @@ -129,8 +127,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divd"; end loop; end loop; end loop; @@ -165,8 +161,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divdu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divdu"; end loop; end loop; end loop; @@ -207,8 +201,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divde"; end loop; end loop; end loop; @@ -246,8 +238,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divdeu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divdeu"; end loop; end loop; end loop; @@ -284,8 +274,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divw"; end loop; end loop; end loop; @@ -322,8 +310,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divwu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divwu"; end loop; end loop; end loop; @@ -363,8 +349,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divwe"; end if; end loop; end loop; @@ -402,8 +386,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divweu"; end loop; end loop; end loop; @@ -441,8 +423,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad modsd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for modsd"; end loop; end loop; end loop; @@ -480,8 +460,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad modud expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for modud"; end loop; end loop; end loop; @@ -524,8 +502,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad modsw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for modsw"; end loop; end loop; end loop; @@ -563,8 +539,6 @@ begin end if; assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0) report "bad moduw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for moduw"; end loop; end loop; end loop; diff --git a/execute1.vhdl b/execute1.vhdl index 5f49c6f..702744e 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -25,7 +25,7 @@ entity execute1 is -- asynchronous f_out : out Execute1ToFetch1Type; - e_out : out Execute1ToExecute2Type; + e_out : out Execute1ToWritebackType; terminate_out : out std_ulogic ); @@ -34,7 +34,7 @@ end entity execute1; architecture behaviour of execute1 is type reg_type is record --f : Execute1ToFetch1Type; - e : Execute1ToExecute2Type; + e : Execute1ToWritebackType; end record; signal r, rin : reg_type; @@ -124,7 +124,7 @@ begin newcrf := (others => '0'); v := r; - v.e := Execute1ToExecute2Init; + v.e := Execute1ToWritebackInit; --v.f := Execute1ToFetch1TypeInit; ctrl_tmp <= ctrl; @@ -143,6 +143,8 @@ begin v.e.valid := '1'; v.e.write_reg := e_in.write_reg; + v.e.write_len := x"8"; + v.e.sign_extend := '0'; case_0: case e_in.insn_type is @@ -230,14 +232,10 @@ begin when OP_CNTZ => result := countzero_result; result_en := 1; - when OP_EXTSB => - result := ppc_extsb(e_in.read_data3); - result_en := 1; - when OP_EXTSH => - result := ppc_extsh(e_in.read_data3); - result_en := 1; - when OP_EXTSW => - result := ppc_extsw(e_in.read_data3); + when OP_EXTS => + v.e.write_len := e_in.data_len; + v.e.sign_extend := '1'; + result := e_in.read_data3; result_en := 1; when OP_ISEL => crnum := to_integer(unsigned(insn_bc(e_in.insn))); diff --git a/execute2.vhdl b/execute2.vhdl deleted file mode 100644 index 9fdb1dd..0000000 --- a/execute2.vhdl +++ /dev/null @@ -1,57 +0,0 @@ -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; - -library work; -use work.common.all; -use work.crhelpers.all; -use work.ppc_fx_insns.all; - --- 2 cycle ALU --- We handle rc form instructions here - -entity execute2 is - port ( - clk : in std_ulogic; - - e_in : in Execute1ToExecute2Type; - e_out : out Execute2ToWritebackType - ); -end execute2; - -architecture behave of execute2 is - signal r, rin : Execute2ToWritebackType; -begin - execute2_0: process(clk) - begin - if rising_edge(clk) then - r <= rin; - end if; - end process; - - execute2_1: process(all) - variable v : Execute2ToWritebackType; - begin - v := rin; - - v.valid := e_in.valid; - v.write_enable := e_in.write_enable; - v.write_reg := e_in.write_reg; - v.write_data := e_in.write_data; - v.write_cr_enable := e_in.write_cr_enable; - v.write_cr_mask := e_in.write_cr_mask; - v.write_cr_data := e_in.write_cr_data; - - if e_in.valid = '1' and e_in.rc = '1' then - v.write_cr_enable := '1'; - v.write_cr_mask := num_to_fxm(0); - v.write_cr_data := ppc_cmpi('1', e_in.write_data, x"0000") & x"0000000"; - end if; - - -- Update registers - rin <= v; - - -- Update outputs - e_out <= r; - end process; -end; diff --git a/loadstore2.vhdl b/loadstore2.vhdl index 17ef7e1..cd7061c 100644 --- a/loadstore2.vhdl +++ b/loadstore2.vhdl @@ -26,9 +26,6 @@ architecture behave of loadstore2 is signal l_saved : Loadstore1ToLoadstore2Type; signal w_tmp : Loadstore2ToWritebackType; signal m_tmp : wishbone_master_out; - signal read_data : std_ulogic_vector(63 downto 0); - signal read_data_shift : std_ulogic_vector(2 downto 0); - signal sign_extend_byte_reverse: std_ulogic_vector(1 downto 0); signal dlength : std_ulogic_vector(3 downto 0); type state_t is (IDLE, WAITING_FOR_READ_ACK, WAITING_FOR_WRITE_ACK); @@ -61,37 +58,6 @@ architecture behave of loadstore2 is end function wishbone_data_sel; begin - loadstore2_1: process(all) - variable tmp : std_ulogic_vector(63 downto 0); - variable data : std_ulogic_vector(63 downto 0); - begin - tmp := std_logic_vector(shift_right(unsigned(read_data), to_integer(unsigned(read_data_shift)) * 8)); - data := (others => '0'); - case to_integer(unsigned(dlength)) is - when 0 => - when 1 => - data(7 downto 0) := tmp(7 downto 0); - when 2 => - data(15 downto 0) := tmp(15 downto 0); - when 4 => - data(31 downto 0) := tmp(31 downto 0); - when 8 => - data(63 downto 0) := tmp(63 downto 0); - when others => - assert false report "invalid length" severity failure; - data(63 downto 0) := tmp(63 downto 0); - end case; - - case sign_extend_byte_reverse is - when "10" => - w_tmp.write_data <= sign_extend(data, to_integer(unsigned(l_saved.length))); - when "01" => - w_tmp.write_data <= byte_reverse(data, to_integer(unsigned(l_saved.length))); - when others => - w_tmp.write_data <= data; - end case; - end process; - w_out <= w_tmp; m_out <= m_tmp; @@ -102,11 +68,13 @@ begin w_tmp.valid <= '0'; w_tmp.write_enable <= '0'; w_tmp.write_reg <= (others => '0'); + w_tmp.write_len <= "1000"; + w_tmp.write_shift <= "000"; + w_tmp.sign_extend <= '0'; + w_tmp.byte_reverse <= '0'; + w_tmp.second_word <= '0'; l_saved <= l_saved; - read_data_shift <= "000"; - sign_extend_byte_reverse <= "00"; - dlength <= "1000"; case_0: case state is when IDLE => @@ -131,7 +99,7 @@ begin if l_in.update = '1' then w_tmp.write_enable <= '1'; w_tmp.write_reg <= l_in.update_reg; - read_data <= l_in.addr; + w_tmp.write_data <= l_in.addr; end if; state <= WAITING_FOR_READ_ACK; @@ -148,15 +116,15 @@ begin when WAITING_FOR_READ_ACK => if m_in.ack = '1' then - read_data <= m_in.dat; - read_data_shift <= l_saved.addr(2 downto 0); - dlength <= l_saved.length; - sign_extend_byte_reverse <= l_saved.sign_extend & l_saved.byte_reverse; - -- write data to register file w_tmp.valid <= '1'; w_tmp.write_enable <= '1'; + w_tmp.write_data <= m_in.dat; w_tmp.write_reg <= l_saved.write_reg; + w_tmp.write_len <= l_saved.length; + w_tmp.write_shift <= l_saved.addr(2 downto 0); + w_tmp.sign_extend <= l_saved.sign_extend; + w_tmp.byte_reverse <= l_saved.byte_reverse; m_tmp <= wishbone_master_out_init; state <= IDLE; @@ -168,7 +136,7 @@ begin if l_saved.update = '1' then w_tmp.write_enable <= '1'; w_tmp.write_reg <= l_saved.update_reg; - read_data <= l_saved.addr; + w_tmp.write_data <= l_saved.addr; end if; m_tmp <= wishbone_master_out_init; diff --git a/microwatt.core b/microwatt.core index b963c45..44dfbbd 100644 --- a/microwatt.core +++ b/microwatt.core @@ -24,7 +24,6 @@ filesets: - cr_hazard.vhdl - control.vhdl - execute1.vhdl - - execute2.vhdl - loadstore1.vhdl - loadstore2.vhdl - multiply.vhdl diff --git a/multiply.vhdl b/multiply.vhdl index 71aceca..94fa792 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -5,8 +5,6 @@ use ieee.numeric_std.all; library work; use work.common.all; use work.decode_types.all; -use work.ppc_fx_insns.all; -use work.crhelpers.all; entity multiply is generic ( @@ -88,12 +86,7 @@ begin if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then m_out.valid <= '1'; m_out.write_reg_enable <= '1'; - - if v.multiply_pipeline(PIPELINE_DEPTH-1).rc = '1' then - m_out.write_cr_enable <= '1'; - m_out.write_cr_mask <= num_to_fxm(0); - m_out.write_cr_data <= ppc_cmpi('1', d2, x"0000") & x"0000000"; - end if; + m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; end if; rin <= v; diff --git a/multiply_tb.vhdl b/multiply_tb.vhdl index 95c3199..48f83ab 100644 --- a/multiply_tb.vhdl +++ b/multiply_tb.vhdl @@ -61,7 +61,7 @@ begin assert m2.write_reg_enable = '1'; assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; - assert m2.write_cr_enable = '0'; + assert m2.rc = '0'; wait for clk_period; assert m2.valid = '0'; @@ -79,8 +79,7 @@ begin assert m2.write_reg_enable = '1'; assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; - assert m2.write_cr_enable = '1'; - assert m2.write_cr_data = x"40000000"; + assert m2.rc = '1'; -- test mulld mulld_loop : for i in 0 to 1000 loop diff --git a/writeback.vhdl b/writeback.vhdl index e244960..042ad59 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -4,12 +4,13 @@ use ieee.numeric_std.all; library work; use work.common.all; +use work.crhelpers.all; entity writeback is port ( clk : in std_ulogic; - e_in : in Execute2ToWritebackType; + e_in : in Execute1ToWritebackType; l_in : in Loadstore2ToWritebackType; m_in : in MultiplyToWritebackType; d_in : in DividerToWritebackType; @@ -22,12 +23,44 @@ entity writeback is end entity writeback; architecture behaviour of writeback is + subtype byte_index_t is unsigned(2 downto 0); + type permutation_t is array(0 to 7) of byte_index_t; + subtype byte_trim_t is std_ulogic_vector(1 downto 0); + type trim_ctl_t is array(0 to 7) of byte_trim_t; + type byte_sel_t is array(0 to 7) of std_ulogic; + + signal data_len : unsigned(3 downto 0); + signal data_in : std_ulogic_vector(63 downto 0); + signal data_permuted : std_ulogic_vector(63 downto 0); + signal data_trimmed : std_ulogic_vector(63 downto 0); + signal data_latched : std_ulogic_vector(63 downto 0); + signal perm : permutation_t; + signal use_second : byte_sel_t; + signal byte_offset : unsigned(2 downto 0); + signal brev_lenm1 : unsigned(2 downto 0); + signal trim_ctl : trim_ctl_t; + signal rc : std_ulogic; + signal partial_write : std_ulogic; + signal sign_extend : std_ulogic; + signal negative : std_ulogic; + signal second_word : std_ulogic; begin + writeback_0: process(clk) + begin + if rising_edge(clk) then + if partial_write = '1' then + data_latched <= data_permuted; + end if; + end if; + end process; + writeback_1: process(all) variable x : std_ulogic_vector(0 downto 0); variable y : std_ulogic_vector(0 downto 0); variable z : std_ulogic_vector(0 downto 0); variable w : std_ulogic_vector(0 downto 0); + variable j : integer; + variable k : unsigned(3 downto 0); begin x := "" & e_in.valid; y := "" & l_in.valid; @@ -41,10 +74,11 @@ begin w := "" & d_in.write_reg_enable; assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure; - x := "" & e_in.write_cr_enable; - y := "" & m_in.write_cr_enable; - z := "" & d_in.write_cr_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + w := "" & e_in.write_cr_enable; + x := "" & (e_in.write_enable and e_in.rc); + y := "" & (m_in.valid and m_in.rc); + z := "" & (d_in.valid and d_in.rc); + assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; @@ -54,10 +88,22 @@ begin complete_out <= '1'; end if; + rc <= '0'; + brev_lenm1 <= "000"; + byte_offset <= "000"; + data_len <= x"8"; + partial_write <= '0'; + sign_extend <= '0'; + second_word <= '0'; + data_in <= (others => '0'); + if e_in.write_enable = '1' then w_out.write_reg <= e_in.write_reg; - w_out.write_data <= e_in.write_data; + data_in <= e_in.write_data; w_out.write_enable <= '1'; + data_len <= unsigned(e_in.write_len); + sign_extend <= e_in.sign_extend; + rc <= e_in.rc; end if; if e_in.write_cr_enable = '1' then @@ -68,32 +114,89 @@ begin if l_in.write_enable = '1' then w_out.write_reg <= l_in.write_reg; - w_out.write_data <= l_in.write_data; + data_in <= l_in.write_data; + data_len <= unsigned(l_in.write_len); + byte_offset <= unsigned(l_in.write_shift); + sign_extend <= l_in.sign_extend; + if l_in.byte_reverse = '1' then + brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1; + end if; w_out.write_enable <= '1'; + second_word <= l_in.second_word; + if l_in.valid = '0' and (data_len + byte_offset > 8) then + partial_write <= '1'; + end if; end if; if m_in.write_reg_enable = '1' then w_out.write_enable <= '1'; w_out.write_reg <= m_in.write_reg_nr; - w_out.write_data <= m_in.write_reg_data; - end if; - - if m_in.write_cr_enable = '1' then - c_out.write_cr_enable <= '1'; - c_out.write_cr_mask <= m_in.write_cr_mask; - c_out.write_cr_data <= m_in.write_cr_data; + data_in <= m_in.write_reg_data; + rc <= m_in.rc; end if; if d_in.write_reg_enable = '1' then w_out.write_enable <= '1'; w_out.write_reg <= d_in.write_reg_nr; - w_out.write_data <= d_in.write_reg_data; + data_in <= d_in.write_reg_data; + rc <= d_in.rc; end if; - if d_in.write_cr_enable = '1' then + -- shift and byte-reverse data bytes + for i in 0 to 7 loop + k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); + perm(i) <= k(2 downto 0); + use_second(i) <= k(3); + end loop; + for i in 0 to 7 loop + j := to_integer(perm(i)) * 8; + data_permuted(i * 8 + 7 downto i * 8) <= data_in(j + 7 downto j); + end loop; + + -- If the data can arrive split over two cycles, this will be correct + -- provided we don't have both sign extension and byte reversal. + negative <= (data_len(2) and data_permuted(31)) or (data_len(1) and data_permuted(15)) or + (data_len(0) and data_permuted(7)); + + -- trim and sign-extend + for i in 0 to 7 loop + if i < to_integer(data_len) then + if second_word = '1' then + trim_ctl(i) <= '1' & not use_second(i); + else + trim_ctl(i) <= not use_second(i) & '0'; + end if; + else + trim_ctl(i) <= '0' & (negative and sign_extend); + end if; + end loop; + for i in 0 to 7 loop + case trim_ctl(i) is + when "11" => + data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8); + when "10" => + data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8); + when "01" => + data_trimmed(i * 8 + 7 downto i * 8) <= x"FF"; + when others => + data_trimmed(i * 8 + 7 downto i * 8) <= x"00"; + end case; + end loop; + + -- deliver to regfile + w_out.write_data <= data_trimmed; + + -- test value against 0 and set CR0 if requested + if rc = '1' then c_out.write_cr_enable <= '1'; - c_out.write_cr_mask <= d_in.write_cr_mask; - c_out.write_cr_data <= d_in.write_cr_data; + c_out.write_cr_mask <= num_to_fxm(0); + if data_trimmed(63) = '1' then + c_out.write_cr_data <= x"80000000"; + elsif or (data_trimmed(62 downto 0)) = '1' then + c_out.write_cr_data <= x"40000000"; + else + c_out.write_cr_data <= x"20000000"; + end if; end if; end process; end;