From 374f4c536d3c4dd8051a139b8e25691ba82e35b7 Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Mon, 14 Oct 2019 12:56:01 +1100
Subject: [PATCH 1/4] writeback: Do data formatting and condition recording in writeback

This adds code to writeback to format data and test the result
against zero for the purpose of setting CR0. The data formatter
is able to shift and mask by bytes and do byte reversal and sign
extension. It can also put together bytes from two input
doublewords to support unaligned loads (including unaligned
byte-reversed loads).

The data formatter starts with a byte-wide 8:1 multiplexer on each
output byte, so that any byte of the input can be directed to any
byte of the output. This lets us rotate the data and simultaneously
byte-reverse it. The rotated/reversed data goes to a register for
the unaligned cases that overlap two doublewords. Then there is
per-byte logic that does trimming, sign extension, and splicing
together bytes from a previous input doubleword (stored in
data_latched) and the current doubleword. Finally the 64-bit result
is tested to set CR0 if rc = 1.

This removes the RC logic from the execute2, multiply and divide
units, and the shift/mask/byte-reverse/sign-extend logic from
loadstore2.

Signed-off-by: Paul Mackerras --- Makefile | 8 +-- common.vhdl | 22 ++++---- divider.vhdl | 15 +----- divider_tb.vhdl | 30 +---------- execute2.vhdl | 9 +--- loadstore2.vhdl | 56 +++++--------------- multiply.vhdl | 9 +--- multiply_tb.vhdl | 5 +- writeback.vhdl | 134 +++++++++++++++++++++++++++++++++++++++++------ 9 files changed, 152 insertions(+), 136 deletions(-) diff --git a/Makefile b/Makefile index af9c91d..6657d4d 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ decode1.o: common.o decode_types.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o decode_types.o: execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o -execute2.o: common.o crhelpers.o ppc_fx_insns.o +execute2.o: common.o fetch1.o: common.o fetch2.o: common.o wishbone_types.o glibc_random_helpers.o: @@ -43,9 +43,9 @@ loadstore1.o: common.o helpers.o loadstore2.o: common.o helpers.o wishbone_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o -multiply.o: common.o decode_types.o ppc_fx_insns.o crhelpers.o +multiply.o: common.o decode_types.o divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o -divider.o: common.o decode_types.o crhelpers.o +divider.o: common.o decode_types.o ppc_fx_insns.o: helpers.o register_file.o: common.o rotator.o: common.o @@ -58,7 +58,7 @@ sim_uart.o: wishbone_types.o sim_console.o soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o wishbone_arbiter.o: wishbone_types.o wishbone_types.o: -writeback.o: common.o +writeback.o: common.o crhelpers.o dmi_dtm_tb.o: dmi_dtm_xilinx.o wishbone_debug_master.o dmi_dtm_xilinx.o: wishbone_types.o sim-unisim/unisim_vcomponents.o wishbone_debug_master.o: wishbone_types.o diff --git a/common.vhdl b/common.vhdl index ae61342..321cff1 100644 --- a/common.vhdl +++ b/common.vhdl @@ 
-155,8 +155,13 @@ package common is write_enable: std_ulogic; write_reg : std_ulogic_vector(4 downto 0); write_data : std_ulogic_vector(63 downto 0); + write_len : std_ulogic_vector(3 downto 0); + write_shift : std_ulogic_vector(2 downto 0); + sign_extend : std_ulogic; + byte_reverse : std_ulogic; + second_word : std_ulogic; end record; - constant Loadstore2ToWritebackInit : Loadstore2ToWritebackType := (valid => '0', write_enable => '0', others => (others => '0')); + constant Loadstore2ToWritebackInit : Loadstore2ToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', others => (others => '0')); type Execute1ToExecute2Type is record valid: std_ulogic; @@ -172,6 +177,7 @@ package common is type Execute2ToWritebackType is record valid: std_ulogic; + rc : std_ulogic; write_enable : std_ulogic; write_reg: std_ulogic_vector(4 downto 0); write_data: std_ulogic_vector(63 downto 0); @@ -179,7 +185,7 @@ package common is write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); end record; - constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0')); type MultiplyToWritebackType is record valid: std_ulogic; @@ -187,11 +193,9 @@ package common is write_reg_enable : std_ulogic; write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_data: std_ulogic_vector(63 downto 0); - write_cr_enable: std_ulogic; - write_cr_mask: std_ulogic_vector(7 downto 0); - write_cr_data: std_ulogic_vector(31 downto 0); + rc: std_ulogic; end record; - constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant MultiplyToWritebackInit : 
MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); type DividerToWritebackType is record valid: std_ulogic; @@ -199,11 +203,9 @@ package common is write_reg_enable : std_ulogic; write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_data: std_ulogic_vector(63 downto 0); - write_cr_enable: std_ulogic; - write_cr_mask: std_ulogic_vector(7 downto 0); - write_cr_data: std_ulogic_vector(31 downto 0); + rc: std_ulogic; end record; - constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); type WritebackToRegisterFileType is record write_reg : std_ulogic_vector(4 downto 0); diff --git a/divider.vhdl b/divider.vhdl index cfadc51..20d4600 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -5,7 +5,6 @@ use ieee.numeric_std.all; library work; use work.common.all; use work.decode_types.all; -use work.crhelpers.all; entity divider is port ( @@ -37,7 +36,6 @@ architecture behaviour of divider is signal overflow : std_ulogic; signal ovf32 : std_ulogic; signal did_ovf : std_ulogic; - signal cr_data : std_ulogic_vector(2 downto 0); begin divider_0: process(clk) @@ -114,7 +112,7 @@ begin divider_1: process(all) begin d_out.write_reg_nr <= write_reg; - d_out.write_cr_mask <= num_to_fxm(0); + d_out.rc <= rc; if is_modulus = '1' then result <= dend(128 downto 65); @@ -144,29 +142,18 @@ begin else oresult <= sresult; end if; - - if (did_ovf = '1') or (or (sresult) = '0') then - cr_data <= "001"; - elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then - cr_data <= "100"; - else - cr_data <= "010"; - end if; end process; divider_out: process(clk) begin if rising_edge(clk) then d_out.write_reg_data <= oresult; - d_out.write_cr_data <= cr_data & '0' & x"0000000"; if count = 
"1000000" then d_out.valid <= '1'; d_out.write_reg_enable <= '1'; - d_out.write_cr_enable <= rc; else d_out.valid <= '0'; d_out.write_reg_enable <= '0'; - d_out.write_cr_enable <= '0'; end if; end if; end process; diff --git a/divider_tb.vhdl b/divider_tb.vhdl index fdc8da5..5f809bb 100644 --- a/divider_tb.vhdl +++ b/divider_tb.vhdl @@ -68,7 +68,7 @@ begin assert d2.write_reg_enable = '1'; assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); - assert d2.write_cr_enable = '0'; + assert d2.rc = '0'; wait for clk_period; assert d2.valid = '0' report "valid"; @@ -92,9 +92,7 @@ begin assert d2.write_reg_enable = '1'; assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); - assert d2.write_cr_enable = '1'; - assert d2.write_cr_mask = "10000000"; - assert d2.write_cr_data = x"40000000" report "cr data is " & to_hstring(d2.write_cr_data); + assert d2.rc = '1'; wait for clk_period; assert d2.valid = '0'; @@ -129,8 +127,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divd"; end loop; end loop; end loop; @@ -165,8 +161,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divdu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divdu"; end loop; end loop; end loop; @@ -207,8 +201,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert 
ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divde"; end loop; end loop; end loop; @@ -246,8 +238,6 @@ begin end if; assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) report "bad divdeu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divdeu"; end loop; end loop; end loop; @@ -284,8 +274,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divw"; end loop; end loop; end loop; @@ -322,8 +310,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divwu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divwu"; end loop; end loop; end loop; @@ -363,8 +349,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divwe"; end if; end loop; end loop; @@ -402,8 +386,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divweu"; end loop; end loop; end loop; @@ -441,8 +423,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad modsd expected " & 
to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for modsd"; end loop; end loop; end loop; @@ -480,8 +460,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad modud expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for modud"; end loop; end loop; end loop; @@ -524,8 +502,6 @@ begin end if; assert behave_rt = d2.write_reg_data report "bad modsw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for modsw"; end loop; end loop; end loop; @@ -563,8 +539,6 @@ begin end if; assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0) report "bad moduw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for moduw"; end loop; end loop; end loop; diff --git a/execute2.vhdl b/execute2.vhdl index 9fdb1dd..de55310 100644 --- a/execute2.vhdl +++ b/execute2.vhdl @@ -4,8 +4,6 @@ use ieee.numeric_std.all; library work; use work.common.all; -use work.crhelpers.all; -use work.ppc_fx_insns.all; -- 2 cycle ALU -- We handle rc form instructions here @@ -41,12 +39,7 @@ begin v.write_cr_enable := e_in.write_cr_enable; v.write_cr_mask := e_in.write_cr_mask; v.write_cr_data := e_in.write_cr_data; - - if e_in.valid = '1' and e_in.rc = '1' then - v.write_cr_enable := '1'; - v.write_cr_mask := num_to_fxm(0); - v.write_cr_data := ppc_cmpi('1', e_in.write_data, x"0000") & x"0000000"; - end if; + v.rc := e_in.rc; -- Update registers rin <= v; diff --git a/loadstore2.vhdl b/loadstore2.vhdl index 17ef7e1..cd7061c 100644 --- a/loadstore2.vhdl +++ b/loadstore2.vhdl @@ -26,9 +26,6 @@ architecture 
behave of loadstore2 is signal l_saved : Loadstore1ToLoadstore2Type; signal w_tmp : Loadstore2ToWritebackType; signal m_tmp : wishbone_master_out; - signal read_data : std_ulogic_vector(63 downto 0); - signal read_data_shift : std_ulogic_vector(2 downto 0); - signal sign_extend_byte_reverse: std_ulogic_vector(1 downto 0); signal dlength : std_ulogic_vector(3 downto 0); type state_t is (IDLE, WAITING_FOR_READ_ACK, WAITING_FOR_WRITE_ACK); @@ -61,37 +58,6 @@ architecture behave of loadstore2 is end function wishbone_data_sel; begin - loadstore2_1: process(all) - variable tmp : std_ulogic_vector(63 downto 0); - variable data : std_ulogic_vector(63 downto 0); - begin - tmp := std_logic_vector(shift_right(unsigned(read_data), to_integer(unsigned(read_data_shift)) * 8)); - data := (others => '0'); - case to_integer(unsigned(dlength)) is - when 0 => - when 1 => - data(7 downto 0) := tmp(7 downto 0); - when 2 => - data(15 downto 0) := tmp(15 downto 0); - when 4 => - data(31 downto 0) := tmp(31 downto 0); - when 8 => - data(63 downto 0) := tmp(63 downto 0); - when others => - assert false report "invalid length" severity failure; - data(63 downto 0) := tmp(63 downto 0); - end case; - - case sign_extend_byte_reverse is - when "10" => - w_tmp.write_data <= sign_extend(data, to_integer(unsigned(l_saved.length))); - when "01" => - w_tmp.write_data <= byte_reverse(data, to_integer(unsigned(l_saved.length))); - when others => - w_tmp.write_data <= data; - end case; - end process; - w_out <= w_tmp; m_out <= m_tmp; @@ -102,11 +68,13 @@ begin w_tmp.valid <= '0'; w_tmp.write_enable <= '0'; w_tmp.write_reg <= (others => '0'); + w_tmp.write_len <= "1000"; + w_tmp.write_shift <= "000"; + w_tmp.sign_extend <= '0'; + w_tmp.byte_reverse <= '0'; + w_tmp.second_word <= '0'; l_saved <= l_saved; - read_data_shift <= "000"; - sign_extend_byte_reverse <= "00"; - dlength <= "1000"; case_0: case state is when IDLE => @@ -131,7 +99,7 @@ begin if l_in.update = '1' then w_tmp.write_enable <= '1'; 
w_tmp.write_reg <= l_in.update_reg; - read_data <= l_in.addr; + w_tmp.write_data <= l_in.addr; end if; state <= WAITING_FOR_READ_ACK; @@ -148,15 +116,15 @@ begin when WAITING_FOR_READ_ACK => if m_in.ack = '1' then - read_data <= m_in.dat; - read_data_shift <= l_saved.addr(2 downto 0); - dlength <= l_saved.length; - sign_extend_byte_reverse <= l_saved.sign_extend & l_saved.byte_reverse; - -- write data to register file w_tmp.valid <= '1'; w_tmp.write_enable <= '1'; + w_tmp.write_data <= m_in.dat; w_tmp.write_reg <= l_saved.write_reg; + w_tmp.write_len <= l_saved.length; + w_tmp.write_shift <= l_saved.addr(2 downto 0); + w_tmp.sign_extend <= l_saved.sign_extend; + w_tmp.byte_reverse <= l_saved.byte_reverse; m_tmp <= wishbone_master_out_init; state <= IDLE; @@ -168,7 +136,7 @@ begin if l_saved.update = '1' then w_tmp.write_enable <= '1'; w_tmp.write_reg <= l_saved.update_reg; - read_data <= l_saved.addr; + w_tmp.write_data <= l_saved.addr; end if; m_tmp <= wishbone_master_out_init; diff --git a/multiply.vhdl b/multiply.vhdl index 71aceca..94fa792 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -5,8 +5,6 @@ use ieee.numeric_std.all; library work; use work.common.all; use work.decode_types.all; -use work.ppc_fx_insns.all; -use work.crhelpers.all; entity multiply is generic ( @@ -88,12 +86,7 @@ begin if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then m_out.valid <= '1'; m_out.write_reg_enable <= '1'; - - if v.multiply_pipeline(PIPELINE_DEPTH-1).rc = '1' then - m_out.write_cr_enable <= '1'; - m_out.write_cr_mask <= num_to_fxm(0); - m_out.write_cr_data <= ppc_cmpi('1', d2, x"0000") & x"0000000"; - end if; + m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; end if; rin <= v; diff --git a/multiply_tb.vhdl b/multiply_tb.vhdl index 95c3199..48f83ab 100644 --- a/multiply_tb.vhdl +++ b/multiply_tb.vhdl @@ -61,7 +61,7 @@ begin assert m2.write_reg_enable = '1'; assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; - assert 
m2.write_cr_enable = '0'; + assert m2.rc = '0'; wait for clk_period; assert m2.valid = '0'; @@ -79,8 +79,7 @@ begin assert m2.write_reg_enable = '1'; assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; - assert m2.write_cr_enable = '1'; - assert m2.write_cr_data = x"40000000"; + assert m2.rc = '1'; -- test mulld mulld_loop : for i in 0 to 1000 loop diff --git a/writeback.vhdl b/writeback.vhdl index e244960..ba88970 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -4,6 +4,7 @@ use ieee.numeric_std.all; library work; use work.common.all; +use work.crhelpers.all; entity writeback is port ( @@ -22,12 +23,44 @@ entity writeback is end entity writeback; architecture behaviour of writeback is + subtype byte_index_t is unsigned(2 downto 0); + type permutation_t is array(0 to 7) of byte_index_t; + subtype byte_trim_t is std_ulogic_vector(1 downto 0); + type trim_ctl_t is array(0 to 7) of byte_trim_t; + type byte_sel_t is array(0 to 7) of std_ulogic; + + signal data_len : unsigned(3 downto 0); + signal data_in : std_ulogic_vector(63 downto 0); + signal data_permuted : std_ulogic_vector(63 downto 0); + signal data_trimmed : std_ulogic_vector(63 downto 0); + signal data_latched : std_ulogic_vector(63 downto 0); + signal perm : permutation_t; + signal use_second : byte_sel_t; + signal byte_offset : unsigned(2 downto 0); + signal brev_lenm1 : unsigned(2 downto 0); + signal trim_ctl : trim_ctl_t; + signal rc : std_ulogic; + signal partial_write : std_ulogic; + signal sign_extend : std_ulogic; + signal negative : std_ulogic; + signal second_word : std_ulogic; begin + writeback_0: process(clk) + begin + if rising_edge(clk) then + if partial_write = '1' then + data_latched <= data_permuted; + end if; + end if; + end process; + writeback_1: process(all) variable x : std_ulogic_vector(0 downto 0); variable y : std_ulogic_vector(0 downto 0); variable z : std_ulogic_vector(0 downto 0); variable w : std_ulogic_vector(0 downto 0); + variable j : integer; + 
variable k : unsigned(3 downto 0); begin x := "" & e_in.valid; y := "" & l_in.valid; @@ -41,10 +74,11 @@ begin w := "" & d_in.write_reg_enable; assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure; - x := "" & e_in.write_cr_enable; - y := "" & m_in.write_cr_enable; - z := "" & d_in.write_cr_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + w := "" & e_in.write_cr_enable; + x := "" & (e_in.write_enable and e_in.rc); + y := "" & (m_in.valid and m_in.rc); + z := "" & (d_in.valid and d_in.rc); + assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; @@ -54,10 +88,19 @@ begin complete_out <= '1'; end if; + rc <= '0'; + brev_lenm1 <= "000"; + byte_offset <= "000"; + data_len <= x"8"; + partial_write <= '0'; + sign_extend <= '0'; + second_word <= '0'; + if e_in.write_enable = '1' then w_out.write_reg <= e_in.write_reg; - w_out.write_data <= e_in.write_data; + data_in <= e_in.write_data; w_out.write_enable <= '1'; + rc <= e_in.rc; end if; if e_in.write_cr_enable = '1' then @@ -68,32 +111,89 @@ begin if l_in.write_enable = '1' then w_out.write_reg <= l_in.write_reg; - w_out.write_data <= l_in.write_data; + data_in <= l_in.write_data; + data_len <= unsigned(l_in.write_len); + byte_offset <= unsigned(l_in.write_shift); + sign_extend <= l_in.sign_extend; + if l_in.byte_reverse = '1' then + brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1; + end if; w_out.write_enable <= '1'; + second_word <= l_in.second_word; + if l_in.valid = '0' and (data_len + byte_offset > 8) then + partial_write <= '1'; + end if; end if; if m_in.write_reg_enable = '1' then w_out.write_enable <= '1'; w_out.write_reg <= m_in.write_reg_nr; - w_out.write_data <= m_in.write_reg_data; - end if; - - if 
m_in.write_cr_enable = '1' then - c_out.write_cr_enable <= '1'; - c_out.write_cr_mask <= m_in.write_cr_mask; - c_out.write_cr_data <= m_in.write_cr_data; + data_in <= m_in.write_reg_data; + rc <= m_in.rc; end if; if d_in.write_reg_enable = '1' then w_out.write_enable <= '1'; w_out.write_reg <= d_in.write_reg_nr; - w_out.write_data <= d_in.write_reg_data; + data_in <= d_in.write_reg_data; + rc <= d_in.rc; end if; - if d_in.write_cr_enable = '1' then + -- shift and byte-reverse data bytes + for i in 0 to 7 loop + k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); + perm(i) <= k(2 downto 0); + use_second(i) <= k(3); + end loop; + for i in 0 to 7 loop + j := to_integer(perm(i)) * 8; + data_permuted(i * 8 + 7 downto i * 8) <= data_in(j + 7 downto j); + end loop; + + -- If the data can arrive split over two cycles, this will be correct + -- provided we don't have both sign extension and byte reversal. + negative <= (data_len(2) and data_permuted(31)) or (data_len(1) and data_permuted(15)) or + (data_len(0) and data_permuted(7)); + + -- trim and sign-extend + for i in 0 to 7 loop + if i < to_integer(data_len) then + if second_word = '1' then + trim_ctl(i) <= '1' & not use_second(i); + else + trim_ctl(i) <= not use_second(i) & '0'; + end if; + else + trim_ctl(i) <= '0' & (negative and sign_extend); + end if; + end loop; + for i in 0 to 7 loop + case trim_ctl(i) is + when "11" => + data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8); + when "10" => + data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8); + when "01" => + data_trimmed(i * 8 + 7 downto i * 8) <= x"FF"; + when others => + data_trimmed(i * 8 + 7 downto i * 8) <= x"00"; + end case; + end loop; + + -- deliver to regfile + w_out.write_data <= data_trimmed; + + -- test value against 0 and set CR0 if requested + if rc = '1' then c_out.write_cr_enable <= '1'; - c_out.write_cr_mask <= d_in.write_cr_mask; - c_out.write_cr_data <= d_in.write_cr_data; + 
c_out.write_cr_mask <= num_to_fxm(0); + if data_trimmed(63) = '1' then + c_out.write_cr_data <= x"80000000"; + elsif or (data_trimmed(62 downto 0)) = '1' then + c_out.write_cr_data <= x"40000000"; + else + c_out.write_cr_data <= x"20000000"; + end if; end if; end process; end; From 9646fe28b089c7f03aeef5d3d32a590652732ec2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 14 Oct 2019 14:39:23 +1100 Subject: [PATCH 2/4] Do sign-extension instructions in writeback instead of execute1 This makes the exts[bhw] instructions do the sign extension in the writeback stage using the sign-extension logic there instead of having unique sign extension logic in execute1. This requires passing the data length and sign extend flag from decode2 down through execute1 and execute2 and into writeback. As a side bonus we reduce the number of values in insn_type_t by two. Signed-off-by: Paul Mackerras --- common.vhdl | 9 +++++++-- decode1.vhdl | 6 +++--- decode2.vhdl | 29 ++++++++++++++++------------- decode_types.vhdl | 2 +- execute1.vhdl | 14 ++++++-------- execute2.vhdl | 2 ++ writeback.vhdl | 2 ++ 7 files changed, 37 insertions(+), 27 deletions(-) diff --git a/common.vhdl b/common.vhdl index 321cff1..41623af 100644 --- a/common.vhdl +++ b/common.vhdl @@ -64,6 +64,7 @@ package common is is_32bit: std_ulogic; is_signed: std_ulogic; insn: std_ulogic_vector(31 downto 0); + data_len: std_ulogic_vector(3 downto 0); end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', invert_a => '0', @@ -168,12 +169,14 @@ package common is write_enable : std_ulogic; write_reg: std_ulogic_vector(4 downto 0); write_data: std_ulogic_vector(63 downto 0); + write_len : std_ulogic_vector(3 downto 0); write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); rc : std_ulogic; + sign_extend: std_ulogic; end record; - constant Execute1ToExecute2Init : 
Execute1ToExecute2Type := (valid => '0', write_enable => '0', write_cr_enable => '0', rc => '0', others => (others => '0')); + constant Execute1ToExecute2Init : Execute1ToExecute2Type := (valid => '0', write_enable => '0', write_cr_enable => '0', rc => '0', sign_extend => '0', others => (others => '0')); type Execute2ToWritebackType is record valid: std_ulogic; @@ -181,11 +184,13 @@ package common is write_enable : std_ulogic; write_reg: std_ulogic_vector(4 downto 0); write_data: std_ulogic_vector(63 downto 0); + write_len : std_ulogic_vector(3 downto 0); write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); + sign_extend: std_ulogic; end record; - constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0')); type MultiplyToWritebackType is record valid: std_ulogic; diff --git a/decode1.vhdl b/decode1.vhdl index 2bdb423..49e37da 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -164,9 +164,9 @@ architecture behaviour of decode1 is 2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd 2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw 2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv - 2#1110111010# => (ALU, OP_EXTSB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb - 2#1110011010# => (ALU, OP_EXTSH, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 
extsh - 2#1111011010# => (ALU, OP_EXTSW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsw + 2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb + 2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh + 2#1111011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsw -- 2#110111101-# extswsli 2#1111010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbi 2#0000010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbt diff --git a/decode2.vhdl b/decode2.vhdl index 524943c..c8dee48 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -208,6 +208,7 @@ begin variable decoded_reg_b : decode_input_reg_t; variable decoded_reg_c : decode_input_reg_t; variable signed_division: std_ulogic; + variable length : std_ulogic_vector(3 downto 0); begin v := r; @@ -231,6 +232,19 @@ begin r_out.read2_enable <= decoded_reg_b.reg_valid; r_out.read3_enable <= decoded_reg_c.reg_valid; + case d_in.decode.length is + when is1B => + length := "0001"; + when is2B => + length := "0010"; + when is4B => + length := "0100"; + when is8B => + length := "1000"; + when NONE => + length := "0000"; + end case; + -- execute unit v.e.nia := d_in.nia; v.e.insn_type := d_in.decode.insn_type; @@ -252,6 +266,7 @@ begin v.e.lr := insn_lk(d_in.insn); end if; v.e.insn := d_in.insn; + v.e.data_len := length; -- multiply unit v.m.insn_type := d_in.decode.insn_type; @@ -336,19 +351,7 @@ begin v.l.load := '0'; end if; - case d_in.decode.length is - when is1B => - v.l.length := "0001"; - when is2B => - v.l.length := "0010"; - when is4B => - 
v.l.length := "0100"; - when is8B => - v.l.length := "1000"; - when NONE => - v.l.length := "0000"; - end case; - + v.l.length := length; v.l.byte_reverse := d_in.decode.byte_reverse; v.l.sign_extend := d_in.decode.sign_extend; v.l.update := d_in.decode.update; diff --git a/decode_types.vhdl b/decode_types.vhdl index 982b172..f6b6ca9 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -8,7 +8,7 @@ package decode_types is OP_CNTZ, OP_CRAND, OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, - OP_DCBZ, OP_DIV, OP_EXTSB, OP_EXTSH, OP_EXTSW, + OP_DCBZ, OP_DIV, OP_EXTS, OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF, OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD, diff --git a/execute1.vhdl b/execute1.vhdl index d0ff461..85bb5e1 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -143,6 +143,8 @@ begin v.e.valid := '1'; v.e.write_reg := e_in.write_reg; + v.e.write_len := x"8"; + v.e.sign_extend := '0'; case_0: case e_in.insn_type is @@ -230,14 +232,10 @@ begin when OP_CNTZ => result := countzero_result; result_en := 1; - when OP_EXTSB => - result := ppc_extsb(e_in.read_data3); - result_en := 1; - when OP_EXTSH => - result := ppc_extsh(e_in.read_data3); - result_en := 1; - when OP_EXTSW => - result := ppc_extsw(e_in.read_data3); + when OP_EXTS => + v.e.write_len := e_in.data_len; + v.e.sign_extend := '1'; + result := e_in.read_data3; result_en := 1; when OP_ISEL => crnum := to_integer(unsigned(insn_bc(e_in.insn))); diff --git a/execute2.vhdl b/execute2.vhdl index de55310..97b4103 100644 --- a/execute2.vhdl +++ b/execute2.vhdl @@ -40,6 +40,8 @@ begin v.write_cr_mask := e_in.write_cr_mask; v.write_cr_data := e_in.write_cr_data; v.rc := e_in.rc; + v.write_len := e_in.write_len; + v.sign_extend := e_in.sign_extend; -- Update registers rin <= v; diff --git a/writeback.vhdl b/writeback.vhdl index ba88970..4bae8f7 100644 --- a/writeback.vhdl 
+++ b/writeback.vhdl @@ -100,6 +100,8 @@ begin w_out.write_reg <= e_in.write_reg; data_in <= e_in.write_data; w_out.write_enable <= '1'; + data_len <= unsigned(e_in.write_len); + sign_extend <= e_in.sign_extend; rc <= e_in.rc; end if; From f49a5a99a5114186211b4bc28c92b7d97ed13e48 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Oct 2019 16:26:36 +1100 Subject: [PATCH 3/4] Remove execute2 stage Since the condition setting got moved to writeback, execute2 does nothing aside from wasting a cycle. This removes it. Signed-off-by: Paul Mackerras --- Makefile | 3 +-- common.vhdl | 18 ++--------------- core.vhdl | 14 +++----------- execute1.vhdl | 6 +++--- execute2.vhdl | 52 -------------------------------------------------- microwatt.core | 1 - writeback.vhdl | 2 +- 7 files changed, 10 insertions(+), 86 deletions(-) delete mode 100644 execute2.vhdl diff --git a/Makefile b/Makefile index 6657d4d..5525c1e 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ common.o: decode_types.o control.o: gpr_hazard.o cr_hazard.o sim_jtag.o: sim_jtag_socket.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o -core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o +core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o core_debug.o: common.o countzero.o: countzero_tb.o: common.o glibc_random.o countzero.o @@ -27,7 +27,6 @@ decode1.o: common.o decode_types.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o decode_types.o: execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o -execute2.o: common.o fetch1.o: common.o fetch2.o: common.o wishbone_types.o glibc_random_helpers.o: diff --git a/common.vhdl 
b/common.vhdl index 41623af..93bd598 100644 --- a/common.vhdl +++ b/common.vhdl @@ -164,21 +164,7 @@ package common is end record; constant Loadstore2ToWritebackInit : Loadstore2ToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', others => (others => '0')); - type Execute1ToExecute2Type is record - valid: std_ulogic; - write_enable : std_ulogic; - write_reg: std_ulogic_vector(4 downto 0); - write_data: std_ulogic_vector(63 downto 0); - write_len : std_ulogic_vector(3 downto 0); - write_cr_enable : std_ulogic; - write_cr_mask : std_ulogic_vector(7 downto 0); - write_cr_data : std_ulogic_vector(31 downto 0); - rc : std_ulogic; - sign_extend: std_ulogic; - end record; - constant Execute1ToExecute2Init : Execute1ToExecute2Type := (valid => '0', write_enable => '0', write_cr_enable => '0', rc => '0', sign_extend => '0', others => (others => '0')); - - type Execute2ToWritebackType is record + type Execute1ToWritebackType is record valid: std_ulogic; rc : std_ulogic; write_enable : std_ulogic; @@ -190,7 +176,7 @@ package common is write_cr_data : std_ulogic_vector(31 downto 0); sign_extend: std_ulogic; end record; - constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0')); + constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0')); type MultiplyToWritebackType is record valid: std_ulogic; diff --git a/core.vhdl b/core.vhdl index aa5e87a..5a269a2 100644 --- a/core.vhdl +++ b/core.vhdl @@ -54,8 +54,7 @@ architecture behave of core is signal writeback_to_cr_file: WritebackToCrFileType; -- execute signals - signal execute1_to_execute2: Execute1ToExecute2Type; - signal execute2_to_writeback: Execute2ToWritebackType; + signal execute1_to_writeback: Execute1ToWritebackType; 
signal execute1_to_fetch1: Execute1ToFetch1Type; -- load store signals @@ -204,17 +203,10 @@ begin flush_out => flush, e_in => decode2_to_execute1, f_out => execute1_to_fetch1, - e_out => execute1_to_execute2, + e_out => execute1_to_writeback, terminate_out => terminate ); - execute2_0: entity work.execute2 - port map ( - clk => clk, - e_in => execute1_to_execute2, - e_out => execute2_to_writeback - ); - loadstore1_0: entity work.loadstore1 port map ( clk => clk, @@ -249,7 +241,7 @@ begin writeback_0: entity work.writeback port map ( clk => clk, - e_in => execute2_to_writeback, + e_in => execute1_to_writeback, l_in => loadstore2_to_writeback, m_in => multiply_to_writeback, d_in => divider_to_writeback, diff --git a/execute1.vhdl b/execute1.vhdl index 85bb5e1..abb896d 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -25,7 +25,7 @@ entity execute1 is -- asynchronous f_out : out Execute1ToFetch1Type; - e_out : out Execute1ToExecute2Type; + e_out : out Execute1ToWritebackType; terminate_out : out std_ulogic ); @@ -34,7 +34,7 @@ end entity execute1; architecture behaviour of execute1 is type reg_type is record --f : Execute1ToFetch1Type; - e : Execute1ToExecute2Type; + e : Execute1ToWritebackType; end record; signal r, rin : reg_type; @@ -124,7 +124,7 @@ begin newcrf := (others => '0'); v := r; - v.e := Execute1ToExecute2Init; + v.e := Execute1ToWritebackInit; --v.f := Execute1ToFetch1TypeInit; ctrl_tmp <= ctrl; diff --git a/execute2.vhdl b/execute2.vhdl deleted file mode 100644 index 97b4103..0000000 --- a/execute2.vhdl +++ /dev/null @@ -1,52 +0,0 @@ -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; - -library work; -use work.common.all; - --- 2 cycle ALU --- We handle rc form instructions here - -entity execute2 is - port ( - clk : in std_ulogic; - - e_in : in Execute1ToExecute2Type; - e_out : out Execute2ToWritebackType - ); -end execute2; - -architecture behave of execute2 is - signal r, rin : Execute2ToWritebackType; -begin - execute2_0: 
process(clk) - begin - if rising_edge(clk) then - r <= rin; - end if; - end process; - - execute2_1: process(all) - variable v : Execute2ToWritebackType; - begin - v := rin; - - v.valid := e_in.valid; - v.write_enable := e_in.write_enable; - v.write_reg := e_in.write_reg; - v.write_data := e_in.write_data; - v.write_cr_enable := e_in.write_cr_enable; - v.write_cr_mask := e_in.write_cr_mask; - v.write_cr_data := e_in.write_cr_data; - v.rc := e_in.rc; - v.write_len := e_in.write_len; - v.sign_extend := e_in.sign_extend; - - -- Update registers - rin <= v; - - -- Update outputs - e_out <= r; - end process; -end; diff --git a/microwatt.core b/microwatt.core index b963c45..44dfbbd 100644 --- a/microwatt.core +++ b/microwatt.core @@ -24,7 +24,6 @@ filesets: - cr_hazard.vhdl - control.vhdl - execute1.vhdl - - execute2.vhdl - loadstore1.vhdl - loadstore2.vhdl - multiply.vhdl diff --git a/writeback.vhdl b/writeback.vhdl index 4bae8f7..1879a5e 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -10,7 +10,7 @@ entity writeback is port ( clk : in std_ulogic; - e_in : in Execute2ToWritebackType; + e_in : in Execute1ToWritebackType; l_in : in Loadstore2ToWritebackType; m_in : in MultiplyToWritebackType; d_in : in DividerToWritebackType; From 57b200d6cb3cdee94fb4813b9ced31edb7a19396 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Oct 2019 07:56:15 +1100 Subject: [PATCH 4/4] writeback: Eliminate inferred latch This initializes data_in to all zeroes so that it doesn't become a set of 64 inferred latches. Signed-off-by: Paul Mackerras --- writeback.vhdl | 1 + 1 file changed, 1 insertion(+) diff --git a/writeback.vhdl b/writeback.vhdl index 1879a5e..042ad59 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -95,6 +95,7 @@ begin partial_write <= '0'; sign_extend <= '0'; second_word <= '0'; + data_in <= (others => '0'); if e_in.write_enable = '1' then w_out.write_reg <= e_in.write_reg;