diff --git a/common.vhdl b/common.vhdl index ea6a8d8..54a87d2 100644 --- a/common.vhdl +++ b/common.vhdl @@ -288,6 +288,7 @@ package common is write_reg_enable: std_ulogic; read_reg1: gspr_index_t; read_reg2: gspr_index_t; + read_reg3: gspr_index_t; read_data1: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0); diff --git a/control.vhdl b/control.vhdl index 17a288b..e6855c2 100644 --- a/control.vhdl +++ b/control.vhdl @@ -15,9 +15,7 @@ entity control is complete_in : in instr_tag_t; valid_in : in std_ulogic; - repeated : in std_ulogic; flush_in : in std_ulogic; - busy_in : in std_ulogic; deferred : in std_ulogic; sgl_pipe_in : in std_ulogic; stop_mark_in : in std_ulogic; @@ -43,7 +41,6 @@ entity control is cr_write_in : in std_ulogic; valid_out : out std_ulogic; - stall_out : out std_ulogic; stopped_out : out std_ulogic; gpr_bypass_a : out std_ulogic_vector(1 downto 0); @@ -157,9 +154,6 @@ begin tag_a.tag := i; end if; end loop; - if tag_match(tag_a, complete_in) then - tag_a.valid := '0'; - end if; tag_b := instr_tag_init; for i in tag_number_t loop if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then @@ -167,9 +161,6 @@ begin tag_b.tag := i; end if; end loop; - if tag_match(tag_b, complete_in) then - tag_b.valid := '0'; - end if; tag_c := instr_tag_init; for i in tag_number_t loop if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then @@ -177,26 +168,29 @@ begin tag_c.tag := i; end if; end loop; - if tag_match(tag_c, complete_in) then - tag_c.valid := '0'; - end if; byp_a := "00"; if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then - byp_a := "10"; + byp_a := "01"; elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then + byp_a := "10"; + elsif tag_match(complete_in, tag_a) then byp_a := "11"; end if; byp_b := "00"; if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then - byp_b := "10"; + byp_b := "01"; elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then + byp_b := "10"; + elsif tag_match(complete_in, tag_b) then byp_b := "11"; end if; byp_c := "00"; if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then - byp_c := "10"; + byp_c := "01"; elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then + byp_c := "10"; + elsif tag_match(complete_in, tag_c) then byp_c := "11"; end if; @@ -204,9 +198,9 @@ begin gpr_bypass_b <= byp_b; gpr_bypass_c <= byp_c; - gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or - (tag_b.valid and not byp_b(1)) or - (tag_c.valid and not byp_c(1)); + gpr_tag_stall <= (tag_a.valid and not (or (byp_a))) or + (tag_b.valid and not (or (byp_b))) or + (tag_c.valid and not (or (byp_c))); incr_tag := curr_tag; instr_tag.tag <= curr_tag; @@ -331,7 +325,6 @@ begin -- update outputs valid_out <= valid_tmp; - stall_out <= stall_tmp or deferred; -- update registers rin_int <= v_int; diff --git a/core.vhdl b/core.vhdl index 23f7e82..ba8f0cc 100644 --- a/core.vhdl +++ b/core.vhdl @@ -100,6 +100,9 @@ architecture behave of core is signal fpu_to_execute1: FPUToExecute1Type; signal fpu_to_writeback: FPUToWritebackType; + -- Writeback signals + signal writeback_bypass: bypass_data_t; + -- local signals signal fetch1_stall_in : std_ulogic; signal icache_stall_out : std_ulogic; @@ -302,6 +305,7 @@ begin execute_cr_bypass => execute1_cr_bypass, execute2_bypass => execute2_bypass, execute2_cr_bypass => execute2_cr_bypass, + writeback_bypass => writeback_bypass, log_out => log_data(119 downto 110) ); decode2_busy_in <= ex1_busy_out; @@ -463,6 +467,7 @@ begin w_out => writeback_to_register_file, c_out => writeback_to_cr_file, f_out => writeback_to_fetch1, + wb_bypass => writeback_bypass, events => writeback_events, interrupt_out => do_interrupt, complete_out => complete diff --git a/decode2.vhdl b/decode2.vhdl index 371c48c..41f3e09 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -41,6 +41,7 @@ entity decode2 is execute_cr_bypass : in cr_bypass_data_t; execute2_bypass : in bypass_data_t; execute2_cr_bypass : in cr_bypass_data_t; + writeback_bypass : in bypass_data_t; log_out : out std_ulogic_vector(9 downto 0) ); @@ -49,8 +50,16 @@ end entity decode2; architecture behaviour of decode2 is type reg_type is record e : Decode2ToExecute1Type; - repeat : std_ulogic; + repeat : repeat_t; + busy : std_ulogic; + sgl_pipe : std_ulogic; + reg_a_valid : std_ulogic; + reg_b_valid : std_ulogic; + reg_c_valid : std_ulogic; + reg_o_valid : std_ulogic; end record; + constant reg_type_init : reg_type := + (e => Decode2ToExecute1Init, repeat => NONE, others => '0'); signal dc2, dc2in : reg_type; @@ -61,20 +70,21 @@ architecture behaviour of decode2 is reg : gspr_index_t; data : std_ulogic_vector(63 downto 0); end record; + constant decode_input_reg_init : decode_input_reg_t := ('0', (others => '0'), (others => '0')); type decode_output_reg_t is record reg_valid : std_ulogic; reg : gspr_index_t; end record; + constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0')); function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0); - reg_data : std_ulogic_vector(63 downto 0); ispr : gspr_index_t; instr_addr : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is begin if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then - return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data); + return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0')); elsif t = SPR then -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. -- If it's all 0, we don't treat it as a dependency as slow SPRs @@ -83,27 +93,26 @@ architecture behaviour of decode2 is assert is_fast_spr(ispr) = '1' or ispr = "0000000" report "Decode A says SPR but ISPR is invalid:" & to_hstring(ispr) severity failure; - return (is_fast_spr(ispr), ispr, reg_data); + return (is_fast_spr(ispr), ispr, (others => '0')); elsif t = CIA then return ('0', (others => '0'), instr_addr); elsif HAS_FPU and t = FRA then - return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data); + return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0')); else return ('0', (others => '0'), (others => '0')); end if; end; function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0); - reg_data : std_ulogic_vector(63 downto 0); ispr : gspr_index_t) return decode_input_reg_t is variable ret : decode_input_reg_t; begin case t is when RB => - ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data); + ret := ('1', gpr_to_gspr(insn_rb(insn_in)), (others => '0')); when FRB => if HAS_FPU then - ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data); + ret := ('1', fpr_to_gspr(insn_frb(insn_in)), (others => '0')); else ret := ('0', (others => '0'), (others => '0')); end if; @@ -138,7 +147,7 @@ architecture behaviour of decode2 is assert is_fast_spr(ispr) = '1' or ispr = "0000000" report "Decode B says SPR but ISPR is invalid:" & to_hstring(ispr) severity failure; - ret := (is_fast_spr(ispr), ispr, reg_data); + ret := (is_fast_spr(ispr), ispr, (others => '0')); when NONE => ret := ('0', (others => '0'), (others => '0')); end case; @@ -146,23 +155,23 @@ architecture behaviour of decode2 is return ret; end; - function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0); - reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is + function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0)) + return decode_input_reg_t is begin case t is when RS => - return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data); + return ('1', gpr_to_gspr(insn_rs(insn_in)), (others => '0')); when RCR => - return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data); + return ('1', gpr_to_gspr(insn_rcreg(insn_in)), (others => '0')); when FRS => if HAS_FPU then - return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data); + return ('1', fpr_to_gspr(insn_frt(insn_in)), (others => '0')); else return ('0', (others => '0'), (others => '0')); end if; when FRC => if HAS_FPU then - return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data); + return ('1', fpr_to_gspr(insn_frc(insn_in)), (others => '0')); else return ('0', (others => '0'), (others => '0')); end if; @@ -264,10 +273,14 @@ architecture behaviour of decode2 is others => "000" ); + signal decoded_reg_a : decode_input_reg_t; + signal decoded_reg_b : decode_input_reg_t; + signal decoded_reg_c : decode_input_reg_t; + signal decoded_reg_o : decode_output_reg_t; + -- issue control signals signal control_valid_in : std_ulogic; signal control_valid_out : std_ulogic; - signal control_stall_out : std_ulogic; signal control_sgl_pipe : std_logic; signal gpr_write_valid : std_ulogic; @@ -302,8 +315,6 @@ begin complete_in => complete_in, valid_in => control_valid_in, - repeated => dc2.repeat, - busy_in => busy_in, deferred => deferred, flush_in => flush_in, sgl_pipe_in => control_sgl_pipe, @@ -331,7 +342,6 @@ begin cr_bypass => cr_bypass, valid_out => control_valid_out, - stall_out => control_stall_out, stopped_out => stopped_out, gpr_bypass_a => gpr_a_bypass, @@ -346,9 +356,12 @@ begin decode2_0: process(clk) begin if rising_edge(clk) then - if rst = '1' or flush_in = '1' or deferred = '0' then + if rst = '1' or flush_in = '1' then + dc2 <= reg_type_init; + elsif deferred = '0' then if dc2in.e.valid = '1' then - report "execute " & to_hstring(dc2in.e.nia); + report "execute " & to_hstring(dc2in.e.nia) & + " tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid); end if; dc2 <= dc2in; end if; @@ -357,205 +370,246 @@ begin c_out.read <= d_in.decode.input_cr; + decode2_addrs: process(all) + begin + decoded_reg_a <= decode_input_reg_init; + decoded_reg_b <= decode_input_reg_init; + decoded_reg_c <= decode_input_reg_init; + decoded_reg_o <= decode_output_reg_init; + if d_in.valid = '1' then + decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.ispr1, d_in.nia); + decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.ispr2); + decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn); + decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro); + end if; + + r_out.read1_enable <= decoded_reg_a.reg_valid; + r_out.read1_reg <= decoded_reg_a.reg; + r_out.read2_enable <= decoded_reg_b.reg_valid; + r_out.read2_reg <= decoded_reg_b.reg; + r_out.read3_enable <= decoded_reg_c.reg_valid; + r_out.read3_reg <= decoded_reg_c.reg; + + end process; + decode2_1: process(all) variable v : reg_type; - variable decoded_reg_a : decode_input_reg_t; - variable decoded_reg_b : decode_input_reg_t; - variable decoded_reg_c : decode_input_reg_t; - variable decoded_reg_o : decode_output_reg_t; variable length : std_ulogic_vector(3 downto 0); variable op : insn_type_t; + variable valid_in : std_ulogic; begin v := dc2; - v.e := Decode2ToExecute1Init; - - --v.e.input_cr := d_in.decode.input_cr; - v.e.output_cr := d_in.decode.output_cr; + valid_in := d_in.valid or dc2.busy; - -- Work out whether XER common bits are set - v.e.output_xer := d_in.decode.output_carry; - case d_in.decode.insn_type is - when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE => - -- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only - if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then - v.e.oe := '1'; - v.e.output_xer := '1'; - end if; - when OP_MTSPR => - if decode_spr_num(d_in.insn) = SPR_XER then - v.e.output_xer := '1'; - end if; - when others => - end case; + if dc2.busy = '0' then + v.e := Decode2ToExecute1Init; - decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1, - d_in.nia); - decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2); - decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data); - decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro); + v.sgl_pipe := d_in.decode.sgl_pipe; - if d_in.decode.lr = '1' then - v.e.lr := insn_lk(d_in.insn); - -- b and bc have even major opcodes; bcreg is considered absolute - v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26); - end if; - op := d_in.decode.insn_type; + v.e.input_cr := d_in.decode.input_cr; + v.e.output_cr := d_in.decode.output_cr; - if d_in.decode.repeat /= NONE then - v.e.repeat := '1'; - v.e.second := dc2.repeat; - case d_in.decode.repeat is - when DUPD => - -- update-form loads, 2nd instruction writes RA - if dc2.repeat = '1' then - decoded_reg_o.reg := decoded_reg_a.reg; + -- Work out whether XER common bits are set + v.e.output_xer := d_in.decode.output_carry; + case d_in.decode.insn_type is + when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE => + -- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only + if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then + v.e.oe := '1'; + v.e.output_xer := '1'; + end if; + when OP_MTSPR => + if decode_spr_num(d_in.insn) = SPR_XER then + v.e.output_xer := '1'; end if; when others => end case; - elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then - -- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled - v.e.repeat := '1'; - v.e.second := dc2.repeat; - -- first one does CTR, second does LR - decoded_reg_o.reg(0) := not dc2.repeat; - end if; - v.e.spr_select := d_in.spr_info; + v.reg_a_valid := decoded_reg_a.reg_valid; + v.reg_b_valid := decoded_reg_b.reg_valid; + v.reg_c_valid := decoded_reg_c.reg_valid; + v.reg_o_valid := decoded_reg_o.reg_valid; - r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid; - r_out.read1_reg <= decoded_reg_a.reg; - r_out.read2_enable <= decoded_reg_b.reg_valid and d_in.valid; - r_out.read2_reg <= decoded_reg_b.reg; - r_out.read3_enable <= decoded_reg_c.reg_valid and d_in.valid; - r_out.read3_reg <= decoded_reg_c.reg; + if d_in.decode.lr = '1' then + v.e.lr := insn_lk(d_in.insn); + -- b and bc have even major opcodes; bcreg is considered absolute + v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26); + end if; + op := d_in.decode.insn_type; + + v.repeat := d_in.decode.repeat; + if d_in.decode.repeat /= NONE then + v.e.repeat := '1'; + elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then + -- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled + v.e.repeat := '1'; + end if; - case d_in.decode.length is - when is1B => - length := "0001"; - when is2B => - length := "0010"; - when is4B => - length := "0100"; - when is8B => - length := "1000"; - when NONE => - length := "0000"; - end case; + v.e.spr_select := d_in.spr_info; + + case d_in.decode.length is + when is1B => + length := "0001"; + when is2B => + length := "0010"; + when is4B => + length := "0100"; + when is8B => + length := "1000"; + when NONE => + length := "0000"; + end case; - -- execute unit - v.e.nia := d_in.nia; - v.e.unit := d_in.decode.unit; - v.e.fac := d_in.decode.facility; - v.e.instr_tag := instr_tag; - v.e.read_reg1 := decoded_reg_a.reg; - v.e.read_reg2 := decoded_reg_b.reg; - v.e.write_reg := decoded_reg_o.reg; - v.e.write_reg_enable := decoded_reg_o.reg_valid; - v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); - v.e.xerc := c_in.read_xerc_data; - v.e.invert_a := d_in.decode.invert_a; - v.e.addm1 := '0'; - v.e.insn_type := op; - v.e.invert_out := d_in.decode.invert_out; - v.e.input_carry := d_in.decode.input_carry; - v.e.output_carry := d_in.decode.output_carry; - v.e.is_32bit := d_in.decode.is_32bit; - v.e.is_signed := d_in.decode.is_signed; - v.e.insn := d_in.insn; - v.e.data_len := length; - v.e.byte_reverse := d_in.decode.byte_reverse; - v.e.sign_extend := d_in.decode.sign_extend; - v.e.update := d_in.decode.update; - v.e.reserve := d_in.decode.reserve; - v.e.br_pred := d_in.br_pred; - v.e.result_sel := result_select(op); - v.e.sub_select := subresult_select(op); - if op = OP_BC or op = OP_BCREG then - if d_in.insn(23) = '0' and dc2.repeat = '0' and - not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then - -- decrement CTR if BO(2) = 0 and not bcctr - v.e.addm1 := '1'; - v.e.result_sel := "000"; -- select adder output + -- execute unit + v.e.nia := d_in.nia; + v.e.unit := d_in.decode.unit; + v.e.fac := d_in.decode.facility; + v.e.read_reg1 := decoded_reg_a.reg; + v.e.read_reg2 := decoded_reg_b.reg; + v.e.read_reg3 := decoded_reg_c.reg; + v.e.write_reg := decoded_reg_o.reg; + v.e.write_reg_enable := decoded_reg_o.reg_valid; + v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); + v.e.xerc := c_in.read_xerc_data; + v.e.invert_a := d_in.decode.invert_a; + v.e.addm1 := '0'; + v.e.insn_type := op; + v.e.invert_out := d_in.decode.invert_out; + v.e.input_carry := d_in.decode.input_carry; + v.e.output_carry := d_in.decode.output_carry; + v.e.is_32bit := d_in.decode.is_32bit; + v.e.is_signed := d_in.decode.is_signed; + v.e.insn := d_in.insn; + v.e.data_len := length; + v.e.byte_reverse := d_in.decode.byte_reverse; + v.e.sign_extend := d_in.decode.sign_extend; + v.e.update := d_in.decode.update; + v.e.reserve := d_in.decode.reserve; + v.e.br_pred := d_in.br_pred; + v.e.result_sel := result_select(op); + v.e.sub_select := subresult_select(op); + if op = OP_BC or op = OP_BCREG then + if d_in.insn(23) = '0' and + not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then + -- decrement CTR if BO(2) = 0 and not bcctr + v.e.addm1 := '1'; + v.e.result_sel := "000"; -- select adder output + end if; end if; - end if; - if op = OP_MFSPR then - if is_fast_spr(d_in.ispr1) = '1' then - v.e.result_sel := "000"; -- adder_result, effectively a_in - elsif d_in.spr_info.valid = '0' then - -- Privileged mfspr to invalid/unimplemented SPR numbers - -- writes the contents of RT back to RT (i.e. it's a no-op) - v.e.result_sel := "001"; -- logical_result - elsif d_in.spr_info.ispmu = '1' then - v.e.result_sel := "100"; -- pmuspr_result + if op = OP_MFSPR then + if is_fast_spr(d_in.ispr1) = '1' then + v.e.result_sel := "000"; -- adder_result, effectively a_in + elsif d_in.spr_info.valid = '0' then + -- Privileged mfspr to invalid/unimplemented SPR numbers + -- writes the contents of RT back to RT (i.e. it's a no-op) + v.e.result_sel := "001"; -- logical_result + elsif d_in.spr_info.ispmu = '1' then + v.e.result_sel := "100"; -- pmuspr_result + end if; end if; - end if; - -- See if any of the operands can get their value via the bypass path. - case gpr_a_bypass is - when "10" => - v.e.read_data1 := execute_bypass.data; - when "11" => - v.e.read_data1 := execute2_bypass.data; - when others => - v.e.read_data1 := decoded_reg_a.data; - end case; - case gpr_b_bypass is - when "10" => - v.e.read_data2 := execute_bypass.data; - when "11" => - v.e.read_data2 := execute2_bypass.data; - when others => - v.e.read_data2 := decoded_reg_b.data; - end case; - case gpr_c_bypass is - when "10" => - v.e.read_data3 := execute_bypass.data; - when "11" => - v.e.read_data3 := execute2_bypass.data; - when others => - v.e.read_data3 := decoded_reg_c.data; - end case; - - v.e.cr := c_in.read_cr_data; - if cr_bypass = "10" then - v.e.cr := execute_cr_bypass.data; - elsif cr_bypass = "11" then - v.e.cr := execute2_cr_bypass.data; + elsif dc2.e.valid = '1' then + -- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction. + -- Set up for the second iteration (if deferred = 1 this will all be ignored) + v.e.second := '1'; + case dc2.repeat is + when DUPD => + -- update-form loads, 2nd instruction writes RA + v.e.write_reg := dc2.e.read_reg1; + when NONE => + -- bcl/bclrl/bctarl that needs to write both CTR and LR + v.e.write_reg(0) := '0'; -- point to LR + v.e.result_sel := "110"; -- select NIA (to go to LR) + when others => + end case; end if; -- issue control - control_valid_in <= d_in.valid; - control_sgl_pipe <= d_in.decode.sgl_pipe; + control_valid_in <= valid_in; + control_sgl_pipe <= v.sgl_pipe; - gpr_write_valid <= v.e.write_reg_enable; - gpr_write <= decoded_reg_o.reg; + gpr_write_valid <= v.reg_o_valid; + gpr_write <= v.e.write_reg; - gpr_a_read_valid <= decoded_reg_a.reg_valid; - gpr_a_read <= decoded_reg_a.reg; + gpr_a_read_valid <= v.reg_a_valid; + gpr_a_read <= v.e.read_reg1; - gpr_b_read_valid <= decoded_reg_b.reg_valid; - gpr_b_read <= decoded_reg_b.reg; + gpr_b_read_valid <= v.reg_b_valid; + gpr_b_read <= v.e.read_reg2; - gpr_c_read_valid <= decoded_reg_c.reg_valid; - gpr_c_read <= decoded_reg_c.reg; + gpr_c_read_valid <= v.reg_c_valid; + gpr_c_read <= v.e.read_reg3; - cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); + cr_write_valid <= v.e.output_cr or v.e.rc; -- Since ops that write CR only write some of the fields, -- any op that writes CR effectively also reads it. - cr_read_valid <= cr_write_valid or d_in.decode.input_cr; + cr_read_valid <= cr_write_valid or v.e.input_cr; - v.e.valid := control_valid_out; - if control_valid_out = '1' then - v.repeat := v.e.repeat and not dc2.repeat; + -- See if any of the operands can get their value via the bypass path. + if dc2.busy = '0' or gpr_a_bypass /= "00" then + case gpr_a_bypass is + when "01" => + v.e.read_data1 := execute_bypass.data; + when "10" => + v.e.read_data1 := execute2_bypass.data; + when "11" => + v.e.read_data1 := writeback_bypass.data; + when others => + if decoded_reg_a.reg_valid = '1' then + v.e.read_data1 := r_in.read1_data; + else + v.e.read_data1 := decoded_reg_a.data; + end if; + end case; + end if; + if dc2.busy = '0' or gpr_b_bypass /= "00" then + case gpr_b_bypass is + when "01" => + v.e.read_data2 := execute_bypass.data; + when "10" => + v.e.read_data2 := execute2_bypass.data; + when "11" => + v.e.read_data2 := writeback_bypass.data; + when others => + if decoded_reg_b.reg_valid = '1' then + v.e.read_data2 := r_in.read2_data; + else + v.e.read_data2 := decoded_reg_b.data; + end if; + end case; + end if; + if dc2.busy = '0' or gpr_c_bypass /= "00" then + case gpr_c_bypass is + when "01" => + v.e.read_data3 := execute_bypass.data; + when "10" => + v.e.read_data3 := execute2_bypass.data; + when "11" => + v.e.read_data3 := writeback_bypass.data; + when others => + if decoded_reg_c.reg_valid = '1' then + v.e.read_data3 := r_in.read3_data; + else + v.e.read_data3 := decoded_reg_c.data; + end if; + end case; end if; - stall_out <= control_stall_out or v.repeat; + case cr_bypass is + when "10" => + v.e.cr := execute_cr_bypass.data; + when "11" => + v.e.cr := execute2_cr_bypass.data; + when others => + v.e.cr := c_in.read_cr_data; + end case; - if rst = '1' or flush_in = '1' then - v.e := Decode2ToExecute1Init; - v.repeat := '0'; - end if; + v.e.valid := control_valid_out; + v.e.instr_tag := instr_tag; + v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second)); + + stall_out <= dc2.busy or deferred; -- Update registers dc2in <= v; @@ -574,9 +628,9 @@ begin dc2.e.valid & stopped_out & stall_out & - (gpr_a_bypass(1) or gpr_a_bypass(0)) & - (gpr_b_bypass(1) or gpr_b_bypass(0)) & - (gpr_c_bypass(1) or gpr_c_bypass(0)); + (gpr_a_bypass(1) xor gpr_a_bypass(0)) & + (gpr_b_bypass(1) xor gpr_b_bypass(0)) & + (gpr_c_bypass(1) xor gpr_c_bypass(0)); end if; end process; log_out <= log_data; diff --git a/register_file.vhdl b/register_file.vhdl index ab35855..0235dfc 100644 --- a/register_file.vhdl +++ b/register_file.vhdl @@ -100,18 +100,8 @@ begin d_out.read2_data <= rd_port_b; d_out.read3_data <= registers(to_integer(unsigned(c_addr))); - -- Forward any written data - if w_in.write_enable = '1' then - if a_addr = w_addr then - d_out.read1_data <= w_in.write_data; - end if; - if b_addr = w_addr then - d_out.read2_data <= w_in.write_data; - end if; - if c_addr = w_addr then - d_out.read3_data <= w_in.write_data; - end if; - end if; + -- Forwarding of written data is now done explicitly with a bypass path + -- from writeback to decode2. end process register_read_0; -- Latch read data and ack if dbg read requested and B port not busy diff --git a/writeback.vhdl b/writeback.vhdl index db30164..0d6f41d 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -19,6 +19,8 @@ entity writeback is c_out : out WritebackToCrFileType; f_out : out WritebackToFetch1Type; + wb_bypass : out bypass_data_t; + -- PMU event bus events : out WritebackEventType; @@ -215,6 +217,11 @@ begin f_out <= f; flush_out <= f_out.redirect; + -- Register write data bypass to decode2 + wb_bypass.tag.tag <= complete_out.tag; + wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable; + wb_bypass.data <= w_out.write_data; + rin <= v; end process; end;