From c0b45e153b39ddd7fe062b575d136979cdcec076 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 10 Nov 2020 20:04:00 +1100 Subject: [PATCH] core: Track GPR hazards using tags that propagate through the pipelines This changes the way GPR hazards are detected and tracked. Instead of having a model of the pipeline in gpr_hazard.vhdl, which has to mirror the behaviour of the real pipeline exactly, we now assign a 2-bit tag to each instruction and record which GSPR the instruction writes. Subsequent instructions that need to use the GSPR get the tag number and stall until the value with that tag is being written back to the register file. For now, the forwarding paths are disabled. That gives about an 8% reduction in coremark performance. Signed-off-by: Paul Mackerras --- Makefile | 2 +- common.vhdl | 71 +++++++++++----- control.vhdl | 214 +++++++++++++++++++++++++++--------------------- core.vhdl | 2 +- decode2.vhdl | 12 +-- execute1.vhdl | 9 +- fpu.vhdl | 3 + gpr_hazard.vhdl | 112 ------------------------- loadstore1.vhdl | 3 + microwatt.core | 1 - writeback.vhdl | 16 +++- 11 files changed, 202 insertions(+), 243 deletions(-) delete mode 100644 gpr_hazard.vhdl diff --git a/Makefile b/Makefile index 2ee5d57..bb39007 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ all: $(all) core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \ utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \ - decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \ + decode1.vhdl helpers.vhdl insn_helpers.vhdl \ cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \ diff --git a/common.vhdl b/common.vhdl index 686e414..8792944 100644 --- a/common.vhdl +++ b/common.vhdl @@ -3,6 +3,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.utils.all; use work.decode_types.all; package common is @@ 
-126,6 +127,17 @@ package common is constant FPSCR_NI : integer := 63 - 61; constant FPSCR_RN : integer := 63 - 63; + -- Used for tracking instruction completion and pending register writes + constant TAG_COUNT : positive := 4; + constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT); + subtype tag_number_t is integer range 0 to TAG_COUNT - 1; + subtype tag_index_t is unsigned(TAG_NUMBER_BITS - 1 downto 0); + type instr_tag_t is record + tag : tag_number_t; + valid : std_ulogic; + end record; + constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0'); + type irq_state_t is (WRITE_SRR0, WRITE_SRR1); -- For now, fixed 16 sources, make this either a parametric @@ -197,6 +209,7 @@ package common is fac : facility_t; insn_type: insn_type_t; nia: std_ulogic_vector(63 downto 0); + instr_tag : instr_tag_t; write_reg: gspr_index_t; write_reg_enable: std_ulogic; read_reg1: gspr_index_t; @@ -236,7 +249,7 @@ package common is second : std_ulogic; -- set if this is the second op end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := - (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, + (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', @@ -291,9 +304,9 @@ package common is end record; type RegisterFileToDecode2Type is record - read1_data : std_ulogic_vector(63 downto 0); - read2_data : std_ulogic_vector(63 downto 0); - read3_data : std_ulogic_vector(63 downto 0); + read1_data : std_ulogic_vector(63 downto 0); + read2_data : std_ulogic_vector(63 downto 0); + read3_data : std_ulogic_vector(63 downto 0); end record; type Decode2ToCrFileType is record @@ -326,6 +339,7 @@ package common is op : insn_type_t; -- what ld/st or 
m[tf]spr or TLB op to do nia : std_ulogic_vector(63 downto 0); insn : std_ulogic_vector(31 downto 0); + instr_tag : instr_tag_t; addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -345,14 +359,17 @@ package common is repeat : std_ulogic; second : std_ulogic; end record; - constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', - sign_extend => '0', update => '0', xerc => xerc_init, - reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', - nia => (others => '0'), insn => (others => '0'), - addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), - write_reg => (others => '0'), length => (others => '0'), - mode_32bit => '0', is_32bit => '0', - repeat => '0', second => '0'); + constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := + (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', + sign_extend => '0', update => '0', xerc => xerc_init, + reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', + nia => (others => '0'), insn => (others => '0'), + instr_tag => instr_tag_init, + addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), + write_reg => (others => '0'), + length => (others => '0'), + mode_32bit => '0', is_32bit => '0', + repeat => '0', second => '0'); type Loadstore1ToExecute1Type is record busy : std_ulogic; @@ -439,6 +456,7 @@ package common is type Loadstore1ToWritebackType is record valid : std_ulogic; + instr_tag : instr_tag_t; write_enable: std_ulogic; write_reg : gspr_index_t; write_data : std_ulogic_vector(63 downto 0); @@ -446,11 +464,13 @@ package common is rc : std_ulogic; store_done : std_ulogic; end record; - constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init, - rc => '0', store_done => '0', write_data => (others => '0'), 
others => (others => '0')); + constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := + (valid => '0', instr_tag => instr_tag_init, write_enable => '0', xerc => xerc_init, + rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0')); type Execute1ToWritebackType is record valid: std_ulogic; + instr_tag : instr_tag_t; rc : std_ulogic; mode_32bit : std_ulogic; write_enable : std_ulogic; @@ -465,17 +485,19 @@ package common is exc_write_reg : gspr_index_t; exc_write_data : std_ulogic_vector(63 downto 0); end record; - constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', mode_32bit => '0', write_enable => '0', - write_cr_enable => '0', exc_write_enable => '0', - write_xerc_enable => '0', xerc => xerc_init, - write_data => (others => '0'), write_cr_mask => (others => '0'), - write_cr_data => (others => '0'), write_reg => (others => '0'), - exc_write_reg => (others => '0'), exc_write_data => (others => '0')); + constant Execute1ToWritebackInit : Execute1ToWritebackType := + (valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0', + write_enable => '0', write_cr_enable => '0', exc_write_enable => '0', + write_xerc_enable => '0', xerc => xerc_init, + write_data => (others => '0'), write_cr_mask => (others => '0'), + write_cr_data => (others => '0'), write_reg => (others => '0'), + exc_write_reg => (others => '0'), exc_write_data => (others => '0')); type Execute1ToFPUType is record valid : std_ulogic; op : insn_type_t; nia : std_ulogic_vector(63 downto 0); + itag : instr_tag_t; insn : std_ulogic_vector(31 downto 0); single : std_ulogic; fe_mode : std_ulogic_vector(1 downto 0); @@ -487,6 +509,7 @@ package common is out_cr : std_ulogic; end record; constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'), + itag => instr_tag_init, insn => (others => '0'), fe_mode => "00", rc => '0', fra => (others => '0'), frb => (others => '0'), frc => 
(others => '0'), frt => (others => '0'), @@ -502,6 +525,7 @@ package common is type FPUToWritebackType is record valid : std_ulogic; + instr_tag : instr_tag_t; write_enable : std_ulogic; write_reg : gspr_index_t; write_data : std_ulogic_vector(63 downto 0); @@ -509,7 +533,9 @@ package common is write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); end record; - constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0')); + constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', instr_tag => instr_tag_init, + write_enable => '0', write_cr_enable => '0', + others => (others => '0')); type DividerToExecute1Type is record valid: std_ulogic; @@ -524,7 +550,8 @@ package common is write_data : std_ulogic_vector(63 downto 0); write_enable : std_ulogic; end record; - constant WritebackToRegisterFileInit : WritebackToRegisterFileType := (write_enable => '0', write_data => (others => '0'), others => (others => '0')); + constant WritebackToRegisterFileInit : WritebackToRegisterFileType := + (write_enable => '0', write_data => (others => '0'), others => (others => '0')); type WritebackToCrFileType is record write_cr_enable : std_ulogic; diff --git a/control.vhdl b/control.vhdl index a89dab8..576627b 100644 --- a/control.vhdl +++ b/control.vhdl @@ -12,7 +12,7 @@ entity control is clk : in std_ulogic; rst : in std_ulogic; - complete_in : in std_ulogic; + complete_in : in instr_tag_t; valid_in : in std_ulogic; repeated : in std_ulogic; flush_in : in std_ulogic; @@ -25,9 +25,6 @@ entity control is gpr_write_in : in gspr_index_t; gpr_bypassable : in std_ulogic; - update_gpr_write_valid : in std_ulogic; - update_gpr_write_reg : in gspr_index_t; - gpr_a_read_valid_in : in std_ulogic; gpr_a_read_in : in gspr_index_t; @@ -48,7 +45,9 @@ entity control is gpr_bypass_a : out std_ulogic; gpr_bypass_b : out std_ulogic; gpr_bypass_c : out std_ulogic; - cr_bypass : 
out std_ulogic + cr_bypass : out std_ulogic; + + instr_tag_out : out instr_tag_t ); end entity control; @@ -71,85 +70,31 @@ architecture rtl of control is signal gpr_write_valid : std_ulogic := '0'; signal cr_write_valid : std_ulogic := '0'; -begin - gpr_hazard0: entity work.gpr_hazard - generic map ( - PIPELINE_DEPTH => PIPELINE_DEPTH - ) - port map ( - clk => clk, - busy_in => busy_in, - deferred => deferred, - complete_in => complete_in, - flush_in => flush_in, - issuing => valid_out, - repeated => repeated, - - gpr_write_valid_in => gpr_write_valid, - gpr_write_in => gpr_write_in, - bypass_avail => gpr_bypassable, - gpr_read_valid_in => gpr_a_read_valid_in, - gpr_read_in => gpr_a_read_in, - - ugpr_write_valid => update_gpr_write_valid, - ugpr_write_reg => update_gpr_write_reg, - - stall_out => stall_a_out, - use_bypass => gpr_bypass_a - ); + type tag_register is record + wr_gpr : std_ulogic; + reg : gspr_index_t; + recent : std_ulogic; + end record; - gpr_hazard1: entity work.gpr_hazard - generic map ( - PIPELINE_DEPTH => PIPELINE_DEPTH - ) - port map ( - clk => clk, - busy_in => busy_in, - deferred => deferred, - complete_in => complete_in, - flush_in => flush_in, - issuing => valid_out, - repeated => repeated, + type tag_regs_array is array(tag_number_t) of tag_register; + signal tag_regs : tag_regs_array; - gpr_write_valid_in => gpr_write_valid, - gpr_write_in => gpr_write_in, - bypass_avail => gpr_bypassable, - gpr_read_valid_in => gpr_b_read_valid_in, - gpr_read_in => gpr_b_read_in, + signal instr_tag : instr_tag_t; - ugpr_write_valid => update_gpr_write_valid, - ugpr_write_reg => update_gpr_write_reg, + signal gpr_tag_a : instr_tag_t; + signal gpr_tag_b : instr_tag_t; + signal gpr_tag_c : instr_tag_t; + signal gpr_tag_stall : std_ulogic; - stall_out => stall_b_out, - use_bypass => gpr_bypass_b - ); + signal curr_tag : tag_number_t; + signal next_tag : tag_number_t; - gpr_hazard2: entity work.gpr_hazard - generic map ( - PIPELINE_DEPTH => PIPELINE_DEPTH - 
) - port map ( - clk => clk, - busy_in => busy_in, - deferred => deferred, - complete_in => complete_in, - flush_in => flush_in, - issuing => valid_out, - repeated => repeated, - - gpr_write_valid_in => gpr_write_valid, - gpr_write_in => gpr_write_in, - bypass_avail => gpr_bypassable, - gpr_read_valid_in => gpr_c_read_valid_in, - gpr_read_in => gpr_c_read_in, - - ugpr_write_valid => update_gpr_write_valid, - ugpr_write_reg => update_gpr_write_reg, - - stall_out => stall_c_out, - use_bypass => gpr_bypass_c - ); + function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is + begin + return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag; + end; +begin cr_hazard0: entity work.cr_hazard generic map ( PIPELINE_DEPTH => PIPELINE_DEPTH @@ -158,7 +103,7 @@ begin clk => clk, busy_in => busy_in, deferred => deferred, - complete_in => complete_in, + complete_in => complete_in.valid, flush_in => flush_in, issuing => valid_out, @@ -170,15 +115,102 @@ begin use_bypass => cr_bypass ); + gpr_bypass_a <= '0'; + gpr_bypass_b <= '0'; + gpr_bypass_c <= '0'; + control0: process(clk) begin if rising_edge(clk) then assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure; r_int <= rin_int; + for i in tag_number_t loop + if rst = '1' or flush_in = '1' then + tag_regs(i).wr_gpr <= '0'; + else + if complete_in.valid = '1' and i = complete_in.tag then + tag_regs(i).wr_gpr <= '0'; + report "tag " & integer'image(i) & " not valid"; + end if; + if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then + tag_regs(i).recent <= '0'; + if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then + report "tag " & integer'image(i) & " not recent"; + end if; + end if; + if instr_tag.valid = '1' and i = instr_tag.tag then + tag_regs(i).wr_gpr <= gpr_write_valid; + tag_regs(i).reg <= gpr_write_in; + tag_regs(i).recent <= gpr_write_valid; + if gpr_write_valid = '1' 
then + report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in); + end if; + end if; + end if; + end loop; + if rst = '1' then + curr_tag <= 0; + else + curr_tag <= next_tag; + end if; end if; end process; + control_hazards : process(all) + variable gpr_stall : std_ulogic; + variable tag_a : instr_tag_t; + variable tag_b : instr_tag_t; + variable tag_c : instr_tag_t; + variable tag_s : instr_tag_t; + variable tag_t : instr_tag_t; + variable incr_tag : tag_number_t; + begin + tag_a := instr_tag_init; + for i in tag_number_t loop + if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then + tag_a.valid := gpr_a_read_valid_in; + tag_a.tag := i; + end if; + end loop; + if tag_match(tag_a, complete_in) then + tag_a.valid := '0'; + end if; + tag_b := instr_tag_init; + for i in tag_number_t loop + if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then + tag_b.valid := gpr_b_read_valid_in; + tag_b.tag := i; + end if; + end loop; + if tag_match(tag_b, complete_in) then + tag_b.valid := '0'; + end if; + tag_c := instr_tag_init; + for i in tag_number_t loop + if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then + tag_c.valid := gpr_c_read_valid_in; + tag_c.tag := i; + end if; + end loop; + if tag_match(tag_c, complete_in) then + tag_c.valid := '0'; + end if; + gpr_tag_a <= tag_a; + gpr_tag_b <= tag_b; + gpr_tag_c <= tag_c; + gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid; + + incr_tag := curr_tag; + instr_tag.tag <= curr_tag; + instr_tag.valid <= valid_out and not deferred; + if instr_tag.valid = '1' then + incr_tag := (curr_tag + 1) mod TAG_COUNT; + end if; + next_tag <= incr_tag; + instr_tag_out <= instr_tag; + end process; + control1 : process(all) variable v_int : reg_internal_type; variable valid_tmp : std_ulogic; @@ -193,7 +225,7 @@ begin if flush_in = '1' then -- expect to see complete_in next cycle 
v_int.outstanding := 1; - elsif complete_in = '1' then + elsif complete_in.valid = '1' then v_int.outstanding := r_int.outstanding - 1; end if; @@ -222,8 +254,8 @@ begin v_int.state := WAIT_FOR_CURR_TO_COMPLETE; end if; else - -- let it go out if there are no GPR hazards - stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out; + -- let it go out if there are no GPR or CR hazards + stall_tmp := gpr_tag_stall or cr_stall_out; end if; end if; @@ -249,8 +281,8 @@ begin v_int.state := WAIT_FOR_CURR_TO_COMPLETE; end if; else - -- let it go out if there are no GPR hazards - stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out; + -- let it go out if there are no GPR or CR hazards + stall_tmp := gpr_tag_stall or cr_stall_out; end if; end if; else @@ -262,15 +294,11 @@ begin valid_tmp := '0'; end if; - if valid_tmp = '1' then - if deferred = '0' then - v_int.outstanding := v_int.outstanding + 1; - end if; - gpr_write_valid <= gpr_write_valid_in; - cr_write_valid <= cr_write_in; - else - gpr_write_valid <= '0'; - cr_write_valid <= '0'; + gpr_write_valid <= gpr_write_valid_in and valid_tmp; + cr_write_valid <= cr_write_in and valid_tmp; + + if valid_tmp = '1' and deferred = '0' then + v_int.outstanding := v_int.outstanding + 1; end if; -- update outputs diff --git a/core.vhdl b/core.vhdl index 3948b86..2ac2ece 100644 --- a/core.vhdl +++ b/core.vhdl @@ -102,7 +102,7 @@ architecture behave of core is signal decode1_flush: std_ulogic; signal fetch1_flush: std_ulogic; - signal complete: std_ulogic; + signal complete: instr_tag_t; signal terminate: std_ulogic; signal core_rst: std_ulogic; signal icache_inv: std_ulogic; diff --git a/decode2.vhdl b/decode2.vhdl index 274a241..a5d7f67 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -19,7 +19,7 @@ entity decode2 is clk : in std_ulogic; rst : in std_ulogic; - complete_in : in std_ulogic; + complete_in : in instr_tag_t; busy_in : in std_ulogic; stall_out : out std_ulogic; @@ -303,6 +303,8 @@ 
architecture behaviour of decode2 is signal cr_bypass : std_ulogic; signal cr_bypass_avail : std_ulogic; + signal instr_tag : instr_tag_t; + begin control_0: entity work.control generic map ( @@ -325,9 +327,6 @@ begin gpr_write_in => gpr_write, gpr_bypassable => gpr_bypassable, - update_gpr_write_valid => '0', - update_gpr_write_reg => 7x"00", - gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_in => gpr_a_read, @@ -348,7 +347,9 @@ begin gpr_bypass_a => gpr_a_bypass, gpr_bypass_b => gpr_b_bypass, - gpr_bypass_c => gpr_c_bypass + gpr_bypass_c => gpr_c_bypass, + + instr_tag_out => instr_tag ); deferred <= r.e.valid and busy_in; @@ -454,6 +455,7 @@ begin v.e.nia := d_in.nia; v.e.unit := d_in.decode.unit; v.e.fac := d_in.decode.facility; + v.e.instr_tag := instr_tag; v.e.read_reg1 := decoded_reg_a.reg; v.e.read_data1 := decoded_reg_a.data; v.e.bypass_data1 := gpr_a_bypass; diff --git a/execute1.vhdl b/execute1.vhdl index 2690424..e1fc240 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -319,7 +319,8 @@ begin ctrl <= ctrl_tmp; if valid_in = '1' then report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) & - " wr=" & to_hstring(rin.e.write_reg); + " wr=" & to_hstring(rin.e.write_reg) & " we=" & std_ulogic'image(rin.e.write_enable) & + " tag=" & integer'image(rin.e.instr_tag.tag) & std_ulogic'image(rin.e.instr_tag.valid); end if; end if; end if; @@ -694,6 +695,7 @@ begin end if; v.e.mode_32bit := not ctrl.msr(MSR_SF); + v.e.instr_tag := current.instr_tag; do_trace := valid_in and ctrl.msr(MSR_SE); if valid_in = '1' then @@ -749,8 +751,6 @@ begin end if; if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then - report "execute nia " & to_hstring(e_in.nia); - v.cur_instr := e_in; v.next_lr := next_nia; v.e.valid := '1'; @@ -909,7 +909,6 @@ begin when OP_ISEL => when OP_CROP => cr_op := insn_cr(e_in.insn); - report "CR OP " & to_hstring(cr_op); if cr_op(0) = '0' then -- MCRF bf := insn_bf(e_in.insn); bfa := 
insn_bfa(e_in.insn); @@ -1309,6 +1308,7 @@ begin -- Outputs to loadstore1 (async) lv.op := e_in.insn_type; lv.nia := e_in.nia; + lv.instr_tag := e_in.instr_tag; lv.addr1 := a_in; lv.addr2 := b_in; lv.data := c_in; @@ -1337,6 +1337,7 @@ begin fv.op := e_in.insn_type; fv.nia := e_in.nia; fv.insn := e_in.insn; + fv.itag := e_in.instr_tag; fv.single := e_in.is_32bit; fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1); fv.fra := a_in; diff --git a/fpu.vhdl b/fpu.vhdl index 2e8096a..5e5c7d6 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -75,6 +75,7 @@ architecture behaviour of fpu is do_intr : std_ulogic; op : insn_type_t; insn : std_ulogic_vector(31 downto 0); + instr_tag : instr_tag_t; dest_fpr : gspr_index_t; fe_mode : std_ulogic; rc : std_ulogic; @@ -574,6 +575,7 @@ begin e_out.interrupt <= r.do_intr; w_out.valid <= r.instr_done and not r.do_intr; + w_out.instr_tag <= r.instr_tag; w_out.write_enable <= r.writing_back; w_out.write_reg <= r.dest_fpr; w_out.write_data <= fp_result; @@ -643,6 +645,7 @@ begin if e_in.valid = '1' then v.insn := e_in.insn; v.op := e_in.op; + v.instr_tag := e_in.itag; v.fe_mode := or (e_in.fe_mode); v.dest_fpr := e_in.frt; v.single_prec := e_in.single; diff --git a/gpr_hazard.vhdl b/gpr_hazard.vhdl deleted file mode 100644 index 6b00994..0000000 --- a/gpr_hazard.vhdl +++ /dev/null @@ -1,112 +0,0 @@ -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; - -library work; -use work.common.all; - -entity gpr_hazard is - generic ( - PIPELINE_DEPTH : natural := 1 - ); - port( - clk : in std_ulogic; - busy_in : in std_ulogic; - deferred : in std_ulogic; - complete_in : in std_ulogic; - flush_in : in std_ulogic; - issuing : in std_ulogic; - repeated : in std_ulogic; - - gpr_write_valid_in : in std_ulogic; - gpr_write_in : in gspr_index_t; - bypass_avail : in std_ulogic; - gpr_read_valid_in : in std_ulogic; - gpr_read_in : in gspr_index_t; - - ugpr_write_valid : in std_ulogic; - ugpr_write_reg : in gspr_index_t; - - stall_out : out 
std_ulogic; - use_bypass : out std_ulogic - ); -end entity gpr_hazard; -architecture behaviour of gpr_hazard is - type pipeline_entry_type is record - valid : std_ulogic; - bypass : std_ulogic; - gpr : gspr_index_t; - ugpr_valid : std_ulogic; - ugpr : gspr_index_t; - end record; - constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'), - ugpr_valid => '0', ugpr => (others => '0')); - - type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type; - constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); - - signal r, rin : pipeline_t := pipeline_t_init; -begin - gpr_hazard0: process(clk) - begin - if rising_edge(clk) then - r <= rin; - end if; - end process; - - gpr_hazard1: process(all) - variable v : pipeline_t; - begin - v := r; - - if complete_in = '1' then - v(PIPELINE_DEPTH).valid := '0'; - v(PIPELINE_DEPTH).ugpr_valid := '0'; - end if; - - stall_out <= '0'; - use_bypass <= '0'; - if repeated = '0' and gpr_read_valid_in = '1' then - loop_0: for i in 0 to PIPELINE_DEPTH loop - -- The second half of a split instruction never has GPR - -- dependencies on the first half's output GPR, - -- so ignore matches when i = 0 for the second half. 
- if v(i).valid = '1' and r(i).gpr = gpr_read_in and - not (i = 0 and repeated = '1') then - if r(i).bypass = '1' then - use_bypass <= '1'; - else - stall_out <= '1'; - end if; - end if; - if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then - stall_out <= '1'; - end if; - end loop; - end if; - - -- XXX assumes PIPELINE_DEPTH = 1 - if busy_in = '0' then - v(1) := v(0); - v(0).valid := '0'; - v(0).ugpr_valid := '0'; - end if; - if deferred = '0' and issuing = '1' then - v(0).valid := gpr_write_valid_in; - v(0).bypass := bypass_avail; - v(0).gpr := gpr_write_in; - v(0).ugpr_valid := ugpr_write_valid; - v(0).ugpr := ugpr_write_reg; - end if; - if flush_in = '1' then - v(0).valid := '0'; - v(0).ugpr_valid := '0'; - v(1).valid := '0'; - v(1).ugpr_valid := '0'; - end if; - - -- update registers - rin <= v; - - end process; -end; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index fc5bcf9..935ce5f 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -65,6 +65,7 @@ architecture behave of loadstore1 is addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); + instr_tag : instr_tag_t; write_reg : gspr_index_t; length : std_ulogic_vector(3 downto 0); byte_reverse : std_ulogic; @@ -503,6 +504,7 @@ begin v.align_intr := '0'; v.dwords_done := '0'; v.last_dword := '1'; + v.instr_tag := l_in.instr_tag; v.write_reg := l_in.write_reg; v.length := l_in.length; v.byte_reverse := l_in.byte_reverse; @@ -725,6 +727,7 @@ begin -- Multiplex either cache data to the destination GPR or -- the address for the rA update. 
l_out.valid <= done; + l_out.instr_tag <= r.instr_tag; l_out.write_reg <= r.write_reg; case r.wr_sel is when "00" => diff --git a/microwatt.core b/microwatt.core index 41b6230..0f77fba 100644 --- a/microwatt.core +++ b/microwatt.core @@ -19,7 +19,6 @@ filesets: - sim_console.vhdl - logical.vhdl - countzero.vhdl - - gpr_hazard.vhdl - cr_hazard.vhdl - control.vhdl - execute1.vhdl diff --git a/writeback.vhdl b/writeback.vhdl index 95de0ec..044b1fb 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -17,7 +17,7 @@ entity writeback is w_out : out WritebackToRegisterFileType; c_out : out WritebackToCrFileType; - complete_out : out std_ulogic + complete_out : out instr_tag_t ); end entity writeback; @@ -47,6 +47,10 @@ begin y(0) := fp_in.write_cr_enable; assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; + + assert not (e_in.valid = '1' and e_in.instr_tag.valid = '0') severity failure; + assert not (l_in.valid = '1' and l_in.instr_tag.valid = '0') severity failure; + assert not (fp_in.valid = '1' and fp_in.instr_tag.valid = '0') severity failure; end if; end process; @@ -59,9 +63,13 @@ begin w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; - complete_out <= '0'; - if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then - complete_out <= '1'; + complete_out <= instr_tag_init; + if e_in.valid = '1' then + complete_out <= e_in.instr_tag; + elsif l_in.valid = '1' then + complete_out <= l_in.instr_tag; + elsif fp_in.valid = '1' then + complete_out <= fp_in.instr_tag; end if; if e_in.exc_write_enable = '1' then