Track hazards explicitly for XER overflow bits

This provides a mechanism for tracking updates to the XER overflow
bits (SO, OV, OV32) and stalling instructions which need current
values of those bits (mfxer, integer compare instructions, integer
Rc=1 instructions, addex) or which write carry bits (since all the
XER common bits are written together, if we are writing CA/CA32 we
need up-to-date values of SO/OV/OV32).

This will enable updates to SO/OV/OV32 to be done at other places
besides the ex1 stage.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 7c240a664b
commit d1850fea29

@ -39,6 +39,8 @@ entity control is


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
ov_read_in : in std_ulogic;
ov_write_in : in std_ulogic;


valid_out : out std_ulogic; valid_out : out std_ulogic;
stopped_out : out std_ulogic; stopped_out : out std_ulogic;
@ -55,12 +57,14 @@ end entity control;
architecture rtl of control is architecture rtl of control is
signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
signal ov_write_valid : std_ulogic;


type tag_register is record type tag_register is record
wr_gpr : std_ulogic; wr_gpr : std_ulogic;
reg : gspr_index_t; reg : gspr_index_t;
recent : std_ulogic; recent : std_ulogic;
wr_cr : std_ulogic; wr_cr : std_ulogic;
wr_ov : std_ulogic;
valid : std_ulogic; valid : std_ulogic;
end record; end record;


@ -71,12 +75,14 @@ architecture rtl of control is


signal gpr_tag_stall : std_ulogic; signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic; signal cr_tag_stall : std_ulogic;
signal ov_tag_stall : std_ulogic;
signal serial_stall : std_ulogic; signal serial_stall : std_ulogic;


signal curr_tag : tag_number_t; signal curr_tag : tag_number_t;
signal next_tag : tag_number_t; signal next_tag : tag_number_t;


signal curr_cr_tag : tag_number_t; signal curr_cr_tag : tag_number_t;
signal curr_ov_tag : tag_number_t;
signal prev_tag : tag_number_t; signal prev_tag : tag_number_t;


begin begin
@ -87,12 +93,14 @@ begin
if rst = '1' or flush_in = '1' then if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0'; tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0'; tag_regs(i).wr_cr <= '0';
tag_regs(i).wr_ov <= '0';
tag_regs(i).valid <= '0'; tag_regs(i).valid <= '0';
else else
if complete_in.valid = '1' and i = complete_in.tag then if complete_in.valid = '1' and i = complete_in.tag then
assert tag_regs(i).valid = '1' report "spurious completion" severity failure; assert tag_regs(i).valid = '1' report "spurious completion" severity failure;
tag_regs(i).wr_gpr <= '0'; tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0'; tag_regs(i).wr_cr <= '0';
tag_regs(i).wr_ov <= '0';
tag_regs(i).valid <= '0'; tag_regs(i).valid <= '0';
report "tag " & integer'image(i) & " not valid"; report "tag " & integer'image(i) & " not valid";
end if; end if;
@ -108,6 +116,7 @@ begin
tag_regs(i).reg <= gpr_write_in; tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid; tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid; tag_regs(i).wr_cr <= cr_write_valid;
tag_regs(i).wr_ov <= ov_write_valid;
tag_regs(i).valid <= '1'; tag_regs(i).valid <= '1';
if gpr_write_valid = '1' then if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in); report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
@ -118,12 +127,16 @@ begin
if rst = '1' then if rst = '1' then
curr_tag <= 0; curr_tag <= 0;
curr_cr_tag <= 0; curr_cr_tag <= 0;
curr_ov_tag <= 0;
prev_tag <= 0; prev_tag <= 0;
else else
curr_tag <= next_tag; curr_tag <= next_tag;
if instr_tag.valid = '1' and cr_write_valid = '1' then if instr_tag.valid = '1' and cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag; curr_cr_tag <= instr_tag.tag;
end if; end if;
if instr_tag.valid = '1' and ov_write_valid = '1' then
curr_ov_tag <= instr_tag.tag;
end if;
if valid_out = '1' then if valid_out = '1' then
prev_tag <= instr_tag.tag; prev_tag <= instr_tag.tag;
end if; end if;
@ -144,6 +157,7 @@ begin
variable byp_c : std_ulogic_vector(1 downto 0); variable byp_c : std_ulogic_vector(1 downto 0);
variable tag_cr : instr_tag_t; variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic_vector(1 downto 0); variable byp_cr : std_ulogic_vector(1 downto 0);
variable tag_ov : instr_tag_t;
variable tag_prev : instr_tag_t; variable tag_prev : instr_tag_t;
begin begin
tag_a := instr_tag_init; tag_a := instr_tag_init;
@ -226,6 +240,14 @@ begin
cr_bypass <= byp_cr; cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr(1); cr_tag_stall <= tag_cr.valid and not byp_cr(1);


-- OV hazards
tag_ov.tag := curr_ov_tag;
tag_ov.valid := ov_read_in and tag_regs(curr_ov_tag).wr_ov;
if tag_match(tag_ov, complete_in) then
tag_ov.valid := '0';
end if;
ov_tag_stall <= tag_ov.valid;

tag_prev.tag := prev_tag; tag_prev.tag := prev_tag;
tag_prev.valid := tag_regs(prev_tag).valid; tag_prev.valid := tag_regs(prev_tag).valid;
if tag_match(tag_prev, complete_in) then if tag_match(tag_prev, complete_in) then
@ -251,12 +273,14 @@ begin


-- Don't let it go out if there are GPR or CR hazards -- Don't let it go out if there are GPR or CR hazards
-- or we are waiting for the previous instruction to complete -- or we are waiting for the previous instruction to complete
if (gpr_tag_stall or cr_tag_stall or (serialize and serial_stall)) = '1' then if (gpr_tag_stall or cr_tag_stall or ov_tag_stall or
(serialize and serial_stall)) = '1' then
valid_tmp := '0'; valid_tmp := '0';
end if; end if;


gpr_write_valid <= gpr_write_valid_in and valid_tmp; gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp; cr_write_valid <= cr_write_in and valid_tmp;
ov_write_valid <= ov_write_in and valid_tmp;


-- update outputs -- update outputs
valid_out <= valid_tmp; valid_out <= valid_tmp;

@ -58,6 +58,8 @@ architecture behaviour of decode2 is
reg_b_valid : std_ulogic; reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic; reg_c_valid : std_ulogic;
reg_o_valid : std_ulogic; reg_o_valid : std_ulogic;
input_ov : std_ulogic;
output_ov : std_ulogic;
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Decode2ToExecute1Init, repeat => NONE, others => '0'); (e => Decode2ToExecute1Init, repeat => NONE, others => '0');
@ -303,6 +305,9 @@ architecture behaviour of decode2 is
signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic_vector(1 downto 0); signal cr_bypass : std_ulogic_vector(1 downto 0);


signal ov_read_valid : std_ulogic;
signal ov_write_valid : std_ulogic;

signal instr_tag : instr_tag_t; signal instr_tag : instr_tag_t;


begin begin
@ -342,6 +347,9 @@ begin
cr_write_in => cr_write_valid, cr_write_in => cr_write_valid,
cr_bypass => cr_bypass, cr_bypass => cr_bypass,


ov_read_in => ov_read_valid,
ov_write_in => ov_write_valid,

valid_out => control_valid_out, valid_out => control_valid_out,
stopped_out => stopped_out, stopped_out => stopped_out,


@ -414,19 +422,39 @@ begin
v.e.input_cr := d_in.decode.input_cr; v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr; v.e.output_cr := d_in.decode.output_cr;


-- Work out whether XER common bits are set -- Work out whether XER SO/OV/OV32 bits are set
-- or used by this instruction
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.output_xer := d_in.decode.output_carry; v.e.output_xer := d_in.decode.output_carry;
v.input_ov := d_in.decode.output_carry;
v.output_ov := '0';
if d_in.decode.input_carry = OV then
v.input_ov := '1';
v.output_ov := '1';
end if;
if v.e.rc = '1' and d_in.decode.facility /= FPU then
v.input_ov := '1';
end if;
case d_in.decode.insn_type is case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE => when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only -- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
v.e.oe := '1'; v.e.oe := '1';
v.e.output_xer := '1'; v.e.output_xer := '1';
v.output_ov := '1';
v.input_ov := '1'; -- need SO state if setting OV to 0
end if;
when OP_MFSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.input_ov := '1';
end if; end if;
when OP_MTSPR => when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then if decode_spr_num(d_in.insn) = SPR_XER then
v.e.output_xer := '1'; v.e.output_xer := '1';
v.output_ov := '1';
end if; end if;
when OP_CMP | OP_MCRXRX =>
v.input_ov := '1';
when others => when others =>
end case; end case;


@ -474,8 +502,6 @@ begin
v.e.read_reg3 := decoded_reg_c.reg; v.e.read_reg3 := decoded_reg_c.reg;
v.e.write_reg := decoded_reg_o.reg; v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid; v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a; v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0'; v.e.addm1 := '0';
v.e.insn_type := op; v.e.insn_type := op;
@ -550,6 +576,9 @@ begin
-- any op that writes CR effectively also reads it. -- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or v.e.input_cr; cr_read_valid <= cr_write_valid or v.e.input_cr;


ov_read_valid <= v.input_ov;
ov_write_valid <= v.output_ov;

-- See if any of the operands can get their value via the bypass path. -- See if any of the operands can get their value via the bypass path.
if dc2.busy = '0' or gpr_a_bypass /= "00" then if dc2.busy = '0' or gpr_a_bypass /= "00" then
case gpr_a_bypass is case gpr_a_bypass is
@ -608,6 +637,7 @@ begin
when others => when others =>
v.e.cr := c_in.read_cr_data; v.e.cr := c_in.read_cr_data;
end case; end case;
v.e.xerc := c_in.read_xerc_data;


v.e.valid := control_valid_out; v.e.valid := control_valid_out;
v.e.instr_tag := instr_tag; v.e.instr_tag := instr_tag;

@ -435,12 +435,18 @@ begin
x_to_pmu.spr_val <= ex1.e.write_data; x_to_pmu.spr_val <= ex1.e.write_data;
x_to_pmu.run <= '1'; x_to_pmu.run <= '1';


-- XER forwarding. To avoid having to track XER hazards, we use -- XER forwarding. The CA and CA32 bits are only modified by instructions
-- the previously latched value. Since the XER common bits -- that are handled here, so for them we can just use the result most
-- (SO, OV[32] and CA[32]) are only modified by instructions that are -- recently sent to writeback, unless a pipeline flush has happened in the
-- handled here, we can just use the result most recently sent to -- meantime.
-- writeback, unless a pipeline flush has happened in the meantime. -- Hazards for SO/OV/OV32 are handled by control.vhdl as there may be other
xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc; -- units writing to them. No forwarding is done because performance of
-- instructions that alter them is not considered significant.
xerc_in.so <= e_in.xerc.so;
xerc_in.ov <= e_in.xerc.ov;
xerc_in.ov32 <= e_in.xerc.ov32;
xerc_in.ca <= ex1.xerc.ca when ex1.xerc_valid = '1' else e_in.xerc.ca;
xerc_in.ca32 <= ex1.xerc.ca32 when ex1.xerc_valid = '1' else e_in.xerc.ca32;


-- N.B. the busy signal from each source includes the -- N.B. the busy signal from each source includes the
-- stage2 stall from that source in it. -- stage2 stall from that source in it.
@ -1561,7 +1567,7 @@ begin
cr_res(31) := sign; cr_res(31) := sign;
cr_res(30) := not (sign or zero); cr_res(30) := not (sign or zero);
cr_res(29) := zero; cr_res(29) := zero;
cr_res(28) := ex1.xerc.so; cr_res(28) := ex1.e.xerc.so;
cr_mask(7) := '1'; cr_mask(7) := '1';
end if; end if;



Loading…
Cancel
Save