Track hazards explicitly for XER overflow bits

This provides a mechanism for tracking updates to the XER overflow
bits (SO, OV, OV32) and stalling instructions which need current
values of those bits (mfxer, integer compare instructions, integer
Rc=1 instructions, addex) or which write carry bits (since all the
XER common bits are written together, if we are writing CA/CA32 we
need up-to-date values of SO/OV/OV32).

This will enable updates to SO/OV/OV32 to be done at other places
besides the ex1 stage.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 7c240a664b
commit d1850fea29

@ -39,6 +39,8 @@ entity control is

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
ov_read_in : in std_ulogic;
ov_write_in : in std_ulogic;

valid_out : out std_ulogic;
stopped_out : out std_ulogic;
@ -55,12 +57,14 @@ end entity control;
architecture rtl of control is
signal gpr_write_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal ov_write_valid : std_ulogic;

type tag_register is record
wr_gpr : std_ulogic;
reg : gspr_index_t;
recent : std_ulogic;
wr_cr : std_ulogic;
wr_ov : std_ulogic;
valid : std_ulogic;
end record;

@ -71,12 +75,14 @@ architecture rtl of control is

signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic;
signal ov_tag_stall : std_ulogic;
signal serial_stall : std_ulogic;

signal curr_tag : tag_number_t;
signal next_tag : tag_number_t;

signal curr_cr_tag : tag_number_t;
signal curr_ov_tag : tag_number_t;
signal prev_tag : tag_number_t;

begin
@ -87,12 +93,14 @@ begin
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
tag_regs(i).wr_ov <= '0';
tag_regs(i).valid <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
assert tag_regs(i).valid = '1' report "spurious completion" severity failure;
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
tag_regs(i).wr_ov <= '0';
tag_regs(i).valid <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
@ -108,6 +116,7 @@ begin
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid;
tag_regs(i).wr_ov <= ov_write_valid;
tag_regs(i).valid <= '1';
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
@ -118,12 +127,16 @@ begin
if rst = '1' then
curr_tag <= 0;
curr_cr_tag <= 0;
curr_ov_tag <= 0;
prev_tag <= 0;
else
curr_tag <= next_tag;
if instr_tag.valid = '1' and cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag;
end if;
if instr_tag.valid = '1' and ov_write_valid = '1' then
curr_ov_tag <= instr_tag.tag;
end if;
if valid_out = '1' then
prev_tag <= instr_tag.tag;
end if;
@ -144,6 +157,7 @@ begin
variable byp_c : std_ulogic_vector(1 downto 0);
variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic_vector(1 downto 0);
variable tag_ov : instr_tag_t;
variable tag_prev : instr_tag_t;
begin
tag_a := instr_tag_init;
@ -226,6 +240,14 @@ begin
cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr(1);

-- OV hazards
tag_ov.tag := curr_ov_tag;
tag_ov.valid := ov_read_in and tag_regs(curr_ov_tag).wr_ov;
if tag_match(tag_ov, complete_in) then
tag_ov.valid := '0';
end if;
ov_tag_stall <= tag_ov.valid;

tag_prev.tag := prev_tag;
tag_prev.valid := tag_regs(prev_tag).valid;
if tag_match(tag_prev, complete_in) then
@ -251,12 +273,14 @@ begin

-- Don't let it go out if there are GPR, CR or OV hazards
-- or we are waiting for the previous instruction to complete
if (gpr_tag_stall or cr_tag_stall or (serialize and serial_stall)) = '1' then
if (gpr_tag_stall or cr_tag_stall or ov_tag_stall or
(serialize and serial_stall)) = '1' then
valid_tmp := '0';
end if;

gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp;
ov_write_valid <= ov_write_in and valid_tmp;

-- update outputs
valid_out <= valid_tmp;

@ -58,6 +58,8 @@ architecture behaviour of decode2 is
reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic;
reg_o_valid : std_ulogic;
input_ov : std_ulogic;
output_ov : std_ulogic;
end record;
constant reg_type_init : reg_type :=
(e => Decode2ToExecute1Init, repeat => NONE, others => '0');
@ -303,6 +305,9 @@ architecture behaviour of decode2 is
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic_vector(1 downto 0);

signal ov_read_valid : std_ulogic;
signal ov_write_valid : std_ulogic;

signal instr_tag : instr_tag_t;

begin
@ -342,6 +347,9 @@ begin
cr_write_in => cr_write_valid,
cr_bypass => cr_bypass,

ov_read_in => ov_read_valid,
ov_write_in => ov_write_valid,

valid_out => control_valid_out,
stopped_out => stopped_out,

@ -414,19 +422,39 @@ begin
v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;

-- Work out whether XER common bits are set
-- Work out whether XER SO/OV/OV32 bits are set
-- or used by this instruction
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.output_xer := d_in.decode.output_carry;
v.input_ov := d_in.decode.output_carry;
v.output_ov := '0';
if d_in.decode.input_carry = OV then
v.input_ov := '1';
v.output_ov := '1';
end if;
if v.e.rc = '1' and d_in.decode.facility /= FPU then
v.input_ov := '1';
end if;
case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
v.e.oe := '1';
v.e.output_xer := '1';
v.output_ov := '1';
v.input_ov := '1'; -- need SO state if setting OV to 0
end if;
when OP_MFSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.input_ov := '1';
end if;
when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.e.output_xer := '1';
v.output_ov := '1';
end if;
when OP_CMP | OP_MCRXRX =>
v.input_ov := '1';
when others =>
end case;

@ -474,8 +502,6 @@ begin
v.e.read_reg3 := decoded_reg_c.reg;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
v.e.insn_type := op;
@ -550,6 +576,9 @@ begin
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or v.e.input_cr;

ov_read_valid <= v.input_ov;
ov_write_valid <= v.output_ov;

-- See if any of the operands can get their value via the bypass path.
if dc2.busy = '0' or gpr_a_bypass /= "00" then
case gpr_a_bypass is
@ -608,6 +637,7 @@ begin
when others =>
v.e.cr := c_in.read_cr_data;
end case;
v.e.xerc := c_in.read_xerc_data;

v.e.valid := control_valid_out;
v.e.instr_tag := instr_tag;

@ -435,12 +435,18 @@ begin
x_to_pmu.spr_val <= ex1.e.write_data;
x_to_pmu.run <= '1';

-- XER forwarding. To avoid having to track XER hazards, we use
-- the previously latched value. Since the XER common bits
-- (SO, OV[32] and CA[32]) are only modified by instructions that are
-- handled here, we can just use the result most recently sent to
-- writeback, unless a pipeline flush has happened in the meantime.
xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc;
-- XER forwarding. The CA and CA32 bits are only modified by instructions
-- that are handled here, so for them we can just use the result most
-- recently sent to writeback, unless a pipeline flush has happened in the
-- meantime.
-- Hazards for SO/OV/OV32 are handled by control.vhdl as there may be other
-- units writing to them. No forwarding is done because performance of
-- instructions that alter them is not considered significant.
xerc_in.so <= e_in.xerc.so;
xerc_in.ov <= e_in.xerc.ov;
xerc_in.ov32 <= e_in.xerc.ov32;
xerc_in.ca <= ex1.xerc.ca when ex1.xerc_valid = '1' else e_in.xerc.ca;
xerc_in.ca32 <= ex1.xerc.ca32 when ex1.xerc_valid = '1' else e_in.xerc.ca32;

-- N.B. the busy signal from each source includes the
-- stage2 stall from that source in it.
@ -1561,7 +1567,7 @@ begin
cr_res(31) := sign;
cr_res(30) := not (sign or zero);
cr_res(29) := zero;
cr_res(28) := ex1.xerc.so;
cr_res(28) := ex1.e.xerc.so;
cr_mask(7) := '1';
end if;


Loading…
Cancel
Save