core: Track GPR hazards using tags that propagate through the pipelines

This changes the way GPR hazards are detected and tracked.  Instead of
having a model of the pipeline in gpr_hazard.vhdl, which has to mirror
the behaviour of the real pipeline exactly, we now assign a 2-bit tag
to each instruction and record which GSPR the instruction writes.
Subsequent instructions that need to use the GSPR get the tag number
and stall until the value with that tag is being written back to the
register file.

For now, the forwarding paths are disabled.  That gives about a 8%
reduction in coremark performance.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/269/head
Paul Mackerras 4 years ago
parent a1d7b54f76
commit c0b45e153b

@ -43,7 +43,7 @@ all: $(all)

core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl \
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.decode_types.all;

package common is
@ -126,6 +127,17 @@ package common is
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

-- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
subtype tag_number_t is integer range 0 to TAG_COUNT - 1;
subtype tag_index_t is unsigned(TAG_NUMBER_BITS - 1 downto 0);
type instr_tag_t is record
tag : tag_number_t;
valid : std_ulogic;
end record;
constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0');

type irq_state_t is (WRITE_SRR0, WRITE_SRR1);

-- For now, fixed 16 sources, make this either a parametric
@ -197,6 +209,7 @@ package common is
fac : facility_t;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t;
write_reg: gspr_index_t;
write_reg_enable: std_ulogic;
read_reg1: gspr_index_t;
@ -236,7 +249,7 @@ package common is
second : std_ulogic; -- set if this is the second op
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL,
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
@ -291,9 +304,9 @@ package common is
end record;

type RegisterFileToDecode2Type is record
read1_data : std_ulogic_vector(63 downto 0);
read2_data : std_ulogic_vector(63 downto 0);
read3_data : std_ulogic_vector(63 downto 0);
read1_data : std_ulogic_vector(63 downto 0);
read2_data : std_ulogic_vector(63 downto 0);
read3_data : std_ulogic_vector(63 downto 0);
end record;

type Decode2ToCrFileType is record
@ -326,6 +339,7 @@ package common is
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
@ -345,14 +359,17 @@ package common is
repeat : std_ulogic;
second : std_ulogic;
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0');
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
(valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
instr_tag => instr_tag_init,
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0');

type Loadstore1ToExecute1Type is record
busy : std_ulogic;
@ -439,6 +456,7 @@ package common is

type Loadstore1ToWritebackType is record
valid : std_ulogic;
instr_tag : instr_tag_t;
write_enable: std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
@ -446,11 +464,13 @@ package common is
rc : std_ulogic;
store_done : std_ulogic;
end record;
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init,
rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0'));
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, write_enable => '0', xerc => xerc_init,
rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0'));

type Execute1ToWritebackType is record
valid: std_ulogic;
instr_tag : instr_tag_t;
rc : std_ulogic;
mode_32bit : std_ulogic;
write_enable : std_ulogic;
@ -465,17 +485,19 @@ package common is
exc_write_reg : gspr_index_t;
exc_write_data : std_ulogic_vector(63 downto 0);
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', mode_32bit => '0', write_enable => '0',
write_cr_enable => '0', exc_write_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0'));
constant Execute1ToWritebackInit : Execute1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0',
write_enable => '0', write_cr_enable => '0', exc_write_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0'));

type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
@ -487,6 +509,7 @@ package common is
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
itag => instr_tag_init,
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
@ -502,6 +525,7 @@ package common is

type FPUToWritebackType is record
valid : std_ulogic;
instr_tag : instr_tag_t;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
@ -509,7 +533,9 @@ package common is
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;
constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0'));
constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', instr_tag => instr_tag_init,
write_enable => '0', write_cr_enable => '0',
others => (others => '0'));

type DividerToExecute1Type is record
valid: std_ulogic;
@ -524,7 +550,8 @@ package common is
write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic;
end record;
constant WritebackToRegisterFileInit : WritebackToRegisterFileType := (write_enable => '0', write_data => (others => '0'), others => (others => '0'));
constant WritebackToRegisterFileInit : WritebackToRegisterFileType :=
(write_enable => '0', write_data => (others => '0'), others => (others => '0'));

type WritebackToCrFileType is record
write_cr_enable : std_ulogic;

@ -12,7 +12,7 @@ entity control is
clk : in std_ulogic;
rst : in std_ulogic;

complete_in : in std_ulogic;
complete_in : in instr_tag_t;
valid_in : in std_ulogic;
repeated : in std_ulogic;
flush_in : in std_ulogic;
@ -25,9 +25,6 @@ entity control is
gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;

update_gpr_write_valid : in std_ulogic;
update_gpr_write_reg : in gspr_index_t;

gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;

@ -48,7 +45,9 @@ entity control is
gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic;
cr_bypass : out std_ulogic
cr_bypass : out std_ulogic;

instr_tag_out : out instr_tag_t
);
end entity control;

@ -71,85 +70,31 @@ architecture rtl of control is
signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';

begin
gpr_hazard0: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_a_out,
use_bypass => gpr_bypass_a
);
type tag_register is record
wr_gpr : std_ulogic;
reg : gspr_index_t;
recent : std_ulogic;
end record;

gpr_hazard1: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,
type tag_regs_array is array(tag_number_t) of tag_register;
signal tag_regs : tag_regs_array;

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,
signal instr_tag : instr_tag_t;

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,
signal gpr_tag_a : instr_tag_t;
signal gpr_tag_b : instr_tag_t;
signal gpr_tag_c : instr_tag_t;
signal gpr_tag_stall : std_ulogic;

stall_out => stall_b_out,
use_bypass => gpr_bypass_b
);
signal curr_tag : tag_number_t;
signal next_tag : tag_number_t;

gpr_hazard2: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_c_out,
use_bypass => gpr_bypass_c
);
function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
begin
return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag;
end;

begin
cr_hazard0: entity work.cr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
@ -158,7 +103,7 @@ begin
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
complete_in => complete_in.valid,
flush_in => flush_in,
issuing => valid_out,

@ -170,15 +115,102 @@ begin
use_bypass => cr_bypass
);

gpr_bypass_a <= '0';
gpr_bypass_b <= '0';
gpr_bypass_c <= '0';

control0: process(clk)
begin
if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
for i in tag_number_t loop
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
tag_regs(i).wr_gpr <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then
tag_regs(i).recent <= '0';
if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then
report "tag " & integer'image(i) & " not recent";
end if;
end if;
if instr_tag.valid = '1' and i = instr_tag.tag then
tag_regs(i).wr_gpr <= gpr_write_valid;
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if;
end if;
end if;
end loop;
if rst = '1' then
curr_tag <= 0;
else
curr_tag <= next_tag;
end if;
end if;
end process;

control_hazards : process(all)
variable gpr_stall : std_ulogic;
variable tag_a : instr_tag_t;
variable tag_b : instr_tag_t;
variable tag_c : instr_tag_t;
variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t;
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then
tag_a.valid := gpr_a_read_valid_in;
tag_a.tag := i;
end if;
end loop;
if tag_match(tag_a, complete_in) then
tag_a.valid := '0';
end if;
tag_b := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
tag_b.valid := gpr_b_read_valid_in;
tag_b.tag := i;
end if;
end loop;
if tag_match(tag_b, complete_in) then
tag_b.valid := '0';
end if;
tag_c := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
tag_c.valid := gpr_c_read_valid_in;
tag_c.tag := i;
end if;
end loop;
if tag_match(tag_c, complete_in) then
tag_c.valid := '0';
end if;
gpr_tag_a <= tag_a;
gpr_tag_b <= tag_b;
gpr_tag_c <= tag_c;
gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid;

incr_tag := curr_tag;
instr_tag.tag <= curr_tag;
instr_tag.valid <= valid_out and not deferred;
if instr_tag.valid = '1' then
incr_tag := (curr_tag + 1) mod TAG_COUNT;
end if;
next_tag <= incr_tag;
instr_tag_out <= instr_tag;
end process;

control1 : process(all)
variable v_int : reg_internal_type;
variable valid_tmp : std_ulogic;
@ -193,7 +225,7 @@ begin
if flush_in = '1' then
-- expect to see complete_in next cycle
v_int.outstanding := 1;
elsif complete_in = '1' then
elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;

@ -222,8 +254,8 @@ begin
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR hazards
stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out;
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_stall_out;
end if;
end if;

@ -249,8 +281,8 @@ begin
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR hazards
stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out;
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_stall_out;
end if;
end if;
else
@ -262,15 +294,11 @@ begin
valid_tmp := '0';
end if;

if valid_tmp = '1' then
if deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;
gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in;
else
gpr_write_valid <= '0';
cr_write_valid <= '0';
gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp;

if valid_tmp = '1' and deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;

-- update outputs

@ -102,7 +102,7 @@ architecture behave of core is
signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic;

signal complete: std_ulogic;
signal complete: instr_tag_t;
signal terminate: std_ulogic;
signal core_rst: std_ulogic;
signal icache_inv: std_ulogic;

@ -19,7 +19,7 @@ entity decode2 is
clk : in std_ulogic;
rst : in std_ulogic;

complete_in : in std_ulogic;
complete_in : in instr_tag_t;
busy_in : in std_ulogic;
stall_out : out std_ulogic;

@ -303,6 +303,8 @@ architecture behaviour of decode2 is
signal cr_bypass : std_ulogic;
signal cr_bypass_avail : std_ulogic;

signal instr_tag : instr_tag_t;

begin
control_0: entity work.control
generic map (
@ -325,9 +327,6 @@ begin
gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,

update_gpr_write_valid => '0',
update_gpr_write_reg => 7x"00",

gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,

@ -348,7 +347,9 @@ begin

gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass
gpr_bypass_c => gpr_c_bypass,

instr_tag_out => instr_tag
);

deferred <= r.e.valid and busy_in;
@ -454,6 +455,7 @@ begin
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass;

@ -319,7 +319,8 @@ begin
ctrl <= ctrl_tmp;
if valid_in = '1' then
report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) &
" wr=" & to_hstring(rin.e.write_reg);
" wr=" & to_hstring(rin.e.write_reg) & " we=" & std_ulogic'image(rin.e.write_enable) &
" tag=" & integer'image(rin.e.instr_tag.tag) & std_ulogic'image(rin.e.instr_tag.valid);
end if;
end if;
end if;
@ -694,6 +695,7 @@ begin
end if;

v.e.mode_32bit := not ctrl.msr(MSR_SF);
v.e.instr_tag := current.instr_tag;

do_trace := valid_in and ctrl.msr(MSR_SE);
if valid_in = '1' then
@ -749,8 +751,6 @@ begin
end if;

if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then
report "execute nia " & to_hstring(e_in.nia);

v.cur_instr := e_in;
v.next_lr := next_nia;
v.e.valid := '1';
@ -909,7 +909,6 @@ begin
when OP_ISEL =>
when OP_CROP =>
cr_op := insn_cr(e_in.insn);
report "CR OP " & to_hstring(cr_op);
if cr_op(0) = '0' then -- MCRF
bf := insn_bf(e_in.insn);
bfa := insn_bfa(e_in.insn);
@ -1309,6 +1308,7 @@ begin
-- Outputs to loadstore1 (async)
lv.op := e_in.insn_type;
lv.nia := e_in.nia;
lv.instr_tag := e_in.instr_tag;
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;
@ -1337,6 +1337,7 @@ begin
fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn;
fv.itag := e_in.instr_tag;
fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in;

@ -75,6 +75,7 @@ architecture behaviour of fpu is
do_intr : std_ulogic;
op : insn_type_t;
insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
dest_fpr : gspr_index_t;
fe_mode : std_ulogic;
rc : std_ulogic;
@ -574,6 +575,7 @@ begin
e_out.interrupt <= r.do_intr;

w_out.valid <= r.instr_done and not r.do_intr;
w_out.instr_tag <= r.instr_tag;
w_out.write_enable <= r.writing_back;
w_out.write_reg <= r.dest_fpr;
w_out.write_data <= fp_result;
@ -643,6 +645,7 @@ begin
if e_in.valid = '1' then
v.insn := e_in.insn;
v.op := e_in.op;
v.instr_tag := e_in.itag;
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;

@ -1,112 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity gpr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
);
port(
clk : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;
repeated : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in gspr_index_t;

ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in gspr_index_t;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
);
end entity gpr_hazard;
architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
gpr : gspr_index_t;
ugpr_valid : std_ulogic;
ugpr : gspr_index_t;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));

type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);

signal r, rin : pipeline_t := pipeline_t_init;
begin
gpr_hazard0: process(clk)
begin
if rising_edge(clk) then
r <= rin;
end if;
end process;

gpr_hazard1: process(all)
variable v : pipeline_t;
begin
v := r;

if complete_in = '1' then
v(PIPELINE_DEPTH).valid := '0';
v(PIPELINE_DEPTH).ugpr_valid := '0';
end if;

stall_out <= '0';
use_bypass <= '0';
if repeated = '0' and gpr_read_valid_in = '1' then
loop_0: for i in 0 to PIPELINE_DEPTH loop
-- The second half of a split instruction never has GPR
-- dependencies on the first half's output GPR,
-- so ignore matches when i = 0 for the second half.
if v(i).valid = '1' and r(i).gpr = gpr_read_in and
not (i = 0 and repeated = '1') then
if r(i).bypass = '1' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
stall_out <= '1';
end if;
end loop;
end if;

-- XXX assumes PIPELINE_DEPTH = 1
if busy_in = '0' then
v(1) := v(0);
v(0).valid := '0';
v(0).ugpr_valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := gpr_write_valid_in;
v(0).bypass := bypass_avail;
v(0).gpr := gpr_write_in;
v(0).ugpr_valid := ugpr_write_valid;
v(0).ugpr := ugpr_write_reg;
end if;
if flush_in = '1' then
v(0).valid := '0';
v(0).ugpr_valid := '0';
v(1).valid := '0';
v(1).ugpr_valid := '0';
end if;

-- update registers
rin <= v;

end process;
end;

@ -65,6 +65,7 @@ architecture behave of loadstore1 is
addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
@ -503,6 +504,7 @@ begin
v.align_intr := '0';
v.dwords_done := '0';
v.last_dword := '1';
v.instr_tag := l_in.instr_tag;
v.write_reg := l_in.write_reg;
v.length := l_in.length;
v.byte_reverse := l_in.byte_reverse;
@ -725,6 +727,7 @@ begin
-- Multiplex either cache data to the destination GPR or
-- the address for the rA update.
l_out.valid <= done;
l_out.instr_tag <= r.instr_tag;
l_out.write_reg <= r.write_reg;
case r.wr_sel is
when "00" =>

@ -19,7 +19,6 @@ filesets:
- sim_console.vhdl
- logical.vhdl
- countzero.vhdl
- gpr_hazard.vhdl
- cr_hazard.vhdl
- control.vhdl
- execute1.vhdl

@ -17,7 +17,7 @@ entity writeback is
w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType;

complete_out : out std_ulogic
complete_out : out instr_tag_t
);
end entity writeback;

@ -47,6 +47,10 @@ begin
y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure;

assert not (e_in.valid = '1' and e_in.instr_tag.valid = '0') severity failure;
assert not (l_in.valid = '1' and l_in.instr_tag.valid = '0') severity failure;
assert not (fp_in.valid = '1' and fp_in.instr_tag.valid = '0') severity failure;
end if;
end process;

@ -59,9 +63,13 @@ begin
w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;

complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then
complete_out <= '1';
complete_out <= instr_tag_init;
if e_in.valid = '1' then
complete_out <= e_in.instr_tag;
elsif l_in.valid = '1' then
complete_out <= l_in.instr_tag;
elsif fp_in.valid = '1' then
complete_out <= fp_in.instr_tag;
end if;

if e_in.exc_write_enable = '1' then

Loading…
Cancel
Save