core: Track GPR hazards using tags that propagate through the pipelines

This changes the way GPR hazards are detected and tracked.  Instead of
having a model of the pipeline in gpr_hazard.vhdl, which has to mirror
the behaviour of the real pipeline exactly, we now assign a 2-bit tag
to each instruction and record which GSPR the instruction writes.
Subsequent instructions that need to use the GSPR get the tag number
and stall until the value with that tag is being written back to the
register file.

For now, the forwarding paths are disabled.  That gives about a 8%
reduction in coremark performance.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/269/head
Paul Mackerras 4 years ago
parent a1d7b54f76
commit c0b45e153b

@ -43,7 +43,7 @@ all: $(all)


core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \ utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \ decode1.vhdl helpers.vhdl insn_helpers.vhdl \
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \ cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \ logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.utils.all;
use work.decode_types.all; use work.decode_types.all;


package common is package common is
@ -126,6 +127,17 @@ package common is
constant FPSCR_NI : integer := 63 - 61; constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63; constant FPSCR_RN : integer := 63 - 63;


-- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
subtype tag_number_t is integer range 0 to TAG_COUNT - 1;
subtype tag_index_t is unsigned(TAG_NUMBER_BITS - 1 downto 0);
type instr_tag_t is record
tag : tag_number_t;
valid : std_ulogic;
end record;
constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0');

type irq_state_t is (WRITE_SRR0, WRITE_SRR1); type irq_state_t is (WRITE_SRR0, WRITE_SRR1);


-- For now, fixed 16 sources, make this either a parametric -- For now, fixed 16 sources, make this either a parametric
@ -197,6 +209,7 @@ package common is
fac : facility_t; fac : facility_t;
insn_type: insn_type_t; insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t;
write_reg: gspr_index_t; write_reg: gspr_index_t;
write_reg_enable: std_ulogic; write_reg_enable: std_ulogic;
read_reg1: gspr_index_t; read_reg1: gspr_index_t;
@ -236,7 +249,7 @@ package common is
second : std_ulogic; -- set if this is the second op second : std_ulogic; -- set if this is the second op
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
@ -326,6 +339,7 @@ package common is
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0); nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0); insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
addr1 : std_ulogic_vector(63 downto 0); addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
@ -345,12 +359,15 @@ package common is
repeat : std_ulogic; repeat : std_ulogic;
second : std_ulogic; second : std_ulogic;
end record; end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
(valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init, sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'), nia => (others => '0'), insn => (others => '0'),
instr_tag => instr_tag_init,
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'), write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0', mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0'); repeat => '0', second => '0');


@ -439,6 +456,7 @@ package common is


type Loadstore1ToWritebackType is record type Loadstore1ToWritebackType is record
valid : std_ulogic; valid : std_ulogic;
instr_tag : instr_tag_t;
write_enable: std_ulogic; write_enable: std_ulogic;
write_reg : gspr_index_t; write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0); write_data : std_ulogic_vector(63 downto 0);
@ -446,11 +464,13 @@ package common is
rc : std_ulogic; rc : std_ulogic;
store_done : std_ulogic; store_done : std_ulogic;
end record; end record;
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init, constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, write_enable => '0', xerc => xerc_init,
rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0')); rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0'));


type Execute1ToWritebackType is record type Execute1ToWritebackType is record
valid: std_ulogic; valid: std_ulogic;
instr_tag : instr_tag_t;
rc : std_ulogic; rc : std_ulogic;
mode_32bit : std_ulogic; mode_32bit : std_ulogic;
write_enable : std_ulogic; write_enable : std_ulogic;
@ -465,8 +485,9 @@ package common is
exc_write_reg : gspr_index_t; exc_write_reg : gspr_index_t;
exc_write_data : std_ulogic_vector(63 downto 0); exc_write_data : std_ulogic_vector(63 downto 0);
end record; end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', mode_32bit => '0', write_enable => '0', constant Execute1ToWritebackInit : Execute1ToWritebackType :=
write_cr_enable => '0', exc_write_enable => '0', (valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0',
write_enable => '0', write_cr_enable => '0', exc_write_enable => '0',
write_xerc_enable => '0', xerc => xerc_init, write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'), write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'), write_cr_data => (others => '0'), write_reg => (others => '0'),
@ -476,6 +497,7 @@ package common is
valid : std_ulogic; valid : std_ulogic;
op : insn_type_t; op : insn_type_t;
nia : std_ulogic_vector(63 downto 0); nia : std_ulogic_vector(63 downto 0);
itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0); insn : std_ulogic_vector(31 downto 0);
single : std_ulogic; single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0); fe_mode : std_ulogic_vector(1 downto 0);
@ -487,6 +509,7 @@ package common is
out_cr : std_ulogic; out_cr : std_ulogic;
end record; end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'), constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
itag => instr_tag_init,
insn => (others => '0'), fe_mode => "00", rc => '0', insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'), fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'), frc => (others => '0'), frt => (others => '0'),
@ -502,6 +525,7 @@ package common is


type FPUToWritebackType is record type FPUToWritebackType is record
valid : std_ulogic; valid : std_ulogic;
instr_tag : instr_tag_t;
write_enable : std_ulogic; write_enable : std_ulogic;
write_reg : gspr_index_t; write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0); write_data : std_ulogic_vector(63 downto 0);
@ -509,7 +533,9 @@ package common is
write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0); write_cr_data : std_ulogic_vector(31 downto 0);
end record; end record;
constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0')); constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', instr_tag => instr_tag_init,
write_enable => '0', write_cr_enable => '0',
others => (others => '0'));


type DividerToExecute1Type is record type DividerToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
@ -524,7 +550,8 @@ package common is
write_data : std_ulogic_vector(63 downto 0); write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic; write_enable : std_ulogic;
end record; end record;
constant WritebackToRegisterFileInit : WritebackToRegisterFileType := (write_enable => '0', write_data => (others => '0'), others => (others => '0')); constant WritebackToRegisterFileInit : WritebackToRegisterFileType :=
(write_enable => '0', write_data => (others => '0'), others => (others => '0'));


type WritebackToCrFileType is record type WritebackToCrFileType is record
write_cr_enable : std_ulogic; write_cr_enable : std_ulogic;

@ -12,7 +12,7 @@ entity control is
clk : in std_ulogic; clk : in std_ulogic;
rst : in std_ulogic; rst : in std_ulogic;


complete_in : in std_ulogic; complete_in : in instr_tag_t;
valid_in : in std_ulogic; valid_in : in std_ulogic;
repeated : in std_ulogic; repeated : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
@ -25,9 +25,6 @@ entity control is
gpr_write_in : in gspr_index_t; gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic; gpr_bypassable : in std_ulogic;


update_gpr_write_valid : in std_ulogic;
update_gpr_write_reg : in gspr_index_t;

gpr_a_read_valid_in : in std_ulogic; gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t; gpr_a_read_in : in gspr_index_t;


@ -48,7 +45,9 @@ entity control is
gpr_bypass_a : out std_ulogic; gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic; gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic; gpr_bypass_c : out std_ulogic;
cr_bypass : out std_ulogic cr_bypass : out std_ulogic;

instr_tag_out : out instr_tag_t
); );
end entity control; end entity control;


@ -71,85 +70,31 @@ architecture rtl of control is
signal gpr_write_valid : std_ulogic := '0'; signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0'; signal cr_write_valid : std_ulogic := '0';


begin type tag_register is record
gpr_hazard0: entity work.gpr_hazard wr_gpr : std_ulogic;
generic map ( reg : gspr_index_t;
PIPELINE_DEPTH => PIPELINE_DEPTH recent : std_ulogic;
) end record;
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_a_out,
use_bypass => gpr_bypass_a
);


gpr_hazard1: entity work.gpr_hazard type tag_regs_array is array(tag_number_t) of tag_register;
generic map ( signal tag_regs : tag_regs_array;
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,


gpr_write_valid_in => gpr_write_valid, signal instr_tag : instr_tag_t;
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,


ugpr_write_valid => update_gpr_write_valid, signal gpr_tag_a : instr_tag_t;
ugpr_write_reg => update_gpr_write_reg, signal gpr_tag_b : instr_tag_t;
signal gpr_tag_c : instr_tag_t;
signal gpr_tag_stall : std_ulogic;


stall_out => stall_b_out, signal curr_tag : tag_number_t;
use_bypass => gpr_bypass_b signal next_tag : tag_number_t;
);


gpr_hazard2: entity work.gpr_hazard function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
generic map ( begin
PIPELINE_DEPTH => PIPELINE_DEPTH return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag;
) end;
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,
repeated => repeated,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_c_out,
use_bypass => gpr_bypass_c
);


begin
cr_hazard0: entity work.cr_hazard cr_hazard0: entity work.cr_hazard
generic map ( generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH PIPELINE_DEPTH => PIPELINE_DEPTH
@ -158,7 +103,7 @@ begin
clk => clk, clk => clk,
busy_in => busy_in, busy_in => busy_in,
deferred => deferred, deferred => deferred,
complete_in => complete_in, complete_in => complete_in.valid,
flush_in => flush_in, flush_in => flush_in,
issuing => valid_out, issuing => valid_out,


@ -170,13 +115,100 @@ begin
use_bypass => cr_bypass use_bypass => cr_bypass
); );


gpr_bypass_a <= '0';
gpr_bypass_b <= '0';
gpr_bypass_c <= '0';

control0: process(clk) control0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1) assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure; report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int; r_int <= rin_int;
for i in tag_number_t loop
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
tag_regs(i).wr_gpr <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then
tag_regs(i).recent <= '0';
if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then
report "tag " & integer'image(i) & " not recent";
end if;
end if;
if instr_tag.valid = '1' and i = instr_tag.tag then
tag_regs(i).wr_gpr <= gpr_write_valid;
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if; end if;
end if;
end if;
end loop;
if rst = '1' then
curr_tag <= 0;
else
curr_tag <= next_tag;
end if;
end if;
end process;

control_hazards : process(all)
variable gpr_stall : std_ulogic;
variable tag_a : instr_tag_t;
variable tag_b : instr_tag_t;
variable tag_c : instr_tag_t;
variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t;
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then
tag_a.valid := gpr_a_read_valid_in;
tag_a.tag := i;
end if;
end loop;
if tag_match(tag_a, complete_in) then
tag_a.valid := '0';
end if;
tag_b := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
tag_b.valid := gpr_b_read_valid_in;
tag_b.tag := i;
end if;
end loop;
if tag_match(tag_b, complete_in) then
tag_b.valid := '0';
end if;
tag_c := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
tag_c.valid := gpr_c_read_valid_in;
tag_c.tag := i;
end if;
end loop;
if tag_match(tag_c, complete_in) then
tag_c.valid := '0';
end if;
gpr_tag_a <= tag_a;
gpr_tag_b <= tag_b;
gpr_tag_c <= tag_c;
gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid;

incr_tag := curr_tag;
instr_tag.tag <= curr_tag;
instr_tag.valid <= valid_out and not deferred;
if instr_tag.valid = '1' then
incr_tag := (curr_tag + 1) mod TAG_COUNT;
end if;
next_tag <= incr_tag;
instr_tag_out <= instr_tag;
end process; end process;


control1 : process(all) control1 : process(all)
@ -193,7 +225,7 @@ begin
if flush_in = '1' then if flush_in = '1' then
-- expect to see complete_in next cycle -- expect to see complete_in next cycle
v_int.outstanding := 1; v_int.outstanding := 1;
elsif complete_in = '1' then elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1; v_int.outstanding := r_int.outstanding - 1;
end if; end if;


@ -222,8 +254,8 @@ begin
v_int.state := WAIT_FOR_CURR_TO_COMPLETE; v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if; end if;
else else
-- let it go out if there are no GPR hazards -- let it go out if there are no GPR or CR hazards
stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out; stall_tmp := gpr_tag_stall or cr_stall_out;
end if; end if;
end if; end if;


@ -249,8 +281,8 @@ begin
v_int.state := WAIT_FOR_CURR_TO_COMPLETE; v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if; end if;
else else
-- let it go out if there are no GPR hazards -- let it go out if there are no GPR or CR hazards
stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out; stall_tmp := gpr_tag_stall or cr_stall_out;
end if; end if;
end if; end if;
else else
@ -262,16 +294,12 @@ begin
valid_tmp := '0'; valid_tmp := '0';
end if; end if;


if valid_tmp = '1' then gpr_write_valid <= gpr_write_valid_in and valid_tmp;
if deferred = '0' then cr_write_valid <= cr_write_in and valid_tmp;

if valid_tmp = '1' and deferred = '0' then
v_int.outstanding := v_int.outstanding + 1; v_int.outstanding := v_int.outstanding + 1;
end if; end if;
gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in;
else
gpr_write_valid <= '0';
cr_write_valid <= '0';
end if;


-- update outputs -- update outputs
valid_out <= valid_tmp; valid_out <= valid_tmp;

@ -102,7 +102,7 @@ architecture behave of core is
signal decode1_flush: std_ulogic; signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic; signal fetch1_flush: std_ulogic;


signal complete: std_ulogic; signal complete: instr_tag_t;
signal terminate: std_ulogic; signal terminate: std_ulogic;
signal core_rst: std_ulogic; signal core_rst: std_ulogic;
signal icache_inv: std_ulogic; signal icache_inv: std_ulogic;

@ -19,7 +19,7 @@ entity decode2 is
clk : in std_ulogic; clk : in std_ulogic;
rst : in std_ulogic; rst : in std_ulogic;


complete_in : in std_ulogic; complete_in : in instr_tag_t;
busy_in : in std_ulogic; busy_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;


@ -303,6 +303,8 @@ architecture behaviour of decode2 is
signal cr_bypass : std_ulogic; signal cr_bypass : std_ulogic;
signal cr_bypass_avail : std_ulogic; signal cr_bypass_avail : std_ulogic;


signal instr_tag : instr_tag_t;

begin begin
control_0: entity work.control control_0: entity work.control
generic map ( generic map (
@ -325,9 +327,6 @@ begin
gpr_write_in => gpr_write, gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable, gpr_bypassable => gpr_bypassable,


update_gpr_write_valid => '0',
update_gpr_write_reg => 7x"00",

gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read, gpr_a_read_in => gpr_a_read,


@ -348,7 +347,9 @@ begin


gpr_bypass_a => gpr_a_bypass, gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass, gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass gpr_bypass_c => gpr_c_bypass,

instr_tag_out => instr_tag
); );


deferred <= r.e.valid and busy_in; deferred <= r.e.valid and busy_in;
@ -454,6 +455,7 @@ begin
v.e.nia := d_in.nia; v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit; v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility; v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg; v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data; v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass; v.e.bypass_data1 := gpr_a_bypass;

@ -319,7 +319,8 @@ begin
ctrl <= ctrl_tmp; ctrl <= ctrl_tmp;
if valid_in = '1' then if valid_in = '1' then
report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) & report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) &
" wr=" & to_hstring(rin.e.write_reg); " wr=" & to_hstring(rin.e.write_reg) & " we=" & std_ulogic'image(rin.e.write_enable) &
" tag=" & integer'image(rin.e.instr_tag.tag) & std_ulogic'image(rin.e.instr_tag.valid);
end if; end if;
end if; end if;
end if; end if;
@ -694,6 +695,7 @@ begin
end if; end if;


v.e.mode_32bit := not ctrl.msr(MSR_SF); v.e.mode_32bit := not ctrl.msr(MSR_SF);
v.e.instr_tag := current.instr_tag;


do_trace := valid_in and ctrl.msr(MSR_SE); do_trace := valid_in and ctrl.msr(MSR_SE);
if valid_in = '1' then if valid_in = '1' then
@ -749,8 +751,6 @@ begin
end if; end if;


if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then
report "execute nia " & to_hstring(e_in.nia);

v.cur_instr := e_in; v.cur_instr := e_in;
v.next_lr := next_nia; v.next_lr := next_nia;
v.e.valid := '1'; v.e.valid := '1';
@ -909,7 +909,6 @@ begin
when OP_ISEL => when OP_ISEL =>
when OP_CROP => when OP_CROP =>
cr_op := insn_cr(e_in.insn); cr_op := insn_cr(e_in.insn);
report "CR OP " & to_hstring(cr_op);
if cr_op(0) = '0' then -- MCRF if cr_op(0) = '0' then -- MCRF
bf := insn_bf(e_in.insn); bf := insn_bf(e_in.insn);
bfa := insn_bfa(e_in.insn); bfa := insn_bfa(e_in.insn);
@ -1309,6 +1308,7 @@ begin
-- Outputs to loadstore1 (async) -- Outputs to loadstore1 (async)
lv.op := e_in.insn_type; lv.op := e_in.insn_type;
lv.nia := e_in.nia; lv.nia := e_in.nia;
lv.instr_tag := e_in.instr_tag;
lv.addr1 := a_in; lv.addr1 := a_in;
lv.addr2 := b_in; lv.addr2 := b_in;
lv.data := c_in; lv.data := c_in;
@ -1337,6 +1337,7 @@ begin
fv.op := e_in.insn_type; fv.op := e_in.insn_type;
fv.nia := e_in.nia; fv.nia := e_in.nia;
fv.insn := e_in.insn; fv.insn := e_in.insn;
fv.itag := e_in.instr_tag;
fv.single := e_in.is_32bit; fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1); fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in; fv.fra := a_in;

@ -75,6 +75,7 @@ architecture behaviour of fpu is
do_intr : std_ulogic; do_intr : std_ulogic;
op : insn_type_t; op : insn_type_t;
insn : std_ulogic_vector(31 downto 0); insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
dest_fpr : gspr_index_t; dest_fpr : gspr_index_t;
fe_mode : std_ulogic; fe_mode : std_ulogic;
rc : std_ulogic; rc : std_ulogic;
@ -574,6 +575,7 @@ begin
e_out.interrupt <= r.do_intr; e_out.interrupt <= r.do_intr;


w_out.valid <= r.instr_done and not r.do_intr; w_out.valid <= r.instr_done and not r.do_intr;
w_out.instr_tag <= r.instr_tag;
w_out.write_enable <= r.writing_back; w_out.write_enable <= r.writing_back;
w_out.write_reg <= r.dest_fpr; w_out.write_reg <= r.dest_fpr;
w_out.write_data <= fp_result; w_out.write_data <= fp_result;
@ -643,6 +645,7 @@ begin
if e_in.valid = '1' then if e_in.valid = '1' then
v.insn := e_in.insn; v.insn := e_in.insn;
v.op := e_in.op; v.op := e_in.op;
v.instr_tag := e_in.itag;
v.fe_mode := or (e_in.fe_mode); v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt; v.dest_fpr := e_in.frt;
v.single_prec := e_in.single; v.single_prec := e_in.single;

@ -1,112 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity gpr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
);
port(
clk : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;
repeated : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in gspr_index_t;

ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in gspr_index_t;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
);
end entity gpr_hazard;
architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
gpr : gspr_index_t;
ugpr_valid : std_ulogic;
ugpr : gspr_index_t;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));

type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);

signal r, rin : pipeline_t := pipeline_t_init;
begin
gpr_hazard0: process(clk)
begin
if rising_edge(clk) then
r <= rin;
end if;
end process;

gpr_hazard1: process(all)
variable v : pipeline_t;
begin
v := r;

if complete_in = '1' then
v(PIPELINE_DEPTH).valid := '0';
v(PIPELINE_DEPTH).ugpr_valid := '0';
end if;

stall_out <= '0';
use_bypass <= '0';
if repeated = '0' and gpr_read_valid_in = '1' then
loop_0: for i in 0 to PIPELINE_DEPTH loop
-- The second half of a split instruction never has GPR
-- dependencies on the first half's output GPR,
-- so ignore matches when i = 0 for the second half.
if v(i).valid = '1' and r(i).gpr = gpr_read_in and
not (i = 0 and repeated = '1') then
if r(i).bypass = '1' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
stall_out <= '1';
end if;
end loop;
end if;

-- XXX assumes PIPELINE_DEPTH = 1
if busy_in = '0' then
v(1) := v(0);
v(0).valid := '0';
v(0).ugpr_valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := gpr_write_valid_in;
v(0).bypass := bypass_avail;
v(0).gpr := gpr_write_in;
v(0).ugpr_valid := ugpr_write_valid;
v(0).ugpr := ugpr_write_reg;
end if;
if flush_in = '1' then
v(0).valid := '0';
v(0).ugpr_valid := '0';
v(1).valid := '0';
v(1).ugpr_valid := '0';
end if;

-- update registers
rin <= v;

end process;
end;

@ -65,6 +65,7 @@ architecture behave of loadstore1 is
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t;
write_reg : gspr_index_t; write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0); length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
@ -503,6 +504,7 @@ begin
v.align_intr := '0'; v.align_intr := '0';
v.dwords_done := '0'; v.dwords_done := '0';
v.last_dword := '1'; v.last_dword := '1';
v.instr_tag := l_in.instr_tag;
v.write_reg := l_in.write_reg; v.write_reg := l_in.write_reg;
v.length := l_in.length; v.length := l_in.length;
v.byte_reverse := l_in.byte_reverse; v.byte_reverse := l_in.byte_reverse;
@ -725,6 +727,7 @@ begin
-- Multiplex either cache data to the destination GPR or -- Multiplex either cache data to the destination GPR or
-- the address for the rA update. -- the address for the rA update.
l_out.valid <= done; l_out.valid <= done;
l_out.instr_tag <= r.instr_tag;
l_out.write_reg <= r.write_reg; l_out.write_reg <= r.write_reg;
case r.wr_sel is case r.wr_sel is
when "00" => when "00" =>

@ -19,7 +19,6 @@ filesets:
- sim_console.vhdl - sim_console.vhdl
- logical.vhdl - logical.vhdl
- countzero.vhdl - countzero.vhdl
- gpr_hazard.vhdl
- cr_hazard.vhdl - cr_hazard.vhdl
- control.vhdl - control.vhdl
- execute1.vhdl - execute1.vhdl

@ -17,7 +17,7 @@ entity writeback is
w_out : out WritebackToRegisterFileType; w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType; c_out : out WritebackToCrFileType;


complete_out : out std_ulogic complete_out : out instr_tag_t
); );
end entity writeback; end entity writeback;


@ -47,6 +47,10 @@ begin
y(0) := fp_in.write_cr_enable; y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure; to_integer(unsigned(y))) <= 1 severity failure;

assert not (e_in.valid = '1' and e_in.instr_tag.valid = '0') severity failure;
assert not (l_in.valid = '1' and l_in.instr_tag.valid = '0') severity failure;
assert not (fp_in.valid = '1' and fp_in.instr_tag.valid = '0') severity failure;
end if; end if;
end process; end process;


@ -59,9 +63,13 @@ begin
w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;


complete_out <= '0'; complete_out <= instr_tag_init;
if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then if e_in.valid = '1' then
complete_out <= '1'; complete_out <= e_in.instr_tag;
elsif l_in.valid = '1' then
complete_out <= l_in.instr_tag;
elsif fp_in.valid = '1' then
complete_out <= fp_in.instr_tag;
end if; end if;


if e_in.exc_write_enable = '1' then if e_in.exc_write_enable = '1' then

Loading…
Cancel
Save