Add a bypass path from the execute2 stage

This enables some instructions to issue earlier and thus improves
performance, at the cost of some extra multiplexers in decode2.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 3 years ago
parent 3510071d9a
commit 4b6148ada6

@ -36,6 +36,8 @@ entity control is

execute_next_tag : in instr_tag_t;
execute_next_cr_tag : in instr_tag_t;
execute2_next_tag : in instr_tag_t;
execute2_next_cr_tag : in instr_tag_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
@ -44,10 +46,10 @@ entity control is
stall_out : out std_ulogic;
stopped_out : out std_ulogic;

gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic;
cr_bypass : out std_ulogic;
gpr_bypass_a : out std_ulogic_vector(1 downto 0);
gpr_bypass_b : out std_ulogic_vector(1 downto 0);
gpr_bypass_c : out std_ulogic_vector(1 downto 0);
cr_bypass : out std_ulogic_vector(1 downto 0);

instr_tag_out : out instr_tag_t
);
@ -142,11 +144,11 @@ begin
variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t;
variable byp_a : std_ulogic;
variable byp_b : std_ulogic;
variable byp_c : std_ulogic;
variable byp_a : std_ulogic_vector(1 downto 0);
variable byp_b : std_ulogic_vector(1 downto 0);
variable byp_c : std_ulogic_vector(1 downto 0);
variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic;
variable byp_cr : std_ulogic_vector(1 downto 0);
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
@ -179,26 +181,32 @@ begin
tag_c.valid := '0';
end if;

byp_a := '0';
byp_a := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
byp_a := '1';
byp_a := "10";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
byp_a := "11";
end if;
byp_b := '0';
byp_b := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := '1';
byp_b := "10";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
byp_b := "11";
end if;
byp_c := '0';
byp_c := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := '1';
byp_c := "10";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
byp_c := "11";
end if;

gpr_bypass_a <= byp_a;
gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c;

gpr_tag_stall <= (tag_a.valid and not byp_a) or
(tag_b.valid and not byp_b) or
(tag_c.valid and not byp_c);
gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or
(tag_b.valid and not byp_b(1)) or
(tag_c.valid and not byp_c(1));

incr_tag := curr_tag;
instr_tag.tag <= curr_tag;
@ -215,13 +223,15 @@ begin
if tag_match(tag_cr, complete_in) then
tag_cr.valid := '0';
end if;
byp_cr := '0';
byp_cr := "00";
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
byp_cr := '1';
byp_cr := "10";
elsif EX1_BYPASS and tag_match(execute2_next_cr_tag, tag_cr) then
byp_cr := "11";
end if;

cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr(1);
end process;

control1 : process(all)

@ -79,6 +79,8 @@ architecture behave of core is
signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_bypass: bypass_data_t;
signal execute1_cr_bypass: cr_bypass_data_t;
signal execute2_bypass: bypass_data_t;
signal execute2_cr_bypass: cr_bypass_data_t;

-- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@ -298,6 +300,8 @@ begin
c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass,
execute_cr_bypass => execute1_cr_bypass,
execute2_bypass => execute2_bypass,
execute2_cr_bypass => execute2_cr_bypass,
log_out => log_data(119 downto 110)
);
decode2_busy_in <= ex1_busy_out;
@ -359,6 +363,8 @@ begin
e_out => execute1_to_writeback,
bypass_data => execute1_bypass,
bypass_cr_data => execute1_cr_bypass,
bypass2_data => execute2_bypass,
bypass2_cr_data => execute2_cr_bypass,
icache_inval => ex1_icache_inval,
dbg_ctrl_out => ctrl_debug,
wb_events => writeback_events,

@ -39,6 +39,8 @@ entity decode2 is

execute_bypass : in bypass_data_t;
execute_cr_bypass : in cr_bypass_data_t;
execute2_bypass : in bypass_data_t;
execute2_cr_bypass : in cr_bypass_data_t;

log_out : out std_ulogic_vector(9 downto 0)
);
@ -273,19 +275,19 @@ architecture behaviour of decode2 is

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read : gspr_index_t;
signal gpr_a_bypass : std_ulogic;
signal gpr_a_bypass : std_ulogic_vector(1 downto 0);

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;
signal gpr_b_bypass : std_ulogic_vector(1 downto 0);

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;
signal gpr_c_bypass : std_ulogic_vector(1 downto 0);

signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic;
signal cr_bypass : std_ulogic_vector(1 downto 0);

signal instr_tag : instr_tag_t;

@ -321,6 +323,8 @@ begin

execute_next_tag => execute_bypass.tag,
execute_next_cr_tag => execute_cr_bypass.tag,
execute2_next_tag => execute2_bypass.tag,
execute2_next_cr_tag => execute2_cr_bypass.tag,

cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid,
@ -504,27 +508,35 @@ begin

-- See if any of the operands can get their value via the bypass path.
case gpr_a_bypass is
when '1' =>
when "10" =>
v.e.read_data1 := execute_bypass.data;
when "11" =>
v.e.read_data1 := execute2_bypass.data;
when others =>
v.e.read_data1 := decoded_reg_a.data;
end case;
case gpr_b_bypass is
when '1' =>
when "10" =>
v.e.read_data2 := execute_bypass.data;
when "11" =>
v.e.read_data2 := execute2_bypass.data;
when others =>
v.e.read_data2 := decoded_reg_b.data;
end case;
case gpr_c_bypass is
when '1' =>
when "10" =>
v.e.read_data3 := execute_bypass.data;
when "11" =>
v.e.read_data3 := execute2_bypass.data;
when others =>
v.e.read_data3 := decoded_reg_c.data;
end case;

v.e.cr := c_in.read_cr_data;
if cr_bypass = '1' then
if cr_bypass = "10" then
v.e.cr := execute_cr_bypass.data;
elsif cr_bypass = "11" then
v.e.cr := execute2_cr_bypass.data;
end if;

-- issue control
@ -577,9 +589,9 @@ begin
r.e.valid &
stopped_out &
stall_out &
gpr_a_bypass &
gpr_b_bypass &
gpr_c_bypass;
(gpr_a_bypass(1) or gpr_a_bypass(0)) &
(gpr_b_bypass(1) or gpr_b_bypass(0)) &
(gpr_c_bypass(1) or gpr_c_bypass(0));
end if;
end process;
log_out <= log_data;

@ -40,6 +40,8 @@ entity execute1 is
e_out : out Execute1ToWritebackType;
bypass_data : out bypass_data_t;
bypass_cr_data : out cr_bypass_data_t;
bypass2_data : out bypass_data_t;
bypass2_cr_data : out cr_bypass_data_t;

dbg_ctrl_out : out ctrl_t;

@ -1482,6 +1484,7 @@ begin
variable fv : Execute1ToFPUType;
variable k : integer;
variable go : std_ulogic;
variable bypass_valid : std_ulogic;
begin
v := ex2;
if (l_in.busy or fp_in.busy) = '0' then
@ -1559,6 +1562,19 @@ begin
ctrl_tmp.msr(MSR_LE) <= '1';
end if;

bypass_valid := ex1.e.valid;
if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then
bypass_valid := '0';
end if;

bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid;
bypass2_data.tag.tag <= ex1.e.instr_tag.tag;
bypass2_data.data <= ex_result;

bypass2_cr_data.tag.valid <= ex1.e.write_cr_enable and bypass_valid;
bypass2_cr_data.tag.tag <= ex1.e.instr_tag.tag;
bypass2_cr_data.data <= ex1.e.write_cr_data;

-- Update registers
ex2in <= v;


Loading…
Cancel
Save