Add a bypass path from the execute2 stage

This enables instructions whose GPR or CR operands match the result tag
presented by the execute2 stage to issue earlier instead of stalling, and
thus improves performance, at the cost of some extra multiplexers in decode2.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 3510071d9a
commit 4b6148ada6

@ -36,6 +36,8 @@ entity control is


execute_next_tag : in instr_tag_t; execute_next_tag : in instr_tag_t;
execute_next_cr_tag : in instr_tag_t; execute_next_cr_tag : in instr_tag_t;
execute2_next_tag : in instr_tag_t;
execute2_next_cr_tag : in instr_tag_t;


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
@ -44,10 +46,10 @@ entity control is
stall_out : out std_ulogic; stall_out : out std_ulogic;
stopped_out : out std_ulogic; stopped_out : out std_ulogic;


gpr_bypass_a : out std_ulogic; gpr_bypass_a : out std_ulogic_vector(1 downto 0);
gpr_bypass_b : out std_ulogic; gpr_bypass_b : out std_ulogic_vector(1 downto 0);
gpr_bypass_c : out std_ulogic; gpr_bypass_c : out std_ulogic_vector(1 downto 0);
cr_bypass : out std_ulogic; cr_bypass : out std_ulogic_vector(1 downto 0);


instr_tag_out : out instr_tag_t instr_tag_out : out instr_tag_t
); );
@ -142,11 +144,11 @@ begin
variable tag_s : instr_tag_t; variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t; variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t; variable incr_tag : tag_number_t;
variable byp_a : std_ulogic; variable byp_a : std_ulogic_vector(1 downto 0);
variable byp_b : std_ulogic; variable byp_b : std_ulogic_vector(1 downto 0);
variable byp_c : std_ulogic; variable byp_c : std_ulogic_vector(1 downto 0);
variable tag_cr : instr_tag_t; variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic; variable byp_cr : std_ulogic_vector(1 downto 0);
begin begin
tag_a := instr_tag_init; tag_a := instr_tag_init;
for i in tag_number_t loop for i in tag_number_t loop
@ -179,26 +181,32 @@ begin
tag_c.valid := '0'; tag_c.valid := '0';
end if; end if;


byp_a := '0'; byp_a := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
byp_a := '1'; byp_a := "10";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
byp_a := "11";
end if; end if;
byp_b := '0'; byp_b := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := '1'; byp_b := "10";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
byp_b := "11";
end if; end if;
byp_c := '0'; byp_c := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := '1'; byp_c := "10";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
byp_c := "11";
end if; end if;


gpr_bypass_a <= byp_a; gpr_bypass_a <= byp_a;
gpr_bypass_b <= byp_b; gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c; gpr_bypass_c <= byp_c;


gpr_tag_stall <= (tag_a.valid and not byp_a) or gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or
(tag_b.valid and not byp_b) or (tag_b.valid and not byp_b(1)) or
(tag_c.valid and not byp_c); (tag_c.valid and not byp_c(1));


incr_tag := curr_tag; incr_tag := curr_tag;
instr_tag.tag <= curr_tag; instr_tag.tag <= curr_tag;
@ -215,13 +223,15 @@ begin
if tag_match(tag_cr, complete_in) then if tag_match(tag_cr, complete_in) then
tag_cr.valid := '0'; tag_cr.valid := '0';
end if; end if;
byp_cr := '0'; byp_cr := "00";
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
byp_cr := '1'; byp_cr := "10";
elsif EX1_BYPASS and tag_match(execute2_next_cr_tag, tag_cr) then
byp_cr := "11";
end if; end if;


cr_bypass <= byp_cr; cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr; cr_tag_stall <= tag_cr.valid and not byp_cr(1);
end process; end process;


control1 : process(all) control1 : process(all)

@ -79,6 +79,8 @@ architecture behave of core is
signal execute1_to_writeback: Execute1ToWritebackType; signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_bypass: bypass_data_t; signal execute1_bypass: bypass_data_t;
signal execute1_cr_bypass: cr_bypass_data_t; signal execute1_cr_bypass: cr_bypass_data_t;
signal execute2_bypass: bypass_data_t;
signal execute2_cr_bypass: cr_bypass_data_t;


-- load store signals -- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type; signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@ -298,6 +300,8 @@ begin
c_out => decode2_to_cr_file, c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass, execute_bypass => execute1_bypass,
execute_cr_bypass => execute1_cr_bypass, execute_cr_bypass => execute1_cr_bypass,
execute2_bypass => execute2_bypass,
execute2_cr_bypass => execute2_cr_bypass,
log_out => log_data(119 downto 110) log_out => log_data(119 downto 110)
); );
decode2_busy_in <= ex1_busy_out; decode2_busy_in <= ex1_busy_out;
@ -359,6 +363,8 @@ begin
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
bypass_data => execute1_bypass, bypass_data => execute1_bypass,
bypass_cr_data => execute1_cr_bypass, bypass_cr_data => execute1_cr_bypass,
bypass2_data => execute2_bypass,
bypass2_cr_data => execute2_cr_bypass,
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_ctrl_out => ctrl_debug, dbg_ctrl_out => ctrl_debug,
wb_events => writeback_events, wb_events => writeback_events,

@ -39,6 +39,8 @@ entity decode2 is


execute_bypass : in bypass_data_t; execute_bypass : in bypass_data_t;
execute_cr_bypass : in cr_bypass_data_t; execute_cr_bypass : in cr_bypass_data_t;
execute2_bypass : in bypass_data_t;
execute2_cr_bypass : in cr_bypass_data_t;


log_out : out std_ulogic_vector(9 downto 0) log_out : out std_ulogic_vector(9 downto 0)
); );
@ -273,19 +275,19 @@ architecture behaviour of decode2 is


signal gpr_a_read_valid : std_ulogic; signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read : gspr_index_t; signal gpr_a_read : gspr_index_t;
signal gpr_a_bypass : std_ulogic; signal gpr_a_bypass : std_ulogic_vector(1 downto 0);


signal gpr_b_read_valid : std_ulogic; signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t; signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic; signal gpr_b_bypass : std_ulogic_vector(1 downto 0);


signal gpr_c_read_valid : std_ulogic; signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gspr_index_t; signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic; signal gpr_c_bypass : std_ulogic_vector(1 downto 0);


signal cr_read_valid : std_ulogic; signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic; signal cr_bypass : std_ulogic_vector(1 downto 0);


signal instr_tag : instr_tag_t; signal instr_tag : instr_tag_t;


@ -321,6 +323,8 @@ begin


execute_next_tag => execute_bypass.tag, execute_next_tag => execute_bypass.tag,
execute_next_cr_tag => execute_cr_bypass.tag, execute_next_cr_tag => execute_cr_bypass.tag,
execute2_next_tag => execute2_bypass.tag,
execute2_next_cr_tag => execute2_cr_bypass.tag,


cr_read_in => cr_read_valid, cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid, cr_write_in => cr_write_valid,
@ -504,27 +508,35 @@ begin


-- See if any of the operands can get their value via the bypass path. -- See if any of the operands can get their value via the bypass path.
case gpr_a_bypass is case gpr_a_bypass is
when '1' => when "10" =>
v.e.read_data1 := execute_bypass.data; v.e.read_data1 := execute_bypass.data;
when "11" =>
v.e.read_data1 := execute2_bypass.data;
when others => when others =>
v.e.read_data1 := decoded_reg_a.data; v.e.read_data1 := decoded_reg_a.data;
end case; end case;
case gpr_b_bypass is case gpr_b_bypass is
when '1' => when "10" =>
v.e.read_data2 := execute_bypass.data; v.e.read_data2 := execute_bypass.data;
when "11" =>
v.e.read_data2 := execute2_bypass.data;
when others => when others =>
v.e.read_data2 := decoded_reg_b.data; v.e.read_data2 := decoded_reg_b.data;
end case; end case;
case gpr_c_bypass is case gpr_c_bypass is
when '1' => when "10" =>
v.e.read_data3 := execute_bypass.data; v.e.read_data3 := execute_bypass.data;
when "11" =>
v.e.read_data3 := execute2_bypass.data;
when others => when others =>
v.e.read_data3 := decoded_reg_c.data; v.e.read_data3 := decoded_reg_c.data;
end case; end case;


v.e.cr := c_in.read_cr_data; v.e.cr := c_in.read_cr_data;
if cr_bypass = '1' then if cr_bypass = "10" then
v.e.cr := execute_cr_bypass.data; v.e.cr := execute_cr_bypass.data;
elsif cr_bypass = "11" then
v.e.cr := execute2_cr_bypass.data;
end if; end if;


-- issue control -- issue control
@ -577,9 +589,9 @@ begin
r.e.valid & r.e.valid &
stopped_out & stopped_out &
stall_out & stall_out &
gpr_a_bypass & (gpr_a_bypass(1) or gpr_a_bypass(0)) &
gpr_b_bypass & (gpr_b_bypass(1) or gpr_b_bypass(0)) &
gpr_c_bypass; (gpr_c_bypass(1) or gpr_c_bypass(0));
end if; end if;
end process; end process;
log_out <= log_data; log_out <= log_data;

@ -40,6 +40,8 @@ entity execute1 is
e_out : out Execute1ToWritebackType; e_out : out Execute1ToWritebackType;
bypass_data : out bypass_data_t; bypass_data : out bypass_data_t;
bypass_cr_data : out cr_bypass_data_t; bypass_cr_data : out cr_bypass_data_t;
bypass2_data : out bypass_data_t;
bypass2_cr_data : out cr_bypass_data_t;


dbg_ctrl_out : out ctrl_t; dbg_ctrl_out : out ctrl_t;


@ -1482,6 +1484,7 @@ begin
variable fv : Execute1ToFPUType; variable fv : Execute1ToFPUType;
variable k : integer; variable k : integer;
variable go : std_ulogic; variable go : std_ulogic;
variable bypass_valid : std_ulogic;
begin begin
v := ex2; v := ex2;
if (l_in.busy or fp_in.busy) = '0' then if (l_in.busy or fp_in.busy) = '0' then
@ -1559,6 +1562,19 @@ begin
ctrl_tmp.msr(MSR_LE) <= '1'; ctrl_tmp.msr(MSR_LE) <= '1';
end if; end if;


bypass_valid := ex1.e.valid;
if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then
bypass_valid := '0';
end if;

bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid;
bypass2_data.tag.tag <= ex1.e.instr_tag.tag;
bypass2_data.data <= ex_result;

bypass2_cr_data.tag.valid <= ex1.e.write_cr_enable and bypass_valid;
bypass2_cr_data.tag.tag <= ex1.e.instr_tag.tag;
bypass2_cr_data.data <= ex1.e.write_cr_data;

-- Update registers -- Update registers
ex2in <= v; ex2in <= v;



Loading…
Cancel
Save