Improve timing of redirect_nia going from writeback to fetch1

This gets rid of the adder in writeback that computes redirect_nia.
Instead, the main adder in the ALU is used to compute the branch
target for relative branches.  We now decode b and bc differently
depending on the AA field, generating INSN_brel, INSN_babs, INSN_bcrel
or INSN_bcabs as appropriate.  Each one has a separate entry in the
decode table in decode1; the *rel versions use CIA as the A input.
The bclr/bcctr/bctar and rfid instructions now select ramspr_result
for the main result mux to get the redirect address into
ex1.e.write_data.

For branches which are predicted taken but not actually taken, we need
to redirect to the following instruction.  We also need to do that for
isync.  We do this in the execute2 stage since whether or not to do it
depends on the branch result.  The next_nia computation is moved to
the execute2 stage and comes in via a new leg on the secondary result
multiplexer, making next_nia available ultimately in ex2.e.write_data.
This also means that the next_nia leg of the primary result
multiplexer is gone.  Incrementing last_nia by 4 for sc (so that SRR0
points to the following instruction) is also moved to execute2.

Writing CIA+4 to LR was previously done through the main result
multiplexer.  Now it comes in explicitly in the ramspr write logic.

Overall this removes the br_offset and abs_br fields and the logic to
add br_offset and next_nia, and one leg of the primary result
multiplexer, at the cost of a few extra control signals between
execute1 and execute2 and some multiplexing for the ramspr write side
and an extra input on the secondary result multiplexer.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/422/head
Paul Mackerras 1 year ago
parent 06ff486567
commit 1c4b5def36

@ -658,7 +658,6 @@ package common is
redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0);
last_nia: std_ulogic_vector(63 downto 0);
br_offset: std_ulogic_vector(63 downto 0);
br_last: std_ulogic;
br_taken: std_ulogic;
abs_br: std_ulogic;
@ -672,7 +671,7 @@ package common is
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
last_nia => (others => '0'), br_offset => (others => '0'),
last_nia => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0'));


@ -94,8 +94,10 @@ architecture behaviour of decode1 is
INSN_andi_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_andis_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_attn => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_b => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bc => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_brel => (ALU, NONE, OP_B, CIA, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_babs => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcrel => (ALU, NONE, OP_BC, CIA, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcabs => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcctr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bclr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bctar => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
@ -597,11 +599,11 @@ begin
-- count cache or link stack.
br_offset := (others => '0');
case icode is
when INSN_b =>
when INSN_brel | INSN_babs =>
-- Unconditional branches are always taken
v.br_pred := '1';
br_offset := signed(f_in.insn(25 downto 2));
when INSN_bc =>
when INSN_bcrel | INSN_bcabs =>
-- Predict backward branches as taken, forward as untaken
v.br_pred := f_in.insn(15);
br_offset := resize(signed(f_in.insn(15 downto 2)), 24);

@ -221,9 +221,8 @@ architecture behaviour of decode2 is
OP_SHR => "010",
OP_EXTSWSLI => "010",
OP_MUL_L64 => "011", -- muldiv_result
OP_B => "110", -- next_nia
OP_BC => "110",
OP_BCREG => "110",
OP_BCREG => "101", -- ramspr_result
OP_RFID => "101",
OP_ADDG6S => "111", -- misc_result
OP_ISEL => "111",
OP_DARN => "111",

@ -47,14 +47,16 @@ package decode_types is
INSN_andi_dot, -- 10
INSN_andis_dot,
INSN_attn,
INSN_b,
INSN_bc,
INSN_brel,
INSN_babs,
INSN_bcrel,
INSN_bcabs,
INSN_bcctr,
INSN_bclr,
INSN_bctar,
INSN_brh,
INSN_brh, -- 20
INSN_brw,
INSN_brd, -- 20
INSN_brd,
INSN_cbcdtd,
INSN_cdtbcd,
INSN_cmpi,
@ -62,9 +64,9 @@ package decode_types is
INSN_cntlzw,
INSN_cntlzd,
INSN_cnttzw,
INSN_cnttzd,
INSN_cnttzd, -- 30
INSN_crand,
INSN_crandc, -- 30
INSN_crandc,
INSN_creqv,
INSN_crnand,
INSN_crnor,
@ -72,9 +74,9 @@ package decode_types is
INSN_crorc,
INSN_crxor,
INSN_darn,
INSN_eieio,
INSN_eieio, -- 40
INSN_extsb,
INSN_extsh, -- 40
INSN_extsh,
INSN_extsw,
INSN_extswsli,
INSN_isync,
@ -82,9 +84,9 @@ package decode_types is
INSN_ld,
INSN_ldu,
INSN_lhau,
INSN_lwa,
INSN_lwa, -- 50
INSN_lwzu,
INSN_mcrf, -- 50
INSN_mcrf,
INSN_mcrxrx,
INSN_mfcr,
INSN_mfmsr,
@ -92,9 +94,9 @@ package decode_types is
INSN_mtcrf,
INSN_mtmsr,
INSN_mtmsrd,
INSN_mtspr,
INSN_mtspr, -- 60
INSN_mulli,
INSN_neg, -- 60
INSN_neg,
INSN_nop,
INSN_ori,
INSN_oris,
@ -102,9 +104,9 @@ package decode_types is
INSN_popcntw,
INSN_popcntd,
INSN_prtyw,
INSN_prtyd,
INSN_prtyd, -- 70
INSN_rfid,
INSN_rldic, -- 70
INSN_rldic,
INSN_rldicl,
INSN_rldicr,
INSN_rldimi,
@ -112,9 +114,9 @@ package decode_types is
INSN_rlwinm,
INSN_rnop,
INSN_sc,
INSN_setb,
INSN_setb, -- 80
INSN_slbia,
INSN_sradi, -- 80
INSN_sradi,
INSN_srawi,
INSN_stbu,
INSN_std,
@ -122,9 +124,9 @@ package decode_types is
INSN_sthu,
INSN_stwu,
INSN_subfic,
INSN_subfme,
INSN_subfme, -- 90
INSN_subfze,
INSN_sync, -- 90
INSN_sync,
INSN_tdi,
INSN_tlbsync,
INSN_twi,
@ -132,7 +134,7 @@ package decode_types is
INSN_xori,
INSN_xoris,
-- pad to 104
INSN_061, INSN_062, INSN_063, INSN_064, INSN_065, INSN_066, INSN_067,
INSN_063, INSN_064, INSN_065, INSN_066, INSN_067,

-- Non-prefixed instructions that have a MLS:D prefixed form and
-- their corresponding prefixed instructions.
@ -497,8 +499,10 @@ package body decode_types is
when INSN_andi_dot => return "011100";
when INSN_andis_dot => return "011101";
when INSN_attn => return "000000";
when INSN_b => return "010010";
when INSN_bc => return "010000";
when INSN_brel => return "010010";
when INSN_babs => return "010010";
when INSN_bcrel => return "010000";
when INSN_bcabs => return "010000";
when INSN_brh => return "011111";
when INSN_brw => return "011111";
when INSN_brd => return "011111";

@ -95,6 +95,7 @@ architecture behaviour of execute1 is
exception : std_ulogic;
trap : std_ulogic;
advance_nia : std_ulogic;
redir_to_next : std_ulogic;
new_msr : std_ulogic_vector(63 downto 0);
take_branch : std_ulogic;
direct_branch : std_ulogic;
@ -124,6 +125,9 @@ architecture behaviour of execute1 is
res2_sel : std_ulogic_vector(1 downto 0);
spr_select : spr_id;
pmu_spr_num : std_ulogic_vector(4 downto 0);
redir_to_next : std_ulogic;
advance_nia : std_ulogic;
lr_from_next : std_ulogic;
mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic;
@ -145,6 +149,7 @@ architecture behaviour of execute1 is
prev_prefixed => '0',
oe => '0', mul_select => "00", res2_sel => "00",
spr_select => spr_id_init, pmu_spr_num => 5x"0",
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0',
@ -510,6 +515,7 @@ begin
variable wr_addr : ramspr_index;
variable even_wr_enab, odd_wr_enab : std_ulogic;
variable even_wr_data, odd_wr_data : std_ulogic_vector(63 downto 0);
variable ramspr_even_data : std_ulogic_vector(63 downto 0);
variable doit : std_ulogic;
begin
-- Read address mux and async RAM reading
@ -533,11 +539,16 @@ begin
else
wr_addr := ex1.ramspr_wraddr;
end if;
if ex1.lr_from_next = '1' then
ramspr_even_data := next_nia;
else
ramspr_even_data := ex1.e.write_data;
end if;
if interrupt_in.intr = '1' then
even_wr_data := ex2.e.last_nia;
odd_wr_data := intr_srr1(ctrl.msr, interrupt_in.srr1);
else
even_wr_data := ex1.e.write_data;
even_wr_data := ramspr_even_data;
odd_wr_data := ex1.ramspr_odd_data;
end if;
ramspr_wr_addr <= wr_addr;
@ -550,7 +561,7 @@ begin
-- We assume no instruction executes in the cycle immediately following
-- an interrupt, so we don't need to bypass interrupt data
if ex1.se.ramspr_write_even = '1' and e_in.ramspr_even_rdaddr = ex1.ramspr_wraddr then
ramspr_even <= ex1.e.write_data;
ramspr_even <= ramspr_even_data;
else
ramspr_even <= even_rd_data;
end if;
@ -593,7 +604,6 @@ begin
shortmul_result when "011",
muldiv_result when "100",
ramspr_result when "101",
next_nia when "110",
misc_result when others;

execute1_0: process(clk)
@ -1016,7 +1026,6 @@ begin
v.e.mode_32bit := not ex1.msr(MSR_SF);
v.e.instr_tag := e_in.instr_tag;
v.e.last_nia := e_in.nia;
v.e.br_offset := 64x"4";

v.se.ramspr_write_even := e_in.ramspr_write_even;
v.se.ramspr_write_odd := e_in.ramspr_write_odd;
@ -1114,8 +1123,6 @@ begin
v.direct_branch := '1';
v.e.br_last := '1';
v.e.br_taken := '1';
v.e.br_offset := b_in;
v.e.abs_br := insn_aa(e_in.insn);
if e_in.br_pred = '0' then
-- should never happen
v.e.redirect := '1';
@ -1129,14 +1136,13 @@ begin
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
v.take_branch := ppc_bc_taken(bo, bi, cr_in, ramspr_odd);
if v.take_branch = '1' then
v.e.br_offset := b_in;
v.e.abs_br := insn_aa(e_in.insn);
end if;
-- Mispredicted branches cause a redirect
if v.take_branch /= e_in.br_pred then
v.e.redirect := '1';
end if;
if v.take_branch = '0' then
v.redir_to_next := '1';
end if;
v.direct_branch := '1';
v.e.br_last := '1';
v.e.br_taken := v.take_branch;
@ -1150,10 +1156,6 @@ begin
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
v.take_branch := ppc_bc_taken(bo, bi, cr_in, ramspr_odd);
if v.take_branch = '1' then
v.e.br_offset := ramspr_result;
v.e.abs_br := '1';
end if;
-- Indirect branches are never predicted taken
v.e.redirect := v.take_branch;
v.e.br_taken := v.take_branch;
@ -1177,8 +1179,6 @@ begin
v.new_msr(MSR_DR) := '1';
end if;
v.se.write_msr := '1';
v.e.br_offset := ramspr_result;
v.e.abs_br := '1';
v.e.redirect := '1';
v.se.write_cfar := '1';
if HAS_FPU then
@ -1292,6 +1292,7 @@ begin

when OP_ISYNC =>
v.e.redirect := '1';
v.redir_to_next := '1';

when OP_ICBI =>
v.se.icache_inval := '1';
@ -1406,6 +1407,7 @@ begin
v.mul_select := e_in.sub_select(1 downto 0);
v.se := side_effect_init;
v.ramspr_wraddr := e_in.ramspr_wraddr;
v.lr_from_next := e_in.lr;
v.ramspr_odd_data := actions.ramspr_odd_data;
end if;

@ -1423,9 +1425,6 @@ begin

irq_valid := ex1.msr(MSR_EE) and (pmu_to_x.intr or ctrl.dec(63) or ext_irq_in);

-- Next insn adder used in a couple of places
next_nia <= std_ulogic_vector(unsigned(e_in.nia) + 4);

-- rotator control signals
right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
@ -1507,10 +1506,9 @@ begin
x_to_divider.valid <= actions.start_div;
v.div_in_progress := actions.start_div;
v.br_mispredict := v.e.redirect and actions.direct_branch;
v.advance_nia := actions.advance_nia;
v.redir_to_next := actions.redir_to_next;
exception := actions.trap;
if actions.advance_nia = '1' then
v.e.last_nia := next_nia;
end if;

-- Go busy while division is happening because the
-- divider is not pipelined. Also go busy while a
@ -1681,6 +1679,9 @@ begin
variable sign, zero : std_ulogic;
variable rcnz_hi, rcnz_lo : std_ulogic;
begin
-- Next insn adder used in a couple of places
next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4);

v := ex2;
if stage2_stall = '0' then
v.e := ex1.e;
@ -1688,6 +1689,9 @@ begin
v.ext_interrupt := ex1.ext_interrupt;
v.taken_branch_event := ex1.taken_branch_event;
v.br_mispredict := ex1.br_mispredict;
if ex1.advance_nia = '1' then
v.e.last_nia := next_nia;
end if;
end if;

if ex1.se.mult_32s = '1' and ex1.oe = '1' then
@ -1748,10 +1752,12 @@ begin
else
sprres := pmu_to_x.spr_val;
end if;
if ex1.res2_sel(1) = '0' then
ex_result := rcresult;
else
if ex1.res2_sel(1) = '1' then
ex_result := sprres;
elsif ex1.redir_to_next = '1' then
ex_result := next_nia;
else
ex_result := rcresult;
end if;

cr_res := ex1.e.write_cr_data;

@ -38,8 +38,38 @@ architecture behaviour of predecoder is
2#011100_00000# to 2#011100_11111# => INSN_andi_dot,
2#011101_00000# to 2#011101_11111# => INSN_andis_dot,
2#000000_00000# => INSN_attn,
2#010010_00000# to 2#010010_11111# => INSN_b,
2#010000_00000# to 2#010000_11111# => INSN_bc,
2#010010_00000# to 2#010010_00001# => INSN_brel,
2#010010_00010# to 2#010010_00011# => INSN_babs,
2#010010_00100# to 2#010010_00101# => INSN_brel,
2#010010_00110# to 2#010010_00111# => INSN_babs,
2#010010_01000# to 2#010010_01001# => INSN_brel,
2#010010_01010# to 2#010010_01011# => INSN_babs,
2#010010_01100# to 2#010010_01101# => INSN_brel,
2#010010_01110# to 2#010010_01111# => INSN_babs,
2#010010_10000# to 2#010010_10001# => INSN_brel,
2#010010_10010# to 2#010010_10011# => INSN_babs,
2#010010_10100# to 2#010010_10101# => INSN_brel,
2#010010_10110# to 2#010010_10111# => INSN_babs,
2#010010_11000# to 2#010010_11001# => INSN_brel,
2#010010_11010# to 2#010010_11011# => INSN_babs,
2#010010_11100# to 2#010010_11101# => INSN_brel,
2#010010_11110# to 2#010010_11111# => INSN_babs,
2#010000_00000# to 2#010000_00001# => INSN_bcrel,
2#010000_00010# to 2#010000_00011# => INSN_bcabs,
2#010000_00100# to 2#010000_00101# => INSN_bcrel,
2#010000_00110# to 2#010000_00111# => INSN_bcabs,
2#010000_01000# to 2#010000_01001# => INSN_bcrel,
2#010000_01010# to 2#010000_01011# => INSN_bcabs,
2#010000_01100# to 2#010000_01101# => INSN_bcrel,
2#010000_01110# to 2#010000_01111# => INSN_bcabs,
2#010000_10000# to 2#010000_10001# => INSN_bcrel,
2#010000_10010# to 2#010000_10011# => INSN_bcabs,
2#010000_10100# to 2#010000_10101# => INSN_bcrel,
2#010000_10110# to 2#010000_10111# => INSN_bcabs,
2#010000_11000# to 2#010000_11001# => INSN_bcrel,
2#010000_11010# to 2#010000_11011# => INSN_bcabs,
2#010000_11100# to 2#010000_11101# => INSN_bcrel,
2#010000_11110# to 2#010000_11111# => INSN_bcabs,
2#001011_00000# to 2#001011_11111# => INSN_cmpi,
2#001010_00000# to 2#001010_11111# => INSN_cmpli,
2#100010_00000# to 2#100010_11111# => INSN_lbz,

@ -174,11 +174,7 @@ begin
f.big_endian := '0';
f.mode_32bit := '0';
else
if e_in.abs_br = '1' then
f.redirect_nia := e_in.br_offset;
else
f.redirect_nia := std_ulogic_vector(unsigned(e_in.last_nia) + unsigned(e_in.br_offset));
end if;
f.redirect_nia := e_in.write_data;
-- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1
f.virt_mode := e_in.redir_mode(3);
f.priv_mode := e_in.redir_mode(2);

Loading…
Cancel
Save