Merge pull request #208 from paulusmack/faster

Make the core go faster

Several major improvements in here:
- Simple branch predictor
- Reduced latency for mispredicted branches and interrupts by removing fetch2 stage
- Cache improvements
  o Request critical dword first on refill
  o Handle hits while refilling, including on line being refilled
  o Sizes doubled for both D and I
- Loadstore improvements: can now do one load or store every two cycles in most cases
- Optimized 2-cycle multiplier for Xilinx 7-series parts using DSP slices
- Timing improvements, including:
  o Stash buffer in decode1
  o Reduced width of execute1 result mux
  o Improved SPR decode in decode1
  o Some non-critical operation take a cycle longer so we can break some long combinatorial chains
- Core logging: logs 256 bits of info every cycle into a ring buffer, to help with debugging and performance analysis

This increases the LUT usage for the "synth" + A35 target from 9182 to 10297 = 12%.
pull/211/head
Michael Neuling 5 years ago committed by GitHub
commit b90a0a2139
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -42,7 +42,7 @@ all = core_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
all: $(all) all: $(all)


core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
fetch2.vhdl utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \ utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \ decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \ cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \

@ -93,10 +93,11 @@ package common is
virt_mode : std_ulogic; virt_mode : std_ulogic;
priv_mode : std_ulogic; priv_mode : std_ulogic;
stop_mark: std_ulogic; stop_mark: std_ulogic;
sequential: std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
end record; end record;


type IcacheToFetch2Type is record type IcacheToDecode1Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark: std_ulogic; stop_mark: std_ulogic;
fetch_failed: std_ulogic; fetch_failed: std_ulogic;
@ -104,16 +105,6 @@ package common is
insn: std_ulogic_vector(31 downto 0); insn: std_ulogic_vector(31 downto 0);
end record; end record;


type Fetch2ToDecode1Type is record
valid: std_ulogic;
stop_mark : std_ulogic;
fetch_failed: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
end record;
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', fetch_failed => '0',
nia => (others => '0'), insn => (others => '0'));

type Decode1ToDecode2Type is record type Decode1ToDecode2Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic; stop_mark : std_ulogic;
@ -122,8 +113,16 @@ package common is
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR) ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
decode: decode_rom_t; decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init, br_pred => '0');

type Decode1ToFetch1Type is record
redirect : std_ulogic;
redirect_nia : std_ulogic_vector(63 downto 0);
end record; end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init);


type Decode2ToExecute1Type is record type Decode2ToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
@ -158,23 +157,24 @@ package common is
sign_extend : std_ulogic; -- do we need to sign extend? sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction? update : std_ulogic; -- is this an update instruction?
reserve : std_ulogic; -- set for larx/stcx reserve : std_ulogic; -- set for larx/stcx
br_pred : std_ulogic;
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', (valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0', lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0')); byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));


type Execute1ToMultiplyType is record type Execute1ToMultiplyType is record
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; data1: std_ulogic_vector(63 downto 0);
data1: std_ulogic_vector(64 downto 0); data2: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(64 downto 0);
is_32bit: std_ulogic; is_32bit: std_ulogic;
neg_result: std_ulogic;
end record; end record;
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0',
is_32bit => '0', is_32bit => '0', neg_result => '0',
others => (others => '0')); others => (others => '0'));


type Execute1ToDividerType is record type Execute1ToDividerType is record
@ -253,6 +253,7 @@ package common is
others => (others => '0')); others => (others => '0'));


type Loadstore1ToExecute1Type is record type Loadstore1ToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic; exception : std_ulogic;
invalid : std_ulogic; invalid : std_ulogic;
perm_error : std_ulogic; perm_error : std_ulogic;
@ -366,7 +367,7 @@ package common is


type MultiplyToExecute1Type is record type MultiplyToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0); result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic; overflow : std_ulogic;
end record; end record;
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0', constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',

@ -15,7 +15,8 @@ entity control is
complete_in : in std_ulogic; complete_in : in std_ulogic;
valid_in : in std_ulogic; valid_in : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic; sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic; stop_mark_in : in std_ulogic;


@ -23,6 +24,9 @@ entity control is
gpr_write_in : in gspr_index_t; gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic; gpr_bypassable : in std_ulogic;


update_gpr_write_valid : in std_ulogic;
update_gpr_write_reg : in gspr_index_t;

gpr_a_read_valid_in : in std_ulogic; gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t; gpr_a_read_in : in gspr_index_t;


@ -72,7 +76,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -80,6 +88,9 @@ begin
gpr_read_valid_in => gpr_a_read_valid_in, gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in, gpr_read_in => gpr_a_read_in,


ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_a_out, stall_out => stall_a_out,
use_bypass => gpr_bypass_a use_bypass => gpr_bypass_a
); );
@ -90,7 +101,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -98,6 +113,9 @@ begin
gpr_read_valid_in => gpr_b_read_valid_in, gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in, gpr_read_in => gpr_b_read_in,


ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_b_out, stall_out => stall_b_out,
use_bypass => gpr_bypass_b use_bypass => gpr_bypass_b
); );
@ -110,7 +128,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -118,6 +140,9 @@ begin
gpr_read_valid_in => gpr_c_read_valid_in, gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt, gpr_read_in => gpr_c_read_in_fmt,


ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_c_out, stall_out => stall_c_out,
use_bypass => gpr_bypass_c use_bypass => gpr_bypass_c
); );
@ -128,7 +153,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


cr_read_in => cr_read_in, cr_read_in => cr_read_in,
cr_write_in => cr_write_valid, cr_write_in => cr_write_valid,
@ -139,7 +168,8 @@ begin
control0: process(clk) control0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure; assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int; r_int <= rin_int;
end if; end if;
end process; end process;
@ -152,17 +182,18 @@ begin
v_int := r_int; v_int := r_int;


-- asynchronous -- asynchronous
valid_tmp := valid_in and not flush_in and not stall_in; valid_tmp := valid_in and not flush_in;
stall_tmp := stall_in; stall_tmp := '0';


if complete_in = '1' then if flush_in = '1' then
-- expect to see complete_in next cycle
v_int.outstanding := 1;
elsif complete_in = '1' then
v_int.outstanding := r_int.outstanding - 1; v_int.outstanding := r_int.outstanding - 1;
end if; end if;


if rst = '1' then if rst = '1' then
v_int.state := IDLE; v_int := reg_internal_init;
v_int.outstanding := 0;
stall_tmp := '0';
valid_tmp := '0'; valid_tmp := '0';
end if; end if;


@ -227,7 +258,9 @@ begin
end if; end if;


if valid_tmp = '1' then if valid_tmp = '1' then
v_int.outstanding := v_int.outstanding + 1; if deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;
gpr_write_valid <= gpr_write_valid_in; gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in; cr_write_valid <= cr_write_in;
else else
@ -237,7 +270,7 @@ begin


-- update outputs -- update outputs
valid_out <= valid_tmp; valid_out <= valid_tmp;
stall_out <= stall_tmp; stall_out <= stall_tmp or deferred;


-- update registers -- update registers
rin_int <= v_int; rin_int <= v_int;

@ -11,7 +11,8 @@ entity core is
SIM : boolean := false; SIM : boolean := false;
DISABLE_FLATTEN : boolean := false; DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true; EX1_BYPASS : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0') ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512
); );
port ( port (
clk : in std_ulogic; clk : in std_ulogic;
@ -41,16 +42,14 @@ entity core is
end core; end core;


architecture behave of core is architecture behave of core is
-- fetch signals
signal fetch2_to_decode1: Fetch2ToDecode1Type;

-- icache signals -- icache signals
signal fetch1_to_icache : Fetch1ToIcacheType; signal fetch1_to_icache : Fetch1ToIcacheType;
signal icache_to_fetch2 : IcacheToFetch2Type; signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType; signal mmu_to_icache : MmuToIcacheType;


-- decode signals -- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type; signal decode1_to_decode2: Decode1ToDecode2Type;
signal decode1_to_fetch1: Decode1ToFetch1Type;
signal decode2_to_execute1: Decode2ToExecute1Type; signal decode2_to_execute1: Decode2ToExecute1Type;


-- register file signals -- register file signals
@ -83,16 +82,18 @@ architecture behave of core is
-- local signals -- local signals
signal fetch1_stall_in : std_ulogic; signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic; signal icache_stall_out : std_ulogic;
signal fetch2_stall_in : std_ulogic; signal icache_stall_in : std_ulogic;
signal decode1_stall_in : std_ulogic; signal decode1_stall_in : std_ulogic;
signal decode2_stall_in : std_ulogic; signal decode1_busy : std_ulogic;
signal decode2_busy_in : std_ulogic;
signal decode2_stall_out : std_ulogic; signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic; signal ex1_icache_inval: std_ulogic;
signal ex1_stall_out: std_ulogic; signal ex1_busy_out: std_ulogic;
signal ls1_stall_out: std_ulogic;
signal dcache_stall_out: std_ulogic; signal dcache_stall_out: std_ulogic;


signal flush: std_ulogic; signal flush: std_ulogic;
signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic;


signal complete: std_ulogic; signal complete: std_ulogic;
signal terminate: std_ulogic; signal terminate: std_ulogic;
@ -128,6 +129,12 @@ architecture behave of core is
-- Debug status -- Debug status
signal dbg_core_is_stopped: std_ulogic; signal dbg_core_is_stopped: std_ulogic;


-- Logging signals
signal log_data : std_ulogic_vector(255 downto 0);
signal log_rd_addr : std_ulogic_vector(31 downto 0);
signal log_wr_addr : std_ulogic_vector(31 downto 0);
signal log_rd_data : std_ulogic_vector(63 downto 0);

function keep_h(disable : boolean) return string is function keep_h(disable : boolean) return string is
begin begin
if disable then if disable then
@ -139,7 +146,6 @@ architecture behave of core is
attribute keep_hierarchy : string; attribute keep_hierarchy : string;
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
@ -180,45 +186,40 @@ begin
rst => rst_fetch1, rst => rst_fetch1,
alt_reset_in => alt_reset_d, alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in, stall_in => fetch1_stall_in,
flush_in => flush, flush_in => fetch1_flush,
stop_in => dbg_core_stop, stop_in => dbg_core_stop,
d_in => decode1_to_fetch1,
e_in => execute1_to_fetch1, e_in => execute1_to_fetch1,
i_out => fetch1_to_icache i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
); );


fetch1_stall_in <= icache_stall_out or decode2_stall_out; fetch1_stall_in <= icache_stall_out or decode1_busy;
fetch1_flush <= flush or decode1_flush;


icache_0: entity work.icache icache_0: entity work.icache
generic map( generic map(
SIM => SIM, SIM => SIM,
LINE_SIZE => 64, LINE_SIZE => 64,
NUM_LINES => 32, NUM_LINES => 64,
NUM_WAYS => 2 NUM_WAYS => 2
) )
port map( port map(
clk => clk, clk => clk,
rst => rst_icache, rst => rst_icache,
i_in => fetch1_to_icache, i_in => fetch1_to_icache,
i_out => icache_to_fetch2, i_out => icache_to_decode1,
m_in => mmu_to_icache, m_in => mmu_to_icache,
flush_in => flush, flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval, inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in,
stall_out => icache_stall_out, stall_out => icache_stall_out,
wishbone_out => wishbone_insn_out, wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in wishbone_in => wishbone_insn_in,
); log_out => log_data(96 downto 43)

fetch2_0: entity work.fetch2
port map (
clk => clk,
rst => rst_fetch2,
stall_in => fetch2_stall_in,
flush_in => flush,
i_in => icache_to_fetch2,
f_out => fetch2_to_decode1
); );


fetch2_stall_in <= decode2_stall_out; icache_stall_in <= decode1_busy;


decode1_0: entity work.decode1 decode1_0: entity work.decode1
port map ( port map (
@ -226,8 +227,12 @@ begin
rst => rst_dec1, rst => rst_dec1,
stall_in => decode1_stall_in, stall_in => decode1_stall_in,
flush_in => flush, flush_in => flush,
f_in => fetch2_to_decode1, flush_out => decode1_flush,
d_out => decode1_to_decode2 busy_out => decode1_busy,
f_in => icache_to_decode1,
d_out => decode1_to_decode2,
f_out => decode1_to_fetch1,
log_out => log_data(109 downto 97)
); );


decode1_stall_in <= decode2_stall_out; decode1_stall_in <= decode2_stall_out;
@ -239,7 +244,7 @@ begin
port map ( port map (
clk => clk, clk => clk,
rst => rst_dec2, rst => rst_dec2,
stall_in => decode2_stall_in, busy_in => decode2_busy_in,
stall_out => decode2_stall_out, stall_out => decode2_stall_out,
flush_in => flush, flush_in => flush,
complete_in => complete, complete_in => complete,
@ -249,9 +254,10 @@ begin
r_in => register_file_to_decode2, r_in => register_file_to_decode2,
r_out => decode2_to_register_file, r_out => decode2_to_register_file,
c_in => cr_file_to_decode2, c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file c_out => decode2_to_cr_file,
log_out => log_data(119 downto 110)
); );
decode2_stall_in <= ex1_stall_out or ls1_stall_out; decode2_busy_in <= ex1_busy_out;


register_file_0: entity work.register_file register_file_0: entity work.register_file
generic map ( generic map (
@ -267,7 +273,8 @@ begin
dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data, dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate, sim_dump => terminate,
sim_dump_done => sim_cr_dump sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
); );


cr_file_0: entity work.cr_file cr_file_0: entity work.cr_file
@ -279,7 +286,8 @@ begin
d_in => decode2_to_cr_file, d_in => decode2_to_cr_file,
d_out => cr_file_to_decode2, d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file, w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
); );


execute1_0: entity work.execute1 execute1_0: entity work.execute1
@ -290,7 +298,7 @@ begin
clk => clk, clk => clk,
rst => rst_ex1, rst => rst_ex1,
flush_out => flush, flush_out => flush,
stall_out => ex1_stall_out, busy_out => ex1_busy_out,
e_in => decode2_to_execute1, e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1, l_in => loadstore1_to_execute1,
ext_irq_in => ext_irq, ext_irq_in => ext_irq,
@ -299,7 +307,11 @@ begin
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
terminate_out => terminate terminate_out => terminate,
log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
log_wr_addr => log_wr_addr
); );


loadstore1_0: entity work.loadstore1 loadstore1_0: entity work.loadstore1
@ -314,7 +326,7 @@ begin
m_out => loadstore1_to_mmu, m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1, m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out, dc_stall => dcache_stall_out,
stall_out => ls1_stall_out log_out => log_data(149 downto 140)
); );


mmu_0: entity work.mmu mmu_0: entity work.mmu
@ -331,7 +343,7 @@ begin
dcache_0: entity work.dcache dcache_0: entity work.dcache
generic map( generic map(
LINE_SIZE => 64, LINE_SIZE => 64,
NUM_LINES => 32, NUM_LINES => 64,
NUM_WAYS => 2 NUM_WAYS => 2
) )
port map ( port map (
@ -343,7 +355,8 @@ begin
m_out => dcache_to_mmu, m_out => dcache_to_mmu,
stall_out => dcache_stall_out, stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in, wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
); );


writeback_0: entity work.writeback writeback_0: entity work.writeback
@ -356,7 +369,13 @@ begin
complete_out => complete complete_out => complete
); );


log_data(151 downto 150) <= "00";
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug debug_0: entity work.core_debug
generic map (
LOG_LENGTH => LOG_LENGTH
)
port map ( port map (
clk => clk, clk => clk,
rst => rst_dbg, rst => rst_dbg,
@ -377,6 +396,10 @@ begin
dbg_gpr_ack => dbg_gpr_ack, dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data, dbg_gpr_data => dbg_gpr_data,
log_data => log_data,
log_read_addr => log_rd_addr,
log_read_data => log_rd_data,
log_write_addr => log_wr_addr,
terminated_out => terminated_out terminated_out => terminated_out
); );



@ -3,9 +3,14 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.utils.all;
use work.common.all; use work.common.all;


entity core_debug is entity core_debug is
generic (
-- Length of log buffer
LOG_LENGTH : natural := 512
);
port ( port (
clk : in std_logic; clk : in std_logic;
rst : in std_logic; rst : in std_logic;
@ -34,6 +39,12 @@ entity core_debug is
dbg_gpr_addr : out gspr_index_t; dbg_gpr_addr : out gspr_index_t;
dbg_gpr_data : in std_ulogic_vector(63 downto 0); dbg_gpr_data : in std_ulogic_vector(63 downto 0);


-- Core logging data
log_data : in std_ulogic_vector(255 downto 0);
log_read_addr : in std_ulogic_vector(31 downto 0);
log_read_data : out std_ulogic_vector(63 downto 0);
log_write_addr : out std_ulogic_vector(31 downto 0);

-- Misc -- Misc
terminated_out : out std_ulogic terminated_out : out std_ulogic
); );
@ -77,6 +88,12 @@ architecture behave of core_debug is
-- GSPR register data -- GSPR register data
constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101"; constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101";


-- Log buffer address and data registers
constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110";
constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111";

constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH);

-- Some internal wires -- Some internal wires
signal stat_reg : std_ulogic_vector(63 downto 0); signal stat_reg : std_ulogic_vector(63 downto 0);


@ -89,6 +106,12 @@ architecture behave of core_debug is
signal do_gspr_rd : std_ulogic; signal do_gspr_rd : std_ulogic;
signal gspr_index : gspr_index_t; signal gspr_index : gspr_index_t;


signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others => '0');
signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0');
signal do_dmi_log_rd : std_ulogic;
signal dmi_read_log_data : std_ulogic;
signal dmi_read_log_data_1 : std_ulogic;

begin begin
-- Single cycle register accesses on DMI except for GSPR data -- Single cycle register accesses on DMI except for GSPR data
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
@ -108,6 +131,8 @@ begin
nia when DBG_CORE_NIA, nia when DBG_CORE_NIA,
msr when DBG_CORE_MSR, msr when DBG_CORE_MSR,
dbg_gpr_data when DBG_CORE_GSPR_DATA, dbg_gpr_data when DBG_CORE_GSPR_DATA,
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
log_dmi_data when DBG_CORE_LOG_DATA,
(others => '0') when others; (others => '0') when others;


-- DMI writes -- DMI writes
@ -118,6 +143,7 @@ begin
do_step <= '0'; do_step <= '0';
do_reset <= '0'; do_reset <= '0';
do_icreset <= '0'; do_icreset <= '0';
do_dmi_log_rd <= '0';


if (rst) then if (rst) then
stopping <= '0'; stopping <= '0';
@ -151,11 +177,26 @@ begin
end if; end if;
elsif dmi_addr = DBG_CORE_GSPR_INDEX then elsif dmi_addr = DBG_CORE_GSPR_INDEX then
gspr_index <= dmi_din(gspr_index_t'left downto 0); gspr_index <= dmi_din(gspr_index_t'left downto 0);
elsif dmi_addr = DBG_CORE_LOG_ADDR then
log_dmi_addr <= dmi_din(31 downto 0);
do_dmi_log_rd <= '1';
end if; end if;
else else
report("DMI read from " & to_string(dmi_addr)); report("DMI read from " & to_string(dmi_addr));
end if; end if;

elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then
-- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA
log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <=
std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1);
do_dmi_log_rd <= '1';
end if; end if;
dmi_read_log_data_1 <= dmi_read_log_data;
if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then
dmi_read_log_data <= '1';
else
dmi_read_log_data <= '0';
end if;


-- Set core stop on terminate. We'll be stopping some time *after* -- Set core stop on terminate. We'll be stopping some time *after*
-- the offending instruction, at least until we can do back flushes -- the offending instruction, at least until we can do back flushes
@ -175,5 +216,87 @@ begin
core_rst <= do_reset; core_rst <= do_reset;
icache_rst <= do_icreset; icache_rst <= do_icreset;
terminated_out <= terminated; terminated_out <= terminated;

-- Logging RAM
maybe_log: if LOG_LENGTH > 0 generate
subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0);
type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0);
signal log_array : log_array_t;
signal log_rd_ptr : log_ptr_t;
signal log_wr_ptr : log_ptr_t;
signal log_toggle : std_ulogic;
signal log_wr_enable : std_ulogic;
signal log_rd_ptr_latched : log_ptr_t;
signal log_rd : std_ulogic_vector(255 downto 0);
signal log_dmi_reading : std_ulogic;
signal log_dmi_read_done : std_ulogic;

function select_dword(data : std_ulogic_vector(255 downto 0);
addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
variable firstbit : integer;
begin
firstbit := to_integer(unsigned(addr(1 downto 0))) * 64;
return data(firstbit + 63 downto firstbit);
end;

attribute ram_style : string;
attribute ram_style of log_array : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of log_array : signal is "power";

begin
-- Use MSB of read addresses to stop the logging
log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31));

log_ram: process(clk)
begin
if rising_edge(clk) then
if log_wr_enable = '1' then
log_array(to_integer(log_wr_ptr)) <= log_data;
end if;
log_rd <= log_array(to_integer(log_rd_ptr_latched));
end if;
end process;


log_buffer: process(clk)
variable b : integer;
variable data : std_ulogic_vector(255 downto 0);
begin
if rising_edge(clk) then
if rst = '1' then
log_wr_ptr <= (others => '0');
log_toggle <= '0';
elsif log_wr_enable = '1' then
if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then
log_toggle <= not log_toggle;
end if;
log_wr_ptr <= log_wr_ptr + 1;
end if;
if do_dmi_log_rd = '1' then
log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2));
else
log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2));
end if;
if log_dmi_read_done = '1' then
log_dmi_data <= select_dword(log_rd, log_dmi_addr);
else
log_read_data <= select_dword(log_rd, log_read_addr);
end if;
log_dmi_read_done <= log_dmi_reading;
log_dmi_reading <= do_dmi_log_rd;
end if;
end process;
log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
log_write_addr(LOG_INDEX_BITS) <= '1';
log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0');
end generate;

no_log: if LOG_LENGTH = 0 generate
begin
log_read_data <= (others => '0');
log_write_addr <= x"00000001";
end generate;

end behave; end behave;



@ -18,7 +18,9 @@ entity cr_file is
w_in : in WritebackToCrFileType; w_in : in WritebackToCrFileType;


-- debug -- debug
sim_dump : in std_ulogic sim_dump : in std_ulogic;

log_out : out std_ulogic_vector(12 downto 0)
); );
end entity cr_file; end entity cr_file;


@ -27,6 +29,7 @@ architecture behaviour of cr_file is
signal crs_updated : std_ulogic_vector(31 downto 0); signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init; signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t; signal xerc_updated : xer_common_t;
signal log_data : std_ulogic_vector(12 downto 0);
begin begin
cr_create_0: process(all) cr_create_0: process(all)
variable hi, lo : integer := 0; variable hi, lo : integer := 0;
@ -88,4 +91,14 @@ begin
end process; end process;
end generate; end generate;


cr_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_cr_enable &
w_in.write_cr_data(31 downto 28) &
w_in.write_cr_mask;
end if;
end process;
log_out <= log_data;

end architecture behaviour; end architecture behaviour;

@ -4,11 +4,15 @@ use ieee.numeric_std.all;


entity cr_hazard is entity cr_hazard is
generic ( generic (
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 1
); );
port( port(
clk : in std_ulogic; clk : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
@ -22,7 +26,7 @@ architecture behaviour of cr_hazard is
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0'); constant pipeline_entry_init : pipeline_entry_type := (valid => '0');


type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type; type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);


signal r, rin : pipeline_t := pipeline_t_init; signal r, rin : pipeline_t := pipeline_t_init;
@ -30,9 +34,7 @@ begin
cr_hazard0: process(clk) cr_hazard0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if stall_in = '0' then r <= rin;
r <= rin;
end if;
end if; end if;
end process; end process;


@ -41,22 +43,23 @@ begin
begin begin
v := r; v := r;


stall_out <= '0'; -- XXX assumes PIPELINE_DEPTH = 1
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop if complete_in = '1' then
if (r(i).valid = cr_read_in) then v(1).valid := '0';
stall_out <= '1'; end if;
end if; stall_out <= cr_read_in and (v(0).valid or v(1).valid);
end loop;

v(0).valid := cr_write_in;
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
-- propagate to next slot
v(i+1) := r(i);
end loop;


-- asynchronous output -- XXX assumes PIPELINE_DEPTH = 1
if cr_read_in = '0' then if busy_in = '0' then
stall_out <= '0'; v(1) := r(0);
v(0).valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := cr_write_in;
end if;
if flush_in = '1' then
v(0).valid := '0';
v(1).valid := '0';
end if; end if;


-- update registers -- update registers

File diff suppressed because it is too large Load Diff

@ -8,19 +8,24 @@ use work.decode_types.all;


entity decode1 is entity decode1 is
port ( port (
clk : in std_ulogic; clk : in std_ulogic;
rst : in std_ulogic; rst : in std_ulogic;


stall_in : in std_ulogic; stall_in : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;

busy_out : out std_ulogic;
f_in : in Fetch2ToDecode1Type; flush_out : out std_ulogic;
d_out : out Decode1ToDecode2Type
f_in : in IcacheToDecode1Type;
f_out : out Decode1ToFetch1Type;
d_out : out Decode1ToDecode2Type;
log_out : out std_ulogic_vector(12 downto 0)
); );
end entity decode1; end entity decode1;


architecture behaviour of decode1 is architecture behaviour of decode1 is
signal r, rin : Decode1ToDecode2Type; signal r, rin : Decode1ToDecode2Type;
signal s : Decode1ToDecode2Type;


subtype major_opcode_t is unsigned(5 downto 0); subtype major_opcode_t is unsigned(5 downto 0);
type major_rom_array_t is array(0 to 63) of decode_rom_t; type major_rom_array_t is array(0 to 63) of decode_rom_t;
@ -352,24 +357,45 @@ architecture behaviour of decode1 is
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');


signal log_data : std_ulogic_vector(12 downto 0);

begin begin
decode1_0: process(clk) decode1_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
-- Output state remains unchanged on stall, unless we are flushing if rst = '1' then
if rst = '1' or flush_in = '1' or stall_in = '0' then r <= Decode1ToDecode2Init;
r <= rin; s <= Decode1ToDecode2Init;
elsif flush_in = '1' then
r.valid <= '0';
s.valid <= '0';
elsif s.valid = '1' then
if stall_in = '0' then
r <= s;
s.valid <= '0';
end if;
else
s <= rin;
s.valid <= rin.valid and r.valid and stall_in;
if r.valid = '0' or stall_in = '0' then
r <= rin;
end if;
end if; end if;
end if; end if;
end process; end process;
busy_out <= s.valid;


decode1_1: process(all) decode1_1: process(all)
variable v : Decode1ToDecode2Type; variable v : Decode1ToDecode2Type;
variable f : Decode1ToFetch1Type;
variable majorop : major_opcode_t; variable majorop : major_opcode_t;
variable op_19_bits: std_ulogic_vector(2 downto 0); variable op_19_bits: std_ulogic_vector(2 downto 0);
variable sprn : spr_num_t; variable sprn : spr_num_t;
variable br_nia : std_ulogic_vector(61 downto 0);
variable br_target : std_ulogic_vector(61 downto 0);
variable br_offset : signed(23 downto 0);
begin begin
v := r; v := Decode1ToDecode2Init;


v.valid := f_in.valid; v.valid := f_in.valid;
v.nia := f_in.nia; v.nia := f_in.nia;
@ -395,6 +421,31 @@ begin
-- major opcode 31, lots of things -- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));


-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
sprn := decode_spr_num(f_in.insn);
v.ispr1 := fast_spr_num(sprn);

if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr
-- Make slow SPRs single issue
if is_fast_spr(v.ispr1) = '0' then
v.decode.sgl_pipe := '1';
-- send MMU-related SPRs to loadstore1
case sprn is
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
v.decode.unit := LDST;
when others =>
end case;
end if;
end if;

elsif majorop = "010000" then
-- CTR may be needed as input to bc
v.decode := major_decode_rom_array(to_integer(majorop));
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
end if;

elsif majorop = "010011" then elsif majorop = "010011" then
if decode_op_19_valid(to_integer(unsigned(f_in.insn(10 downto 1)))) = '0' then if decode_op_19_valid(to_integer(unsigned(f_in.insn(10 downto 1)))) = '0' then
report "op 19 illegal subcode"; report "op 19 illegal subcode";
@ -405,6 +456,27 @@ begin
report "op 19 sub " & to_hstring(op_19_bits); report "op 19 sub " & to_hstring(op_19_bits);
end if; end if;


-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
if f_in.insn(2) = '0' then
-- Could be OP_BCREG: bclr, bcctr, bctar
-- Branch uses CTR as condition when BO(2) is 0. This is
-- also used to indicate that CTR is modified (they go
-- together).
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
end if;
-- TODO: Add TAR
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);
else
v.ispr2 := fast_spr_num(SPR_CTR);
end if;
else
-- Could be OP_RFID
v.ispr1 := fast_spr_num(SPR_SRR0);
v.ispr2 := fast_spr_num(SPR_SRR1);
end if;

elsif majorop = "011110" then elsif majorop = "011110" then
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1)))); v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));


@ -422,56 +494,45 @@ begin
v.decode := major_decode_rom_array(to_integer(majorop)); v.decode := major_decode_rom_array(to_integer(majorop));
end if; end if;


-- Set ISPR1/ISPR2 when needed -- Branch predictor
if v.decode.insn_type = OP_BC or v.decode.insn_type = OP_BCREG then -- Note bclr, bcctr and bctar are predicted not taken as we have no
-- Branch uses CTR as condition when BO(2) is 0. This is -- count cache or link stack.
-- also used to indicate that CTR is modified (they go br_offset := (others => '0');
-- together). if majorop = 18 then
-- -- Unconditional branches are always taken
if f_in.insn(23) = '0' then v.br_pred := '1';
v.ispr1 := fast_spr_num(SPR_CTR); br_offset := signed(f_in.insn(25 downto 2));
end if; elsif majorop = 16 then

-- Predict backward branches as taken, forward as untaken
-- Branch source register is an SPR v.br_pred := f_in.insn(15);
if v.decode.insn_type = OP_BCREG then br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
-- TODO: Add TAR
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);
else
v.ispr2 := fast_spr_num(SPR_CTR);
end if;
end if;
elsif v.decode.insn_type = OP_MFSPR or v.decode.insn_type = OP_MTSPR then
sprn := decode_spr_num(f_in.insn);
v.ispr1 := fast_spr_num(sprn);
-- Make slow SPRs single issue
if is_fast_spr(v.ispr1) = '0' then
v.decode.sgl_pipe := '1';
-- send MMU-related SPRs to loadstore1
case sprn is
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
v.decode.unit := LDST;
when others =>
end case;
end if;
elsif v.decode.insn_type = OP_RFID then
report "PPC RFID";
v.ispr1 := fast_spr_num(SPR_SRR0);
v.ispr2 := fast_spr_num(SPR_SRR1);
end if; end if;

br_nia := f_in.nia(63 downto 2);
if flush_in = '1' then if f_in.insn(1) = '1' then
v.valid := '0'; br_nia := (others => '0');
end if;

if rst = '1' then
v := Decode1ToDecode2Init;
end if; end if;
br_target := std_ulogic_vector(signed(br_nia) + br_offset);
f.redirect := v.br_pred and f_in.valid and not flush_in and not s.valid;
f.redirect_nia := br_target & "00";


-- Update registers -- Update registers
rin <= v; rin <= v;


-- Update outputs -- Update outputs
d_out <= r; d_out <= r;
f_out <= f;
flush_out <= f.redirect;
end process; end process;

dec1_log : process(clk)
begin
if rising_edge(clk) then
log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) &
r.nia(5 downto 2) &
std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) &
r.valid;
end if;
end process;
log_out <= log_data;

end architecture behaviour; end architecture behaviour;

@ -17,7 +17,7 @@ entity decode2 is
rst : in std_ulogic; rst : in std_ulogic;


complete_in : in std_ulogic; complete_in : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;


stopped_out : out std_ulogic; stopped_out : out std_ulogic;
@ -32,7 +32,9 @@ entity decode2 is
r_out : out Decode2ToRegisterFileType; r_out : out Decode2ToRegisterFileType;


c_in : in CrFileToDecode2Type; c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType c_out : out Decode2ToCrFileType;

log_out : out std_ulogic_vector(9 downto 0)
); );
end entity decode2; end entity decode2;


@ -43,6 +45,10 @@ architecture behaviour of decode2 is


signal r, rin : reg_type; signal r, rin : reg_type;


signal deferred : std_ulogic;

signal log_data : std_ulogic_vector(9 downto 0);

type decode_input_reg_t is record type decode_input_reg_t is record
reg_valid : std_ulogic; reg_valid : std_ulogic;
reg : gspr_index_t; reg : gspr_index_t;
@ -61,8 +67,6 @@ architecture behaviour of decode2 is
return decode_input_reg_t is return decode_input_reg_t is
begin begin
if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
assert is_fast_spr(ispr) = '0' report "Decode A says GPR but ISPR says SPR:" &
to_hstring(ispr) severity failure;
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data); return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
elsif t = SPR then elsif t = SPR then
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
@ -87,8 +91,6 @@ architecture behaviour of decode2 is
begin begin
case t is case t is
when RB => when RB =>
assert is_fast_spr(ispr) = '0' report "Decode B says GPR but ISPR says SPR:" &
to_hstring(ispr) severity failure;
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data); ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when CONST_UI => when CONST_UI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64))); ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
@ -196,6 +198,9 @@ architecture behaviour of decode2 is
signal gpr_write : gspr_index_t; signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic; signal gpr_bypassable : std_ulogic;


signal update_gpr_write_valid : std_ulogic;
signal update_gpr_write_reg : gspr_index_t;

signal gpr_a_read_valid : std_ulogic; signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t; signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic; signal gpr_a_bypass : std_ulogic;
@ -220,7 +225,8 @@ begin


complete_in => complete_in, complete_in => complete_in,
valid_in => control_valid_in, valid_in => control_valid_in,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
flush_in => flush_in, flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe, sgl_pipe_in => control_sgl_pipe,
stop_mark_in => d_in.stop_mark, stop_mark_in => d_in.stop_mark,
@ -229,6 +235,9 @@ begin
gpr_write_in => gpr_write, gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable, gpr_bypassable => gpr_bypassable,


update_gpr_write_valid => update_gpr_write_valid,
update_gpr_write_reg => update_gpr_write_reg,

gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read, gpr_a_read_in => gpr_a_read,


@ -250,18 +259,24 @@ begin
gpr_bypass_c => gpr_c_bypass gpr_bypass_c => gpr_c_bypass
); );


deferred <= r.e.valid and busy_in;

decode2_0: process(clk) decode2_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rin.e.valid = '1' then if rst = '1' or flush_in = '1' or deferred = '0' then
report "execute " & to_hstring(rin.e.nia); if rin.e.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
end if;
r <= rin;
end if; end if;
r <= rin;
end if; end if;
end process; end process;


r_out.read1_reg <= gpr_or_spr_to_gspr(insn_ra(d_in.insn), d_in.ispr1); r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
r_out.read2_reg <= gpr_or_spr_to_gspr(insn_rb(d_in.insn), d_in.ispr2); else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else gpr_to_gspr(insn_rb(d_in.insn));
r_out.read3_reg <= insn_rs(d_in.insn); r_out.read3_reg <= insn_rs(d_in.insn);


c_out.read <= d_in.decode.input_cr; c_out.read <= d_in.decode.input_cr;
@ -343,6 +358,7 @@ begin
v.e.sign_extend := d_in.decode.sign_extend; v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update; v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve; v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;


-- issue control -- issue control
control_valid_in <= d_in.valid; control_valid_in <= d_in.valid;
@ -354,6 +370,13 @@ begin
if EX1_BYPASS and d_in.decode.unit = ALU then if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1'; gpr_bypassable <= '1';
end if; end if;
update_gpr_write_valid <= d_in.decode.update;
update_gpr_write_reg <= decoded_reg_a.reg;
if v.e.lr = '1' then
-- there are no instructions that have both update=1 and lr=1
update_gpr_write_valid <= '1';
update_gpr_write_reg <= fast_spr_num(SPR_LR);
end if;


gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg; gpr_a_read <= decoded_reg_a.reg;
@ -371,7 +394,7 @@ begin
v.e.insn_type := OP_ILLEGAL; v.e.insn_type := OP_ILLEGAL;
end if; end if;


if rst = '1' then if rst = '1' or flush_in = '1' then
v.e := Decode2ToExecute1Init; v.e := Decode2ToExecute1Init;
end if; end if;


@ -381,4 +404,19 @@ begin
-- Update outputs -- Update outputs
e_out <= r.e; e_out <= r.e;
end process; end process;

dec2_log : process(clk)
begin
if rising_edge(clk) then
log_data <= r.e.nia(5 downto 2) &
r.e.valid &
stopped_out &
stall_out &
r.e.bypass_data3 &
r.e.bypass_data2 &
r.e.bypass_data1;
end if;
end process;
log_out <= log_data;

end architecture behaviour; end architecture behaviour;

@ -20,7 +20,7 @@ entity execute1 is


-- asynchronous -- asynchronous
flush_out : out std_ulogic; flush_out : out std_ulogic;
stall_out : out std_ulogic; busy_out : out std_ulogic;


e_in : in Decode2ToExecute1Type; e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type; l_in : in Loadstore1ToExecute1Type;
@ -36,34 +36,44 @@ entity execute1 is
dbg_msr_out : out std_ulogic_vector(63 downto 0); dbg_msr_out : out std_ulogic_vector(63 downto 0);


icache_inval : out std_ulogic; icache_inval : out std_ulogic;
terminate_out : out std_ulogic terminate_out : out std_ulogic;

log_out : out std_ulogic_vector(14 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0);
log_rd_data : in std_ulogic_vector(63 downto 0);
log_wr_addr : in std_ulogic_vector(31 downto 0)
); );
end entity execute1; end entity execute1;


architecture behaviour of execute1 is architecture behaviour of execute1 is
type reg_type is record type reg_type is record
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
busy: std_ulogic;
terminate: std_ulogic;
lr_update : std_ulogic; lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0); next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic; cntz_in_progress : std_ulogic;
slow_op_insn : insn_type_t;
slow_op_dest : gpr_index_t; slow_op_dest : gpr_index_t;
slow_op_rc : std_ulogic; slow_op_rc : std_ulogic;
slow_op_oe : std_ulogic; slow_op_oe : std_ulogic;
slow_op_xerc : xer_common_t; slow_op_xerc : xer_common_t;
ldst_nia : std_ulogic_vector(63 downto 0); last_nia : std_ulogic_vector(63 downto 0);
log_addr_spr : std_ulogic_vector(31 downto 0);
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, lr_update => '0', (e => Execute1ToWritebackInit, busy => '0', lr_update => '0', terminate => '0',
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0')); next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));


signal r, rin : reg_type; signal r, rin : reg_type;


signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0); signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);


signal valid_in : std_ulogic;
signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic; signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
@ -72,8 +82,6 @@ architecture behaviour of execute1 is
signal rotator_carry: std_ulogic; signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0); signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0);
signal popcnt_result: std_ulogic_vector(63 downto 0);
signal parity_result: std_ulogic_vector(63 downto 0);


-- multiply signals -- multiply signals
signal x_to_multiply: Execute1ToMultiplyType; signal x_to_multiply: Execute1ToMultiplyType;
@ -83,6 +91,11 @@ architecture behaviour of execute1 is
signal x_to_divider: Execute1ToDividerType; signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type; signal divider_to_x: DividerToExecute1Type;


-- signals for logging
signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic;
signal log_data : std_ulogic_vector(14 downto 0);

type privilege_level is (USER, SUPER); type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level; type op_privilege_array is array(insn_type_t) of privilege_level;
constant op_privilege: op_privilege_array := ( constant op_privilege: op_privilege_array := (
@ -193,9 +206,7 @@ begin
invert_in => e_in.invert_a, invert_in => e_in.invert_a,
invert_out => e_in.invert_out, invert_out => e_in.invert_out,
result => logical_result, result => logical_result,
datalen => e_in.data_len, datalen => e_in.data_len
popcnt => popcnt_result,
parity => parity_result
); );


countzero_0: entity work.zero_counter countzero_0: entity work.zero_counter
@ -223,11 +234,17 @@ begin
); );


dbg_msr_out <= ctrl.msr; dbg_msr_out <= ctrl.msr;
log_rd_addr <= r.log_addr_spr;


a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1; a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;


busy_out <= l_in.busy or r.busy;
valid_in <= e_in.valid and not busy_out;

terminate_out <= r.terminate;

execute1_0: process(clk) execute1_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
@ -238,7 +255,7 @@ begin
else else
r <= rin; r <= rin;
ctrl <= ctrl_tmp; ctrl <= ctrl_tmp;
assert not (r.lr_update = '1' and e_in.valid = '1') assert not (r.lr_update = '1' and valid_in = '1')
report "LR update collision with valid in EX1" report "LR update collision with valid in EX1"
severity failure; severity failure;
if r.lr_update = '1' then if r.lr_update = '1' then
@ -274,7 +291,6 @@ begin
variable sign1, sign2 : std_ulogic; variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0); variable abs1, abs2 : signed(63 downto 0);
variable overflow : std_ulogic; variable overflow : std_ulogic;
variable negative : std_ulogic;
variable zerohi, zerolo : std_ulogic; variable zerohi, zerolo : std_ulogic;
variable msb_a, msb_b : std_ulogic; variable msb_a, msb_b : std_ulogic;
variable a_lt : std_ulogic; variable a_lt : std_ulogic;
@ -284,11 +300,18 @@ begin
variable exception_nextpc : std_ulogic; variable exception_nextpc : std_ulogic;
variable trapval : std_ulogic_vector(4 downto 0); variable trapval : std_ulogic_vector(4 downto 0);
variable illegal : std_ulogic; variable illegal : std_ulogic;
variable is_branch : std_ulogic;
variable taken_branch : std_ulogic;
variable abs_branch : std_ulogic;
variable spr_val : std_ulogic_vector(63 downto 0);
begin begin
result := (others => '0'); result := (others => '0');
result_with_carry := (others => '0'); result_with_carry := (others => '0');
result_en := '0'; result_en := '0';
newcrf := (others => '0'); newcrf := (others => '0');
is_branch := '0';
taken_branch := '0';
abs_branch := '0';


v := r; v := r;
v.e := Execute1ToWritebackInit; v.e := Execute1ToWritebackInit;
@ -334,32 +357,7 @@ begin
v.div_in_progress := '0'; v.div_in_progress := '0';
v.cntz_in_progress := '0'; v.cntz_in_progress := '0';


-- signals to multiply unit -- signals to multiply and divide units
x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.insn_type <= e_in.insn_type;
x_to_multiply.is_32bit <= e_in.is_32bit;

if e_in.is_32bit = '1' then
if e_in.is_signed = '1' then
x_to_multiply.data1 <= (others => a_in(31));
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
x_to_multiply.data2 <= (others => b_in(31));
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
else
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
end if;
else
if e_in.is_signed = '1' then
x_to_multiply.data1 <= a_in(63) & a_in;
x_to_multiply.data2 <= b_in(63) & b_in;
else
x_to_multiply.data1 <= '0' & a_in;
x_to_multiply.data2 <= '0' & b_in;
end if;
end if;

-- signals to divide unit
sign1 := '0'; sign1 := '0';
sign2 := '0'; sign2 := '0';
if e_in.is_signed = '1' then if e_in.is_signed = '1' then
@ -383,15 +381,22 @@ begin
abs2 := - signed(b_in); abs2 := - signed(b_in);
end if; end if;


x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.is_32bit <= e_in.is_32bit;

x_to_divider <= Execute1ToDividerInit; x_to_divider <= Execute1ToDividerInit;
x_to_divider.is_signed <= e_in.is_signed; x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit; x_to_divider.is_32bit <= e_in.is_32bit;
if e_in.insn_type = OP_MOD then if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1'; x_to_divider.is_modulus <= '1';
end if; end if;

x_to_multiply.neg_result <= sign1 xor sign2;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then if e_in.is_32bit = '0' then
-- 64-bit forms -- 64-bit forms
x_to_multiply.data1 <= std_ulogic_vector(abs1);
x_to_multiply.data2 <= std_ulogic_vector(abs2);
if e_in.insn_type = OP_DIVE then if e_in.insn_type = OP_DIVE then
x_to_divider.is_extended <= '1'; x_to_divider.is_extended <= '1';
end if; end if;
@ -399,6 +404,8 @@ begin
x_to_divider.divisor <= std_ulogic_vector(abs2); x_to_divider.divisor <= std_ulogic_vector(abs2);
else else
-- 32-bit forms -- 32-bit forms
x_to_multiply.data1 <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
x_to_multiply.data2 <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
x_to_divider.is_extended <= '0'; x_to_divider.is_extended <= '0';
if e_in.insn_type = OP_DIVE then -- extended forms if e_in.insn_type = OP_DIVE then -- extended forms
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000"; x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
@ -426,9 +433,9 @@ begin
end if; end if;
end if; end if;


terminate_out <= '0'; v.terminate := '0';
icache_inval <= '0'; icache_inval <= '0';
stall_out <= '0'; v.busy := '0';
f_out <= Execute1ToFetch1TypeInit; f_out <= Execute1ToFetch1TypeInit;
-- send MSR[IR] and ~MSR[PR] up to fetch1 -- send MSR[IR] and ~MSR[PR] up to fetch1
f_out.virt_mode <= ctrl.msr(MSR_IR); f_out.virt_mode <= ctrl.msr(MSR_IR);
@ -450,6 +457,9 @@ begin
v.e.exc_write_enable := '0'; v.e.exc_write_enable := '0';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := e_in.nia; v.e.exc_write_data := e_in.nia;
if valid_in = '1' then
v.last_nia := e_in.nia;
end if;


if ctrl.irq_state = WRITE_SRR1 then if ctrl.irq_state = WRITE_SRR1 then
v.e.exc_write_reg := fast_spr_num(SPR_SRR1); v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
@ -466,10 +476,10 @@ begin
f_out.virt_mode <= '0'; f_out.virt_mode <= '0';
f_out.priv_mode <= '1'; f_out.priv_mode <= '1';
f_out.redirect_nia <= ctrl.irq_nia; f_out.redirect_nia <= ctrl.irq_nia;
v.e.valid := e_in.valid; v.e.valid := '1';
report "Writing SRR1: " & to_hstring(ctrl.srr1); report "Writing SRR1: " & to_hstring(ctrl.srr1);


elsif irq_valid = '1' and e_in.valid = '1' then elsif irq_valid = '1' and valid_in = '1' then
-- we need two cycles to write srr0 and 1 -- we need two cycles to write srr0 and 1
-- will need more when we have to write HEIR -- will need more when we have to write HEIR
-- Don't deliver the interrupt until we have a valid instruction -- Don't deliver the interrupt until we have a valid instruction
@ -477,7 +487,7 @@ begin
exception := '1'; exception := '1';
ctrl_tmp.srr1 <= msr_copy(ctrl.msr); ctrl_tmp.srr1 <= msr_copy(ctrl.msr);


elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and
instr_is_privileged(e_in.insn_type, e_in.insn) then instr_is_privileged(e_in.insn_type, e_in.insn) then
-- generate a program interrupt -- generate a program interrupt
exception := '1'; exception := '1';
@ -487,12 +497,13 @@ begin
ctrl_tmp.srr1(63 - 45) <= '1'; ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction"; report "privileged instruction";
elsif e_in.valid = '1' and e_in.unit = ALU then elsif valid_in = '1' and e_in.unit = ALU then


report "execute nia " & to_hstring(e_in.nia); report "execute nia " & to_hstring(e_in.nia);


v.e.valid := '1'; v.e.valid := '1';
v.e.write_reg := e_in.write_reg; v.e.write_reg := e_in.write_reg;
v.slow_op_insn := e_in.insn_type;
v.slow_op_dest := gspr_to_gpr(e_in.write_reg); v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
v.slow_op_rc := e_in.rc; v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe; v.slow_op_oe := e_in.oe;
@ -521,7 +532,7 @@ begin
-- check bits 1-10 of the instruction to make sure it's attn -- check bits 1-10 of the instruction to make sure it's attn
-- if not then it is illegal -- if not then it is illegal
if e_in.insn(10 downto 1) = "0100000000" then if e_in.insn(10 downto 1) = "0100000000" then
terminate_out <= '1'; v.terminate := '1';
report "ATTN"; report "ATTN";
else else
illegal := '1'; illegal := '1';
@ -612,16 +623,13 @@ begin
end if; end if;
end if; end if;
end if; end if;
when OP_AND | OP_OR | OP_XOR => when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS =>
result := logical_result; result := logical_result;
result_en := '1'; result_en := '1';
when OP_B => when OP_B =>
f_out.redirect <= '1'; is_branch := '1';
if (insn_aa(e_in.insn)) then taken_branch := '1';
f_out.redirect_nia <= b_in; abs_branch := insn_aa(e_in.insn);
else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if;
when OP_BC => when OP_BC =>
-- read_data1 is CTR -- read_data1 is CTR
bo := insn_bo(e_in.insn); bo := insn_bo(e_in.insn);
@ -631,14 +639,9 @@ begin
result_en := '1'; result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR); v.e.write_reg := fast_spr_num(SPR_CTR);
end if; end if;
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then is_branch := '1';
f_out.redirect <= '1'; taken_branch := ppc_bc_taken(bo, bi, e_in.cr, a_in);
if (insn_aa(e_in.insn)) then abs_branch := insn_aa(e_in.insn);
f_out.redirect_nia <= b_in;
else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if;
end if;
when OP_BCREG => when OP_BCREG =>
-- read_data1 is CTR -- read_data1 is CTR
-- read_data2 is target register (CTR, LR or TAR) -- read_data2 is target register (CTR, LR or TAR)
@ -649,7 +652,7 @@ begin
result_en := '1'; result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR); v.e.write_reg := fast_spr_num(SPR_CTR);
end if; end if;
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then if ppc_bc_taken(bo, bi, e_in.cr, a_in) = '1' then
f_out.redirect <= '1'; f_out.redirect <= '1';
f_out.redirect_nia <= b_in(63 downto 2) & "00"; f_out.redirect_nia <= b_in(63 downto 2) & "00";
end if; end if;
@ -670,27 +673,10 @@ begin
ctrl_tmp.msr(MSR_DR) <= '1'; ctrl_tmp.msr(MSR_DR) <= '1';
end if; end if;


when OP_CMPB =>
result := ppc_cmpb(c_in, b_in);
result_en := '1';
when OP_CNTZ => when OP_CNTZ =>
v.e.valid := '0'; v.e.valid := '0';
v.cntz_in_progress := '1'; v.cntz_in_progress := '1';
stall_out <= '1'; v.busy := '1';
when OP_EXTS =>
-- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and c_in(7)) or
(e_in.data_len(1) and c_in(15)) or
(e_in.data_len(2) and c_in(31));
result := (others => negative);
if e_in.data_len(2) = '1' then
result(31 downto 16) := c_in(31 downto 16);
end if;
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
result(15 downto 8) := c_in(15 downto 8);
end if;
result(7 downto 0) := c_in(7 downto 0);
result_en := '1';
when OP_ISEL => when OP_ISEL =>
crbit := to_integer(unsigned(insn_bc(e_in.insn))); crbit := to_integer(unsigned(insn_bc(e_in.insn)));
if e_in.cr(31-crbit) = '1' then if e_in.cr(31-crbit) = '1' then
@ -762,19 +748,25 @@ begin
result(63-45) := v.e.xerc.ca32; result(63-45) := v.e.xerc.ca32;
end if; end if;
else else
spr_val := c_in;
case decode_spr_num(e_in.insn) is case decode_spr_num(e_in.insn) is
when SPR_TB => when SPR_TB =>
result := ctrl.tb; spr_val := ctrl.tb;
when SPR_DEC => when SPR_DEC =>
result := ctrl.dec; spr_val := ctrl.dec;
when 724 => -- LOG_ADDR SPR
spr_val := log_wr_addr & r.log_addr_spr;
when 725 => -- LOG_DATA SPR
spr_val := log_rd_data;
v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1);
when others => when others =>
-- mfspr from unimplemented SPRs should be a nop in -- mfspr from unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode -- supervisor mode and a program interrupt for user mode
result := c_in;
if ctrl.msr(MSR_PR) = '1' then if ctrl.msr(MSR_PR) = '1' then
illegal := '1'; illegal := '1';
end if; end if;
end case; end case;
result := spr_val;
end if; end if;
when OP_MFCR => when OP_MFCR =>
if e_in.insn(20) = '0' then if e_in.insn(20) = '0' then
@ -840,6 +832,8 @@ begin
case decode_spr_num(e_in.insn) is case decode_spr_num(e_in.insn) is
when SPR_DEC => when SPR_DEC =>
ctrl_tmp.dec <= c_in; ctrl_tmp.dec <= c_in;
when 724 => -- LOG_ADDR SPR
v.log_addr_spr := c_in(31 downto 0);
when others => when others =>
-- mtspr to unimplemented SPRs should be a nop in -- mtspr to unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode -- supervisor mode and a program interrupt for user mode
@ -848,12 +842,6 @@ begin
end if; end if;
end case; end case;
end if; end if;
when OP_POPCNT =>
result := popcnt_result;
result_en := '1';
when OP_PRTY =>
result := parity_result;
result_en := '1';
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI => when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI =>
result := rotator_result; result := rotator_result;
if e_in.output_carry = '1' then if e_in.output_carry = '1' then
@ -871,53 +859,65 @@ begin
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 => when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
v.e.valid := '0'; v.e.valid := '0';
v.mul_in_progress := '1'; v.mul_in_progress := '1';
stall_out <= '1'; v.busy := '1';
x_to_multiply.valid <= '1'; x_to_multiply.valid <= '1';


when OP_DIV | OP_DIVE | OP_MOD => when OP_DIV | OP_DIVE | OP_MOD =>
v.e.valid := '0'; v.e.valid := '0';
v.div_in_progress := '1'; v.div_in_progress := '1';
stall_out <= '1'; v.busy := '1';
x_to_divider.valid <= '1'; x_to_divider.valid <= '1';


when others => when others =>
terminate_out <= '1'; v.terminate := '1';
report "illegal"; report "illegal";
end case; end case;


v.e.rc := e_in.rc and e_in.valid; v.e.rc := e_in.rc and valid_in;

-- Mispredicted branches cause a redirect
if is_branch = '1' and taken_branch /= e_in.br_pred then
f_out.redirect <= '1';
if taken_branch = '1' then
if abs_branch = '1' then
f_out.redirect_nia <= b_in;
else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if;
else
f_out.redirect_nia <= next_nia;
end if;
end if;


-- Update LR on the next cycle after a branch link -- Update LR on the next cycle after a branch link
-- -- If we're not writing back anything else, we can write back LR
-- WARNING: The LR update isn't tracked by our hazard tracker. This -- this cycle, otherwise we take an extra cycle. We use the
-- will work (well I hope) because it only happens on branches -- exc_write path since next_nia is written through that path
-- which will flush all decoded instructions. By the time -- in other places.
-- fetch catches up, we'll have the new LR. This will
-- *not* work properly however if we have a branch predictor,
-- in which case the solution would probably be to keep a
-- local cache of the updated LR in execute1 (flushed on
-- exceptions) that is used instead of the value from
-- decode when its content is valid.
if e_in.lr = '1' then if e_in.lr = '1' then
v.lr_update := '1'; if result_en = '0' then
v.next_lr := next_nia; v.e.exc_write_enable := '1';
v.e.valid := '0'; v.e.exc_write_data := next_nia;
report "Delayed LR update to " & to_hstring(next_nia); v.e.exc_write_reg := fast_spr_num(SPR_LR);
stall_out <= '1'; else
v.lr_update := '1';
v.next_lr := next_nia;
v.e.valid := '0';
report "Delayed LR update to " & to_hstring(next_nia);
v.busy := '1';
end if;
end if; end if;


elsif e_in.valid = '1' then elsif valid_in = '1' then
-- instruction for other units, i.e. LDST -- instruction for other units, i.e. LDST
v.ldst_nia := e_in.nia;
v.e.valid := '0';
if e_in.unit = LDST then if e_in.unit = LDST then
lv.valid := '1'; lv.valid := '1';
end if; end if;


elsif r.lr_update = '1' then elsif r.lr_update = '1' then
result_en := '1'; v.e.exc_write_enable := '1';
result := r.next_lr; v.e.exc_write_data := r.next_lr;
v.e.write_reg := fast_spr_num(SPR_LR); v.e.exc_write_reg := fast_spr_num(SPR_LR);
v.e.valid := '1'; v.e.valid := '1';
elsif r.cntz_in_progress = '1' then elsif r.cntz_in_progress = '1' then
-- cnt[lt]z always takes two cycles -- cnt[lt]z always takes two cycles
@ -931,8 +931,18 @@ begin
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
(r.div_in_progress = '1' and divider_to_x.valid = '1') then (r.div_in_progress = '1' and divider_to_x.valid = '1') then
if r.mul_in_progress = '1' then if r.mul_in_progress = '1' then
result := multiply_to_x.write_reg_data; overflow := '0';
overflow := multiply_to_x.overflow; case r.slow_op_insn is
when OP_MUL_H32 =>
result := multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32);
when OP_MUL_H64 =>
result := multiply_to_x.result(127 downto 64);
when others =>
-- i.e. OP_MUL_L64
result := multiply_to_x.result(63 downto 0);
overflow := multiply_to_x.overflow;
end case;
else else
result := divider_to_x.write_reg_data; result := divider_to_x.write_reg_data;
overflow := divider_to_x.overflow; overflow := divider_to_x.overflow;
@ -952,7 +962,7 @@ begin
end if; end if;
v.e.valid := '1'; v.e.valid := '1';
else else
stall_out <= '1'; v.busy := '1';
v.mul_in_progress := r.mul_in_progress; v.mul_in_progress := r.mul_in_progress;
v.div_in_progress := r.div_in_progress; v.div_in_progress := r.div_in_progress;
end if; end if;
@ -973,7 +983,8 @@ begin
v.e.exc_write_data := next_nia; v.e.exc_write_data := next_nia;
end if; end if;
ctrl_tmp.irq_state <= WRITE_SRR1; ctrl_tmp.irq_state <= WRITE_SRR1;
v.e.valid := '1'; v.busy := '1';
v.e.valid := '0';
end if; end if;


v.e.write_data := result; v.e.write_data := result;
@ -1002,10 +1013,9 @@ begin
end if; end if;
v.e.exc_write_enable := '1'; v.e.exc_write_enable := '1';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := r.ldst_nia; v.e.exc_write_data := r.last_nia;
report "ldst exception writing srr0=" & to_hstring(r.ldst_nia); report "ldst exception writing srr0=" & to_hstring(r.last_nia);
ctrl_tmp.irq_state <= WRITE_SRR1; ctrl_tmp.irq_state <= WRITE_SRR1;
v.e.valid := '1'; -- complete the original load or store
end if; end if;


-- Outputs to loadstore1 (async) -- Outputs to loadstore1 (async)
@ -1040,5 +1050,26 @@ begin
l_out <= lv; l_out <= lv;
e_out <= r.e; e_out <= r.e;
flush_out <= f_out.redirect; flush_out <= f_out.redirect;

exception_log <= exception;
irq_valid_log <= irq_valid;
end process;

ex1_log : process(clk)
begin
if rising_edge(clk) then
log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) &
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
exception_log &
irq_valid_log &
std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
"000" &
r.e.write_enable &
r.e.valid &
f_out.redirect &
r.busy &
flush_out;
end if;
end process; end process;
log_out <= log_data;
end architecture behaviour; end architecture behaviour;

@ -23,8 +23,14 @@ entity fetch1 is
-- redirect from execution unit -- redirect from execution unit
e_in : in Execute1ToFetch1Type; e_in : in Execute1ToFetch1Type;


-- redirect from decode1
d_in : in Decode1ToFetch1Type;

-- Request to icache -- Request to icache
i_out : out Fetch1ToIcacheType i_out : out Fetch1ToIcacheType;

-- outputs to logger
log_out : out std_ulogic_vector(42 downto 0)
); );
end entity fetch1; end entity fetch1;


@ -35,16 +41,18 @@ architecture behaviour of fetch1 is
end record; end record;
signal r, r_next : Fetch1ToIcacheType; signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t; signal r_int, r_next_int : reg_internal_t;
signal log_nia : std_ulogic_vector(42 downto 0);
begin begin


regs : process(clk) regs : process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
log_nia <= r.nia(63) & r.nia(43 downto 2);
if r /= r_next then if r /= r_next then
report "fetch1 rst:" & std_ulogic'image(rst) & report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(e_in.virt_mode) & " IR:" & std_ulogic'image(e_in.virt_mode) &
" P:" & std_ulogic'image(e_in.priv_mode) & " P:" & std_ulogic'image(e_in.priv_mode) &
" R:" & std_ulogic'image(e_in.redirect) & " R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) & " S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) & " T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia) & " nia:" & to_hstring(r_next.nia) &
@ -54,6 +62,7 @@ begin
r_int <= r_next_int; r_int <= r_next_int;
end if; end if;
end process; end process;
log_out <= log_nia;


comb : process(all) comb : process(all)
variable v : Fetch1ToIcacheType; variable v : Fetch1ToIcacheType;
@ -62,6 +71,7 @@ begin
begin begin
v := r; v := r;
v_int := r_int; v_int := r_int;
v.sequential := '0';


if rst = '1' then if rst = '1' then
if alt_reset_in = '1' then if alt_reset_in = '1' then
@ -76,6 +86,8 @@ begin
v.nia := e_in.redirect_nia; v.nia := e_in.redirect_nia;
v.virt_mode := e_in.virt_mode; v.virt_mode := e_in.virt_mode;
v.priv_mode := e_in.priv_mode; v.priv_mode := e_in.priv_mode;
elsif d_in.redirect = '1' then
v.nia := d_in.redirect_nia;
elsif stall_in = '0' then elsif stall_in = '0' then


-- For debug stop/step to work properly we need a little bit of -- For debug stop/step to work properly we need a little bit of
@ -122,6 +134,7 @@ begin


if increment then if increment then
v.nia := std_logic_vector(unsigned(v.nia) + 4); v.nia := std_logic_vector(unsigned(v.nia) + 4);
v.sequential := '1';
end if; end if;
end if; end if;



@ -1,123 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

entity fetch2 is
port(
clk : in std_ulogic;
rst : in std_ulogic;

stall_in : in std_ulogic;
flush_in : in std_ulogic;

-- Results from icache
i_in : in IcacheToFetch2Type;

-- Output to decode
f_out : out Fetch2ToDecode1Type
);
end entity fetch2;

architecture behaviour of fetch2 is

-- The icache cannot stall, so we need to stash a cycle
-- of output from it when we stall.
type reg_internal_type is record
stash : IcacheToFetch2Type;
stash_valid : std_ulogic;
stopped : std_ulogic;
end record;

signal r_int, rin_int : reg_internal_type;
signal r, rin : Fetch2ToDecode1Type;

begin
regs : process(clk)
begin
if rising_edge(clk) then

if (r /= rin) then
report "fetch2 rst:" & std_ulogic'image(rst) &
" S:" & std_ulogic'image(stall_in) &
" F:" & std_ulogic'image(flush_in) &
" T:" & std_ulogic'image(rin.stop_mark) &
" V:" & std_ulogic'image(rin.valid) &
" FF:" & std_ulogic'image(rin.fetch_failed) &
" nia:" & to_hstring(rin.nia);
end if;

-- Output state remains unchanged on stall, unless we are flushing
if rst = '1' or flush_in = '1' or stall_in = '0' then
r <= rin;
end if;

-- Internal state is updated on every clock
r_int <= rin_int;
end if;
end process;

comb : process(all)
variable v : Fetch2ToDecode1Type;
variable v_int : reg_internal_type;
variable v_i_in : IcacheToFetch2Type;
begin
v := r;
v_int := r_int;

-- If stalling, stash away the current input from the icache
if stall_in = '1' and v_int.stash_valid = '0' then
v_int.stash := i_in;
v_int.stash_valid := '1';
end if;

-- If unstalling, source input from the stash and invalidate it,
-- otherwise source normally from the icache.
--
v_i_in := i_in;
if v_int.stash_valid = '1' and stall_in = '0' then
v_i_in := v_int.stash;
v_int.stash_valid := '0';
end if;

v.valid := v_i_in.valid;
v.stop_mark := v_i_in.stop_mark;
v.fetch_failed := v_i_in.fetch_failed;
v.nia := v_i_in.nia;
v.insn := v_i_in.insn;

-- Clear stash internal valid bit on flush. We still mark
-- the stash itself as valid since we still want to override
-- whatever comes form icache when unstalling, but we'll
-- override it with something invalid.
--
if flush_in = '1' then
v_int.stash.valid := '0';
v_int.stash.fetch_failed := '0';
end if;

-- If we are flushing or the instruction comes with a stop mark
-- we tag it as invalid so it doesn't get decoded and executed
if flush_in = '1' or v.stop_mark = '1' then
v.valid := '0';
v.fetch_failed := '0';
end if;

-- Clear stash on reset
if rst = '1' then
v_int.stash_valid := '0';
v.valid := '0';
end if;

-- Update registers
rin <= v;
rin_int <= v_int;

-- Update outputs
f_out <= r;
end process;

end architecture behaviour;

@ -20,7 +20,8 @@ entity toplevel is
SCLK_STARTUPE2 : boolean := false; SCLK_STARTUPE2 : boolean := false;
SPI_FLASH_OFFSET : integer := 4194304; SPI_FLASH_OFFSET : integer := 4194304;
SPI_FLASH_DEF_CKDV : natural := 1; SPI_FLASH_DEF_CKDV : natural := 1;
SPI_FLASH_DEF_QUAD : boolean := true SPI_FLASH_DEF_QUAD : boolean := true;
LOG_LENGTH : natural := 512
); );
port( port(
ext_clk : in std_ulogic; ext_clk : in std_ulogic;
@ -140,7 +141,8 @@ begin
SPI_FLASH_DLINES => 4, SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET, SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV, SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
-- System signals -- System signals

@ -4,11 +4,15 @@ use ieee.numeric_std.all;


entity gpr_hazard is entity gpr_hazard is
generic ( generic (
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 1
); );
port( port(
clk : in std_ulogic; clk : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0); gpr_write_in : in std_ulogic_vector(5 downto 0);
@ -16,6 +20,9 @@ entity gpr_hazard is
gpr_read_valid_in : in std_ulogic; gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0); gpr_read_in : in std_ulogic_vector(5 downto 0);


ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in std_ulogic_vector(5 downto 0);

stall_out : out std_ulogic; stall_out : out std_ulogic;
use_bypass : out std_ulogic use_bypass : out std_ulogic
); );
@ -25,10 +32,13 @@ architecture behaviour of gpr_hazard is
valid : std_ulogic; valid : std_ulogic;
bypass : std_ulogic; bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0); gpr : std_ulogic_vector(5 downto 0);
ugpr_valid : std_ulogic;
ugpr : std_ulogic_vector(5 downto 0);
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0')); constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));


type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type; type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);


signal r, rin : pipeline_t := pipeline_t_init; signal r, rin : pipeline_t := pipeline_t_init;
@ -45,50 +55,46 @@ begin
begin begin
v := r; v := r;


if complete_in = '1' then
v(PIPELINE_DEPTH).valid := '0';
v(PIPELINE_DEPTH).ugpr_valid := '0';
end if;

stall_out <= '0'; stall_out <= '0';
use_bypass <= '0'; use_bypass <= '0';
if gpr_read_valid_in = '1' then if gpr_read_valid_in = '1' then
if r(0).valid = '1' and r(0).gpr = gpr_read_in then loop_0: for i in 0 to PIPELINE_DEPTH loop
if r(0).bypass = '1' and stall_in = '0' then if v(i).valid = '1' and r(i).gpr = gpr_read_in then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
if r(i).bypass = '1' then if r(i).bypass = '1' then
use_bypass <= '1'; use_bypass <= '1';
else else
stall_out <= '1'; stall_out <= '1';
end if; end if;
end if; end if;
if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
stall_out <= '1';
end if;
end loop; end loop;
end if; end if;


if stall_in = '0' then -- XXX assumes PIPELINE_DEPTH = 1
if busy_in = '0' then
v(1) := v(0);
v(0).valid := '0';
v(0).ugpr_valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := gpr_write_valid_in; v(0).valid := gpr_write_valid_in;
v(0).bypass := bypass_avail; v(0).bypass := bypass_avail;
v(0).gpr := gpr_write_in; v(0).gpr := gpr_write_in;
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop v(0).ugpr_valid := ugpr_write_valid;
-- propagate to next slot v(0).ugpr := ugpr_write_reg;
v(i).valid := r(i-1).valid; end if;
v(i).bypass := r(i-1).bypass; if flush_in = '1' then
v(i).gpr := r(i-1).gpr; v(0).valid := '0';
end loop; v(0).ugpr_valid := '0';

v(1).valid := '0';
else v(1).ugpr_valid := '0';
-- stage 0 stalled, so stage 1 becomes empty
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
-- propagate to next slot
if i = 1 then
v(i).valid := '0';
else
v(i).valid := r(i-1).valid;
v(i).bypass := r(i-1).bypass;
v(i).gpr := r(i-1).gpr;
end if;
end loop;
end if; end if;


-- update registers -- update registers

@ -48,16 +48,19 @@ entity icache is
rst : in std_ulogic; rst : in std_ulogic;


i_in : in Fetch1ToIcacheType; i_in : in Fetch1ToIcacheType;
i_out : out IcacheToFetch2Type; i_out : out IcacheToDecode1Type;


m_in : in MmuToIcacheType; m_in : in MmuToIcacheType;


stall_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
inval_in : in std_ulogic; inval_in : in std_ulogic;


wishbone_out : out wishbone_master_out; wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out wishbone_in : in wishbone_slave_out;

log_out : out std_ulogic_vector(53 downto 0)
); );
end entity icache; end entity icache;


@ -112,6 +115,7 @@ architecture rtl of icache is
subtype row_t is integer range 0 to BRAM_ROWS-1; subtype row_t is integer range 0 to BRAM_ROWS-1;
subtype index_t is integer range 0 to NUM_LINES-1; subtype index_t is integer range 0 to NUM_LINES-1;
subtype way_t is integer range 0 to NUM_WAYS-1; subtype way_t is integer range 0 to NUM_WAYS-1;
subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);


-- The cache data BRAM organized as described above for each way -- The cache data BRAM organized as described above for each way
subtype cache_row_t is std_ulogic_vector(wishbone_data_bits-1 downto 0); subtype cache_row_t is std_ulogic_vector(wishbone_data_bits-1 downto 0);
@ -129,6 +133,7 @@ architecture rtl of icache is
-- The cache valid bits -- The cache valid bits
subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0); subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
type cache_valids_t is array(index_t) of cache_way_valids_t; type cache_valids_t is array(index_t) of cache_way_valids_t;
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;


-- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
signal cache_tags : cache_tags_array_t; signal cache_tags : cache_tags_array_t;
@ -176,6 +181,8 @@ architecture rtl of icache is
store_row : row_t; store_row : row_t;
store_tag : cache_tag_t; store_tag : cache_tag_t;
store_valid : std_ulogic; store_valid : std_ulogic;
end_row_ix : row_in_line_t;
rows_valid : row_per_line_valid_t;


-- TLB miss state -- TLB miss state
fetch_failed : std_ulogic; fetch_failed : std_ulogic;
@ -197,6 +204,10 @@ architecture rtl of icache is
signal ra_valid : std_ulogic; signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic; signal priv_fault : std_ulogic;
signal access_ok : std_ulogic; signal access_ok : std_ulogic;
signal use_previous : std_ulogic;

-- Output data to logger
signal log_data : std_ulogic_vector(53 downto 0);


-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
@ -219,20 +230,24 @@ architecture rtl of icache is
return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS))); return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)));
end; end;


-- Return the index of a row within a line
function get_row_of_line(row: row_t) return row_in_line_t is
variable row_v : unsigned(ROW_BITS-1 downto 0);
begin
row_v := to_unsigned(row, ROW_BITS);
return row_v(ROW_LINEBITS-1 downto 0);
end;

-- Returns whether this is the last row of a line -- Returns whether this is the last row of a line
function is_last_row_addr(addr: wishbone_addr_type) return boolean is function is_last_row_addr(addr: wishbone_addr_type; last: row_in_line_t) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones; return unsigned(addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)) = last;
end; end;


-- Returns whether this is the last row of a line -- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is function is_last_row(row: row_t; last: row_in_line_t) return boolean is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS)); return get_row_of_line(row) = last;
return row_v(ROW_LINEBITS-1 downto 0) = ones;
end; end;


-- Return the address of the next row in the current cache line -- Return the address of the next row in the current cache line
@ -361,7 +376,7 @@ begin
); );
process(all) process(all)
begin begin
do_read <= '1'; do_read <= not (stall_in or use_previous);
do_write <= '0'; do_write <= '0';
if wishbone_in.ack = '1' and r.store_way = i then if wishbone_in.ack = '1' and r.store_way = i then
do_write <= '1'; do_write <= '1';
@ -466,23 +481,38 @@ begin
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
variable hit_way : way_t; variable hit_way : way_t;
begin begin
-- i_in.sequential means that i_in.nia this cycle is 4 more than
-- last cycle. If we read more than 32 bits at a time, had a cache hit
-- last cycle, and we don't want the first 32-bit chunk, then we can
-- keep the data we read last cycle and just use that.
if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
use_previous <= i_in.sequential and r.hit_valid;
else
use_previous <= '0';
end if;

-- Extract line, row and tag from request -- Extract line, row and tag from request
req_index <= get_index(i_in.nia); req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia); req_row <= get_row(i_in.nia);
req_tag <= get_tag(real_addr); req_tag <= get_tag(real_addr);


-- Calculate address of beginning of cache line, will be -- Calculate address of beginning of cache row, will be
-- used for cache miss processing if needed -- used for cache miss processing if needed
-- --
req_laddr <= (63 downto REAL_ADDR_BITS => '0') & req_laddr <= (63 downto REAL_ADDR_BITS => '0') &
real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) & real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0'); (ROW_OFF_BITS-1 downto 0 => '0');


-- Test if pending request is a hit on any way -- Test if pending request is a hit on any way
hit_way := 0; hit_way := 0;
is_hit := '0'; is_hit := '0';
for i in way_t loop for i in way_t loop
if i_in.req = '1' and cache_valids(req_index)(i) = '1' then if i_in.req = '1' and
(cache_valids(req_index)(i) = '1' or
(r.state = WAIT_ACK and
req_index = r.store_index and
i = r.store_way and
r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
if read_tag(i, cache_tags(req_index)) = req_tag then if read_tag(i, cache_tags(req_index)) = req_tag then
hit_way := i; hit_way := i;
is_hit := '1'; is_hit := '1';
@ -528,25 +558,35 @@ begin
icache_hit : process(clk) icache_hit : process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
-- On a hit, latch the request for the next cycle, when the BRAM data -- keep outputs to fetch2 unchanged on a stall
-- will be available on the cache_out output of the corresponding way -- except that flush or reset sets valid to 0
-- -- If use_previous, keep the same data as last cycle and use the second half
r.hit_valid <= req_is_hit; if stall_in = '1' or use_previous = '1' then
-- Send stop marks and NIA down regardless of validity if rst = '1' or flush_in = '1' then
r.hit_smark <= i_in.stop_mark; r.hit_valid <= '0';
r.hit_nia <= i_in.nia; end if;
if req_is_hit = '1' then else
r.hit_way <= req_hit_way; -- On a hit, latch the request for the next cycle, when the BRAM data
r.hit_smark <= i_in.stop_mark; -- will be available on the cache_out output of the corresponding way

--
report "cache hit nia:" & to_hstring(i_in.nia) & r.hit_valid <= req_is_hit;
" IR:" & std_ulogic'image(i_in.virt_mode) & if req_is_hit = '1' then
" SM:" & std_ulogic'image(i_in.stop_mark) & r.hit_way <= req_hit_way;
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) & report "cache hit nia:" & to_hstring(i_in.nia) &
" way:" & integer'image(req_hit_way) & " IR:" & std_ulogic'image(i_in.virt_mode) &
" RA:" & to_hstring(real_addr); " SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) &
" way:" & integer'image(req_hit_way) &
" RA:" & to_hstring(real_addr);
end if;
end if; end if;
if stall_in = '0' then
-- Send stop marks and NIA down regardless of validity
r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia;
end if;
end if; end if;
end process; end process;


@ -584,6 +624,11 @@ begin
-- Main state machine -- Main state machine
case r.state is case r.state is
when IDLE => when IDLE =>
-- Reset per-row valid flags, only used in WAIT_ACK
for i in 0 to ROW_PER_LINE - 1 loop
r.rows_valid(i) <= '0';
end loop;

-- We need to read a cache line -- We need to read a cache line
if req_is_miss = '1' then if req_is_miss = '1' then
report "cache miss nia:" & to_hstring(i_in.nia) & report "cache miss nia:" & to_hstring(i_in.nia) &
@ -600,6 +645,7 @@ begin
r.store_row <= get_row(req_laddr); r.store_row <= get_row(req_laddr);
r.store_tag <= req_tag; r.store_tag <= req_tag;
r.store_valid <= '1'; r.store_valid <= '1';
r.end_row_ix <= get_row_of_line(get_row(req_laddr)) - 1;


-- Prep for first wishbone read. We calculate the address of -- Prep for first wishbone read. We calculate the address of
-- the start of the cache line and start the WB cycle. -- the start of the cache line and start the WB cycle.
@ -637,7 +683,7 @@ begin
-- stb and set stbs_done so we can handle an eventual last -- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle. -- ack on the same cycle.
-- --
if is_last_row_addr(r.wb.adr) then if is_last_row_addr(r.wb.adr, r.end_row_ix) then
r.wb.stb <= '0'; r.wb.stb <= '0';
stbs_done := true; stbs_done := true;
end if; end if;
@ -648,8 +694,9 @@ begin


-- Incoming acks processing -- Incoming acks processing
if wishbone_in.ack = '1' then if wishbone_in.ack = '1' then
r.rows_valid(r.store_row mod ROW_PER_LINE) <= '1';
-- Check for completion -- Check for completion
if stbs_done and is_last_row(r.store_row) then if stbs_done and is_last_row(r.store_row, r.end_row_ix) then
-- Complete wishbone cycle -- Complete wishbone cycle
r.wb.cyc <= '0'; r.wb.cyc <= '0';


@ -669,9 +716,41 @@ begin
-- TLB miss and protection fault processing -- TLB miss and protection fault processing
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
r.fetch_failed <= '0'; r.fetch_failed <= '0';
elsif i_in.req = '1' and access_ok = '0' then elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
r.fetch_failed <= '1'; r.fetch_failed <= '1';
end if; end if;
end if; end if;
end process; end process;

data_log: process(clk)
variable lway: way_t;
variable wstate: std_ulogic;
begin
if rising_edge(clk) then
if req_is_hit then
lway := req_hit_way;
else
lway := replace_way;
end if;
wstate := '0';
if r.state /= IDLE then
wstate := '1';
end if;
log_data <= i_out.valid &
i_out.insn &
wishbone_in.ack &
r.wb.adr(5 downto 3) &
r.wb.stb & r.wb.cyc &
wishbone_in.stall &
stall_out &
r.fetch_failed &
r.hit_nia(5 downto 2) &
wstate &
std_ulogic_vector(to_unsigned(lway, 3)) &
req_is_hit & req_is_miss &
access_ok &
ra_valid;
end if;
end process;
log_out <= log_data;
end; end;

@ -13,7 +13,7 @@ architecture behave of icache_tb is
signal rst : std_ulogic; signal rst : std_ulogic;


signal i_out : Fetch1ToIcacheType; signal i_out : Fetch1ToIcacheType;
signal i_in : IcacheToFetch2Type; signal i_in : IcacheToDecode1Type;


signal m_out : MmuToIcacheType; signal m_out : MmuToIcacheType;


@ -33,6 +33,7 @@ begin
i_in => i_out, i_in => i_out,
i_out => i_in, i_out => i_in,
m_in => m_out, m_in => m_out,
stall_in => '0',
flush_in => '0', flush_in => '0',
inval_in => '0', inval_in => '0',
wishbone_out => wb_bram_in, wishbone_out => wb_bram_in,

@ -25,7 +25,8 @@ entity loadstore1 is
m_in : in MmuToLoadstore1Type; m_in : in MmuToLoadstore1Type;


dc_stall : in std_ulogic; dc_stall : in std_ulogic;
stall_out : out std_ulogic
log_out : out std_ulogic_vector(9 downto 0)
); );
end loadstore1; end loadstore1;


@ -41,7 +42,8 @@ architecture behave of loadstore1 is
ACK_WAIT, -- waiting for ack from dcache ACK_WAIT, -- waiting for ack from dcache
LD_UPDATE, -- writing rA with computed addr on load LD_UPDATE, -- writing rA with computed addr on load
MMU_LOOKUP, -- waiting for MMU to look up translation MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT -- waiting for MMU to finish doing a tlbie TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
SPR_CMPLT -- complete a mf/tspr operation
); );


type reg_stage_t is record type reg_stage_t is record
@ -49,6 +51,7 @@ architecture behave of loadstore1 is
load : std_ulogic; load : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
dcbz : std_ulogic; dcbz : std_ulogic;
mfspr : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0);
@ -71,6 +74,7 @@ architecture behave of loadstore1 is
dar : std_ulogic_vector(63 downto 0); dar : std_ulogic_vector(63 downto 0);
dsisr : std_ulogic_vector(31 downto 0); dsisr : std_ulogic_vector(31 downto 0);
instr_fault : std_ulogic; instr_fault : std_ulogic;
sprval : std_ulogic_vector(63 downto 0);
end record; end record;


type byte_sel_t is array(0 to 7) of std_ulogic; type byte_sel_t is array(0 to 7) of std_ulogic;
@ -80,6 +84,8 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t; signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0); signal lsu_sum : std_ulogic_vector(63 downto 0);


signal log_data : std_ulogic_vector(9 downto 0);

-- Generate byte enables from sizes -- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin begin
@ -135,7 +141,7 @@ begin
variable long_sel : std_ulogic_vector(15 downto 0); variable long_sel : std_ulogic_vector(15 downto 0);
variable byte_sel : std_ulogic_vector(7 downto 0); variable byte_sel : std_ulogic_vector(7 downto 0);
variable req : std_ulogic; variable req : std_ulogic;
variable stall : std_ulogic; variable busy : std_ulogic;
variable addr : std_ulogic_vector(63 downto 0); variable addr : std_ulogic_vector(63 downto 0);
variable wdata : std_ulogic_vector(63 downto 0); variable wdata : std_ulogic_vector(63 downto 0);
variable write_enable : std_ulogic; variable write_enable : std_ulogic;
@ -147,9 +153,7 @@ begin
variable use_second : byte_sel_t; variable use_second : byte_sel_t;
variable trim_ctl : trim_ctl_t; variable trim_ctl : trim_ctl_t;
variable negative : std_ulogic; variable negative : std_ulogic;
variable mfspr : std_ulogic;
variable sprn : std_ulogic_vector(9 downto 0); variable sprn : std_ulogic_vector(9 downto 0);
variable sprval : std_ulogic_vector(63 downto 0);
variable exception : std_ulogic; variable exception : std_ulogic;
variable next_addr : std_ulogic_vector(63 downto 0); variable next_addr : std_ulogic_vector(63 downto 0);
variable mmureq : std_ulogic; variable mmureq : std_ulogic;
@ -159,16 +163,12 @@ begin
begin begin
v := r; v := r;
req := '0'; req := '0';
stall := '0';
done := '0';
byte_sel := (others => '0'); byte_sel := (others => '0');
addr := lsu_sum; addr := lsu_sum;
mfspr := '0'; v.mfspr := '0';
mmu_mtspr := '0'; mmu_mtspr := '0';
itlb_fault := '0'; itlb_fault := '0';
sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10)); sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
sprval := (others => '0'); -- avoid inferred latches
exception := '0';
dsisr := (others => '0'); dsisr := (others => '0');
mmureq := '0'; mmureq := '0';


@ -227,130 +227,18 @@ begin
-- compute (addr + 8) & ~7 for the second doubleword when unaligned -- compute (addr + 8) & ~7 for the second doubleword when unaligned
next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000"; next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";


done := '0';
exception := '0';
case r.state is case r.state is
when IDLE => when IDLE =>
if l_in.valid = '1' then
v.addr := lsu_sum;
v.load := '0';
v.dcbz := '0';
v.tlbie := '0';
v.instr_fault := '0';
v.dwords_done := '0';
case l_in.op is
when OP_STORE =>
req := '1';
when OP_LOAD =>
req := '1';
v.load := '1';
when OP_DCBZ =>
req := '1';
v.dcbz := '1';
when OP_TLBIE =>
mmureq := '1';
stall := '1';
v.tlbie := '1';
v.state := TLBIE_WAIT;
when OP_MFSPR =>
done := '1';
mfspr := '1';
-- partial decode on SPR number should be adequate given
-- the restricted set that get sent down this path
if sprn(9) = '0' and sprn(5) = '0' then
if sprn(0) = '0' then
sprval := x"00000000" & r.dsisr;
else
sprval := r.dar;
end if;
else
-- reading one of the SPRs in the MMU
sprval := m_in.sprval;
end if;
when OP_MTSPR =>
if sprn(9) = '0' and sprn(5) = '0' then
if sprn(0) = '0' then
v.dsisr := l_in.data(31 downto 0);
else
v.dar := l_in.data;
end if;
done := '1';
else
-- writing one of the SPRs in the MMU
mmu_mtspr := '1';
stall := '1';
v.state := TLBIE_WAIT;
end if;
when OP_FETCH_FAILED =>
-- send it to the MMU to do the radix walk
addr := l_in.nia;
v.addr := l_in.nia;
v.instr_fault := '1';
mmureq := '1';
stall := '1';
v.state := MMU_LOOKUP;
when others =>
assert false report "unknown op sent to loadstore1";
end case;

v.write_reg := l_in.write_reg;
v.length := l_in.length;
v.byte_reverse := l_in.byte_reverse;
v.sign_extend := l_in.sign_extend;
v.update := l_in.update;
v.update_reg := l_in.update_reg;
v.xerc := l_in.xerc;
v.reserve := l_in.reserve;
v.rc := l_in.rc;
v.nc := l_in.ci;
v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode;

-- XXX Temporary hack. Mark the op as non-cachable if the address
-- is the form 0xc------- for a real-mode access.
--
-- This will have to be replaced by a combination of implementing the
-- proper HV CI load/store instructions and having an MMU to get the I
-- bit otherwise.
if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
v.nc := '1';
end if;

-- Do length_to_sel and work out if we are doing 2 dwords
long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
byte_sel := long_sel(7 downto 0);
v.first_bytes := byte_sel;
v.second_bytes := long_sel(15 downto 8);

-- Do byte reversing and rotating for stores in the first cycle
byte_offset := unsigned(lsu_sum(2 downto 0));
brev_lenm1 := "000";
if l_in.byte_reverse = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
j := to_integer(k) * 8;
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
end loop;

if req = '1' then
stall := '1';
if long_sel(15 downto 8) = "00000000" then
v.state := ACK_WAIT;
else
v.state := SECOND_REQ;
end if;
end if;
end if;


when SECOND_REQ => when SECOND_REQ =>
addr := next_addr; addr := next_addr;
byte_sel := r.second_bytes; byte_sel := r.second_bytes;
req := '1'; req := '1';
stall := '1';
v.state := ACK_WAIT; v.state := ACK_WAIT;


when ACK_WAIT => when ACK_WAIT =>
stall := '1';
if d_in.valid = '1' then if d_in.valid = '1' then
if d_in.error = '1' then if d_in.error = '1' then
-- dcache will discard the second request if it -- dcache will discard the second request if it
@ -388,7 +276,6 @@ begin
else else
-- stores write back rA update in this cycle -- stores write back rA update in this cycle
do_update := r.update; do_update := r.update;
stall := '0';
done := '1'; done := '1';
v.state := IDLE; v.state := IDLE;
end if; end if;
@ -397,7 +284,6 @@ begin
end if; end if;


when MMU_LOOKUP => when MMU_LOOKUP =>
stall := '1';
if r.dwords_done = '1' then if r.dwords_done = '1' then
addr := next_addr; addr := next_addr;
byte_sel := r.second_bytes; byte_sel := r.second_bytes;
@ -418,7 +304,6 @@ begin
end if; end if;
else else
-- nothing to do, the icache retries automatically -- nothing to do, the icache retries automatically
stall := '0';
done := '1'; done := '1';
v.state := IDLE; v.state := IDLE;
end if; end if;
@ -434,10 +319,8 @@ begin
end if; end if;


when TLBIE_WAIT => when TLBIE_WAIT =>
stall := '1';
if m_in.done = '1' then if m_in.done = '1' then
-- tlbie is finished -- tlbie is finished
stall := '0';
done := '1'; done := '1';
v.state := IDLE; v.state := IDLE;
end if; end if;
@ -447,8 +330,123 @@ begin
v.state := IDLE; v.state := IDLE;
done := '1'; done := '1';


when SPR_CMPLT =>
done := '1';
v.state := IDLE;

end case; end case;


busy := '1';
if r.state = IDLE or done = '1' then
busy := '0';
end if;

-- Note that l_in.valid is gated with busy inside execute1
if l_in.valid = '1' then
v.addr := lsu_sum;
v.load := '0';
v.dcbz := '0';
v.tlbie := '0';
v.instr_fault := '0';
v.dwords_done := '0';
v.write_reg := l_in.write_reg;
v.length := l_in.length;
v.byte_reverse := l_in.byte_reverse;
v.sign_extend := l_in.sign_extend;
v.update := l_in.update;
v.update_reg := l_in.update_reg;
v.xerc := l_in.xerc;
v.reserve := l_in.reserve;
v.rc := l_in.rc;
v.nc := l_in.ci;
v.virt_mode := l_in.virt_mode;
v.priv_mode := l_in.priv_mode;

-- XXX Temporary hack. Mark the op as non-cachable if the address
-- is the form 0xc------- for a real-mode access.
if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
v.nc := '1';
end if;

-- Do length_to_sel and work out if we are doing 2 dwords
long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
byte_sel := long_sel(7 downto 0);
v.first_bytes := byte_sel;
v.second_bytes := long_sel(15 downto 8);

-- Do byte reversing and rotating for stores in the first cycle
byte_offset := unsigned(lsu_sum(2 downto 0));
brev_lenm1 := "000";
if l_in.byte_reverse = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
j := to_integer(k) * 8;
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
end loop;

case l_in.op is
when OP_STORE =>
req := '1';
when OP_LOAD =>
req := '1';
v.load := '1';
when OP_DCBZ =>
req := '1';
v.dcbz := '1';
when OP_TLBIE =>
mmureq := '1';
v.tlbie := '1';
v.state := TLBIE_WAIT;
when OP_MFSPR =>
v.mfspr := '1';
-- partial decode on SPR number should be adequate given
-- the restricted set that get sent down this path
if sprn(9) = '0' and sprn(5) = '0' then
if sprn(0) = '0' then
v.sprval := x"00000000" & r.dsisr;
else
v.sprval := r.dar;
end if;
else
-- reading one of the SPRs in the MMU
v.sprval := m_in.sprval;
end if;
v.state := SPR_CMPLT;
when OP_MTSPR =>
if sprn(9) = '0' and sprn(5) = '0' then
if sprn(0) = '0' then
v.dsisr := l_in.data(31 downto 0);
else
v.dar := l_in.data;
end if;
v.state := SPR_CMPLT;
else
-- writing one of the SPRs in the MMU
mmu_mtspr := '1';
v.state := TLBIE_WAIT;
end if;
when OP_FETCH_FAILED =>
-- send it to the MMU to do the radix walk
addr := l_in.nia;
v.addr := l_in.nia;
v.instr_fault := '1';
mmureq := '1';
v.state := MMU_LOOKUP;
when others =>
assert false report "unknown op sent to loadstore1";
end case;

if req = '1' then
if long_sel(15 downto 8) = "00000000" then
v.state := ACK_WAIT;
else
v.state := SECOND_REQ;
end if;
end if;
end if;

-- Update outputs to dcache -- Update outputs to dcache
d_out.valid <= req; d_out.valid <= req;
d_out.load <= v.load; d_out.load <= v.load;
@ -477,10 +475,10 @@ begin
-- Multiplex either cache data to the destination GPR or -- Multiplex either cache data to the destination GPR or
-- the address for the rA update. -- the address for the rA update.
l_out.valid <= done; l_out.valid <= done;
if mfspr = '1' then if r.mfspr = '1' then
l_out.write_enable <= '1'; l_out.write_enable <= '1';
l_out.write_reg <= l_in.write_reg; l_out.write_reg <= r.write_reg;
l_out.write_data <= sprval; l_out.write_data <= r.sprval;
elsif do_update = '1' then elsif do_update = '1' then
l_out.write_enable <= '1'; l_out.write_enable <= '1';
l_out.write_reg <= r.update_reg; l_out.write_reg <= r.update_reg;
@ -495,6 +493,7 @@ begin
l_out.store_done <= d_in.store_done; l_out.store_done <= d_in.store_done;


-- update exception info back to execute1 -- update exception info back to execute1
e_out.busy <= busy;
e_out.exception <= exception; e_out.exception <= exception;
e_out.instr_fault <= r.instr_fault; e_out.instr_fault <= r.instr_fault;
e_out.invalid <= m_in.invalid; e_out.invalid <= m_in.invalid;
@ -509,11 +508,23 @@ begin
end if; end if;
end if; end if;


stall_out <= stall;

-- Update registers -- Update registers
rin <= v; rin <= v;


end process; end process;


ls1_log: process(clk)
begin
if rising_edge(clk) then
log_data <= e_out.busy &
e_out.exception &
l_out.valid &
m_out.valid &
d_out.valid &
m_in.done &
r.dwords_done &
std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
end if;
end process;
log_out <= log_data;
end; end;

@ -4,6 +4,7 @@ use ieee.numeric_std.all;


library work; library work;
use work.decode_types.all; use work.decode_types.all;
use work.ppc_fx_insns.all;


entity logical is entity logical is
port ( port (
@ -13,9 +14,7 @@ entity logical is
invert_in : in std_ulogic; invert_in : in std_ulogic;
invert_out : in std_ulogic; invert_out : in std_ulogic;
result : out std_ulogic_vector(63 downto 0); result : out std_ulogic_vector(63 downto 0);
datalen : in std_logic_vector(3 downto 0); datalen : in std_logic_vector(3 downto 0)
popcnt : out std_ulogic_vector(63 downto 0);
parity : out std_ulogic_vector(63 downto 0)
); );
end entity logical; end entity logical;


@ -34,30 +33,14 @@ architecture behaviour of logical is
type sixbit2 is array(0 to 1) of sixbit; type sixbit2 is array(0 to 1) of sixbit;
signal pc32 : sixbit2; signal pc32 : sixbit2;
signal par0, par1 : std_ulogic; signal par0, par1 : std_ulogic;
signal popcnt : std_ulogic_vector(63 downto 0);
signal parity : std_ulogic_vector(63 downto 0);


begin begin
logical_0: process(all) logical_0: process(all)
variable rb_adj, tmp : std_ulogic_vector(63 downto 0); variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
variable negative : std_ulogic;
begin begin
rb_adj := rb;
if invert_in = '1' then
rb_adj := not rb;
end if;

case op is
when OP_AND =>
tmp := rs and rb_adj;
when OP_OR =>
tmp := rs or rb_adj;
when others =>
tmp := rs xor rb_adj;
end case;

result <= tmp;
if invert_out = '1' then
result <= not tmp;
end if;

-- population counts -- population counts
for i in 0 to 31 loop for i in 0 to 31 loop
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1)); pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
@ -98,5 +81,44 @@ begin
parity(32) <= par1; parity(32) <= par1;
end if; end if;


rb_adj := rb;
if invert_in = '1' then
rb_adj := not rb;
end if;

case op is
when OP_AND =>
tmp := rs and rb_adj;
when OP_OR =>
tmp := rs or rb_adj;
when OP_XOR =>
tmp := rs xor rb_adj;
when OP_POPCNT =>
tmp := popcnt;
when OP_PRTY =>
tmp := parity;
when OP_CMPB =>
tmp := ppc_cmpb(rs, rb);
when others =>
-- EXTS
-- note datalen is a 1-hot encoding
negative := (datalen(0) and rs(7)) or
(datalen(1) and rs(15)) or
(datalen(2) and rs(31));
tmp := (others => negative);
if datalen(2) = '1' then
tmp(31 downto 16) := rs(31 downto 16);
end if;
if datalen(2) = '1' or datalen(1) = '1' then
tmp(15 downto 8) := rs(15 downto 8);
end if;
tmp(7 downto 0) := rs(7 downto 0);
end case;

if invert_out = '1' then
tmp := not tmp;
end if;
result <= tmp;

end process; end process;
end behaviour; end behaviour;

@ -9,7 +9,6 @@ filesets:
- wishbone_types.vhdl - wishbone_types.vhdl
- common.vhdl - common.vhdl
- fetch1.vhdl - fetch1.vhdl
- fetch2.vhdl
- decode1.vhdl - decode1.vhdl
- helpers.vhdl - helpers.vhdl
- decode2.vhdl - decode2.vhdl
@ -27,7 +26,6 @@ filesets:
- loadstore1.vhdl - loadstore1.vhdl
- mmu.vhdl - mmu.vhdl
- dcache.vhdl - dcache.vhdl
- multiply.vhdl
- divider.vhdl - divider.vhdl
- rotator.vhdl - rotator.vhdl
- writeback.vhdl - writeback.vhdl
@ -63,6 +61,10 @@ filesets:
- fpga/firmware.hex : {copyto : firmware.hex, file_type : user} - fpga/firmware.hex : {copyto : firmware.hex, file_type : user}
file_type : vhdlSource-2008 file_type : vhdlSource-2008


xilinx_specific:
files:
- xilinx-mult.vhdl : {file_type : vhdlSource-2008}

debug_xilinx: debug_xilinx:
files: files:
- dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008} - dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008}
@ -101,20 +103,21 @@ filesets:
targets: targets:
nexys_a7: nexys_a7:
default_tool: vivado default_tool: vivado
filesets: [core, nexys_a7, soc, fpga, debug_xilinx] filesets: [core, nexys_a7, soc, fpga, debug_xilinx, xilinx_specific]
parameters : parameters :
- memory_size - memory_size
- ram_init_file - ram_init_file
- clk_input - clk_input
- clk_frequency - clk_frequency
- disable_flatten_core - disable_flatten_core
- log_length=2048
tools: tools:
vivado: {part : xc7a100tcsg324-1} vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel toplevel : toplevel


nexys_video-nodram: nexys_video-nodram:
default_tool: vivado default_tool: vivado
filesets: [core, nexys_video, soc, fpga, debug_xilinx] filesets: [core, nexys_video, soc, fpga, debug_xilinx, xilinx_specific]
parameters : parameters :
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -122,13 +125,14 @@ targets:
- clk_frequency - clk_frequency
- disable_flatten_core - disable_flatten_core
- spi_flash_offset=10485760 - spi_flash_offset=10485760
- log_length=2048
tools: tools:
vivado: {part : xc7a200tsbg484-1} vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel toplevel : toplevel


nexys_video: nexys_video:
default_tool: vivado default_tool: vivado
filesets: [core, nexys_video, soc, fpga, debug_xilinx, litedram] filesets: [core, nexys_video, soc, fpga, debug_xilinx, litedram, xilinx_specific]
parameters: parameters:
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -136,6 +140,7 @@ targets:
- disable_flatten_core - disable_flatten_core
- no_bram - no_bram
- spi_flash_offset=10485760 - spi_flash_offset=10485760
- log_length=2048
generate: [dram_nexys_video] generate: [dram_nexys_video]
tools: tools:
vivado: {part : xc7a200tsbg484-1} vivado: {part : xc7a200tsbg484-1}
@ -143,7 +148,7 @@ targets:


arty_a7-35-nodram: arty_a7-35-nodram:
default_tool: vivado default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx] filesets: [core, arty_a7, soc, fpga, debug_xilinx, xilinx_specific]
parameters : parameters :
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -151,13 +156,14 @@ targets:
- clk_frequency - clk_frequency
- disable_flatten_core - disable_flatten_core
- spi_flash_offset=3145728 - spi_flash_offset=3145728
- log_length=512
tools: tools:
vivado: {part : xc7a35ticsg324-1L} vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel toplevel : toplevel


arty_a7-35: arty_a7-35:
default_tool: vivado default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram] filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, xilinx_specific]
parameters : parameters :
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -165,6 +171,7 @@ targets:
- disable_flatten_core - disable_flatten_core
- no_bram - no_bram
- spi_flash_offset=3145728 - spi_flash_offset=3145728
- log_length=512
generate: [dram_arty] generate: [dram_arty]
tools: tools:
vivado: {part : xc7a35ticsg324-1L} vivado: {part : xc7a35ticsg324-1L}
@ -172,7 +179,7 @@ targets:


arty_a7-100-nodram: arty_a7-100-nodram:
default_tool: vivado default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx] filesets: [core, arty_a7, soc, fpga, debug_xilinx, xilinx_specific]
parameters : parameters :
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -180,13 +187,14 @@ targets:
- clk_frequency - clk_frequency
- disable_flatten_core - disable_flatten_core
- spi_flash_offset=4194304 - spi_flash_offset=4194304
- log_length=2048
tools: tools:
vivado: {part : xc7a100ticsg324-1L} vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel toplevel : toplevel


arty_a7-100: arty_a7-100:
default_tool: vivado default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram] filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, xilinx_specific]
parameters: parameters:
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -194,6 +202,7 @@ targets:
- disable_flatten_core - disable_flatten_core
- no_bram - no_bram
- spi_flash_offset=4194304 - spi_flash_offset=4194304
- log_length=2048
generate: [dram_arty] generate: [dram_arty]
tools: tools:
vivado: {part : xc7a100ticsg324-1L} vivado: {part : xc7a100ticsg324-1L}
@ -201,7 +210,7 @@ targets:


cmod_a7-35: cmod_a7-35:
default_tool: vivado default_tool: vivado
filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx] filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx, xilinx_specific]
parameters : parameters :
- memory_size - memory_size
- ram_init_file - ram_init_file
@ -209,12 +218,13 @@ targets:
- clk_input=12000000 - clk_input=12000000
- clk_frequency - clk_frequency
- disable_flatten_core - disable_flatten_core
- log_length=512
tools: tools:
vivado: {part : xc7a35tcpg236-1} vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel toplevel : toplevel


synth: synth:
filesets: [core, soc] filesets: [core, soc, xilinx_specific]
tools: tools:
vivado: {pnr : none} vivado: {pnr : none}
toplevel: core toplevel: core
@ -279,3 +289,8 @@ parameters:
datatype : int datatype : int
description : Offset (in bytes) in the SPI flash of the code payload to run description : Offset (in bytes) in the SPI flash of the code payload to run
paramtype : generic paramtype : generic

log_length:
datatype : int
description : Length of the core log buffer in entries (32 bytes each)
paramtype : generic

@ -27,6 +27,7 @@ end mmu;
architecture behave of mmu is architecture behave of mmu is


type state_t is (IDLE, type state_t is (IDLE,
DO_TLBIE,
TLB_WAIT, TLB_WAIT,
PROC_TBL_READ, PROC_TBL_READ,
PROC_TBL_WAIT, PROC_TBL_WAIT,
@ -44,6 +45,7 @@ architecture behave of mmu is
store : std_ulogic; store : std_ulogic;
priv : std_ulogic; priv : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
inval_all : std_ulogic;
-- config SPRs -- config SPRs
prtbl : std_ulogic_vector(63 downto 0); prtbl : std_ulogic_vector(63 downto 0);
pid : std_ulogic_vector(31 downto 0); pid : std_ulogic_vector(31 downto 0);
@ -178,7 +180,6 @@ begin
variable tlb_load : std_ulogic; variable tlb_load : std_ulogic;
variable itlb_load : std_ulogic; variable itlb_load : std_ulogic;
variable tlbie_req : std_ulogic; variable tlbie_req : std_ulogic;
variable inval_all : std_ulogic;
variable prtbl_rd : std_ulogic; variable prtbl_rd : std_ulogic;
variable pt_valid : std_ulogic; variable pt_valid : std_ulogic;
variable effpid : std_ulogic_vector(31 downto 0); variable effpid : std_ulogic_vector(31 downto 0);
@ -207,7 +208,7 @@ begin
tlb_load := '0'; tlb_load := '0';
itlb_load := '0'; itlb_load := '0';
tlbie_req := '0'; tlbie_req := '0';
inval_all := '0'; v.inval_all := '0';
prtbl_rd := '0'; prtbl_rd := '0';


-- Radix tree data structures in memory are big-endian, -- Radix tree data structures in memory are big-endian,
@ -240,19 +241,17 @@ begin
v.store := not (l_in.load or l_in.iside); v.store := not (l_in.load or l_in.iside);
v.priv := l_in.priv; v.priv := l_in.priv;
if l_in.tlbie = '1' then if l_in.tlbie = '1' then
dcreq := '1';
tlbie_req := '1';
-- Invalidate all iTLB/dTLB entries for tlbie with -- Invalidate all iTLB/dTLB entries for tlbie with
-- RB[IS] != 0 or RB[AP] != 0, or for slbia -- RB[IS] != 0 or RB[AP] != 0, or for slbia
inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or v.inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
l_in.addr(7) or l_in.addr(6) or l_in.addr(5); l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
-- The RIC field of the tlbie instruction comes across on the -- The RIC field of the tlbie instruction comes across on the
-- sprn bus as bits 2--3. RIC=2 flushes process table caches. -- sprn bus as bits 2--3. RIC=2 flushes process table caches.
if l_in.sprn(3) = '1' then if l_in.sprn(3) = '1' then
v.pt0_valid := '0'; v.pt0_valid := '0';
v.pt3_valid := '0'; v.pt3_valid := '0';
end if; end if;
v.state := TLB_WAIT; v.state := DO_TLBIE;
else else
v.valid := '1'; v.valid := '1';
if pt_valid = '0' then if pt_valid = '0' then
@ -281,12 +280,15 @@ begin
v.pt3_valid := '0'; v.pt3_valid := '0';
end if; end if;
v.pt0_valid := '0'; v.pt0_valid := '0';
dcreq := '1'; v.inval_all := '1';
tlbie_req := '1'; v.state := DO_TLBIE;
inval_all := '1';
v.state := TLB_WAIT;
end if; end if;


when DO_TLBIE =>
dcreq := '1';
tlbie_req := '1';
v.state := TLB_WAIT;

when TLB_WAIT => when TLB_WAIT =>
if d_in.done = '1' then if d_in.done = '1' then
done := '1'; done := '1';
@ -436,8 +438,8 @@ begin


-- drive outputs -- drive outputs
if tlbie_req = '1' then if tlbie_req = '1' then
addr := l_in.addr; addr := r.addr;
tlb_data := l_in.rs; tlb_data := (others => '0');
elsif tlb_load = '1' then elsif tlb_load = '1' then
addr := r.addr(63 downto 12) & x"000"; addr := r.addr(63 downto 12) & x"000";
tlb_data := pte; tlb_data := pte;
@ -458,14 +460,14 @@ begin


d_out.valid <= dcreq; d_out.valid <= dcreq;
d_out.tlbie <= tlbie_req; d_out.tlbie <= tlbie_req;
d_out.doall <= inval_all; d_out.doall <= r.inval_all;
d_out.tlbld <= tlb_load; d_out.tlbld <= tlb_load;
d_out.addr <= addr; d_out.addr <= addr;
d_out.pte <= tlb_data; d_out.pte <= tlb_data;


i_out.tlbld <= itlb_load; i_out.tlbld <= itlb_load;
i_out.tlbie <= tlbie_req; i_out.tlbie <= tlbie_req;
i_out.doall <= inval_all; i_out.doall <= r.inval_all;
i_out.addr <= addr; i_out.addr <= addr;
i_out.pte <= tlb_data; i_out.pte <= tlb_data;



@ -4,11 +4,10 @@ use ieee.numeric_std.all;


library work; library work;
use work.common.all; use work.common.all;
use work.decode_types.all;


entity multiply is entity multiply is
generic ( generic (
PIPELINE_DEPTH : natural := 16 PIPELINE_DEPTH : natural := 4
); );
port ( port (
clk : in std_logic; clk : in std_logic;
@ -19,17 +18,16 @@ entity multiply is
end entity multiply; end entity multiply;


architecture behaviour of multiply is architecture behaviour of multiply is
signal m: Execute1ToMultiplyType; signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit;


type multiply_pipeline_stage is record type multiply_pipeline_stage is record
valid : std_ulogic; valid : std_ulogic;
insn_type : insn_type_t; data : unsigned(127 downto 0);
data : signed(129 downto 0);
is_32bit : std_ulogic; is_32bit : std_ulogic;
neg_res : std_ulogic;
end record; end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
insn_type => OP_ILLEGAL, is_32bit => '0', neg_res => '0',
is_32bit => '0',
data => (others => '0')); data => (others => '0'));


type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
@ -51,50 +49,35 @@ begin


multiply_1: process(all) multiply_1: process(all)
variable v : reg_type; variable v : reg_type;
variable d : std_ulogic_vector(129 downto 0); variable d : std_ulogic_vector(127 downto 0);
variable d2 : std_ulogic_vector(63 downto 0); variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic; variable ov : std_ulogic;
begin begin
v := r;

m_out <= MultiplyToExecute1Init;

v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).insn_type := m.insn_type; v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2);
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).is_32bit := m.is_32bit; v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).neg_res := m.neg_result;


loop_0: for i in 1 to PIPELINE_DEPTH-1 loop loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1); v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop; end loop;


d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then
ov := '0'; d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
else
d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data));
end if;


-- TODO: Handle overflows ov := '0';
case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
when OP_MUL_L64 => ov := (or d(63 downto 31)) and not (and d(63 downto 31));
d2 := d(63 downto 0); else
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then ov := (or d(127 downto 63)) and not (and d(127 downto 63));
ov := (or d(63 downto 31)) and not (and d(63 downto 31)); end if;
else
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
end if;
when OP_MUL_H32 =>
d2 := d(63 downto 32) & d(63 downto 32);
when OP_MUL_H64 =>
d2 := d(127 downto 64);
when others =>
--report "Illegal insn type in multiplier";
d2 := (others => '0');
end case;


m_out.write_reg_data <= d2; m_out.result <= d;
m_out.overflow <= ov; m_out.overflow <= ov;

m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid;
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1';
end if;


rin <= v; rin <= v;
end process; end process;

@ -17,8 +17,18 @@ architecture behave of multiply_tb is


constant pipeline_depth : integer := 4; constant pipeline_depth : integer := 4;


signal m1 : Execute1ToMultiplyType; signal m1 : Execute1ToMultiplyType := Execute1ToMultiplyInit;
signal m2 : MultiplyToExecute1Type; signal m2 : MultiplyToExecute1Type;

function absval(x: std_ulogic_vector) return std_ulogic_vector is
begin
if x(x'left) = '1' then
return std_ulogic_vector(- signed(x));
else
return x;
end if;
end;

begin begin
multiply_0: entity work.multiply multiply_0: entity work.multiply
generic map (PIPELINE_DEPTH => pipeline_depth) generic map (PIPELINE_DEPTH => pipeline_depth)
@ -39,9 +49,8 @@ begin
wait for clk_period; wait for clk_period;


m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64; m1.data1 <= x"0000000000001000";
m1.data1 <= '0' & x"0000000000001000"; m1.data2 <= x"0000000000001111";
m1.data2 <= '0' & x"0000000000001111";


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';
@ -56,7 +65,7 @@ begin


wait for clk_period; wait for clk_period;
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_data = x"0000000001111000"; assert m2.result = x"00000000000000000000000001111000";


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';
@ -70,7 +79,7 @@ begin


wait for clk_period * (pipeline_depth-1); wait for clk_period * (pipeline_depth-1);
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_data = x"0000000001111000"; assert m2.result = x"00000000000000000000000001111000";


-- test mulld -- test mulld
mulld_loop : for i in 0 to 1000 loop mulld_loop : for i in 0 to 1000 loop
@ -79,10 +88,10 @@ begin


behave_rt := ppc_mulld(ra, rb); behave_rt := ppc_mulld(ra, rb);


m1.data1 <= '0' & ra; m1.data1 <= absval(ra);
m1.data2 <= '0' & rb; m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


wait for clk_period; wait for clk_period;


@ -92,8 +101,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
report "bad mulld expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulld expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
end loop; end loop;


-- test mulhdu -- test mulhdu
@ -103,10 +112,10 @@ begin


behave_rt := ppc_mulhdu(ra, rb); behave_rt := ppc_mulhdu(ra, rb);


m1.data1 <= '0' & ra; m1.data1 <= ra;
m1.data2 <= '0' & rb; m1.data2 <= rb;
m1.neg_result <= '0';
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H64;


wait for clk_period; wait for clk_period;


@ -116,8 +125,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(127 downto 64))
report "bad mulhdu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhdu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(127 downto 64));
end loop; end loop;


-- test mulhd -- test mulhd
@ -127,10 +136,10 @@ begin


behave_rt := ppc_mulhd(ra, rb); behave_rt := ppc_mulhd(ra, rb);


m1.data1 <= ra(63) & ra; m1.data1 <= absval(ra);
m1.data2 <= rb(63) & rb; m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H64;


wait for clk_period; wait for clk_period;


@ -140,8 +149,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(127 downto 64))
report "bad mulhd expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhd expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(127 downto 64));
end loop; end loop;


-- test mullw -- test mullw
@ -151,12 +160,12 @@ begin


behave_rt := ppc_mullw(ra, rb); behave_rt := ppc_mullw(ra, rb);


m1.data1 <= (others => ra(31)); m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => rb(31)); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


wait for clk_period; wait for clk_period;


@ -166,8 +175,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
report "bad mullw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mullw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
end loop; end loop;


-- test mulhw -- test mulhw
@ -177,12 +186,12 @@ begin


behave_rt := ppc_mulhw(ra, rb); behave_rt := ppc_mulhw(ra, rb);


m1.data1 <= (others => ra(31)); m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => rb(31)); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H32;


wait for clk_period; wait for clk_period;


@ -192,8 +201,9 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32))
report "bad mulhw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhw expected " & to_hstring(behave_rt) & " got " &
to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32));
end loop; end loop;


-- test mulhwu -- test mulhwu
@ -207,8 +217,8 @@ begin
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => '0'); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= rb(31 downto 0);
m1.neg_result <= '0';
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H32;


wait for clk_period; wait for clk_period;


@ -218,8 +228,9 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32))
report "bad mulhwu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhwu expected " & to_hstring(behave_rt) & " got " &
to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32));
end loop; end loop;


-- test mulli -- test mulli
@ -229,11 +240,11 @@ begin


behave_rt := ppc_mulli(ra, si); behave_rt := ppc_mulli(ra, si);


m1.data1 <= ra(63) & ra; m1.data1 <= absval(ra);
m1.data2 <= (others => si(15)); m1.data2 <= (others => '0');
m1.data2(15 downto 0) <= si; m1.data2(15 downto 0) <= absval(si);
m1.neg_result <= ra(63) xor si(15);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


wait for clk_period; wait for clk_period;


@ -243,8 +254,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
report "bad mulli expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulli expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
end loop; end loop;


std.env.finish; std.env.finish;

@ -93,7 +93,7 @@ package ppc_fx_insns is
function ppc_divd (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector; function ppc_divd (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector; function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;


function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer; function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return std_ulogic;
end package ppc_fx_insns; end package ppc_fx_insns;


package body ppc_fx_insns is package body ppc_fx_insns is
@ -785,13 +785,12 @@ package body ppc_fx_insns is
return std_ulogic_vector(resize(tmp, ra'length)); return std_ulogic_vector(resize(tmp, ra'length));
end; end;


function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer is function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return std_ulogic is
variable crfield: integer; variable crfield: integer;
variable crbit_match: std_ulogic; variable crbit_match: std_ulogic;
variable ctr_not_zero: std_ulogic; variable ctr_not_zero: std_ulogic;
variable ctr_ok: std_ulogic; variable ctr_ok: std_ulogic;
variable cond_ok: std_ulogic; variable cond_ok: std_ulogic;
variable ret: integer;
begin begin
crfield := to_integer(unsigned(bi)); crfield := to_integer(unsigned(bi));
-- BE bit numbering -- BE bit numbering
@ -800,12 +799,7 @@ package body ppc_fx_insns is
ctr_not_zero := '1' when ctr /= x"0000000000000001" else '0'; ctr_not_zero := '1' when ctr /= x"0000000000000001" else '0';
ctr_ok := bo(4-2) or (ctr_not_zero xor bo(4-3)); ctr_ok := bo(4-2) or (ctr_not_zero xor bo(4-3));
cond_ok := bo(4-0) or crbit_match; cond_ok := bo(4-0) or crbit_match;
if ctr_ok = '1' and cond_ok = '1' then return ctr_ok and cond_ok;
ret := 1;
else
ret := 0;
end if;
return ret;
end; end;


end package body ppc_fx_insns; end package body ppc_fx_insns;

@ -24,7 +24,9 @@ entity register_file is


-- debug -- debug
sim_dump : in std_ulogic; sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic sim_dump_done : out std_ulogic;

log_out : out std_ulogic_vector(70 downto 0)
); );
end entity register_file; end entity register_file;


@ -34,18 +36,19 @@ architecture behaviour of register_file is
signal rd_port_b : std_ulogic_vector(63 downto 0); signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic; signal dbg_ack : std_ulogic;
signal log_data : std_ulogic_vector(70 downto 0);
begin begin
-- synchronous writes -- synchronous writes
register_write_0: process(clk) register_write_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if w_in.write_enable = '1' then if w_in.write_enable = '1' then
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
if w_in.write_reg(5) = '0' then if w_in.write_reg(5) = '0' then
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data); report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
else else
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data); report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
end if; end if;
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data; registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data;
end if; end if;
end if; end if;
@ -131,4 +134,13 @@ begin
sim_dump_done <= '0'; sim_dump_done <= '0';
end generate; end generate;


reg_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_data &
w_in.write_enable &
w_in.write_reg;
end if;
end process;
log_out <= log_data;
end architecture behaviour; end architecture behaviour;

@ -0,0 +1,12 @@
CFLAGS = -O2 -g -Wall -std=c99

all: fmt_log

fmt_log: fmt_log.c
$(CC) -o $@ $^ $(CFLAGS)

clean:
rm -f fmt_log
distclean:
rm -f *~

@ -0,0 +1,235 @@
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

typedef unsigned long long u64;

struct log_entry {
u64 nia_lo: 42;
u64 nia_hi: 1;
u64 ic_ra_valid: 1;
u64 ic_access_ok: 1;
u64 ic_is_miss: 1;
u64 ic_is_hit: 1;
u64 ic_way: 3;
u64 ic_state: 1;
u64 ic_part_nia: 4;
u64 ic_fetch_failed: 1;
u64 ic_stall_out: 1;
u64 ic_wb_stall: 1;
u64 ic_wb_cyc: 1;
u64 ic_wb_stb: 1;
u64 ic_wb_adr: 3;
u64 ic_wb_ack: 1;

u64 ic_insn: 32;
u64 ic_valid: 1;
u64 d1_valid: 1;
u64 d1_unit: 2;
u64 d1_part_nia: 4;
u64 d1_insn_type: 6;
u64 d2_bypass_a: 1;
u64 d2_bypass_b: 1;
u64 d2_bypass_c: 1;
u64 d2_stall_out: 1;
u64 d2_stopped_out: 1;
u64 d2_valid: 1;
u64 d2_part_nia: 4;
u64 e1_flush_out: 1;
u64 e1_stall_out: 1;
u64 e1_redirect: 1;
u64 e1_valid: 1;
u64 e1_write_enable: 1;
u64 e1_unused: 3;

u64 e1_irq_state: 1;
u64 e1_irq: 1;
u64 e1_exception: 1;
u64 e1_msr_dr: 1;
u64 e1_msr_ir: 1;
u64 e1_msr_pr: 1;
u64 e1_msr_ee: 1;
u64 pad1: 5;
u64 ls_state: 3;
u64 ls_dw_done: 1;
u64 ls_min_done: 1;
u64 ls_do_valid: 1;
u64 ls_mo_valid: 1;
u64 ls_lo_valid: 1;
u64 ls_eo_except: 1;
u64 ls_stall_out: 1;
u64 pad2: 2;
u64 dc_state: 3;
u64 dc_ra_valid: 1;
u64 dc_tlb_way: 3;
u64 dc_stall_out: 1;
u64 dc_op: 3;
u64 dc_do_valid: 1;
u64 dc_do_error: 1;
u64 dc_wb_cyc: 1;
u64 dc_wb_stb: 1;
u64 dc_wb_ack: 1;
u64 dc_wb_stall: 1;
u64 dc_wb_adr: 3;
u64 cr_wr_mask: 8;
u64 cr_wr_data: 4;
u64 cr_wr_enable: 1;
u64 reg_wr_reg: 6;
u64 reg_wr_enable: 1;

u64 reg_wr_data;
};

#define FLAG(i, y) (log.i? y: ' ')
#define FLGA(i, y, z) (log.i? y: z)
#define PNIA(f) (full_nia[log.f] & 0xff)

const char *units[4] = { "--", "al", "ls", "?3" };
const char *ops[64] =
{
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "maddhd ",
"maddhdu", "maddld ", "mcrxr ", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ",
"mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ",
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shr ", "sync ", "tlbie ", "trap ", "xor ", "ffail ", "?62 ", "?63 "
};

const char *spr_names[13] =
{
"lr ", "ctr", "sr0", "sr1", "hr0", "hr1", "sg0", "sg1",
"sg2", "sg3", "hg0", "hg1", "xer"
};
int main(int ac, char **av)
{
struct log_entry log;
u64 full_nia[16];
long int lineno = 1;
FILE *f;
const char *filename;
int i;
long int ncompl = 0;

if (ac != 1 && ac != 2) {
fprintf(stderr, "Usage: %s [filename]\n", av[0]);
exit(1);
}
f = stdin;
if (ac == 2) {
filename = av[1];
f = fopen(filename, "rb");
if (f == NULL) {
perror(filename);
exit(1);
}
}

for (i = 0; i < 15; ++i)
full_nia[i] = i << 2;

while (fread(&log, sizeof(log), 1, f) == 1) {
full_nia[log.nia_lo & 0xf] = (log.nia_hi? 0xc000000000000000: 0) |
(log.nia_lo << 2);
if (lineno % 20 == 1) {
printf(" fetch1 NIA icache decode1 decode2 execute1 loadstore dcache CR GSPR\n");
printf(" ---------------- TAHW S -WB-- pN --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n");
printf(" LdMy t csnSa IA IA it IA abc le srx EPID em tw rd mx tAwp vr csnSa 0 k\n");
}
printf("%4ld %c0000%.11llx %c ", lineno,
(log.nia_hi? 'c': '0'),
(unsigned long long)log.nia_lo << 2,
FLAG(ic_stall_out, '|'));
printf("%c%c%c%d %c %c%c%d%c%c %.2llx ",
FLGA(ic_ra_valid, ' ', 'T'),
FLGA(ic_access_ok, ' ', 'X'),
FLGA(ic_is_hit, 'H', FLGA(ic_is_miss, 'M', ' ')),
log.ic_way,
FLAG(ic_state, 'W'),
FLAG(ic_wb_cyc, 'c'),
FLAG(ic_wb_stb, 's'),
log.ic_wb_adr,
FLAG(ic_wb_stall, 'S'),
FLAG(ic_wb_ack, 'a'),
PNIA(ic_part_nia));
if (log.ic_valid)
printf("%.8x", log.ic_insn);
else if (log.ic_fetch_failed)
printf("!!!!!!!!");
else
printf("--------");
printf(" %c%c %.2llx ",
FLAG(ic_valid, '>'),
FLAG(d2_stall_out, '|'),
PNIA(d1_part_nia));
if (log.d1_valid)
printf("%s %s",
units[log.d1_unit],
ops[log.d1_insn_type]);
else
printf("-- -------");
printf(" %c%c ",
FLAG(d1_valid, '>'),
FLAG(d2_stall_out, '|'));
printf("%.2llx %c%c%c %c%c ",
PNIA(d2_part_nia),
FLAG(d2_bypass_a, 'a'),
FLAG(d2_bypass_b, 'b'),
FLAG(d2_bypass_c, 'c'),
FLAG(d2_valid, '>'),
FLAG(e1_stall_out, '|'));
printf("%c%c %c%c%c %c%c%c%c %c%c ",
FLAG(e1_flush_out, 'F'),
FLAG(e1_redirect, 'R'),
FLAG(e1_irq_state, 'w'),
FLAG(e1_irq, 'I'),
FLAG(e1_exception, 'X'),
FLAG(e1_msr_ee, 'E'),
FLGA(e1_msr_pr, 'u', 's'),
FLAG(e1_msr_ir, 'I'),
FLAG(e1_msr_dr, 'D'),
FLAG(e1_write_enable, 'W'),
FLAG(e1_valid, 'C'));
printf("%c %d%d %c%c %c%c %c ",
FLAG(ls_stall_out, '|'),
log.ls_state,
log.ls_dw_done,
FLAG(ls_mo_valid, 'M'),
FLAG(ls_min_done, 'm'),
FLAG(ls_lo_valid, 'C'),
FLAG(ls_eo_except, 'X'),
FLAG(ls_do_valid, '>'));
printf("%d%c%d%d %c%c %c%c%d%c%c ",
log.dc_state,
FLAG(dc_ra_valid, 'R'),
log.dc_tlb_way,
log.dc_op,
FLAG(dc_do_valid, 'V'),
FLAG(dc_do_error, 'E'),
FLAG(dc_wb_cyc, 'c'),
FLAG(dc_wb_stb, 's'),
log.dc_wb_adr,
FLAG(dc_wb_stall, 'S'),
FLAG(dc_wb_ack, 'a'));
if (log.cr_wr_enable)
printf("%x>%.2x ", log.cr_wr_data, log.cr_wr_mask);
else
printf(" ");
if (log.reg_wr_enable) {
if (log.reg_wr_reg < 32 || log.reg_wr_reg > 44)
printf("r%02d", log.reg_wr_reg);
else
printf("%s", spr_names[log.reg_wr_reg - 32]);
printf("=%.16llx", log.reg_wr_data);
}
printf("\n");
++lineno;
if (log.ls_lo_valid || log.e1_valid)
++ncompl;
}
printf("%ld instructions completed, %.2f CPI\n", ncompl,
(double)(lineno - 1) / ncompl);
exit(0);
}

@ -42,6 +42,9 @@
#define DBG_CORE_GSPR_INDEX 0x14 #define DBG_CORE_GSPR_INDEX 0x14
#define DBG_CORE_GSPR_DATA 0x15 #define DBG_CORE_GSPR_DATA 0x15


#define DBG_LOG_ADDR 0x16
#define DBG_LOG_DATA 0x17

static bool debug; static bool debug;


struct backend { struct backend {
@ -507,8 +510,10 @@ static void load(const char *filename, uint64_t addr)
// if (rc < 8) XXX fixup endian ? // if (rc < 8) XXX fixup endian ?
check(dmi_write(DBG_WB_DATA, data), "writing WB_DATA"); check(dmi_write(DBG_WB_DATA, data), "writing WB_DATA");
count += 8; count += 8;
if (!(count % 1024)) if (!(count % 1024)) {
printf("%x...\n", count); printf("%x...\r", count);
fflush(stdout);
}
} }
close(fd); close(fd);
printf("%x done.\n", count); printf("%x done.\n", count);
@ -535,8 +540,10 @@ static void save(const char *filename, uint64_t addr, uint64_t size)
break; break;
} }
count += 8; count += 8;
if (!(count % 1024)) if (!(count % 1024)) {
printf("%x...\n", count); printf("%x...\r", count);
fflush(stdout);
}
if (count >= size) if (count >= size)
break; break;
} }
@ -544,6 +551,73 @@ static void save(const char *filename, uint64_t addr, uint64_t size)
printf("%x done.\n", count); printf("%x done.\n", count);
} }


#define LOG_STOP 0x80000000ull

static void log_start(void)
{
check(dmi_write(DBG_LOG_ADDR, 0), "writing LOG_ADDR");
}

static void log_stop(void)
{
uint64_t lsize, laddr, waddr;

check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");
check(dmi_read(DBG_LOG_ADDR, &laddr), "reading LOG_ADDR");
waddr = laddr >> 32;
for (lsize = 1; lsize; lsize <<= 1)
if ((waddr >> 1) < lsize)
break;
waddr &= ~lsize;
printf("Log size = %" PRIu64 " entries, ", lsize);
printf("write ptr = %" PRIx64 "\n", waddr);
}

static void log_dump(const char *filename)
{
FILE *f;
uint64_t lsize, laddr, waddr;
uint64_t orig_laddr;
uint64_t i, ldata;

f = fopen(filename, "w");
if (f == NULL) {
fprintf(stderr, "Failed to create '%s': %s\n", filename,
strerror(errno));
exit(1);
}

check(dmi_read(DBG_LOG_ADDR, &orig_laddr), "reading LOG_ADDR");
if (!(orig_laddr & LOG_STOP))
check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");

waddr = orig_laddr >> 32;
for (lsize = 1; lsize; lsize <<= 1)
if ((waddr >> 1) < lsize)
break;
waddr &= ~lsize;
printf("Log size = %" PRIu64 " entries\n", lsize);

laddr = LOG_STOP | (waddr << 2);
check(dmi_write(DBG_LOG_ADDR, laddr), "writing LOG_ADDR");

for (i = 0; i < lsize * 4; ++i) {
check(dmi_read(DBG_LOG_DATA, &ldata), "reading LOG_DATA");
if (fwrite(&ldata, sizeof(ldata), 1, f) != 1) {
fprintf(stderr, "Write error on %s\n", filename);
exit(1);
}
if (!(i % 128)) {
printf("%" PRIu64 "...\r", i * 8);
fflush(stdout);
}
}
fclose(f);
printf("%" PRIu64 " done\n", lsize * 32);

check(dmi_write(DBG_LOG_ADDR, orig_laddr), "writing LOG_ADDR");
}

static void usage(const char *cmd) static void usage(const char *cmd)
{ {
fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd); fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd);
@ -568,6 +642,12 @@ static void usage(const char *cmd)
fprintf(stderr, " gpr <reg> [count]\n"); fprintf(stderr, " gpr <reg> [count]\n");
fprintf(stderr, " status\n"); fprintf(stderr, " status\n");


fprintf(stderr, "\n");
fprintf(stderr, " Core logging:\n");
fprintf(stderr, " lstart start logging\n");
fprintf(stderr, " lstop stop logging\n");
fprintf(stderr, " ldump <file> dump log to file\n");

fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, " JTAG:\n"); fprintf(stderr, " JTAG:\n");
fprintf(stderr, " dmiread <hex addr>\n"); fprintf(stderr, " dmiread <hex addr>\n");
@ -706,6 +786,17 @@ int main(int argc, char *argv[])
if (((i+1) < argc) && isdigit(argv[i+1][0])) if (((i+1) < argc) && isdigit(argv[i+1][0]))
count = strtoul(argv[++i], NULL, 10); count = strtoul(argv[++i], NULL, 10);
gpr_read(reg, count); gpr_read(reg, count);
} else if (strcmp(argv[i], "lstart") == 0) {
log_start();
} else if (strcmp(argv[i], "lstop") == 0) {
log_stop();
} else if (strcmp(argv[i], "ldump") == 0) {
const char *filename;

if ((i+1) >= argc)
usage(argv[0]);
filename = argv[++i];
log_dump(filename);
} else { } else {
fprintf(stderr, "Unknown command %s\n", argv[i]); fprintf(stderr, "Unknown command %s\n", argv[i]);
exit(1); exit(1);

@ -51,7 +51,8 @@ entity soc is
SPI_FLASH_DLINES : positive := 1; SPI_FLASH_DLINES : positive := 1;
SPI_FLASH_OFFSET : integer := 0; SPI_FLASH_OFFSET : integer := 0;
SPI_FLASH_DEF_CKDV : natural := 2; SPI_FLASH_DEF_CKDV : natural := 2;
SPI_FLASH_DEF_QUAD : boolean := false SPI_FLASH_DEF_QUAD : boolean := false;
LOG_LENGTH : natural := 512
); );
port( port(
rst : in std_ulogic; rst : in std_ulogic;
@ -198,7 +199,8 @@ begin
generic map( generic map(
SIM => SIM, SIM => SIM,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1') ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
LOG_LENGTH => LOG_LENGTH
) )
port map( port map(
clk => system_clk, clk => system_clk,

Binary file not shown.

@ -9,6 +9,14 @@
#undef DEBUG #undef DEBUG
//#define DEBUG 1 //#define DEBUG 1


void delay(void)
{
static volatile int i;

for (i = 0; i < 10; ++i)
;
}

void print_number(unsigned int i) // only for i = 0-999 void print_number(unsigned int i) // only for i = 0-999
{ {
unsigned int j, k, m; unsigned int j, k, m;
@ -148,14 +156,17 @@ int xics_test_0(void)
xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt


// still masked, so shouldn't happen yet // still masked, so shouldn't happen yet
delay();
assert(isrs_run == 0); assert(isrs_run == 0);


// unmask IPI only // unmask IPI only
xics_write8(XICS_XIRR, 0x40); xics_write8(XICS_XIRR, 0x40);
delay();
assert(isrs_run == ISR_IPI); assert(isrs_run == ISR_IPI);


// unmask UART // unmask UART
xics_write8(XICS_XIRR, 0xc0); xics_write8(XICS_XIRR, 0xc0);
delay();
assert(isrs_run == (ISR_IPI | ISR_UART)); assert(isrs_run == (ISR_IPI | ISR_UART));


// cleanup // cleanup
@ -174,12 +185,14 @@ int xics_test_1(void)
xics_write8(XICS_XIRR, 0xff); // allow all interrupts xics_write8(XICS_XIRR, 0xff); // allow all interrupts


// should be none pending // should be none pending
delay();
assert(isrs_run == 0); assert(isrs_run == 0);


// trigger both // trigger both
potato_uart_irq_en(); // cause 0x500 interrupt potato_uart_irq_en(); // cause 0x500 interrupt
xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt


delay();
assert(isrs_run == (ISR_IPI | ISR_UART)); assert(isrs_run == (ISR_IPI | ISR_UART));


// cleanup // cleanup
@ -208,9 +221,11 @@ int xics_test_2(void)
// trigger an IPI // trigger an IPI
xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt


delay();
assert(isrs_run == 0); assert(isrs_run == 0);


mtmsrd(0x9000000000008003); // EE on mtmsrd(0x9000000000008003); // EE on
delay();
assert(isrs_run == ISR_IPI); assert(isrs_run == ISR_IPI);


// cleanup // cleanup

@ -22,27 +22,33 @@ end entity writeback;


architecture behaviour of writeback is architecture behaviour of writeback is
begin begin
writeback_1: process(all) writeback_0: process(clk)
variable x : std_ulogic_vector(0 downto 0); variable x : std_ulogic_vector(0 downto 0);
variable y : std_ulogic_vector(0 downto 0); variable y : std_ulogic_vector(0 downto 0);
variable w : std_ulogic_vector(0 downto 0); variable w : std_ulogic_vector(0 downto 0);
begin
if rising_edge(clk) then
-- Do consistency checks only on the clock edge
x(0) := e_in.valid;
y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
end if;
end process;

writeback_1: process(all)
variable cf: std_ulogic_vector(3 downto 0); variable cf: std_ulogic_vector(3 downto 0);
variable zero : std_ulogic; variable zero : std_ulogic;
variable sign : std_ulogic; variable sign : std_ulogic;
variable scf : std_ulogic_vector(3 downto 0); variable scf : std_ulogic_vector(3 downto 0);
begin begin
x(0) := e_in.valid;
y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;

w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;



@ -0,0 +1,985 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

library unisim;
use unisim.vcomponents.all;

entity multiply is
port (
clk : in std_logic;

m_in : in Execute1ToMultiplyType;
m_out : out MultiplyToExecute1Type
);
end entity multiply;

architecture behaviour of multiply is
signal m00_p, m01_p, m02_p, m03_p : std_ulogic_vector(47 downto 0);
signal m00_pc : std_ulogic_vector(47 downto 0);
signal m10_p, m11_p, m12_p, m13_p : std_ulogic_vector(47 downto 0);
signal m11_pc, m12_pc, m13_pc : std_ulogic_vector(47 downto 0);
signal m20_p, m21_p, m22_p, m23_p : std_ulogic_vector(47 downto 0);
signal s0_pc, s1_pc : std_ulogic_vector(47 downto 0);
signal product_lo : std_ulogic_vector(31 downto 0);
signal product : std_ulogic_vector(127 downto 0);
signal addend : std_ulogic_vector(127 downto 0);
signal s0_carry, p0_carry : std_ulogic_vector(3 downto 0);
signal p0_mask : std_ulogic_vector(47 downto 0);
signal p0_pat, p0_patb : std_ulogic;
signal p1_pat, p1_patb : std_ulogic;

signal req_32bit, r32_1 : std_ulogic;
signal req_neg, rneg_1 : std_ulogic;
signal valid_1 : std_ulogic;

begin
addend <= (others => m_in.neg_result);

m00: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(16 downto 0),
BCIN => (others => '0'),
C => "00000000000000" & addend(33 downto 0),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m00_p,
PCIN => (others => '0'),
PCOUT => m00_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m01: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(33 downto 17),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "1010101",
P => m01_p,
PCIN => m00_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m02: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(50 downto 34),
BCIN => (others => '0'),
C => x"0000000" & "000" & addend(50 downto 34),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m02_p,
PCIN => (others => '0'),
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m03: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => "00000" & m_in.data2(63 downto 51),
BCIN => (others => '0'),
C => x"000000" & '0' & addend(73 downto 51),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m03_p,
PCIN => (others => '0'),
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m10: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(16 downto 0),
BCIN => (others => '0'),
C => x"000" & "00" & m01_p(39 downto 6),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m10_p,
PCIN => (others => '0'),
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m11: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(33 downto 17),
BCIN => (others => '0'),
C => x"000" & "00" & m02_p(39 downto 6),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m11_p,
PCIN => (others => '0'),
PCOUT => m11_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m12: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(50 downto 34),
BCIN => (others => '0'),
C => x"0000" & '0' & m03_p(36 downto 6),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m12_p,
PCIN => (others => '0'),
PCOUT => m12_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m13: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
ACIN => (others => '0'),
ALUMODE => "0000",
B => "00000" & m_in.data2(63 downto 51),
BCIN => (others => '0'),
C => x"0000000" & "000" & addend(90 downto 74),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m13_p,
PCIN => (others => '0'),
PCOUT => m13_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m20: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "000000" & m_in.data1(63 downto 40),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(16 downto 0),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010101",
P => m20_p,
PCIN => m11_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m21: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "000000" & m_in.data1(63 downto 40),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(33 downto 17),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010101",
P => m21_p,
PCIN => m12_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m22: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "000000" & m_in.data1(63 downto 40),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(50 downto 34),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010101",
P => m22_p,
PCIN => m13_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m23: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
INMODEREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "000000" & m_in.data1(63 downto 40),
ACIN => (others => '0'),
ALUMODE => "0000",
B => "00000" & m_in.data2(63 downto 51),
BCIN => (others => '0'),
C => x"00" & "000" & addend(127 downto 91),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m23_p,
PCIN => (others => '0'),
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

s0: DSP48E1
generic map (
ACASCREG => 1,
ALUMODEREG => 0,
AREG => 1,
BCASCREG => 1,
BREG => 1,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 1,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0,
USE_MULT => "none"
)
port map (
A => m22_p(5 downto 0) & x"0000" & m10_p(34 downto 27),
ACIN => (others => '0'),
ALUMODE => "0000",
B => m10_p(26 downto 9),
BCIN => (others => '0'),
C => m20_p(39 downto 0) & m02_p(5 downto 0) & "00",
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CARRYOUT => s0_carry,
CEA1 => '0',
CEA2 => '1',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '1',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0001111",
PCIN => (others => '0'),
PCOUT => s0_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

s1: DSP48E1
generic map (
ACASCREG => 1,
ALUMODEREG => 0,
AREG => 1,
BCASCREG => 1,
BREG => 1,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 1,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0,
USE_MULT => "none"
)
port map (
A => x"000" & m22_p(41 downto 24),
ACIN => (others => '0'),
ALUMODE => "0000",
B => m22_p(23 downto 6),
BCIN => (others => '0'),
C => m23_p(36 downto 0) & x"00" & "0" & m20_p(41 downto 40),
CARRYCASCIN => '0',
CARRYIN => s0_carry(3),
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '1',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '1',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0001111",
PCIN => (others => '0'),
PCOUT => s1_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

-- mask is 0 for 32-bit ops, 0x0000ffffffff for 64-bit
p0_mask(47 downto 31) <= (others => '0');
p0_mask(30 downto 0) <= (others => not r32_1);

p0: DSP48E1
generic map (
ACASCREG => 1,
ALUMODEREG => 1,
AREG => 1,
BCASCREG => 1,
BREG => 1,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 1,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0,
SEL_MASK => "C",
USE_MULT => "none",
USE_PATTERN_DETECT => "PATDET"
)
port map (
A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0',
ACIN => (others => '0'),
ALUMODE => "00" & rneg_1 & '0',
B => (others => '0'),
BCIN => (others => '0'),
C => p0_mask,
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CARRYOUT => p0_carry,
CEA1 => '0',
CEA2 => '1',
CEAD => '0',
CEALUMODE => '1',
CEB1 => '0',
CEB2 => '1',
CEC => '1',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010011",
P => product(79 downto 32),
PATTERNDETECT => p0_pat,
PATTERNBDETECT => p0_patb,
PCIN => s0_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

p1: DSP48E1
generic map (
ACASCREG => 1,
ALUMODEREG => 1,
AREG => 1,
BCASCREG => 1,
BREG => 1,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MASK => x"000000000000",
MREG => 0,
OPMODEREG => 0,
PREG => 0,
USE_MULT => "none",
USE_PATTERN_DETECT => "PATDET"
)
port map (
A => x"0000000" & '0' & m21_p(41),
ACIN => (others => '0'),
ALUMODE => "00" & rneg_1 & '0',
B => m21_p(40 downto 23),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => p0_carry(3),
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '1',
CEAD => '0',
CEALUMODE => '1',
CEB1 => '0',
CEB2 => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010011",
P => product(127 downto 80),
PATTERNDETECT => p1_pat,
PATTERNBDETECT => p1_patb,
PCIN => s1_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

product(31 downto 0) <= product_lo xor (31 downto 0 => req_neg);

mult_out: process(all)
variable ov : std_ulogic;
begin
-- set overflow if the high bits are neither all zeroes nor all ones
if req_32bit = '0' then
ov := not ((p1_pat and p0_pat) or (p1_patb and p0_patb));
else
ov := not ((p1_pat and p0_pat and not product(31)) or
(p1_patb and p0_patb and product(31)));
end if;

m_out.result <= product;
m_out.overflow <= ov;
end process;

process(clk)
begin
if rising_edge(clk) then
product_lo <= m10_p(8 downto 0) & m01_p(5 downto 0) & m00_p(16 downto 0);
m_out.valid <= valid_1;
valid_1 <= m_in.valid;
req_32bit <= r32_1;
r32_1 <= m_in.is_32bit;
req_neg <= rneg_1;
rneg_1 <= m_in.neg_result;
end if;
end process;

end architecture behaviour;
Loading…
Cancel
Save