Merge pull request #382 from paulusmack/master

Decode in block RAM and other improvements
pull/397/head
Michael Neuling 2 years ago committed by GitHub
commit ff63ffdbfd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -56,13 +56,13 @@ all = core_tb icache_tb dcache_tb dmi_dtm_tb \
all: $(all)

core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl predecode.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl \
control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countbits.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl fpu.vhdl pmu.vhdl
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl

soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \

@ -7,6 +7,7 @@ entity cache_ram is
generic(
ROW_BITS : integer := 16;
WIDTH : integer := 64;
BYTEWID : integer := 8;
TRACE : boolean := false;
ADD_BUF : boolean := false
);
@ -16,7 +17,7 @@ entity cache_ram is
rd_en : in std_logic;
rd_addr : in std_logic_vector(ROW_BITS - 1 downto 0);
rd_data : out std_logic_vector(WIDTH - 1 downto 0);
wr_sel : in std_logic_vector(WIDTH/8 - 1 downto 0);
wr_sel : in std_logic_vector(WIDTH/BYTEWID - 1 downto 0);
wr_addr : in std_logic_vector(ROW_BITS - 1 downto 0);
wr_data : in std_logic_vector(WIDTH - 1 downto 0)
);
@ -38,7 +39,7 @@ begin
variable lbit : integer range 0 to WIDTH - 1;
variable mbit : integer range 0 to WIDTH - 1;
variable widx : integer range 0 to SIZE - 1;
constant sel0 : std_logic_vector(WIDTH/8 - 1 downto 0)
constant sel0 : std_logic_vector(WIDTH/BYTEWID - 1 downto 0)
:= (others => '0');
begin
if rising_edge(clk) then
@ -49,9 +50,9 @@ begin
" dat:" & to_hstring(wr_data);
end if;
end if;
for i in 0 to WIDTH/8-1 loop
lbit := i * 8;
mbit := lbit + 7;
for i in 0 to WIDTH/BYTEWID-1 loop
lbit := i * BYTEWID;
mbit := lbit + BYTEWID - 1;
widx := to_integer(unsigned(wr_addr));
if wr_sel(i) = '1' then
ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit);

@ -246,10 +246,13 @@ package common is
fetch_failed: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
icode: insn_code;
big_endian: std_ulogic;
next_predicted: std_ulogic;
next_pred_ntaken: std_ulogic;
end record;
constant IcacheToDecode1Init : IcacheToDecode1Type :=
(nia => (others => '0'), insn => (others => '0'), icode => INSN_illegal, others => '0');

type IcacheEventType is record
icache_miss : std_ulogic;
@ -317,6 +320,9 @@ package common is
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
reg_valid1: std_ulogic;
reg_valid2: std_ulogic;
reg_valid3: std_ulogic;
cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t;
lr: std_ulogic;
@ -363,6 +369,7 @@ package common is
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
reg_valid1 => '0', reg_valid2 => '0', reg_valid3 => '0',
cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'),
result_sel => "000", sub_select => "000",
repeat => '0', second => '0', spr_select => spr_id_init,
@ -378,12 +385,11 @@ package common is
data1: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(63 downto 0);
addend: std_ulogic_vector(127 downto 0);
is_32bit: std_ulogic;
not_result: std_ulogic;
is_signed: std_ulogic;
subtract: std_ulogic; -- 0 => addend + data1 * data2, 1 => addend - data1 * data2
end record;
constant MultiplyInputInit : MultiplyInputType := (valid => '0',
is_32bit => '0', not_result => '0',
others => (others => '0'));
constant MultiplyInputInit : MultiplyInputType := (data1 => 64x"0", data2 => 64x"0",
addend => 128x"0", others => '0');

type MultiplyOutputType is record
valid: std_ulogic;
@ -476,7 +482,6 @@ package common is
type Execute1ToLoadstore1Type is record
valid : std_ulogic;
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
addr1 : std_ulogic_vector(63 downto 0);
@ -504,7 +509,7 @@ package common is
(valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
insn => (others => '0'),
instr_tag => instr_tag_init,
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'),
@ -525,8 +530,6 @@ package common is
dcbz : std_ulogic;
nc : std_ulogic;
reserve : std_ulogic;
atomic : std_ulogic; -- part of a multi-transfer atomic op
atomic_last : std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
@ -674,6 +677,9 @@ package common is
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
valid_a : std_ulogic;
valid_b : std_ulogic;
valid_c : std_ulogic;
frt : gspr_index_t;
rc : std_ulogic;
m32b : std_ulogic;
@ -687,6 +693,7 @@ package common is
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
valid_a => '0', valid_b => '0', valid_c => '0',
single => '0', is_signed => '0', out_cr => '0',
m32b => '0', oe => '0', xerc => xerc_init,
stall => '0');

@ -13,7 +13,6 @@ entity core is
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT : boolean := false;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512;
ICACHE_NUM_LINES : natural := 64;
@ -246,6 +245,7 @@ begin
icache_0: entity work.icache
generic map(
SIM => SIM,
HAS_FPU => HAS_FPU,
LINE_SIZE => 64,
NUM_LINES => ICACHE_NUM_LINES,
NUM_WAYS => ICACHE_NUM_WAYS,
@ -266,7 +266,7 @@ begin
wishbone_in => wishbone_insn_in,
wb_snoop_in => wb_snoop_in,
events => icache_events,
log_out => log_data(96 downto 43)
log_out => log_data(100 downto 43)
);

icache_stall_in <= decode1_busy;
@ -287,7 +287,7 @@ begin
d_out => decode1_to_decode2,
f_out => decode1_to_fetch1,
r_out => decode1_to_register_file,
log_out => log_data(109 downto 97)
log_out => log_data(113 downto 101)
);

decode1_stall_in <= decode2_stall_out;
@ -319,7 +319,7 @@ begin
writeback_bypass => writeback_bypass,
dbg_spr_req => dbg_spr_req,
dbg_spr_addr => dbg_spr_addr,
log_out => log_data(119 downto 110)
log_out => log_data(123 downto 114)
);
decode2_busy_in <= ex1_busy_out;

@ -365,7 +365,6 @@ begin
SIM => SIM,
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
HAS_SHORT_MULT => HAS_SHORT_MULT,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -398,7 +397,7 @@ begin
dbg_spr_data => dbg_spr_data,
sim_dump => sim_ex_dump,
sim_dump_done => sim_cr_dump,
log_out => log_data(134 downto 120),
log_out => log_data(135 downto 124),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
log_wr_addr => log_wr_addr
@ -500,7 +499,7 @@ begin
);

log_data(150) <= '0';
log_data(139 downto 135) <= "00000";
log_data(139 downto 136) <= "0000";

debug_0: entity work.core_debug
generic map (

@ -175,7 +175,8 @@ begin
gspr_index <= (others => '0');
else
if do_log_trigger = '1' or log_trigger_delay /= 0 then
if log_trigger_delay = 255 then
if log_trigger_delay = 255 or
(LOG_LENGTH < 1024 and log_trigger_delay = LOG_LENGTH / 4) then
log_dmi_trigger(1) <= '1';
log_trigger_delay <= 0;
else

@ -1004,10 +1004,10 @@ begin
-- XXX or if r0.req.nc = '1'
if r0.req.load = '1' then
-- load with reservation
set_rsrv <= r0.req.atomic_last;
set_rsrv <= '1';
else
-- store conditional
clear_rsrv <= r0.req.atomic_last;
clear_rsrv <= '1';
if reservation.valid = '0' or
r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
cancel_store <= '1';

File diff suppressed because it is too large Load Diff

@ -58,10 +58,6 @@ architecture behaviour of decode2 is
busy : std_ulogic;
sgl_pipe : std_ulogic;
prev_sgl : std_ulogic;
reg_a_valid : std_ulogic;
reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic;
reg_o_valid : std_ulogic;
input_ov : std_ulogic;
output_ov : std_ulogic;
read_rspr : std_ulogic;
@ -192,7 +188,7 @@ architecture behaviour of decode2 is
function decode_rc (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
begin
case t is
when RC =>
when RC | RCOE =>
return insn_rc(insn_in);
when ONE =>
return '1';
@ -393,6 +389,7 @@ begin
variable v : reg_type;
variable length : std_ulogic_vector(3 downto 0);
variable op : insn_type_t;
variable unit : unit_t;
variable valid_in : std_ulogic;
variable decctr : std_ulogic;
variable sprs_busy : std_ulogic;
@ -405,6 +402,7 @@ begin
v.e := Decode2ToExecute1Init;

sprs_busy := '0';
unit := d_in.decode.unit;

if d_in.valid = '1' then
v.prev_sgl := dc2.sgl_pipe;
@ -429,32 +427,40 @@ begin
end if;
case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
if d_in.decode.rc = RCOE and insn_oe(d_in.insn) = '1' then
v.e.oe := '1';
v.e.output_xer := '1';
v.output_ov := '1';
v.input_ov := '1'; -- need SO state if setting OV to 0
end if;
when OP_MFSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
case decode_spr_num(d_in.insn) is
when SPR_XER =>
v.input_ov := '1';
end if;
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
unit := LDST;
when others =>
end case;
when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
case decode_spr_num(d_in.insn) is
when SPR_XER =>
v.e.output_xer := '1';
v.output_ov := '1';
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
unit := LDST;
if d_in.valid = '1' then
v.sgl_pipe := '1';
end if;
when others =>
end case;
if d_in.spr_info.valid = '1' and d_in.valid = '1' then
v.sgl_pipe := '1';
end if;
when OP_CMP | OP_MCRXRX =>
v.input_ov := '1';
when others =>
end case;

v.reg_a_valid := decoded_reg_a.reg_valid;
v.reg_b_valid := decoded_reg_b.reg_valid;
v.reg_c_valid := decoded_reg_c.reg_valid;
v.reg_o_valid := decoded_reg_o.reg_valid;

if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
-- b and bc have even major opcodes; bcreg is considered absolute
@ -537,11 +543,14 @@ begin

-- execute unit
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.unit := unit;
v.e.fac := d_in.decode.facility;
v.e.read_reg1 := d_in.reg_a;
v.e.read_reg2 := d_in.reg_b;
v.e.read_reg3 := d_in.reg_c;
v.e.reg_valid1 := decoded_reg_a.reg_valid;
v.e.reg_valid2 := decoded_reg_b.reg_valid;
v.e.reg_valid3 := decoded_reg_c.reg_valid;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.invert_a := d_in.decode.invert_a;
@ -583,16 +592,16 @@ begin
control_valid_in <= valid_in;
control_serialize <= v.sgl_pipe or v.prev_sgl;

gpr_write_valid <= v.reg_o_valid;
gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= v.e.write_reg;

gpr_a_read_valid <= v.reg_a_valid;
gpr_a_read_valid <= v.e.reg_valid1;
gpr_a_read <= v.e.read_reg1;

gpr_b_read_valid <= v.reg_b_valid;
gpr_b_read_valid <= v.e.reg_valid2;
gpr_b_read <= v.e.read_reg2;

gpr_c_read_valid <= v.reg_c_valid;
gpr_c_read_valid <= v.e.reg_valid3;
gpr_c_read <= v.e.read_reg3;

cr_write_valid <= v.e.output_cr or v.e.rc;

@ -4,14 +4,16 @@ use ieee.std_logic_1164.all;
package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_BCD, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI,
OP_FPOP, OP_FPOP_I,
OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_DCBZ, OP_ICBI, OP_ICBT,
OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC,
OP_DIV, OP_DIVE, OP_MOD,
OP_EXTS, OP_EXTSWSLI,
OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNT, OP_PRTY, OP_RFID,
@ -19,15 +21,349 @@ package decode_types is
OP_SHL, OP_SHR,
OP_SYNC, OP_TLBIE, OP_TRAP,
OP_XOR,
OP_BCD, OP_ADDG6S,
OP_ADDG6S,
OP_FETCH_FAILED
);

-- The following list is ordered in such a way that we can know some
-- things about which registers are accessed by an instruction by its place
-- in the list. In other words we can decide whether an instruction
-- accesses FPRs and whether it has an RB operand by doing simple
-- comparisons of the insn_code for the instruction with a few constants.
type insn_code is (
-- The following instructions don't have an RB operand or access FPRs
INSN_illegal, -- 0
INSN_fetch_fail,
INSN_addi,
INSN_addic,
INSN_addic_dot,
INSN_addis,
INSN_addme,
INSN_addpcis,
INSN_addze,
INSN_andi_dot,
INSN_andis_dot, -- 10
INSN_attn,
INSN_b,
INSN_bc,
INSN_bcctr,
INSN_bclr,
INSN_bctar,
INSN_cbcdtd,
INSN_cdtbcd,
INSN_cmpi,
INSN_cmpli, -- 20
INSN_cntlzw,
INSN_cntlzd,
INSN_cnttzw,
INSN_cnttzd,
INSN_crand,
INSN_crandc,
INSN_creqv,
INSN_crnand,
INSN_crnor,
INSN_cror, -- 30
INSN_crorc,
INSN_crxor,
INSN_darn,
INSN_eieio,
INSN_extsb,
INSN_extsh,
INSN_extsw,
INSN_extswsli,
INSN_isync,
INSN_lbz, -- 40
INSN_lbzu,
INSN_ld,
INSN_ldu,
INSN_lha,
INSN_lhau,
INSN_lhz,
INSN_lhzu,
INSN_lwa,
INSN_lwz,
INSN_lwzu, -- 50
INSN_mcrf,
INSN_mcrfs,
INSN_mcrxrx,
INSN_mfcr,
INSN_mfmsr,
INSN_mfspr,
INSN_mtcrf,
INSN_mtfsb,
INSN_mtfsfi,
INSN_mtmsr, -- 60
INSN_mtmsrd,
INSN_mtspr,
INSN_mulli,
INSN_neg,
INSN_nop,
INSN_ori,
INSN_oris,
INSN_popcntb,
INSN_popcntw,
INSN_popcntd, -- 70
INSN_prtyw,
INSN_prtyd,
INSN_rfid,
INSN_rldic,
INSN_rldicl,
INSN_rldicr,
INSN_rldimi,
INSN_rlwimi,
INSN_rlwinm,
INSN_sc, -- 80
INSN_setb,
INSN_slbia,
INSN_sradi,
INSN_srawi,
INSN_stb,
INSN_stbu,
INSN_std,
INSN_stdu,
INSN_sth,
INSN_sthu, -- 90
INSN_stw,
INSN_stwu,
INSN_subfic,
INSN_subfme,
INSN_subfze,
INSN_sync,
INSN_tdi,
INSN_tlbsync,
INSN_twi,
INSN_wait, -- 100
INSN_xori,
INSN_xoris,

-- pad to 112 to simplify comparison logic
INSN_103,
INSN_104, INSN_105, INSN_106, INSN_107,
INSN_108, INSN_109, INSN_110, INSN_111,

-- The following instructions have an RB operand but don't access FPRs
INSN_add,
INSN_addc,
INSN_adde,
INSN_addex,
INSN_addg6s,
INSN_and,
INSN_andc,
INSN_bperm,
INSN_cmp, -- 120
INSN_cmpb,
INSN_cmpeqb,
INSN_cmpl,
INSN_cmprb,
INSN_dcbf,
INSN_dcbst,
INSN_dcbt,
INSN_dcbtst,
INSN_dcbz,
INSN_divd, -- 130
INSN_divdu,
INSN_divde,
INSN_divdeu,
INSN_divw,
INSN_divwu,
INSN_divwe,
INSN_divweu,
INSN_eqv,
INSN_icbi,
INSN_icbt, -- 140
INSN_isel,
INSN_lbarx,
INSN_lbzcix,
INSN_lbzux,
INSN_lbzx,
INSN_ldarx,
INSN_ldbrx,
INSN_ldcix,
INSN_ldx,
INSN_ldux, -- 150
INSN_lharx,
INSN_lhax,
INSN_lhaux,
INSN_lhbrx,
INSN_lhzcix,
INSN_lhzx,
INSN_lhzux,
INSN_lwarx,
INSN_lwax,
INSN_lwaux, -- 160
INSN_lwbrx,
INSN_lwzcix,
INSN_lwzx,
INSN_lwzux,
INSN_modsd,
INSN_modsw,
INSN_moduw,
INSN_modud,
INSN_mulhw,
INSN_mulhwu, -- 170
INSN_mulhd,
INSN_mulhdu,
INSN_mullw,
INSN_mulld,
INSN_nand,
INSN_nor,
INSN_or,
INSN_orc,
INSN_rldcl,
INSN_rldcr, -- 180
INSN_rlwnm,
INSN_slw,
INSN_sld,
INSN_sraw,
INSN_srad,
INSN_srw,
INSN_srd,
INSN_stbcix,
INSN_stbcx,
INSN_stbx, -- 190
INSN_stbux,
INSN_stdbrx,
INSN_stdcix,
INSN_stdcx,
INSN_stdx,
INSN_stdux,
INSN_sthbrx,
INSN_sthcix,
INSN_sthcx,
INSN_sthx, -- 200
INSN_sthux,
INSN_stwbrx,
INSN_stwcix,
INSN_stwcx,
INSN_stwx,
INSN_stwux,
INSN_subf,
INSN_subfc,
INSN_subfe,
INSN_td, -- 210
INSN_tlbie,
INSN_tlbiel,
INSN_tw,
INSN_xor,

-- pad to 224 to simplify comparison logic
INSN_215,
INSN_216, INSN_217, INSN_218, INSN_219,
INSN_220, INSN_221, INSN_222, INSN_223,

-- The following instructions have a third input addressed by RC
INSN_maddld,
INSN_maddhd,
INSN_maddhdu,

-- pad to 256 to simplify comparison logic
INSN_227,
INSN_228, INSN_229, INSN_230, INSN_231,
INSN_232, INSN_233, INSN_234, INSN_235,
INSN_236, INSN_237, INSN_238, INSN_239,
INSN_240, INSN_241, INSN_242, INSN_243,
INSN_244, INSN_245, INSN_246, INSN_247,
INSN_248, INSN_249, INSN_250, INSN_251,
INSN_252, INSN_253, INSN_254, INSN_255,

-- The following instructions access floating-point registers
-- These ones have an FRS operand, but RA/RB are GPRs
INSN_stfd,
INSN_stfdu,
INSN_stfs,
INSN_stfsu,
INSN_stfdux, -- 260
INSN_stfdx,
INSN_stfiwx,
INSN_stfsux,
INSN_stfsx,
-- These ones don't actually have an FRS operand (rather an FRT destination)
-- but are here so that all FP instructions are >= INST_first_frs.
INSN_lfd,
INSN_lfdu,
INSN_lfs,
INSN_lfsu,
INSN_lfdx,
INSN_lfdux, -- 270
INSN_lfiwax,
INSN_lfiwzx,
INSN_lfsx,
INSN_lfsux,
INSN_275, -- padding

-- The following instructions access FRA and/or FRB operands
INSN_fabs,
INSN_fadd,
INSN_fadds,
INSN_fcfid,
INSN_fcfids, -- 280
INSN_fcfidu,
INSN_fcfidus,
INSN_fcmpo,
INSN_fcmpu,
INSN_fcpsgn,
INSN_fctid,
INSN_fctidz,
INSN_fctidu,
INSN_fctiduz,
INSN_fctiw, -- 290
INSN_fctiwz,
INSN_fctiwu,
INSN_fctiwuz,
INSN_fdiv,
INSN_fdivs,
INSN_fmr,
INSN_fmrgew,
INSN_fmrgow,
INSN_fnabs,
INSN_fneg, -- 300
INSN_fre,
INSN_fres,
INSN_frim,
INSN_frin,
INSN_frip,
INSN_friz,
INSN_frsp,
INSN_frsqrte,
INSN_frsqrtes,
INSN_fsqrt, -- 310
INSN_fsqrts,
INSN_fsub,
INSN_fsubs,
INSN_ftdiv,
INSN_ftsqrt,
INSN_mffs,
INSN_mtfsf,

-- pad to 320
INSN_318, INSN_319,

-- The following instructions access FRA, FRB (possibly) and FRC operands
INSN_fmul, -- 320
INSN_fmuls,
INSN_fmadd,
INSN_fmadds,
INSN_fmsub,
INSN_fmsubs,
INSN_fnmadd,
INSN_fnmadds,
INSN_fnmsub,
INSN_fnmsubs,
INSN_fsel -- 330
);

constant INSN_first_rb : insn_code := INSN_add;
constant INSN_first_rc : insn_code := INSN_maddld;
constant INSN_first_frs : insn_code := INSN_stfd;
constant INSN_first_frab : insn_code := INSN_fabs;
constant INSN_first_frabc : insn_code := INSN_fmul;

type input_reg_a_t is (NONE, RA, RA_OR_ZERO, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, FRT);
type rc_t is (NONE, ONE, RC);
type rc_t is (NONE, ONE, RC, RCOE);
type carry_in_t is (ZERO, CA, OV, ONE);

constant SH_OFFSET : integer := 0;

@ -15,7 +15,6 @@ entity execute1 is
SIM : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
HAS_SHORT_MULT : boolean := false;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -65,7 +64,7 @@ entity execute1 is
sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic;

log_out : out std_ulogic_vector(14 downto 0);
log_out : out std_ulogic_vector(11 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0);
log_rd_data : in std_ulogic_vector(63 downto 0);
log_wr_addr : in std_ulogic_vector(31 downto 0)
@ -85,6 +84,7 @@ architecture behaviour of execute1 is
write_pmuspr : std_ulogic;
ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic;
mult_32s : std_ulogic;
end record;
constant side_effect_init : side_effect_type := (others => '0');

@ -203,6 +203,8 @@ architecture behaviour of execute1 is
-- multiply signals
signal x_to_multiply: MultiplyInputType;
signal multiply_to_x: MultiplyOutputType;
signal x_to_mult_32s: MultiplyInputType;
signal mult_32s_to_x: MultiplyOutputType;

-- divider signals
signal x_to_divider: Execute1ToDividerType;
@ -411,6 +413,14 @@ begin
m_out => multiply_to_x
);

mult_32s_0: entity work.multiply_32s
port map (
clk => clk,
stall => stage2_stall,
m_in => x_to_mult_32s,
m_out => mult_32s_to_x
);

divider_0: if not HAS_FPU generate
div_0: entity work.divider
port map (
@ -437,17 +447,6 @@ begin
p_out => pmu_to_x
);

short_mult_0: if HAS_SHORT_MULT generate
begin
short_mult: entity work.short_multiply
port map (
clk => clk,
a_in => a_in(15 downto 0),
b_in => b_in(15 downto 0),
m_out => mshort_p
);
end generate;

dbg_ctrl_out <= ctrl;
log_rd_addr <= ex2.log_addr_spr;

@ -684,7 +683,23 @@ begin
overflow_32 <= calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31));
overflow_64 <= calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63));

-- signals to multiply and divide units
-- signals to multiplier
addend := (others => '0');
if e_in.reg_valid3 = '1' then
-- integer multiply-add, major op 4 (if it is a multiply)
addend(63 downto 0) := c_in;
if e_in.is_signed = '1' then
addend(127 downto 64) := (others => c_in(63));
end if;
end if;
x_to_multiply.data1 <= std_ulogic_vector(a_in);
x_to_multiply.data2 <= std_ulogic_vector(b_in);
x_to_multiply.is_signed <= e_in.is_signed;
x_to_multiply.subtract <= '0';
x_to_multiply.addend <= addend;

-- Interface to divide unit
if not HAS_FPU then
sign1 := '0';
sign2 := '0';
if e_in.is_signed = '1' then
@ -708,7 +723,6 @@ begin
abs2 := - signed(b_in);
end if;

-- Interface to multiply and divide units
x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit;
x_to_divider.is_extended <= '0';
@ -717,27 +731,9 @@ begin
x_to_divider.is_modulus <= '1';
end if;
x_to_divider.flush <= flush_in;

addend := (others => '0');
if e_in.insn(26) = '0' then
-- integer multiply-add, major op 4 (if it is a multiply)
addend(63 downto 0) := c_in;
if e_in.is_signed = '1' then
addend(127 downto 64) := (others => c_in(63));
end if;
end if;
if (sign1 xor sign2) = '1' then
addend := not addend;
end if;

x_to_multiply.is_32bit <= e_in.is_32bit;
x_to_multiply.not_result <= sign1 xor sign2;
x_to_multiply.addend <= addend;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then
-- 64-bit forms
x_to_multiply.data1 <= std_ulogic_vector(abs1);
x_to_multiply.data2 <= std_ulogic_vector(abs2);
if e_in.insn_type = OP_DIVE then
x_to_divider.is_extended <= '1';
end if;
@ -745,8 +741,6 @@ begin
x_to_divider.divisor <= std_ulogic_vector(abs2);
else
-- 32-bit forms
x_to_multiply.data1 <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
x_to_multiply.data2 <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
x_to_divider.is_extended <= '0';
if e_in.insn_type = OP_DIVE then -- extended forms
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
@ -755,6 +749,15 @@ begin
end if;
x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
end if;
end if;

-- signals to 32-bit multiplier
x_to_mult_32s.data1 <= 32x"0" & a_in(31 downto 0);
x_to_mult_32s.data2 <= 32x"0" & b_in(31 downto 0);
x_to_mult_32s.is_signed <= e_in.is_signed;
-- The following are unused, but set here to avoid X states
x_to_mult_32s.subtract <= '0';
x_to_mult_32s.addend <= (others => '0');

shortmul_result <= std_ulogic_vector(resize(signed(mshort_p), 64));
case ex1.mul_select is
@ -1271,13 +1274,10 @@ begin
v.se.icache_inval := '1';

when OP_MUL_L64 =>
if HAS_SHORT_MULT and e_in.insn(26) = '1' and
fits_in_n_bits(a_in, 16) and fits_in_n_bits(b_in, 16) then
-- Operands fit into 16 bits, so use short multiplier
if e_in.oe = '1' then
-- Note 16x16 multiply can't overflow, even for mullwo
set_ov(v.e, '0', '0');
end if;
if e_in.is_32bit = '1' then
v.se.mult_32s := '1';
v.res2_sel := "00";
slow_op := '1';
else
-- Use standard multiplier
v.start_mul := '1';
@ -1285,11 +1285,16 @@ begin
owait := '1';
end if;

when OP_MUL_H64 | OP_MUL_H32 =>
when OP_MUL_H64 =>
v.start_mul := '1';
slow_op := '1';
owait := '1';

when OP_MUL_H32 =>
v.se.mult_32s := '1';
v.res2_sel := "01";
slow_op := '1';

when OP_DIV | OP_DIVE | OP_MOD =>
if not HAS_FPU then
v.start_div := '1';
@ -1370,6 +1375,7 @@ begin
fv := Execute1ToFPUInit;

x_to_multiply.valid <= '0';
x_to_mult_32s.valid <= '0';
x_to_divider.valid <= '0';
v.ext_interrupt := '0';
v.taken_branch_event := '0';
@ -1456,6 +1462,7 @@ begin
v.res2_sel := actions.res2_sel;
v.msr := actions.new_msr;
x_to_multiply.valid <= actions.start_mul;
x_to_mult_32s.valid <= actions.se.mult_32s;
v.mul_in_progress := actions.start_mul;
x_to_divider.valid <= actions.start_div;
v.div_in_progress := actions.start_div;
@ -1481,7 +1488,7 @@ begin
end if;
end if;

if ex1.div_in_progress = '1' then
if not HAS_FPU and ex1.div_in_progress = '1' then
v.div_in_progress := not divider_to_x.valid;
v.busy := not divider_to_x.valid;
if divider_to_x.valid = '1' and ex1.oe = '1' then
@ -1554,7 +1561,6 @@ begin

-- Outputs to loadstore1 (async)
lv.op := e_in.insn_type;
lv.nia := e_in.nia;
lv.instr_tag := e_in.instr_tag;
lv.addr1 := a_in;
lv.addr2 := b_in;
@ -1568,11 +1574,9 @@ begin
lv.reserve := e_in.reserve;
lv.rc := e_in.rc;
lv.insn := e_in.insn;
-- decode l*cix and st*cix instructions here
if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and
e_in.insn(5 downto 1) = "10101" then
lv.ci := '1';
end if;
-- invert_a field is overloaded for load/store instructions
-- to mark l*cix and st*cix
lv.ci := e_in.invert_a;
lv.virt_mode := ex1.msr(MSR_DR);
lv.priv_mode := not ex1.msr(MSR_PR);
lv.mode_32bit := not ex1.msr(MSR_SF);
@ -1591,6 +1595,9 @@ begin
fv.fra := a_in;
fv.frb := b_in;
fv.frc := c_in;
fv.valid_a := e_in.reg_valid1;
fv.valid_b := e_in.reg_valid2;
fv.valid_c := e_in.reg_valid3;
fv.frt := e_in.write_reg;
fv.rc := e_in.rc;
fv.out_cr := e_in.output_cr;
@ -1624,11 +1631,6 @@ begin
-- Second execute stage control
execute2_1: process(all)
variable v : reg_stage2_type;
variable overflow : std_ulogic;
variable lv : Execute1ToLoadstore1Type;
variable fv : Execute1ToFPUType;
variable k : integer;
variable go : std_ulogic;
variable bypass_valid : std_ulogic;
variable rcresult : std_ulogic_vector(63 downto 0);
variable sprres : std_ulogic_vector(63 downto 0);
@ -1647,6 +1649,14 @@ begin
v.br_mispredict := ex1.br_mispredict;
end if;

if ex1.se.mult_32s = '1' and ex1.oe = '1' then
v.e.xerc.ov := mult_32s_to_x.overflow;
v.e.xerc.ov32 := mult_32s_to_x.overflow;
if mult_32s_to_x.overflow = '1' then
v.e.xerc.so := '1';
end if;
end if;

ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
@ -1667,24 +1677,34 @@ begin
v.e.write_xerc_enable := '0';
v.e.redirect := '0';
v.e.br_last := '0';
v.se := side_effect_init;
v.taken_branch_event := '0';
v.br_mispredict := '0';
end if;
if flush_in = '1' then
v.e.valid := '0';
v.e.interrupt := '0';
v.se := side_effect_init;
v.ext_interrupt := '0';
end if;

-- This is split like this because mfspr doesn't have an Rc bit,
-- and we don't want the zero-detect logic to be after the
-- SPR mux for timing reasons.
if ex1.se.mult_32s = '1' then
if ex1.res2_sel(0) = '0' then
rcresult := mult_32s_to_x.result(63 downto 0);
else
rcresult := mult_32s_to_x.result(63 downto 32) &
mult_32s_to_x.result(63 downto 32);
end if;
elsif ex1.res2_sel(0) = '0' then
rcresult := ex1.e.write_data;
sprres := spr_result;
else
rcresult := countbits_result;
end if;
if ex1.res2_sel(0) = '0' then
sprres := spr_result;
else
sprres := pmu_to_x.spr_val;
end if;
if ex1.res2_sel(1) = '0' then
@ -1708,7 +1728,7 @@ begin
cr_res(31) := sign;
cr_res(30) := not (sign or zero);
cr_res(29) := zero;
cr_res(28) := ex1.e.xerc.so;
cr_res(28) := v.e.xerc.so;
cr_mask(7) := '1';
end if;

@ -1802,7 +1822,7 @@ begin
end generate;

e1_log: if LOG_LENGTH > 0 generate
signal log_data : std_ulogic_vector(14 downto 0);
signal log_data : std_ulogic_vector(11 downto 0);
begin
ex1_log : process(clk)
begin
@ -1812,7 +1832,6 @@ begin
exception_log &
irq_valid_log &
interrupt_in.intr &
"000" &
ex2.e.write_enable &
ex2.e.valid &
(ex2.e.redirect or ex2.e.interrupt) &

@ -16,7 +16,6 @@ entity toplevel is
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT : boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -199,7 +198,6 @@ begin
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -13,7 +13,6 @@ entity toplevel is
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := false;
HAS_SHORT_MULT: boolean := false;
ICACHE_NUM_LINES : natural := 64;
LOG_LENGTH : natural := 512;
DISABLE_FLATTEN_CORE : boolean := false;
@ -75,7 +74,6 @@ begin
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
ICACHE_NUM_LINES => ICACHE_NUM_LINES,
LOG_LENGTH => LOG_LENGTH,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,

@ -16,7 +16,6 @@ entity toplevel is
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT: boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -175,7 +174,6 @@ begin
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT=> HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -188,7 +188,6 @@ begin
HAS_UART1 => HAS_UART1,
HAS_SD_CARD => USE_LITESDCARD,
ICACHE_NUM_LINES => ICACHE_NUM_LINES,
HAS_SHORT_MULT => true,
NGPIO => NGPIO
)
port map (

@ -16,7 +16,6 @@ entity toplevel is
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT : boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -175,7 +174,6 @@ begin
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

File diff suppressed because it is too large Load Diff

@ -23,6 +23,7 @@ use ieee.numeric_std.all;
library work;
use work.utils.all;
use work.common.all;
use work.decode_types.all;
use work.wishbone_types.all;

-- 64 bit direct mapped icache. All instructions are 4B aligned.
@ -30,6 +31,7 @@ use work.wishbone_types.all;
entity icache is
generic (
SIM : boolean := false;
HAS_FPU : boolean := true;
-- Line size in bytes
LINE_SIZE : positive := 64;
-- BRAM organisation: We never access more than wishbone_data_bits at
@ -69,7 +71,7 @@ entity icache is
wb_snoop_in : in wishbone_master_out := wishbone_master_out_init;

events : out IcacheEventType;
log_out : out std_ulogic_vector(53 downto 0)
log_out : out std_ulogic_vector(57 downto 0)
);
end entity icache;

@ -122,8 +124,20 @@ architecture rtl of icache is
subtype way_t is integer range 0 to NUM_WAYS-1;
subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);

-- We store a pre-decoded 10-bit insn_code along with the bottom 26 bits of
-- each instruction, giving a total of 36 bits per instruction, which
-- fits neatly into the block RAMs available on FPGAs.
-- For illegal instructions, the top 4 bits are ones and the bottom 6 bits
-- are the instruction's primary opcode, so we have the whole instruction
-- word available (e.g. to put in HEIR). For other instructions, the
-- primary opcode is not stored but could be determined from the insn_code.
constant PREDECODE_BITS : natural := 10;
constant INSN_IMAGE_BITS : natural := 26;
constant ICWORDLEN : natural := PREDECODE_BITS + INSN_IMAGE_BITS;
constant ROW_WIDTH : natural := INSN_PER_ROW * ICWORDLEN;

-- The cache data BRAM organized as described above for each way
subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
subtype cache_row_t is std_ulogic_vector(ROW_WIDTH-1 downto 0);

-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
-- not handle a clean (commented) definition of the cache tags as a 3d
@ -184,6 +198,8 @@ architecture rtl of icache is
wb : wishbone_master_out;
store_way : way_t;
store_index : index_t;
recv_row : row_t;
recv_valid : std_ulogic;
store_row : row_t;
store_tag : cache_tag_t;
store_valid : std_ulogic;
@ -215,6 +231,8 @@ architecture rtl of icache is
-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t;
signal cache_wr_data : std_ulogic_vector(ROW_WIDTH - 1 downto 0);
signal wb_rd_data : std_ulogic_vector(ROW_SIZE_BITS - 1 downto 0);

-- PLRU output interface
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
@ -226,6 +244,8 @@ architecture rtl of icache is
signal snoop_index : index_t;
signal snoop_hits : cache_way_valids_t;

signal log_insn : std_ulogic_vector(35 downto 0);

-- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector) return index_t is
begin
@ -293,7 +313,7 @@ architecture rtl of icache is
variable word: integer range 0 to INSN_PER_ROW-1;
begin
word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
return data(31+word*32 downto word*32);
return data(word * ICWORDLEN + ICWORDLEN - 1 downto word * ICWORDLEN);
end;

-- Get the tag value from the address
@ -327,6 +347,34 @@ architecture rtl of icache is

begin

-- byte-swap read data if big endian
process(all)
variable j: integer;
begin
if r.store_tag(TAG_BITS - 1) = '0' then
wb_rd_data <= wishbone_in.dat;
else
for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop
j := ((ii / 4) * 4) + (3 - (ii mod 4));
wb_rd_data(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
end loop;
end if;
end process;

predecoder_0: entity work.predecoder
generic map (
HAS_FPU => HAS_FPU,
WIDTH => INSN_PER_ROW,
ICODE_LEN => PREDECODE_BITS,
IMAGE_LEN => INSN_IMAGE_BITS
)
port map (
clk => clk,
valid_in => wishbone_in.ack,
insns_in => wb_rd_data,
icodes_out => cache_wr_data
);

assert LINE_SIZE mod ROW_SIZE = 0;
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
@ -367,13 +415,13 @@ begin
signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal dout : cache_row_t;
signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
signal wr_dat : std_ulogic_vector(wishbone_in.dat'left downto 0);
signal wr_sel : std_ulogic_vector(0 downto 0);
begin
way: entity work.cache_ram
generic map (
ROW_BITS => ROW_BITS,
WIDTH => ROW_SIZE_BITS
WIDTH => ROW_WIDTH,
BYTEWID => ROW_WIDTH
)
port map (
clk => clk,
@ -382,31 +430,19 @@ begin
rd_data => dout,
wr_sel => wr_sel,
wr_addr => wr_addr,
wr_data => wr_dat
wr_data => cache_wr_data
);
process(all)
variable j: integer;
begin
-- byte-swap read data if big endian
if r.store_tag(TAG_BITS - 1) = '0' then
wr_dat <= wishbone_in.dat;
else
for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop
j := ((ii / 4) * 4) + (3 - (ii mod 4));
wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
end loop;
end if;
do_read <= not stall_in;
do_write <= '0';
if wishbone_in.ack = '1' and replace_way = i then
if r.recv_valid = '1' and r.store_way = i then
do_write <= '1';
end if;
cache_out(i) <= dout;
rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
for ii in 0 to ROW_SIZE-1 loop
wr_sel(ii) <= do_write;
end loop;
wr_sel(0) <= do_write;
end process;
end generate;
@ -515,6 +551,8 @@ begin
icache_comb : process(all)
variable is_hit : std_ulogic;
variable hit_way : way_t;
variable insn : std_ulogic_vector(ICWORDLEN - 1 downto 0);
variable icode : insn_code;
begin
-- Extract line, row and tag from request
if not is_X(i_in.nia) then
@ -575,11 +613,19 @@ begin
-- I prefer not to do just yet as it would force fetch2 to know about
-- some of the cache geometry information.
--
insn := (others => '0');
icode := INSN_illegal;
if r.hit_valid = '1' then
i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
else
i_out.insn <= (others => '0');
insn := read_insn_word(r.hit_nia, cache_out(r.hit_way));
-- Currently we use only the top bit for indicating illegal
-- instructions because we know that insn_codes fit into 9 bits.
if insn(ICWORDLEN - 1) = '0' then
icode := insn_code'val(to_integer(unsigned(insn(ICWORDLEN-1 downto INSN_IMAGE_BITS))));
end if;
end if;
i_out.insn <= insn(31 downto 0);
i_out.icode <= icode;
log_insn <= cache_wr_data(35 downto 0);
i_out.valid <= r.hit_valid;
i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark;
@ -640,9 +686,11 @@ begin
variable snoop_addr : real_addr_t;
variable snoop_tag : cache_tag_t;
variable snoop_cache_tags : cache_tags_set_t;
variable replace_way : way_t;
begin
if rising_edge(clk) then
ev.icache_miss <= '0';
r.recv_valid <= '0';
-- On reset, clear all valid bits to force misses
if rst = '1' then
for i in index_t loop
@ -714,13 +762,13 @@ begin
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag) &
" RA:" & to_hstring(real_addr);
ev.icache_miss <= '1';

-- Keep track of our index and way for subsequent stores
r.store_index <= req_index;
r.recv_row <= get_row(req_raddr);
r.store_row <= get_row(req_raddr);
r.store_tag <= req_tag;
r.store_valid <= '1';
@ -740,6 +788,7 @@ begin
when CLR_TAG | WAIT_ACK =>
if r.state = CLR_TAG then
-- Get victim way from plru
replace_way := to_integer(unsigned(plru_victim(r.store_index)));
r.store_way <= replace_way;

-- Force misses on that way while reloading that line
@ -757,6 +806,19 @@ begin
r.state <= WAIT_ACK;
end if;

-- If we are writing in this cycle, mark row valid and see if we are done
if r.recv_valid = '1' then
r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in;
if is_last_row(r.store_row, r.end_row_ix) then
-- Cache line is now valid
cache_valids(r.store_index)(r.store_way) <= r.store_valid and not inval_in;
-- We are done
r.state <= IDLE;
end if;
-- Increment store row counter
r.store_row <= r.recv_row;
end if;

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and r.wb.stb = '1' then
-- That was the last word ? We are done sending. Clear stb.
@ -777,33 +839,27 @@ begin

-- Incoming acks processing
if wishbone_in.ack = '1' then
r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in;
-- Check for completion
if is_last_row(r.store_row, r.end_row_ix) then
if is_last_row(r.recv_row, r.end_row_ix) then
-- Complete wishbone cycle
r.wb.cyc <= '0';

-- Cache line is now valid
cache_valids(r.store_index)(replace_way) <= r.store_valid and not inval_in;

-- We are done
r.state <= IDLE;
end if;
r.recv_valid <= '1';

-- Increment store row counter
r.store_row <= next_row(r.store_row);
-- Increment receive row counter
r.recv_row <= next_row(r.recv_row);
end if;

when STOP_RELOAD =>
-- Wait for all outstanding requests to be satisfied, then
-- go to IDLE state.
if get_row_of_line(r.store_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then
if get_row_of_line(r.recv_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then
r.wb.cyc <= '0';
r.state <= IDLE;
end if;
if wishbone_in.ack = '1' then
-- Increment store row counter
r.store_row <= next_row(r.store_row);
r.recv_row <= next_row(r.recv_row);
end if;
end case;
end if;
@ -819,7 +875,7 @@ begin

icache_log: if LOG_LENGTH > 0 generate
-- Output data to logger
signal log_data : std_ulogic_vector(53 downto 0);
signal log_data : std_ulogic_vector(57 downto 0);
begin
data_log: process(clk)
variable lway: way_t;
@ -832,7 +888,7 @@ begin
wstate := '1';
end if;
log_data <= i_out.valid &
i_out.insn &
log_insn &
wishbone_in.ack &
r.wb.adr(2 downto 0) &
r.wb.stb & r.wb.cyc &

@ -83,8 +83,6 @@ architecture behave of loadstore1 is
update : std_ulogic;
xerc : xer_common_t;
reserve : std_ulogic;
atomic : std_ulogic;
atomic_last : std_ulogic;
rc : std_ulogic;
nc : std_ulogic; -- non-cacheable access
virt_mode : std_ulogic;
@ -108,7 +106,7 @@ architecture behave of loadstore1 is
elt_length => x"0", byte_reverse => '0', brev_mask => "000",
sign_extend => '0', update => '0',
xerc => xerc_init, reserve => '0',
atomic => '0', atomic_last => '0', rc => '0', nc => '0',
rc => '0', nc => '0',
virt_mode => '0', priv_mode => '0', load_sp => '0',
sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0',
dword_index => '0', two_dwords => '0', incomplete => '0');
@ -439,16 +437,10 @@ begin

addr := lsu_sum;
if l_in.second = '1' then
if l_in.update = '0' then
-- for the second half of a 16-byte transfer,
-- use the previous address plus 8.
addr := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + 1) & r1.addr0(2 downto 0);
else
-- for an update-form load, use the previous address
-- as the value to write back to RA.
addr := r1.addr0;
end if;
end if;
if l_in.mode_32bit = '1' then
addr(63 downto 32) := (others => '0');
end if;
@ -474,14 +466,12 @@ begin
misaligned := or (addr_mask and addr(2 downto 0));
v.align_intr := l_in.reserve and misaligned;

v.atomic := not misaligned;
v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);

case l_in.op is
when OP_STORE =>
v.store := '1';
when OP_LOAD =>
if l_in.update = '0' or l_in.second = '0' then
-- Note: only RA updates have l_in.second = 1
if l_in.second = '0' then
v.load := '1';
if HAS_FPU and l_in.is_32bit = '1' then
-- Allow an extra cycle for SP->DP precision conversion
@ -507,7 +497,6 @@ begin
when OP_FETCH_FAILED =>
-- send it to the MMU to do the radix walk
v.instr_fault := '1';
v.addr := l_in.nia;
v.mmu_op := '1';
when others =>
end case;
@ -953,8 +942,6 @@ begin
d_out.dcbz <= stage1_req.dcbz;
d_out.nc <= stage1_req.nc;
d_out.reserve <= stage1_req.reserve;
d_out.atomic <= stage1_req.atomic;
d_out.atomic_last <= stage1_req.atomic_last;
d_out.addr <= stage1_req.addr;
d_out.byte_sel <= stage1_req.byte_sel;
d_out.virt_mode <= stage1_req.virt_mode;
@ -965,8 +952,6 @@ begin
d_out.dcbz <= r2.req.dcbz;
d_out.nc <= r2.req.nc;
d_out.reserve <= r2.req.reserve;
d_out.atomic <= r2.req.atomic;
d_out.atomic_last <= r2.req.atomic_last;
d_out.addr <= r2.req.addr;
d_out.byte_sel <= r2.req.byte_sel;
d_out.virt_mode <= r2.req.virt_mode;

@ -9,6 +9,7 @@ filesets:
- wishbone_types.vhdl
- common.vhdl
- fetch1.vhdl
- predecode.vhdl
- decode1.vhdl
- helpers.vhdl
- decode2.vhdl
@ -65,6 +66,7 @@ filesets:
xilinx_specific:
files:
- xilinx-mult.vhdl : {file_type : vhdlSource-2008}
- xilinx-mult-32s.vhdl : {file_type : vhdlSource-2008}
- fpga/fpga-random.vhdl : {file_type : vhdlSource-2008}
- fpga/fpga-random.xdc : {file_type : xdc}

@ -144,7 +146,6 @@ targets:
- uart_is_16550
- has_fpu
- has_btc
- has_short_mult
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
@ -250,7 +251,6 @@ targets:
- uart_is_16550
- has_fpu
- has_btc
- has_short_mult
generate: [litedram_nexys_video, liteeth_nexys_video, litesdcard_nexys_video]
tools:
vivado: {part : xc7a200tsbg484-1}
@ -271,7 +271,6 @@ targets:
- has_uart1
- has_fpu=false
- has_btc=false
- has_short_mult
- use_litesdcard
tools:
vivado: {part : xc7a35ticsg324-1L}
@ -294,7 +293,6 @@ targets:
- has_uart1
- has_fpu=false
- has_btc=false
- has_short_mult
generate: [litedram_arty, liteeth_arty, litesdcard_arty]
tools:
vivado: {part : xc7a35ticsg324-1L}
@ -315,7 +313,6 @@ targets:
- has_uart1
- has_fpu
- has_btc
- has_short_mult
- use_litesdcard
tools:
vivado: {part : xc7a100ticsg324-1L}
@ -338,7 +335,6 @@ targets:
- has_uart1
- has_fpu
- has_btc
- has_short_mult
generate: [litedram_arty, liteeth_arty, litesdcard_arty]
tools:
vivado: {part : xc7a100ticsg324-1L}
@ -360,7 +356,6 @@ targets:
- uart_is_16550
- has_fpu
- has_btc
- has_short_mult
generate: [litesdcard_wukong-v2]
tools:
vivado: {part : xc7a100tfgg676-1}
@ -382,7 +377,6 @@ targets:
- uart_is_16550
- has_fpu
- has_btc
- has_short_mult
generate: [litedram_wukong-v2, liteeth_wukong-v2, litesdcard_wukong-v2]
tools:
vivado: {part : xc7a100tfgg676-1}
@ -498,12 +492,6 @@ parameters:
paramtype : generic
default : true

has_short_mult:
datatype : bool
description : Include a 16 bit x 16 bit single-cycle multiplier in the core
paramtype : generic
default : false

disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components

@ -0,0 +1,56 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

-- Signed 33b x 33b multiplier giving 64-bit product, with no addend,
-- with fixed 1-cycle latency.

entity multiply_32s is
port (
clk : in std_logic;
stall : in std_ulogic;

m_in : in MultiplyInputType;
m_out : out MultiplyOutputType
);
end entity multiply_32s;

architecture behaviour of multiply_32s is
type reg_type is record
valid : std_ulogic;
data : signed(65 downto 0);
end record;
constant reg_type_init : reg_type := (valid => '0', data => (others => '0'));

signal r, rin : reg_type := reg_type_init;
begin
multiply_0: process(clk)
begin
if rising_edge(clk) and stall = '0' then
r <= rin;
end if;
end process;

multiply_1: process(all)
variable v : reg_type;
variable d : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic;
begin
v.valid := m_in.valid;
v.data := signed((m_in.is_signed and m_in.data1(31)) & m_in.data1(31 downto 0)) *
signed((m_in.is_signed and m_in.data2(31)) & m_in.data2(31 downto 0));

d := std_ulogic_vector(r.data(63 downto 0));

ov := (or d(63 downto 31)) and not (and d(63 downto 31));

m_out.result <= 64x"0" & d;
m_out.overflow <= ov;
m_out.valid <= r.valid;

rin <= v;
end process;
end architecture behaviour;

@ -7,7 +7,7 @@ use work.common.all;

entity multiply is
generic (
PIPELINE_DEPTH : natural := 4
PIPELINE_DEPTH : natural := 3
);
port (
clk : in std_logic;
@ -23,11 +23,8 @@ architecture behaviour of multiply is
type multiply_pipeline_stage is record
valid : std_ulogic;
data : unsigned(127 downto 0);
is_32bit : std_ulogic;
not_res : std_ulogic;
end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
is_32bit => '0', not_res => '0',
data => (others => '0'));

type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
@ -52,31 +49,29 @@ begin

multiply_1: process(all)
variable v : reg_type;
variable a, b : std_ulogic_vector(64 downto 0);
variable prod : std_ulogic_vector(129 downto 0);
variable d : std_ulogic_vector(127 downto 0);
variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic;
begin
v := r;
a := (m.is_signed and m.data1(63)) & m.data1;
b := (m.is_signed and m.data2(63)) & m.data2;
prod := std_ulogic_vector(signed(a) * signed(b));
v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).data := (unsigned(m.data1) * unsigned(m.data2)) + unsigned(m.addend);
v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).not_res := m.not_result;
if m.subtract = '1' then
v.multiply_pipeline(0).data := unsigned(m.addend) - unsigned(prod(127 downto 0));
else
v.multiply_pipeline(0).data := unsigned(m.addend) + unsigned(prod(127 downto 0));
end if;

loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop;

d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then
d := not d;
end if;

ov := '0';
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
ov := (or d(63 downto 31)) and not (and d(63 downto 31));
else
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
end if;
ovf_in <= ov;

m_out.result <= d;

@ -26,15 +26,6 @@ architecture behave of multiply_tb is
signal m1 : MultiplyInputType := MultiplyInputInit;
signal m2 : MultiplyOutputType;

function absval(x: std_ulogic_vector) return std_ulogic_vector is
begin
if x(x'left) = '1' then
return std_ulogic_vector(- signed(x));
else
return x;
end if;
end;

begin
multiply_0: entity work.multiply
generic map (PIPELINE_DEPTH => pipeline_depth)
@ -51,7 +42,6 @@ begin
stim_process: process
variable ra, rb, rt, behave_rt: std_ulogic_vector(63 downto 0);
variable si: std_ulogic_vector(15 downto 0);
variable sign: std_ulogic;
variable rnd : RandomPType;
begin
rnd.InitSeed(stim_process'path_name);
@ -102,11 +92,11 @@ begin

behave_rt := ppc_mulld(ra, rb);

m1.data1 <= absval(ra);
m1.data2 <= absval(rb);
sign := ra(63) xor rb(63);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.data1 <= ra;
m1.data2 <= rb;
m1.is_signed <= '1';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;
@ -128,7 +118,8 @@ begin

m1.data1 <= ra;
m1.data2 <= rb;
m1.not_result <= '0';
m1.is_signed <= '0';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

@ -149,11 +140,11 @@ begin

behave_rt := ppc_mulhd(ra, rb);

m1.data1 <= absval(ra);
m1.data2 <= absval(rb);
sign := ra(63) xor rb(63);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.data1 <= ra;
m1.data2 <= rb;
m1.is_signed <= '1';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;
@ -173,13 +164,13 @@ begin

behave_rt := ppc_mullw(ra, rb);

m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= absval(rb(31 downto 0));
sign := ra(31) xor rb(31);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.data1 <= (others => ra(31));
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => rb(31));
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.is_signed <= '1';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;
@ -199,13 +190,13 @@ begin

behave_rt := ppc_mulhw(ra, rb);

m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= absval(rb(31 downto 0));
sign := ra(31) xor rb(31);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.data1 <= (others => ra(31));
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => rb(31));
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.is_signed <= '1';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;
@ -229,7 +220,8 @@ begin
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.not_result <= '0';
m1.is_signed <= '0';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

@ -250,12 +242,12 @@ begin

behave_rt := ppc_mulli(ra, si);

m1.data1 <= absval(ra);
m1.data2 <= (others => '0');
m1.data2(15 downto 0) <= absval(si);
sign := ra(63) xor si(15);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.data1 <= ra;
m1.data2 <= (others => si(15));
m1.data2(15 downto 0) <= si;
m1.is_signed <= '1';
m1.subtract <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;

@ -0,0 +1,592 @@
-- Instruction pre-decoder for microwatt
-- One cycle latency. Does 'WIDTH' instructions in parallel.

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.decode_types.all;
use work.insn_helpers.all;

entity predecoder is
generic (
HAS_FPU : boolean := true;
WIDTH : natural := 2;
ICODE_LEN : natural := 10;
IMAGE_LEN : natural := 26
);
port (
clk : in std_ulogic;
valid_in : in std_ulogic;
insns_in : in std_ulogic_vector(WIDTH * 32 - 1 downto 0);
icodes_out : out std_ulogic_vector(WIDTH * (ICODE_LEN + IMAGE_LEN) - 1 downto 0)
);
end entity predecoder;

architecture behaviour of predecoder is

type predecoder_rom_t is array(0 to 2047) of insn_code;

constant major_predecode_rom : predecoder_rom_t := (
2#001100_00000# to 2#001100_11111# => INSN_addic,
2#001101_00000# to 2#001101_11111# => INSN_addic_dot,
2#001110_00000# to 2#001110_11111# => INSN_addi,
2#001111_00000# to 2#001111_11111# => INSN_addis,
2#010011_00100# to 2#010011_00101# => INSN_addpcis,
2#011100_00000# to 2#011100_11111# => INSN_andi_dot,
2#011101_00000# to 2#011101_11111# => INSN_andis_dot,
2#000000_00000# => INSN_attn,
2#010010_00000# to 2#010010_11111# => INSN_b,
2#010000_00000# to 2#010000_11111# => INSN_bc,
2#001011_00000# to 2#001011_11111# => INSN_cmpi,
2#001010_00000# to 2#001010_11111# => INSN_cmpli,
2#100010_00000# to 2#100010_11111# => INSN_lbz,
2#100011_00000# to 2#100011_11111# => INSN_lbzu,
2#110010_00000# to 2#110010_11111# => INSN_lfd,
2#110011_00000# to 2#110011_11111# => INSN_lfdu,
2#110000_00000# to 2#110000_11111# => INSN_lfs,
2#110001_00000# to 2#110001_11111# => INSN_lfsu,
2#101010_00000# to 2#101010_11111# => INSN_lha,
2#101011_00000# to 2#101011_11111# => INSN_lhau,
2#101000_00000# to 2#101000_11111# => INSN_lhz,
2#101001_00000# to 2#101001_11111# => INSN_lhzu,
2#100000_00000# to 2#100000_11111# => INSN_lwz,
2#100001_00000# to 2#100001_11111# => INSN_lwzu,
2#000111_00000# to 2#000111_11111# => INSN_mulli,
2#011000_00000# to 2#011000_11111# => INSN_ori,
2#011001_00000# to 2#011001_11111# => INSN_oris,
2#010100_00000# to 2#010100_11111# => INSN_rlwimi,
2#010101_00000# to 2#010101_11111# => INSN_rlwinm,
2#010111_00000# to 2#010111_11111# => INSN_rlwnm,
2#010001_00000# to 2#010001_11111# => INSN_sc,
2#100110_00000# to 2#100110_11111# => INSN_stb,
2#100111_00000# to 2#100111_11111# => INSN_stbu,
2#110110_00000# to 2#110110_11111# => INSN_stfd,
2#110111_00000# to 2#110111_11111# => INSN_stfdu,
2#110100_00000# to 2#110100_11111# => INSN_stfs,
2#110101_00000# to 2#110101_11111# => INSN_stfsu,
2#101100_00000# to 2#101100_11111# => INSN_sth,
2#101101_00000# to 2#101101_11111# => INSN_sthu,
2#100100_00000# to 2#100100_11111# => INSN_stw,
2#100101_00000# to 2#100101_11111# => INSN_stwu,
2#001000_00000# to 2#001000_11111# => INSN_subfic,
2#000010_00000# to 2#000010_11111# => INSN_tdi,
2#000011_00000# to 2#000011_11111# => INSN_twi,
2#011010_00000# to 2#011010_11111# => INSN_xori,
2#011011_00000# to 2#011011_11111# => INSN_xoris,
-- major opcode 4
2#000100_10000# => INSN_maddhd,
2#000100_10001# => INSN_maddhdu,
2#000100_10011# => INSN_maddld,
-- major opcode 30
2#011110_01000# to 2#011110_01001# => INSN_rldic,
2#011110_01010# to 2#011110_01011# => INSN_rldic,
2#011110_00000# to 2#011110_00001# => INSN_rldicl,
2#011110_00010# to 2#011110_00011# => INSN_rldicl,
2#011110_00100# to 2#011110_00101# => INSN_rldicr,
2#011110_00110# to 2#011110_00111# => INSN_rldicr,
2#011110_01100# to 2#011110_01101# => INSN_rldimi,
2#011110_01110# to 2#011110_01111# => INSN_rldimi,
2#011110_10000# to 2#011110_10001# => INSN_rldcl,
2#011110_10010# to 2#011110_10011# => INSN_rldcr,
-- major opcode 58
2#111010_00000# => INSN_ld,
2#111010_00001# => INSN_ldu,
2#111010_00010# => INSN_lwa,
2#111010_00100# => INSN_ld,
2#111010_00101# => INSN_ldu,
2#111010_00110# => INSN_lwa,
2#111010_01000# => INSN_ld,
2#111010_01001# => INSN_ldu,
2#111010_01010# => INSN_lwa,
2#111010_01100# => INSN_ld,
2#111010_01101# => INSN_ldu,
2#111010_01110# => INSN_lwa,
2#111010_10000# => INSN_ld,
2#111010_10001# => INSN_ldu,
2#111010_10010# => INSN_lwa,
2#111010_10100# => INSN_ld,
2#111010_10101# => INSN_ldu,
2#111010_10110# => INSN_lwa,
2#111010_11000# => INSN_ld,
2#111010_11001# => INSN_ldu,
2#111010_11010# => INSN_lwa,
2#111010_11100# => INSN_ld,
2#111010_11101# => INSN_ldu,
2#111010_11110# => INSN_lwa,
-- major opcode 59
2#111011_00100# to 2#111011_00101# => INSN_fdivs,
2#111011_01000# to 2#111011_01001# => INSN_fsubs,
2#111011_01010# to 2#111011_01011# => INSN_fadds,
2#111011_01100# to 2#111011_01101# => INSN_fsqrts,
2#111011_10000# to 2#111011_10001# => INSN_fres,
2#111011_10010# to 2#111011_10011# => INSN_fmuls,
2#111011_10100# to 2#111011_10101# => INSN_frsqrtes,
2#111011_11000# to 2#111011_11001# => INSN_fmsubs,
2#111011_11010# to 2#111011_11011# => INSN_fmadds,
2#111011_11100# to 2#111011_11101# => INSN_fnmsubs,
2#111011_11110# to 2#111011_11111# => INSN_fnmadds,
-- major opcode 62
2#111110_00000# => INSN_std,
2#111110_00001# => INSN_stdu,
2#111110_00100# => INSN_std,
2#111110_00101# => INSN_stdu,
2#111110_01000# => INSN_std,
2#111110_01001# => INSN_stdu,
2#111110_01100# => INSN_std,
2#111110_01101# => INSN_stdu,
2#111110_10000# => INSN_std,
2#111110_10001# => INSN_stdu,
2#111110_10100# => INSN_std,
2#111110_10101# => INSN_stdu,
2#111110_11000# => INSN_std,
2#111110_11001# => INSN_stdu,
2#111110_11100# => INSN_std,
2#111110_11101# => INSN_stdu,
-- major opcode 63
2#111111_00100# to 2#111111_00101# => INSN_fdiv,
2#111111_01000# to 2#111111_01001# => INSN_fsub,
2#111111_01010# to 2#111111_01011# => INSN_fadd,
2#111111_01100# to 2#111111_01101# => INSN_fsqrt,
2#111111_01110# to 2#111111_01111# => INSN_fsel,
2#111111_10000# to 2#111111_10001# => INSN_fre,
2#111111_10010# to 2#111111_10011# => INSN_fmul,
2#111111_10100# to 2#111111_10101# => INSN_frsqrte,
2#111111_11000# to 2#111111_11001# => INSN_fmsub,
2#111111_11010# to 2#111111_11011# => INSN_fmadd,
2#111111_11100# to 2#111111_11101# => INSN_fnmsub,
2#111111_11110# to 2#111111_11111# => INSN_fnmadd,
others => INSN_illegal
);

constant row_predecode_rom : predecoder_rom_t := (
-- Major opcode 31
-- Address bits are 0, insn(10:1)
2#0_01000_01010# => INSN_add,
2#0_11000_01010# => INSN_add, -- addo
2#0_00000_01010# => INSN_addc,
2#0_10000_01010# => INSN_addc, -- addco
2#0_00100_01010# => INSN_adde,
2#0_10100_01010# => INSN_adde, -- addeo
2#0_00101_01010# => INSN_addex,
2#0_00010_01010# => INSN_addg6s,
2#0_00111_01010# => INSN_addme,
2#0_10111_01010# => INSN_addme, -- addmeo
2#0_00110_01010# => INSN_addze,
2#0_10110_01010# => INSN_addze, -- addzeo
2#0_00000_11100# => INSN_and,
2#0_00001_11100# => INSN_andc,
2#0_00111_11100# => INSN_bperm,
2#0_01001_11010# => INSN_cbcdtd,
2#0_01000_11010# => INSN_cdtbcd,
2#0_00000_00000# => INSN_cmp,
2#0_01111_11100# => INSN_cmpb,
2#0_00111_00000# => INSN_cmpeqb,
2#0_00001_00000# => INSN_cmpl,
2#0_00110_00000# => INSN_cmprb,
2#0_00001_11010# => INSN_cntlzd,
2#0_00000_11010# => INSN_cntlzw,
2#0_10001_11010# => INSN_cnttzd,
2#0_10000_11010# => INSN_cnttzw,
2#0_10111_10011# => INSN_darn,
2#0_00010_10110# => INSN_dcbf,
2#0_00001_10110# => INSN_dcbst,
2#0_01000_10110# => INSN_dcbt,
2#0_00111_10110# => INSN_dcbtst,
2#0_11111_10110# => INSN_dcbz,
2#0_01100_01001# => INSN_divdeu,
2#0_11100_01001# => INSN_divdeu, -- divdeuo
2#0_01100_01011# => INSN_divweu,
2#0_11100_01011# => INSN_divweu, -- divweuo
2#0_01101_01001# => INSN_divde,
2#0_11101_01001# => INSN_divde, -- divdeo
2#0_01101_01011# => INSN_divwe,
2#0_11101_01011# => INSN_divwe, -- divweo
2#0_01110_01001# => INSN_divdu,
2#0_11110_01001# => INSN_divdu, -- divduo
2#0_01110_01011# => INSN_divwu,
2#0_11110_01011# => INSN_divwu, -- divwuo
2#0_01111_01001# => INSN_divd,
2#0_11111_01001# => INSN_divd, -- divdo
2#0_01111_01011# => INSN_divw,
2#0_11111_01011# => INSN_divw, -- divwo
2#0_11001_10110# => INSN_nop, -- dss
2#0_01010_10110# => INSN_nop, -- dst
2#0_01011_10110# => INSN_nop, -- dstst
2#0_11010_10110# => INSN_eieio,
2#0_01000_11100# => INSN_eqv,
2#0_11101_11010# => INSN_extsb,
2#0_11100_11010# => INSN_extsh,
2#0_11110_11010# => INSN_extsw,
2#0_11011_11010# => INSN_extswsli,
2#0_11011_11011# => INSN_extswsli,
2#0_11110_10110# => INSN_icbi,
2#0_00000_10110# => INSN_icbt,
2#0_00000_01111# => INSN_isel,
2#0_00001_01111# => INSN_isel,
2#0_00010_01111# => INSN_isel,
2#0_00011_01111# => INSN_isel,
2#0_00100_01111# => INSN_isel,
2#0_00101_01111# => INSN_isel,
2#0_00110_01111# => INSN_isel,
2#0_00111_01111# => INSN_isel,
2#0_01000_01111# => INSN_isel,
2#0_01001_01111# => INSN_isel,
2#0_01010_01111# => INSN_isel,
2#0_01011_01111# => INSN_isel,
2#0_01100_01111# => INSN_isel,
2#0_01101_01111# => INSN_isel,
2#0_01110_01111# => INSN_isel,
2#0_01111_01111# => INSN_isel,
2#0_10000_01111# => INSN_isel,
2#0_10001_01111# => INSN_isel,
2#0_10010_01111# => INSN_isel,
2#0_10011_01111# => INSN_isel,
2#0_10100_01111# => INSN_isel,
2#0_10101_01111# => INSN_isel,
2#0_10110_01111# => INSN_isel,
2#0_10111_01111# => INSN_isel,
2#0_11000_01111# => INSN_isel,
2#0_11001_01111# => INSN_isel,
2#0_11010_01111# => INSN_isel,
2#0_11011_01111# => INSN_isel,
2#0_11100_01111# => INSN_isel,
2#0_11101_01111# => INSN_isel,
2#0_11110_01111# => INSN_isel,
2#0_11111_01111# => INSN_isel,
2#0_00001_10100# => INSN_lbarx,
2#0_11010_10101# => INSN_lbzcix,
2#0_00011_10111# => INSN_lbzux,
2#0_00010_10111# => INSN_lbzx,
2#0_00010_10100# => INSN_ldarx,
2#0_10000_10100# => INSN_ldbrx,
2#0_11011_10101# => INSN_ldcix,
2#0_00001_10101# => INSN_ldux,
2#0_00000_10101# => INSN_ldx,
2#0_10010_10111# => INSN_lfdx,
2#0_10011_10111# => INSN_lfdux,
2#0_11010_10111# => INSN_lfiwax,
2#0_11011_10111# => INSN_lfiwzx,
2#0_10000_10111# => INSN_lfsx,
2#0_10001_10111# => INSN_lfsux,
2#0_00011_10100# => INSN_lharx,
2#0_01011_10111# => INSN_lhaux,
2#0_01010_10111# => INSN_lhax,
2#0_11000_10110# => INSN_lhbrx,
2#0_11001_10101# => INSN_lhzcix,
2#0_01001_10111# => INSN_lhzux,
2#0_01000_10111# => INSN_lhzx,
2#0_00000_10100# => INSN_lwarx,
2#0_01011_10101# => INSN_lwaux,
2#0_01010_10101# => INSN_lwax,
2#0_10000_10110# => INSN_lwbrx,
2#0_11000_10101# => INSN_lwzcix,
2#0_00001_10111# => INSN_lwzux,
2#0_00000_10111# => INSN_lwzx,
2#0_10010_00000# => INSN_mcrxrx,
2#0_00000_10011# => INSN_mfcr,
2#0_00010_10011# => INSN_mfmsr,
2#0_01010_10011# => INSN_mfspr,
2#0_01000_01001# => INSN_modud,
2#0_01000_01011# => INSN_moduw,
2#0_11000_01001# => INSN_modsd,
2#0_11000_01011# => INSN_modsw,
2#0_00100_10000# => INSN_mtcrf,
2#0_00100_10010# => INSN_mtmsr,
2#0_00101_10010# => INSN_mtmsrd,
2#0_01110_10011# => INSN_mtspr,
2#0_00010_01001# => INSN_mulhd,
2#0_00000_01001# => INSN_mulhdu,
2#0_00010_01011# => INSN_mulhw,
2#0_00000_01011# => INSN_mulhwu,
-- next 4 have reserved bit set
2#0_10010_01001# => INSN_mulhd,
2#0_10000_01001# => INSN_mulhdu,
2#0_10010_01011# => INSN_mulhw,
2#0_10000_01011# => INSN_mulhwu,
2#0_00111_01001# => INSN_mulld,
2#0_10111_01001# => INSN_mulld, -- mulldo
2#0_00111_01011# => INSN_mullw,
2#0_10111_01011# => INSN_mullw, -- mullwo
2#0_01110_11100# => INSN_nand,
2#0_00011_01000# => INSN_neg,
2#0_10011_01000# => INSN_neg, -- nego
-- next 8 are reserved no-op instructions
2#0_10000_10010# => INSN_nop,
2#0_10001_10010# => INSN_nop,
2#0_10010_10010# => INSN_nop,
2#0_10011_10010# => INSN_nop,
2#0_10100_10010# => INSN_nop,
2#0_10101_10010# => INSN_nop,
2#0_10110_10010# => INSN_nop,
2#0_10111_10010# => INSN_nop,
2#0_00011_11100# => INSN_nor,
2#0_01101_11100# => INSN_or,
2#0_01100_11100# => INSN_orc,
2#0_00011_11010# => INSN_popcntb,
2#0_01111_11010# => INSN_popcntd,
2#0_01011_11010# => INSN_popcntw,
2#0_00101_11010# => INSN_prtyd,
2#0_00100_11010# => INSN_prtyw,
2#0_00100_00000# => INSN_setb,
2#0_01111_10010# => INSN_slbia,
2#0_00000_11011# => INSN_sld,
2#0_00000_11000# => INSN_slw,
2#0_11000_11010# => INSN_srad,
2#0_11001_11010# => INSN_sradi,
2#0_11001_11011# => INSN_sradi,
2#0_11000_11000# => INSN_sraw,
2#0_11001_11000# => INSN_srawi,
2#0_10000_11011# => INSN_srd,
2#0_10000_11000# => INSN_srw,
2#0_11110_10101# => INSN_stbcix,
2#0_10101_10110# => INSN_stbcx,
2#0_00111_10111# => INSN_stbux,
2#0_00110_10111# => INSN_stbx,
2#0_10100_10100# => INSN_stdbrx,
2#0_11111_10101# => INSN_stdcix,
2#0_00110_10110# => INSN_stdcx,
2#0_00101_10101# => INSN_stdux,
2#0_00100_10101# => INSN_stdx,
2#0_10110_10111# => INSN_stfdx,
2#0_10111_10111# => INSN_stfdux,
2#0_11110_10111# => INSN_stfiwx,
2#0_10100_10111# => INSN_stfsx,
2#0_10101_10111# => INSN_stfsux,
2#0_11100_10110# => INSN_sthbrx,
2#0_11101_10101# => INSN_sthcix,
2#0_10110_10110# => INSN_sthcx,
2#0_01101_10111# => INSN_sthux,
2#0_01100_10111# => INSN_sthx,
2#0_10100_10110# => INSN_stwbrx,
2#0_11100_10101# => INSN_stwcix,
2#0_00100_10110# => INSN_stwcx,
2#0_00101_10111# => INSN_stwux,
2#0_00100_10111# => INSN_stwx,
2#0_00001_01000# => INSN_subf,
2#0_10001_01000# => INSN_subf, -- subfo
2#0_00000_01000# => INSN_subfc,
2#0_10000_01000# => INSN_subfc, -- subfco
2#0_00100_01000# => INSN_subfe,
2#0_10100_01000# => INSN_subfe, -- subfeo
2#0_00111_01000# => INSN_subfme,
2#0_10111_01000# => INSN_subfme, -- subfmeo
2#0_00110_01000# => INSN_subfze,
2#0_10110_01000# => INSN_subfze, -- subfzeo
2#0_10010_10110# => INSN_sync,
2#0_00010_00100# => INSN_td,
2#0_00000_00100# => INSN_tw,
2#0_01001_10010# => INSN_tlbie,
2#0_01000_10010# => INSN_tlbiel,
2#0_10001_10110# => INSN_tlbsync,
2#0_00000_11110# => INSN_wait,
2#0_01001_11100# => INSN_xor,

-- Major opcode 19
-- Columns with insn(4) = '1' are all illegal and not mapped here; to
-- fit into 2048 entries, the columns are remapped so that 16-24 are
-- stored here as 8-15; in other words the address bits are
-- 1, insn(10..6), 1, insn(5), insn(3..1)
-- Columns 16-17 here are opcode 19 columns 0-1
-- Columns 24-31 here are opcode 19 columns 16-23
2#1_10000_11000# => INSN_bcctr,
2#1_00000_11000# => INSN_bclr,
2#1_10001_11000# => INSN_bctar,
2#1_01000_10001# => INSN_crand,
2#1_00100_10001# => INSN_crandc,
2#1_01001_10001# => INSN_creqv,
2#1_00111_10001# => INSN_crnand,
2#1_00001_10001# => INSN_crnor,
2#1_01110_10001# => INSN_cror,
2#1_01101_10001# => INSN_crorc,
2#1_00110_10001# => INSN_crxor,
2#1_00100_11110# => INSN_isync,
2#1_00000_10000# => INSN_mcrf,
2#1_00000_11010# => INSN_rfid,

-- Major opcode 59
-- Address bits are 1, insn(10..6), 1, 0, insn(3..1)
-- Only column 14 is valid here; columns 16-31 are handled in the major table
-- Column 14 is mapped to column 22.
-- Columns 20-23 here are opcode 59 columns 12-15
2#1_11010_10110# => INSN_fcfids,
2#1_11110_10110# => INSN_fcfidus,

-- Major opcode 63
-- Columns 0-15 are mapped here; columns 16-31 are in the major table.
-- Address bits are 1, insn(10:6), 0, insn(4:1)
-- Columns 0-15 here are opcode 63 columns 0-15
2#1_00000_00000# => INSN_fcmpu,
2#1_00001_00000# => INSN_fcmpo,
2#1_00010_00000# => INSN_mcrfs,
2#1_00100_00000# => INSN_ftdiv,
2#1_00101_00000# => INSN_ftsqrt,
2#1_00001_00110# => INSN_mtfsb,
2#1_00010_00110# => INSN_mtfsb,
2#1_00100_00110# => INSN_mtfsfi,
2#1_11010_00110# => INSN_fmrgow,
2#1_11110_00110# => INSN_fmrgew,
2#1_10010_00111# => INSN_mffs,
2#1_10110_00111# => INSN_mtfsf,
2#1_00000_01000# => INSN_fcpsgn,
2#1_00001_01000# => INSN_fneg,
2#1_00010_01000# => INSN_fmr,
2#1_00100_01000# => INSN_fnabs,
2#1_01000_01000# => INSN_fabs,
2#1_01100_01000# => INSN_frin,
2#1_01101_01000# => INSN_friz,
2#1_01110_01000# => INSN_frip,
2#1_01111_01000# => INSN_frim,
2#1_00000_01100# => INSN_frsp,
2#1_00000_01110# => INSN_fctiw,
2#1_00100_01110# => INSN_fctiwu,
2#1_11001_01110# => INSN_fctid,
2#1_11010_01110# => INSN_fcfid,
2#1_11101_01110# => INSN_fctidu,
2#1_11110_01110# => INSN_fcfidu,
2#1_00000_01111# => INSN_fctiwz,
2#1_00100_01111# => INSN_fctiwuz,
2#1_11001_01111# => INSN_fctidz,
2#1_11101_01111# => INSN_fctiduz,

others => INSN_illegal
);

constant IOUT_LEN : natural := ICODE_LEN + IMAGE_LEN;

type predec_t is record
image : std_ulogic_vector(31 downto 0);
maj_predecode : unsigned(ICODE_LEN - 1 downto 0);
row_predecode : unsigned(ICODE_LEN - 1 downto 0);
end record;

subtype index_t is integer range 0 to WIDTH-1;
type predec_array is array(index_t) of predec_t;

signal pred : predec_array;
signal valid : std_ulogic;

begin
predecode_0: process(clk)
variable majaddr : std_ulogic_vector(10 downto 0);
variable rowaddr : std_ulogic_vector(10 downto 0);
variable iword : std_ulogic_vector(31 downto 0);
variable majcode : insn_code;
variable rowcode : insn_code;
begin
if rising_edge(clk) then
valid <= valid_in;
for i in index_t loop
iword := insns_in(i * 32 + 31 downto i * 32);
pred(i).image <= iword;

if is_X(iword) then
pred(i).maj_predecode <= (others => 'X');
pred(i).row_predecode <= (others => 'X');
else
majaddr := iword(31 downto 26) & iword(4 downto 0);

-- row_predecode_rom is used for op 19, 31, 59, 63
-- addr bit 10 is 0 for op 31, 1 for 19, 59, 63
rowaddr(10) := iword(31) or not iword(29);
rowaddr(9 downto 5) := iword(10 downto 6);
if iword(28) = '0' then
-- op 19 and op 59
rowaddr(4 downto 3) := '1' & iword(5);
else
-- op 31 and 63; for 63 we only use this when iword(5) = '0'
rowaddr(4 downto 3) := iword(5 downto 4);
end if;
rowaddr(2 downto 0) := iword(3 downto 1);

majcode := major_predecode_rom(to_integer(unsigned(majaddr)));
pred(i).maj_predecode <= to_unsigned(insn_code'pos(majcode), ICODE_LEN);
rowcode := row_predecode_rom(to_integer(unsigned(rowaddr)));
pred(i).row_predecode <= to_unsigned(insn_code'pos(rowcode), ICODE_LEN);
end if;
end loop;
end if;
end process;

predecode_1: process(all)
variable iword : std_ulogic_vector(31 downto 0);
variable use_row : std_ulogic;
variable illegal : std_ulogic;
variable ici : std_ulogic_vector(IOUT_LEN - 1 downto 0);
variable icode : unsigned(ICODE_LEN - 1 downto 0);
begin
for i in index_t loop
iword := pred(i).image;
icode := pred(i).maj_predecode;
use_row := '0';
illegal := '0';

case iword(31 downto 26) is
when "000100" => -- 4
-- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*)
illegal := not iword(5);

when "010011" => -- 19
-- Columns 8-15 and 24-31 don't have any valid instructions
-- (where insn(5..1) is the column number).
-- addpcis (column 2) is in the major table
-- Other valid columns are mapped to columns in the second
-- half of the row table: columns 0-1 are mapped to 16-17
-- and 16-23 are mapped to 24-31.
illegal := iword(4);
use_row := iword(5) or (not iword(3) and not iword(2));

when "011000" => -- 24
-- ori, special-case the standard NOP
if std_match(iword, "01100000000000000000000000000000") then
icode := to_unsigned(insn_code'pos(INSN_nop), ICODE_LEN);
end if;

when "011111" => -- 31
-- major opcode 31, lots of things
-- Use the first half of the row table for all columns
use_row := '1';

when "111011" => -- 59
-- floating point operations, mostly single-precision
-- Columns 0-11 are illegal; columns 12-15 are mapped
-- to columns 20-23 in the second half of the row table,
-- and columns 16-31 are in the major table.
illegal := not iword(5) and (not iword(4) or not iword(3));
use_row := not iword(5);

when "111111" => -- 63
-- floating point operations, general and double-precision
-- Use columns 0-15 of the second half of the row table
-- for columns 0-15, and the major table for columns 16-31.
use_row := not iword(5);

when others =>
end case;
if use_row = '1' then
icode := pred(i).row_predecode;
end if;

-- Mark FP instructions as illegal if we don't have an FPU
if not HAS_FPU and not is_X(icode) and
to_integer(icode) >= insn_code'pos(INSN_first_frs) then
illegal := '1';
end if;

ici(31 downto 0) := iword;
ici(IOUT_LEN - 1 downto 32) := (others => '0');
if valid = '0' or illegal = '1' or is_X(icode) or
icode = to_unsigned(insn_code'pos(INSN_illegal), ICODE_LEN) then
-- Since an insn_code currently fits in 9 bits, use just
-- the most significant bit of ici to indicate illegal insns.
ici(IOUT_LEN - 1) := '1';
else
ici(IOUT_LEN - 1 downto IMAGE_LEN) := std_ulogic_vector(icode);
end if;
icodes_out(i * IOUT_LEN + IOUT_LEN - 1 downto i * IOUT_LEN) <= ici;
end loop;
end process;

end architecture behaviour;

@ -22,7 +22,7 @@ struct log_entry {
u64 ic_wb_adr: 3;
u64 ic_wb_ack: 1;

u64 ic_insn: 32;
u64 ic_insn: 36;
u64 ic_valid: 1;
u64 d1_valid: 1;
u64 d1_unit: 2;
@ -39,9 +39,8 @@ struct log_entry {
u64 e1_stall_out: 1;
u64 e1_redirect: 1;
u64 e1_valid: 1;
u64 e1_write_enable: 1;
u64 e1_unused: 3;

u64 e1_write_enable: 1;
u64 e1_irq_state: 1;
u64 e1_irq: 1;
u64 e1_exception: 1;
@ -49,7 +48,7 @@ struct log_entry {
u64 e1_msr_ir: 1;
u64 e1_msr_pr: 1;
u64 e1_msr_ee: 1;
u64 pad1: 5;
u64 pad1: 4;
u64 ls_state: 3;
u64 ls_dw_done: 1;
u64 ls_min_done: 1;
@ -88,13 +87,13 @@ const char *units[4] = { "--", "al", "ls", "fp" };
const char *ops[64] =
{
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "fpop ", "fpopi ", "icbi ", "icbt ", "isel ", "isync ", "ld ",
"st ", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ",
"mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ",
"rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ",
"tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ", "?62 ", "?63 "
"bcd ", "bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ",
"darn ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "icbi ", "icbt ",
"fpcmp ", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ",
"extswsl", "isel ", "isync ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ",
"mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ",
"popcnt ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ",
"shl ", "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "ffail ",
};

const char *spr_names[13] =
@ -135,7 +134,7 @@ int main(int ac, char **av)
(log.nia_lo << 2);
if (lineno % 20 == 1) {
printf(" fetch1 NIA icache decode1 decode2 execute1 loadstore dcache CR GSPR\n");
printf(" ---------------- TAHW S -WB-- pN --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n");
printf(" ---------------- TAHW S -WB-- pN ic --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n");
printf(" LdMy t csnSa IA IA it IA abc le srx EPID em tw rd mx tAwp vr csnSa 0 k\n");
}
printf("%4ld %c0000%.11llx %c ", lineno,
@ -154,12 +153,16 @@ int main(int ac, char **av)
FLAG(ic_wb_stall, 'S'),
FLAG(ic_wb_ack, 'a'),
PNIA(ic_part_nia));
if (log.ic_valid)
printf("%.8x", log.ic_insn);
else if (log.ic_fetch_failed)
printf("!!!!!!!!");
if (log.ic_valid) {
if (log.ic_insn & (1ul << 35))
printf("ill %.8lx", log.ic_insn & 0xfffffffful);
else
printf("%3lu x%.7lx", (long)(log.ic_insn >> 26),
(unsigned long)(log.ic_insn & 0x3ffffff));
} else if (log.ic_fetch_failed)
printf(" !!!!!!!!");
else
printf("--------");
printf("--- --------");
printf(" %c%c %.2llx ",
FLAG(ic_valid, '>'),
FLAG(d2_stall_out, '|'),

@ -59,7 +59,6 @@ entity soc is
SIM : boolean;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (23 downto 0 => '0', others => '1');
HAS_DRAM : boolean := false;
@ -335,7 +334,6 @@ begin
SIM => SIM,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
LOG_LENGTH => LOG_LENGTH,

@ -92,7 +92,6 @@ begin
intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;
interrupt_out.intr <= intr;

if intr = '1' then
srr1 := (others => '0');
if e_in.interrupt = '1' then
vec := e_in.intr_vec;
@ -106,7 +105,7 @@ begin
end if;
interrupt_out.srr1 <= srr1;

else
if intr = '0' then
if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg;
w_out.write_data <= e_in.write_data;

@ -0,0 +1,295 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

library unisim;
use unisim.vcomponents.all;

-- Signed 33b x 33b multiplier giving 64-bit product, with no addend.

entity multiply_32s is
port (
clk : in std_logic;
stall : in std_ulogic;

m_in : in MultiplyInputType;
m_out : out MultiplyOutputType
);
end entity multiply_32s;

architecture behaviour of multiply_32s is
signal clocken : std_ulogic;
signal data1 : std_ulogic_vector(52 downto 0);
signal data2 : std_ulogic_vector(34 downto 0);
signal m00_p, m01_p : std_ulogic_vector(47 downto 0);
signal m00_pc : std_ulogic_vector(47 downto 0);
signal m10_p, m11_p : std_ulogic_vector(47 downto 0);
signal m10_pc : std_ulogic_vector(47 downto 0);
signal p0_pat, p0_patb : std_ulogic;
signal p1_pat, p1_patb : std_ulogic;
signal product_lo : std_ulogic_vector(22 downto 0);

begin
-- sign extend if signed
data1(31 downto 0) <= m_in.data1(31 downto 0);
data1(52 downto 32) <= (others => m_in.is_signed and m_in.data1(31));
data2(31 downto 0) <= m_in.data2(31 downto 0);
data2(34 downto 32) <= (others => m_in.is_signed and m_in.data2(31));

clocken <= m_in.valid and not stall;

m00: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & data1(22 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & data2(16 downto 0),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m00_p,
PCIN => (others => '0'),
PCOUT => m00_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m01: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & data1(22 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => data2(34 downto 17),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "1010101",
P => m01_p,
PCIN => m00_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m10: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 1,
INMODEREG => 0,
MASK => x"fffffffe00ff",
OPMODEREG => 0,
PREG => 0,
USE_PATTERN_DETECT => "PATDET"
)
port map (
A => data1(52 downto 23),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & data2(16 downto 0),
BCIN => (others => '0'),
C => std_ulogic_vector(resize(signed(m01_p(38 downto 6)), 48)),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => clocken,
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => clocken,
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
P => m10_p,
PATTERNDETECT => p0_pat,
PATTERNBDETECT => p0_patb,
PCIN => (others => '0'),
PCOUT => m10_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m11: DSP48E1
generic map (
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MASK => x"fffffc000000",
OPMODEREG => 0,
PREG => 0,
USE_PATTERN_DETECT => "PATDET"
)
port map (
A => data1(52 downto 23),
ACIN => (others => '0'),
ALUMODE => "0000",
B => data2(34 downto 17),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => clocken,
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "1010101",
P => m11_p,
PATTERNDETECT => p1_pat,
PATTERNBDETECT => p1_patb,
PCIN => m10_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
RSTB => '0',
RSTC => '0',
RSTCTRL => '0',
RSTD => '0',
RSTINMODE => '0',
RSTM => '0',
RSTP => '0'
);

m_out.result(127 downto 64) <= (others => '0');
m_out.result(63 downto 40) <= m11_p(23 downto 0);
m_out.result(39 downto 23) <= m10_p(16 downto 0);
m_out.result(22 downto 0) <= product_lo;

m_out.overflow <= not ((p0_pat and p1_pat) or (p0_patb and p1_patb));

process(clk)
begin
if rising_edge(clk) and stall = '0' then
m_out.valid <= m_in.valid;
product_lo <= m01_p(5 downto 0) & m00_p(16 downto 0);
end if;
end process;

end architecture behaviour;

@ -18,26 +18,32 @@ entity multiply is
end entity multiply;

architecture behaviour of multiply is
signal d1sign : std_ulogic_vector(13 downto 0);
signal d2sign : std_ulogic_vector(4 downto 0);
signal m00_p, m01_p, m02_p, m03_p : std_ulogic_vector(47 downto 0);
signal m00_pc : std_ulogic_vector(47 downto 0);
signal m00_pc, m02_pc : std_ulogic_vector(47 downto 0);
signal m10_p, m11_p, m12_p, m13_p : std_ulogic_vector(47 downto 0);
signal m11_pc, m12_pc, m13_pc : std_ulogic_vector(47 downto 0);
signal m10_pc, m12_pc : std_ulogic_vector(47 downto 0);
signal m20_p, m21_p, m22_p, m23_p : std_ulogic_vector(47 downto 0);
signal m20_pc, m22_pc : std_ulogic_vector(47 downto 0);
signal pp0, pp1 : std_ulogic_vector(127 downto 0);
signal pp23 : std_ulogic_vector(127 downto 0);
signal sumlo : std_ulogic_vector(8 downto 0);
signal s0_pc, s1_pc : std_ulogic_vector(47 downto 0);
signal s0_carry, p0_carry : std_ulogic_vector(3 downto 0);
signal product : std_ulogic_vector(127 downto 0);
signal addend : std_ulogic_vector(127 downto 0);
signal s0_carry, p0_carry : std_ulogic_vector(3 downto 0);
signal p0_mask : std_ulogic_vector(47 downto 0);
signal p0_pat, p0_patb : std_ulogic;
signal p1_pat, p1_patb : std_ulogic;

signal req_32bit, r32_1 : std_ulogic;
signal rnot_1 : std_ulogic;
signal valid_1 : std_ulogic;
signal overflow, ovf_in : std_ulogic;
signal overflow : std_ulogic;

begin
addend <= m_in.addend;
addend <= m_in.addend when m_in.subtract = '0' else not m_in.addend;
d1sign <= (others => m_in.data1(63) and m_in.is_signed);
d2sign <= (others => m_in.data2(63) and m_in.is_signed);

m00: DSP48E1
generic map (
@ -55,12 +61,12 @@ begin
PREG => 1
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
A => 6x"0" & m_in.data1(23 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(16 downto 0),
BCIN => (others => '0'),
C => "00000000000000" & addend(33 downto 0),
C => 14x"0" & addend(33 downto 0),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -106,12 +112,14 @@ begin
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 1,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
A => 6x"0" & m_in.data1(23 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(33 downto 17),
@ -126,7 +134,7 @@ begin
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
@ -168,12 +176,12 @@ begin
PREG => 1
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
A => 6x"0" & m_in.data1(23 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(50 downto 34),
BCIN => (others => '0'),
C => x"0000000" & "000" & addend(50 downto 34),
C => 24x"0" & addend(57 downto 34),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -197,6 +205,7 @@ begin
OPMODE => "0110101",
P => m02_p,
PCIN => (others => '0'),
PCOUT => m02_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -220,17 +229,17 @@ begin
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
MREG => 1,
OPMODEREG => 0,
PREG => 1
PREG => 0
)
port map (
A => "0000000" & m_in.data1(22 downto 0),
A => 6x"0" & m_in.data1(23 downto 0),
ACIN => (others => '0'),
ALUMODE => "0000",
B => "00000" & m_in.data2(63 downto 51),
B => d2sign & m_in.data2(63 downto 51),
BCIN => (others => '0'),
C => x"000000" & '0' & addend(73 downto 51),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -245,15 +254,15 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => m_in.valid,
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
OPMODE => "1010101",
P => m03_p,
PCIN => (others => '0'),
PCIN => m02_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -277,16 +286,17 @@ begin
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0
PREG => 1
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
A => 6x"0" & m_in.data1(47 downto 24),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(16 downto 0),
BCIN => (others => '0'),
C => x"000" & "00" & m01_p(39 downto 6),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -301,15 +311,16 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => m_in.valid,
CEP => '0',
CEM => '0',
CEP => m_in.valid,
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
OPMODE => "0000101",
P => m10_p,
PCIN => (others => '0'),
PCOUT => m10_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -333,16 +344,17 @@ begin
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 1,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
A => 6x"0" & m_in.data1(47 downto 24),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(33 downto 17),
BCIN => (others => '0'),
C => x"000" & "00" & m02_p(39 downto 6),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -363,10 +375,9 @@ begin
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
OPMODE => "1010101",
P => m11_p,
PCIN => (others => '0'),
PCOUT => m11_pc,
PCIN => m10_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -390,16 +401,17 @@ begin
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0
PREG => 1
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
A => 6x"0" & m_in.data1(47 downto 24),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(50 downto 34),
BCIN => (others => '0'),
C => x"0000" & '0' & m03_p(36 downto 6),
C => 24x"0" & addend(81 downto 58),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -414,8 +426,8 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => m_in.valid,
CEP => '0',
CEM => '0',
CEP => m_in.valid,
CLK => clk,
D => (others => '0'),
INMODE => "00000",
@ -445,17 +457,19 @@ begin
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 1,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "0000000000000" & m_in.data1(39 downto 23),
A => 6x"0" & m_in.data1(47 downto 24),
ACIN => (others => '0'),
ALUMODE => "0000",
B => "00000" & m_in.data2(63 downto 51),
B => d2sign & m_in.data2(63 downto 51),
BCIN => (others => '0'),
C => x"0000000" & "000" & addend(90 downto 74),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -465,7 +479,7 @@ begin
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
@ -476,10 +490,9 @@ begin
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
OPMODE => "1010101",
P => m13_p,
PCIN => (others => '0'),
PCOUT => m13_pc,
PCIN => m12_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -501,12 +514,14 @@ begin
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0
PREG => 1
)
port map (
A => "000000" & m_in.data1(63 downto 40),
A => d1sign & m_in.data1(63 downto 48),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(16 downto 0),
@ -521,20 +536,21 @@ begin
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => m_in.valid,
CEP => '0',
CEM => '0',
CEP => m_in.valid,
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010101",
OPMODE => "0000101",
P => m20_p,
PCIN => m11_pc,
PCIN => (others => '0'),
PCOUT => m20_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -556,12 +572,14 @@ begin
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 1,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "000000" & m_in.data1(63 downto 40),
A => d1sign & m_in.data1(63 downto 48),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(33 downto 17),
@ -576,7 +594,7 @@ begin
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
@ -587,9 +605,9 @@ begin
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010101",
OPMODE => "1010101",
P => m21_p,
PCIN => m12_pc,
PCIN => m20_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -611,17 +629,19 @@ begin
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0
PREG => 1
)
port map (
A => "000000" & m_in.data1(63 downto 40),
A => d1sign & m_in.data1(63 downto 48),
ACIN => (others => '0'),
ALUMODE => "0000",
B => '0' & m_in.data2(50 downto 34),
BCIN => (others => '0'),
C => (others => '0'),
C => "00" & addend(127 downto 82),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -631,20 +651,21 @@ begin
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => m_in.valid,
CEP => '0',
CEM => '0',
CEP => m_in.valid,
CLK => clk,
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0010101",
OPMODE => "0110101",
P => m22_p,
PCIN => m13_pc,
PCIN => (others => '0'),
PCOUT => m22_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -666,17 +687,19 @@ begin
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 0,
INMODEREG => 0,
MREG => 1,
OPMODEREG => 0,
PREG => 0
)
port map (
A => "000000" & m_in.data1(63 downto 40),
A => d1sign & m_in.data1(63 downto 48),
ACIN => (others => '0'),
ALUMODE => "0000",
B => "00000" & m_in.data2(63 downto 51),
B => d2sign & m_in.data2(63 downto 51),
BCIN => (others => '0'),
C => x"00" & "000" & addend(127 downto 91),
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -686,7 +709,7 @@ begin
CEALUMODE => '0',
CEB1 => '0',
CEB2 => '0',
CEC => '1',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
@ -697,9 +720,9 @@ begin
D => (others => '0'),
INMODE => "00000",
MULTSIGNIN => '0',
OPMODE => "0110101",
OPMODE => "1010101",
P => m23_p,
PCIN => (others => '0'),
PCIN => m22_pc,
RSTA => '0',
RSTALLCARRYIN => '0',
RSTALUMODE => '0',
@ -712,6 +735,17 @@ begin
RSTP => '0'
);

pp0 <= std_ulogic_vector(resize(signed(m13_p(37 downto 0) & m12_p(16 downto 0) &
m01_p(40 downto 0) & m00_p(16 downto 0)), 128));
pp1 <= m23_p(28 downto 0) & m22_p(16 downto 0) & m11_p(40 downto 0) & m10_p(16 downto 0) & 24x"0";
-- pp2 <= std_ulogic_vector(resize(signed(m03_p(37 downto 0) & m02_p(16 downto 0) & 34x"0"), 128));
-- pp3 <= std_ulogic_vector(resize(signed(m21_p(34 downto 0) & m20_p(16 downto 0) & 48x"0"), 128));

pp23 <= std_ulogic_vector(resize(resize(signed(m03_p(37 downto 0) & m02_p(16 downto 0) & 34x"0"), 100) +
signed(m21_p(34 downto 0) & m20_p(16 downto 0) & 48x"0"), 128));

sumlo <= std_ulogic_vector(unsigned('0' & pp0(31 downto 24)) + unsigned('0' & pp1(31 downto 24)));

s0: DSP48E1
generic map (
ACASCREG => 0,
@ -725,16 +759,16 @@ begin
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 1,
PREG => 0,
USE_MULT => "none"
)
port map (
A => m22_p(5 downto 0) & x"0000" & m10_p(34 downto 27),
A => pp0(79 downto 50),
ACIN => (others => '0'),
ALUMODE => "0000",
B => m10_p(26 downto 9),
B => pp0(49 downto 32),
BCIN => (others => '0'),
C => m20_p(39 downto 0) & m02_p(5 downto 0) & "00",
C => pp1(79 downto 32),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYINSEL => "000",
@ -751,7 +785,7 @@ begin
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => valid_1,
CEP => '0',
CLK => clk,
D => (others => '0'),
INMODE => "00000",
@ -773,43 +807,43 @@ begin

s1: DSP48E1
generic map (
ACASCREG => 1,
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 1,
BCASCREG => 1,
BREG => 1,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 1,
CREG => 0,
INMODEREG => 0,
MREG => 0,
OPMODEREG => 0,
PREG => 0,
PREG => 1,
USE_MULT => "none"
)
port map (
A => x"000" & m22_p(41 downto 24),
A => pp0(127 downto 98),
ACIN => (others => '0'),
ALUMODE => "0000",
B => m22_p(23 downto 6),
B => pp0(97 downto 80),
BCIN => (others => '0'),
C => m23_p(36 downto 0) & x"00" & "0" & m20_p(41 downto 40),
C => pp1(127 downto 80),
CARRYCASCIN => '0',
CARRYIN => s0_carry(3),
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => valid_1,
CEA2 => '0',
CEAD => '0',
CEALUMODE => '0',
CEB1 => '0',
CEB2 => valid_1,
CEC => valid_1,
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CEP => valid_1,
CLK => clk,
D => (others => '0'),
INMODE => "00000",
@ -829,52 +863,48 @@ begin
RSTP => '0'
);

-- mask is 0 for 32-bit ops, 0x0000ffffffff for 64-bit
p0_mask(47 downto 31) <= (others => '0');
p0_mask(30 downto 0) <= (others => not r32_1);

p0: DSP48E1
generic map (
ACASCREG => 1,
ALUMODEREG => 1,
AREG => 1,
BCASCREG => 1,
BREG => 1,
ACASCREG => 0,
ALUMODEREG => 0,
AREG => 0,
BCASCREG => 0,
BREG => 0,
CARRYINREG => 0,
CARRYINSELREG => 0,
CREG => 1,
CREG => 0,
INMODEREG => 0,
MASK => x"00007fffffff",
MREG => 0,
OPMODEREG => 0,
PREG => 0,
SEL_MASK => "C",
PREG => 1,
USE_MULT => "none",
USE_PATTERN_DETECT => "PATDET"
)
port map (
A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0',
A => pp23(79 downto 50),
ACIN => (others => '0'),
ALUMODE => "00" & rnot_1 & '0',
B => (others => '0'),
B => pp23(49 downto 32),
BCIN => (others => '0'),
C => p0_mask,
C => (others => '0'),
CARRYCASCIN => '0',
CARRYIN => '0',
CARRYIN => sumlo(8),
CARRYINSEL => "000",
CARRYOUT => p0_carry,
CEA1 => '0',
CEA2 => valid_1,
CEA2 => '0',
CEAD => '0',
CEALUMODE => valid_1,
CEALUMODE => '0',
CEB1 => '0',
CEB2 => valid_1,
CEC => valid_1,
CEB2 => '0',
CEC => '0',
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '0',
CEP => '0',
CEP => valid_1,
CLK => clk,
D => (others => '0'),
INMODE => "00000",
@ -915,10 +945,10 @@ begin
USE_PATTERN_DETECT => "PATDET"
)
port map (
A => x"0000000" & '0' & m21_p(41),
A => pp23(127 downto 98),
ACIN => (others => '0'),
ALUMODE => "00" & rnot_1 & '0',
B => m21_p(40 downto 23),
B => pp23(97 downto 80),
BCIN => (others => '0'),
C => (others => '0'),
CARRYCASCIN => '0',
@ -930,7 +960,7 @@ begin
CEALUMODE => valid_1,
CEB1 => '0',
CEB2 => valid_1,
CEC => '0',
CEC => valid_1,
CECARRYIN => '0',
CECTRL => '0',
CED => '0',
@ -958,39 +988,26 @@ begin
RSTP => '0'
);

mult_out: process(all)
variable ov : std_ulogic;
begin
-- set overflow if the high bits are neither all zeroes nor all ones
if req_32bit = '0' then
ov := not ((p1_pat and p0_pat) or (p1_patb and p0_patb));
else
ov := not ((p1_pat and p0_pat and not product(31)) or
(p1_patb and p0_patb and product(31)));
end if;
ovf_in <= ov;

m_out.result <= product;
m_out.overflow <= overflow;
end process;

process(clk)
begin
if rising_edge(clk) then
if valid_1 = '1' then
if rnot_1 = '0' then
product(31 downto 0) <= m10_p(8 downto 0) & m01_p(5 downto 0) & m00_p(16 downto 0);
product(31 downto 0) <= sumlo(7 downto 0) & pp0(23 downto 0);
else
product(31 downto 0) <= not (m10_p(8 downto 0) & m01_p(5 downto 0) & m00_p(16 downto 0));
product(31 downto 0) <= not (sumlo(7 downto 0) & pp0(23 downto 0));
end if;
end if;
m_out.valid <= valid_1;
valid_1 <= m_in.valid;
req_32bit <= r32_1;
r32_1 <= m_in.is_32bit;
rnot_1 <= m_in.not_result;
overflow <= ovf_in;
rnot_1 <= m_in.subtract;
overflow <= not ((p1_pat and p0_pat) or (p1_patb and p0_patb));
end if;
end process;

m_out.result <= product;
m_out.overflow <= overflow;

end architecture behaviour;

library ieee;

Loading…
Cancel
Save