Merge pull request #245 from paulusmack/fpu

Add a simple FPU
pull/251/head
Michael Neuling 4 years ago committed by GitHub
commit e40e752b9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,7 +48,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl
core.vhdl fpu.vhdl

soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \

@ -13,8 +13,11 @@ package common is
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available
constant MSR_FE0 : integer := (63 - 52); -- Floating Exception mode
constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field
constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field
constant MSR_FE1 : integer := (63 - 55); -- Floating Exception mode
constant MSR_IR : integer := (63 - 58); -- Instruction Relocation
constant MSR_DR : integer := (63 - 59); -- Data Relocation
constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt
@ -53,8 +56,11 @@ package common is
-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR indice (can hold an SPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0);
-- Extended GPR index (can hold an SPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(6 downto 0);

-- FPR indices
subtype fpr_index_t is std_ulogic_vector(4 downto 0);

-- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31.
@ -64,6 +70,9 @@ package common is
-- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file.
--
-- FPRs are also stored in the register file, using GSPR
-- numbers from 64 to 95.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Indices conversion functions
@ -71,6 +80,7 @@ package common is
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t;

-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
@ -84,6 +94,38 @@ package common is
end record;
constant xerc_init : xer_common_t := (others => '0');

-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
constant FPSCR_FEX : integer := 63 - 33;
constant FPSCR_VX : integer := 63 - 34;
constant FPSCR_OX : integer := 63 - 35;
constant FPSCR_UX : integer := 63 - 36;
constant FPSCR_ZX : integer := 63 - 37;
constant FPSCR_XX : integer := 63 - 38;
constant FPSCR_VXSNAN : integer := 63 - 39;
constant FPSCR_VXISI : integer := 63 - 40;
constant FPSCR_VXIDI : integer := 63 - 41;
constant FPSCR_VXZDZ : integer := 63 - 42;
constant FPSCR_VXIMZ : integer := 63 - 43;
constant FPSCR_VXVC : integer := 63 - 44;
constant FPSCR_FR : integer := 63 - 45;
constant FPSCR_FI : integer := 63 - 46;
constant FPSCR_C : integer := 63 - 47;
constant FPSCR_FL : integer := 63 - 48;
constant FPSCR_FG : integer := 63 - 49;
constant FPSCR_FE : integer := 63 - 50;
constant FPSCR_FU : integer := 63 - 51;
constant FPSCR_VXSOFT : integer := 63 - 53;
constant FPSCR_VXSQRT : integer := 63 - 54;
constant FPSCR_VXCVI : integer := 63 - 55;
constant FPSCR_VE : integer := 63 - 56;
constant FPSCR_OE : integer := 63 - 57;
constant FPSCR_UE : integer := 63 - 58;
constant FPSCR_ZE : integer := 63 - 59;
constant FPSCR_XE : integer := 63 - 60;
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

type irq_state_t is (WRITE_SRR0, WRITE_SRR1);

-- For now, fixed 16 sources, make this either a parametric
@ -226,7 +268,7 @@ package common is
read2_enable : std_ulogic;
read2_reg : gspr_index_t;
read3_enable : std_ulogic;
read3_reg : gpr_index_t;
read3_reg : gspr_index_t;
end record;

type RegisterFileToDecode2Type is record
@ -264,7 +306,7 @@ package common is
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : gpr_index_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
ci : std_ulogic; -- cache-inhibited load/store
byte_reverse : std_ulogic;
@ -277,13 +319,15 @@ package common is
virt_mode : std_ulogic; -- do translation through TLB
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
mode_32bit : std_ulogic; -- trim addresses to 32 bits
is_32bit : std_ulogic;
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'), length => (others => '0'),
mode_32bit => '0', others => (others => '0'));
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'), length => (others => '0'),
mode_32bit => '0', is_32bit => '0', others => (others => '0'));

type Loadstore1ToExecute1Type is record
busy : std_ulogic;
@ -369,7 +413,7 @@ package common is
type Loadstore1ToWritebackType is record
valid : std_ulogic;
write_enable: std_ulogic;
write_reg : gpr_index_t;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t;
rc : std_ulogic;
@ -401,6 +445,43 @@ package common is
write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0'));

type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
single => '0', out_cr => '0');

type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
interrupt : std_ulogic;
illegal : std_ulogic;
end record;

type FPUToWritebackType is record
valid : std_ulogic;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;

type DividerToExecute1Type is record
valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0);
@ -473,10 +554,10 @@ package body common is
n := 13;
when others =>
n := 0;
return "000000";
return "0000000";
end case;
tmp := std_ulogic_vector(to_unsigned(n, 5));
return "1" & tmp;
return "01" & tmp;
end;

function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
@ -486,7 +567,7 @@ package body common is

function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
begin
return "0" & i;
return "00" & i;
end;

function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
@ -502,4 +583,9 @@ package body common is
begin
return s(5);
end;

function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is
begin
return "10" & f;
end;
end common;

@ -34,7 +34,7 @@ entity control is
gpr_b_read_in : in gspr_index_t;

gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gpr_index_t;
gpr_c_read_in : in gspr_index_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
@ -70,7 +70,6 @@ architecture rtl of control is
signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';

signal gpr_c_read_in_fmt : std_ulogic_vector(5 downto 0);
begin
gpr_hazard0: entity work.gpr_hazard
generic map (
@ -122,8 +121,6 @@ begin
use_bypass => gpr_bypass_b
);

gpr_c_read_in_fmt <= "0" & gpr_c_read_in;

gpr_hazard2: entity work.gpr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
@ -140,7 +137,7 @@ begin
gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,
gpr_read_in => gpr_c_read_in,

ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

@ -11,6 +11,7 @@ entity core is
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512
);
@ -79,6 +80,11 @@ architecture behave of core is
signal mmu_to_dcache: MmuToDcacheType;
signal dcache_to_mmu: DcacheToMmuType;

-- FPU signals
signal execute1_to_fpu: Execute1ToFPUType;
signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
@ -108,6 +114,7 @@ architecture behave of core is
signal rst_dec1 : std_ulogic := '1';
signal rst_dec2 : std_ulogic := '1';
signal rst_ex1 : std_ulogic := '1';
signal rst_fpu : std_ulogic := '1';
signal rst_ls1 : std_ulogic := '1';
signal rst_dbg : std_ulogic := '1';
signal alt_reset_d : std_ulogic;
@ -170,6 +177,7 @@ begin
rst_dec1 <= core_rst;
rst_dec2 <= core_rst;
rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst;
rst_dbg <= rst;
alt_reset_d <= alt_reset;
@ -224,6 +232,7 @@ begin

decode1_0: entity work.decode1
generic map(
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -244,6 +253,7 @@ begin
decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -267,6 +277,7 @@ begin
register_file_0: entity work.register_file
generic map (
SIM => SIM,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -280,7 +291,7 @@ begin
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
log_out => log_data(255 downto 184)
);

cr_file_0: entity work.cr_file
@ -294,12 +305,13 @@ begin
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
log_out => log_data(183 downto 171)
);

execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -309,9 +321,11 @@ begin
busy_out => ex1_busy_out,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq,
l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
@ -322,8 +336,32 @@ begin
log_wr_addr => log_wr_addr
);

with_fpu: if HAS_FPU generate
begin
fpu_0: entity work.fpu
port map (
clk => clk,
rst => rst_fpu,
e_in => execute1_to_fpu,
e_out => fpu_to_execute1,
w_out => fpu_to_writeback
);
end generate;

no_fpu: if not HAS_FPU generate
begin
fpu_to_execute1.busy <= '0';
fpu_to_execute1.exception <= '0';
fpu_to_execute1.interrupt <= '0';
fpu_to_execute1.illegal <= '0';
fpu_to_writeback.valid <= '0';
fpu_to_writeback.write_enable <= '0';
fpu_to_writeback.write_cr_enable <= '0';
end generate;

loadstore1_0: entity work.loadstore1
generic map (
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
@ -368,7 +406,7 @@ begin
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
log_out => log_data(170 downto 151)
);

writeback_0: entity work.writeback
@ -376,12 +414,13 @@ begin
clk => clk,
e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
complete_out => complete
);

log_data(151 downto 150) <= "00";
log_data(150) <= '0';
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.helpers.all;

entity zero_counter is
port (
@ -15,42 +16,6 @@ entity zero_counter is
end entity zero_counter;

architecture behaviour of zero_counter is
-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

signal inp : std_ulogic_vector(63 downto 0);
signal sum : std_ulogic_vector(64 downto 0);
signal msb_r : std_ulogic;

@ -8,6 +8,7 @@ use work.decode_types.all;

entity decode1 is
generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -54,7 +55,10 @@ architecture behaviour of decode1 is
type op_19_subop_array_t is array(0 to 7) of decode_rom_t;
type op_30_subop_array_t is array(0 to 15) of decode_rom_t;
type op_31_subop_array_t is array(0 to 1023) of decode_rom_t;
type op_59_subop_array_t is array(0 to 31) of decode_rom_t;
type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;
type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t;
type op_63_subop_array_1_t is array(0 to 16) of decode_rom_t;

constant major_decode_rom_array : major_rom_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
@ -72,6 +76,10 @@ architecture behaviour of decode1 is
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lbz
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu
50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd
51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu
48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha
43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau
40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz
@ -87,6 +95,10 @@ architecture behaviour of decode1 is
17 => (ALU, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sc
38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stb
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu
54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd
55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu
52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
@ -272,6 +284,12 @@ architecture behaviour of decode1 is
2#1101110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldcix
2#0000110101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- ldux
2#0000010101# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ldx
2#1001010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfdx
2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux
2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax
2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx
2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx
2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux
2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax
@ -350,6 +368,11 @@ architecture behaviour of decode1 is
2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stdcx
2#0010110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stdux
2#0010010101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdx
2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx
2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux
2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx
2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx
2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix
2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx
@ -389,6 +412,24 @@ architecture behaviour of decode1 is
others => decode_rom_init
);

constant decode_op_59_array : op_59_subop_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#01110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fcfid[u]s
2#10010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fdivs
2#10100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsubs
2#10101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fadds
2#10110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsqrts
2#11000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fres
2#11001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmuls
2#11010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- frsqrtes
2#11100# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmsubs
2#11101# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmadds
2#11110# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fnmsubs
2#11111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fnmadds
others => illegal_inst
);

constant decode_op_62_array : minor_rom_array_2_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
@ -397,6 +438,64 @@ architecture behaviour of decode1 is
others => decode_rom_init
);

-- indexed by bits 4..1 and 10..6 of instruction word
constant decode_op_63l_array : op_63_subop_array_0_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#000000000# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 0/0=fcmpu
2#000000001# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 1/0=fcmpo
2#000000010# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 2/0=mcrfs
2#000000100# => (FPU, OP_FPOP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 4/0=ftdiv
2#000000101# => (FPU, OP_FPOP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 5/0=ftsqrt
2#011000001# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/6=mtfsb1
2#011000010# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/6=mtfsb0
2#011000100# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/6=mtfsfi
2#011011010# => (FPU, OP_FPOP_I, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 26/6=fmrgow
2#011011110# => (FPU, OP_FPOP_I, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- 30/6=fmrgew
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
2#100000000# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/8=fcpsgn
2#100000001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/8=fneg
2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr
2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
2#100001100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 12/8=frin
2#100001101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 13/8=friz
2#100001110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 14/8=frip
2#100001111# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 15/8=frim
2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp
2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw
2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu
2#111011001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/14=fctid
2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid
2#111011101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/14=fctidu
2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu
2#111100000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/15=fctiwz
2#111100100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/15=fctiwuz
2#111111001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/15=fctidz
2#111111101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/15=fctiduz
others => illegal_inst
);

-- indexed by bits 4..1 of instruction word
constant decode_op_63h_array : op_63_subop_array_1_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#0010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fdiv
2#0100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsub
2#0101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fadd
2#0110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsqrt
2#0111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsel
2#1000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fre
2#1001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmul
2#1010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- frsqrte
2#1100# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmsub
2#1101# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmadd
2#1110# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fnmsub
2#1111# => (FPU, OP_FPOP, FRA, FRB, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fnmadd
others => illegal_inst
);

-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
@ -547,9 +646,28 @@ begin
when 58 =>
v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0))));

when 59 =>
if HAS_FPU then
-- floating point operations, mostly single-precision
v.decode := decode_op_59_array(to_integer(unsigned(f_in.insn(5 downto 1))));
if f_in.insn(5) = '0' and not std_match(f_in.insn(10 downto 1), "11-1001110") then
vi.override := '1';
end if;
end if;

when 62 =>
v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0))));

when 63 =>
if HAS_FPU then
-- floating point operations, general and double-precision
if f_in.insn(5) = '0' then
v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
else
v.decode := decode_op_63h_array(to_integer(unsigned(f_in.insn(4 downto 1))));
end if;
end if;

when others =>
end case;


@ -11,6 +11,7 @@ use work.insn_helpers.all;
entity decode2 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -73,12 +74,14 @@ architecture behaviour of decode2 is
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
--
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
elsif t = CIA then
return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
@ -92,6 +95,12 @@ architecture behaviour of decode2 is
case t is
when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when FRB =>
if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
else
ret := ('0', (others => '0'), (others => '0'));
end if;
when CONST_UI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI =>
@ -118,7 +127,7 @@ architecture behaviour of decode2 is
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data);
@ -137,6 +146,18 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
when FRS =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when FRC =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
when NONE =>
return ('0', (others => '0'), (others => '0'));
end case;
@ -150,16 +171,22 @@ architecture behaviour of decode2 is
return ('1', gpr_to_gspr(insn_rt(insn_in)));
when RA =>
return ('1', gpr_to_gspr(insn_ra(insn_in)));
when FRT =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)));
else
return ('0', "0000000");
end if;
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr);
when NONE =>
return ('0', "000000");
return ('0', "0000000");
end case;
end;

@ -212,7 +239,7 @@ architecture behaviour of decode2 is
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_write_valid : std_ulogic;
@ -281,11 +308,15 @@ begin
end process;

r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
else gpr_to_gspr(insn_rb(d_in.insn));
r_out.read3_reg <= insn_rcreg(d_in.insn) when d_in.decode.input_reg_c = RCR
else insn_rs(d_in.insn);
r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR
else fpr_to_gspr(insn_frc(d_in.insn)) when d_in.decode.input_reg_c = FRC and HAS_FPU
else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU
else gpr_to_gspr(insn_rs(d_in.insn));

c_out.read <= d_in.decode.input_cr;

@ -307,7 +338,7 @@ begin
mul_b := (others => '0');

--v.e.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr;
v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
@ -394,11 +425,11 @@ begin
gpr_b_read <= decoded_reg_b.reg;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg);
gpr_c_read <= decoded_reg_c.reg;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_bypass_avail <= '0';
if EX1_BYPASS then
if EX1_BYPASS and d_in.decode.unit = ALU then
cr_bypass_avail <= d_in.decode.output_cr;
end if;


@ -7,9 +7,11 @@ package decode_types is
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI,
OP_FPOP, OP_FPOP_I,
OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE,
OP_FPLOAD, OP_FPSTORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
@ -21,11 +23,11 @@ package decode_types is
OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED
);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);
type input_reg_c_t is (NONE, RS, RCR);
type output_reg_a_t is (NONE, RT, RA, SPR);
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, OV, ONE);

@ -47,7 +49,7 @@ package decode_types is

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST);
type unit_t is (NONE, ALU, LDST, FPU);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record

@ -13,6 +13,7 @@ use work.ppc_fx_insns.all;
entity execute1 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -26,12 +27,14 @@ entity execute1 is

e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type;
fp_in : in FPUToExecute1Type;

ext_irq_in : std_ulogic;

-- asynchronous
l_out : out Execute1ToLoadstore1Type;
f_out : out Execute1ToFetch1Type;
fp_out : out Execute1ToFPUType;

e_out : out Execute1ToWritebackType;

@ -53,6 +56,7 @@ architecture behaviour of execute1 is
f : Execute1ToFetch1Type;
busy: std_ulogic;
terminate: std_ulogic;
fp_exception_next : std_ulogic;
trace_next : std_ulogic;
prev_op : insn_type_t;
lr_update : std_ulogic;
@ -71,7 +75,8 @@ architecture behaviour of execute1 is
end record;
constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, f => Execute1ToFetch1Init,
busy => '0', lr_update => '0', terminate => '0', trace_next => '0', prev_op => OP_ILLEGAL,
busy => '0', lr_update => '0', terminate => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));
@ -267,7 +272,7 @@ begin
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;

busy_out <= l_in.busy or r.busy;
busy_out <= l_in.busy or r.busy or fp_in.busy;
valid_in <= e_in.valid and not busy_out;

terminate_out <= r.terminate;
@ -333,6 +338,7 @@ begin
variable spr_val : std_ulogic_vector(63 downto 0);
variable addend : std_ulogic_vector(127 downto 0);
variable do_trace : std_ulogic;
variable fv : Execute1ToFPUType;
begin
result := (others => '0');
sum_with_carry := (others => '0');
@ -346,6 +352,7 @@ begin
v.e := Execute1ToWritebackInit;
lv := Execute1ToLoadstore1Init;
v.f.redirect := '0';
fv := Execute1ToFPUInit;

-- XER forwarding. To avoid having to track XER hazards, we
-- use the previously latched value.
@ -521,9 +528,11 @@ begin
exception_nextpc := '0';
v.e.exc_write_enable := '0';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := e_in.nia;
if valid_in = '1' then
v.e.exc_write_data := e_in.nia;
v.last_nia := e_in.nia;
else
v.e.exc_write_data := r.last_nia;
end if;

v.e.mode_32bit := not ctrl.msr(MSR_SF);
@ -542,24 +551,36 @@ begin
ctrl_tmp.msr(MSR_PR) <= '0';
ctrl_tmp.msr(MSR_SE) <= '0';
ctrl_tmp.msr(MSR_BE) <= '0';
ctrl_tmp.msr(MSR_FP) <= '0';
ctrl_tmp.msr(MSR_FE0) <= '0';
ctrl_tmp.msr(MSR_FE1) <= '0';
ctrl_tmp.msr(MSR_IR) <= '0';
ctrl_tmp.msr(MSR_DR) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
ctrl_tmp.msr(MSR_LE) <= '1';
v.e.valid := '1';
v.trace_next := '0';
v.fp_exception_next := '0';
report "Writing SRR1: " & to_hstring(ctrl.srr1);

elsif r.trace_next = '1' and valid_in = '1' then
-- Generate a trace interrupt rather than executing the next instruction
-- or taking any asynchronous interrupt
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64));
ctrl_tmp.srr1(63 - 33) <= '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
ctrl_tmp.srr1(63 - 35) <= '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
ctrl_tmp.srr1(63 - 36) <= '1';
elsif valid_in = '1' and ((HAS_FPU and r.fp_exception_next = '1') or r.trace_next = '1') then
if HAS_FPU and r.fp_exception_next = '1' then
-- This is used for FP-type program interrupts that
-- become pending due to MSR[FE0,FE1] changing from 00 to non-zero.
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
ctrl_tmp.srr1(63 - 43) <= '1';
ctrl_tmp.srr1(63 - 47) <= '1';
else
-- Generate a trace interrupt rather than executing the next instruction
-- or taking any asynchronous interrupt
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64));
ctrl_tmp.srr1(63 - 33) <= '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
ctrl_tmp.srr1(63 - 35) <= '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
ctrl_tmp.srr1(63 - 36) <= '1';
end if;
end if;
exception := '1';

@ -578,7 +599,19 @@ begin
-- set bit 45 to indicate privileged instruction type interrupt
ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction";

elsif not HAS_FPU and valid_in = '1' and
(e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations
illegal := '1';

elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and
(e_in.unit = FPU or e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- generate a floating-point unavailable interrupt
exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64));
report "FP unavailable interrupt";

elsif valid_in = '1' and e_in.unit = ALU then

report "execute nia " & to_hstring(e_in.nia);
@ -793,6 +826,10 @@ begin
is_branch := '1';
taken_branch := '1';
abs_branch := '1';
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(a_in(MSR_FE0) or a_in(MSR_FE1));
end if;
do_trace := '0';

when OP_CNTZ =>
@ -964,6 +1001,10 @@ begin
ctrl_tmp.msr(MSR_IR) <= '1';
ctrl_tmp.msr(MSR_DR) <= '1';
end if;
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(c_in(MSR_FE0) or c_in(MSR_FE1));
end if;
end if;
when OP_MTSPR =>
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
@ -1080,6 +1121,8 @@ begin
lv.valid := '1';
elsif e_in.unit = NONE then
illegal := '1';
elsif HAS_FPU and e_in.unit = FPU then
fv.valid := '1';
end if;

elsif r.f.redirect = '1' then
@ -1154,7 +1197,17 @@ begin
v.e.valid := '1';
end if;

if illegal = '1' then
-- Generate FP-type program interrupt. fp_in.interrupt will only
-- be set during the execution of a FP instruction.
-- The case where MSR[FE0,FE1] goes from zero to non-zero is
-- handled above by mtmsrd and rfid setting v.fp_exception_next.
if HAS_FPU and fp_in.interrupt = '1' then
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
ctrl_tmp.srr1(63 - 43) <= '1';
exception := '1';
end if;

if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then
exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
-- Since we aren't doing Hypervisor emulation assist (0xe40) we
@ -1200,7 +1253,6 @@ begin
end if;
v.e.exc_write_enable := '1';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := r.last_nia;
report "ldst exception writing srr0=" & to_hstring(r.last_nia);
end if;

@ -1225,7 +1277,7 @@ begin
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;
lv.write_reg := gspr_to_gpr(e_in.write_reg);
lv.write_reg := e_in.write_reg;
lv.length := e_in.data_len;
lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
lv.sign_extend := e_in.sign_extend;
@ -1243,6 +1295,20 @@ begin
lv.virt_mode := ctrl.msr(MSR_DR);
lv.priv_mode := not ctrl.msr(MSR_PR);
lv.mode_32bit := not ctrl.msr(MSR_SF);
lv.is_32bit := e_in.is_32bit;

-- Outputs to FPU
fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn;
fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in;
fv.frb := b_in;
fv.frc := c_in;
fv.frt := e_in.write_reg;
fv.rc := e_in.rc;
fv.out_cr := e_in.output_cr;

-- Update registers
rin <= v;
@ -1251,6 +1317,7 @@ begin
f_out <= r.f;
l_out <= lv;
e_out <= r.e;
fp_out <= fv;
flush_out <= f_out.redirect;

exception_log <= exception;

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -168,6 +169,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

@ -11,6 +11,7 @@ entity toplevel is
RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
UART_IS_16550 : boolean := true
);
@ -68,6 +69,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
UART0_IS_16550 => UART_IS_16550
)

@ -14,6 +14,7 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
@ -120,6 +121,7 @@ begin
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,

2568
fpu.vhdl

File diff suppressed because it is too large Load Diff

@ -2,6 +2,9 @@ library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity gpr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
@ -15,13 +18,13 @@ entity gpr_hazard is
issuing : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0);
gpr_write_in : in gspr_index_t;
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0);
gpr_read_in : in gspr_index_t;

ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in std_ulogic_vector(5 downto 0);
ugpr_write_reg : in gspr_index_t;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
@ -31,9 +34,9 @@ architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
gpr : gspr_index_t;
ugpr_valid : std_ulogic;
ugpr : std_ulogic_vector(5 downto 0);
ugpr : gspr_index_t;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));

@ -25,6 +25,10 @@ package helpers is
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;

function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;

function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
end package helpers;

package body helpers is
@ -206,4 +210,53 @@ package body helpers is
return std_ulogic_vector(ret);

end;

-- Reverse the order of bits in a word
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
variable ret: std_ulogic_vector(a'left downto a'right);
begin
for i in a'right to a'left loop
ret(a'left + a'right - i) := a(i);
end loop;
return ret;
end;

-- If there is only one bit set in a doubleword, return its bit number
-- (counting from the right). Each bit of the result is obtained by
-- ORing together 32 bits of the input:
-- bit 0 = a[1] or a[3] or a[5] or ...
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
-- bit 2 = a[4..7] or a[12..15] or ...
-- bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(5 downto 0);
variable stride: natural;
variable bit: std_ulogic;
variable k: natural;
begin
stride := 2;
for i in 0 to 5 loop
bit := '0';
for j in 0 to (64 / stride) - 1 loop
k := j * stride;
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
end loop;
ret(i) := bit;
stride := stride * 2;
end loop;
return ret;
end;

-- Count leading zeroes operation
-- Assumes the value passed in is not zero (if it is, zero is returned)
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
variable rev: std_ulogic_vector(val'left downto val'right);
variable sum: std_ulogic_vector(val'left downto val'right);
variable onehot: std_ulogic_vector(val'left downto val'right);
begin
rev := bit_reverse(val);
sum := std_ulogic_vector(- signed(rev));
onehot := sum and rev;
return bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
end;
end package body helpers;

@ -37,6 +37,11 @@ package insn_helpers is
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
end package insn_helpers;

package body insn_helpers is
@ -214,4 +219,29 @@ package body insn_helpers is
begin
return insn_in(5) & insn_in(10 downto 6);
end;

function insn_frt(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_fra(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_frb(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_frc(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_u(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 12);
end;
end package body insn_helpers;

@ -5,12 +5,15 @@ use ieee.numeric_std.all;
library work;
use work.decode_types.all;
use work.common.all;
use work.insn_helpers.all;
use work.helpers.all;

-- 2 cycle LSU
-- We calculate the address in the first cycle

entity loadstore1 is
generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -42,10 +45,12 @@ architecture behave of loadstore1 is

-- State machine for unaligned loads/stores
type state_t is (IDLE, -- ready for instruction
FPR_CONV, -- converting double to float for store
SECOND_REQ, -- send 2nd request of unaligned xfer
ACK_WAIT, -- waiting for ack from dcache
MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
FINISH_LFS, -- write back converted SP data for lfs*
COMPLETE -- extra cycle to complete an operation
);

@ -58,7 +63,7 @@ architecture behave of loadstore1 is
addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0);
write_reg : gpr_index_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
@ -86,6 +91,11 @@ architecture behave of loadstore1 is
do_update : std_ulogic;
extra_cycle : std_ulogic;
mode_32bit : std_ulogic;
load_sp : std_ulogic;
ld_sp_data : std_ulogic_vector(31 downto 0);
ld_sp_nz : std_ulogic;
ld_sp_lz : std_ulogic_vector(5 downto 0);
st_sp_data : std_ulogic_vector(31 downto 0);
end record;

type byte_sel_t is array(0 to 7) of std_ulogic;
@ -95,6 +105,9 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0);

signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data : std_ulogic_vector(63 downto 0);

-- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
@ -125,6 +138,72 @@ architecture behave of loadstore1 is
to_integer(unsigned(address))));
end function xfer_data_sel;

-- 23-bit right shifter for DP -> SP float conversions
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := '0' & frac(22 downto 1);
when "10" =>
fs1 := "00" & frac(22 downto 2);
when others =>
fs1 := "000" & frac(22 downto 3);
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := x"0" & fs1(22 downto 4);
when "010" =>
fs2 := x"00" & fs1(22 downto 8);
when "011" =>
fs2 := x"000" & fs1(22 downto 12);
when "100" =>
fs2 := x"0000" & fs1(22 downto 16);
when others =>
fs2 := x"00000" & fs1(22 downto 20);
end case;
return fs2;
end;

-- 23-bit left shifter for SP -> DP float conversions
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
return std_ulogic_vector is
variable fs1 : std_ulogic_vector(22 downto 0);
variable fs2 : std_ulogic_vector(22 downto 0);
begin
case shift(1 downto 0) is
when "00" =>
fs1 := frac;
when "01" =>
fs1 := frac(21 downto 0) & '0';
when "10" =>
fs1 := frac(20 downto 0) & "00";
when others =>
fs1 := frac(19 downto 0) & "000";
end case;
case shift(4 downto 2) is
when "000" =>
fs2 := fs1;
when "001" =>
fs2 := fs1(18 downto 0) & x"0" ;
when "010" =>
fs2 := fs1(14 downto 0) & x"00";
when "011" =>
fs2 := fs1(10 downto 0) & x"000";
when "100" =>
fs2 := fs1(6 downto 0) & x"0000";
when others =>
fs2 := fs1(2 downto 0) & x"00000";
end case;
return fs2;
end;

begin
-- Calculate the address in the first cycle
lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
@ -142,6 +221,59 @@ begin
end if;
end process;

ls_fp_conv: if HAS_FPU generate
-- Convert DP data to SP for stfs
dp_to_sp: process(all)
variable exp : unsigned(10 downto 0);
variable frac : std_ulogic_vector(22 downto 0);
variable shift : unsigned(4 downto 0);
begin
store_sp_data(31) <= l_in.data(63);
store_sp_data(30 downto 0) <= (others => '0');
exp := unsigned(l_in.data(62 downto 52));
if exp > 896 then
store_sp_data(30) <= l_in.data(62);
store_sp_data(29 downto 0) <= l_in.data(58 downto 29);
elsif exp >= 874 then
-- denormalization required
frac := '1' & l_in.data(51 downto 30);
shift := 0 - exp(4 downto 0);
store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
end if;
end process;

-- Convert SP data to DP for lfs
sp_to_dp: process(all)
variable exp : unsigned(7 downto 0);
variable exp_dp : unsigned(10 downto 0);
variable exp_nz : std_ulogic;
variable exp_ao : std_ulogic;
variable frac : std_ulogic_vector(22 downto 0);
variable frac_shift : unsigned(4 downto 0);
begin
frac := r.ld_sp_data(22 downto 0);
exp := unsigned(r.ld_sp_data(30 downto 23));
exp_nz := or (r.ld_sp_data(30 downto 23));
exp_ao := and (r.ld_sp_data(30 downto 23));
frac_shift := (others => '0');
if exp_ao = '1' then
exp_dp := to_unsigned(2047, 11); -- infinity or NaN
elsif exp_nz = '1' then
exp_dp := 896 + resize(exp, 11); -- finite normalized value
elsif r.ld_sp_nz = '0' then
exp_dp := to_unsigned(0, 11); -- zero
else
-- denormalized SP operand, need to normalize
exp_dp := 896 - resize(unsigned(r.ld_sp_lz), 11);
frac_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
end if;
load_dp_data(63) <= r.ld_sp_data(31);
load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
load_dp_data(28 downto 0) <= (others => '0');
end process;
end generate;

loadstore1_1: process(all)