-- File: microwatt/fpu.vhdl
-- (listing metadata: 3259 lines, 138 KiB, VHDL)

-- Floating-point unit for Microwatt
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.insn_helpers.all;
use work.decode_types.all;
use work.crhelpers.all;
use work.helpers.all;
use work.common.all;
-- Interface of the floating-point unit.
--   clk      : clock; the fpu_0 register process acts on its rising edge.
--   rst      : reset, sampled inside the rising_edge(clk) branch (synchronous).
--   flush_in : flush request; handled together with rst to put the state
--              machine back to IDLE and clear the completion/write flags.
--   e_in     : request record of type Execute1ToFPUType (declared elsewhere;
--              presumably driven by the execute1 stage -- confirm).
--   e_out    : response record of type FPUToExecute1Type (presumably returned
--              to execute1 -- confirm).
--   w_out    : result record of type FPUToWritebackType (presumably consumed
--              by writeback -- confirm).
entity fpu is
port (
clk : in std_ulogic;
rst : in std_ulogic;
flush_in : in std_ulogic;
e_in : in Execute1ToFPUType;
e_out : out FPUToExecute1Type;
w_out : out FPUToWritebackType
);
end entity fpu;
architecture behaviour of fpu is
-- Classification of a decoded operand or of a result (see decode_dp / pack_dp).
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
-- Width of the signed exponent-sized fields (exponent, result_exp, shift).
constant EXP_BITS : natural := 13;
-- Bit positions inside the 64-bit "8.56" mantissa format used by the datapath.
-- UNIT_BIT is the position of the unit (integer) bit: 56 fraction bits lie below it.
constant UNIT_BIT : natural := 56;
-- Bit just below the unit bit; pack_dp copies it (OR quieten_nan) into the
-- top fraction bit of a NaN result.
constant QNAN_BIT : natural := UNIT_BIT - 1;
-- Single precision: 23 fraction bits below the unit bit, so the LSB is bit 33;
-- SP_GBIT (32) and SP_RBIT (31) are the two bits directly below it that
-- fp_rounding examines.
constant SP_LSB : natural := UNIT_BIT - 23;
constant SP_GBIT : natural := SP_LSB - 1;
constant SP_RBIT : natural := SP_LSB - 2;
-- Double precision: 52 fraction bits below the unit bit, so the LSB is bit 4;
-- DP_GBIT (3) and DP_RBIT (2) are the two bits directly below it.
constant DP_LSB : natural := UNIT_BIT - 52;
constant DP_GBIT : natural := DP_LSB - 1;
constant DP_RBIT : natural := DP_LSB - 2;
-- Unpacked form of one operand, as returned by decode_dp.
-- For integer inputs decode_dp sets exponent to 0 and places the integer
-- value directly in mantissa.
type fpu_reg_type is record
-- ZERO / FINITE / INFINITY / NAN
class : fp_number_class;
-- sign bit of the operand ('1' = negative)
negative : std_ulogic;
exponent : signed(EXP_BITS-1 downto 0); -- unbiased
mantissa : std_ulogic_vector(63 downto 0); -- 8.56 format
end record;
-- States of the FPU sequencer (held in reg_type.state).
-- IDLE is the state entered on reset or flush.
-- NOTE(review): the grouping below is read off the state names only
-- (DO_* = per-instruction entry states, ADD_*/FMADD_*/DIV_*/SQRT_* = steps of
-- the respective operation, INT_* = integer conversion, ROUND*/NORMALIZE/
-- DENORM/RENORM_* = result post-processing, IDIV_* = integer divide/modulo);
-- confirm against the state machine body, which is not part of this section.
-- Do not reorder: the literal order determines the enumeration positions.
type state_t is (IDLE, DO_ILLEGAL,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
DO_FCFID, DO_FCTI,
DO_FRSP, DO_FRI,
DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
DO_FRE, DO_FRSQRTE,
DO_FSEL,
FRI_1,
ADD_1, ADD_SHIFT, ADD_2, ADD_3,
CMP_1, CMP_2,
MULT_1,
FMADD_1, FMADD_2, FMADD_3,
FMADD_4, FMADD_5, FMADD_6,
LOOKUP,
DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
FRE_1,
RSQRT_1,
FTDIV_1,
SQRT_1, SQRT_2, SQRT_3, SQRT_4,
SQRT_5, SQRT_6, SQRT_7, SQRT_8,
SQRT_9, SQRT_10, SQRT_11, SQRT_12,
INT_SHIFT, INT_ROUND, INT_ISHIFT,
INT_FINAL, INT_CHECK, INT_OFLOW,
FINISH, NORMALIZE,
ROUND_UFLOW, ROUND_OFLOW,
ROUNDING, ROUNDING_2, ROUNDING_3,
DENORM,
RENORM_A, RENORM_A2,
RENORM_B, RENORM_B2,
RENORM_C, RENORM_C2,
NAN_RESULT, EXC_RESULT,
DO_IDIVMOD,
IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3,
IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5,
IDIV_DODIV, IDIV_SH32,
IDIV_DIV, IDIV_DIV2, IDIV_DIV3, IDIV_DIV4, IDIV_DIV5,
IDIV_DIV6, IDIV_DIV7, IDIV_DIV8, IDIV_DIV9,
IDIV_EXT_TBH, IDIV_EXT_TBH2, IDIV_EXT_TBH3,
IDIV_EXT_TBH4, IDIV_EXT_TBH5,
IDIV_EXTDIV, IDIV_EXTDIV1, IDIV_EXTDIV2, IDIV_EXTDIV3,
IDIV_EXTDIV4, IDIV_EXTDIV5, IDIV_EXTDIV6,
IDIV_MODADJ, IDIV_MODSUB, IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO);
-- Complete registered state of the FPU; the signals r and rin have this type.
-- Only fields whose role is established by code visible in this section are
-- annotated; the remaining fields are used by the state machine body.
type reg_type is record
-- Current sequencer state; forced to IDLE on reset or flush.
state : state_t;
-- Handshake / completion flags; all cleared on reset or flush.
busy : std_ulogic;
f2stall : std_ulogic;
instr_done : std_ulogic;
complete : std_ulogic;
-- When '1' at the time of a flush, the working FPSCR is NOT rolled back to
-- comm_fpscr (the flush is then attributed to an interrupt raised by the FPU).
do_intr : std_ulogic;
illegal : std_ulogic;
op : insn_type_t;
insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
dest_fpr : gspr_index_t;
fe_mode : std_ulogic;
rc : std_ulogic;
is_cmp : std_ulogic;
single_prec : std_ulogic;
sp_result : std_ulogic;
-- Working FPSCR value; zeroed on reset, restored from comm_fpscr on a flush
-- that was not caused by do_intr.
fpscr : std_ulogic_vector(31 downto 0);
comm_fpscr : std_ulogic_vector(31 downto 0); -- committed FPSCR value
-- Unpacked operands (see fpu_reg_type / decode_dp).
a : fpu_reg_type;
b : fpu_reg_type;
c : fpu_reg_type;
r : std_ulogic_vector(63 downto 0); -- 8.56 format
s : std_ulogic_vector(55 downto 0); -- extended fraction
x : std_ulogic;
p : std_ulogic_vector(63 downto 0); -- 8.56 format
y : std_ulogic_vector(63 downto 0); -- 8.56 format
result_sign : std_ulogic;
result_class : fp_number_class;
result_exp : signed(EXP_BITS-1 downto 0);
shift : signed(EXP_BITS-1 downto 0);
-- Write-back control; the writing_* flags, write_reg, cr_mask and cr_result
-- are cleared on reset or flush.
writing_fpr : std_ulogic;
write_reg : gspr_index_t;
complete_tag : instr_tag_t;
writing_cr : std_ulogic;
writing_xer : std_ulogic;
int_result : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
old_exc : std_ulogic_vector(4 downto 0);
update_fprf : std_ulogic;
quieten_nan : std_ulogic;
nsnan_result : std_ulogic;
tiny : std_ulogic;
denorm : std_ulogic;
round_mode : std_ulogic_vector(2 downto 0);
is_subtract : std_ulogic;
exp_cmp : std_ulogic;
madd_cmp : std_ulogic;
add_bsmall : std_ulogic;
is_multiply : std_ulogic;
is_sqrt : std_ulogic;
first : std_ulogic;
count : unsigned(1 downto 0);
doing_ftdiv : std_ulogic_vector(1 downto 0);
opsel_a : std_ulogic_vector(1 downto 0);
use_a : std_ulogic;
use_b : std_ulogic;
use_c : std_ulogic;
invalid : std_ulogic;
negate : std_ulogic;
longmask : std_ulogic;
integer_op : std_ulogic;
divext : std_ulogic;
divmod : std_ulogic;
is_signed : std_ulogic;
int_ovf : std_ulogic;
div_close : std_ulogic;
inc_quot : std_ulogic;
a_hi : std_ulogic_vector(7 downto 0);
a_lo : std_ulogic_vector(55 downto 0);
m32b : std_ulogic;
oe : std_ulogic;
xerc : xer_common_t;
xerc_result : xer_common_t;
end record;
-- Storage type of the inverse estimate table: 1024 entries of 18 bits each.
type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
-- r is the registered state written by the clocked process fpu_0;
-- rin is presumably the next-state value computed combinationally -- confirm.
signal r, rin : reg_type;
signal fp_result : std_ulogic_vector(63 downto 0);
-- Datapath select and control signals (2-bit selects take the constants below).
signal opsel_b : std_ulogic_vector(1 downto 0);
signal opsel_r : std_ulogic_vector(1 downto 0);
signal opsel_s : std_ulogic_vector(1 downto 0);
signal opsel_ainv : std_ulogic;
signal opsel_mask : std_ulogic;
signal opsel_binv : std_ulogic;
signal in_a : std_ulogic_vector(63 downto 0);
signal in_b : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal carry_in : std_ulogic;
signal lost_bits : std_ulogic;
signal r_hi_nz : std_ulogic;
signal r_lo_nz : std_ulogic;
signal r_gt_1 : std_ulogic;
signal s_nz : std_ulogic;
signal misc_sel : std_ulogic_vector(3 downto 0);
-- Connections to the work.multiply instance fpu_multiply_0 (m_in / m_out).
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;
signal msel_1 : std_ulogic_vector(1 downto 0);
signal msel_2 : std_ulogic_vector(1 downto 0);
signal msel_add : std_ulogic_vector(1 downto 0);
signal msel_inv : std_ulogic;
-- 19 bits wide, one more than a table entry; presumably the looked-up value
-- with the implied leading 1 restored -- confirm against the lookup logic.
signal inverse_est : std_ulogic_vector(18 downto 0);
-- opsel values
-- NOTE(review): by naming, AIN_* are the encodings for opsel_a, BIN_* for
-- opsel_b, RES_* for opsel_r and S_* for opsel_s -- confirm against the
-- datapath multiplexers, which are outside this section.
constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
constant BIN_PS8 : std_ulogic_vector(1 downto 0) := "11";
constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
-- msel values
-- NOTE(review): by naming, MUL1_* select msel_1, MUL2_* select msel_2 and
-- MULADD_* select msel_add -- confirm against the multiplier input logic.
constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
-- Inverse lookup table, indexed by the top 8 fraction bits
-- The first 256 entries are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
constant inverse_table : lookup_table := (
-- 1/x lookup table
-- Unit bit is assumed to be 1, so input range is [1, 2)
18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
-- 1/sqrt(x) lookup table
-- Input is in the range [1, 4), i.e. two bits to the left of the
-- binary point. Those 2 bits index the following 3 blocks of 256 values.
-- 1.0 ... 1.9999
18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
-- 2.0 ... 2.9999
18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
-- 3.0 ... 3.9999
18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
);
-- Bidirectional shifter: 120-bit input, 64-bit output.
-- The output is the top 64 bits of (inp shifted left by 'shift' places).
-- 'shift' is a 7-bit two's-complement count in the range -64..63; negative
-- counts shift right, with zeros entering from the top.
-- With shift = 0 the output is simply inp(119 downto 56).
-- The shift is performed by three cascaded multiplexer stages:
--   stage 1, shift(6 downto 5): 0, +32, -64 or -32 places
--   stage 2, shift(4 downto 3): a further 0, 8, 16 or 24 places left
--   stage 3, shift(2 downto 0): a further 0..7 places left
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable window : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: pick a 95-bit slice; bits not covered by inp stay zero.
    coarse := (others => '0');
    case shift(6 downto 5) is
        when "00" =>
            -- no coarse shift
            coarse := inp(119 downto 25);
        when "01" =>
            -- left by 32, low 7 bits are zero fill
            coarse(94 downto 7) := inp(87 downto 0);
        when "10" =>
            -- right by 64, upper 64 bits are zero fill
            coarse(30 downto 0) := inp(119 downto 89);
        when others =>
            -- right by 32, upper 32 bits are zero fill
            coarse(62 downto 0) := inp(119 downto 57);
    end case;

    -- Stage 2: narrow to 71 bits, moving left in steps of 8.
    case shift(4 downto 3) is
        when "00" =>
            medium := coarse(94 downto 24);
        when "01" =>
            medium := coarse(86 downto 16);
        when "10" =>
            medium := coarse(78 downto 8);
        when others =>
            medium := coarse(70 downto 0);
    end case;

    -- Stage 3: narrow to the final 64 bits, moving left in steps of 1.
    case shift(2 downto 0) is
        when "000" =>
            window := medium(70 downto 7);
        when "001" =>
            window := medium(69 downto 6);
        when "010" =>
            window := medium(68 downto 5);
        when "011" =>
            window := medium(67 downto 4);
        when "100" =>
            window := medium(66 downto 3);
        when "101" =>
            window := medium(65 downto 2);
        when "110" =>
            window := medium(64 downto 1);
        when others =>
            window := medium(63 downto 0);
    end case;

    return window;
end;
-- Build a 64-bit mask (0-bits on the left, 1-bits on the right) that selects
-- the bits that will be lost in a right shift.
-- 'shift' is the bottom 6 bits of the negative shift count.  For a value s,
-- bits (63 - s) downto 0 of the mask are set: s = 63 (a shift count of -1)
-- sets only bit 0, while s = 0 sets all 64 bits.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    for pos in mask'range loop
        -- bit 'pos' is the (63 - pos)-th bit counted from the top
        if (63 - pos) >= shift then
            mask(pos) := '1';
        else
            mask(pos) := '0';
        end if;
    end loop;
    return mask;
end;
-- Unpack a 64-bit register value into sign, class, exponent and mantissa.
--   is_int = '0'  : fpr is a double-precision floating-point number.  The
--                   exponent is unbiased (-1022 when the exponent field is
--                   zero), the mantissa is placed in 8.56 format with the
--                   unit bit set only for a non-zero exponent field.
--   is_int = '1', is_32bint = '1' : fpr(31 downto 0) is a 32-bit integer,
--                   sign-extended into the mantissa when is_signed = '1',
--                   zero-extended otherwise; the sign is fpr(31).
--   is_int = '1', is_32bint = '0' : fpr is a 64-bit integer copied straight
--                   into the mantissa; the sign is fpr(63).
-- For both integer forms the exponent is 0 and the class is ZERO or FINITE.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic;
                   is_32bint: std_ulogic; is_signed: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;    -- at least one exponent bit set
    variable exp_all  : std_ulogic;    -- all exponent bits set
    variable frac_any : std_ulogic;    -- at least one fraction bit set
    variable lo32_any : std_ulogic;    -- at least one of the low 32 bits set
    variable sign_ext : std_ulogic;
    variable key      : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));
    lo32_any := or (fpr(31 downto 0));

    dec.negative := fpr(63);

    if is_int = '0' then
        -- Floating-point input
        if exp_any = '0' then
            -- zero or denormal: fixed minimum exponent
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        -- Fraction goes directly below the unit bit; everything else is zero.
        dec.mantissa := (others => '0');
        dec.mantissa(UNIT_BIT) := exp_any;
        dec.mantissa(UNIT_BIT - 1 downto UNIT_BIT - 52) := fpr(51 downto 0);

        key := exp_all & exp_any & frac_any;
        case key is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;

    elsif is_32bint = '1' then
        -- 32-bit integer input
        sign_ext := is_signed and fpr(31);
        dec.negative := fpr(31);
        dec.exponent := (others => '0');
        dec.mantissa(63 downto 32) := (others => sign_ext);
        dec.mantissa(31 downto 0) := fpr(31 downto 0);
        if lo32_any = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;

    else
        -- 64-bit integer input
        dec.exponent := (others => '0');
        dec.mantissa := fpr;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    end if;

    return dec;
end;
-- Assemble a 64-bit double-precision bit pattern from its components.
--   sign        : goes to bit 63 for every class
--   class       : ZERO leaves exponent and fraction zero; INFINITY sets the
--                 exponent field to all ones; FINITE and NAN also copy the
--                 fraction from 'mantissa' (8.56 format)
--   exp         : unbiased exponent; biased by 1023 and used only for a
--                 FINITE value whose unit bit is set (otherwise the exponent
--                 field stays zero, i.e. a denormal encoding)
--   single_prec : when '1' the low 29 fraction bits are left zero
--   quieten_nan : when '1' forces the top fraction bit of a NAN result to 1
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable packed : std_ulogic_vector(63 downto 0);
begin
    packed := (others => '0');

    case class is
        when ZERO =>
            null;

        when INFINITY =>
            packed(62 downto 52) := (others => '1');

        when FINITE | NAN =>
            -- Fraction: the upper 23 bits always, the lower 29 only for
            -- double precision.
            packed(51 downto 29) := mantissa(UNIT_BIT - 1 downto SP_LSB);
            if single_prec = '0' then
                packed(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
            end if;

            if class = NAN then
                packed(62 downto 52) := (others => '1');
                -- top fraction bit: optionally forced to 1
                packed(51) := quieten_nan or mantissa(QNAN_BIT);
            elsif mantissa(UNIT_BIT) = '1' then
                -- normalized number
                packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
            end if;
    end case;

    packed(63) := sign;
    return packed;
end;
-- Decide whether a mantissa must be incremented when it is rounded.
-- Returns a 2-bit vector: bit 1 = increment at the LSB, bit 0 = inexact
-- (some bit below the LSB, or x, is set).
--   mantissa    : value in 8.56 format
--   x           : extra sticky information for bits already shifted out
--   single_prec : selects the SP_* rather than the DP_* bit positions.  In
--                 that case x is assumed to already include the bottom 31
--                 (== SP_LSB - 2) mantissa bits (usually arranged by setting
--                 set_x = 1 earlier)
--   rn          : rn(1 downto 0) = "00" round to nearest, "01" round towards
--                 zero, "10"/"11" round towards +/- infinity; with rn(2) = '1'
--                 round-to-nearest does not apply the tie-to-even rule
--   sign        : sign of the value being rounded
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable tail     : std_ulogic_vector(2 downto 0);  -- two bits below the LSB, then sticky
    variable low_bit  : std_ulogic;                     -- LSB of the rounded format
    variable inexact  : std_ulogic;
    variable round_up : std_ulogic;
    variable verdict  : std_ulogic_vector(1 downto 0);
begin
    if single_prec = '0' then
        low_bit := mantissa(DP_LSB);
        tail := mantissa(DP_GBIT downto DP_RBIT) & (x or (or mantissa(DP_RBIT - 1 downto 0)));
    else
        low_bit := mantissa(SP_LSB);
        tail := mantissa(SP_GBIT downto SP_RBIT) & x;
    end if;

    inexact  := or (tail);
    round_up := '0';

    case rn(1 downto 0) is
        when "00" =>
            -- round to nearest
            if tail = "100" and rn(2) = '0' then
                -- exact tie: round to even
                round_up := low_bit;
            else
                round_up := tail(2);
            end if;
        when "01" =>
            -- round towards zero: never increment
            null;
        when others =>
            -- round towards +/- infinity: increment only when that moves
            -- towards greater magnitude and something would be lost
            if rn(0) = sign then
                round_up := inexact;
            end if;
    end case;

    verdict(1) := round_up;
    verdict(0) := inexact;
    return verdict;
end;
-- Compute the 5-bit result flags to write into the FPSCR for a result of
-- the given sign and class.  'unitbit' is the unit bit of the result
-- mantissa and only matters for FINITE values.
-- Encodings, leftmost bit first:
--   ZERO     : sign, 0, 0, 1, 0
--   FINITE   : not unitbit, sign, not sign, 0, 0
--   INFINITY : 0, sign, not sign, 0, 1
--   NAN      : 1, 0, 0, 0, 1
-- NOTE(review): presumably this is the FPSCR FPRF field (C, FL, FG, FE, FU);
-- confirm against the caller.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    if class = ZERO then
        return sign & "0010";
    elsif class = FINITE then
        return (not unitbit) & sign & (not sign) & "00";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    else
        -- NAN
        return "10001";
    end if;
end;
begin
-- Instance of the work.multiply entity used by the FPU.
-- It is clocked by clk, takes its inputs from f_to_multiply and returns its
-- output on multiply_to_f.
fpu_multiply_0: entity work.multiply
port map (
clk => clk,
m_in => f_to_multiply,
m_out => multiply_to_f
);
fpu_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' or flush_in = '1' then
r.state <= IDLE;
r.busy <= '0';
r.f2stall <= '0';
r.instr_done <= '0';
r.complete <= '0';
r.illegal <= '0';
r.do_intr <= '0';
r.writing_fpr <= '0';
r.writing_cr <= '0';
r.writing_xer <= '0';
r.fpscr <= (others => '0');
r.write_reg <= (others =>'0');
r.complete_tag.valid <= '0';
r.cr_mask <= (others =>'0');
r.cr_result <= (others =>'0');
r.instr_tag.valid <= '0';
if rst = '1' then
r.fpscr <= (others => '0');
r.comm_fpscr <= (others => '0');
elsif r.do_intr = '0' then
-- flush_in = 1 and not due to us generating an interrupt,
-- roll back to committed fpscr
r.fpscr <= r.comm_fpscr;
end if;