You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
microwatt/fpu.vhdl

2241 lines
95 KiB
VHDL

-- Floating-point unit for Microwatt
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.insn_helpers.all;
use work.decode_types.all;
use work.crhelpers.all;
use work.helpers.all;
use work.common.all;
entity fpu is
port (
clk : in std_ulogic;
rst : in std_ulogic;
e_in : in Execute1toFPUType;
e_out : out FPUToExecute1Type;
w_out : out FPUToWritebackType
);
end entity fpu;
architecture behaviour of fpu is
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
constant EXP_BITS : natural := 13;
type fpu_reg_type is record
class : fp_number_class;
negative : std_ulogic;
exponent : signed(EXP_BITS-1 downto 0); -- unbiased
mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
end record;
type state_t is (IDLE,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_FMR, DO_FMRG, DO_FCMP,
DO_FCFID, DO_FCTI,
DO_FRSP, DO_FRI,
DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT,
DO_FRE, DO_FRSQRTE,
DO_FSEL,
FRI_1,
ADD_SHIFT, ADD_2, ADD_3,
CMP_1, CMP_2,
MULT_1,
LOOKUP,
DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
FRE_1,
RSQRT_1,
SQRT_1, SQRT_2, SQRT_3, SQRT_4,
SQRT_5, SQRT_6, SQRT_7, SQRT_8,
SQRT_9, SQRT_10, SQRT_11, SQRT_12,
INT_SHIFT, INT_ROUND, INT_ISHIFT,
INT_FINAL, INT_CHECK, INT_OFLOW,
FINISH, NORMALIZE,
ROUND_UFLOW, ROUND_OFLOW,
ROUNDING, ROUNDING_2, ROUNDING_3,
DENORM,
RENORM_A, RENORM_A2,
RENORM_B, RENORM_B2,
RENORM_C, RENORM_C2);
type reg_type is record
state : state_t;
busy : std_ulogic;
instr_done : std_ulogic;
do_intr : std_ulogic;
op : insn_type_t;
insn : std_ulogic_vector(31 downto 0);
dest_fpr : gspr_index_t;
fe_mode : std_ulogic;
rc : std_ulogic;
is_cmp : std_ulogic;
single_prec : std_ulogic;
fpscr : std_ulogic_vector(31 downto 0);
a : fpu_reg_type;
b : fpu_reg_type;
c : fpu_reg_type;
r : std_ulogic_vector(63 downto 0); -- 10.54 format
x : std_ulogic;
p : std_ulogic_vector(63 downto 0); -- 8.56 format
y : std_ulogic_vector(63 downto 0); -- 8.56 format
result_sign : std_ulogic;
result_class : fp_number_class;
result_exp : signed(EXP_BITS-1 downto 0);
shift : signed(EXP_BITS-1 downto 0);
writing_back : std_ulogic;
int_result : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
old_exc : std_ulogic_vector(4 downto 0);
update_fprf : std_ulogic;
quieten_nan : std_ulogic;
tiny : std_ulogic;
denorm : std_ulogic;
round_mode : std_ulogic_vector(2 downto 0);
is_subtract : std_ulogic;
exp_cmp : std_ulogic;
add_bsmall : std_ulogic;
is_multiply : std_ulogic;
is_sqrt : std_ulogic;
first : std_ulogic;
count : unsigned(1 downto 0);
end record;
type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
signal r, rin : reg_type;
signal fp_result : std_ulogic_vector(63 downto 0);
signal opsel_a : std_ulogic_vector(1 downto 0);
signal opsel_b : std_ulogic_vector(1 downto 0);
signal opsel_r : std_ulogic_vector(1 downto 0);
signal opsel_ainv : std_ulogic;
signal opsel_amask : std_ulogic;
signal opsel_binv : std_ulogic;
signal in_a : std_ulogic_vector(63 downto 0);
signal in_b : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal carry_in : std_ulogic;
signal lost_bits : std_ulogic;
signal r_hi_nz : std_ulogic;
signal r_lo_nz : std_ulogic;
signal misc_sel : std_ulogic_vector(3 downto 0);
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;
signal msel_1 : std_ulogic_vector(1 downto 0);
signal msel_2 : std_ulogic_vector(1 downto 0);
signal msel_add : std_ulogic_vector(1 downto 0);
signal msel_inv : std_ulogic;
signal inverse_est : std_ulogic_vector(18 downto 0);
-- opsel values
constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
-- msel values
constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
-- Inverse lookup table, indexed by the top 8 fraction bits
-- The first 256 entries are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
signal inverse_table : lookup_table := (
-- 1/x lookup table
-- Unit bit is assumed to be 1, so input range is [1, 2)
18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
-- 1/sqrt(x) lookup table
-- Input is in the range [1, 4), i.e. two bits to the left of the
-- binary point. Those 2 bits index the following 3 blocks of 256 values.
-- 1.0 ... 1.9999
18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
-- 2.0 ... 2.9999
18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
-- 3.0 ... 3.9999
18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
);
-- Left and right shifter with 120 bit input and 64 bit output.
-- Shifts inp left by shift bits and returns the upper 64 bits of
-- the result. The shift parameter is interpreted as a signed
-- number in the range -64..63, with negative values indicating
-- right shifts.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
shift: std_ulogic_vector(6 downto 0))
return std_ulogic_vector is
variable s1 : std_ulogic_vector(94 downto 0);
variable s2 : std_ulogic_vector(70 downto 0);
variable result : std_ulogic_vector(63 downto 0);
begin
case shift(6 downto 5) is
when "00" =>
s1 := inp(119 downto 25);
when "01" =>
s1 := inp(87 downto 0) & "0000000";
when "10" =>
s1 := x"0000000000000000" & inp(119 downto 89);
when others =>
s1 := x"00000000" & inp(119 downto 57);
end case;
case shift(4 downto 3) is
when "00" =>
s2 := s1(94 downto 24);
when "01" =>
s2 := s1(86 downto 16);
when "10" =>
s2 := s1(78 downto 8);
when others =>
s2 := s1(70 downto 0);
end case;
case shift(2 downto 0) is
when "000" =>
result := s2(70 downto 7);
when "001" =>
result := s2(69 downto 6);
when "010" =>
result := s2(68 downto 5);
when "011" =>
result := s2(67 downto 4);
when "100" =>
result := s2(66 downto 3);
when "101" =>
result := s2(65 downto 2);
when "110" =>
result := s2(64 downto 1);
when others =>
result := s2(63 downto 0);
end case;
return result;
end;
-- Generate a mask with 0-bits on the left and 1-bits on the right which
-- selects the bits will be lost in doing a right shift. The shift
-- parameter is the bottom 6 bits of a negative shift count,
-- indicating a right shift.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
variable result: std_ulogic_vector(63 downto 0);
begin
result := (others => '0');
for i in 0 to 63 loop
if i >= shift then
result(63 - i) := '1';
end if;
end loop;
return result;
end;
-- Split a DP floating-point number into components and work out its class.
-- If is_int = 1, the input is considered an integer
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
variable r : fpu_reg_type;
variable exp_nz : std_ulogic;
variable exp_ao : std_ulogic;
variable frac_nz : std_ulogic;
variable cls : std_ulogic_vector(2 downto 0);
begin
r.negative := fpr(63);
exp_nz := or (fpr(62 downto 52));
exp_ao := and (fpr(62 downto 52));
frac_nz := or (fpr(51 downto 0));
if is_int = '0' then
r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
if exp_nz = '0' then
r.exponent := to_signed(-1022, EXP_BITS);
end if;
r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
cls := exp_ao & exp_nz & frac_nz;
case cls is
when "000" => r.class := ZERO;
when "001" => r.class := FINITE; -- denormalized
when "010" => r.class := FINITE;
when "011" => r.class := FINITE;
when "110" => r.class := INFINITY;
when others => r.class := NAN;
end case;
else
r.mantissa := fpr;
r.exponent := (others => '0');
if (fpr(63) or exp_nz or frac_nz) = '1' then
r.class := FINITE;
else
r.class := ZERO;
end if;
end if;
return r;
end;
-- Construct a DP floating-point result from components
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
return std_ulogic_vector is
variable result : std_ulogic_vector(63 downto 0);
begin
result := (others => '0');
result(63) := sign;
case class is
when ZERO =>
when FINITE =>
if mantissa(54) = '1' then
-- normalized number
result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
end if;
result(51 downto 29) := mantissa(53 downto 31);
if single_prec = '0' then
result(28 downto 0) := mantissa(30 downto 2);
end if;
when INFINITY =>
result(62 downto 52) := "11111111111";
when NAN =>
result(62 downto 52) := "11111111111";
result(51) := quieten_nan or mantissa(53);
result(50 downto 29) := mantissa(52 downto 31);
if single_prec = '0' then
result(28 downto 0) := mantissa(30 downto 2);
end if;
end case;
return result;
end;
-- Determine whether to increment when rounding
-- Returns rounding_inc & inexact
-- Assumes x includes the bottom 29 bits of the mantissa already
-- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
sign: std_ulogic)
return std_ulogic_vector is
variable grx : std_ulogic_vector(2 downto 0);
variable ret : std_ulogic_vector(1 downto 0);
variable lsb : std_ulogic;
begin
if single_prec = '0' then
grx := mantissa(1 downto 0) & x;
lsb := mantissa(2);
else
grx := mantissa(30 downto 29) & x;
lsb := mantissa(31);
end if;
ret(1) := '0';
ret(0) := or (grx);
case rn(1 downto 0) is
when "00" => -- round to nearest
if grx = "100" and rn(2) = '0' then
ret(1) := lsb; -- tie, round to even
else
ret(1) := grx(2);
end if;
when "01" => -- round towards zero
when others => -- round towards +/- inf
if rn(0) = sign then
-- round towards greater magnitude
ret(1) := ret(0);
end if;
end case;
return ret;
end;
-- Determine result flags to write into the FPSCR
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
return std_ulogic_vector is
begin
case class is
when ZERO =>
return sign & "0010";
when FINITE =>
return (not unitbit) & sign & (not sign) & "00";
when INFINITY =>
return '0' & sign & (not sign) & "01";
when NAN =>
return "10001";
end case;
end;
begin
fpu_multiply_0: entity work.multiply
port map (
clk => clk,
m_in => f_to_multiply,
m_out => multiply_to_f
);
fpu_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
r.state <= IDLE;
r.busy <= '0';
r.instr_done <= '0';
r.do_intr <= '0';
r.fpscr <= (others => '0');
r.writing_back <= '0';
else
assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
r <= rin;
end if;
end if;
end process;
-- synchronous reads from lookup table
lut_access: process(clk)
variable addrhi : std_ulogic_vector(1 downto 0);
variable addr : std_ulogic_vector(9 downto 0);
begin
if rising_edge(clk) then
if r.is_sqrt = '1' then
addrhi := r.b.mantissa(55 downto 54);
else
addrhi := "00";
end if;
addr := addrhi & r.b.mantissa(53 downto 46);
inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
end if;
end process;
e_out.busy <= r.busy;
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.interrupt <= r.do_intr;
w_out.valid <= r.instr_done and not r.do_intr;
w_out.write_enable <= r.writing_back;
w_out.write_reg <= r.dest_fpr;
w_out.write_data <= fp_result;
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask <= r.cr_mask;
w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
r.cr_result & r.cr_result & r.cr_result & r.cr_result;
fpu_1: process(all)
variable v : reg_type;
variable adec : fpu_reg_type;
variable bdec : fpu_reg_type;
variable cdec : fpu_reg_type;
variable fpscr_mask : std_ulogic_vector(31 downto 0);
variable illegal : std_ulogic;
variable j, k : integer;
variable flm : std_ulogic_vector(7 downto 0);
variable int_input : std_ulogic;
variable mask : std_ulogic_vector(63 downto 0);
variable in_a0 : std_ulogic_vector(63 downto 0);
variable in_b0 : std_ulogic_vector(63 downto 0);
variable misc : std_ulogic_vector(63 downto 0);
variable shift_res : std_ulogic_vector(63 downto 0);
variable round : std_ulogic_vector(1 downto 0);
variable update_fx : std_ulogic;
variable arith_done : std_ulogic;
variable invalid : std_ulogic;
variable zero_divide : std_ulogic;
variable mant_nz : std_ulogic;
variable min_exp : signed(EXP_BITS-1 downto 0);
variable max_exp : signed(EXP_BITS-1 downto 0);
variable bias_exp : signed(EXP_BITS-1 downto 0);
variable new_exp : signed(EXP_BITS-1 downto 0);
variable exp_tiny : std_ulogic;
variable exp_huge : std_ulogic;
variable renormalize : std_ulogic;
variable clz : std_ulogic_vector(5 downto 0);
variable set_x : std_ulogic;
variable mshift : signed(EXP_BITS-1 downto 0);
variable need_check : std_ulogic;
variable msb : std_ulogic;
variable is_add : std_ulogic;
variable qnan_result : std_ulogic;
variable longmask : std_ulogic;
variable set_a : std_ulogic;
variable set_b : std_ulogic;
variable set_c : std_ulogic;
variable px_nz : std_ulogic;
variable maddend : std_ulogic_vector(127 downto 0);
variable set_y : std_ulogic;
variable pcmpb_eq : std_ulogic;
variable pcmpb_lt : std_ulogic;
variable pshift : std_ulogic;
variable renorm_sqrt : std_ulogic;
variable sqrt_exp : signed(EXP_BITS-1 downto 0);
variable shiftin : std_ulogic;
begin
v := r;
illegal := '0';
v.busy := '0';
int_input := '0';
-- capture incoming instruction
if e_in.valid = '1' then
v.insn := e_in.insn;
v.op := e_in.op;
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;
v.int_result := '0';
v.rc := e_in.rc;
v.is_cmp := e_in.out_cr;
if e_in.out_cr = '0' then
v.cr_mask := num_to_fxm(1);
else
v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
end if;
int_input := '0';
if e_in.op = OP_FPOP_I then
int_input := '1';
end if;
v.quieten_nan := '1';
v.tiny := '0';
v.denorm := '0';
v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
v.is_subtract := '0';
v.is_multiply := '0';
v.is_sqrt := '0';
v.add_bsmall := '0';
adec := decode_dp(e_in.fra, int_input);
bdec := decode_dp(e_in.frb, int_input);
cdec := decode_dp(e_in.frc, int_input);
v.a := adec;
v.b := bdec;
v.c := cdec;
v.exp_cmp := '0';
if adec.exponent > bdec.exponent then
v.exp_cmp := '1';
end if;
end if;
r_hi_nz <= or (r.r(55 downto 31));
r_lo_nz <= or (r.r(30 downto 2));
if r.single_prec = '0' then
max_exp := to_signed(1023, EXP_BITS);
min_exp := to_signed(-1022, EXP_BITS);
bias_exp := to_signed(1536, EXP_BITS);
else
max_exp := to_signed(127, EXP_BITS);
min_exp := to_signed(-126, EXP_BITS);
bias_exp := to_signed(192, EXP_BITS);
end if;
new_exp := r.result_exp - r.shift;
exp_tiny := '0';
exp_huge := '0';
if new_exp < min_exp then
exp_tiny := '1';
end if;
if new_exp > max_exp then
exp_huge := '1';
end if;
-- Compare P with zero and with B
px_nz := or (r.p(57 downto 4));
pcmpb_eq := '0';
if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
pcmpb_eq := '1';
end if;
pcmpb_lt := '0';
if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
pcmpb_lt := '1';
end if;
v.writing_back := '0';
v.instr_done := '0';
v.update_fprf := '0';
v.shift := to_signed(0, EXP_BITS);
v.first := '0';
opsel_a <= AIN_R;
opsel_ainv <= '0';
opsel_amask <= '0';