diff --git a/decode1.vhdl b/decode1.vhdl index 2fb1ad4..1978a27 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -151,8 +151,8 @@ architecture behaviour of decode1 is INSN_fabs => (FPU, FPU, OP_FP_MOVE, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', '0', NONE), INSN_fadd => (FPU, FPU, OP_FP_ARITH, FRA, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', '0', NONE), INSN_fadds => (FPU, FPU, OP_FP_ARITH, FRA, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', '0', NONE), - INSN_fcfid => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', '0', NONE), - INSN_fcfids => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', '0', NONE), + INSN_fcfid => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', '0', NONE), + INSN_fcfids => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', '0', NONE), INSN_fcfidu => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', '0', NONE), INSN_fcfidus => (FPU, FPU, OP_FP_MISC, NONE, FRB, NONE, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', '0', NONE), INSN_fcmpo => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', '0', NONE), diff --git a/fpu.vhdl b/fpu.vhdl index f07f9d1..5648012 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -41,29 +41,30 @@ architecture behaviour of fpu is class : fp_number_class; negative : std_ulogic; denorm : std_ulogic; + naninf : std_ulogic; + 
zeroexp : std_ulogic; exponent : signed(EXP_BITS-1 downto 0); -- unbiased mantissa : std_ulogic_vector(63 downto 0); -- 8.56 format end record; - type state_t is (IDLE, DO_ILLEGAL, + type state_t is (IDLE, DO_ILLEGAL, DO_SPECIAL, DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT, DO_FCFID, DO_FCTI, - DO_FRSP, DO_FRI, + DO_FRSP, DO_FRSP_2, DO_FRI, DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD, - DO_FRE, DO_FRSQRTE, + DO_FRE, DO_FSEL, DO_IDIVMOD, FRI_1, - ADD_1, ADD_SHIFT, ADD_2, ADD_3, + ADD_1, ADD_SHIFT, ADD_2, ADD_2B, ADD_3, CMP_1, CMP_2, MULT_1, FMADD_0, FMADD_1, FMADD_2, FMADD_3, FMADD_4, FMADD_5, FMADD_6, - LOOKUP, DIV_2, DIV_3, DIV_4, DIV_5, DIV_6, FRE_1, - RSQRT_1, + SQRT_ODD, RSQRT_1, FTDIV_1, SQRT_1, SQRT_2, SQRT_3, SQRT_4, SQRT_5, SQRT_6, SQRT_7, SQRT_8, @@ -72,12 +73,10 @@ architecture behaviour of fpu is INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, ROUND_UFLOW, ROUND_OFLOW, - ROUNDING, ROUNDING_2, ROUNDING_3, + ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3, DENORM, - RENORM_A, RENORM_A2, - RENORM_B, RENORM_B2, - RENORM_C, RENORM_C2, - NAN_RESULT, EXC_RESULT, + RENORM_A, RENORM_B, RENORM_C, + RENORM_1, RENORM_2, IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3, IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3, IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5, @@ -88,11 +87,23 @@ architecture behaviour of fpu is IDIV_EXT_TBH4, IDIV_EXT_TBH5, IDIV_EXTDIV, IDIV_EXTDIV1, IDIV_EXTDIV2, IDIV_EXTDIV3, IDIV_EXTDIV4, IDIV_EXTDIV5, IDIV_EXTDIV6, - IDIV_MODADJ, IDIV_MODSUB, IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO); + IDIV_MODADJ, IDIV_MODADJ_NEG, IDIV_MODSUB, + IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO); type decode32 is array(0 to 31) of state_t; type decode8 is array(0 to 7) of state_t; + type specialcase_t is record -- findings of the fpu_specialcases process, carried on scinfo + invalid : std_ulogic; -- an invalid-operation (VX*) condition was detected + zero_divide : std_ulogic; -- set together with FPSCR_ZX by fpu_specialcases + new_fpscr : std_ulogic_vector(31 downto 0); -- FPSCR exception bits raised by the special case + immed_result : std_ulogic; -- result is an input, zero, infinity or NaN + qnan_result : std_ulogic; -- makes fpu_specialcases set result_class to NAN and invalid to '1' + result_sel :
std_ulogic_vector(2 downto 0); + result_class : fp_number_class; + rsgn_op : std_ulogic_vector(1 downto 0); + end record; + type reg_type is record state : state_t; busy : std_ulogic; @@ -141,15 +152,15 @@ architecture behaviour of fpu is denorm : std_ulogic; round_mode : std_ulogic_vector(2 downto 0); is_subtract : std_ulogic; - exp_cmp : std_ulogic; - madd_cmp : std_ulogic; add_bsmall : std_ulogic; + is_arith : std_ulogic; + is_addition : std_ulogic; is_multiply : std_ulogic; + is_inverse : std_ulogic; is_sqrt : std_ulogic; first : std_ulogic; count : unsigned(1 downto 0); doing_ftdiv : std_ulogic_vector(1 downto 0); - opsel_a : std_ulogic_vector(1 downto 0); use_a : std_ulogic; use_b : std_ulogic; use_c : std_ulogic; @@ -170,6 +181,12 @@ architecture behaviour of fpu is xerc : xer_common_t; xerc_result : xer_common_t; res_sign : std_ulogic; + res_int : std_ulogic; + exec_state : state_t; + cycle_1 : std_ulogic; + cycle_1_ar : std_ulogic; + regsel : std_ulogic_vector(2 downto 0); + is_nan_inf : std_ulogic; end record; type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); @@ -177,22 +194,24 @@ architecture behaviour of fpu is signal r, rin : reg_type; signal fp_result : std_ulogic_vector(63 downto 0); - signal opsel_b : std_ulogic_vector(1 downto 0); + signal opsel_a : std_ulogic_vector(2 downto 0); + signal opsel_b : std_ulogic_vector(2 downto 0); + signal opsel_c : std_ulogic_vector(2 downto 0); signal opsel_r : std_ulogic_vector(1 downto 0); signal opsel_s : std_ulogic_vector(1 downto 0); - signal opsel_ainv : std_ulogic; + signal opsel_aneg : std_ulogic; + signal opsel_aabs : std_ulogic; signal opsel_mask : std_ulogic; - signal opsel_binv : std_ulogic; + signal opsel_sel : std_ulogic_vector(2 downto 0); signal in_a : std_ulogic_vector(63 downto 0); signal in_b : std_ulogic_vector(63 downto 0); signal result : std_ulogic_vector(63 downto 0); - signal carry_in : std_ulogic; signal lost_bits : std_ulogic; signal r_hi_nz : std_ulogic; signal
r_lo_nz : std_ulogic; signal r_gt_1 : std_ulogic; signal s_nz : std_ulogic; - signal misc_sel : std_ulogic_vector(3 downto 0); + signal misc_sel : std_ulogic_vector(2 downto 0); signal f_to_multiply : MultiplyInputType; signal multiply_to_f : MultiplyOutputType; signal msel_1 : std_ulogic_vector(1 downto 0); @@ -202,15 +221,29 @@ architecture behaviour of fpu is signal inverse_est : std_ulogic_vector(18 downto 0); -- opsel values - constant AIN_R : std_ulogic_vector(1 downto 0) := "00"; - constant AIN_A : std_ulogic_vector(1 downto 0) := "01"; - constant AIN_B : std_ulogic_vector(1 downto 0) := "10"; - constant AIN_C : std_ulogic_vector(1 downto 0) := "11"; - - constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00"; - constant BIN_R : std_ulogic_vector(1 downto 0) := "01"; - constant BIN_RND : std_ulogic_vector(1 downto 0) := "10"; - constant BIN_PS8 : std_ulogic_vector(1 downto 0) := "11"; + constant AIN_ZERO : std_ulogic_vector(2 downto 0) := "000"; + constant AIN_A : std_ulogic_vector(2 downto 0) := "001"; + constant AIN_B : std_ulogic_vector(2 downto 0) := "010"; + constant AIN_C : std_ulogic_vector(2 downto 0) := "011"; + constant AIN_PS8 : std_ulogic_vector(2 downto 0) := "100"; + constant AIN_RND_B32 : std_ulogic_vector(2 downto 0) := "101"; + constant AIN_RND_RBIT : std_ulogic_vector(2 downto 0) := "110"; + constant AIN_RND : std_ulogic_vector(2 downto 0) := "111"; + + constant BIN_ZERO : std_ulogic_vector(2 downto 0) := "000"; + constant BIN_R : std_ulogic_vector(2 downto 0) := "001"; + constant BIN_MINUSR : std_ulogic_vector(2 downto 0) := "100"; + constant BIN_ABSR : std_ulogic_vector(2 downto 0) := "101"; + constant BIN_ADDSUBR : std_ulogic_vector(2 downto 0) := "110"; + constant BIN_RSIGNR : std_ulogic_vector(2 downto 0) := "111"; + + constant CIN_ZERO : std_ulogic_vector(2 downto 0) := "000"; + constant CIN_SUBEXT : std_ulogic_vector(2 downto 0) := "001"; + constant CIN_ABSEXT : std_ulogic_vector(2 downto 0) := "010"; + constant CIN_INC : 
std_ulogic_vector(2 downto 0) := "011"; + constant CIN_ROUND : std_ulogic_vector(2 downto 0) := "100"; + constant CIN_RNDX : std_ulogic_vector(2 downto 0) := "101"; + constant CIN_RNDQ : std_ulogic_vector(2 downto 0) := "110"; constant RES_SUM : std_ulogic_vector(1 downto 0) := "00"; constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01"; @@ -287,6 +320,26 @@ architecture behaviour of fpu is signal rs_neg2 : std_ulogic; signal rs_norm : std_ulogic; + constant RSGN_NOP : std_ulogic_vector(1 downto 0) := "00"; + constant RSGN_INV : std_ulogic_vector(1 downto 0) := "01"; + constant RSGN_SUB : std_ulogic_vector(1 downto 0) := "10"; + constant RSGN_SEL : std_ulogic_vector(1 downto 0) := "11"; + + signal rcls_op : std_ulogic_vector(1 downto 0); + constant RCLS_NOP : std_ulogic_vector(1 downto 0) := "00"; + constant RCLS_SEL : std_ulogic_vector(1 downto 0) := "01"; + constant RCLS_TZERO : std_ulogic_vector(1 downto 0) := "10"; + constant RCLS_TINF : std_ulogic_vector(1 downto 0) := "11"; + + constant CROP_NONE : std_ulogic_vector(2 downto 0) := "000"; + constant CROP_FCMP : std_ulogic_vector(2 downto 0) := "001"; + constant CROP_MCRFS : std_ulogic_vector(2 downto 0) := "010"; + constant CROP_FTDIV : std_ulogic_vector(2 downto 0) := "100"; + constant CROP_FTSQRT : std_ulogic_vector(2 downto 0) := "101"; + constant CROP_INTRES : std_ulogic_vector(2 downto 0) := "110"; + + signal scinfo : specialcase_t; + constant arith_decode : decode32 := ( -- indexed by bits 5..1 of opcode 2#01000# => DO_FRI, @@ -299,7 +352,7 @@ architecture behaviour of fpu is 2#10110# => DO_FSQRT, 2#11000# => DO_FRE, 2#11001# => DO_FMUL, - 2#11010# => DO_FRSQRTE, + 2#11010# => DO_FSQRT, 2#11100# => DO_FMADD, 2#11101# => DO_FMADD, 2#11110# => DO_FMADD, @@ -562,11 +615,15 @@ architecture behaviour of fpu is begin reg.negative := fpr(63); reg.denorm := '0'; + reg.naninf := '0'; + reg.zeroexp := '0'; exp_nz := or (fpr(62 downto 52)); exp_ao := and (fpr(62 downto 52)); frac_nz := or (fpr(51 downto 0)); 
low_nz := or (fpr(31 downto 0)); if is_fp = '1' then + reg.naninf := exp_ao; + reg.zeroexp := not exp_nz; reg.denorm := frac_nz and not exp_nz; reg.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS); if exp_nz = '0' then @@ -719,6 +776,7 @@ begin r.cr_mask <= (others =>'0'); r.cr_result <= (others =>'0'); r.instr_tag.valid <= '0'; + r.exec_state <= IDLE; if rst = '1' then r.fpscr <= (others => '0'); r.comm_fpscr <= (others => '0'); @@ -777,6 +835,140 @@ begin w_out.intr_vec <= 16#700#; w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0'); + -- This is active in the second cycle of an instruction, and works out if + -- we have a special case where one or more operands are NaN, infinity, or zero, + -- meaning that an exception is generated or a specific value results + -- immediately without further calculation. The findings are driven onto scinfo. + fpu_specialcases: process(all) + variable e : specialcase_t; -- findings, copied to scinfo at the end of the process + variable invalid_mul : std_ulogic; -- infinity times zero was seen in a multiply + begin + e.invalid := '0'; -- defaults: no exception and no immediate result + e.zero_divide := '0'; + e.new_fpscr := (others => '0'); + e.immed_result := '0'; + e.qnan_result := '0'; + e.result_sel := AIN_ZERO; + e.result_class := FINITE; + e.rsgn_op := RSGN_NOP; + + -- Check if any operand is a signalling NaN (class NAN with mantissa(QNAN_BIT) clear) + if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or + (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or + (r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then + e.new_fpscr(FPSCR_VXSNAN) := '1'; + e.invalid := '1'; + end if; + + -- Check for this case here since VXIMZ can be set along with VXSNAN (multiply of infinity by zero, operands A and C) + invalid_mul := '0'; + if r.is_multiply = '1' and + ((r.a.class = INFINITY and r.c.class = ZERO) or + (r.a.class = ZERO and r.c.class = INFINITY)) then + e.new_fpscr(FPSCR_VXIMZ) := '1'; + e.invalid := '1'; + invalid_mul := '1'; + end if; + + -- Note that any operand for which r.use_X is 0 will have class = ZERO + if r.is_nan_inf = '1' then -- latched in fpu_1 IDLE state: some OP_FP_ARITH operand has an all-ones exponent + e.immed_result := '1'; + + if r.int_result = '1' then -- integer-result instruction (fcti*) given a NaN or infinity + e.qnan_result := '1'; +
e.new_fpscr(FPSCR_VXCVI) := '1'; + + elsif r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then + e.result_class := NAN; + e.rsgn_op := RSGN_SEL; + -- Select the first input that is a NaN (priority A, then B, then C) + if r.a.class = NAN then + e.result_sel := AIN_A; + elsif r.b.class = NAN then + e.result_sel := AIN_B; + elsif r.c.class = NAN then + e.result_sel := AIN_C; + end if; + + else + -- some operand is an infinity and none has class NAN + if invalid_mul = '1' then + e.qnan_result := '1'; + elsif (r.a.class = INFINITY or r.c.class = INFINITY) then + if r.is_multiply = '1' then + e.rsgn_op := RSGN_SUB; + end if; + if r.is_subtract = '1' and r.b.class = INFINITY then -- effective subtraction of infinities is invalid + e.new_fpscr(FPSCR_VXISI) := '1'; + e.qnan_result := '1'; + end if; + end if; + if r.is_inverse = '1' and r.a.class = INFINITY and r.b.class = INFINITY then -- infinity divided by infinity is invalid + e.new_fpscr(FPSCR_VXIDI) := '1'; + e.qnan_result := '1'; + end if; + if r.b.class = INFINITY and r.is_sqrt = '1' and r.b.negative = '1' then -- square root of negative infinity is invalid + e.new_fpscr(FPSCR_VXSQRT) := '1'; + e.qnan_result := '1'; + end if; + if r.b.class = INFINITY and r.is_inverse = '1' then + -- fdiv, fre, frsqrte: an infinite B gives a zero result + e.result_class := ZERO; + else + e.result_class := INFINITY; + end if; + end if; + + elsif r.use_a = '1' and r.a.class = ZERO then + e.immed_result := '1'; + if r.is_addition = '1' then + -- result is +/- B + e.result_sel := AIN_B; + e.result_class := r.b.class; + else + e.result_class := ZERO; + end if; + if r.is_inverse = '1' and r.b.class = ZERO then + -- fdiv 0 / 0 is invalid + e.new_fpscr(FPSCR_VXZDZ) := '1'; + e.qnan_result := '1'; + end if; + + elsif r.use_c = '1' and r.c.class = ZERO then + -- fmadd/sub A * 0 + B: the result is B + e.immed_result := '1'; + e.result_sel := AIN_B; + e.result_class := r.b.class; + + elsif r.use_b = '1' and r.b.class = ZERO and r.is_multiply = '0' then + -- B is zero, other operands are finite + e.immed_result := '1'; + if r.is_inverse = '1' then + -- fdiv, fre, frsqrte: division by zero gives an infinite result and raises ZX + e.result_class := INFINITY; + e.new_fpscr(FPSCR_ZX) := '1'; + e.zero_divide := '1'; + elsif r.is_addition = '1' then +
-- fadd, result is A + e.result_sel := AIN_A; + else + -- other things, result is zero + e.result_class := ZERO; + end if; + end if; + if r.is_sqrt = '1' and r.b.class = FINITE and r.b.negative = '1' then -- square root of a negative finite number is invalid + e.immed_result := '1'; + e.new_fpscr(FPSCR_VXSQRT) := '1'; + e.qnan_result := '1'; + end if; + + if e.qnan_result = '1' then -- every generated QNaN is also flagged as an invalid operation + e.invalid := '1'; + e.result_class := NAN; + end if; + scinfo <= e; -- publish the findings + end process; + fpu_1: process(all) variable v : reg_type; variable adec : fpu_reg_type; @@ -799,7 +991,6 @@ begin variable arith_done : std_ulogic; variable invalid : std_ulogic; variable zero_divide : std_ulogic; - variable mant_nz : std_ulogic; variable min_exp : signed(EXP_BITS-1 downto 0); variable max_exp : signed(EXP_BITS-1 downto 0); variable bias_exp : signed(EXP_BITS-1 downto 0); @@ -811,7 +1002,6 @@ begin variable mshift : signed(EXP_BITS-1 downto 0); variable need_check : std_ulogic; variable msb : std_ulogic; - variable is_add : std_ulogic; variable set_a : std_ulogic; variable set_a_exp : std_ulogic; variable set_a_mant : std_ulogic; @@ -821,8 +1011,10 @@ begin variable set_b_mant : std_ulogic; variable set_c : std_ulogic; variable set_y : std_ulogic; + variable set_r : std_ulogic; variable set_s : std_ulogic; variable qnan_result : std_ulogic; + variable invalid_mul : std_ulogic; variable px_nz : std_ulogic; variable pcmpb_eq : std_ulogic; variable pcmpb_lt : std_ulogic; @@ -836,11 +1028,8 @@ begin variable mulexp : signed(EXP_BITS-1 downto 0); variable maddend : std_ulogic_vector(127 downto 0); variable sum : std_ulogic_vector(63 downto 0); - variable round_inc : std_ulogic_vector(63 downto 0); - variable rbit_inc : std_ulogic; variable mult_mask : std_ulogic; variable sign_bit : std_ulogic; - variable rnd_b32 : std_ulogic; variable rexp_in1 : signed(EXP_BITS-1 downto 0); variable rexp_in2 : signed(EXP_BITS-1 downto 0); variable rexp_cin : std_ulogic; @@ -849,15 +1038,29 @@ begin variable rsh_in2 : signed(EXP_BITS-1 downto 0); variable exec_state :
state_t; variable opcbits : std_ulogic_vector(4 downto 0); - variable int_result : std_ulogic; variable illegal : std_ulogic; variable rsign : std_ulogic; + variable rsgn_op : std_ulogic_vector(1 downto 0); + variable is_nan_inf : std_ulogic; + variable is_zero_den : std_ulogic; + variable set_reg_ind : std_ulogic; + variable cr_op : std_ulogic_vector(2 downto 0); + variable cr_result : std_ulogic_vector(3 downto 0); + variable set_cr : std_ulogic; + variable set_fpcc : std_ulogic; + variable asign : std_ulogic; + variable bneg : std_ulogic; + variable ci : std_ulogic; begin v := r; v.complete := '0'; v.do_intr := '0'; is_32bint := '0'; exec_state := IDLE; + is_nan_inf := '0'; + is_zero_den := '0'; + v.cycle_1 := e_in.valid; + v.cycle_1_ar := '0'; if r.complete = '1' or r.do_intr = '1' then v.instr_done := '0'; @@ -889,6 +1092,9 @@ begin v.divmod := '0'; v.is_sqrt := '0'; v.is_multiply := '0'; + v.is_addition := '0'; + v.is_subtract := '0'; + v.is_inverse := '0'; fpin_a := '0'; fpin_b := '0'; fpin_c := '0'; @@ -896,6 +1102,11 @@ begin v.use_b := e_in.valid_b; v.use_c := e_in.valid_c; v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN); + v.result_sign := '0'; + v.negate := '0'; + v.quieten_nan := '1'; + v.int_result := '0'; + v.is_arith := '0'; case e_in.op is when OP_FP_ARITH => fpin_a := e_in.valid_a; @@ -903,16 +1114,47 @@ begin fpin_c := e_in.valid_c; v.longmask := e_in.single; v.fp_rc := e_in.rc; + v.is_arith := '1'; + v.cycle_1_ar := '1'; exec_state := arith_decode(to_integer(unsigned(e_in.insn(5 downto 1)))); - if e_in.insn(5 downto 1) = "11001" or e_in.insn(5 downto 3) = "111" then - v.is_multiply := '1'; - end if; if e_in.insn(5 downto 1) = "10110" or e_in.insn(5 downto 1) = "11010" then v.is_sqrt := '1'; end if; - if e_in.insn(5 downto 1) = "01111" then + if e_in.insn(5 downto 1) = "01111" then -- fcti*z v.round_mode := "001"; + elsif e_in.insn(5 downto 1) = "01000" then -- fri* + v.round_mode := '1' & e_in.insn(7 downto 6); end if; + case 
e_in.insn(5 downto 1) is + when "10100" | "10101" => -- fadd and fsub + v.is_addition := '1'; + v.result_sign := e_in.fra(63); + if unsigned(e_in.fra(62 downto 52)) <= unsigned(e_in.frb(62 downto 52)) then + v.result_sign := e_in.frb(63) xnor e_in.insn(1); + end if; + v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.insn(1)); + when "11001" => -- fmul + v.is_multiply := '1'; + v.result_sign := e_in.fra(63) xor e_in.frc(63); + when "11100" | "11101" | "11110" | "11111" => --fmadd family + v.is_multiply := '1'; + v.is_addition := '1'; + v.result_sign := e_in.frb(63) xnor e_in.insn(1); + v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor + e_in.frc(63) xor e_in.insn(1)); + v.negate := e_in.insn(2); + when "10010" => -- fdiv + v.is_inverse := '1'; + v.result_sign := e_in.fra(63) xor e_in.frb(63); + when "11000" | "11010" => -- fre and frsqrte + v.is_inverse := '1'; + v.result_sign := e_in.frb(63); + when "01110" | "01111" => -- fcti* + v.int_result := '1'; + v.result_sign := e_in.frb(63); + when others => -- fri* and frsp + v.result_sign := e_in.frb(63); + end case; when OP_FP_CMP => fpin_a := e_in.valid_a; fpin_b := e_in.valid_b; @@ -921,37 +1163,73 @@ begin v.fp_rc := e_in.rc; opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1); exec_state := misc_decode(to_integer(unsigned(opcbits))); + case opcbits is + when "10010" | "11010" | "10011" => + -- fmrg*, mffs + v.int_result := '1'; + v.result_sign := '0'; + when "10110" => -- fcfid + v.result_sign := e_in.frb(63); + when others => + v.result_sign := '0'; + end case; when OP_FP_MOVE => v.fp_rc := e_in.rc; fpin_a := e_in.valid_a; fpin_b := e_in.valid_b; fpin_c := e_in.valid_c; + v.quieten_nan := '0'; if e_in.insn(5) = '0' then exec_state := DO_FMR; + if e_in.insn(9) = '1' then + v.result_sign := '0'; -- fabs + elsif e_in.insn(8) = '1' then + v.result_sign := '1'; -- fnabs + elsif e_in.insn(7) = '1' then + v.result_sign := e_in.frb(63); -- fmr + elsif e_in.insn(6) = '1' 
then + v.result_sign := not e_in.frb(63); -- fneg + else + v.result_sign := e_in.fra(63); -- fcpsgn + end if; else exec_state := DO_FSEL; + v.result_sign := e_in.frb(63); end if; when OP_DIV => v.integer_op := '1'; is_32bint := e_in.single; + if e_in.single = '0' then + v.result_sign := e_in.is_signed and (e_in.fra(63) xor e_in.frb(63)); + else + v.result_sign := e_in.is_signed and (e_in.fra(31) xor e_in.frb(31)); + end if; exec_state := DO_IDIVMOD; when OP_DIVE => v.integer_op := '1'; v.divext := '1'; is_32bint := e_in.single; + if e_in.single = '0' then + v.result_sign := e_in.is_signed and (e_in.fra(63) xor e_in.frb(63)); + else + v.result_sign := e_in.is_signed and (e_in.fra(31) xor e_in.frb(31)); + end if; exec_state := DO_IDIVMOD; when OP_MOD => v.integer_op := '1'; v.divmod := '1'; is_32bint := e_in.single; + if e_in.single = '0' then + v.result_sign := e_in.is_signed and e_in.fra(63); + else + v.result_sign := e_in.is_signed and e_in.fra(31); + end if; exec_state := DO_IDIVMOD; when others => exec_state := DO_ILLEGAL; end case; - v.quieten_nan := '1'; v.tiny := '0'; v.denorm := '0'; - v.is_subtract := '0'; v.add_bsmall := '0'; v.int_ovf := '0'; v.div_close := '0'; @@ -963,13 +1241,9 @@ begin v.b := bdec; v.c := cdec; - v.exp_cmp := '0'; - if adec.exponent > bdec.exponent then - v.exp_cmp := '1'; - end if; - v.madd_cmp := '0'; - if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then - v.madd_cmp := '1'; + if e_in.op = OP_FP_ARITH then + is_nan_inf := adec.naninf or bdec.naninf or cdec.naninf; + is_zero_den := adec.zeroexp or bdec.zeroexp or cdec.zeroexp; end if; v.a_hi := 8x"0"; @@ -1038,16 +1312,18 @@ begin v.update_fprf := '0'; v.first := '0'; v.doing_ftdiv := "00"; - v.opsel_a := AIN_R; - opsel_ainv <= '0'; + opsel_a <= AIN_ZERO; + opsel_aneg <= '0'; + opsel_aabs <= '0'; opsel_mask <= '0'; - opsel_b <= BIN_ZERO; - opsel_binv <= '0'; + opsel_b <= BIN_R; + opsel_c <= CIN_ZERO; opsel_r <= RES_SUM; opsel_s <= S_ZERO; - carry_in <= '0'; - misc_sel <= 
"0000"; + misc_sel <= "000"; + opsel_sel <= AIN_ZERO; fpscr_mask := (others => '1'); + cr_op := CROP_NONE; update_fx := '0'; arith_done := '0'; invalid := '0'; @@ -1062,7 +1338,10 @@ begin set_b := '0'; set_b_mant := '0'; set_c := '0'; + set_r := '1'; set_s := '0'; + set_cr := '0'; + set_fpcc := '0'; f_to_multiply.is_signed <= '0'; f_to_multiply.valid <= '0'; msel_1 <= MUL1_A; @@ -1074,11 +1353,9 @@ begin renorm_sqrt := '0'; shiftin := '0'; shiftin0 := '0'; - rbit_inc := '0'; mult_mask := '0'; - rnd_b32 := '0'; - int_result := '0'; illegal := '0'; + set_reg_ind := '0'; re_sel1 <= REXP1_ZERO; re_sel2 <= REXP2_CON; @@ -1093,48 +1370,61 @@ begin rs_neg2 <= '0'; rs_norm <= '0'; + rsgn_op := RSGN_NOP; + rcls_op <= RCLS_NOP; + + if r.cycle_1_ar = '1' then + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + v.result_class := FINITE; + end if; + case r.state is when IDLE => v.invalid := '0'; - v.negate := '0'; if e_in.valid = '1' then - v.opsel_a := AIN_B; v.busy := '1'; - if e_in.op = OP_FP_ARITH and e_in.valid_a = '1' and - (e_in.valid_b = '0' or e_in.valid_c = '0') then - v.opsel_a := AIN_A; - end if; - if e_in.op = OP_FP_ARITH then - -- input selection for denorm cases - case e_in.insn(5 downto 1) is - when "10010" => -- fdiv - if v.b.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then - v.opsel_a := AIN_B; - end if; - when "11001" => -- fmul - if v.c.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then - v.opsel_a := AIN_C; - end if; - when "11100" | "11101" | "11110" | "11111" => -- fmadd etc. 
- if v.a.mantissa(UNIT_BIT) = '0' then - v.opsel_a := AIN_A; - elsif v.c.mantissa(UNIT_BIT) = '0' then - v.opsel_a := AIN_C; - end if; - when others => - end case; + v.exec_state := exec_state; + v.is_nan_inf := is_nan_inf; + if is_nan_inf = '1' or is_zero_den = '1' then + v.state := DO_SPECIAL; + else + v.state := exec_state; end if; - v.state := exec_state; end if; v.x := '0'; v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX); set_s := '1'; + v.regsel := AIN_ZERO; + + when DO_SPECIAL => + -- At least one floating point operand is NaN, infinity, zero or denormalized + -- Most of the special cases are handled in the fpu_specialcases process + -- and in the code below (the scinfo.immed_result = '1' block). + if r.is_multiply = '1' and r.b.class = ZERO then + -- This will trigger for fmul as well as fmadd/sub, but + -- it doesn't matter since r.is_subtract = 0 for fmul. + rsgn_op := RSGN_SUB; + end if; + if r.a.denorm = '1' and (r.is_multiply = '1' or r.is_inverse = '1') then + v.state := RENORM_A; + elsif r.c.denorm = '1' then + v.state := RENORM_C; + elsif r.b.denorm = '1' and (r.is_inverse = '1' or r.is_sqrt = '1') then + v.state := RENORM_B; + elsif r.is_multiply = '1' and r.b.class = ZERO then + v.state := DO_FMUL; + else + v.state := r.exec_state; + end if; when DO_ILLEGAL => illegal := '1'; v.instr_done := '1'; when DO_MCRFS => + cr_op := CROP_MCRFS; + set_cr := '1'; j := to_integer(unsigned(insn_bfa(r.insn))); for i in 0 to 7 loop if i = j then @@ -1147,93 +1437,56 @@ begin v.instr_done := '1'; when DO_FTDIV => - v.cr_result := "0000"; -- set result_exp to the exponent of B re_sel2 <= REXP2_B; re_set_result <= '1'; - if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or - (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then - v.cr_result(2) := '1'; - end if; - if r.a.class = NAN or r.a.class = INFINITY or - r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or - (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) 
then - v.cr_result(1) := '1'; - v.instr_done := '1'; - else + cr_op := CROP_FTDIV; + if (r.a.class = ZERO or r.a.class = FINITE) and r.b.class = FINITE then v.doing_ftdiv := "11"; v.first := '1'; v.state := FTDIV_1; - v.instr_done := '0'; + else + set_cr := '1'; + v.instr_done := '1'; end if; when DO_FTSQRT => + cr_op := CROP_FTSQRT; + set_cr := '1'; v.instr_done := '1'; - v.cr_result := "0000"; - if r.b.class = ZERO or r.b.class = INFINITY or - (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then - v.cr_result(2) := '1'; - end if; - if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO - or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then - v.cr_result(1) := '1'; - end if; when DO_FCMP => -- fcmp[uo] - -- r.opsel_a = AIN_B - v.instr_done := '1'; + -- Prepare to subtract mantissas, put B in R + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; update_fx := '1'; - re_sel2 <= REXP2_B; - re_set_result <= '1'; - if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or - (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then - -- Signalling NAN - v.fpscr(FPSCR_VXSNAN) := '1'; - if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then - v.fpscr(FPSCR_VXVC) := '1'; - end if; - invalid := '1'; - v.cr_result := "0001"; -- unordered - elsif r.a.class = NAN or r.b.class = NAN then - if r.insn(6) = '1' then + cr_op := CROP_FCMP; + if r.a.class = NAN or r.b.class = NAN then + if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or + (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then + -- Signalling NAN + v.fpscr(FPSCR_VXSNAN) := '1'; + if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then + v.fpscr(FPSCR_VXVC) := '1'; + end if; + invalid := '1'; + elsif r.insn(6) = '1' then -- fcmpo v.fpscr(FPSCR_VXVC) := '1'; invalid := '1'; end if; - v.cr_result := "0001"; -- unordered - elsif r.a.class = ZERO and r.b.class = ZERO then - v.cr_result := "0010"; -- equal - elsif r.a.negative /= r.b.negative then - v.cr_result := r.a.negative & 
r.b.negative & "00"; - elsif r.a.class = ZERO then - -- A and B are the same sign from here down - v.cr_result := not r.b.negative & r.b.negative & "00"; - elsif r.a.class = INFINITY then - if r.b.class = INFINITY then - v.cr_result := "0010"; - else - v.cr_result := r.a.negative & not r.a.negative & "00"; - end if; - elsif r.b.class = ZERO then - -- A is finite from here down - v.cr_result := r.a.negative & not r.a.negative & "00"; - elsif r.b.class = INFINITY then - v.cr_result := not r.b.negative & r.b.negative & "00"; - elsif r.exp_cmp = '1' then - -- A and B are both finite from here down - v.cr_result := r.a.negative & not r.a.negative & "00"; - elsif r.a.exponent /= r.b.exponent then - -- A exponent is smaller than B - v.cr_result := not r.a.negative & r.a.negative & "00"; - else - -- Prepare to subtract mantissas, put B in R - v.cr_result := "0000"; - v.instr_done := '0'; - v.opsel_a := AIN_A; + end if; + if r.a.class = FINITE and r.b.class = FINITE and + r.a.negative = r.b.negative and + r.a.exponent = r.b.exponent then v.state := CMP_1; + else + set_cr := '1'; + set_fpcc := '1'; + v.instr_done := '1'; end if; - v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result; when DO_MTFSB => -- mtfsb{0,1} @@ -1260,15 +1513,17 @@ begin when DO_FMRG => -- fmrgew, fmrgow + set_r := '1'; opsel_r <= RES_MISC; - misc_sel <= "01" & r.insn(8) & '0'; - int_result := '1'; + misc_sel <= "100"; v.writing_fpr := '1'; v.instr_done := '1'; when DO_MFFS => v.writing_fpr := '1'; + set_r := '1'; opsel_r <= RES_MISC; + misc_sel <= "011"; case r.insn(20 downto 16) is when "00000" => -- mffs @@ -1294,7 +1549,6 @@ begin v.illegal := '1'; v.writing_fpr := '0'; end case; - int_result := '1'; v.instr_done := '1'; when DO_MTFSF => @@ -1314,145 +1568,97 @@ begin v.instr_done := '1'; when DO_FMR => - -- r.opsel_a = AIN_B - v.result_class := r.b.class; + opsel_r <= RES_MISC; + misc_sel <= "111"; + opsel_sel <= AIN_B; + set_r := '1'; + rcls_op <= RCLS_SEL; re_sel2 <= REXP2_B; re_set_result <= '1'; - 
v.quieten_nan := '0'; - if r.insn(9) = '1' then - v.result_sign := '0'; -- fabs - elsif r.insn(8) = '1' then - v.result_sign := '1'; -- fnabs - elsif r.insn(7) = '1' then - v.result_sign := r.b.negative; -- fmr - elsif r.insn(6) = '1' then - v.result_sign := not r.b.negative; -- fneg - else - v.result_sign := r.a.negative; -- fcpsgn - end if; v.writing_fpr := '1'; v.instr_done := '1'; when DO_FRI => -- fri[nzpm] - -- r.opsel_a = AIN_B - v.result_class := r.b.class; - v.result_sign := r.b.negative; + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel2 <= REXP2_B; re_set_result <= '1'; -- set shift to exponent - 52 rs_sel1 <= RSH1_B; rs_con2 <= RSCON2_52; rs_neg2 <= '1'; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then - -- Signalling NAN - v.fpscr(FPSCR_VXSNAN) := '1'; - invalid := '1'; - end if; - if r.b.class = FINITE then - if r.b.exponent >= to_signed(52, EXP_BITS) then - -- integer already, no rounding required - arith_done := '1'; - else - v.state := FRI_1; - v.round_mode := '1' & r.insn(7 downto 6); - end if; - else + if r.b.exponent >= to_signed(52, EXP_BITS) then + -- integer already, no rounding required arith_done := '1'; + else + v.state := FRI_1; end if; when DO_FRSP => - -- r.opsel_a = AIN_B, r.shift = 0 - v.result_class := r.b.class; - v.result_sign := r.b.negative; + -- r.shift = 0 + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel2 <= REXP2_B; re_set_result <= '1'; - -- set shift to exponent - -126 + v.state := DO_FRSP_2; + + when DO_FRSP_2 => + -- r.shift = 0 + -- set shift to exponent - -126 (for ROUND_UFLOW state) rs_sel1 <= RSH1_B; rs_con2 <= RSCON2_MINEXP; rs_neg2 <= '1'; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - if r.b.class = NAN and r.b.mantissa(53) = '0' then - -- Signalling NAN - v.fpscr(FPSCR_VXSNAN) := '1'; - invalid := '1'; - end if; - set_x := '1'; - if r.b.class = FINITE then - if r.b.exponent < to_signed(-126, EXP_BITS) then 
- v.state := ROUND_UFLOW; - elsif r.b.exponent > to_signed(127, EXP_BITS) then - v.state := ROUND_OFLOW; - else - v.state := ROUNDING; - end if; + set_x := '1'; -- uses r.r and r.shift + if r.b.exponent < to_signed(-126, EXP_BITS) then + v.state := ROUND_UFLOW; + elsif r.b.exponent > to_signed(127, EXP_BITS) then + v.state := ROUND_OFLOW; else - arith_done := '1'; + v.state := ROUNDING; end if; when DO_FCTI => -- instr bit 9: 1=dword 0=word -- instr bit 8: 1=unsigned 0=signed -- instr bit 1: 1=round to zero 0=use fpscr[RN] - -- r.opsel_a = AIN_B - v.result_class := r.b.class; - v.result_sign := r.b.negative; + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel2 <= REXP2_B; re_set_result <= '1'; rs_sel1 <= RSH1_B; rs_neg2 <= '1'; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - if r.b.class = NAN and r.b.mantissa(53) = '0' then - -- Signalling NAN - v.fpscr(FPSCR_VXSNAN) := '1'; - invalid := '1'; - end if; - - int_result := '1'; - case r.b.class is - when ZERO => - arith_done := '1'; - when FINITE => - if r.b.exponent >= to_signed(64, EXP_BITS) or - (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then - v.state := INT_OFLOW; - elsif r.b.exponent >= to_signed(52, EXP_BITS) then - -- integer already, no rounding required, - -- shift into final position - -- set shift to exponent - 56 - rs_con2 <= RSCON2_UNIT; - if r.insn(8) = '1' and r.b.negative = '1' then - v.state := INT_OFLOW; - else - v.state := INT_ISHIFT; - end if; - else - -- set shift to exponent - 52 - rs_con2 <= RSCON2_52; - v.state := INT_SHIFT; - end if; - when INFINITY | NAN => + if r.b.exponent >= to_signed(64, EXP_BITS) or + (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then + v.state := INT_OFLOW; + elsif r.b.exponent >= to_signed(52, EXP_BITS) then + -- integer already, no rounding required, + -- shift into final position + -- set shift to exponent - 56 + rs_con2 <= RSCON2_UNIT; + if r.insn(8) = '1' and r.b.negative = '1' then v.state := INT_OFLOW; 
- end case; + else + v.state := INT_ISHIFT; + end if; + else + -- set shift to exponent - 52 + rs_con2 <= RSCON2_52; + v.state := INT_SHIFT; + end if; when DO_FCFID => - -- r.opsel_a = AIN_B - v.result_sign := '0'; - if r.insn(8) = '0' and r.b.negative = '1' then - -- fcfid[s] with negative operand, set R = -B - opsel_ainv <= '1'; - carry_in <= '1'; - v.result_sign := '1'; - end if; - v.result_class := r.b.class; + opsel_a <= AIN_B; + opsel_aabs <= '1'; + opsel_b <= BIN_ZERO; + set_r := '1'; + opsel_sel <= AIN_B; + rcls_op <= RCLS_SEL; re_con2 <= RECON2_UNIT; re_set_result <= '1'; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; if r.b.class = ZERO then arith_done := '1'; else @@ -1461,399 +1667,162 @@ begin when DO_FADD => -- fadd[s] and fsub[s] - -- r.opsel_a = AIN_A - v.result_sign := r.a.negative; - v.result_class := r.a.class; + opsel_a <= AIN_A; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel1 <= REXP1_A; re_set_result <= '1'; -- set shift to a.exp - b.exp rs_sel1 <= RSH1_B; rs_neg1 <= '1'; rs_sel2 <= RSH2_A; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - is_add := r.a.negative xor r.b.negative xor r.insn(1); - v.is_subtract := not is_add; - if r.a.class = FINITE and r.b.class = FINITE then - v.add_bsmall := r.exp_cmp; - v.opsel_a := AIN_B; - if r.exp_cmp = '0' then - v.result_sign := r.b.negative xnor r.insn(1); - if r.a.exponent = r.b.exponent then - v.state := ADD_2; - else - v.longmask := '0'; - v.state := ADD_SHIFT; - end if; - else - v.state := ADD_1; - end if; + v.add_bsmall := '0'; + if r.a.exponent = r.b.exponent then + v.state := ADD_2B; + elsif r.a.exponent < r.b.exponent then + v.longmask := '0'; + v.state := ADD_SHIFT; else - if r.a.class = NAN or r.b.class = NAN then - v.state := NAN_RESULT; - elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then - -- invalid operation, construct QNaN - v.fpscr(FPSCR_VXISI) := '1'; - qnan_result := '1'; - arith_done := '1'; - elsif r.a.class = INFINITY or r.b.class = ZERO then 
- -- result is A; we're already set up to put A into R - arith_done := '1'; - else - -- result is +/- B - v.opsel_a := AIN_B; - v.result_sign := r.b.negative xnor r.insn(1); - v.state := EXC_RESULT; - end if; + v.add_bsmall := '1'; + v.state := ADD_1; end if; when DO_FMUL => -- fmul[s] - -- r.opsel_a = AIN_A unless C is denorm and A isn't - v.result_sign := r.a.negative xor r.c.negative; - v.result_class := r.a.class; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; + opsel_a <= AIN_A; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel1 <= REXP1_A; re_sel2 <= REXP2_C; re_set_result <= '1'; - if r.a.class = FINITE and r.c.class = FINITE then - -- Renormalize denorm operands - if r.a.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_A; - elsif r.c.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_C; - else - f_to_multiply.valid <= '1'; - v.state := MULT_1; - end if; - else - if r.a.class = NAN or r.c.class = NAN then - v.state := NAN_RESULT; - elsif (r.a.class = INFINITY and r.c.class = ZERO) or - (r.a.class = ZERO and r.c.class = INFINITY) then - -- invalid operation, construct QNaN - v.fpscr(FPSCR_VXIMZ) := '1'; - qnan_result := '1'; - elsif r.a.class = ZERO or r.a.class = INFINITY then - -- result is +/- A - arith_done := '1'; - else - -- r.c.class is ZERO or INFINITY - v.opsel_a := AIN_C; - v.state := EXC_RESULT; - end if; - end if; + f_to_multiply.valid <= '1'; + v.state := MULT_1; when DO_FDIV => - -- r.opsel_a = AIN_A unless B is denorm and A isn't - v.result_class := r.a.class; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - v.result_sign := r.a.negative xor r.b.negative; + opsel_a <= AIN_A; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel1 <= REXP1_A; re_sel2 <= REXP2_B; re_neg2 <= '1'; re_set_result <= '1'; v.count := "00"; - if r.a.class = FINITE and r.b.class = FINITE then - -- Renormalize denorm operands - if r.a.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_A; - elsif r.b.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_B; - else - v.first 
:= '1'; - v.state := DIV_2; - end if; - else - if r.a.class = NAN or r.b.class = NAN then - v.state := NAN_RESULT; - elsif r.b.class = INFINITY then - if r.a.class = INFINITY then - v.fpscr(FPSCR_VXIDI) := '1'; - qnan_result := '1'; - else - v.result_class := ZERO; - end if; - arith_done := '1'; - elsif r.b.class = ZERO then - if r.a.class = ZERO then - v.fpscr(FPSCR_VXZDZ) := '1'; - qnan_result := '1'; - else - if r.a.class = FINITE then - zero_divide := '1'; - end if; - v.result_class := INFINITY; - end if; - arith_done := '1'; - else -- r.b.class = FINITE, result_class = r.a.class - arith_done := '1'; - end if; - end if; + v.first := '1'; + v.state := DIV_2; when DO_FSEL => + rsgn_op := RSGN_SEL; + rcls_op <= RCLS_SEL; if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then - v.opsel_a := AIN_C; - v.result_sign := r.c.negative; + opsel_sel <= AIN_C; + re_sel2 <= REXP2_C; else - v.opsel_a := AIN_B; - v.result_sign := r.b.negative; + opsel_sel <= AIN_B; + re_sel2 <= REXP2_B; end if; - v.quieten_nan := '0'; - v.state := EXC_RESULT; + opsel_r <= RES_MISC; + misc_sel <= "111"; + set_r := '1'; + re_set_result <= '1'; + arith_done := '1'; when DO_FSQRT => - -- r.opsel_a = AIN_B - v.result_class := r.b.class; - v.result_sign := r.b.negative; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel2 <= REXP2_B; re_set_result <= '1'; - case r.b.class is - when FINITE => - if r.b.negative = '1' then - v.fpscr(FPSCR_VXSQRT) := '1'; - qnan_result := '1'; - elsif r.b.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_B; - elsif r.b.exponent(0) = '0' then - v.state := SQRT_1; - else - -- set shift to 1 - rs_con2 <= RSCON2_1; - v.state := RENORM_B2; - end if; - when NAN => - v.state := NAN_RESULT; - when ZERO => - -- result is B - arith_done := '1'; - when INFINITY => - if r.b.negative = '1' then - v.fpscr(FPSCR_VXSQRT) := '1'; - qnan_result := '1'; - -- else result is B - end if; - arith_done := '1'; 
- end case; + if r.b.exponent(0) = '1' then + v.state := SQRT_ODD; + elsif r.is_inverse = '0' then + v.state := SQRT_1; + else + v.state := RSQRT_1; + end if; when DO_FRE => - -- r.opsel_a = AIN_B - v.result_class := r.b.class; - v.result_sign := r.b.negative; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - re_sel2 <= REXP2_B; - re_set_result <= '1'; - case r.b.class is - when FINITE => - if r.b.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_B; - else - v.state := FRE_1; - end if; - when NAN => - v.state := NAN_RESULT; - when INFINITY => - v.result_class := ZERO; - arith_done := '1'; - when ZERO => - v.result_class := INFINITY; - zero_divide := '1'; - arith_done := '1'; - end case; - - when DO_FRSQRTE => - -- r.opsel_a = AIN_B - v.result_class := r.b.class; - v.result_sign := r.b.negative; - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; re_sel2 <= REXP2_B; re_set_result <= '1'; - -- set shift to 1 - rs_con2 <= RSCON2_1; - case r.b.class is - when FINITE => - if r.b.negative = '1' then - v.fpscr(FPSCR_VXSQRT) := '1'; - qnan_result := '1'; - elsif r.b.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_B; - elsif r.b.exponent(0) = '0' then - v.state := RSQRT_1; - else - v.state := RENORM_B2; - end if; - when NAN => - v.state := NAN_RESULT; - when INFINITY => - if r.b.negative = '1' then - v.fpscr(FPSCR_VXSQRT) := '1'; - qnan_result := '1'; - else - v.result_class := ZERO; - end if; - arith_done := '1'; - when ZERO => - v.result_class := INFINITY; - zero_divide := '1'; - arith_done := '1'; - end case; + v.state := FRE_1; when DO_FMADD => -- fmadd, fmsub, fnmadd, fnmsub - -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm, - -- else AIN_B - v.result_sign := r.a.negative; - v.result_class := r.a.class; + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; -- put a.exp + c.exp into result_exp re_sel1 <= REXP1_A; re_sel2 <= REXP2_C; re_set_result <= '1'; -- put b.exp into shift rs_sel1 <= RSH1_B; - v.fpscr(FPSCR_FR) := '0'; - 
v.fpscr(FPSCR_FI) := '0'; - is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1); - v.negate := r.insn(2); - v.is_subtract := not is_add; - if r.a.class = FINITE and r.c.class = FINITE and - (r.b.class = FINITE or r.b.class = ZERO) then - -- Make sure A and C are normalized - if r.a.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_A; - elsif r.c.mantissa(UNIT_BIT) = '0' then - v.state := RENORM_C; - elsif r.b.class = ZERO then - -- no addend, degenerates to multiply - v.result_sign := r.a.negative xor r.c.negative; - f_to_multiply.valid <= '1'; - v.is_multiply := '1'; - v.state := MULT_1; - elsif r.madd_cmp = '0' then - -- addend is bigger, do multiply first - v.result_sign := r.b.negative xnor r.insn(1); - f_to_multiply.valid <= '1'; - v.first := '1'; - v.state := FMADD_0; - else - -- product is bigger, shift B first - v.state := FMADD_1; - end if; + if (r.a.exponent + r.c.exponent + 1) < r.b.exponent then + -- addend is bigger, do multiply first + -- if subtracting, sign is opposite to initial estimate + f_to_multiply.valid <= '1'; + v.first := '1'; + v.state := FMADD_0; else - if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then - v.state := NAN_RESULT; - elsif (r.a.class = ZERO and r.c.class = INFINITY) or - (r.a.class = INFINITY and r.c.class = ZERO) then - -- invalid operation, construct QNaN - v.fpscr(FPSCR_VXIMZ) := '1'; - qnan_result := '1'; - elsif r.a.class = INFINITY or r.c.class = INFINITY then - if r.b.class = INFINITY and is_add = '0' then - -- invalid operation, construct QNaN - v.fpscr(FPSCR_VXISI) := '1'; - qnan_result := '1'; - else - -- result is infinity - v.result_class := INFINITY; - v.result_sign := r.a.negative xor r.c.negative; - arith_done := '1'; - end if; - else - -- Here A is zero, C is zero, or B is infinity - -- Result is +/-B in all of those cases - v.opsel_a := AIN_B; - v.result_sign := r.b.negative xnor r.insn(1); - v.state := EXC_RESULT; - end if; + -- product is bigger, shift B first + v.state := 
FMADD_1; end if; when RENORM_A => - rs_norm <= '1'; - v.state := RENORM_A2; - if r.use_c = '1' and r.c.denorm = '1' then - v.opsel_a := AIN_C; - else - v.opsel_a := AIN_B; - end if; - - when RENORM_A2 => - -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv - set_a := '1'; - re_sel2 <= REXP2_NE; + -- Get A into R + opsel_a <= AIN_A; + opsel_b <= BIN_ZERO; + set_r := '1'; + v.regsel := AIN_A; + re_sel1 <= REXP1_A; re_set_result <= '1'; - if r.insn(4) = '1' then - if r.c.mantissa(UNIT_BIT) = '1' then - if r.insn(3) = '0' or r.b.class = ZERO then - v.first := '1'; - v.state := MULT_1; - else - v.madd_cmp := '0'; - if new_exp + 1 >= r.b.exponent then - v.madd_cmp := '1'; - end if; - v.opsel_a := AIN_B; - v.state := DO_FMADD; - end if; - else - v.state := RENORM_C; - end if; - else - if r.b.mantissa(UNIT_BIT) = '1' then - v.first := '1'; - v.state := DIV_2; - else - v.state := RENORM_B; - end if; - end if; + v.a.denorm := '0'; + v.state := RENORM_1; when RENORM_B => - rs_norm <= '1'; - renorm_sqrt := r.is_sqrt; - v.state := RENORM_B2; - - when RENORM_B2 => - set_b := '1'; - -- For fdiv, we need to increase result_exp by shift rather - -- than decreasing it as for fre/frsqrte and fsqrt. 
- -- We do that by negating r.shift in this cycle and then - -- setting result_exp to new_exp in the next cycle - if r.use_a = '1' then - rs_sel1 <= RSH1_S; - rs_neg1 <= '1'; - else - re_sel2 <= REXP2_NE; - re_set_result <= '1'; - end if; - v.opsel_a := AIN_B; - v.state := LOOKUP; + -- Get B into R + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; + v.regsel := AIN_B; + re_sel2 <= REXP2_B; + re_set_result <= '1'; + v.b.denorm := '0'; + v.state := RENORM_1; when RENORM_C => - rs_norm <= '1'; - v.state := RENORM_C2; - - when RENORM_C2 => - set_c := '1'; - re_sel2 <= REXP2_NE; + -- Get C into R + opsel_a <= AIN_C; + opsel_b <= BIN_ZERO; + set_r := '1'; + v.regsel := AIN_C; + re_sel2 <= REXP2_C; re_set_result <= '1'; - if r.insn(3) = '0' or r.b.class = ZERO then - v.first := '1'; - v.state := MULT_1; + v.c.denorm := '0'; + v.state := RENORM_1; + + when RENORM_1 => + rs_norm <= '1'; + renorm_sqrt := r.is_sqrt; + v.state := RENORM_2; + + when RENORM_2 => + set_reg_ind := '1'; + if r.c.denorm = '1' then + -- must be either fmul or fmadd/sub + v.state := RENORM_C; + elsif r.b.denorm = '1' and r.is_addition = '0' then + v.state := RENORM_B; + elsif r.is_multiply = '1' and r.b.class = ZERO then + v.state := DO_FMUL; else - v.madd_cmp := '0'; - if new_exp + 1 >= r.b.exponent then - v.madd_cmp := '1'; - end if; - v.opsel_a := AIN_B; - v.state := DO_FMADD; + v.state := r.exec_state; end if; when ADD_1 => -- transferring B to R + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '1'; re_sel2 <= REXP2_B; re_set_result <= '1'; -- set shift to b.exp - a.exp @@ -1866,23 +1835,33 @@ begin when ADD_SHIFT => -- r.shift = - exponent difference, r.longmask = 0 opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; v.x := s_nz; set_x := '1'; v.longmask := r.single_prec; if r.add_bsmall = '1' then - v.opsel_a := AIN_A; + v.state := ADD_2; else - v.opsel_a := AIN_B; + v.state := ADD_2B; end if; - v.state := ADD_2; when ADD_2 => - -- r.opsel_a = AIN_A if 
r.add_bsmall = 1 else AIN_B - opsel_b <= BIN_R; - opsel_binv <= r.is_subtract; - carry_in <= r.is_subtract and not r.x; + opsel_a <= AIN_A; + opsel_b <= BIN_ADDSUBR; + opsel_c <= CIN_SUBEXT; + set_r := '1'; + -- set shift to -1 + rs_con2 <= RSCON2_1; + rs_neg2 <= '1'; + v.state := ADD_3; + + when ADD_2B => + opsel_a <= AIN_B; + opsel_b <= BIN_ADDSUBR; + opsel_c <= CIN_SUBEXT; + set_r := '1'; -- set shift to -1 rs_con2 <= RSCON2_1; rs_neg2 <= '1'; @@ -1892,15 +1871,18 @@ begin -- check for overflow or negative result (can't get both) -- r.shift = -1 re_sel2 <= REXP2_NE; + rcls_op <= RCLS_TZERO; + opsel_a <= AIN_ZERO; + opsel_b <= BIN_ABSR; if r.r(63) = '1' then -- result is opposite sign to expected - v.result_sign := not r.result_sign; - opsel_ainv <= '1'; - carry_in <= '1'; + rsgn_op := RSGN_INV; + set_r := '1'; v.state := FINISH; elsif r.r(UNIT_BIT + 1) = '1' then -- sum overflowed, shift right opsel_r <= RES_SHIFT; + set_r := '1'; re_set_result <= '1'; set_x := '1'; if exp_huge = '1' then @@ -1911,37 +1893,27 @@ begin elsif r.r(UNIT_BIT) = '1' then set_x := '1'; v.state := ROUNDING; - elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then - -- r.x must be zero at this point - v.result_class := ZERO; - arith_done := '1'; else rs_norm <= '1'; v.state := NORMALIZE; end if; when CMP_1 => - -- r.opsel_a = AIN_A - opsel_b <= BIN_R; - opsel_binv <= '1'; - carry_in <= '1'; + opsel_a <= AIN_A; + opsel_b <= BIN_MINUSR; + set_r := '1'; v.state := CMP_2; when CMP_2 => - if r.r(63) = '1' then - -- A is smaller in magnitude - v.cr_result := not r.a.negative & r.a.negative & "00"; - elsif (r_hi_nz or r_lo_nz) = '0' then - v.cr_result := "0010"; - else - v.cr_result := r.a.negative & not r.a.negative & "00"; - end if; - v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result; + cr_op := CROP_FCMP; + set_cr := '1'; + set_fpcc := '1'; v.instr_done := '1'; when MULT_1 => f_to_multiply.valid <= r.first; opsel_r <= RES_MULT; + set_r := '1'; if multiply_to_f.valid = '1' 
then v.state := FINISH; end if; @@ -1958,6 +1930,7 @@ begin rs_sel1 <= RSH1_S; end if; opsel_r <= RES_MULT; + set_r := '1'; opsel_s <= S_MULT; set_s := '1'; if multiply_to_f.valid = '1' then @@ -1970,7 +1943,6 @@ begin -- product is bigger here -- shift B right and use it as the addend to the multiplier -- for subtract, multiplier does B - A * C - v.result_sign := r.a.negative xor r.c.negative xor r.is_subtract; re_sel2 <= REXP2_B; re_set_result <= '1'; -- set shift to b.exp - result_exp + 64 @@ -1993,6 +1965,7 @@ begin when FMADD_3 => -- r.shift = addend exp - product exp opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; v.first := '1'; @@ -2000,6 +1973,7 @@ begin when FMADD_4 => msel_add <= MULADD_RS; + set_r := '1'; f_to_multiply.valid <= r.first; msel_inv <= r.is_subtract; opsel_r <= RES_MULT; @@ -2011,10 +1985,11 @@ begin when FMADD_5 => -- negate R:S:X if negative + opsel_b <= BIN_ABSR; + opsel_c <= CIN_ABSEXT; if r.r(63) = '1' then - v.result_sign := not r.result_sign; - opsel_ainv <= '1'; - carry_in <= not (s_nz or r.x); + rsgn_op := RSGN_INV; + set_r := '1'; opsel_s <= S_NEG; set_s := '1'; end if; @@ -2024,49 +1999,25 @@ begin when FMADD_6 => -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero) + set_r := '0'; + opsel_r <= RES_SHIFT; re_sel2 <= REXP2_NE; rs_norm <= '1'; + rcls_op <= RCLS_TZERO; if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then - if s_nz = '0' then - -- must be a subtraction, and r.x must be zero - v.result_class := ZERO; - arith_done := '1'; - else - -- R is all zeroes but there are non-zero bits in S - -- so shift them into R and set S to 0 - opsel_r <= RES_SHIFT; - re_set_result <= '1'; - set_s := '1'; - v.state := FINISH; - end if; + -- S = 0 case is handled by RCLS_TZERO logic, otherwise... 
+ -- R is all zeroes but there are non-zero bits in S + -- so shift them into R and set S to 0 + set_r := '1'; + re_set_result <= '1'; + set_s := '1'; + v.state := FINISH; elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then v.state := FINISH; else v.state := NORMALIZE; end if; - when LOOKUP => - -- r.opsel_a = AIN_B - -- wait one cycle for inverse_table[B] lookup - -- if this is a division, compute exponent - -- (see comment on RENORM_B2 above) - if r.use_a = '1' then - re_sel2 <= REXP2_NE; - re_set_result <= '1'; - end if; - v.first := '1'; - if r.insn(4) = '0' then - if r.insn(3) = '0' then - v.state := DIV_2; - else - v.state := SQRT_1; - end if; - elsif r.insn(2) = '0' then - v.state := FRE_1; - else - v.state := RSQRT_1; - end if; - when DIV_2 => -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y msel_1 <= MUL1_B; @@ -2109,8 +2060,9 @@ begin f_to_multiply.valid <= r.first; pshift := '1'; mult_mask := '1'; + opsel_r <= RES_MULT; + set_r := '1'; if multiply_to_f.valid = '1' then - opsel_r <= RES_MULT; v.first := '1'; v.state := DIV_5; end if; @@ -2127,15 +2079,15 @@ begin end if; when DIV_6 => - -- r.opsel_a = AIN_R -- test if remainder is 0 or >= B + opsel_a <= AIN_RND_RBIT; if pcmpb_lt = '1' then -- quotient is correct, set X if remainder non-zero + set_r := '0'; v.x := r.p(UNIT_BIT + 2) or px_nz; else -- quotient needs to be incremented by 1 in R-bit position - rbit_inc := '1'; - opsel_b <= BIN_RND; + set_r := '1'; v.x := not pcmpb_eq; end if; v.state := FINISH; @@ -2145,7 +2097,8 @@ begin re_neg1 <= '1'; re_set_result <= '1'; opsel_r <= RES_MISC; - misc_sel <= "0111"; + set_r := '1'; + misc_sel <= "101"; -- set shift to 1 rs_con2 <= RSCON2_1; v.state := NORMALIZE; @@ -2154,18 +2107,25 @@ begin -- We go through this state up to two times; the first sees if -- B.exponent is in the range [-1021,1020], and the second tests -- whether B.exp - A.exp is in the range [-1022,1020]. 
- v.cr_result(1) := exp_tiny or exp_huge; - -- set shift to a.exp - rs_sel2 <= RSH2_A; + rs_sel2 <= RSH2_A; -- set shift to a.exp + cr_op := CROP_FTDIV; if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then + set_cr := '1'; v.instr_done := '1'; else v.doing_ftdiv := "10"; end if; + when SQRT_ODD => + -- set shift to 1 + rs_con2 <= RSCON2_1; + v.regsel := AIN_B; + v.state := RENORM_2; + when RSQRT_1 => opsel_r <= RES_MISC; - misc_sel <= "0111"; + misc_sel <= "101"; + set_r := '1'; re_sel1 <= REXP1_BHALF; re_neg1 <= '1'; re_set_result <= '1'; @@ -2178,7 +2138,8 @@ begin -- also transfer B (in R) to A set_a := '1'; opsel_r <= RES_MISC; - misc_sel <= "0111"; + misc_sel <= "101"; + set_r := '1'; msel_1 <= MUL1_B; msel_2 <= MUL2_LUT; f_to_multiply.valid <= '1'; @@ -2193,6 +2154,7 @@ begin -- not expecting multiplier result yet -- r.shift = -1 opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; v.first := '1'; @@ -2204,9 +2166,9 @@ begin set_y := r.first; pshift := '1'; mult_mask := '1'; + opsel_r <= RES_MULT; + set_r := '1'; if multiply_to_f.valid = '1' then - -- put result into R - opsel_r <= RES_MULT; v.first := '1'; v.state := SQRT_4; end if; @@ -2249,9 +2211,9 @@ begin -- wait for second multiply (should be here already) pshift := '1'; mult_mask := '1'; + opsel_r <= RES_MULT; + set_r := '1'; if multiply_to_f.valid = '1' then - -- put result into R - opsel_r <= RES_MULT; v.first := '1'; v.count := r.count + 1; if r.count < 2 then @@ -2294,7 +2256,8 @@ begin when SQRT_10 => -- Add the bottom 8 bits of P, sign-extended, onto R. 
- opsel_b <= BIN_PS8; + opsel_a <= AIN_PS8; + set_r := '1'; re_sel1 <= REXP1_BHALF; re_set_result <= '1'; -- set shift to 1 @@ -2318,12 +2281,14 @@ begin when SQRT_12 => -- test if remainder is 0 or >= B = 2*R + 1 + set_r := '0'; + opsel_c <= CIN_INC; if pcmpb_lt = '1' then -- square root is correct, set X if remainder non-zero v.x := r.p(UNIT_BIT + 2) or px_nz; else -- square root needs to be incremented by 1 - carry_in <= '1'; + set_r := '1'; v.x := not pcmpb_eq; end if; v.state := FINISH; @@ -2331,6 +2296,7 @@ begin when INT_SHIFT => -- r.shift = b.exponent - 52 opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; set_x := '1'; @@ -2342,6 +2308,7 @@ begin when INT_ROUND => -- r.shift = -4 (== 52 - UNIT_BIT) opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign); @@ -2357,14 +2324,16 @@ begin when INT_ISHIFT => -- r.shift = b.exponent - UNIT_BIT; opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; v.state := INT_FINAL; when INT_FINAL => -- Negate if necessary, and increment for rounding if needed - opsel_ainv <= r.result_sign; - carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign; + opsel_b <= BIN_RSIGNR; + opsel_c <= CIN_ROUND; + set_r := '1'; -- Check for possible overflows case r.insn(9 downto 8) is when "00" => -- fctiw[z] @@ -2376,7 +2345,6 @@ begin when others => -- fctidu[z] need_check := r.r(63); end case; - int_result := '1'; if need_check = '1' then v.state := INT_CHECK; else @@ -2392,34 +2360,33 @@ begin else msb := r.r(63); end if; - misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign; + opsel_r <= RES_MISC; + misc_sel <= "110"; if (r.insn(8) = '0' and msb /= r.result_sign) or (r.insn(8) = '1' and msb /= '1') then - opsel_r <= RES_MISC; + set_r := '1'; v.fpscr(FPSCR_VXCVI) := '1'; invalid := '1'; else + set_r := '0'; if r.fpscr(FPSCR_FI) = '1' then v.fpscr(FPSCR_XX) := '1'; end if; end if; - int_result := 
'1'; arith_done := '1'; when INT_OFLOW => opsel_r <= RES_MISC; - misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign; - if r.b.class = NAN then - misc_sel(0) <= '1'; - end if; + misc_sel <= "110"; + set_r := '1'; v.fpscr(FPSCR_VXCVI) := '1'; invalid := '1'; - int_result := '1'; arith_done := '1'; when FRI_1 => -- r.shift = b.exponent - 52 opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; set_x := '1'; @@ -2451,6 +2418,7 @@ begin -- Shift so we have 9 leading zeroes (we know R is non-zero) -- r.shift = clz(r.r) - 7 opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; -- set shift to new_exp - min_exp @@ -2469,10 +2437,12 @@ begin when ROUND_UFLOW => -- r.shift = - amount by which exponent underflows v.tiny := '1'; + opsel_r <= RES_SHIFT; + set_r := '0'; if r.fpscr(FPSCR_UE) = '0' then -- disabled underflow exception case -- have to denormalize before rounding - opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; set_x := '1'; @@ -2493,24 +2463,20 @@ begin end if; when ROUND_OFLOW => + rcls_op <= RCLS_TINF; v.fpscr(FPSCR_OX) := '1'; + opsel_r <= RES_MISC; + misc_sel <= "010"; + set_r := '0'; if r.fpscr(FPSCR_OE) = '0' then -- disabled overflow exception -- result depends on rounding mode + set_r := '1'; v.fpscr(FPSCR_XX) := '1'; v.fpscr(FPSCR_FI) := '1'; - if r.round_mode(1 downto 0) = "00" or - (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then - v.result_class := INFINITY; - v.fpscr(FPSCR_FR) := '1'; - else - v.fpscr(FPSCR_FR) := '0'; - end if; -- construct largest representable number re_con2 <= RECON2_MAX; re_set_result <= '1'; - opsel_r <= RES_MISC; - misc_sel <= "001" & r.single_prec; arith_done := '1'; else -- enabled overflow exception @@ -2523,24 +2489,19 @@ begin when ROUNDING => opsel_mask <= '1'; + set_r := '1'; round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign); v.fpscr(FPSCR_FR downto FPSCR_FI) := round; if round(1) = '1' then 
-- increment the LSB for the precision - opsel_b <= BIN_RND; - -- set shift to -1 - rs_con2 <= RSCON2_1; - rs_neg2 <= '1'; - v.state := ROUNDING_2; + v.state := ROUND_INC; + elsif r.r(UNIT_BIT) = '0' then + -- result after masking could be zero, or could be a + -- denormalized result that needs to be renormalized + rs_norm <= '1'; + v.state := ROUNDING_3; else - if r.r(UNIT_BIT) = '0' then - -- result after masking could be zero, or could be a - -- denormalized result that needs to be renormalized - rs_norm <= '1'; - v.state := ROUNDING_3; - else - arith_done := '1'; - end if; + arith_done := '1'; end if; if round(0) = '1' then v.fpscr(FPSCR_XX) := '1'; @@ -2549,13 +2510,23 @@ begin end if; end if; + when ROUND_INC => + set_r := '1'; + opsel_a <= AIN_RND; + -- set shift to -1 + rs_con2 <= RSCON2_1; + rs_neg2 <= '1'; + v.state := ROUNDING_2; + when ROUNDING_2 => -- Check for overflow during rounding -- r.shift = -1 v.x := '0'; re_sel2 <= REXP2_NE; + opsel_r <= RES_SHIFT; + set_r := '0'; if r.r(UNIT_BIT + 1) = '1' then - opsel_r <= RES_SHIFT; + set_r := '1'; re_set_result <= '1'; if exp_huge = '1' then v.state := ROUND_OFLOW; @@ -2572,73 +2543,40 @@ begin when ROUNDING_3 => -- r.shift = clz(r.r) - 9 - mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec); + opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; -- set shift to new_exp - min_exp (== -1022) rs_sel1 <= RSH1_NE; rs_con2 <= RSCON2_MINEXP; rs_neg2 <= '1'; - if mant_nz = '0' then - v.result_class := ZERO; - arith_done := '1'; + rcls_op <= RCLS_TZERO; + -- If the result is zero, that's handled below. 
+ -- Renormalize result after rounding + re_set_result <= '1'; + v.denorm := exp_tiny; + if new_exp < to_signed(-1022, EXP_BITS) then + v.state := DENORM; else - -- Renormalize result after rounding - opsel_r <= RES_SHIFT; - re_set_result <= '1'; - v.denorm := exp_tiny; - if new_exp < to_signed(-1022, EXP_BITS) then - v.state := DENORM; - else - arith_done := '1'; - end if; + arith_done := '1'; end if; when DENORM => -- r.shift = result_exp - -1022 opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; arith_done := '1'; - when NAN_RESULT => - if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or - (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or - (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then - -- Signalling NAN - v.fpscr(FPSCR_VXSNAN) := '1'; - invalid := '1'; - end if; - if r.use_a = '1' and r.a.class = NAN then - v.opsel_a := AIN_A; - v.result_sign := r.a.negative; - elsif r.use_b = '1' and r.b.class = NAN then - v.opsel_a := AIN_B; - v.result_sign := r.b.negative; - elsif r.use_c = '1' and r.c.class = NAN then - v.opsel_a := AIN_C; - v.result_sign := r.c.negative; - end if; - v.state := EXC_RESULT; - - when EXC_RESULT => - -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result - case r.opsel_a is - when AIN_B => - re_sel2 <= REXP2_B; - v.result_class := r.b.class; - when AIN_C => - re_sel2 <= REXP2_C; - v.result_class := r.c.class; - when others => - re_sel1 <= REXP1_A; - v.result_class := r.a.class; - end case; - re_set_result <= '1'; - arith_done := '1'; - when DO_IDIVMOD => - -- r.opsel_a = AIN_B - v.result_sign := r.is_signed and (r.a.negative xor (r.b.negative and not r.divmod)); + opsel_a <= AIN_B; + opsel_aabs <= '1'; + opsel_b <= BIN_ZERO; + set_r := '1'; + -- normalize and round up B to 8.56 format, like fcfid[u] + re_con2 <= RECON2_UNIT; + re_set_result <= '1'; if r.b.class = ZERO then -- B is zero, signal overflow v.int_ovf := '1'; 
@@ -2647,15 +2585,6 @@ begin -- A is zero, result is zero (both for div and for mod) v.state := IDIV_ZERO; else - -- take absolute value for signed division, and - -- normalize and round up B to 8.56 format, like fcfid[u] - if r.is_signed = '1' and r.b.negative = '1' then - opsel_ainv <= '1'; - carry_in <= '1'; - end if; - v.result_class := FINITE; - re_con2 <= RECON2_UNIT; - re_set_result <= '1'; v.state := IDIV_NORMB; end if; when IDIV_NORMB => @@ -2668,37 +2597,36 @@ begin -- get B into the range [1, 2) in 8.56 format set_x := '1'; -- record if any 1 bits shifted out opsel_r <= RES_SHIFT; + set_r := '1'; re_sel2 <= REXP2_NE; re_set_result <= '1'; v.state := IDIV_NORMB3; when IDIV_NORMB3 => -- add the X bit onto R to round up B - carry_in <= r.x; + opsel_c <= CIN_RNDX; + set_r := '1'; -- prepare to do count-leading-zeroes on A - v.opsel_a := AIN_A; v.state := IDIV_CLZA; when IDIV_CLZA => set_b := '1'; -- put R back into B - -- r.opsel_a = AIN_A - if r.is_signed = '1' and r.a.negative = '1' then - opsel_ainv <= '1'; - carry_in <= '1'; - end if; + opsel_a <= AIN_A; + opsel_aabs <= '1'; + opsel_b <= BIN_ZERO; + set_r := '1'; re_con2 <= RECON2_UNIT; re_set_result <= '1'; - v.opsel_a := AIN_C; v.state := IDIV_CLZA2; when IDIV_CLZA2 => - -- r.opsel_a = AIN_C rs_norm <= '1'; -- write the dividend back into A in case we negated it set_a_mant := '1'; -- while doing the count-leading-zeroes on A, -- also compute A - B to tell us whether A >= B -- (using the original value of B, which is now in C) + opsel_a <= AIN_C; opsel_b <= BIN_R; - opsel_ainv <= '1'; - carry_in <= '1'; + opsel_aneg <= '1'; + set_r := '1'; v.state := IDIV_CLZA3; when IDIV_CLZA3 => -- save the exponent of A (but don't overwrite the mantissa) @@ -2738,6 +2666,13 @@ begin msel_inv <= '1'; msel_2 <= MUL2_LUT; set_y := '1'; + -- Get 0.5 into R in case the inverse estimate turns out to be + -- less than 0.5, in which case we want to use 0.5, to avoid + -- infinite loops in some cases. 
+ -- It turns out the generated QNaN mantissa is actually what we want + opsel_r <= RES_MISC; + misc_sel <= "001"; + set_r := '1'; if r.b.mantissa(UNIT_BIT + 1) = '1' then -- rounding up of the mantissa caused overflow, meaning the -- normalized B is 2.0. Since this is outside the range @@ -2758,10 +2693,22 @@ begin msel_2 <= MUL2_P; set_y := r.first; pshift := '1'; - f_to_multiply.valid <= r.first; + -- set shift to 64 + rs_con2 <= RSCON2_64; + if r.first = '1' then + if r.count = "11" then + if r.p(UNIT_BIT) = '0' and r.p(UNIT_BIT - 1) = '0' then + -- inverse estimate is < 0.5, so use 0.5 + v.state := IDIV_USE0_5; + else + v.state := IDIV_DODIV; + end if; + else + f_to_multiply.valid <= r.first; + end if; + end if; if multiply_to_f.valid = '1' then v.first := '1'; - v.count := r.count + 1; v.state := IDIV_NR2; end if; when IDIV_NR2 => @@ -2770,42 +2717,27 @@ begin msel_2 <= MUL2_P; f_to_multiply.valid <= r.first; pshift := '1'; - v.opsel_a := AIN_A; - -- set shift to 64 - rs_con2 <= RSCON2_64; - -- Get 0.5 into R in case the inverse estimate turns out to be - -- less than 0.5, in which case we want to use 0.5, to avoid - -- infinite loops in some cases. 
- opsel_r <= RES_MISC; - misc_sel <= "0001"; + if r.first = '1' then + v.count := r.count + 1; + end if; if multiply_to_f.valid = '1' then v.first := '1'; - if r.count = "11" then - v.state := IDIV_DODIV; - else - v.state := IDIV_NR1; - end if; + v.state := IDIV_NR1; end if; when IDIV_USE0_5 => - -- Get 0.5 into R; it turns out the generated - -- QNaN mantissa is actually what we want - opsel_r <= RES_MISC; - misc_sel <= "0001"; - v.opsel_a := AIN_A; + -- Put the 0.5 which is in R into Y as the inverse estimate + set_y := '1'; + msel_2 <= MUL2_R; -- set shift to 64 rs_con2 <= RSCON2_64; v.state := IDIV_DODIV; when IDIV_DODIV => - -- r.opsel_a = AIN_A -- r.shift = 64 - -- inverse estimate is in P or in R; copy it to Y - if r.b.mantissa(UNIT_BIT + 1) = '1' or - (r.p(UNIT_BIT) = '0' and r.p(UNIT_BIT - 1) = '0') then - msel_2 <= MUL2_R; - else - msel_2 <= MUL2_P; - end if; - set_y := '1'; + -- inverse estimate is in Y + -- put A (dividend) into R + opsel_a <= AIN_A; + opsel_b <= BIN_ZERO; + set_r := '1'; -- shift_res is 0 because r.shift = 64; -- put that into B, which now holds the quotient set_b_mant := '1'; @@ -2827,12 +2759,12 @@ begin else -- handle top bit of quotient specially -- for this we need the divisor left-justified in B - v.opsel_a := AIN_C; v.state := IDIV_EXT_TBH; end if; when IDIV_SH32 => -- r.shift = 32, R contains the dividend opsel_r <= RES_SHIFT; + set_r := '1'; -- set shift to -UNIT_BIT (== -56) rs_con2 <= RSCON2_UNIT; rs_neg2 <= '1'; @@ -2849,6 +2781,7 @@ begin f_to_multiply.valid <= r.first; pshift := '1'; opsel_r <= RES_MULT; + set_r := '1'; -- set shift to - b.exp rs_sel1 <= RSH1_B; rs_neg1 <= '1'; @@ -2859,6 +2792,7 @@ begin -- r.shift = - b.exponent -- shift the quotient estimate right by b.exponent bits opsel_r <= RES_SHIFT; + set_r := '1'; v.first := '1'; v.state := IDIV_DIV3; when IDIV_DIV3 => @@ -2871,6 +2805,7 @@ begin -- store the current quotient estimate in B set_b_mant := r.first; opsel_r <= RES_MULT; + set_r := '1'; opsel_s <= 
S_MULT; set_s := '1'; if multiply_to_f.valid = '1' then @@ -2881,9 +2816,10 @@ begin msel_1 <= MUL1_Y; msel_2 <= MUL2_P; v.inc_quot := not pcmpc_lt and not r.divmod; - if r.divmod = '0' then - v.opsel_a := AIN_B; - end if; + -- if dividing, get B into R for IDIV_DIVADJ state + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := not r.divmod; -- set shift to UNIT_BIT (== 56) rs_con2 <= RSCON2_UNIT; if pcmpc_lt = '1' or pcmpc_eq = '1' then @@ -2902,6 +2838,7 @@ begin when IDIV_DIV5 => pshift := '1'; opsel_r <= RES_MULT; + set_r := '1'; -- set shift to - b.exp rs_sel1 <= RSH1_B; rs_neg1 <= '1'; @@ -2912,13 +2849,14 @@ begin -- r.shift = - b.exponent -- shift the quotient estimate right by b.exponent bits opsel_r <= RES_SHIFT; - v.opsel_a := AIN_B; + set_r := '1'; v.first := '1'; v.state := IDIV_DIV7; when IDIV_DIV7 => - -- r.opsel_a = AIN_B -- add shifted quotient delta onto the total quotient + opsel_a <= AIN_B; opsel_b <= BIN_R; + set_r := '1'; v.first := '1'; v.state := IDIV_DIV8; when IDIV_DIV8 => @@ -2931,6 +2869,7 @@ begin -- store the current quotient estimate in B set_b_mant := r.first; opsel_r <= RES_MULT; + set_r := '1'; opsel_s <= S_MULT; set_s := '1'; if multiply_to_f.valid = '1' then @@ -2941,11 +2880,12 @@ begin msel_1 <= MUL1_Y; msel_2 <= MUL2_P; v.inc_quot := not pcmpc_lt and not r.divmod; - if r.divmod = '0' then - v.opsel_a := AIN_B; - end if; -- set shift to UNIT_BIT (== 56) rs_con2 <= RSCON2_UNIT; + -- if dividing, get B into R for IDIV_DIVADJ state + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := not r.divmod; if r.divmod = '0' then v.state := IDIV_DIVADJ; elsif pcmpc_eq = '1' then @@ -2954,16 +2894,21 @@ begin v.state := IDIV_MODADJ; end if; when IDIV_EXT_TBH => - -- r.opsel_a = AIN_C; get divisor into R and prepare to shift left + -- get divisor into R and prepare to shift left -- set shift to 63 - b.exp + opsel_a <= AIN_C; + opsel_b <= BIN_ZERO; + set_r := '1'; rs_sel1 <= RSH1_B; rs_neg1 <= '1'; rs_con2 <= RSCON2_63; - v.opsel_a := AIN_A; 
v.state := IDIV_EXT_TBH2; when IDIV_EXT_TBH2 => - -- r.opsel_a = AIN_A; divisor is in R + -- divisor is in R -- r.shift = 63 - b.exponent; shift and put into B + opsel_a <= AIN_A; + opsel_b <= BIN_ZERO; + set_r := '1'; set_b_mant := '1'; -- set shift to 64 - UNIT_BIT (== 8) rs_con2 <= RSCON2_64_UNIT; @@ -2983,17 +2928,18 @@ begin -- dividend (A) is in R -- r.shift = 64 - B.exponent, so is at least 1 opsel_r <= RES_SHIFT; + set_r := '1'; -- top bit of A gets lost in the shift, so handle it specially - v.opsel_a := AIN_B; -- set shift to 63 rs_con2 <= RSCON2_63; v.state := IDIV_EXT_TBH5; when IDIV_EXT_TBH5 => - -- r.opsel_a = AIN_B, r.shift = 63 + -- r.shift = 63 -- shifted dividend is in R, subtract left-justified divisor + opsel_a <= AIN_B; opsel_b <= BIN_R; - opsel_ainv <= '1'; - carry_in <= '1'; + opsel_aneg <= '1'; + set_r := '1'; -- and put 1<<63 into B as the divisor (S is still 0) shiftin0 := '1'; set_b_mant := '1'; @@ -3014,6 +2960,7 @@ begin -- dividend is in R -- r.shift = 64 - B.exponent opsel_r <= RES_SHIFT; + set_r := '1'; v.first := '1'; v.state := IDIV_EXTDIV2; when IDIV_EXTDIV2 => @@ -3022,16 +2969,17 @@ begin msel_2 <= MUL2_R; f_to_multiply.valid <= r.first; pshift := '1'; - v.opsel_a := AIN_B; opsel_r <= RES_MULT; + set_r := '1'; if multiply_to_f.valid = '1' then v.first := '1'; v.state := IDIV_EXTDIV3; end if; when IDIV_EXTDIV3 => - -- r.opsel_a = AIN_B -- delta quotient is in R; add it to B + opsel_a <= AIN_B; opsel_b <= BIN_R; + set_r := '1'; v.first := '1'; v.state := IDIV_EXTDIV4; when IDIV_EXTDIV4 => @@ -3043,6 +2991,7 @@ begin msel_inv <= '1'; f_to_multiply.valid <= r.first; opsel_r <= RES_MULT; + set_r := '1'; opsel_s <= S_MULT; set_s := '1'; -- set shift to UNIT_BIT - b.exp @@ -3056,14 +3005,16 @@ begin -- r.shift = r.b.exponent - 56 -- remainder is in R/S; shift it right r.b.exponent bits opsel_r <= RES_SHIFT; + set_r := '1'; -- test LS 64b of remainder in P against divisor in C v.inc_quot := not pcmpc_lt; - v.opsel_a := AIN_B; v.state := 
IDIV_EXTDIV6; when IDIV_EXTDIV6 => - -- r.opsel_a = AIN_B -- shifted remainder is in R, see if it is > 1 -- and compute R = R * Y if so + opsel_a <= AIN_B; + opsel_b <= BIN_ZERO; + set_r := '0'; msel_1 <= MUL1_Y; msel_2 <= MUL2_R; pshift := '1'; @@ -3071,114 +3022,162 @@ begin f_to_multiply.valid <= '1'; v.state := IDIV_EXTDIV2; else + -- Put B (quotient) into R for IDIV_DIVADJ state + set_r := '1'; v.state := IDIV_DIVADJ; end if; when IDIV_MODADJ => -- r.shift = 56 -- result is in R/S opsel_r <= RES_SHIFT; + set_r := '1'; if pcmpc_lt = '0' then - v.opsel_a := AIN_C; v.state := IDIV_MODSUB; elsif r.result_sign = '0' then v.state := IDIV_DONE; else - v.state := IDIV_DIVADJ; + v.state := IDIV_MODADJ_NEG; end if; + when IDIV_MODADJ_NEG => + -- result (so far) is in R + -- set carry to increment quotient if needed + -- and also negate R since the answer is negative + opsel_b <= BIN_MINUSR; + set_r := '1'; + v.state := IDIV_OVFCHK; when IDIV_MODSUB => - -- r.opsel_a = AIN_C -- Subtract divisor from remainder - opsel_ainv <= '1'; - carry_in <= '1'; + opsel_a <= AIN_C; + opsel_aneg <= '1'; opsel_b <= BIN_R; + set_r := '1'; if r.result_sign = '0' then v.state := IDIV_DONE; else v.state := IDIV_DIVADJ; end if; when IDIV_DIVADJ => - -- result (so far) is on the A input of the adder + -- result (so far) is in R -- set carry to increment quotient if needed -- and also negate R if the answer is negative - opsel_ainv <= r.result_sign; - carry_in <= r.inc_quot xor r.result_sign; - rnd_b32 := '1'; - if r.divmod = '0' then - opsel_b <= BIN_RND; - end if; + opsel_a <= AIN_RND_B32; + opsel_b <= BIN_RSIGNR; + opsel_c <= CIN_RNDQ; + set_r := '1'; if r.is_signed = '0' then v.state := IDIV_DONE; else v.state := IDIV_OVFCHK; end if; when IDIV_OVFCHK => + opsel_r <= RES_MISC; + misc_sel <= "000"; if r.single_prec = '0' then sign_bit := r.r(63); else sign_bit := r.r(31); end if; v.int_ovf := sign_bit xor r.result_sign; - if v.int_ovf = '1' then - v.state := IDIV_ZERO; - else - v.state := 
IDIV_DONE; - end if; + set_r := sign_bit xor r.result_sign; + v.state := IDIV_DONE; when IDIV_DONE => - v.xerc_result := v.xerc; - if r.oe = '1' then - v.xerc_result.ov := '0'; - v.xerc_result.ov32 := '0'; - v.writing_xer := '1'; - end if; - if r.m32b = '0' then - v.cr_result(3) := r.r(63); - v.cr_result(2 downto 1) := "00"; - if r.r = 64x"0" then - v.cr_result(1) := '1'; - else - v.cr_result(2) := not r.r(63); - end if; - else - v.cr_result(3) := r.r(31); - v.cr_result(2 downto 1) := "00"; - if r.r(31 downto 0) = 32x"0" then - v.cr_result(1) := '1'; - else - v.cr_result(2) := not r.r(31); - end if; - end if; - v.cr_result(0) := v.xerc.so; - int_result := '1'; + cr_op := CROP_INTRES; + set_cr := '1'; v.writing_fpr := '1'; v.instr_done := '1'; when IDIV_ZERO => opsel_r <= RES_MISC; - misc_sel <= "0101"; - v.xerc_result := v.xerc; - if r.oe = '1' then - v.xerc_result.ov := r.int_ovf; - v.xerc_result.ov32 := r.int_ovf; - v.xerc_result.so := r.xerc.so or r.int_ovf; - v.writing_xer := '1'; - end if; - v.cr_result := "001" & v.xerc_result.so; - int_result := '1'; - v.writing_fpr := '1'; - v.instr_done := '1'; + misc_sel <= "000"; + set_r := '1'; + v.state := IDIV_DONE; end case; - rsign := v.result_sign; - if zero_divide = '1' then - v.fpscr(FPSCR_ZX) := '1'; + -- Handle exceptions and special cases for arithmetic operations + if r.cycle_1_ar = '1' then + v.fpscr := r.fpscr or scinfo.new_fpscr; + invalid := scinfo.invalid; + zero_divide := scinfo.zero_divide; + qnan_result := scinfo.qnan_result; + if scinfo.immed_result = '1' then + -- state machine is in the DO_SPECIAL or DO_FSQRT state here + arith_done := '1'; + set_r := '1'; + opsel_r <= RES_MISC; + opsel_sel <= scinfo.result_sel; + if scinfo.qnan_result = '1' then + if r.int_result = '0' then + misc_sel <= "001"; + else + misc_sel <= "110"; + end if; + else + misc_sel <= "111"; + end if; + rsgn_op := scinfo.rsgn_op; + v.result_class := scinfo.result_class; + if scinfo.result_sel = AIN_B then + re_sel2 <= REXP2_B; + 
else + re_sel1 <= REXP1_A; + end if; + re_set_result <= '1'; + end if; end if; + + rsign := r.result_sign; + case rsgn_op is + when RSGN_SEL => + case opsel_sel is + when AIN_A => + rsign := r.a.negative; + when AIN_B => + rsign := r.b.negative; + when AIN_C => + rsign := r.c.negative; + when others => + end case; + v.result_sign := rsign; + when RSGN_SUB => + rsign := r.result_sign xor r.is_subtract; + v.result_sign := rsign; + when RSGN_INV => + rsign := not r.result_sign; + v.result_sign := rsign; + when others => + end case; + + case rcls_op is + when RCLS_SEL => + case opsel_sel is + when AIN_A => + v.result_class := r.a.class; + when AIN_B => + v.result_class := r.b.class; + when AIN_C => + v.result_class := r.c.class; + when others => + end case; + when RCLS_TZERO => + if or (r.r(UNIT_BIT + 2 downto 0)) = '0' and s_nz = '0' then + v.result_class := ZERO; + arith_done := '1'; + end if; + when RCLS_TINF => + if r.fpscr(FPSCR_OE) = '0' then + if r.round_mode(1 downto 0) = "00" or + (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then + v.result_class := INFINITY; + v.fpscr(FPSCR_FR) := '1'; + else + v.fpscr(FPSCR_FR) := '0'; + end if; + end if; + when others => + end case; + if qnan_result = '1' then - invalid := '1'; - v.result_class := NAN; rsign := '0'; - misc_sel <= "0001"; - opsel_r <= RES_MISC; - arith_done := '1'; end if; if invalid = '1' then v.invalid := '1'; @@ -3191,10 +3190,10 @@ begin v.writing_fpr := '1'; v.update_fprf := '1'; end if; - if v.is_subtract = '1' and v.result_class = ZERO then + if r.is_subtract = '1' and v.result_class = ZERO then rsign := r.round_mode(0) and r.round_mode(1); end if; - if v.negate = '1' and v.result_class /= NAN then + if r.negate = '1' and v.result_class /= NAN then rsign := not rsign; end if; v.instr_done := '1'; @@ -3273,43 +3272,69 @@ begin else mask := right_mask(unsigned(mshift(5 downto 0))); end if; - case r.opsel_a is - when AIN_R => - in_a0 := r.r; + if (or (mask and r.r)) = '1' and set_x = '1' 
then + v.x := '1'; + end if; + asign := '0'; + case opsel_a is when AIN_A => in_a0 := r.a.mantissa; + asign := r.a.negative; when AIN_B => in_a0 := r.b.mantissa; - when others => + asign := r.b.negative; + when AIN_C => in_a0 := r.c.mantissa; + when AIN_PS8 => -- 8 LSBs of P sign-extended to 64 + in_a0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64)); + when AIN_RND_B32 => + in_a0 := (32 => r.result_sign and r.single_prec, others => '0'); + when AIN_RND_RBIT => + in_a0 := (DP_RBIT => '1', others => '0'); + when AIN_RND => + in_a0 := (SP_LSB => r.single_prec, DP_LSB => not r.single_prec, others => '0'); + when others => + in_a0 := (others => '0'); end case; - if (or (mask and in_a0)) = '1' and set_x = '1' then - v.x := '1'; - end if; - if opsel_ainv = '1' then + ci := '0'; + case opsel_c is + when CIN_SUBEXT => + ci := r.is_subtract and r.x; + when CIN_ABSEXT => + ci := r.r(63) and (s_nz or r.x); + when CIN_INC => + ci := '1'; + when CIN_ROUND => + ci := r.fpscr(FPSCR_FR); + when CIN_RNDX => + ci := r.x; + when CIN_RNDQ => + ci := r.inc_quot; + when others => + end case; + if opsel_aneg = '1' or (opsel_aabs = '1' and r.is_signed = '1' and asign = '1') then in_a0 := not in_a0; + ci := not ci; end if; in_a <= in_a0; + in_b0 := r.r; + bneg := '0'; case opsel_b is - when BIN_ZERO => - in_b0 := (others => '0'); when BIN_R => - in_b0 := r.r; - when BIN_RND => - if rnd_b32 = '1' then - round_inc := (32 => r.result_sign and r.single_prec, others => '0'); - elsif rbit_inc = '0' then - round_inc := (SP_LSB => r.single_prec, DP_LSB => not r.single_prec, others => '0'); - else - round_inc := (DP_RBIT => '1', others => '0'); - end if; - in_b0 := round_inc; + when BIN_MINUSR => + bneg := '1'; + when BIN_ABSR => + bneg := r.r(63); + when BIN_ADDSUBR => + bneg := r.is_subtract; + when BIN_RSIGNR => + bneg := r.result_sign; when others => - -- BIN_PS8, 8 LSBs of P sign-extended to 64 - in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64)); + in_b0 := (others => 
'0'); end case; - if opsel_binv = '1' then + if bneg = '1' then in_b0 := not in_b0; + ci := not ci; end if; in_b <= in_b0; if is_X(r.shift) then @@ -3321,7 +3346,7 @@ begin else shift_res := (others => '0'); end if; - sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in); + sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + ci); if opsel_mask = '1' then sum(DP_LSB - 1 downto 0) := "0000"; if r.single_prec = '1' then @@ -3342,55 +3367,59 @@ begin when others => misc := (others => '0'); case misc_sel is - when "0000" => - misc := x"00000000" & (r.fpscr and fpscr_mask); - when "0001" => - -- generated QNaN mantissa + when "000" => + -- zero result, used in idiv logic + when "001" => + -- generated QNaN mantissa; also used for 0.5 in idiv logic misc(QNAN_BIT) := '1'; - when "0010" => - -- mantissa of max representable DP number - misc(UNIT_BIT downto DP_LSB) := (others => '1'); - when "0011" => - -- mantissa of max representable SP number + when "010" => + -- mantissa of max representable number, DP or SP misc(UNIT_BIT downto SP_LSB) := (others => '1'); - when "0100" => - -- fmrgow result - misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0); - when "0110" => - -- fmrgew result - misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32); - when "0111" => + misc(SP_LSB-1 downto DP_LSB) := (others => not r.single_prec); + when "011" => + -- read FPSCR + misc := x"00000000" & (r.fpscr and fpscr_mask); + when "100" => + -- fmrgow/fmrgew result + if r.insn(8) = '0' then + misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0); + else + misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32); + end if; + when "101" => + -- LUT value misc := std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64), UNIT_BIT - 19)); - when "1000" => - -- max positive result for fctiw[z] - misc := x"000000007fffffff"; - when "1001" => - -- max negative result for fctiw[z] - misc := x"ffffffff80000000"; - when "1010" => - -- max 
positive result for fctiwu[z] - misc := x"00000000ffffffff"; - when "1011" => - -- max negative result for fctiwu[z] - misc := x"0000000000000000"; - when "1100" => - -- max positive result for fctid[z] - misc := x"7fffffffffffffff"; - when "1101" => - -- max negative result for fctid[z] - misc := x"8000000000000000"; - when "1110" => - -- max positive result for fctidu[z] - misc := x"ffffffffffffffff"; - when "1111" => - -- max negative result for fctidu[z] - misc := x"0000000000000000"; + when "110" => + -- max positive or negative result for fcti* + if r.result_sign = '0' and r.b.class /= NAN then + misc := x"000000007fffffff"; + misc(31) := r.insn(8) or r.insn(9); -- unsigned or dword + misc(62 downto 32) := (others => r.insn(9)); -- dword + misc(63) := r.insn(8) and r.insn(9); -- unsigned dword only (fctidu*); must stay 0 for fctiwu* + elsif r.insn(8) = '0' then + misc(63) := '1'; + if r.insn(9) = '0' then + misc(62 downto 31) := (others => '1'); + end if; + end if; when others => + -- A, B or C, according to opsel_sel + case opsel_sel is + when AIN_A => + misc := r.a.mantissa; + when AIN_B => + misc := r.b.mantissa; + when AIN_C => + misc := r.c.mantissa; + when others => + end case; end case; result <= misc; end case; - v.r := result; + if set_r = '1' then + v.r := result; + end if; if set_s = '1' then case opsel_s is when S_NEG => @@ -3407,6 +3436,17 @@ begin end case; end if; + if set_reg_ind = '1' then + case r.regsel is + when AIN_A => + set_a := '1'; + when AIN_B => + set_b := '1'; + when AIN_C => + set_c := '1'; + when others => + end case; + end if; if set_a = '1' or set_a_exp = '1' then v.a.exponent := new_exp; end if; @@ -3531,6 +3571,103 @@ begin v.shift := rsh_in1 + rsh_in2 + (rs_neg1 or rs_neg2); end if; + -- Condition register data path + cr_result := "0000"; + case cr_op is + when CROP_FCMP => + if r.a.class = NAN or r.b.class = NAN then + cr_result := "0001"; -- unordered + elsif r.a.class = ZERO and r.b.class = ZERO then + cr_result := "0010"; -- equal + elsif r.a.negative /= r.b.negative then +
cr_result := r.a.negative & r.b.negative & "00"; + elsif r.a.class = INFINITY and r.b.class = INFINITY then + -- A and B are the same sign from here down + cr_result := "0010"; + elsif r.a.class = ZERO then + cr_result := not r.b.negative & r.b.negative & "00"; + elsif r.a.class = INFINITY then + cr_result := r.a.negative & not r.a.negative & "00"; + elsif r.b.class = ZERO then + -- A is finite from here down + cr_result := r.a.negative & not r.a.negative & "00"; + elsif r.b.class = INFINITY then + cr_result := not r.b.negative & r.b.negative & "00"; + elsif r.a.exponent > r.b.exponent then + -- A and B are both finite from here down + cr_result := r.a.negative & not r.a.negative & "00"; + elsif r.a.exponent /= r.b.exponent then + -- A exponent is smaller than B + cr_result := not r.a.negative & r.a.negative & "00"; + elsif r.r(63) = '1' then + -- A is smaller in magnitude + cr_result := not r.a.negative & r.a.negative & "00"; + elsif (r_hi_nz or r_lo_nz) = '0' then + cr_result := "0010"; + else + cr_result := r.a.negative & not r.a.negative & "00"; + end if; + when CROP_MCRFS => + j := to_integer(unsigned(insn_bfa(r.insn))); + for i in 0 to 7 loop + if i = j then + k := (7 - i) * 4; + cr_result := r.fpscr(k + 3 downto k); + end if; + end loop; + when CROP_FTDIV => + if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or + (r.b.class = FINITE and r.b.denorm = '1') then + cr_result(2) := '1'; + end if; + if r.a.class = NAN or r.a.class = INFINITY or + r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or + (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) or + (r.doing_ftdiv(1) = '1' and (exp_tiny or exp_huge) = '1') then + cr_result(1) := '1'; + end if; + when CROP_FTSQRT => + if r.b.class = ZERO or r.b.class = INFINITY or + (r.b.class = FINITE and r.b.denorm = '1') then + cr_result(2) := '1'; + end if; + if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO + or r.b.negative = '1' or r.b.exponent <= 
to_signed(-970, EXP_BITS) then + cr_result(1) := '1'; + end if; + when CROP_INTRES => + v.xerc_result := v.xerc; + if r.oe = '1' then + v.xerc_result.ov := r.int_ovf; + v.xerc_result.ov32 := r.int_ovf; + v.xerc_result.so := r.xerc.so or r.int_ovf; + v.writing_xer := '1'; + end if; + if r.m32b = '0' then + cr_result(3) := r.r(63); + if r.r = 64x"0" then + cr_result(1) := '1'; + else + cr_result(2) := not r.r(63); + end if; + else + cr_result(3) := r.r(31); + if r.r(31 downto 0) = 32x"0" then + cr_result(1) := '1'; + else + cr_result(2) := not r.r(31); + end if; + end if; + cr_result(0) := v.xerc_result.so; + when others => + end case; + if set_cr = '1' then + v.cr_result := cr_result; + end if; + if set_fpcc = '1' then + v.fpscr(FPSCR_FL downto FPSCR_FU) := cr_result; + end if; + if r.update_fprf = '1' then v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, r.r(UNIT_BIT) and not r.denorm); @@ -3554,9 +3691,9 @@ begin v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); end if; v.sp_result := r.single_prec; - v.int_result := int_result; + v.res_int := r.int_result or r.integer_op; v.illegal := illegal; - v.nsnan_result := v.quieten_nan; + v.nsnan_result := r.quieten_nan; v.res_sign := rsign; if r.integer_op = '1' then v.cr_mask := num_to_fxm(0); @@ -3585,7 +3722,7 @@ begin end if; -- This mustn't depend on any fields of r that are modified in IDLE state. 
- if r.int_result = '1' then + if r.res_int = '1' then fp_result <= r.r; else fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r, diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 79ba7fa..c13110f 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1496,34 +1496,105 @@ struct fmavals { unsigned long nfma; unsigned long nfms; } fmavals[] = { + /* +0 * +0 +- +0 -> +0, +0, -0, -0 */ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, + /* +0 * NaNC +- +0 -> NaNC, NaNC, NaNC, NaNC */ { 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000 }, + /* +0 * NaNC +- NaNB -> NaNB, NaNB, NaNB, NaNB */ { 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000 }, + /* NaNA * NaNC +- NaNB -> NaNA, NaNA, NaNA, NaNA */ { 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000 }, + /* +1.0 * -0 +- +finite B -> +B, -B, -B, +B */ { 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, + /* +1.0 * -1.0 +- (B = +3.818e+190) -> +B, -B, -B, +B */ { 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, + /* +inf * -1.0 +- +finite B -> -inf, -inf, +inf, +inf */ { 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, 0xfff0000000000000, 0xfff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, + /* +inf * +0 +- +finite B -> NaNQ, NaNQ, NaNQ, NaNQ */ { 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000 }, + /* +1.0 * +1.0 +- 1.00000012 -> +2.00000012, +1.2e-7, 
-2.00000012, -1.2e-7 */ { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, 0x4000000010000000, 0xbe80000000000000, 0xc000000010000000, 0x3e80000000000000 }, + /* +(1 + 2^-52) * +(1 + 2^-52) +- +1.0 -> +(2 + 2^-51), +2^-51, -(2 + 2^-51), -2^-51 */ { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, 0x4000000000000001, 0x3cc0000000000000, 0xc000000000000001, 0xbcc0000000000000 }, + /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 2^-50), +5 * 2^-52 + 2^-101, -, - */ { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, 0x4000000000000002, 0x3cd4000000000002, 0xc000000000000002, 0xbcd4000000000002 }, + /* +2.443e-77 * 2.828 +- 6.909e-77 -> -1.402e-93, +1.382e-76, +1.402e-93, -1.382e-76 */ { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, 0xaca765753908cd20, 0x3030000000000000, 0x2ca765753908cd20, 0xb030000000000000 }, + /* +2.443e-77 * 2.828 +- 6.909e-77 -> +9.446e-93, +1.382e-76, -9.446e-93, -1.382e-76 */ { 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, 0x2cd3b3efbf5e2229, 0x3030000000000000, 0xacd3b3efbf5e2229, 0xb030000000000000 }, + /* +2.443e-77 * 2.828 +- -1.1055e-75 -> -1.0364e-75, +1.1746e-75, +1.0364e-75, -1.1746e-75 */ { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, 0xb05e0068a0000000, 0x3061003450000000, 0x305e0068a0000000, 0xb061003450000000 }, + /* +2 * +3 +- 3 -> +9, +3, -9, -3 */ + { 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, + 0x4022000000000000, 0x4008000000000000, 0xc022000000000000, 0xc008000000000000 }, + /* +2 * +3 +- 5 -> +11, +1, -11, -1 */ + { 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, + 0x4026000000000000, 0x3ff0000000000000, 0xc026000000000000, 0xbff0000000000000 }, + /* +2 * +3 +- 7 -> +13, -1, -13, +1 */ + { 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, + 0x402a000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x3ff0000000000000 }, + /* +2 * +3 +- 9 -> +15, -3, -15, +3 */ + { 0x4000000000000000, 0x4008000000000000, 
0x4022000000000000, + 0x402e000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4008000000000000 }, + /* +2 * +3 +- -3 -> +3, +9, -3, -9 */ + { 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, + 0x4008000000000000, 0x4022000000000000, 0xc008000000000000, 0xc022000000000000 }, + /* +2 * +3 +- -5 -> +1, +11, -1, -11 */ + { 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, + 0x3ff0000000000000, 0x4026000000000000, 0xbff0000000000000, 0xc026000000000000 }, + /* +2 * +3 +- -7 -> -1, +13, +1, -13 */ + { 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, + 0xbff0000000000000, 0x402a000000000000, 0x3ff0000000000000, 0xc02a000000000000 }, + /* +2 * +3 +- -9 -> -3, +15, +3, -15 */ + { 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, + 0xc008000000000000, 0x402e000000000000, 0x4008000000000000, 0xc02e000000000000 }, + /* +2 * -3 +- 3 -> -3, -9, +3, +9 */ + { 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, + 0xc008000000000000, 0xc022000000000000, 0x4008000000000000, 0x4022000000000000 }, + /* +2 * -3 +- 5 -> -1, -11, +1, +11 */ + { 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, + 0xbff0000000000000, 0xc026000000000000, 0x3ff0000000000000, 0x4026000000000000 }, + /* +2 * -3 +- 7 -> +1, -13, -1, +13 */ + { 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, + 0x3ff0000000000000, 0xc02a000000000000, 0xbff0000000000000, 0x402a000000000000 }, + /* +2 * -3 +- 9 -> +3, -15, -3, +15 */ + { 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, + 0x4008000000000000, 0xc02e000000000000, 0xc008000000000000, 0x402e000000000000 }, + /* -2 * +3 +- -3 -> -9, -3, +9, +3 */ + { 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, + 0xc022000000000000, 0xc008000000000000, 0x4022000000000000, 0x4008000000000000 }, + /* -2 * +3 +- -5 -> -11, -1, +11, +1 */ + { 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, + 0xc026000000000000, 0xbff0000000000000, 0x4026000000000000, 0x3ff0000000000000 }, + /* 
-2 * +3 +- -7 -> -13, +1, +13, -1 */ + { 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, + 0xc02a000000000000, 0x3ff0000000000000, 0x402a000000000000, 0xbff0000000000000 }, + /* -2 * +3 +- -9 -> -15, +3, +15, -3 */ + { 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, + 0xc02e000000000000, 0x4008000000000000, 0x402e000000000000, 0xc008000000000000 }, + /* -2 * +3 +- +0 -> -6, -6, +6, +6 */ + { 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, + 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, + /* +2 * -3 +- -0 -> -6, -6, +6, +6 */ + { 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, + 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, + /* 2^-1026 * (1.5 * 2^1023) +- -0 -> (1.5 * 2^-3), ditto, -ditto, -ditto */ + { 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, + 0x3fc8000000000000, 0x3fc8000000000000, 0xbfc8000000000000, 0xbfc8000000000000 }, }; int test23(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index cc6c1cc..24878af 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ