diff --git a/fpu.vhdl b/fpu.vhdl index eaa4cf2..afac4c0 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -169,9 +169,7 @@ architecture behaviour of fpu is oe : std_ulogic; xerc : xer_common_t; xerc_result : xer_common_t; - res_negate : std_ulogic; - res_subtract : std_ulogic; - res_rmode : std_ulogic_vector(2 downto 0); + res_sign : std_ulogic; end record; type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); @@ -609,20 +607,13 @@ architecture behaviour of fpu is -- Construct a DP floating-point result from components function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0); - mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic; - negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector) + mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic) return std_ulogic_vector is variable dp_result : std_ulogic_vector(63 downto 0); - variable sign : std_ulogic; begin dp_result := (others => '0'); - sign := negative; case class is when ZERO => - if is_subtract = '1' then - -- set result sign depending on rounding mode - sign := round_mode(0) and round_mode(1); - end if; when FINITE => if mantissa(UNIT_BIT) = '1' then -- normalized number @@ -642,7 +633,7 @@ architecture behaviour of fpu is dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB); end if; end case; - dp_result(63) := sign xor negate; + dp_result(63) := negative; return dp_result; end; @@ -860,6 +851,7 @@ begin variable opcbits : std_ulogic_vector(4 downto 0); variable int_result : std_ulogic; variable illegal : std_ulogic; + variable rsign : std_ulogic; begin v := r; v.complete := '0'; @@ -1825,8 +1817,17 @@ begin when RENORM_B2 => set_b := '1'; - re_sel2 <= REXP2_NE; - re_set_result <= '1'; + -- For fdiv, we need to increase result_exp by shift rather + -- than decreasing it as for fre/frsqrte and fsqrt. + -- We do that by negating r.shift in this cycle and then + -- setting result_exp to new_exp in the next cycle + if r.use_a = '1' then + rs_sel1 <= RSH1_S; + rs_neg1 <= '1'; + else + re_sel2 <= REXP2_NE; + re_set_result <= '1'; + end if; v.opsel_a := AIN_B; v.state := LOOKUP; @@ -2046,6 +2047,12 @@ begin when LOOKUP => -- r.opsel_a = AIN_B -- wait one cycle for inverse_table[B] lookup + -- if this is a division, compute exponent + -- (see comment on RENORM_B2 above) + if r.use_a = '1' then + re_sel2 <= REXP2_NE; + re_set_result <= '1'; + end if; v.first := '1'; if r.insn(4) = '0' then if r.insn(3) = '0' then @@ -2590,7 +2597,6 @@ begin arith_done := '1'; when NAN_RESULT => - v.negate := '0'; if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then @@ -3158,14 +3164,14 @@ begin end case; + rsign := v.result_sign; if zero_divide = '1' then v.fpscr(FPSCR_ZX) := '1'; end if; if qnan_result = '1' then invalid := '1'; v.result_class := NAN; - v.result_sign := '0'; - v.negate := '0'; + rsign := '0'; misc_sel <= "0001"; opsel_r <= RES_MISC; arith_done := '1'; @@ -3181,6 +3187,12 @@ begin v.writing_fpr := '1'; v.update_fprf := '1'; end if; + if v.is_subtract = '1' and v.result_class = ZERO then + rsign := r.round_mode(0) and r.round_mode(1); + end if; + if v.negate = '1' and v.result_class /= NAN then + rsign := not rsign; + end if; v.instr_done := '1'; update_fx := '1'; end if; @@ -3516,7 +3528,7 @@ begin end if; if r.update_fprf = '1' then - v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class, + v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, r.r(UNIT_BIT) and not r.denorm); end if; @@ -3541,9 +3553,7 @@ begin v.int_result := int_result; v.illegal := illegal; v.nsnan_result := v.quieten_nan; - v.res_negate := v.negate; - v.res_subtract := v.is_subtract; - v.res_rmode := r.round_mode; + v.res_sign := rsign; if r.integer_op = '1' then v.cr_mask := num_to_fxm(0); elsif r.is_cmp = '0' then @@ -3574,9 +3584,8 @@ begin if r.int_result = '1' then fp_result <= r.r; else - fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r, - r.sp_result, r.nsnan_result, - r.res_negate, r.res_subtract, r.res_rmode); + fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r, + r.sp_result, r.nsnan_result); end if; rin <= v; diff --git a/scripts/run_test.sh b/scripts/run_test.sh index 1a032ba..fc3505f 100755 --- a/scripts/run_test.sh +++ b/scripts/run_test.sh @@ -21,7 +21,7 @@ cd $TMPDIR cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin -${MICROWATT_DIR}/core_tb | sed 's/.*: //' | egrep '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true +${MICROWATT_DIR}/core_tb | sed 's/.*: //' | grep -E '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true grep -v "^$" ${MICROWATT_DIR}/tests/${TEST}.out | sort | grep -v GPR31 > exp.out diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 773c05d..059d83b 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -459,6 +459,7 @@ int test6(long arg) unsigned long results[6]; unsigned long v; + set_fpscr(0); for (i = 0; i < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++i) { v = sp_dp_equiv[i].dp; asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1)" @@ -474,6 +475,8 @@ int test6(long arg) results[4] != (v & ~SIGN) || results[5] != (v | SIGN)) return i + 1; + if (get_fpscr() != 0) + return i + 0x101; } return 0; } @@ -484,6 +487,98 @@ int fpu_test_6(void) return trapit(0, test6); } +unsigned long expected_fprf(unsigned long result, bool single) +{ + unsigned long sign = (result >> 63) & 1; + unsigned long exp = (result >> 52) & 0x7ff; + unsigned long mant = (result & ((1ul << 52) - 1)); + + if (exp == 0x7ff) { + /* infinity or NaN */ + if (mant) + return 0x11; /* NaN */ + if (sign) + return 0x09; /* -Infinity */ + else + return 0x05; /* +Infinity */ + } else if (exp > (single ? 0x380 : 0)) { + if (sign) + return 0x08; /* -normalized */ + else + return 0x04; /* +normalized */ + } else if (mant || exp > 0) { + if (sign) + return 0x18; /* -denorm */ + else + return 0x14; /* +denorm */ + } else { + if (sign) + return 0x12; /* -zero */ + else + return 0x02; /* +zero */ + } +} + +unsigned long expected_fprf_sp(unsigned long result) +{ + unsigned long sign = (result >> 31) & 1; + unsigned long exp = (result >> 23) & 0xff; + unsigned long mant = (result & ((1ul << 23) - 1)); + + if (exp == 0xff) { + /* infinity or NaN */ + if (mant) + return 0x11; /* NaN */ + if (sign) + return 0x09; /* -Infinity */ + else + return 0x05; /* +Infinity */ + } else if (exp > 0) { + if (sign) + return 0x08; /* -normalized */ + else + return 0x04; /* +normalized */ + } else if (mant) { + if (sign) + return 0x18; /* -denorm */ + else + return 0x14; /* +denorm */ + } else { + if (sign) + return 0x12; /* -zero */ + else + return 0x02; /* +zero */ + } +} + +int check_fprf(unsigned long result, bool single, unsigned long fpscr) +{ + unsigned long fprf; + + fprf = expected_fprf(result, single); + if (((fpscr >> 12) & 0x1f) == fprf) + return 0; + print_string("\r\n"); + print_hex(result, 16, " "); + print_hex(fpscr, 8, " "); + print_hex(fprf, 2, " "); + return 1; +} + +int check_fprf_sp(unsigned long result, unsigned long fpscr) +{ + unsigned long fprf; + + fprf = expected_fprf_sp(result); + if (((fpscr >> 12) & 0x1f) == fprf) + return 0; + print_string("\r\n"); + print_hex(result, 16, " "); + print_hex(fpscr, 8, " "); + print_hex(fprf, 2, " "); + return 1; +} + struct int_fp_equiv { long ival; unsigned long fp; @@ -522,12 +617,15 @@ int test7(long arg) { long i; unsigned long results[4]; + unsigned long fpscr; for (i = 0; i < sizeof(intvals) / sizeof(intvals[0]); ++i) { + set_fpscr(0); asm("lfd%U0%X0 3,%0; fcfid 6,3; fcfidu 7,3; stfd 6,0(%1); stfd 7,8(%1)" : : "m" (intvals[i].ival), "b" (results) : "memory"); asm("fcfids 9,3; stfd 9,16(%0); fcfidus 10,3; stfd 10,24(%0)" : : "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != intvals[i].fp || results[1] != intvals[i].fp_u || results[2] != intvals[i].fp_s || @@ -539,6 +637,8 @@ int test7(long arg) print_hex(results[3], 16, " "); return i + 1; } + if (check_fprf(results[3], true, fpscr)) + return i + 0x101; } return 0; } @@ -582,16 +682,20 @@ int test8(long arg) { long i; unsigned long result; + unsigned long fpscr; for (i = 0; i < sizeof(roundvals) / sizeof(roundvals[0]); ++i) { asm("lfd 3,0(%0); lfd 4,8(%0); mtfsf 0,3,1,0; frsp 6,4; stfd 6,0(%1)" : : "b" (&roundvals[i]), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != roundvals[i].spval) { print_string("\r\n"); print_hex(i, 4, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, true, fpscr)) + return i + 0x101; } return 0; } @@ -796,6 +900,7 @@ int test11(long arg) long i; unsigned long results[4]; struct frivals *vp = frivals; + unsigned long fpscr; for (i = 0; i < sizeof(frivals) / sizeof(frivals[0]); ++i, ++vp) { set_fpscr(FPS_RN_FLOOR); @@ -807,6 +912,7 @@ int test11(long arg) asm("frip 5,3; stfd 5,16(%0)" : : "b" (results) : "memory"); set_fpscr(FPS_RN_CEIL); asm("frim 5,3; stfd 5,24(%0)" : : "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->nval || results[1] != vp->zval || results[2] != vp->pval || results[3] != vp->mval) { print_hex(i, 2, "\r\n"); @@ -816,6 +922,8 @@ int test11(long arg) print_hex(results[3], 16, " "); return i + 1; } + if (check_fprf(results[3], false, fpscr)) + return i + 0x101; } return 0; } @@ -903,17 +1011,21 @@ int test13(long arg) long i; unsigned long results[2]; struct addvals *vp = addvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" : : "b" (&vp->val_a), "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->sum || results[1] != vp->diff) { print_hex(i, 2, " "); print_hex(results[0], 16, " "); print_hex(results[1], 16, "\r\n"); return i + 1; } + if (check_fprf(results[1], false, fpscr)) + return i + 0x101; } return 0; } @@ -976,18 +1088,22 @@ int test14(long arg) long i; unsigned long results[2]; struct addvals *vp = sp_addvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(sp_addvals) / sizeof(sp_addvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); frsp 5,5; lfd 6,8(%0); frsp 6,6; " "fadds 7,5,6; fsubs 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" : : "b" (&vp->val_a), "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->sum || results[1] != vp->diff) { print_hex(i, 2, " "); print_hex(results[0], 16, " "); print_hex(results[1], 16, "\r\n"); return i + 1; } + if (check_fprf(results[1], true, fpscr)) + return i + 0x101; } return 0; } @@ -1017,16 +1133,20 @@ int test15(long arg) long i; unsigned long result; struct mulvals *vp = mulvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(mulvals) / sizeof(mulvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); lfd 6,8(%0); fmul 7,5,6; stfd 7,0(%1)" : : "b" (&vp->val_a), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->prod) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1056,16 +1176,20 @@ int test16(long arg) long i; unsigned int result; struct mulvals_sp *vp = mulvals_sp; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(mulvals_sp) / sizeof(mulvals_sp[0]); ++i, ++vp) { asm("lfs 5,0(%0); lfs 6,4(%0); fmuls 7,5,6; stfs 7,0(%1)" : : "b" (&vp->val_a), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->prod) { print_hex(i, 2, " "); print_hex(result, 8, " "); return i + 1; } + if (check_fprf_sp(result, fpscr)) + return i + 0x101; } return 0; } @@ -1086,6 +1210,10 @@ struct divvals { { 0xbff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 }, { 0x4000000000000000, 0x4008000000000000, 0x3fe5555555555555 }, { 0xc01fff0007ffffff, 0xc03ffffffdffffbf, 0x3fcfff0009fff041 }, + { 0x0010000000000000, 0x0018000000000000, 0x3fe5555555555555 }, + { 0x0008000000000000, 0x0018000000000000, 0x3fd5555555555555 }, + { 0x0010000000000000, 0x0000c00000000000, 0x4035555555555555 }, + { 0x0004000000000000, 0x0000300000000000, 0x4035555555555555 }, }; int test17(long arg) @@ -1093,16 +1221,20 @@ int test17(long arg) long i; unsigned long result; struct divvals *vp = divvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(divvals) / sizeof(divvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); lfd 6,8(%0); fdiv 7,5,6; stfd 7,0(%1)" : : "b" (&vp->val_a), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->prod) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1123,6 +1255,9 @@ struct recipvals { { 0xbff0000000000000, 0xbfeff00400000000 }, { 0x4008000000000000, 0x3fd54e3800000000 }, { 0xc03ffffffdffffbf, 0xbfa0040000000000 }, + { 0x0008100000000000, 0x7fdfb0c400000000 }, + { 0x0004080000000000, 0x7fefb0c400000000 }, + { 0x0002040000000000, 0x7ff0000000000000 }, }; int test18(long arg) @@ -1130,16 +1265,20 @@ int test18(long arg) long i; unsigned long result; struct recipvals *vp = recipvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(recipvals) / sizeof(recipvals[0]); ++i, ++vp) { asm("lfd 6,0(%0); fre 7,6; stfd 7,0(%1)" : : "b" (&vp->val), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->inv) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1273,16 +1412,20 @@ int test21(long arg) long i; unsigned long result; struct isqrtvals *vp = isqrtvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(isqrtvals) / sizeof(isqrtvals[0]); ++i, ++vp) { asm("lfd 6,0(%0); frsqrte 7,6; stfd 7,0(%1)" : : "b" (&vp->val), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->inv) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1320,16 +1463,20 @@ int test22(long arg) long i; unsigned long result; struct sqrtvals *vp = sqrtvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(sqrtvals) / sizeof(sqrtvals[0]); ++i, ++vp) { asm("lfd 6,0(%0); fsqrt 7,6; stfd 7,0(%1)" : : "b" (&vp->val), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->inv) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1384,6 +1531,7 @@ int test23(long arg) long i; unsigned long results[4]; struct fmavals *vp = fmavals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) { @@ -1391,6 +1539,7 @@ int test23(long arg) : : "b" (&vp->ra), "b" (results) : "memory"); asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)" : : "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->fma || results[1] != vp->fms || results[2] != vp->nfma || results[3] != vp->nfms) { print_hex(i, 2, " "); @@ -1400,6 +1549,8 @@ int test23(long arg) print_hex(results[3], 16, "\r\n"); return i + 1; } + if (check_fprf(results[3], false, fpscr)) + return i + 0x101; } return 0; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 3c1021b..e4e2116 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ