From 9a4f0c18e11f0c976b1a8a31ba124d822865193e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 13 Jan 2024 20:48:14 +1100 Subject: [PATCH 1/5] scripts/run_test: Use grep -E instead of egrep Grep in Fedora 39 has started warning when invoked as 'egrep', so use grep -E instead to avoid the warnings. Signed-off-by: Paul Mackerras --- scripts/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_test.sh b/scripts/run_test.sh index 1a032ba..fc3505f 100755 --- a/scripts/run_test.sh +++ b/scripts/run_test.sh @@ -21,7 +21,7 @@ cd $TMPDIR cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin -${MICROWATT_DIR}/core_tb | sed 's/.*: //' | egrep '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true +${MICROWATT_DIR}/core_tb | sed 's/.*: //' | grep -E '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true grep -v "^$" ${MICROWATT_DIR}/tests/${TEST}.out | sort | grep -v GPR31 > exp.out From eecf1ca39919fac653beaf1127e16613d312f16e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 24 Jan 2024 09:16:23 +1100 Subject: [PATCH 2/5] FPU: Fix setting of FPRF The sign recorded in FPRF was sometimes wrong because we weren't doing the modifications that were done in pack_dp when setting FPRF (FPSCR field). These modifications are: set sign for zero result of subtraction based on rounding mode; negate result for fnmadd/sub; but don't modify sign of NaNs. Instead we now do these modifications in the main state machine code and put the result in an 'rsign' variable that is used to set v.res_sign, then r.res_sign is used in the next cycle both for setting FPRF and in the pack_dp functions. That simplifies pack_dp and lets us get rid of r.res_negate, r.res_subtract and r.res_rmode. Signed-off-by: Paul Mackerras --- fpu.vhdl | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index eaa4cf2..6fbc979 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -169,9 +169,7 @@ architecture behaviour of fpu is oe : std_ulogic; xerc : xer_common_t; xerc_result : xer_common_t; - res_negate : std_ulogic; - res_subtract : std_ulogic; - res_rmode : std_ulogic_vector(2 downto 0); + res_sign : std_ulogic; end record; type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); @@ -609,20 +607,13 @@ architecture behaviour of fpu is -- Construct a DP floating-point result from components function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0); - mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic; - negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector) + mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic) return std_ulogic_vector is variable dp_result : std_ulogic_vector(63 downto 0); - variable sign : std_ulogic; begin dp_result := (others => '0'); - sign := negative; case class is when ZERO => - if is_subtract = '1' then - -- set result sign depending on rounding mode - sign := round_mode(0) and round_mode(1); - end if; when FINITE => if mantissa(UNIT_BIT) = '1' then -- normalized number @@ -642,7 +633,7 @@ architecture behaviour of fpu is dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB); end if; end case; - dp_result(63) := sign xor negate; + dp_result(63) := negative; return dp_result; end; @@ -860,6 +851,7 @@ begin variable opcbits : std_ulogic_vector(4 downto 0); variable int_result : std_ulogic; variable illegal : std_ulogic; + variable rsign : std_ulogic; begin v := r; v.complete := '0'; @@ -2590,7 +2582,6 @@ begin arith_done := '1'; when NAN_RESULT => - v.negate := '0'; if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then @@ -3158,14 +3149,14 @@ begin end case; + rsign := v.result_sign; if zero_divide = '1' then v.fpscr(FPSCR_ZX) := '1'; end if; if qnan_result = '1' then invalid := '1'; v.result_class := NAN; - v.result_sign := '0'; - v.negate := '0'; + rsign := '0'; misc_sel <= "0001"; opsel_r <= RES_MISC; arith_done := '1'; @@ -3181,6 +3172,12 @@ begin v.writing_fpr := '1'; v.update_fprf := '1'; end if; + if v.is_subtract = '1' and v.result_class = ZERO then + rsign := r.round_mode(0) and r.round_mode(1); + end if; + if v.negate = '1' and v.result_class /= NAN then + rsign := not rsign; + end if; v.instr_done := '1'; update_fx := '1'; end if; @@ -3516,7 +3513,7 @@ begin end if; if r.update_fprf = '1' then - v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class, + v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, r.r(UNIT_BIT) and not r.denorm); end if; @@ -3541,9 +3538,7 @@ begin v.int_result := int_result; v.illegal := illegal; v.nsnan_result := v.quieten_nan; - v.res_negate := v.negate; - v.res_subtract := v.is_subtract; - v.res_rmode := r.round_mode; + v.res_sign := rsign; if r.integer_op = '1' then v.cr_mask := num_to_fxm(0); elsif r.is_cmp = '0' then @@ -3574,9 +3569,8 @@ begin if r.int_result = '1' then fp_result <= r.r; else - fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r, - r.sp_result, r.nsnan_result, - r.res_negate, r.res_subtract, r.res_rmode); + fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r, + r.sp_result, r.nsnan_result); end if; rin <= v; From 59a7996f1cdfcbc0ec537e2fe7c9e51fda471f73 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 23 Jan 2024 22:06:49 +1100 Subject: [PATCH 3/5] tests/fpu: Add checks for correct setting of FPRF Signed-off-by: Paul Mackerras --- tests/fpu/fpu.c | 144 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_fpu.bin | Bin 31088 -> 31088 bytes 2 files changed, 144 insertions(+) diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 773c05d..d04140f 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -459,6 +459,7 @@ int test6(long arg) unsigned long results[6]; unsigned long v; + set_fpscr(0); for (i = 0; i < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++i) { v = sp_dp_equiv[i].dp; asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1)" @@ -474,6 +475,8 @@ int test6(long arg) results[4] != (v & ~SIGN) || results[5] != (v | SIGN)) return i + 1; + if (get_fpscr() != 0) + return i + 0x101; } return 0; } @@ -484,6 +487,98 @@ int fpu_test_6(void) return trapit(0, test6); } +unsigned long expected_fprf(unsigned long result, bool single) +{ + unsigned long sign = (result >> 63) & 1; + unsigned long exp = (result >> 52) & 0x7ff; + unsigned long mant = (result & ((1ul << 52) - 1)); + + if (exp == 0x7ff) { + /* infinity or NaN */ + if (mant) + return 0x11; /* NaN */ + if (sign) + return 0x09; /* -Infinity */ + else + return 0x05; /* +Infinity */ + } else if (exp > (single ? 0x380 : 0)) { + if (sign) + return 0x08; /* -normalized */ + else + return 0x04; /* +normalized */ + } else if (mant || exp > 0) { + if (sign) + return 0x18; /* -denorm */ + else + return 0x14; /* +denorm */ + } else { + if (sign) + return 0x12; /* -zero */ + else + return 0x02; /* +zero */ + } +} + +unsigned long expected_fprf_sp(unsigned long result) +{ + unsigned long sign = (result >> 31) & 1; + unsigned long exp = (result >> 23) & 0xff; + unsigned long mant = (result & ((1ul << 23) - 1)); + + if (exp == 0xff) { + /* infinity or NaN */ + if (mant) + return 0x11; /* NaN */ + if (sign) + return 0x09; /* -Infinity */ + else + return 0x05; /* +Infinity */ + } else if (exp > 0) { + if (sign) + return 0x08; /* -normalized */ + else + return 0x04; /* +normalized */ + } else if (mant) { + if (sign) + return 0x18; /* -denorm */ + else + return 0x14; /* +denorm */ + } else { + if (sign) + return 0x12; /* -zero */ + else + return 0x02; /* +zero */ + } +} + +int check_fprf(unsigned long result, bool single, unsigned long fpscr) +{ + unsigned long fprf; + + fprf = expected_fprf(result, single); + if (((fpscr >> 12) & 0x1f) == fprf) + return 0; + print_string("\r\n"); + print_hex(result, 16, " "); + print_hex(fpscr, 8, " "); + print_hex(fprf, 2, " "); + return 1; +} + +int check_fprf_sp(unsigned long result, unsigned long fpscr) +{ + unsigned long fprf; + + fprf = expected_fprf_sp(result); + if (((fpscr >> 12) & 0x1f) == fprf) + return 0; + print_string("\r\n"); + print_hex(result, 16, " "); + print_hex(fpscr, 8, " "); + print_hex(fprf, 2, " "); + return 1; +} + struct int_fp_equiv { long ival; unsigned long fp; @@ -522,12 +617,15 @@ int test7(long arg) { long i; unsigned long results[4]; + unsigned long fpscr; for (i = 0; i < sizeof(intvals) / sizeof(intvals[0]); ++i) { + set_fpscr(0); asm("lfd%U0%X0 3,%0; fcfid 6,3; fcfidu 7,3; stfd 6,0(%1); stfd 7,8(%1)" : : "m" (intvals[i].ival), "b" (results) : "memory"); asm("fcfids 9,3; stfd 9,16(%0); fcfidus 10,3; stfd 10,24(%0)" : : "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != intvals[i].fp || results[1] != intvals[i].fp_u || results[2] != intvals[i].fp_s || @@ -539,6 +637,8 @@ int test7(long arg) print_hex(results[3], 16, " "); return i + 1; } + if (check_fprf(results[3], true, fpscr)) + return i + 0x101; } return 0; } @@ -582,16 +682,20 @@ int test8(long arg) { long i; unsigned long result; + unsigned long fpscr; for (i = 0; i < sizeof(roundvals) / sizeof(roundvals[0]); ++i) { asm("lfd 3,0(%0); lfd 4,8(%0); mtfsf 0,3,1,0; frsp 6,4; stfd 6,0(%1)" : : "b" (&roundvals[i]), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != roundvals[i].spval) { print_string("\r\n"); print_hex(i, 4, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, true, fpscr)) + return i + 0x101; } return 0; } @@ -796,6 +900,7 @@ int test11(long arg) long i; unsigned long results[4]; struct frivals *vp = frivals; + unsigned long fpscr; for (i = 0; i < sizeof(frivals) / sizeof(frivals[0]); ++i, ++vp) { set_fpscr(FPS_RN_FLOOR); @@ -807,6 +912,7 @@ int test11(long arg) asm("frip 5,3; stfd 5,16(%0)" : : "b" (results) : "memory"); set_fpscr(FPS_RN_CEIL); asm("frim 5,3; stfd 5,24(%0)" : : "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->nval || results[1] != vp->zval || results[2] != vp->pval || results[3] != vp->mval) { print_hex(i, 2, "\r\n"); @@ -816,6 +922,8 @@ int test11(long arg) print_hex(results[3], 16, " "); return i + 1; } + if (check_fprf(results[3], false, fpscr)) + return i + 0x101; } return 0; } @@ -903,17 +1011,21 @@ int test13(long arg) long i; unsigned long results[2]; struct addvals *vp = addvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" : : "b" (&vp->val_a), "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->sum || results[1] != vp->diff) { print_hex(i, 2, " "); print_hex(results[0], 16, " "); print_hex(results[1], 16, "\r\n"); return i + 1; } + if (check_fprf(results[1], false, fpscr)) + return i + 0x101; } return 0; } @@ -976,18 +1088,22 @@ int test14(long arg) long i; unsigned long results[2]; struct addvals *vp = sp_addvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(sp_addvals) / sizeof(sp_addvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); frsp 5,5; lfd 6,8(%0); frsp 6,6; " "fadds 7,5,6; fsubs 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" : : "b" (&vp->val_a), "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->sum || results[1] != vp->diff) { print_hex(i, 2, " "); print_hex(results[0], 16, " "); print_hex(results[1], 16, "\r\n"); return i + 1; } + if (check_fprf(results[1], true, fpscr)) + return i + 0x101; } return 0; } @@ -1017,16 +1133,20 @@ int test15(long arg) long i; unsigned long result; struct mulvals *vp = mulvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(mulvals) / sizeof(mulvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); lfd 6,8(%0); fmul 7,5,6; stfd 7,0(%1)" : : "b" (&vp->val_a), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->prod) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1056,16 +1176,20 @@ int test16(long arg) long i; unsigned int result; struct mulvals_sp *vp = mulvals_sp; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(mulvals_sp) / sizeof(mulvals_sp[0]); ++i, ++vp) { asm("lfs 5,0(%0); lfs 6,4(%0); fmuls 7,5,6; stfs 7,0(%1)" : : "b" (&vp->val_a), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->prod) { print_hex(i, 2, " "); print_hex(result, 8, " "); return i + 1; } + if (check_fprf_sp(result, fpscr)) + return i + 0x101; } return 0; } @@ -1093,16 +1217,20 @@ int test17(long arg) long i; unsigned long result; struct divvals *vp = divvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(divvals) / sizeof(divvals[0]); ++i, ++vp) { asm("lfd 5,0(%0); lfd 6,8(%0); fdiv 7,5,6; stfd 7,0(%1)" : : "b" (&vp->val_a), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->prod) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1130,16 +1258,20 @@ int test18(long arg) long i; unsigned long result; struct recipvals *vp = recipvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(recipvals) / sizeof(recipvals[0]); ++i, ++vp) { asm("lfd 6,0(%0); fre 7,6; stfd 7,0(%1)" : : "b" (&vp->val), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->inv) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1273,16 +1405,20 @@ int test21(long arg) long i; unsigned long result; struct isqrtvals *vp = isqrtvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(isqrtvals) / sizeof(isqrtvals[0]); ++i, ++vp) { asm("lfd 6,0(%0); frsqrte 7,6; stfd 7,0(%1)" : : "b" (&vp->val), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->inv) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1320,16 +1456,20 @@ int test22(long arg) long i; unsigned long result; struct sqrtvals *vp = sqrtvals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(sqrtvals) / sizeof(sqrtvals[0]); ++i, ++vp) { asm("lfd 6,0(%0); fsqrt 7,6; stfd 7,0(%1)" : : "b" (&vp->val), "b" (&result) : "memory"); + fpscr = get_fpscr(); if (result != vp->inv) { print_hex(i, 2, " "); print_hex(result, 16, " "); return i + 1; } + if (check_fprf(result, false, fpscr)) + return i + 0x101; } return 0; } @@ -1384,6 +1524,7 @@ int test23(long arg) long i; unsigned long results[4]; struct fmavals *vp = fmavals; + unsigned long fpscr; set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) { @@ -1391,6 +1532,7 @@ int test23(long arg) : : "b" (&vp->ra), "b" (results) : "memory"); asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)" : : "b" (results) : "memory"); + fpscr = get_fpscr(); if (results[0] != vp->fma || results[1] != vp->fms || results[2] != vp->nfma || results[3] != vp->nfms) { print_hex(i, 2, " "); @@ -1400,6 +1542,8 @@ int test23(long arg) print_hex(results[3], 16, "\r\n"); return i + 1; } + if (check_fprf(results[3], false, fpscr)) + return i + 0x101; } return 0; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 3c1021b859e238987c22eaeb5a34fc5318137666..f5a7eec6e4b76f103354219b14dc6a3ffb64f8d0 100755 GIT binary patch delta 8519 zcmb7J4Rljgw%$og2w2-71=9bN7EEJ`0TFMT{v_pRLJL?&^r4G7sZ}4a#<9F7uGFR^ z?Sxq{4_Por-a;)*aRCb$V2wXBOWzV-{d-f^;Cmg%pQ2bpq1r;LRZ)thz3o)*t=eGzOWO_`g!z`IW}6DTWM}t-59U<<&%~UX zKW)i*^qco{Dm|r3>tuPY&9Jn}8=P3>5%1pOe_;97av2f1HDt;oeT>yj4Gzzhb3P5Z zrMW6ktMV3sS1-$D%tse#ipnR7vBuC^cXmYIAH?pd8QRmi;?1e+v}=2XB{5lBo-39l zTC|zH;@=XBv}bz6%Zd5g{XOFA#QQBj+(xdqJNISn0gGmb~tPM!Z z!whe7oX+q_K$`HC*p^(P3!-`aOQlO(N}O)Zih#7?OOc*ZqAR~UAmxTRjo=)4G$3_$ zieun-9)r=HN*`-#oNm#-2BiBsMN#Uf+L}(0oc6NzWS02fwEQXCv&dDXt6G_6A{YBa zP<%c$MJLOcJWJ%KuShM@W!<+oBg_oRvi(&!gN<_X+`6n9(UhL5t(hs_OfSl5eWKCs zIlIZ11HOqWySQy2zmx>5r@CQ0NzBT~h_}sDM#PMy@yCjJna|H@eWKmYyY2b@J>Q*! zJP8Itn+KT%tRbINPMOlH6RW(QOz~4D+{q;JXU;><(HJ+1vVT};o8R_7=@lhx6qn~8o~ zyE_V3)jku27uMEA;jG%nqi{@Z1u&~)qO-NE4v)JOv?z6|)5XWx>%`Vu^&)jn+=}26 zRZv^g{|_9}%Ae%2kFN~+Y*@_6Bxmq8>`V^(MR}jkYojV-4m?WxKICRBjB0h(2%A(?FYHo@u}^*D+wA!c#`7IAt(y37Kejo56TEWTxY7 zn5Ub{bG(Vv;B#t7w)>UkKkMsCC6}2r-JT9h9hIBC=i~;54cI}J8pa<2%d#UczJFx+>|KG8b=LIjYhWbg75Qu25Ps*QL$B$vl|?2 zogWNX%#X%D0e{F*K&>Ux0j=JvNfj^8DN5gfu}tK=GWfUJl|zfPI-?0jzuxx1poqUc zSz9y{#bT=2DvKX-WqJQnda7b#&sgt_i!J){h@sw40InH`gHhO=! zESEX@vb+WCbMUnVCi!2QRR3NHO^CQZ4ZeR_Jb#DTaLDIL$*Ma6=i<&gFN+WEaA;l- zQ|HfGn2V7JBN;FfkCKSdB*jsAguwwg&w0-aRU7+v{rkpT3;_HS4?=YM#I`4Y93Z+P zYUj_=zSl2aoNv~oh`-ODm9c@nZ`a76Mm~P%ye(G3nYW9CJL|O{T@rt|bGG*ACGlVQ z*>p*C;HTn}n7qKEoqI_vSWxP7Kf2BnOjzTQCtA!Dv)0392T_i73ZjgO8kP~6trNoS z<&%_O{|cgPE7lc$+2HiY1SLoLtD$N;j%>Z5aEO z-=Y$*dBe}l6>B^l3F|yEnaytPwUn}4L#cze6IrZ@w8t4%qG4DGqC-$_kE%q&@)0E_ zD^@B>h}CJSzFAYlm6PYTH@%(|ulIedXvcvQwPeiku7(+&6rUI88&%cA1)SsB5Xg6b zGb+ESn9sHcIq(Ey&c8@HTF_nx=86!(PU6zB?-q6@x6~)gzLDS+8y&p1K!`&-*2qoN zY;W4vVJGqCK_WjFE8Om~QIjP~Mo~=;d(!(M7Hx zin|dmx;-h6ddcOpc-O#6woaBiirKo>CuI9^)TT=5aivqq;+KIr;(1iu4XjiZDn12_ zNH)wr0j!~-Jil^$vg$%p$YsF9W;OJ0M~F89+k7g)4+0Nh0+C!<-fCbjIx4XpTp)W= z)_TLapeXN%k_*}q5oCK36h`s44zO@E&tsmCC*}nNd zJM1y=0yu>osm_j%3&oa5Zy@ru7*hN&XLp-}Du=zKJf$h5M-q1+7Y=#OcF+t8EpwRG z3YrnLo!`Mdqus^gtyb3J#RB$gE`Ah$@8N=|qnPdI9LOHTp7IjtkzTVGIMQo&07rUF zJurJsIOS#49ZB#-yUk{-i{dt_*LZ*MKI}U1h^YFClZ<}HkQ=?(qNny2i>)(cwoO3r9zZRXcIts|)hp1G!|=7v(x0~st$KTl&Bh6A zpzS>+qJ|@}UX8?Kt@saLMXTKj9N|0j>P)KIN0rk1+G_K+g$`n zoR1+w6}B0uqRmKkZBE2Y&KM&V&rk~MP=hd16<=E*UHR5`s&=VWO(k$-{<;S^GJiRN zIsHUNtz6JJhq=|tXAKPEqGqV_;S^@(%4bvxv(a7Q%4aBty@y&YIyQ{tFbS1U{7~hi zMDart)iO3~k5a~VqKvs-8Jl@7x{R$*@lYANoN+`MTL>Ik#y*KTuUp1)(3%3lu}=Qx zk)YKaOYO~ww>>!hts9gWnpy8jdBlrj5q!HV(S8f_JJ<3XTfY=fB5>Qt6x5I;XsVuyRU;Mz@0Db4}T#% zTtL-TlK;@m9b3R-kC*^O^wp@4u!L`cc0l_ye@Q6m+yMGfg;qOEGlKRUXb*fMm)(i5 z%U3-tkJLXEUtWH`KN?n`xQp=#*bn@;if;mL0j^MS9I(NK>sl4x47>`sP{r}UO~AP-P5|x#PE)agd~V6MG$7rq z68JOL13Z8_n=Rt6&1T?U6;B1$mtqA}%%7>{!0)Sg8t^XQ<0_sG90We9;u*kZz6MvZ zkvsI^^7t8*kOXYKM@-IJB7%8|sQGi&_s9)SqIF$IV##|r-R(b*AP@zM<6~&uyrzMJlyhM#voXJbmSVTZsXNNV?J*J>`I&6Q)BM zwAUL^;4t4YiWl3TQ$x*E_)Y?TO?vdx*AOP(en@*cm8g(ue^^*r7vTP&^HJG#Xy$Vy zcG{mxhNqT>A%-@ZBBm9Zg4<2FkU*y_t2sX4;R^>11>CXfDjmMt?|o%uflc$6_5WnK z3uE(Rz>E6{A}mxn|Keeq18)InNASxJ(^lgB5Hu5h`C-}`yg|^inSh6Xy?Fat#bDt~ z-99uM{~=}<&DOP|`Njw0mqivIX^5nM$}*olS{!eh;@tR)cq-Ac!B_(_f$s)C8^5$9 z-Uj|3H5(!Eh0e)*2k4J{ICg(G_*=*3`^XQ0SH~8hCh$*>!*2#Z_M@>C2!d}NlaK3X z(qcKs6d*Ez?;VHl0Ds>&d^h+f$Kvz+eh6F~O91>P@KZk?+rnn>7mdRYg8%C=_-lC& zDTe6TF$5GJ0RN3~_zv*Tjl*|?A1CUIQXcf9HUAS?_VBEW6>s`wJS`mdwkGhmj=^Ua zZ|0f6SF{E;;r$S_o=?R6&U9Trno^s1+BtKQDVAt;n=Ee?`<=Jy%7I>M6CXG&IzO7$ zQ{usr**e4y>N+KUS7OnfMswAt;;$tObVgjiulfwld6Tm-{7QzW#oLRrmZNn9cN{z^ zKQ_GEL3@p9@G@F7$w|%^CN#$Gj@hIAqXtLC8SmHpYBHae_Rmj$rgPrpfY`g}v+r{; zxTr{{*AcnCKr??Ho3{+GCpcmr)7+q41Z@%8{4B%!Bxu=Z#IeOo@L`4l0sZG>`B~=U zVOkq#)QNxFF#!(;mLAZmLEFWIg{*9ps*VSTG=-uzh@|2!p$+ng{;K>{#@$;z)Q7F( zNy^cw=Sx{`MLRr6#A=j3q>4irY(#qULw@U5aEakbgkkYCpLclNPfsbrJoX!qZ9$zH zLOZ+ltfKeUAX<-_nxrJ zG*zcN^JdTQ+;h)8=e%?7J@<`%tr`7V)3#f?{<}x_95zr-hF*#zLNOu4buhx>?Ko6b z%viGsD7v3s=snFe#gWA49?ak9){|@g6z6QOo^0%GGj<1SjRQeKdi(Xn-*1>Fj2Rxa zjTwmfjFC9b7>qlE0%`3x7!L={>AP_c11UM1k${$pY^QqyK@V? zM9la2S&MCe_4sY1U@OP{z>E!u@qrx3?m#JyhRd~F;@A*92C^UI+e1{Bx=7y@o#!>?69*kl zGSbG>lqk{e6X(!Vso$j^q{iv$p7e>A{ZLt@bJzOB*Esn&T{1IHcVMkge8x}L&aBey zcKO8V%E&p4%zwrweteaFi4k*^PxM}u=QJAQbo*BL#K*4EZN@5H^Rqs2BPa4VStV#rRx}jCt1{)xh##xDacr9xhl|wEGQzDVn4l@ovtmR24X$ao^2a3 z66YB`8S)FH$Iq*314@vwb!CiwSlQaxAT~E%C`}w!O}z23B-^+Y3^TVG3~qBda8U@J z11yH%OkfR>#9ERg*LD%SFfJQlo4cq|CEB|X{MyC;*38-;E;7Tg4ZXoiTbz~lHQen;_2 zf{T%Gkyn!)SbkNi+mDVhlkAsG^{YCAI5dQn8u7B*XmNHP-IBF?E)OA}&Q+}phDT~K zaA;&!r_;IF721zY^at7VF!O{m^ZHxTukLbVK9by^H87|Xn9sbo_5l<91Tzy&G+wBX z`-di14(f(NBaIwVxTOGbh}fPj4&B>oH9rKWXX-!wpSG!|;Np{b4w->E$pS z(X<_ynS_HSC0inpL8u~8L%KyR>400B zz;Q;05mSYtuV|dU6=No(WzXcm|obpYtSH!D%p! zU?Lrne)WDLGx)t7KU*rcx)F2xMLqyav2_DIggM-bHER+zD(fI>YEN6T76@G zS2D##YLxOb%;e>_6XmBY-iq=v0&^wY7&ov0%$0Dw0hp(?8EG9DS58l*`uI-7cO-%{ zZ?f8QZVPIx3gwvc<{`$be_4sMQjMLc#*@9)qRT;ZQPdd0uB0ciXAC4}2y^|sB5UQ4 zd|1gt5?VrOa7V4eWpYl(fWckiNx7XU+%@jjFEy8u_1F4C_sz0zc}t6LO}r)9lj2(}A#A@|l7jszn=Z{xknR`qP`g^g z?dpysC%a+SFzh;wbF3Nr`F3K9_I#UNFWr>wYUZ0U-zmH|r|jWv$;uuchs@~Ud-#3L ziwX&wA>2GU$!8(SA2e*A-Uts{&y6Wz>q~mzu<1hVdUt2&>pa<8RK(i>UUZX3;J1Y= zZHi4Xz&LqQ#d}9jMv@XT144G-fkI{oHSMS2rpXRq zk1ffHnsxRY$tFCLJFIf(Ozg1IM3?gEXt?b?z@aD3QQ*)MXFo7MEJAnSm%!uw$-SPh zBY`-CkR6yGI%eP^bw_ryycSy#7^2F!cj}19Dj~Gzqg!%QmbIZbXq^Zl~;n>*mEjfgob4xo_?NNqJ1=;&YM+{^@`k( z+wYG5uBquPCWB=0Gp5gmD`x<2K=j>Kl&wZSI2123E&YuQnzo{9CA&z?Fg9djYGudCt zh{bFsIk)$c?Y7ZT7jDVKH9FeVkdPo$CM1x`XwqH5?@g_Ts!9dY)Rd4=j4@+_K(^EW zHRn0T(5@X0ioF)E*#D7)R$;5Vrom%Dez?=Od1j!kwvFb=Ck^k89v9fN9dh(9Nj_LY z2%k^JNA6IQ#cbFpxzqE!Cvg+vli$BXpR(9A?UY((FIbA^nLrc&{K2)PK%0nF6b<8J zNS0a}8;w*qt>{=5OY>k+GVd4L{!#7Tq28QLHJ zotjGWr9(=|`O)xQKNz9+7_RcSpnM&2r%N7wZG4ws89Q@dzI8=u%A=6K$9YYlpPd8w zgW4A%r}Syv;HSyFrU_mzcv0Yee3sQLM_6{i5(Y7hYx z&j4-(KB3}N;M2fwt9T}`5#NPgQ!!dlm-qobth}T$(tra`)1Q_t(uj0>LmXd(ldtsQh*yY=O zXDEKhU{`h;(U~Y`lQYK-hs*@jpX?wV>nUd zA3809TNtVR6Jxasi*TL|&u;KWLwI$dH}o-fB!stt)1NUWgz&KOkT~#WsXYGQh9?8O zK0;`vJyUlaxc4)<&YrIu1nK{r?y*~SDH=l7|C#>UZqm&Mx%NCpj#p5gY+w89Z17j#a@u2Lhc3WI!i06O}Z|S$IsHW)uxI*E_;r# z3iAk{dOnt`x}dX9!;l$IH)NMwVC)0X6SB&x zE2qj1L-yPSTCp%+XT;@Z-xqY#LaWXJQa?a{w(xPCkCPWc&XZ)_9R8KD^`Uk=5$}Q> zKMK`73ri0})^)JOo>!-@4;PyfW)%={43-b&5BvJ;!UI@yLO$x4=s&GBz zR$soq+-}H?K7d>oUHBXXQ?A}$E)v(=0}miq2)X%awQhIj%eRX{ z@B-jfgXX^_cx>RAN9epK7RB-%rVY?HN>@3px^9r-S9F(ip0)35#`xaAf3hO{*WN%f z%N!*)ZJ>X1gI;l3H36DflbjZJlQA224?jL>FCOt0V~y|GYrcqlueQb&`JPZ)YxxJw CsWwRf From 51954671f34536d1f315c5296058bae6c978237c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 7 Feb 2024 20:01:49 +1100 Subject: [PATCH 4/5] FPU: Fix behaviour of fdiv with denormalized divisor Renormalization of the divisor for fdiv[s] was adjusting the result exponent in the wrong direction, making the result smaller in magnitude than it should be by a power of 2. Fix this by negating r.shift in the RENORM_B2 state and then subtracting it in the LOOKUP cycle. Signed-off-by: Paul Mackerras --- fpu.vhdl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 6fbc979..afac4c0 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1817,8 +1817,17 @@ begin when RENORM_B2 => set_b := '1'; - re_sel2 <= REXP2_NE; - re_set_result <= '1'; + -- For fdiv, we need to increase result_exp by shift rather + -- than decreasing it as for fre/frsqrte and fsqrt. + -- We do that by negating r.shift in this cycle and then + -- setting result_exp to new_exp in the next cycle + if r.use_a = '1' then + rs_sel1 <= RSH1_S; + rs_neg1 <= '1'; + else + re_sel2 <= REXP2_NE; + re_set_result <= '1'; + end if; v.opsel_a := AIN_B; v.state := LOOKUP; @@ -2038,6 +2047,12 @@ begin when LOOKUP => -- r.opsel_a = AIN_B -- wait one cycle for inverse_table[B] lookup + -- if this is a division, compute exponent + -- (see comment on RENORM_B2 above) + if r.use_a = '1' then + re_sel2 <= REXP2_NE; + re_set_result <= '1'; + end if; v.first := '1'; if r.insn(4) = '0' then if r.insn(3) = '0' then From 7b86bf88635fbf3ba1e0a5236fb629815c3b40df Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 7 Feb 2024 20:16:04 +1100 Subject: [PATCH 5/5] tests/fpu: Add tests for fdiv and fre with denormalized operands Signed-off-by: Paul Mackerras --- tests/fpu/fpu.c | 7 +++++++ tests/test_fpu.bin | Bin 31088 -> 31232 bytes 2 files changed, 7 insertions(+) diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index d04140f..059d83b 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1210,6 +1210,10 @@ struct divvals { { 0xbff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 }, { 0x4000000000000000, 0x4008000000000000, 0x3fe5555555555555 }, { 0xc01fff0007ffffff, 0xc03ffffffdffffbf, 0x3fcfff0009fff041 }, + { 0x0010000000000000, 0x0018000000000000, 0x3fe5555555555555 }, + { 0x0008000000000000, 0x0018000000000000, 0x3fd5555555555555 }, + { 0x0010000000000000, 0x0000c00000000000, 0x4035555555555555 }, + { 0x0004000000000000, 0x0000300000000000, 0x4035555555555555 }, }; int test17(long arg) @@ -1251,6 +1255,9 @@ struct recipvals { { 0xbff0000000000000, 0xbfeff00400000000 }, { 0x4008000000000000, 0x3fd54e3800000000 }, { 0xc03ffffffdffffbf, 0xbfa0040000000000 }, + { 0x0008100000000000, 0x7fdfb0c400000000 }, + { 0x0004080000000000, 0x7fefb0c400000000 }, + { 0x0002040000000000, 0x7ff0000000000000 }, }; int test18(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index f5a7eec6e4b76f103354219b14dc6a3ffb64f8d0..e4e2116694ee651e3e897429a5d913a5daedb2c8 100755 GIT binary patch delta 831 zcmZvaPiPZC6vp37wq&=Ix)`mYXu760))3n?{i6{>)(HMtA>u*AoSH&G8kFLxk=Cq1 zM2!b^4puBGsD~^fmGLNAqzEbmFM5fhh?Xvh4aI{}FuvW5B#HyeoA>*^o%iiBm1UxT zCtUz)3_wdicWYU!qi{+~0}La}Aj=@jjOtgq`icJV=+dg(kD@;cu-^nQU!iTg0(uf~ zBN`Sr%U`S216FT6mRB?q$X}rMW%+f3R;g(ZlDBgJM)}cyv~NirwTtEHy5_bA2?i_4 zJv3b8zvQ<>5)F!Za2|WV`Un*bHslHVaJWe^AOxD}3Xcr;;`N(}rY|RU@6Pgvz&AFc9F! zHq8;*eS-%BCn@z7`OWSg^xWg6t$PSn?y$Dr67^o=3%%zEnP$iPR;c$9AB#{z#|!L4 ze~G9pHc)7iARd}vecL)&W#BH6SbAtPe>^zjqTT{eMR6P$d~8RM3e*@`WS85NwN^B* z;lE&2+5YKLD9jGUJnZAn;pV!+wOESeWP|=(4-f-IOkdrtO`V1OF6&JKN))}!%ji+Q=0r0|Mdwq#J dz&WAL-_&@`KL<7cpz8RY2@tMxem=G6`wcm-_YnX9 delta 701 zcmZvaPiPZC6vp375|fltL!&(ujcaS8hS=J)R##jSBmP?<(vui3NuW>~^x&z1)~q4O znqHy<5~Y+PJ#>l)G9E>X2SJ74$x9SUk>Y}A5ie43eY2@FiihRB_x-+|_wBI853>H9 z^Z_I^06qK4s~51283jEHkU=evS{}9h7~JU_BJkw$x9!Do-lGDXw*aiSXgj8WkpgT? zN4FYZZ2N+or_~r7|-6@Z(vEB|sjX4$xhsF55WkPEY zL?nEjQgczv9q32Tf~X%lLumCO>l&z2^PX56xJ8JMT^-z@=8Tw#Q9@TL?AlPB1ei8F z`o)_OkWi1+tB0NA`>`>+gJJ2JyoR0Vv6 zM{bx}I;%`q@w%M;V#4X^OTFNRgKDehf`r=sj~4iQ-eY;u