Compare commits

...

6 Commits

Author SHA1 Message Date
Paul Mackerras af2d6e268e
Merge pull request #425 from paulusmack/fixes
Fixes to the FPU and the run_test script
3 months ago
Paul Mackerras 7b86bf8863 tests/fpu: Add tests for fdiv and fre with denormalized operands
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 months ago
Paul Mackerras 51954671f3 FPU: Fix behaviour of fdiv with denormalized divisor
Renormalization of the divisor for fdiv[s] was adjusting the result
exponent in the wrong direction, making the result smaller in
magnitude than it should be by a power of 2.  Fix this by negating
r.shift in the RENORM_B2 state and then subtracting it in the LOOKUP
cycle.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 months ago
Paul Mackerras 59a7996f1c tests/fpu: Add checks for correct setting of FPRF
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
4 months ago
Paul Mackerras eecf1ca399 FPU: Fix setting of FPRF
The sign recorded in FPRF was sometimes wrong because we weren't doing
the modifications that were done in pack_dp when setting FPRF (FPSCR
field).  These modifications are: set sign for zero result of
subtraction based on rounding mode; negate result for fnmadd/sub;
but don't modify sign of NaNs.

Instead we now do these modifications in the main state machine code
and put the result in an 'rsign' variable that is used to set
v.res_sign, then r.res_sign is used in the next cycle both for setting
FPRF and in the pack_dp functions.  That simplifies pack_dp and lets
us get rid of r.res_negate, r.res_subtract and r.res_rmode.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
4 months ago
Paul Mackerras 9a4f0c18e1 scripts/run_test: Use grep -E instead of egrep
Grep in Fedora 39 has started warning when invoked as 'egrep',
so use grep -E instead to avoid the warnings.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
4 months ago

@ -169,9 +169,7 @@ architecture behaviour of fpu is
oe : std_ulogic;
xerc : xer_common_t;
xerc_result : xer_common_t;
res_negate : std_ulogic;
res_subtract : std_ulogic;
res_rmode : std_ulogic_vector(2 downto 0);
res_sign : std_ulogic;
end record;

type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
@ -609,20 +607,13 @@ architecture behaviour of fpu is

-- Construct a DP floating-point result from components
function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic;
negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector)
mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
return std_ulogic_vector is
variable dp_result : std_ulogic_vector(63 downto 0);
variable sign : std_ulogic;
begin
dp_result := (others => '0');
sign := negative;
case class is
when ZERO =>
if is_subtract = '1' then
-- set result sign depending on rounding mode
sign := round_mode(0) and round_mode(1);
end if;
when FINITE =>
if mantissa(UNIT_BIT) = '1' then
-- normalized number
@ -642,7 +633,7 @@ architecture behaviour of fpu is
dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
end if;
end case;
dp_result(63) := sign xor negate;
dp_result(63) := negative;
return dp_result;
end;

@ -860,6 +851,7 @@ begin
variable opcbits : std_ulogic_vector(4 downto 0);
variable int_result : std_ulogic;
variable illegal : std_ulogic;
variable rsign : std_ulogic;
begin
v := r;
v.complete := '0';
@ -1825,8 +1817,17 @@ begin

when RENORM_B2 =>
set_b := '1';
re_sel2 <= REXP2_NE;
re_set_result <= '1';
-- For fdiv, we need to increase result_exp by shift rather
-- than decreasing it as for fre/frsqrte and fsqrt.
-- We do that by negating r.shift in this cycle and then
-- setting result_exp to new_exp in the next cycle
if r.use_a = '1' then
rs_sel1 <= RSH1_S;
rs_neg1 <= '1';
else
re_sel2 <= REXP2_NE;
re_set_result <= '1';
end if;
v.opsel_a := AIN_B;
v.state := LOOKUP;

@ -2046,6 +2047,12 @@ begin
when LOOKUP =>
-- r.opsel_a = AIN_B
-- wait one cycle for inverse_table[B] lookup
-- if this is a division, compute exponent
-- (see comment on RENORM_B2 above)
if r.use_a = '1' then
re_sel2 <= REXP2_NE;
re_set_result <= '1';
end if;
v.first := '1';
if r.insn(4) = '0' then
if r.insn(3) = '0' then
@ -2590,7 +2597,6 @@ begin
arith_done := '1';

when NAN_RESULT =>
v.negate := '0';
if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
(r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or
(r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then
@ -3158,14 +3164,14 @@ begin

end case;

rsign := v.result_sign;
if zero_divide = '1' then
v.fpscr(FPSCR_ZX) := '1';
end if;
if qnan_result = '1' then
invalid := '1';
v.result_class := NAN;
v.result_sign := '0';
v.negate := '0';
rsign := '0';
misc_sel <= "0001";
opsel_r <= RES_MISC;
arith_done := '1';
@ -3181,6 +3187,12 @@ begin
v.writing_fpr := '1';
v.update_fprf := '1';
end if;
if v.is_subtract = '1' and v.result_class = ZERO then
rsign := r.round_mode(0) and r.round_mode(1);
end if;
if v.negate = '1' and v.result_class /= NAN then
rsign := not rsign;
end if;
v.instr_done := '1';
update_fx := '1';
end if;
@ -3516,7 +3528,7 @@ begin
end if;

if r.update_fprf = '1' then
v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class,
r.r(UNIT_BIT) and not r.denorm);
end if;

@ -3541,9 +3553,7 @@ begin
v.int_result := int_result;
v.illegal := illegal;
v.nsnan_result := v.quieten_nan;
v.res_negate := v.negate;
v.res_subtract := v.is_subtract;
v.res_rmode := r.round_mode;
v.res_sign := rsign;
if r.integer_op = '1' then
v.cr_mask := num_to_fxm(0);
elsif r.is_cmp = '0' then
@ -3574,9 +3584,8 @@ begin
if r.int_result = '1' then
fp_result <= r.r;
else
fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
r.sp_result, r.nsnan_result,
r.res_negate, r.res_subtract, r.res_rmode);
fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r,
r.sp_result, r.nsnan_result);
end if;

rin <= v;

@ -21,7 +21,7 @@ cd $TMPDIR

cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin

${MICROWATT_DIR}/core_tb | sed 's/.*: //' | egrep '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true
${MICROWATT_DIR}/core_tb | sed 's/.*: //' | grep -E '^(GPR[0-9]|LR |CTR |XER |CR [0-9])' | sort | grep -v GPR31 > test.out || true

grep -v "^$" ${MICROWATT_DIR}/tests/${TEST}.out | sort | grep -v GPR31 > exp.out


@ -459,6 +459,7 @@ int test6(long arg)
unsigned long results[6];
unsigned long v;

set_fpscr(0);
for (i = 0; i < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++i) {
v = sp_dp_equiv[i].dp;
asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1)"
@ -474,6 +475,8 @@ int test6(long arg)
results[4] != (v & ~SIGN) ||
results[5] != (v | SIGN))
return i + 1;
if (get_fpscr() != 0)
return i + 0x101;
}
return 0;
}
@ -484,6 +487,98 @@ int fpu_test_6(void)
return trapit(0, test6);
}

/*
 * Work out the 5-bit FPRF code that should accompany a result held in
 * double-precision (64-bit) format.
 *
 * result: raw 64-bit image of the floating-point result.
 * single: when true, any biased exponent of 0x380 or below is classed
 *         as denormalized rather than normalized (presumably because
 *         such values lie below the single-precision normal range).
 *
 * Returns one of: 0x11 NaN, 0x09/0x05 -/+infinity, 0x08/0x04
 * -/+normalized, 0x18/0x14 -/+denormalized, 0x12/0x02 -/+zero.
 */
unsigned long expected_fprf(unsigned long result, bool single)
{
	const unsigned long frac_mask = (1ul << 52) - 1;
	unsigned long negative = (result >> 63) & 1;
	unsigned long biased_exp = (result >> 52) & 0x7ff;
	unsigned long fraction = result & frac_mask;
	/* smallest biased exponent that still counts as normalized */
	unsigned long min_normal_exp = single ? 0x381 : 1;

	/* all-ones exponent: NaN if any fraction bit is set, else infinity */
	if (biased_exp == 0x7ff) {
		if (fraction != 0)
			return 0x11;
		return negative ? 0x09 : 0x05;
	}

	/* normalized number */
	if (biased_exp >= min_normal_exp)
		return negative ? 0x08 : 0x04;

	/* anything non-zero below the normal range is a denormal */
	if (biased_exp != 0 || fraction != 0)
		return negative ? 0x18 : 0x14;

	/* signed zero */
	return negative ? 0x12 : 0x02;
}

/*
 * Work out the 5-bit FPRF code that should accompany a result held in
 * single-precision (32-bit) format.
 *
 * result: raw image of the result; only the low 32 bits are examined
 *         (sign at bit 31, 8-bit exponent at bits 30..23, 23-bit
 *         fraction below that).
 *
 * Returns one of: 0x11 NaN, 0x09/0x05 -/+infinity, 0x08/0x04
 * -/+normalized, 0x18/0x14 -/+denormalized, 0x12/0x02 -/+zero.
 */
unsigned long expected_fprf_sp(unsigned long result)
{
	const unsigned long frac_mask = (1ul << 23) - 1;
	unsigned long negative = (result >> 31) & 1;
	unsigned long biased_exp = (result >> 23) & 0xff;
	unsigned long fraction = result & frac_mask;

	/* all-ones exponent: NaN if any fraction bit is set, else infinity */
	if (biased_exp == 0xff) {
		if (fraction != 0)
			return 0x11;
		return negative ? 0x09 : 0x05;
	}

	/* non-zero exponent: normalized number */
	if (biased_exp != 0)
		return negative ? 0x08 : 0x04;

	/* zero exponent with fraction bits: denormal */
	if (fraction != 0)
		return negative ? 0x18 : 0x14;

	/* signed zero */
	return negative ? 0x12 : 0x02;
}

/*
 * Compare the 5-bit field at bits 16..12 of fpscr with the FPRF code
 * expected for a double-format result.
 *
 * result: raw 64-bit image of the result that was produced.
 * single: passed through to expected_fprf().
 * fpscr:  FPSCR value read back after the operation.
 *
 * Returns 0 on a match.  On a mismatch, prints the result, the fpscr
 * value and the expected code on a new line and returns 1.
 */
int check_fprf(unsigned long result, bool single, unsigned long fpscr)
{
	unsigned long want = expected_fprf(result, single);
	unsigned long got = (fpscr >> 12) & 0x1f;

	if (got != want) {
		print_string("\r\n");
		print_hex(result, 16, " ");
		print_hex(fpscr, 8, " ");
		print_hex(want, 2, " ");
		return 1;
	}
	return 0;
}

/*
 * Compare the 5-bit field at bits 16..12 of fpscr with the FPRF code
 * expected for a single-format result.
 *
 * result: raw image of the single-precision result that was produced.
 * fpscr:  FPSCR value read back after the operation.
 *
 * Returns 0 on a match.  On a mismatch, prints the result, the fpscr
 * value and the expected code on a new line and returns 1.
 */
int check_fprf_sp(unsigned long result, unsigned long fpscr)
{
	unsigned long want = expected_fprf_sp(result);
	unsigned long got = (fpscr >> 12) & 0x1f;

	if (got != want) {
		print_string("\r\n");
		print_hex(result, 16, " ");
		print_hex(fpscr, 8, " ");
		print_hex(want, 2, " ");
		return 1;
	}
	return 0;
}

struct int_fp_equiv {
long ival;
unsigned long fp;
@ -522,12 +617,15 @@ int test7(long arg)
{
long i;
unsigned long results[4];
unsigned long fpscr;

for (i = 0; i < sizeof(intvals) / sizeof(intvals[0]); ++i) {
set_fpscr(0);
asm("lfd%U0%X0 3,%0; fcfid 6,3; fcfidu 7,3; stfd 6,0(%1); stfd 7,8(%1)"
: : "m" (intvals[i].ival), "b" (results) : "memory");
asm("fcfids 9,3; stfd 9,16(%0); fcfidus 10,3; stfd 10,24(%0)"
: : "b" (results) : "memory");
fpscr = get_fpscr();
if (results[0] != intvals[i].fp ||
results[1] != intvals[i].fp_u ||
results[2] != intvals[i].fp_s ||
@ -539,6 +637,8 @@ int test7(long arg)
print_hex(results[3], 16, " ");
return i + 1;
}
if (check_fprf(results[3], true, fpscr))
return i + 0x101;
}
return 0;
}
@ -582,16 +682,20 @@ int test8(long arg)
{
long i;
unsigned long result;
unsigned long fpscr;

for (i = 0; i < sizeof(roundvals) / sizeof(roundvals[0]); ++i) {
asm("lfd 3,0(%0); lfd 4,8(%0); mtfsf 0,3,1,0; frsp 6,4; stfd 6,0(%1)"
: : "b" (&roundvals[i]), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != roundvals[i].spval) {
print_string("\r\n");
print_hex(i, 4, " ");
print_hex(result, 16, " ");
return i + 1;
}
if (check_fprf(result, true, fpscr))
return i + 0x101;
}
return 0;
}
@ -796,6 +900,7 @@ int test11(long arg)
long i;
unsigned long results[4];
struct frivals *vp = frivals;
unsigned long fpscr;

for (i = 0; i < sizeof(frivals) / sizeof(frivals[0]); ++i, ++vp) {
set_fpscr(FPS_RN_FLOOR);
@ -807,6 +912,7 @@ int test11(long arg)
asm("frip 5,3; stfd 5,16(%0)" : : "b" (results) : "memory");
set_fpscr(FPS_RN_CEIL);
asm("frim 5,3; stfd 5,24(%0)" : : "b" (results) : "memory");
fpscr = get_fpscr();
if (results[0] != vp->nval || results[1] != vp->zval ||
results[2] != vp->pval || results[3] != vp->mval) {
print_hex(i, 2, "\r\n");
@ -816,6 +922,8 @@ int test11(long arg)
print_hex(results[3], 16, " ");
return i + 1;
}
if (check_fprf(results[3], false, fpscr))
return i + 0x101;
}
return 0;
}
@ -903,17 +1011,21 @@ int test13(long arg)
long i;
unsigned long results[2];
struct addvals *vp = addvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) {
asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
: : "b" (&vp->val_a), "b" (results) : "memory");
fpscr = get_fpscr();
if (results[0] != vp->sum || results[1] != vp->diff) {
print_hex(i, 2, " ");
print_hex(results[0], 16, " ");
print_hex(results[1], 16, "\r\n");
return i + 1;
}
if (check_fprf(results[1], false, fpscr))
return i + 0x101;
}
return 0;
}
@ -976,18 +1088,22 @@ int test14(long arg)
long i;
unsigned long results[2];
struct addvals *vp = sp_addvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(sp_addvals) / sizeof(sp_addvals[0]); ++i, ++vp) {
asm("lfd 5,0(%0); frsp 5,5; lfd 6,8(%0); frsp 6,6; "
"fadds 7,5,6; fsubs 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
: : "b" (&vp->val_a), "b" (results) : "memory");
fpscr = get_fpscr();
if (results[0] != vp->sum || results[1] != vp->diff) {
print_hex(i, 2, " ");
print_hex(results[0], 16, " ");
print_hex(results[1], 16, "\r\n");
return i + 1;
}
if (check_fprf(results[1], true, fpscr))
return i + 0x101;
}
return 0;
}
@ -1017,16 +1133,20 @@ int test15(long arg)
long i;
unsigned long result;
struct mulvals *vp = mulvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(mulvals) / sizeof(mulvals[0]); ++i, ++vp) {
asm("lfd 5,0(%0); lfd 6,8(%0); fmul 7,5,6; stfd 7,0(%1)"
: : "b" (&vp->val_a), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != vp->prod) {
print_hex(i, 2, " ");
print_hex(result, 16, " ");
return i + 1;
}
if (check_fprf(result, false, fpscr))
return i + 0x101;
}
return 0;
}
@ -1056,16 +1176,20 @@ int test16(long arg)
long i;
unsigned int result;
struct mulvals_sp *vp = mulvals_sp;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(mulvals_sp) / sizeof(mulvals_sp[0]); ++i, ++vp) {
asm("lfs 5,0(%0); lfs 6,4(%0); fmuls 7,5,6; stfs 7,0(%1)"
: : "b" (&vp->val_a), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != vp->prod) {
print_hex(i, 2, " ");
print_hex(result, 8, " ");
return i + 1;
}
if (check_fprf_sp(result, fpscr))
return i + 0x101;
}
return 0;
}
@ -1086,6 +1210,10 @@ struct divvals {
{ 0xbff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 },
{ 0x4000000000000000, 0x4008000000000000, 0x3fe5555555555555 },
{ 0xc01fff0007ffffff, 0xc03ffffffdffffbf, 0x3fcfff0009fff041 },
{ 0x0010000000000000, 0x0018000000000000, 0x3fe5555555555555 },
{ 0x0008000000000000, 0x0018000000000000, 0x3fd5555555555555 },
{ 0x0010000000000000, 0x0000c00000000000, 0x4035555555555555 },
{ 0x0004000000000000, 0x0000300000000000, 0x4035555555555555 },
};

int test17(long arg)
@ -1093,16 +1221,20 @@ int test17(long arg)
long i;
unsigned long result;
struct divvals *vp = divvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(divvals) / sizeof(divvals[0]); ++i, ++vp) {
asm("lfd 5,0(%0); lfd 6,8(%0); fdiv 7,5,6; stfd 7,0(%1)"
: : "b" (&vp->val_a), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != vp->prod) {
print_hex(i, 2, " ");
print_hex(result, 16, " ");
return i + 1;
}
if (check_fprf(result, false, fpscr))
return i + 0x101;
}
return 0;
}
@ -1123,6 +1255,9 @@ struct recipvals {
{ 0xbff0000000000000, 0xbfeff00400000000 },
{ 0x4008000000000000, 0x3fd54e3800000000 },
{ 0xc03ffffffdffffbf, 0xbfa0040000000000 },
{ 0x0008100000000000, 0x7fdfb0c400000000 },
{ 0x0004080000000000, 0x7fefb0c400000000 },
{ 0x0002040000000000, 0x7ff0000000000000 },
};

int test18(long arg)
@ -1130,16 +1265,20 @@ int test18(long arg)
long i;
unsigned long result;
struct recipvals *vp = recipvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(recipvals) / sizeof(recipvals[0]); ++i, ++vp) {
asm("lfd 6,0(%0); fre 7,6; stfd 7,0(%1)"
: : "b" (&vp->val), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != vp->inv) {
print_hex(i, 2, " ");
print_hex(result, 16, " ");
return i + 1;
}
if (check_fprf(result, false, fpscr))
return i + 0x101;
}
return 0;
}
@ -1273,16 +1412,20 @@ int test21(long arg)
long i;
unsigned long result;
struct isqrtvals *vp = isqrtvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(isqrtvals) / sizeof(isqrtvals[0]); ++i, ++vp) {
asm("lfd 6,0(%0); frsqrte 7,6; stfd 7,0(%1)"
: : "b" (&vp->val), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != vp->inv) {
print_hex(i, 2, " ");
print_hex(result, 16, " ");
return i + 1;
}
if (check_fprf(result, false, fpscr))
return i + 0x101;
}
return 0;
}
@ -1320,16 +1463,20 @@ int test22(long arg)
long i;
unsigned long result;
struct sqrtvals *vp = sqrtvals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(sqrtvals) / sizeof(sqrtvals[0]); ++i, ++vp) {
asm("lfd 6,0(%0); fsqrt 7,6; stfd 7,0(%1)"
: : "b" (&vp->val), "b" (&result) : "memory");
fpscr = get_fpscr();
if (result != vp->inv) {
print_hex(i, 2, " ");
print_hex(result, 16, " ");
return i + 1;
}
if (check_fprf(result, false, fpscr))
return i + 0x101;
}
return 0;
}
@ -1384,6 +1531,7 @@ int test23(long arg)
long i;
unsigned long results[4];
struct fmavals *vp = fmavals;
unsigned long fpscr;

set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) {
@ -1391,6 +1539,7 @@ int test23(long arg)
: : "b" (&vp->ra), "b" (results) : "memory");
asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)"
: : "b" (results) : "memory");
fpscr = get_fpscr();
if (results[0] != vp->fma || results[1] != vp->fms ||
results[2] != vp->nfma || results[3] != vp->nfms) {
print_hex(i, 2, " ");
@ -1400,6 +1549,8 @@ int test23(long arg)
print_hex(results[3], 16, "\r\n");
return i + 1;
}
if (check_fprf(results[3], false, fpscr))
return i + 0x101;
}
return 0;
}

Binary file not shown.
Loading…
Cancel
Save