FPU: Implement fmr and related instructions

This implements fmr, fneg, fabs, fnabs and fcpsgn and adds tests
for them.

This adds logic to unpack and repack floating-point data from the
64-bit packed form (as stored in memory and the register file) into
the unpacked form in the fpr_reg_type record.  This is not strictly
necessary for fmr et al., but will be useful for when we do actual
arithmetic.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/245/head
Paul Mackerras 5 years ago
parent cb27353f37
commit b628af6176

@ -428,6 +428,11 @@ architecture behaviour of decode1 is
2#011000100# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/6=mtfsfi
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
2#100000000# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/8=fcpsgn
2#100000001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/8=fneg
2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr
2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
others => illegal_inst
);


@ -80,6 +80,8 @@ architecture behaviour of decode2 is
return (is_fast_spr(ispr), ispr, reg_data);
elsif t = CIA then
return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
@ -300,6 +302,7 @@ begin
end process;

r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU

@ -23,7 +23,7 @@ package decode_types is
OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED
);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRS);

@ -24,9 +24,20 @@ entity fpu is
end entity fpu;

architecture behaviour of fpu is
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);

constant EXP_BITS : natural := 13;

type fpu_reg_type is record
class : fp_number_class;
negative : std_ulogic;
exponent : signed(EXP_BITS-1 downto 0); -- unbiased
mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
end record;

type state_t is (IDLE,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF);
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_FMR);

type reg_type is record
state : state_t;
@ -41,9 +52,14 @@ architecture behaviour of fpu is
is_cmp : std_ulogic;
single_prec : std_ulogic;
fpscr : std_ulogic_vector(31 downto 0);
b : std_ulogic_vector(63 downto 0);
a : fpu_reg_type;
b : fpu_reg_type;
r : std_ulogic_vector(63 downto 0);
result_sign : std_ulogic;
result_class : fp_number_class;
result_exp : signed(EXP_BITS-1 downto 0);
writing_back : std_ulogic;
int_result : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
end record;
@ -51,6 +67,72 @@ architecture behaviour of fpu is
signal r, rin : reg_type;

signal fp_result : std_ulogic_vector(63 downto 0);
signal opsel_r : std_ulogic_vector(1 downto 0);
signal result : std_ulogic_vector(63 downto 0);

-- Split a DP floating-point number into components and work out its class.
-- If is_int = 1, the input is considered an integer
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
variable r : fpu_reg_type;
variable exp_nz : std_ulogic;
variable exp_ao : std_ulogic;
variable frac_nz : std_ulogic;
variable cls : std_ulogic_vector(2 downto 0);
begin
r.negative := fpr(63);
exp_nz := or (fpr(62 downto 52));
exp_ao := and (fpr(62 downto 52));
frac_nz := or (fpr(51 downto 0));
if is_int = '0' then
r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
if exp_nz = '0' then
r.exponent := to_signed(-1022, EXP_BITS);
end if;
r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
cls := exp_ao & exp_nz & frac_nz;
case cls is
when "000" => r.class := ZERO;
when "001" => r.class := FINITE; -- denormalized
when "010" => r.class := FINITE;
when "011" => r.class := FINITE;
when "110" => r.class := INFINITY;
when others => r.class := NAN;
end case;
else
r.mantissa := fpr;
r.exponent := (others => '0');
if (fpr(63) or exp_nz or frac_nz) = '1' then
r.class := FINITE;
else
r.class := ZERO;
end if;
end if;
return r;
end;

-- Construct a DP floating-point result from components
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
mantissa: std_ulogic_vector) return std_ulogic_vector is
variable result : std_ulogic_vector(63 downto 0);
begin
result := (others => '0');
result(63) := sign;
case class is
when ZERO =>
when FINITE =>
if mantissa(54) = '1' then
-- normalized number
result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
end if;
result(51 downto 0) := mantissa(53 downto 2);
when INFINITY =>
result(62 downto 52) := "11111111111";
when NAN =>
result(62 downto 52) := "11111111111";
result(51 downto 0) := mantissa(53 downto 2);
end case;
return result;
end;

begin
fpu_0: process(clk)
@ -85,14 +167,18 @@ begin

fpu_1: process(all)
variable v : reg_type;
variable adec : fpu_reg_type;
variable bdec : fpu_reg_type;
variable fpscr_mask : std_ulogic_vector(31 downto 0);
variable illegal : std_ulogic;
variable j, k : integer;
variable flm : std_ulogic_vector(7 downto 0);
variable int_input : std_ulogic;
begin
v := r;
illegal := '0';
v.busy := '0';
int_input := '0';

-- capture incoming instruction
if e_in.valid = '1' then
@ -101,6 +187,7 @@ begin
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;
v.int_result := '0';
v.rc := e_in.rc;
v.is_cmp := e_in.out_cr;
if e_in.out_cr = '0' then
@ -108,11 +195,19 @@ begin
else
v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
end if;
v.b := e_in.frb;
int_input := '0';
if e_in.op = OP_FPOP_I then
int_input := '1';
end if;
adec := decode_dp(e_in.fra, int_input);
bdec := decode_dp(e_in.frb, int_input);
v.a := adec;
v.b := bdec;
end if;

v.writing_back := '0';
v.instr_done := '0';
opsel_r <= "00";
fpscr_mask := (others => '1');

case r.state is
@ -133,6 +228,8 @@ begin
else
v.state := DO_MTFSF;
end if;
when "01000" =>
v.state := DO_FMR;
when others =>
illegal := '1';
end case;
@ -177,7 +274,9 @@ begin
v.state := IDLE;

when DO_MFFS =>
v.int_result := '1';
v.writing_back := '1';
opsel_r <= "10";
case r.insn(20 downto 16) is
when "00000" =>
-- mffs
@ -191,7 +290,7 @@ begin
-- mffscrn
fpscr_mask := x"000000FF";
v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
r.b(FPSCR_RN+1 downto FPSCR_RN);
r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
when "10111" =>
-- mffscrni
fpscr_mask := x"000000FF";
@ -216,19 +315,48 @@ begin
for i in 0 to 7 loop
k := i * 4;
if flm(i) = '1' then
v.fpscr(k + 3 downto k) := r.b(k + 3 downto k);
v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
end if;
end loop;
v.instr_done := '1';
v.state := IDLE;

when DO_FMR =>
v.result_class := r.b.class;
v.result_exp := r.b.exponent;
if r.insn(9) = '1' then
v.result_sign := '0'; -- fabs
elsif r.insn(8) = '1' then
v.result_sign := '1'; -- fnabs
elsif r.insn(7) = '1' then
v.result_sign := r.b.negative; -- fmr
elsif r.insn(6) = '1' then
v.result_sign := not r.b.negative; -- fneg
else
v.result_sign := r.a.negative; -- fcpsgn
end if;
v.writing_back := '1';
v.instr_done := '1';
v.state := IDLE;

end case;

-- Data path.
-- Just enough to read FPSCR for now.
v.r := x"00000000" & (r.fpscr and fpscr_mask);
case opsel_r is
when "00" =>
result <= r.b.mantissa;
when "10" =>
result <= x"00000000" & (r.fpscr and fpscr_mask);
when others =>
result <= (others => '0');
end case;
v.r := result;

if r.int_result = '1' then
fp_result <= r.r;
else
fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r);
end if;

v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
(or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));

@ -438,6 +438,39 @@ int fpu_test_5(void)
return 0;
}

#define SIGN 0x8000000000000000ul

int test6(long arg)
{
long i;
unsigned long results[6];
unsigned long v;

for (i = 0; i < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++i) {
v = sp_dp_equiv[i].dp;
asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1)"
: : "m" (sp_dp_equiv[i].dp), "b" (results) : "memory");
asm("fabs 9,6; fnabs 10,6; stfd 9,16(%0); stfd 10,24(%0)"
: : "b" (results) : "memory");
asm("fcpsgn 4,9,3; stfd 4,32(%0); fcpsgn 5,10,3; stfd 5,40(%0)"
: : "b" (results) : "memory");
if (results[0] != v ||
results[1] != (v ^ SIGN) ||
results[2] != (v & ~SIGN) ||
results[3] != (v | SIGN) ||
results[4] != (v & ~SIGN) ||
results[5] != (v | SIGN))
return i + 1;
}
return 0;
}

int fpu_test_6(void)
{
enable_fp();
return trapit(0, test6);
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -469,6 +502,7 @@ int main(void)
do_test(3, fpu_test_3);
do_test(4, fpu_test_4);
do_test(5, fpu_test_5);
do_test(6, fpu_test_6);

return fail;
}

Binary file not shown.

@ -3,3 +3,4 @@ test 02:PASS
test 03:PASS
test 04:PASS
test 05:PASS
test 06:PASS

Loading…
Cancel
Save