From 4282d37741d53363d6960a0e4092c89089017604 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 20 Apr 2025 14:30:28 +1000 Subject: [PATCH] FPU: Faster method for testing for 1-bits at right end of R At various points we need to set the X bit if any bit of R which would be shifted out by a right shift of N bits is a 1. We can do this by computing R | -R, which contains a 1 in the position of the right-most 1-bit in R and in all positions to the left, and zeroes to the right. That means we can test for the least-significant N bits being non-zero by testing whether bit N-1 of (R | -R) is a 1. Doing this uses fewer LUTs and has better timing than the old method of generating a mask, ANDing it with R, and testing whether the result is non-zero. Signed-off-by: Paul Mackerras --- fpu.vhdl | 51 ++++++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 5648012..16583cb 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -582,26 +582,6 @@ architecture behaviour of fpu is return shift_result; end; - -- Generate a mask with 0-bits on the left and 1-bits on the right which - -- selects the bits will be lost in doing a right shift. The shift - -- parameter is the bottom 6 bits of a negative shift count, - -- indicating a right shift. - function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is - variable mask_result: std_ulogic_vector(63 downto 0); - begin - mask_result := (others => '0'); - if is_X(shift) then - mask_result := (others => 'X'); - return mask_result; - end if; - for i in 0 to 63 loop - if i >= shift then - mask_result(63 - i) := '1'; - end if; - end loop; - return mask_result; - end; - -- Split a DP floating-point number into components and work out its class. 
-- If is_int = 1, the input is considered an integer function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_fp: std_ulogic; @@ -1051,6 +1031,7 @@ begin variable asign : std_ulogic; variable bneg : std_ulogic; variable ci : std_ulogic; + variable rormr : std_ulogic_vector(63 downto 0); begin v := r; v.complete := '0'; @@ -3258,22 +3239,26 @@ begin -- Data path. -- This has A and B input multiplexers, an adder, a shifter, -- count-leading-zeroes logic, and a result mux. + + -- If shifting right, test if bits of R will be shifted out of significance if r.longmask = '1' then - mshift := r.shift + to_signed(-29, EXP_BITS); - else - mshift := r.shift; - end if; - if is_X(mshift) then - mask := (others => 'X'); - elsif mshift < to_signed(-64, EXP_BITS) then - mask := (others => '1'); - elsif mshift >= to_signed(0, EXP_BITS) then - mask := (others => '0'); + mshift := to_signed(28, EXP_BITS); else - mask := right_mask(unsigned(mshift(5 downto 0))); + mshift := to_signed(-1, EXP_BITS); end if; - if (or (mask and r.r)) = '1' and set_x = '1' then - v.x := '1'; + mshift := mshift - r.shift; + if set_x = '1' and not is_X(mshift) and mshift >= to_signed(0, EXP_BITS) then + -- Instead of computing (R & right_mask(63-mshift)) != 0, + -- we compute (R | -R), which has the form 111...10...0 + -- where the rightmost 1 is in the same position as the + -- rightmost 1 in R. Then if bit (mshift) of that value + -- is 1, there must be a 1 in the rightmost (mshift + 1) + -- bits of R. + rormr := r.r or std_ulogic_vector(- signed(r.r)); + if mshift >= to_signed(64, EXP_BITS) then + mshift := to_signed(63, EXP_BITS); + end if; + v.x := v.x or rormr(to_integer(unsigned(mshift(5 downto 0)))); end if; asign := '0'; case opsel_a is