diff --git a/fpu.vhdl b/fpu.vhdl
index 60640af..ebbb564 100644
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -53,7 +53,7 @@ architecture behaviour of fpu is
                      DO_FCFID, DO_FCTI,
                      DO_FRSP, DO_FRSP_2, DO_FRI,
                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
-                     DO_FRE, DO_FRSQRTE,
+                     DO_FRE,
                      DO_FSEL,
                      DO_IDIVMOD,
                      FRI_1,
@@ -62,10 +62,9 @@ architecture behaviour of fpu is
                      MULT_1,
                      FMADD_0, FMADD_1, FMADD_2, FMADD_3,
                      FMADD_4, FMADD_5, FMADD_6,
-                     LOOKUP,
                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
                      FRE_1,
-                     RSQRT_1,
+                     SQRT_ODD, RSQRT_1,
                      FTDIV_1,
                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
@@ -76,9 +75,8 @@ architecture behaviour of fpu is
                      ROUND_UFLOW, ROUND_OFLOW,
                      ROUNDING, ROUNDING_2, ROUNDING_3,
                      DENORM,
-                     RENORM_A, RENORM_A2,
-                     RENORM_B, RENORM_B2,
-                     RENORM_C, RENORM_C2,
+                     RENORM_A, RENORM_B, RENORM_C,
+                     RENORM_1, RENORM_2,
                      IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
                      IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3,
                      IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5,
@@ -174,6 +172,7 @@ architecture behaviour of fpu is
         res_int      : std_ulogic;
         exec_state   : state_t;
         cycle_1      : std_ulogic;
+        regsel       : std_ulogic_vector(1 downto 0);
     end record;
 
     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
@@ -309,7 +308,7 @@ architecture behaviour of fpu is
         2#10110# => DO_FSQRT,
         2#11000# => DO_FRE,
         2#11001# => DO_FMUL,
-        2#11010# => DO_FRSQRTE,
+        2#11010# => DO_FSQRT,
         2#11100# => DO_FMADD,
         2#11101# => DO_FMADD,
         2#11110# => DO_FMADD,
@@ -870,6 +869,7 @@ begin
         variable rsgn_op     : std_ulogic_vector(1 downto 0);
         variable is_nan_inf  : std_ulogic;
         variable is_zero_den : std_ulogic;
+        variable set_reg_ind : std_ulogic;
     begin
         v := r;
         v.complete := '0';
@@ -1170,6 +1170,7 @@ begin
         mult_mask := '0';
         rnd_b32 := '0';
         illegal := '0';
+        set_reg_ind := '0';
 
         re_sel1 <= REXP1_ZERO;
         re_sel2 <= REXP2_CON;
@@ -1208,6 +1209,7 @@ begin
                 v.x := '0';
                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
                 set_s := '1';
+                v.regsel := AIN_R;
 
             when DO_NAN_INF =>
                 -- At least one floating-point operand is infinity or NaN
@@ -1331,6 +1333,14 @@ begin
                         -- This will trigger for fmul as well as fmadd/sub, but
                         -- it doesn't matter since r.is_subtract = 0 for fmul.
                         rsgn_op := RSGN_SUB;
+                    end if;
+                    if r.a.denorm = '1' and (r.is_multiply = '1' or r.is_inverse = '1') then
+                        v.state := RENORM_A;
+                    elsif r.c.denorm = '1' then
+                        v.state := RENORM_C;
+                    elsif r.b.denorm = '1' and (r.is_inverse = '1' or r.is_sqrt = '1') then
+                        v.state := RENORM_B;
+                    elsif r.is_multiply = '1' and r.b.class = ZERO then
                         v.state := DO_FMUL;
                     else
                         v.state := r.exec_state;
@@ -1639,16 +1649,8 @@ begin
                 re_sel1 <= REXP1_A;
                 re_sel2 <= REXP2_C;
                 re_set_result <= '1';
-                -- Renormalize denorm operands
-                if r.a.denorm = '1' then
-                    v.state := RENORM_A;
-                elsif r.c.denorm = '1' then
-                    opsel_a <= AIN_C;
-                    v.state := RENORM_C;
-                else
-                    f_to_multiply.valid <= '1';
-                    v.state := MULT_1;
-                end if;
+                f_to_multiply.valid <= '1';
+                v.state := MULT_1;
 
             when DO_FDIV =>
                 opsel_a <= AIN_A;
@@ -1658,16 +1660,8 @@ begin
                 re_neg2 <= '1';
                 re_set_result <= '1';
                 v.count := "00";
-                -- Renormalize denorm operands
-                if r.a.denorm = '1' then
-                    v.state := RENORM_A;
-                elsif r.b.denorm = '1' then
-                    opsel_a <= AIN_B;
-                    v.state := RENORM_B;
-                else
-                    v.first := '1';
-                    v.state := DIV_2;
-                end if;
+                v.first := '1';
+                v.state := DIV_2;
 
             when DO_FSEL =>
                 rsgn_op := RSGN_SEL;
@@ -1691,14 +1685,13 @@ begin
                 if r.b.negative = '1' then
                     v.fpscr(FPSCR_VXSQRT) := '1';
                     qnan_result := '1';
-                elsif r.b.denorm = '1' then
-                    v.state := RENORM_B;
-                elsif r.b.exponent(0) = '0' then
+                end if;
+                if r.b.exponent(0) = '1' then
+                    v.state := SQRT_ODD;
+                elsif r.is_inverse = '0' then
                     v.state := SQRT_1;
                 else
-                    -- set shift to 1
-                    rs_con2 <= RSCON2_1;
-                    v.state := RENORM_B2;
+                    v.state := RSQRT_1;
                 end if;
 
             when DO_FRE =>
@@ -1706,29 +1699,7 @@ begin
                 v.result_class := r.b.class;
                 re_sel2 <= REXP2_B;
                 re_set_result <= '1';
-                if r.b.denorm = '1' then
-                    v.state := RENORM_B;
-                else
-                    v.state := FRE_1;
-                end if;
-
-            when DO_FRSQRTE =>
-                opsel_a <= AIN_B;
-                v.result_class := r.b.class;
-                re_sel2 <= REXP2_B;
-                re_set_result <= '1';
-                -- set shift to 1
-                rs_con2 <= RSCON2_1;
-                if r.b.negative = '1' then
-                    v.fpscr(FPSCR_VXSQRT) := '1';
-                    qnan_result := '1';
-                elsif r.b.denorm = '1' then
-                    v.state := RENORM_B;
-                elsif r.b.exponent(0) = '0' then
-                    v.state := RSQRT_1;
-                else
-                    v.state := RENORM_B2;
-                end if;
+                v.state := FRE_1;
 
             when DO_FMADD =>
                 -- fmadd, fmsub, fnmadd, fnmsub
@@ -1740,14 +1711,7 @@ begin
                 re_set_result <= '1';
                 -- put b.exp into shift
                 rs_sel1 <= RSH1_B;
-                -- Make sure A and C are normalized
-                if r.a.denorm = '1' then
-                    opsel_a <= AIN_A;
-                    v.state := RENORM_A;
-                elsif r.c.denorm = '1' then
-                    opsel_a <= AIN_C;
-                    v.state := RENORM_C;
-                elsif (r.a.exponent + r.c.exponent + 1) < r.b.exponent then
+                if (r.a.exponent + r.c.exponent + 1) < r.b.exponent then
                     -- addend is bigger, do multiply first
                     -- if subtracting, sign is opposite to initial estimate
                     f_to_multiply.valid <= '1';
@@ -1759,68 +1723,48 @@ begin
                 end if;
 
             when RENORM_A =>
-                rs_norm <= '1';
-                v.state := RENORM_A2;
-
-            when RENORM_A2 =>
-                set_a := '1';
-                re_sel2 <= REXP2_NE;
+                -- Get A into R
+                opsel_a <= AIN_A;
+                v.regsel := AIN_A;  -- recorded so that set_reg_ind in RENORM_2 asserts set_a
+                re_sel1 <= REXP1_A;
                 re_set_result <= '1';
-                if r.is_multiply = '1' then
-                    opsel_a <= AIN_C;
-                    if r.c.mantissa(UNIT_BIT) = '1' then
-                        if r.is_addition = '0' or r.b.class = ZERO then
-                            v.first := '1';
-                            v.state := MULT_1;
-                        else
-                            v.state := DO_FMADD;
-                        end if;
-                    else
-                        v.state := RENORM_C;
-                    end if;
-                else
-                    opsel_a <= AIN_B;
-                    if r.b.mantissa(UNIT_BIT) = '1' then
-                        v.first := '1';
-                        v.state := DIV_2;
-                    else
-                        v.state := RENORM_B;
-                    end if;
-                end if;
+                v.a.denorm := '0';  -- clear the flag that selected RENORM_A
+                v.state := RENORM_1;  -- continue with the RENORM_1/RENORM_2 steps shared with RENORM_B and RENORM_C
 
             when RENORM_B =>
-                rs_norm <= '1';
-                renorm_sqrt := r.is_sqrt;
-                v.state := RENORM_B2;
-
-            when RENORM_B2 =>
-                set_b := '1';
-                -- For fdiv, we need to increase result_exp by shift rather
-                -- than decreasing it as for fre/frsqrte and fsqrt.
-                -- We do that by negating r.shift in this cycle and then
-                -- setting result_exp to new_exp in the next cycle
-                if r.use_a = '1' then
-                    rs_sel1 <= RSH1_S;
-                    rs_neg1 <= '1';
-                else
-                    re_sel2 <= REXP2_NE;
-                    re_set_result <= '1';
-                end if;
-                v.state := LOOKUP;
+                -- Get B into R
+                opsel_a <= AIN_B;
+                v.regsel := AIN_B;  -- recorded so that set_reg_ind in RENORM_2 asserts set_b
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                v.b.denorm := '0';  -- cleared so RENORM_2 does not select RENORM_B again
+                v.state := RENORM_1;  -- continue with the RENORM_1/RENORM_2 steps shared with RENORM_A and RENORM_C
 
             when RENORM_C =>
+                -- Get C into R
+                opsel_a <= AIN_C;
+                v.regsel := AIN_C;  -- recorded so that set_reg_ind in RENORM_2 asserts set_c
+                re_sel2 <= REXP2_C;
+                re_set_result <= '1';
+                v.c.denorm := '0';  -- cleared so RENORM_2 does not select RENORM_C again
+                v.state := RENORM_1;  -- continue with the RENORM_1/RENORM_2 steps shared with RENORM_A and RENORM_B
+
+            when RENORM_1 =>  -- common step entered from RENORM_A, RENORM_B and RENORM_C
                 rs_norm <= '1';
-                v.state := RENORM_C2;
+                renorm_sqrt := r.is_sqrt;  -- same assignment the removed RENORM_B step made
+                v.state := RENORM_2;
 
-            when RENORM_C2 =>
-                set_c := '1';
-                re_sel2 <= REXP2_NE;
-                re_set_result <= '1';
-                if r.is_addition = '0' or r.b.class = ZERO then
-                    v.first := '1';
-                    v.state := MULT_1;
+            when RENORM_2 =>  -- entered from RENORM_1 and from SQRT_ODD
+                set_reg_ind := '1';  -- decoded after the state case into set_a/set_b/set_c using r.regsel
+                if r.c.denorm = '1' then
+                    -- must be either fmul or fmadd/sub
+                    v.state := RENORM_C;
+                elsif r.b.denorm = '1' and r.is_addition = '0' then
+                    v.state := RENORM_B;
+                elsif r.is_multiply = '1' and r.b.class = ZERO then
+                    v.state := DO_FMUL;
                 else
-                    v.state := DO_FMADD;
+                    v.state := r.exec_state;  -- no further RENORM_B/RENORM_C pass selected: resume at the state held in r.exec_state
                 end if;
 
             when ADD_1 =>
@@ -2017,28 +1961,6 @@ begin
                     v.state := NORMALIZE;
                 end if;
 
-            when LOOKUP =>
-                -- wait one cycle for inverse_table[B] lookup
-                -- if this is a division, compute exponent
-                -- (see comment on RENORM_B2 above)
-                opsel_a <= AIN_B;
-                if r.use_a = '1' then
-                    re_sel2 <= REXP2_NE;
-                    re_set_result <= '1';
-                end if;
-                v.first := '1';
-                if r.is_sqrt = '1' then
-                    if r.is_inverse = '1' then
-                        v.state := RSQRT_1;
-                    else
-                        v.state := SQRT_1;
-                    end if;
-                elsif r.use_a = '1' then
-                    v.state := DIV_2;
-                else
-                    v.state := FRE_1;
-                end if;
-
             when DIV_2 =>
                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
                 msel_1 <= MUL1_B;
@@ -2135,6 +2057,12 @@ begin
                     v.doing_ftdiv := "10";
                 end if;
 
+            when SQRT_ODD =>  -- entered from DO_FSQRT when r.b.exponent(0) = '1'
+                -- set shift to 1
+                rs_con2 <= RSCON2_1;
+                v.regsel := AIN_B;  -- makes set_reg_ind in RENORM_2 assert set_b
+                v.state := RENORM_2;  -- goes straight to RENORM_2, skipping the RENORM_1 step
+
             when RSQRT_1 =>
                 opsel_r <= RES_MISC;
                 misc_sel <= "101";
@@ -3344,6 +3272,17 @@ begin
             end case;
         end if;
 
+        if set_reg_ind = '1' then  -- set_reg_ind is raised by the RENORM_2 state
+            case r.regsel is  -- r.regsel was recorded by RENORM_A/RENORM_B/RENORM_C or SQRT_ODD
+                when AIN_A =>
+                    set_a := '1';
+                when AIN_B =>
+                    set_b := '1';
+                when AIN_C =>
+                    set_c := '1';
+                when others =>  -- any other regsel value leaves set_a/set_b/set_c as they were
+            end case;
+        end if;
         if set_a = '1' or set_a_exp = '1' then
             v.a.exponent := new_exp;
         end if;