Browse Source

hardfloat: fix float32/64 fused multiply-add

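The hardfloat path modifies inputs A and C in place (for example, A is
negated when float_muladd_negate_product is set). When that path then
falls back to softfloat FMA, the original values of A and C were not
restored, so the fallback ran on the modified inputs. Fix it by saving
both values on entry and restoring them before the fallback.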

This bug was caught by running gcc's testsuite on RISC-V qemu.

Note that this change gives a small perf increase for fp-bench:

  Host: Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz
  Command: perf stat -r 3 taskset -c 0 ./fp-bench -o mulAdd -p $prec

- $prec = single:
  - before:
    101.71 MFlops
    102.18 MFlops
    100.96 MFlops
  - after:
    103.63 MFlops
    103.05 MFlops
    102.96 MFlops

- $prec = double:
  - before:
    173.10 MFlops
    173.93 MFlops
    172.11 MFlops
  - after:
    178.49 MFlops
    178.88 MFlops
    178.66 MFlops

Signed-off-by: Kito Cheng <kito.cheng@gmail.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <2019032220.17777-1-cota@braap.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
tags/v4.0.0-rc1
Kito Cheng 2 months ago
parent
commit
896f51fbfa
1 changed file with 10 additions and 0 deletions
  1. 10
    0
      fpu/softfloat.c

+ 10
- 0
fpu/softfloat.c View File

@@ -1596,6 +1596,9 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1596 1596
         }
1597 1597
         ur.h = up.h + uc.h;
1598 1598
     } else {
1599
+        union_float32 ua_orig = ua;
1600
+        union_float32 uc_orig = uc;
1601
+
1599 1602
         if (flags & float_muladd_negate_product) {
1600 1603
             ua.h = -ua.h;
1601 1604
         }
@@ -1608,6 +1611,8 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1608 1611
         if (unlikely(f32_is_inf(ur))) {
1609 1612
             s->float_exception_flags |= float_flag_overflow;
1610 1613
         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1614
+            ua = ua_orig;
1615
+            uc = uc_orig;
1611 1616
             goto soft;
1612 1617
         }
1613 1618
     }
@@ -1662,6 +1667,9 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1662 1667
         }
1663 1668
         ur.h = up.h + uc.h;
1664 1669
     } else {
1670
+        union_float64 ua_orig = ua;
1671
+        union_float64 uc_orig = uc;
1672
+
1665 1673
         if (flags & float_muladd_negate_product) {
1666 1674
             ua.h = -ua.h;
1667 1675
         }
@@ -1674,6 +1682,8 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1674 1682
         if (unlikely(f64_is_inf(ur))) {
1675 1683
             s->float_exception_flags |= float_flag_overflow;
1676 1684
         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1685
+            ua = ua_orig;
1686
+            uc = uc_orig;
1677 1687
             goto soft;
1678 1688
         }
1679 1689
     }

Loading…
Cancel
Save