Message ID | 20190322204320.17777-1-cota@braap.org |
---|---|
State | New |
Headers | show |
Series | [v2,for-4.0] hardfloat: fix float32/64 fused multiply-add | expand |
Emilio G. Cota <cota@braap.org> writes: > From: Kito Cheng <kito.cheng@gmail.com> > > Before falling back to softfloat FMA, we do not restore the original > values of inputs A and C. Fix it. > > This bug was caught by running gcc's testsuite on RISC-V qemu. > > Note that this change gives a small perf increase for fp-bench: > > Host: Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz > Command: perf stat -r 3 taskset -c 0 ./fp-bench -o mulAdd -p $prec Queued to for-4.0/testing-and-fpu-fixes, thanks. > > - $prec = single: > - before: > 101.71 MFlops > 102.18 MFlops > 100.96 MFlops > - after: > 103.63 MFlops > 103.05 MFlops > 102.96 MFlops > > - $prec = double: > - before: > 173.10 MFlops > 173.93 MFlops > 172.11 MFlops > - after: > 178.49 MFlops > 178.88 MFlops > 178.66 MFlops > > Signed-off-by: Kito Cheng <kito.cheng@gmail.com> > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > fpu/softfloat.c | 10 ++++++++++ > 1 file changed, 10 insertions(+) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 4610738ab1..2ba36ec370 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -1596,6 +1596,9 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) > } > ur.h = up.h + uc.h; > } else { > + union_float32 ua_orig = ua; > + union_float32 uc_orig = uc; > + > if (flags & float_muladd_negate_product) { > ua.h = -ua.h; > } > @@ -1608,6 +1611,8 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) > if (unlikely(f32_is_inf(ur))) { > s->float_exception_flags |= float_flag_overflow; > } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { > + ua = ua_orig; > + uc = uc_orig; > goto soft; > } > } > @@ -1662,6 +1667,9 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) > } > ur.h = up.h + uc.h; > } else { > + union_float64 ua_orig = ua; > + union_float64 uc_orig = uc; > + > if (flags & float_muladd_negate_product) { > ua.h = -ua.h; > } > @@ -1674,6 +1682,8 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) > if (unlikely(f64_is_inf(ur))) { > s->float_exception_flags |= float_flag_overflow; > } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { > + ua = ua_orig; > + uc = uc_orig; > goto soft; > } > } -- Alex Bennée
On 3/22/19 1:43 PM, Emilio G. Cota wrote: > From: Kito Cheng <kito.cheng@gmail.com> > > Before falling back to softfloat FMA, we do not restore the original > values of inputs A and C. Fix it. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
On Fri, 22 Mar 2019 13:43:20 PDT (-0700), cota@braap.org wrote: > From: Kito Cheng <kito.cheng@gmail.com> > > Before falling back to softfloat FMA, we do not restore the original > values of inputs A and C. Fix it. > > This bug was caught by running gcc's testsuite on RISC-V qemu. > > Note that this change gives a small perf increase for fp-bench: > > Host: Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz > Command: perf stat -r 3 taskset -c 0 ./fp-bench -o mulAdd -p $prec > > - $prec = single: > - before: > 101.71 MFlops > 102.18 MFlops > 100.96 MFlops > - after: > 103.63 MFlops > 103.05 MFlops > 102.96 MFlops > > - $prec = double: > - before: > 173.10 MFlops > 173.93 MFlops > 172.11 MFlops > - after: > 178.49 MFlops > 178.88 MFlops > 178.66 MFlops > > Signed-off-by: Kito Cheng <kito.cheng@gmail.com> > Signed-off-by: Emilio G. Cota <cota@braap.org> Tested-by: Palmer Dabbelt <palmer@sifive.com> Thanks for fixing this! > --- > fpu/softfloat.c | 10 ++++++++++ > 1 file changed, 10 insertions(+) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 4610738ab1..2ba36ec370 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -1596,6 +1596,9 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) > } > ur.h = up.h + uc.h; > } else { > + union_float32 ua_orig = ua; > + union_float32 uc_orig = uc; > + > if (flags & float_muladd_negate_product) { > ua.h = -ua.h; > } > @@ -1608,6 +1611,8 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) > if (unlikely(f32_is_inf(ur))) { > s->float_exception_flags |= float_flag_overflow; > } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { > + ua = ua_orig; > + uc = uc_orig; > goto soft; > } > } > @@ -1662,6 +1667,9 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) > } > ur.h = up.h + uc.h; > } else { > + union_float64 ua_orig = ua; > + union_float64 uc_orig = uc; > + > if (flags & float_muladd_negate_product) { > ua.h = -ua.h; > } > @@ -1674,6 +1682,8 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) > if (unlikely(f64_is_inf(ur))) { > s->float_exception_flags |= float_flag_overflow; > } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { > + ua = ua_orig; > + uc = uc_orig; > goto soft; > } > }
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 4610738ab1..2ba36ec370 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1596,6 +1596,9 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) } ur.h = up.h + uc.h; } else { + union_float32 ua_orig = ua; + union_float32 uc_orig = uc; + if (flags & float_muladd_negate_product) { ua.h = -ua.h; } @@ -1608,6 +1611,8 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) if (unlikely(f32_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { + ua = ua_orig; + uc = uc_orig; goto soft; } } @@ -1662,6 +1667,9 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) } ur.h = up.h + uc.h; } else { + union_float64 ua_orig = ua; + union_float64 uc_orig = uc; + if (flags & float_muladd_negate_product) { ua.h = -ua.h; } @@ -1674,6 +1682,8 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) if (unlikely(f64_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { + ua = ua_orig; + uc = uc_orig; goto soft; } }