diff mbox

[4/5] target-ppc: use the softfloat float32_muladd function

Message ID 1347138767-19941-4-git-send-email-aurelien@aurel32.net
State New
Headers show

Commit Message

Aurelien Jarno Sept. 8, 2012, 9:12 p.m. UTC
Use the new softfloat float32_muladd() function to implement the vmaddfp
and vnmsubfp instructions. As a bonus we can get rid of the call to the
HANDLE_NAN3 macro, as the NaN handling is directly done at the softfloat
level.

Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-ppc/int_helper.c |   57 ++++++++++++-----------------------------------
 1 file changed, 14 insertions(+), 43 deletions(-)

Comments

Peter Maydell Sept. 9, 2012, 9:51 a.m. UTC | #1
On 8 September 2012 22:12, Aurelien Jarno <aurelien@aurel32.net> wrote:
> +#define VARITHFPFMA(suffix, type)                                       \
> +    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
> +                           ppc_avr_t *b, ppc_avr_t *c)                  \
> +    {                                                                   \
> +        int i;                                                          \
> +        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
> +            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
> +                                     type, &env->vec_status);           \
> +        }                                                               \
> +    }
> +VARITHFPFMA(maddfp, 0);
> +VARITHFPFMA(nmsubfp, float_muladd_negate_result);
> +#undef VARITHFPFMA
> +
>  #define VARITHSAT_CASE(type, op, cvt, element)                          \
>      {                                                                   \
>          type result = (type)a->element[i] op (type)b->element[i];       \
> -void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> -                     ppc_avr_t *b, ppc_avr_t *c)
> -{
> -    int i;
> -
> -    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
> -        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
> -            /* Need to do the computation is higher precision and round
> -             * once at the end.  */
> -            float64 af, bf, cf, t;
> -
> -            af = float32_to_float64(a->f[i], &env->vec_status);
> -            bf = float32_to_float64(b->f[i], &env->vec_status);
> -            cf = float32_to_float64(c->f[i], &env->vec_status);
> -            t = float64_mul(af, cf, &env->vec_status);
> -            t = float64_sub(t, bf, &env->vec_status);
> -            t = float64_chs(t);
> -            r->f[i] = float64_to_float32(t, &env->vec_status);
> -        }
> -    }
> -}

I mentioned this in my comment on the other patch, but just to attach
it to the right patch for the benefit of the archives:
the code here for vnmsubfp is (correctly) doing a subtraction of bf
and then negating the final result, so you need to pass float32_muladd
the flags float_muladd_negate_result | float_muladd_negate_c, not just
float_muladd_negate_result.

thanks
-- PMM
Aurelien Jarno Sept. 9, 2012, 10:03 a.m. UTC | #2
On Sun, Sep 09, 2012 at 10:51:20AM +0100, Peter Maydell wrote:
> On 8 September 2012 22:12, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > +#define VARITHFPFMA(suffix, type)                                       \
> > +    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
> > +                           ppc_avr_t *b, ppc_avr_t *c)                  \
> > +    {                                                                   \
> > +        int i;                                                          \
> > +        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
> > +            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
> > +                                     type, &env->vec_status);           \
> > +        }                                                               \
> > +    }
> > +VARITHFPFMA(maddfp, 0);
> > +VARITHFPFMA(nmsubfp, float_muladd_negate_result);
> > +#undef VARITHFPFMA
> > +
> >  #define VARITHSAT_CASE(type, op, cvt, element)                          \
> >      {                                                                   \
> >          type result = (type)a->element[i] op (type)b->element[i];       \
> > -void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> > -                     ppc_avr_t *b, ppc_avr_t *c)
> > -{
> > -    int i;
> > -
> > -    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
> > -        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
> > -            /* Need to do the computation is higher precision and round
> > -             * once at the end.  */
> > -            float64 af, bf, cf, t;
> > -
> > -            af = float32_to_float64(a->f[i], &env->vec_status);
> > -            bf = float32_to_float64(b->f[i], &env->vec_status);
> > -            cf = float32_to_float64(c->f[i], &env->vec_status);
> > -            t = float64_mul(af, cf, &env->vec_status);
> > -            t = float64_sub(t, bf, &env->vec_status);
> > -            t = float64_chs(t);
> > -            r->f[i] = float64_to_float32(t, &env->vec_status);
> > -        }
> > -    }
> > -}
> 
> I mentioned this in my comment on the other patch, but just to attach
> it to the right patch for the benefit of the archives:
> the code here for vnmsubfp is (correctly) doing a subtraction of bf
> and then negating the final result, so you need to pass float32_muladd
> the flags float_muladd_negate_result | float_muladd_negate_c, not just
> float_muladd_negate_result.
> 

Correct, or alternatively it could use float_muladd_negate_product
instead. I'll send an updated patch later.
diff mbox

Patch

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 6141243..51cb97c 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -418,6 +418,20 @@  VARITHFP(minfp, float32_min)
 VARITHFP(maxfp, float32_max)
 #undef VARITHFP
 
+#define VARITHFPFMA(suffix, type) /* type: float_muladd_* flag bits */  \
+    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
+                           ppc_avr_t *b, ppc_avr_t *c)                  \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
+            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
+                                     type, &env->vec_status);           \
+        }                                                               \
+    }
+VARITHFPFMA(maddfp, 0);                     /* r = a * c + b */
+VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
+#undef VARITHFPFMA
+
 #define VARITHSAT_CASE(type, op, cvt, element)                          \
     {                                                                   \
         type result = (type)a->element[i] op (type)b->element[i];       \
@@ -649,27 +663,6 @@  VCT(uxs, cvtsduw, u32)
 VCT(sxs, cvtsdsw, s32)
 #undef VCT
 
-void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
-                    ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation in higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_add(t, bf, &env->vec_status);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
 {
@@ -909,28 +902,6 @@  VMUL(uh, u16, u32)
 #undef VMUL_DO
 #undef VMUL
 
-void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
-                     ppc_avr_t *b, ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation is higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_sub(t, bf, &env->vec_status);
-            t = float64_chs(t);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
 {