Patchwork target-arm: Fix VMLA, VMLS, VNMLS, VNMLA handling of NaNs

login
register
mail settings
Submitter Peter Maydell
Date May 5, 2011, 6:35 p.m.
Message ID <1304620535-28885-1-git-send-email-peter.maydell@linaro.org>
Download mbox | patch
Permalink /patch/94297/
State New
Headers show

Comments

Peter Maydell - May 5, 2011, 6:35 p.m.
Correct handling of NaNs for VFP VMLA, VMLS, VNMLS and VNMLA requires that
we implement the set of negations and additions specified by the ARM ARM;
plausible looking simplifications like turning (-A + B) into (B - A) or
computing (A + B) rather than (B + A) result in selecting the wrong NaN or
returning a NaN with the wrong sign bit.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate.c |   53 ++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 40 insertions(+), 13 deletions(-)
Aurelien Jarno - May 14, 2011, 10:23 p.m.
On Thu, May 05, 2011 at 07:35:35PM +0100, Peter Maydell wrote:
> Correct handling of NaNs for VFP VMLA, VMLS, VNMLS and VNMLA requires that
> we implement the set of negations and additions specified by the ARM ARM;
> plausible looking simplifications like turning (-A + B) into (B - A) or
> computing (A + B) rather than (B + A) result in selecting the wrong NaN or
> returning a NaN with the wrong sign bit.
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  target-arm/translate.c |   53 ++++++++++++++++++++++++++++++++++++-----------
>  1 files changed, 40 insertions(+), 13 deletions(-)

Thanks, applied.

> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index a1af436..3c38364 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -909,6 +909,26 @@ VFP_OP2(div)
>  
>  #undef VFP_OP2
>  
> +static inline void gen_vfp_F1_mul(int dp)
> +{
> +    /* Like gen_vfp_mul() but put result in F1 */
> +    if (dp) {
> +        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, cpu_env);
> +    } else {
> +        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, cpu_env);
> +    }
> +}
> +
> +static inline void gen_vfp_F1_neg(int dp)
> +{
> +    /* Like gen_vfp_neg() but put result in F1 */
> +    if (dp) {
> +        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
> +    } else {
> +        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
> +    }
> +}
> +
>  static inline void gen_vfp_abs(int dp)
>  {
>      if (dp)
> @@ -3021,27 +3041,34 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
>              for (;;) {
>                  /* Perform the calculation.  */
>                  switch (op) {
> -                case 0: /* mac: fd + (fn * fm) */
> -                    gen_vfp_mul(dp);
> -                    gen_mov_F1_vreg(dp, rd);
> +                case 0: /* VMLA: fd + (fn * fm) */
> +                    /* Note that order of inputs to the add matters for NaNs */
> +                    gen_vfp_F1_mul(dp);
> +                    gen_mov_F0_vreg(dp, rd);
>                      gen_vfp_add(dp);
>                      break;
> -                case 1: /* nmac: fd - (fn * fm) */
> +                case 1: /* VMLS: fd + -(fn * fm) */
>                      gen_vfp_mul(dp);
> -                    gen_vfp_neg(dp);
> -                    gen_mov_F1_vreg(dp, rd);
> +                    gen_vfp_F1_neg(dp);
> +                    gen_mov_F0_vreg(dp, rd);
>                      gen_vfp_add(dp);
>                      break;
> -                case 2: /* msc: -fd + (fn * fm) */
> -                    gen_vfp_mul(dp);
> -                    gen_mov_F1_vreg(dp, rd);
> -                    gen_vfp_sub(dp);
> +                case 2: /* VNMLS: -fd + (fn * fm) */
> +                    /* Note that it isn't valid to replace (-A + B) with (B - A)
> +                     * or similar plausible looking simplifications
> +                     * because this will give wrong results for NaNs.
> +                     */
> +                    gen_vfp_F1_mul(dp);
> +                    gen_mov_F0_vreg(dp, rd);
> +                    gen_vfp_neg(dp);
> +                    gen_vfp_add(dp);
>                      break;
> -                case 3: /* nmsc: -fd - (fn * fm)  */
> +                case 3: /* VNMLA: -fd + -(fn * fm) */
>                      gen_vfp_mul(dp);
> +                    gen_vfp_F1_neg(dp);
> +                    gen_mov_F0_vreg(dp, rd);
>                      gen_vfp_neg(dp);
> -                    gen_mov_F1_vreg(dp, rd);
> -                    gen_vfp_sub(dp);
> +                    gen_vfp_add(dp);
>                      break;
>                  case 4: /* mul: fn * fm */
>                      gen_vfp_mul(dp);
> -- 
> 1.7.1
> 
> 
>

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index a1af436..3c38364 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -909,6 +909,26 @@  VFP_OP2(div)
 
 #undef VFP_OP2
 
+static inline void gen_vfp_F1_mul(int dp)
+{
+    /* Like gen_vfp_mul() but put result in F1 */
+    if (dp) {
+        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, cpu_env);
+    } else {
+        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, cpu_env);
+    }
+}
+
+static inline void gen_vfp_F1_neg(int dp)
+{
+    /* Like gen_vfp_neg() but put result in F1 */
+    if (dp) {
+        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
+    } else {
+        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
+    }
+}
+
 static inline void gen_vfp_abs(int dp)
 {
     if (dp)
@@ -3021,27 +3041,34 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
             for (;;) {
                 /* Perform the calculation.  */
                 switch (op) {
-                case 0: /* mac: fd + (fn * fm) */
-                    gen_vfp_mul(dp);
-                    gen_mov_F1_vreg(dp, rd);
+                case 0: /* VMLA: fd + (fn * fm) */
+                    /* Note that order of inputs to the add matters for NaNs */
+                    gen_vfp_F1_mul(dp);
+                    gen_mov_F0_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
-                case 1: /* nmac: fd - (fn * fm) */
+                case 1: /* VMLS: fd + -(fn * fm) */
                     gen_vfp_mul(dp);
-                    gen_vfp_neg(dp);
-                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_F1_neg(dp);
+                    gen_mov_F0_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
-                case 2: /* msc: -fd + (fn * fm) */
-                    gen_vfp_mul(dp);
-                    gen_mov_F1_vreg(dp, rd);
-                    gen_vfp_sub(dp);
+                case 2: /* VNMLS: -fd + (fn * fm) */
+                    /* Note that it isn't valid to replace (-A + B) with (B - A)
+                     * or similar plausible looking simplifications
+                     * because this will give wrong results for NaNs.
+                     */
+                    gen_vfp_F1_mul(dp);
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_vfp_neg(dp);
+                    gen_vfp_add(dp);
                     break;
-                case 3: /* nmsc: -fd - (fn * fm)  */
+                case 3: /* VNMLA: -fd + -(fn * fm) */
                     gen_vfp_mul(dp);
+                    gen_vfp_F1_neg(dp);
+                    gen_mov_F0_vreg(dp, rd);
                     gen_vfp_neg(dp);
-                    gen_mov_F1_vreg(dp, rd);
-                    gen_vfp_sub(dp);
+                    gen_vfp_add(dp);
                     break;
                 case 4: /* mul: fn * fm */
                     gen_vfp_mul(dp);