
[v2,11/11] target-arm: Add support for AArch32 64bit VCVTB and VCVTT

Message ID: 1390908155-23475-11-git-send-email-will.newton@linaro.org
State: New

Commit Message

Will Newton Jan. 28, 2014, 11:22 a.m. UTC
Add support for the AArch32 floating-point half-precision to double-
precision conversion VCVTB and VCVTT instructions.

Signed-off-by: Will Newton <will.newton@linaro.org>
---
 target-arm/translate.c | 62 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 14 deletions(-)

Comments

Peter Maydell Jan. 28, 2014, 3:26 p.m. UTC | #1
On 28 January 2014 11:22, Will Newton <will.newton@linaro.org> wrote:
> Add support for the AArch32 floating-point half-precision to double-
> precision conversion VCVTB and VCVTT instructions.
>
> Signed-off-by: Will Newton <will.newton@linaro.org>
> ---
>  target-arm/translate.c | 62 ++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 48 insertions(+), 14 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index e701c0f..dfda2c4 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -3142,14 +3142,16 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                      VFP_DREG_N(rn, insn);
>                  }
>
> -                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
> +                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
> +                                 ((rn & 0x1e) == 0x6))) {
>                      /* Integer or single precision destination.  */
>                      rd = VFP_SREG_D(insn);

You should update the comment here, since the destination
is halfprec in the case you've added.
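For reference, here is a quick standalone sketch (illustration only, not
QEMU code) of which rn values the two masks in this hunk pick out, going
by the case labels later in the patch:

    #include <stdio.h>

    int main(void)
    {
        /* rn values 4 and 5 are VCVTB/VCVTT with a half-precision source;
         * rn values 6 and 7 are VCVTB/VCVTT with a half-precision
         * destination, which is why the destination lives in an S register
         * regardless of the dp bit.
         */
        for (int rn = 0; rn < 32; rn++) {
            if ((rn & 0x1e) == 0x4) {
                printf("rn=%d: half-precision source\n", rn);
            } else if ((rn & 0x1e) == 0x6) {
                printf("rn=%d: half-precision destination\n", rn);
            }
        }
        return 0;
    }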

>                  } else {
>                      VFP_DREG_D(rd, insn);
>                  }
>                  if (op == 15 &&
> -                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
> +                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
> +                     ((rn & 0x1e) == 0x4))) {
>                      /* VCVT from int is always from S reg regardless of dp bit.
>                       * VCVT with immediate frac_bits has same format as SREG_M
>                       */

Again, the comment needs updating since you've added an extra case.

> @@ -3241,12 +3243,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                  case 5:
>                  case 6:
>                  case 7:
> -                    /* VCVTB, VCVTT: only present with the halfprec extension,
> -                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
> +                    /* VCVTB, VCVTT: only present with the halfprec extension
> +                     * UNPREDICTABLE if bit 8 is set prior to ARMv8
> +                     * (we choose to UNDEF)
>                       */
> -                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
> +                    if ((dp && !arm_feature(env, ARM_FEATURE_V8)) ||
> +                        !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
>                          return 1;
>                      }
> +                    if ((rn & 0x1e) == 0x4) {
> +                        /* Single precision source */
> +                        gen_mov_F0_vreg(0, rm);
> +                        break;
> +                    }

This is confusing, because you're actually using this check to catch
the half-precision source case. I think it will be clearer to say:

          if (!extract32(rn, 1, 1)) {
              /* Half precision source */
              gen_mov_F0_vreg(0, rm);
              break;
          }

which then catches the half-prec sources for both to-single
and to-double, and leaves just the "source is as per dp flag"
cases to fall through. (We didn't need to catch halfprec sources
separately when we only had 32<->16 since they both get handled
similarly, but now that we have the possibility of dp != 0 it's clearer
to handle them explicitly.)
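For anyone unfamiliar with it, extract32(value, start, length) is QEMU's
bit-field helper: it returns the length-bit field of value starting at
bit start. A minimal standalone demonstration (assuming those semantics;
not QEMU code) of why !extract32(rn, 1, 1) picks out exactly rn == 4 and
rn == 5, the half-precision-source encodings:

    #include <stdint.h>
    #include <stdio.h>

    /* Same behaviour as QEMU's extract32(): return the 'length'-bit
     * field of 'value' starting at bit 'start'.
     */
    static uint32_t extract32_demo(uint32_t value, int start, int length)
    {
        return (value >> start) & (~0U >> (32 - length));
    }

    int main(void)
    {
        for (unsigned rn = 4; rn <= 7; rn++) {
            /* bit 1 is clear for rn == 4, 5 and set for rn == 6, 7 */
            printf("rn=%u: bit 1 = %u, halfprec-source check = %d\n",
                   rn, extract32_demo(rn, 1, 1), !extract32_demo(rn, 1, 1));
        }
        return 0;
    }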

>                      /* Otherwise fall through */
>                  default:
>                      /* One source operand.  */
> @@ -3394,21 +3403,39 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                      case 3: /* sqrt */
>                          gen_vfp_sqrt(dp);
>                          break;
> -                    case 4: /* vcvtb.f32.f16 */
> +                    case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
>                          tmp = gen_vfp_mrs();
>                          tcg_gen_ext16u_i32(tmp, tmp);
> -                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
> +                                                           cpu_env);
> +                        }
>                          tcg_temp_free_i32(tmp);
>                          break;
> -                    case 5: /* vcvtt.f32.f16 */
> +                    case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
>                          tmp = gen_vfp_mrs();
>                          tcg_gen_shri_i32(tmp, tmp, 16);
> -                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
> +                                                           cpu_env);
> +                        }
>                          tcg_temp_free_i32(tmp);
>                          break;
> -                    case 6: /* vcvtb.f16.f32 */
> +                    case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
>                          tmp = tcg_temp_new_i32();
> -                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
> +                                                           cpu_env);
> +                        }
>                          gen_mov_F0_vreg(0, rd);
>                          tmp2 = gen_vfp_mrs();
>                          tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
> @@ -3416,9 +3443,15 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                          tcg_temp_free_i32(tmp2);
>                          gen_vfp_msr(tmp);
>                          break;
> -                    case 7: /* vcvtt.f16.f32 */
> +                    case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
>                          tmp = tcg_temp_new_i32();
> -                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                        if (dp) {
> +                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
> +                                                           cpu_env);
> +                        } else {
> +                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
> +                                                           cpu_env);
> +                        }
>                          tcg_gen_shli_i32(tmp, tmp, 16);
>                          gen_mov_F0_vreg(0, rd);
>                          tmp2 = gen_vfp_mrs();
> @@ -3553,7 +3586,8 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                  /* Write back the result.  */
>                  if (op == 15 && (rn >= 8 && rn <= 11))
>                      ; /* Comparison, do nothing.  */
> -                else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
> +                else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
> +                                            (rn & 0x1e) == 0x6))
>                      /* VCVT double to int: always integer result. */
>                      gen_mov_vreg_F0(0, rd);
>                  else if (op == 15 && rn == 15)

Again, you need to update the comment because you've added a case.

Incidentally, my testing of this patch revealed a bug in
the softfloat float<->float conversion functions: if the
input is a NaN with the sign bit set and the top N bits of the
mantissa clear (where N is the number of mantissa bits in the
destination fp format), then we will incorrectly return a NaN
with the sign bit clear and only the topmost mantissa bit set, whereas
the ARM ARM demands that the sign bit be the same as the input's.
This is because the softfloat code for handling "what does an
input NaN get turned into for float/float conversion" (i.e. the
float*ToCommonNaN and commonNaNToFloat* functions) doesn't do
what the ARM ARM specifies; their behaviour should probably be specifiable
by the target architecture. At the moment we try to work around
this by always calling float*_maybe_silence_nan() after the
conversion, but this turns out to be not sufficient to fix up
the semantics...
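For concreteness, here is a minimal standalone sketch of the behaviour
I read the ARM ARM as requiring when converting a NaN between formats
(sign preserved, payload carried across left-aligned, quiet bit forced),
shown for the single-to-half direction; this is illustration only, not
QEMU softfloat code:

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the architecturally required conversion of a
     * single-precision NaN to half precision: keep the sign bit, keep
     * the top payload bits, force the result to be a quiet NaN.
     */
    static uint16_t arm_f32_nan_to_f16(uint32_t f32)
    {
        uint32_t sign = f32 >> 31;            /* sign bit preserved */
        uint32_t payload = f32 & 0x3fffff;    /* 22 payload bits below the quiet bit */

        return (uint16_t)((sign << 15)        /* same sign as the input */
                          | (0x1fu << 10)     /* exponent all ones */
                          | (1u << 9)         /* quiet bit forced to 1 */
                          | (payload >> 13)); /* top 9 payload bits kept */
    }

    int main(void)
    {
        /* Signalling NaN with the sign bit set and the top 10 mantissa
         * bits clear: the failing case described above.  On my reading
         * the required result is 0xfe00, whereas the common-NaN path as
         * described ends up with 0x7e00 (sign lost, default quiet NaN).
         */
        uint32_t in = 0xff800001;
        printf("0x%08x -> 0x%04x\n", in, arm_f32_nan_to_f16(in));
        return 0;
    }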

thanks
-- PMM

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index e701c0f..dfda2c4 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3142,14 +3142,16 @@  static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     VFP_DREG_N(rn, insn);
                 }
 
-                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
+                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
+                                 ((rn & 0x1e) == 0x6))) {
                     /* Integer or single precision destination.  */
                     rd = VFP_SREG_D(insn);
                 } else {
                     VFP_DREG_D(rd, insn);
                 }
                 if (op == 15 &&
-                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
+                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
+                     ((rn & 0x1e) == 0x4))) {
                     /* VCVT from int is always from S reg regardless of dp bit.
                      * VCVT with immediate frac_bits has same format as SREG_M
                      */
@@ -3241,12 +3243,19 @@  static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 case 5:
                 case 6:
                 case 7:
-                    /* VCVTB, VCVTT: only present with the halfprec extension,
-                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
+                    /* VCVTB, VCVTT: only present with the halfprec extension
+                     * UNPREDICTABLE if bit 8 is set prior to ARMv8
+                     * (we choose to UNDEF)
                      */
-                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
+                    if ((dp && !arm_feature(env, ARM_FEATURE_V8)) ||
+                        !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
                         return 1;
                     }
+                    if ((rn & 0x1e) == 0x4) {
+                        /* Single precision source */
+                        gen_mov_F0_vreg(0, rm);
+                        break;
+                    }
                     /* Otherwise fall through */
                 default:
                     /* One source operand.  */
@@ -3394,21 +3403,39 @@  static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     case 3: /* sqrt */
                         gen_vfp_sqrt(dp);
                         break;
-                    case 4: /* vcvtb.f32.f16 */
+                    case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
                         tmp = gen_vfp_mrs();
                         tcg_gen_ext16u_i32(tmp, tmp);
-                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
+                        if (dp) {
+                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
+                                                           cpu_env);
+                        } else {
+                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
+                                                           cpu_env);
+                        }
                         tcg_temp_free_i32(tmp);
                         break;
-                    case 5: /* vcvtt.f32.f16 */
+                    case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
                         tmp = gen_vfp_mrs();
                         tcg_gen_shri_i32(tmp, tmp, 16);
-                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
+                        if (dp) {
+                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
+                                                           cpu_env);
+                        } else {
+                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
+                                                           cpu_env);
+                        }
                         tcg_temp_free_i32(tmp);
                         break;
-                    case 6: /* vcvtb.f16.f32 */
+                    case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
                         tmp = tcg_temp_new_i32();
-                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                        if (dp) {
+                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
+                                                           cpu_env);
+                        } else {
+                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
+                                                           cpu_env);
+                        }
                         gen_mov_F0_vreg(0, rd);
                         tmp2 = gen_vfp_mrs();
                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
@@ -3416,9 +3443,15 @@  static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                         tcg_temp_free_i32(tmp2);
                         gen_vfp_msr(tmp);
                         break;
-                    case 7: /* vcvtt.f16.f32 */
+                    case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
                         tmp = tcg_temp_new_i32();
-                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                        if (dp) {
+                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
+                                                           cpu_env);
+                        } else {
+                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
+                                                           cpu_env);
+                        }
                         tcg_gen_shli_i32(tmp, tmp, 16);
                         gen_mov_F0_vreg(0, rd);
                         tmp2 = gen_vfp_mrs();
@@ -3553,7 +3586,8 @@  static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 /* Write back the result.  */
                 if (op == 15 && (rn >= 8 && rn <= 11))
                     ; /* Comparison, do nothing.  */
-                else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
+                else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
+                                            (rn & 0x1e) == 0x6))
                     /* VCVT double to int: always integer result. */
                     gen_mov_vreg_F0(0, rd);
                 else if (op == 15 && rn == 15)