Patchwork [v2,6/6] target-arm: Use standard FPSCR for Neon half-precision operations

login
register
mail settings
Submitter Peter Maydell
Date Feb. 9, 2011, 4:27 p.m.
Message ID <1297268850-5777-7-git-send-email-peter.maydell@linaro.org>
Download mbox | patch
Permalink /patch/82498/
State New
Headers show

Comments

Peter Maydell - Feb. 9, 2011, 4:27 p.m.
The Neon half-precision conversion operations (VCVT.F16.F32 and
VCVT.F32.F16) use ARM standard floating-point arithmetic, unlike
the VFP versions (VCVTB and VCVTT).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c    |   26 ++++++++++++++++++++++----
 target-arm/helpers.h   |    2 ++
 target-arm/translate.c |   16 ++++++++--------
 3 files changed, 32 insertions(+), 12 deletions(-)
Aurelien Jarno - Feb. 9, 2011, 6:43 p.m.
On Wed, Feb 09, 2011 at 04:27:30PM +0000, Peter Maydell wrote:
> The Neon half-precision conversion operations (VCVT.F16.F32 and
> VCVT.F32.F16) use ARM standard floating-point arithmetic, unlike
> the VFP versions (VCVTB and VCVTT).
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  target-arm/helper.c    |   26 ++++++++++++++++++++++----
>  target-arm/helpers.h   |    2 ++
>  target-arm/translate.c |   16 ++++++++--------
>  3 files changed, 32 insertions(+), 12 deletions(-)

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 503278c..d36f0f3 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -2623,9 +2623,8 @@ VFP_CONV_FIX(ul, s, float32, uint32, u)
>  #undef VFP_CONV_FIX
>  
>  /* Half precision conversions.  */
> -float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> +static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
>  {
> -    float_status *s = &env->vfp.fp_status;
>      int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
>      float32 r = float16_to_float32(a, ieee, s);
>      if (ieee) {
> @@ -2634,9 +2633,8 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
>      return r;
>  }
>  
> -uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
> +static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
>  {
> -    float_status *s = &env->vfp.fp_status;
>      int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
>      float16 r = float32_to_float16(a, ieee, s);
>      if (ieee) {
> @@ -2645,6 +2643,26 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
>      return r;
>  }
>  
> +float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> +{
> +    return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
> +}
> +
> +float32 HELPER(neon_fcvt_f32_to_f16)(uint32_t a, CPUState *env)
> +{
> +    return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
> +}
> +
> +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
> +{
> +    return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
> +}
> +
> +float32 HELPER(vfp_fcvt_f32_to_f16)(uint32_t a, CPUState *env)
> +{
> +    return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
> +}
> +
>  float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
>  {
>      float_status *s = &env->vfp.fp_status;
> diff --git a/target-arm/helpers.h b/target-arm/helpers.h
> index 8a2564e..40264b4 100644
> --- a/target-arm/helpers.h
> +++ b/target-arm/helpers.h
> @@ -129,6 +129,8 @@ DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
>  
>  DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
>  DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
> +DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
> +DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
>  
>  DEF_HELPER_3(recps_f32, f32, f32, f32, env)
>  DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index e4649e6..a867f55 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -5495,17 +5495,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                      tmp = new_tmp();
>                      tmp2 = new_tmp();
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
> -                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
> -                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> +                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
>                      tcg_gen_shli_i32(tmp2, tmp2, 16);
>                      tcg_gen_or_i32(tmp2, tmp2, tmp);
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
> -                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
>                      neon_store_reg(rd, 0, tmp2);
>                      tmp2 = new_tmp();
> -                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> +                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
>                      tcg_gen_shli_i32(tmp2, tmp2, 16);
>                      tcg_gen_or_i32(tmp2, tmp2, tmp);
>                      neon_store_reg(rd, 1, tmp2);
> @@ -5518,17 +5518,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                      tmp = neon_load_reg(rm, 0);
>                      tmp2 = neon_load_reg(rm, 1);
>                      tcg_gen_ext16u_i32(tmp3, tmp);
> -                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
>                      tcg_gen_shri_i32(tmp3, tmp, 16);
> -                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
>                      dead_tmp(tmp);
>                      tcg_gen_ext16u_i32(tmp3, tmp2);
> -                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
>                      tcg_gen_shri_i32(tmp3, tmp2, 16);
> -                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
>                      dead_tmp(tmp2);
>                      dead_tmp(tmp3);
> -- 
> 1.7.1
> 
> 
>
Peter Maydell - Feb. 9, 2011, 7:03 p.m.
On 9 February 2011 16:27, Peter Maydell <peter.maydell@linaro.org> wrote:
> -uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
> +static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)


> +float32 HELPER(neon_fcvt_f32_to_f16)(uint32_t a, CPUState *env)

> +float32 HELPER(vfp_fcvt_f32_to_f16)(uint32_t a, CPUState *env)

Just noticed this accidental change due to cut-n-paste error, the _to_f16
helpers should still be returning uint32_t, not float32.

-- PMM

Patch

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 503278c..d36f0f3 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2623,9 +2623,8 @@  VFP_CONV_FIX(ul, s, float32, uint32, u)
 #undef VFP_CONV_FIX
 
 /* Half precision conversions.  */
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
+static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
 {
-    float_status *s = &env->vfp.fp_status;
     int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
     float32 r = float16_to_float32(a, ieee, s);
     if (ieee) {
@@ -2634,9 +2633,8 @@  float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
     return r;
 }
 
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
+static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
 {
-    float_status *s = &env->vfp.fp_status;
     int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
     float16 r = float32_to_float16(a, ieee, s);
     if (ieee) {
@@ -2645,6 +2643,26 @@  uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
     return r;
 }
 
+float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
+{ /* Neon form: converts using env->vfp.standard_fp_status. */
+    return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
+}
+
+uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUState *env)
+{ /* Neon form: converts using env->vfp.standard_fp_status. */
+    return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
+}
+
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
+{ /* VFP form: converts using env->vfp.fp_status. */
+    return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
+}
+
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
+{ /* VFP form: converts using env->vfp.fp_status. */
+    return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
+}
+
 float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
 {
     float_status *s = &env->vfp.fp_status;
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index 8a2564e..40264b4 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -129,6 +129,8 @@  DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
 
 DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
 DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
+DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
+DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
 
 DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index e4649e6..a867f55 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5495,17 +5495,17 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     tmp = new_tmp();
                     tmp2 = new_tmp();
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
                     neon_store_reg(rd, 0, tmp2);
                     tmp2 = new_tmp();
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
                     neon_store_reg(rd, 1, tmp2);
@@ -5518,17 +5518,17 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
                     tcg_gen_ext16u_i32(tmp3, tmp);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
                     tcg_gen_shri_i32(tmp3, tmp, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
                     dead_tmp(tmp);
                     tcg_gen_ext16u_i32(tmp3, tmp2);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
                     tcg_gen_shri_i32(tmp3, tmp2, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
                     dead_tmp(tmp2);
                     dead_tmp(tmp3);