Patchwork [4/4] target-i386: fix SSE rounding and flush to zero

login
register
mail settings
Submitter Aurelien Jarno
Date Jan. 7, 2012, 8:09 p.m.
Message ID <1325966978-940-5-git-send-email-aurelien@aurel32.net>
Download mbox | patch
Permalink /patch/134863/
State New
Headers show

Comments

Aurelien Jarno - Jan. 7, 2012, 8:09 p.m.
SSE rounding and flush to zero control has never been implemented. However
given that softfloat-native was using a single state for FPU and SSE and
given that glibc is setting both FPU and SSE state in fesetround(), this
was working correctly up to the switch to softfloat.

Fix that by adding an update_sse_status() function similar to
update_fp_status(), and calling it on write to mxcsr.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-i386/helper.h    |    1 +
 target-i386/op_helper.c |   64 +++++++++++++++++++++++++++++++++++++++-------
 target-i386/translate.c |    2 +-
 3 files changed, 56 insertions(+), 11 deletions(-)
Robert Wang - Jan. 12, 2012, 5:37 a.m.
After applying this patch, I get the following error when compiling on my
laptop:

 ./configure --enable-kvm --target-list=x86_64-softmmu && make
CC    x86_64-softmmu/translate.o
/qemu/target-i386/translate.c: In function ‘disas_insn’:
/qemu/target-i386/translate.c:7547:17: error: incompatible type for
argument 1 of ‘gen_helper_ldmxcsr’
/qemu/target-i386/helper.h:200:1: note: expected ‘TCGv_i32’ but
argument is of type ‘TCGv_i64’
make[1]: *** [translate.o] Error 1
make: *** [subdir-x86_64-softmmu] Error 2


On Sun, Jan 8, 2012 at 04:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
> SSE rounding and flush to zero control has never been implemented. However
> given that softfloat-native was using a single state for FPU and SSE and
> given that glibc is setting both FPU and SSE state in fesetround(), this
> was working correctly up to the switch to softfloat.
>
> Fix that by adding an update_sse_status() function similar to
> update_fpu_status(), and callin git on write to mxcsr.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
>  target-i386/helper.h    |    1 +
>  target-i386/op_helper.c |   64 +++++++++++++++++++++++++++++++++++++++-------
>  target-i386/translate.c |    2 +-
>  3 files changed, 56 insertions(+), 11 deletions(-)
>
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 6b518ad..761954e 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -197,6 +197,7 @@ DEF_HELPER_2(lzcnt, tl, tl, int)
>
>  /* MMX/SSE */
>
> +DEF_HELPER_1(ldmxcsr, void, i32)
>  DEF_HELPER_0(enter_mmx, void)
>  DEF_HELPER_0(emms, void)
>  DEF_HELPER_2(movq, void, ptr, ptr)
> diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
> index c89e4a4..2aea71b 100644
> --- a/target-i386/op_helper.c
> +++ b/target-i386/op_helper.c
> @@ -52,11 +52,11 @@ static inline target_long lshift(target_long x, int n)
>     }
>  }
>
> -#define RC_MASK         0xc00
> -#define RC_NEAR         0x000
> -#define RC_DOWN         0x400
> -#define RC_UP           0x800
> -#define RC_CHOP         0xc00
> +#define FPU_RC_MASK         0xc00
> +#define FPU_RC_NEAR         0x000
> +#define FPU_RC_DOWN         0x400
> +#define FPU_RC_UP           0x800
> +#define FPU_RC_CHOP         0xc00
>
>  #define MAXTAN 9223372036854775808.0
>
> @@ -4024,18 +4024,18 @@ static void update_fp_status(void)
>     int rnd_type;
>
>     /* set rounding mode */
> -    switch(env->fpuc & RC_MASK) {
> +    switch(env->fpuc & FPU_RC_MASK) {
>     default:
> -    case RC_NEAR:
> +    case FPU_RC_NEAR:
>         rnd_type = float_round_nearest_even;
>         break;
> -    case RC_DOWN:
> +    case FPU_RC_DOWN:
>         rnd_type = float_round_down;
>         break;
> -    case RC_UP:
> +    case FPU_RC_UP:
>         rnd_type = float_round_up;
>         break;
> -    case RC_CHOP:
> +    case FPU_RC_CHOP:
>         rnd_type = float_round_to_zero;
>         break;
>     }
> @@ -5629,6 +5629,50 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
>
>  /* MMX/SSE */
>  /* XXX: optimize by storing fptt and fptags in the static cpu state */
> +
> +#define SSE_DAZ             0x0040
> +#define SSE_RC_MASK         0x6000
> +#define SSE_RC_NEAR         0x0000
> +#define SSE_RC_DOWN         0x2000
> +#define SSE_RC_UP           0x4000
> +#define SSE_RC_CHOP         0x6000
> +#define SSE_FZ              0x8000
> +
> +static void update_sse_status(void)
> +{
> +    int rnd_type;
> +
> +    /* set rounding mode */
> +    switch(env->mxcsr & SSE_RC_MASK) {
> +    default:
> +    case SSE_RC_NEAR:
> +        rnd_type = float_round_nearest_even;
> +        break;
> +    case SSE_RC_DOWN:
> +        rnd_type = float_round_down;
> +        break;
> +    case SSE_RC_UP:
> +        rnd_type = float_round_up;
> +        break;
> +    case SSE_RC_CHOP:
> +        rnd_type = float_round_to_zero;
> +        break;
> +    }
> +    set_float_rounding_mode(rnd_type, &env->sse_status);
> +
> +    /* set denormals are zero */
> +    set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
> +
> +    /* set flush to zero */
> +    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
> +}
> +
> +void helper_ldmxcsr(uint32_t val)
> +{
> +    env->mxcsr = val;
> +    update_sse_status();
> +}
> +
>  void helper_enter_mmx(void)
>  {
>     env->fpstt = 0;
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 8321bf3..b9839c5 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -7544,7 +7544,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
>             if (op == 2) {
>                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
> -                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
> +                gen_helper_ldmxcsr(cpu_T[0]);
>             } else {
>                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
>                 gen_op_st_T0_A0(OT_LONG + s->mem_index);
> --
> 1.7.7.3
>
>
Markus Armbruster - Jan. 13, 2012, 9:40 a.m.
Dong Xu Wang <wdongxu@linux.vnet.ibm.com> writes:

> After applied this patch, while I was compiling on my lap, there will
> be an error:
>
>  ./configure --enable-kvm --target-list=x86_64-softmmu && make
> CC    x86_64-softmmu/translate.o
> /qemu/target-i386/translate.c: In function ‘disas_insn’:
> /qemu/target-i386/translate.c:7547:17: error: incompatible type for
> argument 1 of ‘gen_helper_ldmxcsr’
> /qemu/target-i386/helper.h:200:1: note: expected ‘TCGv_i32’ but
> argument is of type ‘TCGv_i64’
> make[1]: *** [translate.o] Error 1
> make: *** [subdir-x86_64-softmmu] Error 2

I see this, too.
Justin M. Forbes - Jan. 13, 2012, 3:14 p.m.
On Fri, 2012-01-13 at 10:40 +0100, Markus Armbruster wrote:
> Dong Xu Wang <wdongxu@linux.vnet.ibm.com> writes:
> 
> > After applied this patch, while I was compiling on my lap, there will
> > be an error:
> >
> >  ./configure --enable-kvm --target-list=x86_64-softmmu && make
> > CC    x86_64-softmmu/translate.o
> > /qemu/target-i386/translate.c: In function ‘disas_insn’:
> > /qemu/target-i386/translate.c:7547:17: error: incompatible type for
> > argument 1 of ‘gen_helper_ldmxcsr’
> > /qemu/target-i386/helper.h:200:1: note: expected ‘TCGv_i32’ but
> > argument is of type ‘TCGv_i64’
> > make[1]: *** [translate.o] Error 1
> > make: *** [subdir-x86_64-softmmu] Error 2
> 
> I see this, too.


I will take a look, I am not seeing it right now with the full stable
tree, though I am doing a much larger config.

Justin

Patch

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 6b518ad..761954e 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -197,6 +197,7 @@  DEF_HELPER_2(lzcnt, tl, tl, int)
 
 /* MMX/SSE */
 
+DEF_HELPER_1(ldmxcsr, void, i32)
 DEF_HELPER_0(enter_mmx, void)
 DEF_HELPER_0(emms, void)
 DEF_HELPER_2(movq, void, ptr, ptr)
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index c89e4a4..2aea71b 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -52,11 +52,11 @@  static inline target_long lshift(target_long x, int n)
     }
 }
 
-#define RC_MASK         0xc00
-#define RC_NEAR         0x000
-#define RC_DOWN         0x400
-#define RC_UP           0x800
-#define RC_CHOP         0xc00
+#define FPU_RC_MASK         0xc00
+#define FPU_RC_NEAR         0x000
+#define FPU_RC_DOWN         0x400
+#define FPU_RC_UP           0x800
+#define FPU_RC_CHOP         0xc00
 
 #define MAXTAN 9223372036854775808.0
 
@@ -4024,18 +4024,18 @@  static void update_fp_status(void)
     int rnd_type;
 
     /* set rounding mode */
-    switch(env->fpuc & RC_MASK) {
+    switch(env->fpuc & FPU_RC_MASK) {
     default:
-    case RC_NEAR:
+    case FPU_RC_NEAR:
         rnd_type = float_round_nearest_even;
         break;
-    case RC_DOWN:
+    case FPU_RC_DOWN:
         rnd_type = float_round_down;
         break;
-    case RC_UP:
+    case FPU_RC_UP:
         rnd_type = float_round_up;
         break;
-    case RC_CHOP:
+    case FPU_RC_CHOP:
         rnd_type = float_round_to_zero;
         break;
     }
@@ -5629,6 +5629,50 @@  void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
 
 /* MMX/SSE */
 /* XXX: optimize by storing fptt and fptags in the static cpu state */
+
+#define SSE_DAZ             0x0040
+#define SSE_RC_MASK         0x6000
+#define SSE_RC_NEAR         0x0000
+#define SSE_RC_DOWN         0x2000
+#define SSE_RC_UP           0x4000
+#define SSE_RC_CHOP         0x6000
+#define SSE_FZ              0x8000
+
+static void update_sse_status(void)
+{
+    int rnd_type;
+
+    /* derive the softfloat rounding mode from the MXCSR RC field */
+    switch(env->mxcsr & SSE_RC_MASK) {
+    default:
+    case SSE_RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case SSE_RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case SSE_RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case SSE_RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->sse_status);
+
+    /* set denormals are zero */
+    set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
+
+    /* set flush to zero: MXCSR.FZ governs SSE state, not the x87 fp_status */
+    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
+}
+
+void helper_ldmxcsr(uint32_t val)
+{
+    env->mxcsr = val;
+    update_sse_status();
+}
+
 void helper_enter_mmx(void)
 {
     env->fpstt = 0;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 8321bf3..b9839c5 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7544,7 +7544,7 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             if (op == 2) {
                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+                gen_helper_ldmxcsr(cpu_T[0]);
             } else {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
                 gen_op_st_T0_A0(OT_LONG + s->mem_index);