diff mbox

[2/4] target-i386: do not memcpy in and out of xmm_regs

Message ID 1420652355-31847-3-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Jan. 7, 2015, 5:39 p.m. UTC
After the next patch, we will move the high parts of AVX and AVX512 registers
in the same array as the SSE registers.  This will make it impossible to
memcpy an array of 128-bit values in and out of xmm_regs in one swoop.
Use a for loop instead.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/kvm.c       | 30 ++++++++++++++++++++++++------
 target-i386/translate.c |  8 ++++----
 2 files changed, 28 insertions(+), 10 deletions(-)

Comments

Eduardo Habkost Jan. 13, 2015, 5:02 p.m. UTC | #1
On Wed, Jan 07, 2015 at 06:39:13PM +0100, Paolo Bonzini wrote:
> After the next patch, we will move the high parts of AVX and AVX512 registers
> in the same array as the SSE registers.  This will make it impossible to
> memcpy an array of 128-bit values in and out of xmm_regs in one swoop.
> Use a for loop instead.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Eduardo Habkost Jan. 13, 2015, 6:37 p.m. UTC | #2
On Wed, Jan 07, 2015 at 06:39:13PM +0100, Paolo Bonzini wrote:
[...]
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 5af4300..253009a 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -2621,10 +2621,10 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
>  
>  static inline void gen_op_movo(int d_offset, int s_offset)
>  {
> -    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
> -    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
> -    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
> -    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
> +    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0));
> +    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0));
> +    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1));
> +    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1));

It looks good (I even sent my Reviewed-by line), but:

target-i386/translate.c:2624:88: error: expected ‘)’ before ‘;’ token
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0));
                                                                                        ^
Paolo Bonzini Jan. 13, 2015, 7:49 p.m. UTC | #3
On 13/01/2015 19:37, Eduardo Habkost wrote:
>> > +    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1));
>> > +    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1));
> It looks good (I even sent my Reviewed-by line), but:
> 
> target-i386/translate.c:2624:88: error: expected ‘)’ before ‘;’ token
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0));
>                                                                                         ^

This sounds familiar.  I had attached the wrong version of the series,
but the only difference is the two extra parentheses.

Paolo
diff mbox

Patch

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index f92edfe..cf9f331 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1019,7 +1019,10 @@  static int kvm_put_fpu(X86CPU *cpu)
         fpu.ftwx |= (!env->fptags[i]) << i;
     }
     memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
-    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
+    for (i = 0; i < CPU_NB_REGS; i++) {
+        stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0));
+        stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1));
+    }
     fpu.mxcsr = env->mxcsr;
 
     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
@@ -1045,6 +1048,7 @@  static int kvm_put_xsave(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     uint16_t cwd, swd, twd;
+    uint8_t *xmm;
     int i, r;
 
     if (!kvm_has_xsave()) {
@@ -1065,8 +1069,6 @@  static int kvm_put_xsave(X86CPU *cpu)
     memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
     memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
             sizeof env->fpregs);
-    memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
-            sizeof env->xmm_regs);
     xsave->region[XSAVE_MXCSR] = env->mxcsr;
     *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
     memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
@@ -1079,6 +1081,13 @@  static int kvm_put_xsave(X86CPU *cpu)
             sizeof env->opmask_regs);
     memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
             sizeof env->zmmh_regs);
+
+    xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
+    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+        stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
+        stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
+    }
+
 #ifdef TARGET_X86_64
     memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
             sizeof env->hi16_zmm_regs);
@@ -1384,7 +1393,10 @@  static int kvm_get_fpu(X86CPU *cpu)
         env->fptags[i] = !((fpu.ftwx >> i) & 1);
     }
     memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
-    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
+    for (i = 0; i < CPU_NB_REGS; i++) {
+        env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
+        env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
+    }
     env->mxcsr = fpu.mxcsr;
 
     return 0;
@@ -1395,6 +1407,7 @@  static int kvm_get_xsave(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     int ret, i;
+    const uint8_t *xmm;
     uint16_t cwd, swd, twd;
 
     if (!kvm_has_xsave()) {
@@ -1421,8 +1434,6 @@  static int kvm_get_xsave(X86CPU *cpu)
     env->mxcsr = xsave->region[XSAVE_MXCSR];
     memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
             sizeof env->fpregs);
-    memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
-            sizeof env->xmm_regs);
     env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
     memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
             sizeof env->ymmh_regs);
@@ -1434,6 +1445,13 @@  static int kvm_get_xsave(X86CPU *cpu)
             sizeof env->opmask_regs);
     memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
             sizeof env->zmmh_regs);
+
+    xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
+    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+        env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
+        env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
+    }
+
 #ifdef TARGET_X86_64
     memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
             sizeof env->hi16_zmm_regs);
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5af4300..253009a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2621,10 +2621,10 @@  static inline void gen_sto_env_A0(DisasContext *s, int offset)
 
 static inline void gen_op_movo(int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0));
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0));
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1));
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1));
 }
 
 static inline void gen_op_movq(int d_offset, int s_offset)