diff mbox series

[v2,28/42] i386: Implement VZEROALL and VZEROUPPER

Message ID 20220424220204.2493824-29-paul@nowt.org
State New
Headers show
Series AVX guest implementation | expand

Commit Message

Paul Brook April 24, 2022, 10:01 p.m. UTC
They use the same opcode as EMMS, which I guess makes some sort of sense.
Fairly straightforward other than that.

If we were wanting to optimize out gen_clear_ymmh then this would be one of
the starting points.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/ops_sse.h        | 48 ++++++++++++++++++++++++++++++++++++
 target/i386/ops_sse_header.h |  9 +++++++
 target/i386/tcg/translate.c  | 26 ++++++++++++++++---
 3 files changed, 80 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index ad3312d353..a1f50f0c8b 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3071,6 +3071,54 @@  void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
 #endif
 #endif
 
+#if SHIFT == 2
+void helper_vzeroall(CPUX86State *env) /* Zero ZMM_Q(0..3) of xmm_regs[0..7]. */
+{
+    int i;
+
+    for (i = 0; i < 8; i++) { /* indices 8..15: see helper_vzeroall_hi8 */
+        env->xmm_regs[i].ZMM_Q(0) = 0;
+        env->xmm_regs[i].ZMM_Q(1) = 0;
+        env->xmm_regs[i].ZMM_Q(2) = 0;
+        env->xmm_regs[i].ZMM_Q(3) = 0;
+    }
+}
+
+void helper_vzeroupper(CPUX86State *env) /* Zero ZMM_Q(2..3) of xmm_regs[0..7]. */
+{
+    int i;
+
+    for (i = 0; i < 8; i++) { /* ZMM_Q(0) and ZMM_Q(1) are left untouched */
+        env->xmm_regs[i].ZMM_Q(2) = 0;
+        env->xmm_regs[i].ZMM_Q(3) = 0;
+    }
+}
+
+#ifdef TARGET_X86_64
+void helper_vzeroall_hi8(CPUX86State *env) /* Zero ZMM_Q(0..3) of xmm_regs[8..15]. */
+{
+    int i;
+
+    for (i = 8; i < 16; i++) { /* same stores as helper_vzeroall, upper eight indices */
+        env->xmm_regs[i].ZMM_Q(0) = 0;
+        env->xmm_regs[i].ZMM_Q(1) = 0;
+        env->xmm_regs[i].ZMM_Q(2) = 0;
+        env->xmm_regs[i].ZMM_Q(3) = 0;
+    }
+}
+
+void helper_vzeroupper_hi8(CPUX86State *env) /* Zero ZMM_Q(2..3) of xmm_regs[8..15]. */
+{
+    int i;
+
+    for (i = 8; i < 16; i++) { /* ZMM_Q(0) and ZMM_Q(1) are left untouched */
+        env->xmm_regs[i].ZMM_Q(2) = 0;
+        env->xmm_regs[i].ZMM_Q(3) = 0;
+    }
+}
+#endif
+#endif
+
 #undef SSE_HELPER_S
 
 #undef SHIFT
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index cfcfba154b..48f0945917 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -411,6 +411,15 @@  DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32)
 #endif
 
+#if SHIFT == 2 /* same guard as the helper definitions in ops_sse.h */
+DEF_HELPER_1(vzeroall, void, env)
+DEF_HELPER_1(vzeroupper, void, env)
+#ifdef TARGET_X86_64 /* the _hi8 helpers are only defined under this guard */
+DEF_HELPER_1(vzeroall_hi8, void, env)
+DEF_HELPER_1(vzeroupper_hi8, void, env)
+#endif
+#endif
+
 #undef SHIFT
 #undef Reg
 #undef SUFFIX
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index bcd6d47fd0..ba70aeb039 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3455,9 +3455,29 @@  static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         return;
     }
     if (b == 0x77) {
-        /* emms */
-        gen_helper_emms(cpu_env);
-        return;
+        if (s->prefix & PREFIX_VEX) {
+            CHECK_AVX(s);
+            if (s->vex_l) {
+                gen_helper_vzeroall(cpu_env);
+#ifdef TARGET_X86_64
+                if (CODE64(s)) {
+                    gen_helper_vzeroall_hi8(cpu_env);
+                }
+#endif
+            } else {
+                gen_helper_vzeroupper(cpu_env);
+#ifdef TARGET_X86_64
+                if (CODE64(s)) {
+                    gen_helper_vzeroupper_hi8(cpu_env);
+                }
+#endif
+            }
+            return;
+        } else {
+            /* emms */
+            gen_helper_emms(cpu_env);
+            return;
+        }
     }
     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
        the static cpu state) */