@@ -3407,6 +3407,79 @@ void helper_vzeroupper_hi8(CPUX86State *env)
}
}
#endif
+
+/*
+ * 128-bit lane permute of two 256-bit sources (used for vperm2f128 and
+ * vperm2i128).
+ *
+ * Each half of the destination is chosen by a 4-bit field of @order:
+ *   bits [1:0] select the source lane for the low half,
+ *   bits [5:4] select the source lane for the high half,
+ * where 0 = v low, 1 = v high, 2 = s low, 3 = s high.
+ * Bit 3 forces the low half to zero and bit 7 forces the high half to
+ * zero, overriding the lane selection.
+ *
+ * @env is unused.  All source lanes are read into temporaries before d
+ * is written, so the result is correct even when d is the same register
+ * as v or s.
+ */
+void helper_vpermdq_ymm(CPUX86State *env,
+                        Reg *d, Reg *v, Reg *s, uint32_t order)
+{
+    /* Bit 1 of each selector picks the register, bit 0 picks the lane. */
+    const Reg *lo_src = (order & 0x02) ? s : v;
+    const Reg *hi_src = (order & 0x20) ? s : v;
+    int lo_base = (order & 0x01) ? 2 : 0;
+    int hi_base = (order & 0x10) ? 2 : 0;
+    uint64_t r0, r1, r2, r3;
+
+    r0 = lo_src->Q(lo_base);
+    r1 = lo_src->Q(lo_base + 1);
+    r2 = hi_src->Q(hi_base);
+    r3 = hi_src->Q(hi_base + 1);
+
+    /* Zeroing controls take precedence over the selected lane. */
+    if (order & 0x08) {
+        r0 = 0;
+        r1 = 0;
+    }
+    if (order & 0x80) {
+        r2 = 0;
+        r3 = 0;
+    }
+
+    d->Q(0) = r0;
+    d->Q(1) = r1;
+    d->Q(2) = r2;
+    d->Q(3) = r3;
+}
+
+/*
+ * Permute the four 64-bit elements of s into d.
+ *
+ * Destination element i takes the source element whose index is given
+ * by the 2-bit field of @order at bit position 2 * i.  @env is unused.
+ */
+void helper_vpermq_ymm(CPUX86State *env, Reg *d, Reg *s, uint32_t order)
+{
+    uint64_t picked[4];
+    int i;
+
+    /* Gather first, store afterwards, so d may be the same register as s. */
+    for (i = 0; i < 4; i++) {
+        picked[i] = s->Q((order >> (2 * i)) & 3);
+    }
+    for (i = 0; i < 4; i++) {
+        d->Q(i) = picked[i];
+    }
+}
+
+/*
+ * Permute the eight 32-bit elements of s into d, using per-element
+ * indices taken from v.
+ *
+ * Destination element n receives s element (v element n & 7); only the
+ * low three bits of each index are used.  @env is unused.
+ */
+void helper_vpermd_ymm(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+    uint32_t gathered[8];
+    int lane;
+
+    /* Collect every result before storing, so d may alias v or s. */
+    for (lane = 0; lane < 8; lane++) {
+        uint32_t index = v->L(lane) & 7;
+
+        gathered[lane] = s->L(index);
+    }
+    for (lane = 0; lane < 8; lane++) {
+        d->L(lane) = gathered[lane];
+    }
+}
#endif
#endif
@@ -457,6 +457,9 @@ DEF_HELPER_1(vzeroupper, void, env)
DEF_HELPER_1(vzeroall_hi8, void, env)
DEF_HELPER_1(vzeroupper_hi8, void, env)
#endif
+/* Whole-register permutes: only the 256-bit (_ymm) helpers are declared. */
+DEF_HELPER_5(vpermdq_ymm, void, env, Reg, Reg, Reg, i32)
+DEF_HELPER_4(vpermq_ymm, void, env, Reg, Reg, i32)
+DEF_HELPER_4(vpermd_ymm, void, env, Reg, Reg, Reg)
#endif
#endif
@@ -3258,6 +3258,8 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
[0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX),
[0x14] = BLENDV_OP(blendvps, SSE41, 0),
[0x15] = BLENDV_OP(blendvpd, SSE41, 0),
+#define gen_helper_vpermd_xmm NULL
+ [0x16] = BINARY_OP(vpermd, AVX, SSE_OPF_AVX2), /* vpermps */
[0x17] = CMP_OP(ptest, SSE41),
/* TODO:Some vbroadcast variants require AVX2 */
[0x18] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR), /* vbroadcastss */
@@ -3287,6 +3289,7 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
[0x33] = UNARY_OP(pmovzxwd, SSE41, SSE_OPF_MMX),
[0x34] = UNARY_OP(pmovzxwq, SSE41, SSE_OPF_MMX),
[0x35] = UNARY_OP(pmovzxdq, SSE41, SSE_OPF_MMX),
+ [0x36] = BINARY_OP(vpermd, AVX, SSE_OPF_AVX2), /* vpermd */
[0x37] = BINARY_OP(pcmpgtq, SSE41, SSE_OPF_MMX),
[0x38] = BINARY_OP(pminsb, SSE41, SSE_OPF_MMX),
[0x39] = BINARY_OP(pminsd, SSE41, SSE_OPF_MMX),
@@ -3329,8 +3332,13 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
/* prefix [66] 0f 3a */
static const struct SSEOpHelper_table7 sse_op_table7[256] = {
+#define gen_helper_vpermq_xmm NULL
+ [0x00] = UNARY_OP(vpermq, AVX, SSE_OPF_AVX2),
+ [0x01] = UNARY_OP(vpermq, AVX, SSE_OPF_AVX2), /* vpermpd */
[0x04] = UNARY_OP(vpermilps_imm, AVX, 0),
[0x05] = UNARY_OP(vpermilpd_imm, AVX, 0),
+#define gen_helper_vpermdq_xmm NULL
+ [0x06] = BINARY_OP(vpermdq, AVX, 0), /* vperm2f128 */
[0x08] = UNARY_OP(roundps, SSE41, 0),
[0x09] = UNARY_OP(roundpd, SSE41, 0),
#define gen_helper_roundss_ymm NULL
@@ -3353,6 +3361,7 @@ static const struct SSEOpHelper_table7 sse_op_table7[256] = {
[0x41] = BINARY_OP(dppd, SSE41, 0),
[0x42] = BINARY_OP(mpsadbw, SSE41, SSE_OPF_MMX),
[0x44] = BINARY_OP(pclmulqdq, PCLMULQDQ, 0),
+ [0x46] = BINARY_OP(vpermdq, AVX, SSE_OPF_AVX2), /* vperm2i128 */
#define gen_helper_pcmpestrm_ymm NULL
[0x60] = CMP_OP(pcmpestrm, SSE42),
#define gen_helper_pcmpestri_ymm NULL
A set of shuffle operations that operate on complete 256-bit registers.
The integer and floating-point variants have identical semantics.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/ops_sse.h        | 73 ++++++++++++++++++++++++++++++++++++
 target/i386/ops_sse_header.h |  3 ++
 target/i386/tcg/translate.c  |  9 +++++
 3 files changed, 85 insertions(+)