new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r\]*%\[er\]ax+\[^\n\r]*\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+\[^\n\r\]*%\[er\]ax+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+\[^\n\r\]*%\[er\]ax+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^z\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+extern _Float16 const* p;
+volatile __m128h x1, x2, res;
+volatile __mmask8 m8;
+
+void
+avx512f_test (void)
+{ /* Compile-only: each statement below feeds one dg-final vmovsh count.  */
+ x2 = _mm_mask_load_sh (x1, m8, p); /* load from p under mask m8, x1 as pass-through */
+ x2 = _mm_maskz_load_sh (m8, p); /* load from p under mask m8, zeroing form */
+ _mm_mask_store_sh (p, m8, x1); /* NOTE(review): p is declared pointer-to-const yet is the store destination */
+ /* Register moves: unmasked, masked with res as pass-through, zero-masked.  */
+ res = _mm_move_sh (x1, x2);
+ res = _mm_mask_move_sh (res, m8, x1, x2);
+ res = _mm_maskz_move_sh (m8, x1, x2);
+}
new file mode 100644
@@ -0,0 +1,115 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void NOINLINE
+emulate_mov2_load_sh(V512 * dest, V512 op1,
+ __mmask8 k, int zero_mask)
+{
+ V512 v1, v2, v3, v4, v5, v6, v7, v8;
+ int i;
+ /* Reference model for the scalar load checks: computes the expected value into *dest.  */
+ unpack_ph_2twops(op1, &v1, &v2);
+ unpack_ph_2twops(*dest, &v7, &v8);
+ /* k == 0 is handled like a set mask bit, so callers pass 0 for the unmasked form.  */
+ if ((k&1) || !k)
+ v5.f32[0] = v1.f32[0];
+ else if (zero_mask)
+ v5.f32[0] = 0;
+ else
+ v5.f32[0] = v7.f32[0]; //remains unchanged
+ /* Elements 1..7 of the modelled result are always zero.  */
+ for (i = 1; i < 8; i++)
+ v5.f32[i] = 0;
+ /* NOTE(review): v6 is never assigned before being passed here; v3 and v4 are unused.  */
+ *dest = pack_twops_2ph(v5, v6);
+}
+
+void NOINLINE
+emulate_mov3_load_sh(V512 * dest, V512 op1, V512 op2,
+ __mmask8 k, int zero_mask)
+{
+ V512 v1, v2, v3, v4, v5, v6, v7, v8;
+ int i;
+ /* Reference model for the three-operand move checks: computes the expected value into *dest.  */
+ unpack_ph_2twops(op1, &v1, &v2);
+ unpack_ph_2twops(op2, &v3, &v4);
+ unpack_ph_2twops(*dest, &v7, &v8);
+ /* Element 0 comes from op2 when selected (k == 0 counts as selected).  */
+ if ((k&1) || !k)
+ v5.f32[0] = v3.f32[0];
+ else if (zero_mask)
+ v5.f32[0] = 0;
+ else
+ v5.f32[0] = v7.f32[0]; //remains unchanged
+ /* Elements 1..7 are copied from op1.  */
+ for (i = 1; i < 8; i++)
+ v5.f32[i] = v1.f32[i];
+ /* NOTE(review): v6 is never assigned before being passed here.  */
+ *dest = pack_twops_2ph(v5, v6);
+}
+
+void NOINLINE
+emulate_mov2_store_sh(V512 * dest, V512 op1, __mmask8 k)
+{
+ V512 v1, v2, v3, v4, v5, v6, v7, v8;
+ int i;
+ /* Reference model for the scalar store checks: only element 0 of the result is set.  */
+ unpack_ph_2twops(op1, &v1, &v2);
+ unpack_ph_2twops(*dest, &v7, &v8);
+ /* k == 0 is handled like a set mask bit, so callers pass 0 for the unmasked form.  */
+ if ((k&1) || !k)
+ v5.f32[0] = v1.f32[0];
+ else
+ v5.f32[0] = v7.f32[0]; //remains unchanged
+ /* NOTE(review): v5.f32[1..] and v6 are never assigned before this call; i, v3, v4 are unused.  */
+ *dest = pack_twops_2ph(v5, v6);
+}
+
+void
+test_512 (void)
+{
+ V512 res;
+ V512 exp;
+ // Each case: build the expected value with an emulate_* model, run the intrinsic, compare.
+ init_src();
+
+ // no mask
+ emulate_mov2_load_sh (&exp, src1, 0x0, 0);
+ res.xmmh[0] = _mm_load_sh((const void *)&(src1.u16[0]));
+ check_results(&res, &exp, 8, "_mm_load_sh");
+
+ // with mask and mask bit is set
+ emulate_mov2_load_sh (&exp, src1, 0x1, 0);
+ res.xmmh[0] = _mm_mask_load_sh(res.xmmh[0], 0x1, (const void *)&(src1.u16[0]));
+ check_results(&res, &exp, 8, "_mm_mask_load_sh"); // label now spells the intrinsic name correctly
+
+ // with zero-mask
+ emulate_mov2_load_sh (&exp, src1, 0x0, 1);
+ res.xmmh[0] = _mm_maskz_load_sh(0x1, (const void *)&(src1.u16[0]));
+ check_results(&res, &exp, 8, "_mm_maskz_load_sh");
+ // masked move, mask bit set, previous res as pass-through
+ emulate_mov3_load_sh (&exp, src1, src2, 0x1, 0);
+ res.xmmh[0] = _mm_mask_move_sh(res.xmmh[0], 0x1, src1.xmmh[0], src2.xmmh[0]);
+ check_results(&res, &exp, 8, "_mm_mask_move_sh");
+ // zero-masked move, mask bit set
+ emulate_mov3_load_sh (&exp, src1, src2, 0x1, 1);
+ res.xmmh[0] = _mm_maskz_move_sh(0x1, src1.xmmh[0], src2.xmmh[0]);
+ check_results(&res, &exp, 8, "_mm_maskz_move_sh");
+
+ // no mask
+ emulate_mov2_store_sh (&exp, src1, 0x0);
+ _mm_store_sh((void *)&(res.u16[0]), src1.xmmh[0]);
+ check_results(&exp, &res, 1, "_mm_store_sh"); // stores compare a single element
+
+ // with mask
+ emulate_mov2_store_sh (&exp, src1, 0x1);
+ _mm_mask_store_sh((void *)&(res.u16[0]), 0x1, src1.xmmh[0]);
+ check_results(&exp, &res, 1, "_mm_mask_store_sh");
+ // Fail the run if any comparison above was counted in n_errs.
+ if (n_errs != 0) {
+ abort ();
+ }
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vmovw\[^-]" 1 } } */
+/* { dg-final { scan-assembler-times "vpextrw" 1 } } */
+#include <immintrin.h>
+
+volatile __m128i x1;
+volatile short x2;
+
+void extern
+avx512f_test (void)
+{ /* Compile-only: the dg-final lines expect one vmovw and one vpextrw overall.  */
+ x1 = _mm_cvtsi16_si128 (x2); /* volatile short -> volatile __m128i */
+ x2 = _mm_cvtsi128_si16 (x1); /* volatile __m128i -> volatile short */
+}
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do run {target avx512fp16} } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+
+static void
+do_test (void)
+{
+ union128i_w u;
+ short b = 128;
+ short e[8] = {0,0,0,0,0,0,0,0};
+ /* Scalar -> vector direction.  */
+ u.x = _mm_cvtsi16_si128 (b);
+ /* Expected image: b in element 0, the remaining seven elements zero.  */
+ e[0] = b;
+
+ if (check_union128i_w (u, e)) /* presumably non-zero on mismatch -- confirm in avx512-check.h */
+ abort ();
+ u.a[0] = 123; /* Vector -> scalar direction: element 0 must be read back unchanged.  */
+ b = _mm_cvtsi128_si16 (u.x);
+ if (u.a[0] != b)
+ abort();
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+typedef short __v8hi __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+__m128i
+__attribute__ ((noinline, noclone))
+foo1 (short x)
+{ /* Returns a 128-bit vector with x in element 0 and zeros in elements 1..7.  */
+ return __extension__ (__m128i)(__v8hi) { x, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+__m128i
+__attribute__ ((noinline, noclone))
+foo2 (short *x)
+{ /* Same as foo1, but the scalar is read through the pointer x.  */
+ return __extension__ (__m128i)(__v8hi) { *x, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times "vmovw\[^-\n\r]*xmm0" 2 } } */
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include <string.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512fp16-vmovw-2a.c"
+
+__m128i
+__attribute__ ((noinline,noclone))
+foo3 (__m128i x)
+{ /* Forwards element 0 of x, viewed as __v8hi, to foo1.  */
+ return foo1 (((__v8hi) x)[0]);
+}
+
+static void
+do_test (void)
+{ /* Three cases; each aborts when check_union128i_w returns non-zero.  */
+ short x;
+ union128i_w u = { -1, -1,};
+ union128i_w exp = { 0, 0};
+ __m128i v;
+ union128i_w a;
+ /* Case 1: foo1, scalar passed by value.  */
+ x = 25;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v)); /* pre-fill v with 0xff bytes before it is overwritten */
+ v = foo1 (x);
+ a.x = v;
+ if (check_union128i_w (a, exp.a))
+ abort ();
+ /* Case 2: foo2, scalar read through a pointer.  */
+ x = 33;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v));
+ v = foo2 (&x);
+ a.x = v;
+ if (check_union128i_w (a, exp.a))
+ abort ();
+ /* Case 3: foo3, scalar taken from element 0 of u (other elements come from the -1 initializer).  */
+ x = -33;
+ u.a[0] = x;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v));
+ v = foo3 (u.x);
+ a.x = v;
+ if (check_union128i_w (a, exp.a))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32), __may_alias__));
+
+__m256i
+__attribute__ ((noinline, noclone))
+foo1 (short x)
+{ /* Returns a 256-bit vector with x in element 0 and zeros in elements 1..15.  */
+ return __extension__ (__m256i)(__v16hi) { x, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+__m256i
+__attribute__ ((noinline, noclone))
+foo2 (short *x)
+{ /* Same as foo1, but the scalar is read through the pointer x.  */
+ return __extension__ (__m256i)(__v16hi) { *x, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times "vmovw\[^-\n\r]*xmm0" 2 } } */
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include <string.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512fp16-vmovw-3a.c"
+
+__m256i
+__attribute__ ((noinline,noclone))
+foo3 (__m256i x)
+{ /* Forwards element 0 of x, viewed as __v16hi, to foo1.  */
+ return foo1 (((__v16hi) x)[0]);
+}
+
+static void
+do_test (void)
+{ /* Three cases; each aborts when check_union256i_w returns non-zero.  */
+ short x = 25; /* must be initialised: it is read by the first case below */
+ union256i_w u = { -1, -1, -1, -1 };
+ union256i_w exp = { 0, 0, 0, 0 };
+ /* Case 1: foo1, scalar passed by value.  */
+ __m256i v;
+ union256i_w a;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v)); /* pre-fill v with 0xff bytes before it is overwritten */
+ v = foo1 (x);
+ a.x = v;
+ if (check_union256i_w (a, exp.a))
+ abort ();
+ /* Case 2: foo2, scalar read through a pointer.  */
+ x = 33;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v));
+ v = foo2 (&x);
+ a.x = v;
+ if (check_union256i_w (a, exp.a))
+ abort ();
+ /* Case 3: foo3, scalar taken from element 0 of u (other elements come from the -1 initializer).  */
+ x = -23;
+ u.a[0] = x;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v));
+ v = foo3 (u.x);
+ a.x = v;
+ if (check_union256i_w (a, exp.a))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+
+__m512i
+__attribute__ ((noinline, noclone))
+foo1 (short x)
+{ /* Returns a 512-bit vector with x in element 0 and zeros in elements 1..31.  */
+ return __extension__ (__m512i)(__v32hi) { x, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+__m512i
+__attribute__ ((noinline, noclone))
+foo2 (short *x)
+{ /* Same as foo1, but the scalar is read through the pointer x.  */
+ return __extension__ (__m512i)(__v32hi) { *x, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times "vmovw\[^-\n\r]*xmm0" 2 } } */
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include <string.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512fp16-vmovw-4a.c"
+
+__m512i
+__attribute__ ((noinline,noclone))
+foo3 (__m512i x)
+{ /* Forwards element 0 of x, viewed as __v32hi, to foo1.  */
+ return foo1 (((__v32hi) x)[0]);
+}
+
+static void
+do_test (void)
+{ /* Three cases; each aborts when check_union512i_w returns non-zero.  */
+ short x = 25;
+ union512i_w u = { -1, -1, -1, -1, -1, -1, -1, -1 };
+ union512i_w exp = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ /* Case 1: foo1, scalar passed by value.  */
+ __m512i v;
+ union512i_w a;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v)); /* pre-fill v with 0xff bytes before it is overwritten */
+ v = foo1 (x);
+ a.x = v;
+ if (check_union512i_w (a, exp.a))
+ abort ();
+ /* Case 2: foo2, scalar read through a pointer.  */
+ x = 55;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v));
+ v = foo2 (&x);
+ a.x = v;
+ if (check_union512i_w (a, exp.a))
+ abort ();
+ /* Case 3: foo3, scalar taken from element 0 of u (other elements come from the -1 initializer).  */
+ x = 33;
+ u.a[0] = x;
+ exp.a[0] = x;
+ memset (&v, -1, sizeof (v));
+ v = foo3 (u.x);
+ a.x = v;
+ if (check_union512i_w (a, exp.a))
+ abort ();
+}