@@ -107,6 +107,10 @@ display_ps(const void *p, const char *banner, int n_elems)
check_results ((void*)res, (void*)exp, size,\
NAME_OF(intrin))
+/* Compare the mask RES with the expected mask EXP by calling
+   check_results_mask, passing SIZE through and NAME_OF(intrin) as the
+   banner.  Every argument is parenthesized so that the __mmask32 cast
+   applies to the whole argument expression (e.g. `a | b') and not just
+   to its first operand.  */
+#define CHECK_RESULT_MASK(res, exp, size, intrin) \
+  check_results_mask ((__mmask32) (res), (__mmask32) (exp), (size), \
+		      NAME_OF(intrin))
+
/* To evaluate whether result match _Float16 precision,
only the last bit of real/emulate result could be
different. */
@@ -136,6 +140,18 @@ check_results(void *got, void *exp, int n_elems, char *banner)
}
}
+/* Compare two mask values.  When GOT differs from EXP, n_errs is
+   incremented; if DEBUG is defined, both masks are also printed in hex,
+   tagged with BANNER (or an empty string when BANNER is null).
+   N_ELEMS is not used.  */
+void NOINLINE
+check_results_mask(__mmask32 got, __mmask32 exp, int n_elems, char *banner)
+{
+  /* Matching masks: nothing to record.  */
+  if (got == exp)
+    return;
+
+#ifdef DEBUG
+  printf("ERROR: %s failed : got mask %x != exp mask %x\n",
+	 banner ? banner : "", got, exp);
+#endif
+  n_errs++;
+}
+
/* Functions for src/dest initialization */
void NOINLINE
init_src()
@@ -156,6 +172,27 @@ init_src()
src2 = pack_twops_2ph(v3, v4);
}
+/* Source initializer that plants a NaN.  Four temporary vectors get
+   ascending "x.5" float values in their 16 f32 lanes, src3's 16 u32
+   lanes get (lane + 1) * 10, lane 0 of the first temporary is then
+   overwritten with __builtin_nanf(""), and the temporaries are packed
+   pairwise into src1 and src2 by pack_twops_2ph.  */
+void NOINLINE
+init_src_nanf()
+{
+  V512 s1_a, s1_b, s2_a, s2_b;
+  int lane;
+
+  for (lane = 0; lane < 16; lane++) {
+    src3.u32[lane] = (lane + 1) * 10;
+
+    s1_a.f32[lane] = lane + 1 + 0.5;
+    s1_b.f32[lane] = lane + 17 + 0.5;
+    s2_a.f32[lane] = lane * 2 + 2 + 0.5;
+    s2_b.f32[lane] = lane * 2 + 34 + 0.5;
+  }
+
+  /* The only difference in the data: the very first value is a NaN.  */
+  s1_a.f32[0] = __builtin_nanf("");
+
+  src1 = pack_twops_2ph(s1_a, s1_b);
+  src2 = pack_twops_2ph(s2_a, s2_b);
+}
+
+
void NOINLINE
init_dest(V512 * res, V512 * exp)
{
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$2\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$3\[^\n\r]*\{sae\}\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\[^\{\n\]*\\\$4\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Operands and result sinks for avx512f_test.  They are volatile, so
+   every read and write must actually be performed.  res2 and m8 are
+   not referenced anywhere in this file.  */
+volatile __mmask32 res, res1, res2;
+volatile __m512h x1, x2;
+volatile __mmask32 m32;
+volatile __mmask8 m8;
+
+/* Compile-only body: one call for each scan-assembler-times directive
+   at the top of the file.  The calls use the distinct predicate
+   immediates 1..4 so that the four expected vcmpph instructions can be
+   told apart in the assembly.  */
+void extern
+avx512f_test (void)
+{
+ /* Immediate 1, no write mask.  */
+ res = _mm512_cmp_ph_mask (x1, x2, 1);
+ /* Immediate 2 under write mask m32; the directive expects %k{%k}.  */
+ res1 = _mm512_mask_cmp_ph_mask (m32, x1, x2, 2);
+ /* Immediate 3 with rounding argument 8; the directive expects {sae}.  */
+ res = _mm512_cmp_round_ph_mask (x1, x2, 3, 8);
+ /* Immediate 4, masked, rounding argument 4; the directive for $4 does
+    not ask for {sae}.  */
+ res1 = _mm512_mask_cmp_round_ph_mask (m32, x1, x2, 4, 4);
+}
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+/* Element count passed to CHECK_RESULT_MASK: AVX512F_LEN / 16
+   (presumably the vector width in bits divided by the 16-bit element
+   size -- confirm against the helper that defines AVX512F_LEN).  */
+#define N_ELEMS (AVX512F_LEN / 16)
+
+/* Reference model for the packed FP16 compare-into-mask intrinsics.
+
+   OP1 and OP2 are each unpacked by unpack_ph_2twops into two vectors
+   of 16 floats.  For every lane whose bit is set in K, the matching
+   result bit is set when the two floats compare equal with `=='.
+   Bits 0-15 of the result come from the first unpacked vector of each
+   operand and bits 16-31 from the second; bits not selected by K stay
+   zero.
+
+   PREDICATE is accepted but ignored: only equality is modelled, so
+   callers are expected to pass an equality predicate.
+
+   The partial results are kept in __mmask32 so that the final shift
+   by 16 happens in the wide mask type.  Shifting a __mmask16 would
+   first promote it to int and could move a set bit 15 into the sign
+   bit.  */
+__mmask32 NOINLINE
+EMULATE(cmp_ph) (V512 op1, V512 op2,
+		 __mmask32 k, int predicate)
+{
+  V512 v1, v2, v3, v4;
+  int i;
+  __mmask32 lo = 0, hi = 0;
+  __mmask32 m1 = k & 0xffff;
+  __mmask32 m2 = (k >> 16) & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+  unpack_ph_2twops(op2, &v3, &v4);
+
+  for (i = 0; i < 16; i++) {
+    __mmask32 bit = (__mmask32) 1 << i;
+
+    if ((m1 & bit) != 0 && v1.f32[i] == v3.f32[i])
+      lo |= bit;
+
+    if ((m2 & bit) != 0 && v2.f32[i] == v4.f32[i])
+      hi |= bit;
+  }
+
+  return lo | (hi << 16);
+}
+
+/* Check the packed FP16 compare-into-mask intrinsics against
+   EMULATE(cmp_ph) on the data set up by init_src.  Every check uses
+   the equality predicate, which is the only comparison the emulation
+   performs.  The predicate and the rounding argument are written with
+   their named constants (_CMP_EQ_OQ, _MM_FROUND_NO_EXC) rather than
+   the bare literals 0 and 8.  Aborts if any check incremented
+   n_errs.  */
+void
+TEST (void)
+{
+  __mmask32 res, exp;
+
+  init_src();
+
+  /* Unmasked form; the emulation is given NET_MASK as its mask.  */
+  exp = EMULATE(cmp_ph) (src1, src2, NET_MASK, _CMP_EQ_OQ);
+  res = INTRINSIC (_cmp_ph_mask) (HF(src1), HF(src2), _CMP_EQ_OQ);
+  CHECK_RESULT_MASK (res, exp, N_ELEMS, _cmp_ph_mask);
+
+  /* Masked form under MASK_VALUE.  */
+  exp = EMULATE(cmp_ph) (src1, src2, MASK_VALUE, _CMP_EQ_OQ);
+  res = INTRINSIC (_mask_cmp_ph_mask) (MASK_VALUE, HF(src1), HF(src2),
+				       _CMP_EQ_OQ);
+  CHECK_RESULT_MASK (res, exp, N_ELEMS, _mask_cmp_ph_mask);
+
+#if AVX512F_LEN == 512
+  /* The *_round_* forms are only exercised for 512-bit vectors.  */
+  exp = EMULATE(cmp_ph) (src1, src2, NET_MASK, _CMP_EQ_OQ);
+  res = INTRINSIC (_cmp_round_ph_mask) (HF(src1), HF(src2), _CMP_EQ_OQ,
+					_MM_FROUND_NO_EXC);
+  CHECK_RESULT_MASK (res, exp, N_ELEMS, _cmp_round_ph_mask);
+
+  exp = EMULATE(cmp_ph) (src1, src2, MASK_VALUE, _CMP_EQ_OQ);
+  res = INTRINSIC (_mask_cmp_round_ph_mask) (MASK_VALUE, HF(src1),
+					     HF(src2), _CMP_EQ_OQ,
+					     _MM_FROUND_NO_EXC);
+  CHECK_RESULT_MASK (res, exp, N_ELEMS, _mask_cmp_round_ph_mask);
+#endif
+
+  if (n_errs != 0) {
+    abort ();
+  }
+}
+
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\[^\{\n\]*\\\$4\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$3\[^\n\r]*\{sae\}\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\[^\{\n\]*\\\$4\[^\n\r]*\{sae\}\[^\n\r\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Operands and result sinks for avx512f_test.  They are volatile, so
+   every read and write must actually be performed.  res2 is not
+   referenced anywhere in this file.  */
+volatile __mmask8 res, res1, res2;
+volatile __m128h x1, x2;
+volatile __mmask8 m8;
+
+/* Compile-only body for the scalar vcmpsh directives at the top of the
+   file.  Unmasked calls use immediate 3 and masked calls immediate 4;
+   the directives expect two vcmpsh for each immediate, exactly one of
+   which carries {sae}.  */
+void extern
+avx512f_test (void)
+{
+ /* Immediate 3, no write mask.  */
+ res = _mm_cmp_sh_mask (x1, x2, 3);
+ /* Immediate 4 under write mask m8.  */
+ res = _mm_mask_cmp_sh_mask (m8, x1, x2, 4);
+ /* Same two shapes again, now with 8 as the trailing rounding
+    argument -- presumably what yields the {sae} variants.  */
+ res = _mm_cmp_round_sh_mask (x1, x2, 3, 8);
+ res1 = _mm_mask_cmp_round_sh_mask (m8, x1, x2, 4, 8);
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+/* NOTE(review): N_ELEMS is not referenced in the visible part of this
+   file (check_results_mask is called with a literal 1) -- confirm
+   whether the included helper needs it.  */
+#define N_ELEMS 8
+
+/* Reference model for the scalar FP16 compare-into-mask intrinsics.
+
+   OP1 and OP2 are unpacked with unpack_ph_2twops and only the first
+   float of each is compared: the result is 1 when they are equal
+   under `==' and 0 otherwise.  PREDICATE is accepted but ignored, so
+   only equality is modelled.
+
+   The compare is performed when bit 0 of K is set, and also when K is
+   zero.  NOTE(review): treating K == 0 as "compare anyway" looks like
+   a "no mask" convention; a real all-zero write mask would produce 0.
+   The callers in this file only pass 0x1 -- confirm before relying on
+   the K == 0 case.  */
+__mmask8 NOINLINE
+emulate_cmp_sh(V512 op1, V512 op2,
+	       __mmask8 k, int predicate)
+{
+  /* v2 and v4 only receive the second output of unpack_ph_2twops.  */
+  V512 v1, v2, v3, v4;
+  __mmask8 mr = 0;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+  unpack_ph_2twops(op2, &v3, &v4);
+
+  if ((k & 1) || !k)
+    mr = v1.f32[0] == v3.f32[0] ? 1 : 0;
+
+  return mr;
+}
+
+/* Check _mm_cmp_round_sh_mask and _mm_mask_cmp_round_sh_mask against
+   emulate_cmp_sh on src1.xmmh[0] / src2.xmmh[0] after init_src.  Both
+   checks use the equality predicate, which is the only comparison the
+   emulation performs.  The predicate and rounding argument are written
+   with their named constants (_CMP_EQ_OQ, _MM_FROUND_NO_EXC) rather
+   than the bare literals 0 and 8.  Aborts if any check incremented
+   n_errs.  */
+void
+test_512 (void)
+{
+  __mmask8 res, exp;
+
+  init_src();
+
+  /* Unmasked intrinsic; the emulation is given mask 0x1.  */
+  exp = emulate_cmp_sh(src1, src2, 0x1, _CMP_EQ_OQ);
+  res = _mm_cmp_round_sh_mask(src1.xmmh[0], src2.xmmh[0], _CMP_EQ_OQ,
+			      _MM_FROUND_NO_EXC);
+  check_results_mask(res, exp, 1, "_mm_cmp_round_sh_mask");
+
+  /* Masked intrinsic with the same mask 0x1.  */
+  exp = emulate_cmp_sh(src1, src2, 0x1, _CMP_EQ_OQ);
+  res = _mm_mask_cmp_round_sh_mask(0x1, src1.xmmh[0], src2.xmmh[0],
+				   _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+  check_results_mask(res, exp, 1, "_mm_mask_cmp_round_sh_mask");
+
+  if (n_errs != 0) {
+    abort ();
+  }
+}
+
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$3\[^\n\r]*\{sae\}\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$7\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$16\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$1\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$2\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$14\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$13\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$20\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$0\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$17\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$18\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$30\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$29\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpsh\[ \\t\]+\\\$4\[^\n\r0-9]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Operands (x, y) and result sink (res) for avx512f_test.  They are
+   volatile, so every read and write must actually be performed.  */
+volatile __m128h x, y;
+volatile int res;
+
+/* Compile-only body: fourteen comi/ucomi calls for the fourteen
+   scan-assembler-times directives at the top of the file.  Each
+   directive expects exactly one vcmpsh with its own immediate: 3 with
+   {sae}, then 7, 16, 1, 2, 14, 13, 20, 0, 17, 18, 30, 29 and 4.  The
+   directives are listed in the same order as the calls below --
+   presumably a one-to-one correspondence.  */
+void extern
+avx512f_test (void)
+{
+ /* Explicit predicate immediates (3 with rounding argument 8, then 7).  */
+ res = _mm_comi_round_sh (x, y, 3, 8);
+ res = _mm_comi_sh (x, y, 7);
+ /* comi* forms: the predicate is implied by the intrinsic name.  */
+ res = _mm_comieq_sh (x, y);
+ res = _mm_comilt_sh (x, y);
+ res = _mm_comile_sh (x, y);
+ res = _mm_comigt_sh (x, y);
+ res = _mm_comige_sh (x, y);
+ res = _mm_comineq_sh (x, y);
+ /* ucomi* forms of the same six relations.  */
+ res = _mm_ucomieq_sh (x, y);
+ res = _mm_ucomilt_sh (x, y);
+ res = _mm_ucomile_sh (x, y);
+ res = _mm_ucomigt_sh (x, y);
+ res = _mm_ucomige_sh (x, y);
+ res = _mm_ucomineq_sh (x, y);
+}
+
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+
+/* Evaluate _mm_comi_round_sh on src1.xmmh[0] and src2.xmmh[0] with
+   predicate IMM and rounding argument _MM_FROUND_NO_EXC, and abort
+   unless the result equals REL, the expected truth value computed in
+   C.  Relies on the `dst' and `dst_ref' variables of the calling
+   function.  The body is wrapped in do/while (0) so the macro expands
+   to a single statement, and both arguments are parenthesized.  */
+#define CMP(imm, rel) \
+  do { \
+    dst_ref = (int) (rel); \
+    dst = _mm_comi_round_sh(src1.xmmh[0], src2.xmmh[0], (imm), \
+			    _MM_FROUND_NO_EXC); \
+    if (dst_ref != dst) \
+      abort(); \
+  } while (0)
+
+/* Run _mm_comi_round_sh through 32 comparison predicates and check each
+   result, via CMP, against the equivalent C expression.  s1 and s2 are
+   the first floats that unpack_ph_2twops produces for src1 and src2
+   after init_src.  Ordered predicates (_O*) are modelled as
+   "!isunordered && relation" and unordered ones (_U*) as
+   "isunordered || relation".  CMP aborts on the first mismatch.  */
+void
+test_512 (void)
+{
+  /* v2 and v4 only receive the second output of unpack_ph_2twops.  */
+  V512 v1,v2,v3,v4;
+  float s1,s2;
+  /* Both are written and compared inside CMP.  */
+  int dst;
+  __mmask8 dst_ref;
+
+  init_src();
+  unpack_ph_2twops(src1, &v1, &v2);
+  unpack_ph_2twops(src2, &v3, &v4);
+  s1 = v1.f32[0];
+  s2 = v3.f32[0];
+
+  CMP(_CMP_EQ_OQ, !isunordered(s1, s2) && s1 == s2);
+  CMP(_CMP_LT_OS, !isunordered(s1, s2) && s1 < s2);
+  CMP(_CMP_LE_OS, !isunordered(s1, s2) && s1 <= s2);
+  CMP(_CMP_UNORD_Q, isunordered(s1, s2));
+  CMP(_CMP_NEQ_UQ, isunordered(s1, s2) || s1 != s2);
+  CMP(_CMP_NLT_US, isunordered(s1, s2) || s1 >= s2);
+  CMP(_CMP_NLE_US, isunordered(s1, s2) || s1 > s2);
+  CMP(_CMP_ORD_Q, !isunordered(s1, s2));
+
+  CMP(_CMP_EQ_UQ, isunordered(s1, s2) || s1 == s2);
+  CMP(_CMP_NGE_US, isunordered(s1, s2) || s1 < s2);
+  CMP(_CMP_NGT_US, isunordered(s1, s2) || s1 <= s2);
+
+  CMP(_CMP_FALSE_OQ, 0);
+  CMP(_CMP_NEQ_OQ, !isunordered(s1, s2) && s1 != s2);
+  CMP(_CMP_GE_OS, !isunordered(s1, s2) && s1 >= s2);
+  CMP(_CMP_GT_OS, !isunordered(s1, s2) && s1 > s2);
+  CMP(_CMP_TRUE_UQ, 1);
+
+  CMP(_CMP_EQ_OS, !isunordered(s1, s2) && s1 == s2);
+  CMP(_CMP_LT_OQ, !isunordered(s1, s2) && s1 < s2);
+  CMP(_CMP_LE_OQ, !isunordered(s1, s2) && s1 <= s2);
+  CMP(_CMP_UNORD_S, isunordered(s1, s2));
+  CMP(_CMP_NEQ_US, isunordered(s1, s2) || s1 != s2);
+  CMP(_CMP_NLT_UQ, isunordered(s1, s2) || s1 >= s2);
+  CMP(_CMP_NLE_UQ, isunordered(s1, s2) || s1 > s2);
+  CMP(_CMP_ORD_S, !isunordered(s1, s2));
+  CMP(_CMP_EQ_US, isunordered(s1, s2) || s1 == s2);
+  CMP(_CMP_NGE_UQ, isunordered(s1, s2) || s1 < s2);
+  CMP(_CMP_NGT_UQ, isunordered(s1, s2) || s1 <= s2);
+  CMP(_CMP_FALSE_OS, 0);
+  CMP(_CMP_NEQ_OS, !isunordered(s1, s2) && s1 != s2);
+  CMP(_CMP_GE_OQ, !isunordered(s1, s2) && s1 >= s2);
+  CMP(_CMP_GT_OQ, !isunordered(s1, s2) && s1 > s2);
+  CMP(_CMP_TRUE_US, 1);
+}
+
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+
+/* Evaluate _mm_comi_round_sh on src1.xmmh[0] and src2.xmmh[0] with
+   predicate IMM and rounding argument _MM_FROUND_NO_EXC, and abort
+   unless the result equals REL, the expected truth value computed in
+   C.  Relies on the `dst' and `dst_ref' variables of the calling
+   function.  The body is wrapped in do/while (0) so the macro expands
+   to a single statement, and both arguments are parenthesized.  */
+#define CMP(imm, rel) \
+  do { \
+    dst_ref = (int) (rel); \
+    dst = _mm_comi_round_sh(src1.xmmh[0], src2.xmmh[0], (imm), \
+			    _MM_FROUND_NO_EXC); \
+    if (dst_ref != dst) \
+      abort(); \
+  } while (0)
+
+/* NaN variant of the comi predicate test.  The sources come from
+   init_src_nanf, which plants a NaN in the first value packed into
+   src1, so s1 is presumably NaN after unpacking and the isunordered()
+   terms decide every expectation -- confirm against pack_twops_2ph /
+   unpack_ph_2twops.  Each of the 32 predicates is checked, via CMP,
+   against the equivalent C expression; CMP aborts on the first
+   mismatch.  */
+void
+test_512 (void)
+{
+  /* v2 and v4 only receive the second output of unpack_ph_2twops.  */
+  V512 v1,v2,v3,v4;
+  float s1,s2;
+  /* Both are written and compared inside CMP.  */
+  int dst;
+  __mmask8 dst_ref;
+
+  init_src_nanf();
+  unpack_ph_2twops(src1, &v1, &v2);
+  unpack_ph_2twops(src2, &v3, &v4);
+  s1 = v1.f32[0];
+  s2 = v3.f32[0];
+
+  CMP(_CMP_EQ_OQ, !isunordered(s1, s2) && s1 == s2);
+  CMP(_CMP_LT_OS, !isunordered(s1, s2) && s1 < s2);
+  CMP(_CMP_LE_OS, !isunordered(s1, s2) && s1 <= s2);
+  CMP(_CMP_UNORD_Q, isunordered(s1, s2));
+  CMP(_CMP_NEQ_UQ, isunordered(s1, s2) || s1 != s2);
+  CMP(_CMP_NLT_US, isunordered(s1, s2) || s1 >= s2);
+  CMP(_CMP_NLE_US, isunordered(s1, s2) || s1 > s2);
+  CMP(_CMP_ORD_Q, !isunordered(s1, s2));
+
+  CMP(_CMP_EQ_UQ, isunordered(s1, s2) || s1 == s2);
+  CMP(_CMP_NGE_US, isunordered(s1, s2) || s1 < s2);
+  CMP(_CMP_NGT_US, isunordered(s1, s2) || s1 <= s2);
+
+  CMP(_CMP_FALSE_OQ, 0);
+  CMP(_CMP_NEQ_OQ, !isunordered(s1, s2) && s1 != s2);
+  CMP(_CMP_GE_OS, !isunordered(s1, s2) && s1 >= s2);
+  CMP(_CMP_GT_OS, !isunordered(s1, s2) && s1 > s2);
+  CMP(_CMP_TRUE_UQ, 1);
+
+  CMP(_CMP_EQ_OS, !isunordered(s1, s2) && s1 == s2);
+  CMP(_CMP_LT_OQ, !isunordered(s1, s2) && s1 < s2);
+  CMP(_CMP_LE_OQ, !isunordered(s1, s2) && s1 <= s2);
+  CMP(_CMP_UNORD_S, isunordered(s1, s2));
+  CMP(_CMP_NEQ_US, isunordered(s1, s2) || s1 != s2);
+  CMP(_CMP_NLT_UQ, isunordered(s1, s2) || s1 >= s2);
+  CMP(_CMP_NLE_UQ, isunordered(s1, s2) || s1 > s2);
+  CMP(_CMP_ORD_S, !isunordered(s1, s2));
+  CMP(_CMP_EQ_US, isunordered(s1, s2) || s1 == s2);
+  CMP(_CMP_NGE_UQ, isunordered(s1, s2) || s1 < s2);
+  CMP(_CMP_NGT_UQ, isunordered(s1, s2) || s1 <= s2);
+  CMP(_CMP_FALSE_OS, 0);
+  CMP(_CMP_NEQ_OS, !isunordered(s1, s2) && s1 != s2);
+  CMP(_CMP_GE_OQ, !isunordered(s1, s2) && s1 >= s2);
+  CMP(_CMP_GT_OQ, !isunordered(s1, s2) && s1 > s2);
+  CMP(_CMP_TRUE_US, 1);
+}
+
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$1\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$2\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpph\[ \\t\]+\\\$4\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Operands and result sinks for avx512f_test: res, x1, x2 and m16
+   serve the 256-bit calls, res1, x3, x4 and m8 the 128-bit calls.
+   They are volatile, so every read and write must actually be
+   performed.  */
+volatile __mmask16 res;
+volatile __mmask8 res1;
+volatile __m256h x1, x2;
+volatile __m128h x3, x4;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+/* Compile-only body: one call for each scan-assembler-times directive
+   at the top of the file.  Immediates 1 and 2 are expected on %ymm
+   operands and immediates 3 and 4 on %xmm operands; the masked calls
+   (2 and 4) are expected to show a %k{%k} destination.  */
+void extern
+avx512f_test (void)
+{
+ /* 256-bit operands: unmasked, then under write mask m16.  */
+ res = _mm256_cmp_ph_mask (x1, x2, 1);
+ res = _mm256_mask_cmp_ph_mask (m16, x1, x2, 2);
+ /* 128-bit operands: unmasked, then under write mask m8.  */
+ res1 = _mm_cmp_ph_mask (x3, x4, 3);
+ res1 = _mm_mask_cmp_ph_mask (m8, x3, x4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+/* Driver that builds the shared test body avx512fp16-vcmpph-1b.c
+   twice, once per vector length.  DEBUG is left defined for both
+   passes; in check_results_mask it enables the printf on a mask
+   mismatch.  */
+#define DEBUG
+#define AVX512VL
+/* First pass: 256-bit vectors.  */
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512fp16-vcmpph-1b.c"
+
+/* Drop the length macros so they can be redefined for the next pass.  */
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+/* Second pass: 128-bit vectors.  AVX512F_LEN_HALF is 128 here too,
+   i.e. equal to AVX512F_LEN rather than half of it.  */
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512fp16-vcmpph-1b.c"
+