diff mbox series

[45/62] AVX512FP16: Add testcase for fp16 bitwise operations.

Message ID 20210701061648.9447-46-hongtao.liu@intel.com
State New
Headers show
Series Support all AVX512FP16 intrinsics. | expand

Commit Message

Liu, Hongtao July 1, 2021, 6:16 a.m. UTC
gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-neg-1a.c: New test.
	* gcc.target/i386/avx512fp16-neg-1b.c: Ditto.
	* gcc.target/i386/avx512fp16-scalar-bitwise-1a.c: Ditto.
	* gcc.target/i386/avx512fp16-scalar-bitwise-1b.c: Ditto.
	* gcc.target/i386/avx512fp16-vector-bitwise-1a.c: Ditto.
	* gcc.target/i386/avx512fp16-vector-bitwise-1b.c: Ditto.
	* gcc.target/i386/avx512fp16vl-neg-1a.c: Ditto.
	* gcc.target/i386/avx512fp16vl-neg-1b.c: Ditto.
---
 .../gcc.target/i386/avx512fp16-neg-1a.c       |  19 +++
 .../gcc.target/i386/avx512fp16-neg-1b.c       |  33 +++++
 .../i386/avx512fp16-scalar-bitwise-1a.c       |  31 +++++
 .../i386/avx512fp16-scalar-bitwise-1b.c       |  82 ++++++++++++
 .../i386/avx512fp16-vector-bitwise-1a.c       | 124 ++++++++++++++++++
 .../i386/avx512fp16-vector-bitwise-1b.c       | 119 +++++++++++++++++
 .../gcc.target/i386/avx512fp16vl-neg-1a.c     |  18 +++
 .../gcc.target/i386/avx512fp16vl-neg-1b.c     |  33 +++++
 8 files changed, 459 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-neg-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-neg-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1b.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-neg-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-neg-1a.c
new file mode 100644
index 00000000000..bf7693e0b1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-neg-1a.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\r\]*%zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\r\]*%xmm0" 1 } } */
+
+#include<immintrin.h>
+
+_Float16
+neghf (_Float16 a)
+{
+  return -a;
+}
+
+__m512h
+neghf512 (__m512h a)
+{
+  return -a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-neg-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-neg-1b.c
new file mode 100644
index 00000000000..770f7b283d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-neg-1b.c
@@ -0,0 +1,33 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+static void
+test_512 (void)
+{
+  V512 v1, v2, v3, v4, exp, res;
+  int i;
+  init_src();
+  
+  unpack_ph_2twops(src1, &v1, &v2);
+  v1.f32[0] = -v1.f32[0];
+  exp = pack_twops_2ph(v1, v2);
+  res.zmmh = src1.zmmh;
+  res.f16[0] = -res.f16[0];
+  check_results(&res, &exp, 32, "neg");
+
+  unpack_ph_2twops(src1, &v1, &v2);
+  for (i=0; i<16; i++)
+  {
+    v1.f32[i] = -v1.f32[i];  
+    v2.f32[i] = -v2.f32[i];  
+  }
+  exp = pack_twops_2ph(v1, v2);
+  res.zmmh = -src1.zmmh;
+  check_results(&res, &exp, 32, "neg");
+  if (n_errs != 0) {
+      abort ();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1a.c
new file mode 100644
index 00000000000..1325c341a33
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1a.c
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16" } */
+
+_Float16
+f1 (_Float16 x)
+{
+  return __builtin_fabsf16 (x);
+}
+
+_Float16
+f2 (_Float16 x, _Float16 y)
+{
+  return __builtin_copysignf16 (x, y);
+}
+
+_Float16
+f3 (_Float16 x)
+{
+  return -x;
+}
+
+_Float16
+f4 (_Float16 x, _Float16 y)
+{
+  return x * __builtin_copysignf16 (1, y);
+}
+
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 4 } } */
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1b.c
new file mode 100644
index 00000000000..7a292519a4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-scalar-bitwise-1b.c
@@ -0,0 +1,82 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-Ofast -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void NOINLINE
+emulate_absneg_ph (V512 * dest, V512 op1, int abs)
+{
+  V512 v1, v2, v3, v4;
+  int i;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+  unpack_ph_2twops(*dest, &v3, &v4);
+
+  for (i = 0; i != 16; i++) {
+    if (abs) {
+      v3.f32[i] = __builtin_fabsf (v1.f32[i]);
+      v4.f32[i] = __builtin_fabsf (v2.f32[i]);
+    }
+    else {
+      v3.f32[i] = -v1.f32[i];
+      v4.f32[i] = -v2.f32[i];
+    }
+  }
+  *dest = pack_twops_2ph(v3, v4);
+}
+
+void NOINLINE
+emulate_copysign_ph (V512 * dest, V512 op1, V512 op2, int xorsign)
+{
+  V512 v1, v2, v3, v4, v5, v6;
+  int i;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+  unpack_ph_2twops(op2, &v3, &v4);
+  unpack_ph_2twops(*dest, &v5, &v6);
+
+  for (i = 0; i != 16; i++) {
+    if (xorsign) {
+      v5.f32[i] = v1.f32[i] * __builtin_copysignf (1, v3.f32[i]);
+      v6.f32[i] = v2.f32[i] * __builtin_copysignf (1, v4.f32[i]);
+    }
+    else {
+      v5.f32[i] = __builtin_copysignf (v1.f32[i], v3.f32[i]);
+      v6.f32[i] = __builtin_copysignf (v2.f32[i], v4.f32[i]);
+    }
+  }
+  *dest = pack_twops_2ph(v5, v6);
+}
+
+void
+test_512 (void)
+{
+  V512 res, exp;
+
+  init_src ();
+
+  /* Abs for float16.  */
+  emulate_absneg_ph (&exp, src1, 1);
+  res.f16[0] = __builtin_fabsf16 (src1.f16[0]);
+  check_results (&res, &exp, 1, "abs_float16");
+
+  /* Neg for float16.  */
+  emulate_absneg_ph (&exp, src1, 0);
+  res.f16[0] = -(src1.f16[0]);
+  check_results (&res, &exp, 1, "neg_float16");
+
+  /* Copysign for float16.  */
+  emulate_copysign_ph (&exp, src1, src2, 0);
+  res.f16[0] = __builtin_copysignf16 (src1.f16[0], src2.f16[0]);
+  check_results (&res, &exp, 1, "copysign_float16");
+
+  /* Xorsign for float16.  */
+  emulate_copysign_ph (&exp, src1, src2, 1);
+  res.f16[0] = src1.f16[0] * __builtin_copysignf16 (1, src2.f16[0]);
+  check_results (&res, &exp, 1, "xorsign_float16");
+
+  if (n_errs != 0) {
+    abort ();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1a.c
new file mode 100644
index 00000000000..13c05abc532
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1a.c
@@ -0,0 +1,124 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512vl -mavx512fp16" } */
+
+#include<immintrin.h>
+__m128h
+f1 (__m128h x)
+{
+  int i = 0;
+  __m128h y;
+  for (; i != 8; i++)
+    y[i] = __builtin_fabsf16 (x[i]);
+  return y;
+}
+
+__m256h
+f2 (__m256h x)
+{
+  int i = 0;
+  __m256h y;
+  for (; i != 16; i++)
+    y[i] = __builtin_fabsf16 (x[i]);
+  return y;
+}
+
+__m512h
+f3 (__m512h x)
+{
+  int i = 0;
+  __m512h y;
+  for (; i != 32; i++)
+    y[i] = __builtin_fabsf16 (x[i]);
+  return y;
+}
+
+__m128h
+f4 (__m128h x)
+{
+  return -x;
+}
+
+__m256h
+f5 (__m256h x)
+{
+  return -x;
+}
+
+__m512h
+f6 (__m512h x)
+{
+  return -x;
+}
+
+__m128h
+f7 (__m128h x, __m128h y)
+{
+  int i = 0;
+  __m128h z;
+  for (; i != 8; i++)
+    z[i] = __builtin_copysignf16 (x[i], y[i]);
+  return z;
+}
+
+__m256h
+f8 (__m256h x, __m256h y)
+{
+  int i = 0;
+  __m256h z;
+  for (; i != 16; i++)
+    z[i] = __builtin_copysignf16 (x[i], y[i]);
+  return z;
+}
+
+__m512h
+f9 (__m512h x, __m512h y)
+{
+  int i = 0;
+  __m512h z;
+  for (; i != 32; i++)
+    z[i] = __builtin_copysignf16 (x[i], y[i]);
+  return z;
+}
+
+__m128h
+f10 (__m128h x, __m128h y)
+{
+  int i = 0;
+  __m128h z;
+  for (; i != 8; i++)
+    z[i] = x[i] * __builtin_copysignf16 (1, y[i]);
+  return z;
+}
+
+__m256h
+f11 (__m256h x, __m256h y)
+{
+  int i = 0;
+  __m256h z;
+  for (; i != 16; i++)
+    z[i] = x[i] * __builtin_copysignf16 (1, y[i]);
+  return z;
+}
+
+__m512h
+f12 (__m512h x, __m512h y)
+{
+  int i = 0;
+  __m512h z;
+  for (; i != 32; i++)
+    z[i] = x[i] * __builtin_copysignf16 (1, y[i]);
+  return z;
+}
+
+/* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm0" } } */
+/* { dg-final { scan-assembler "vandps\[^\n\r\]*ymm0" } } */
+/* { dg-final { scan-assembler "vpandd\[^\n\r\]*zmm0" } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm0" 2 } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm0" 2 } } */
+/* { dg-final { scan-assembler-times "vpxord\[^\n\r\]*zmm0" 2 } } */
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*zmm0" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1b.c
new file mode 100644
index 00000000000..1398b360064
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-bitwise-1b.c
@@ -0,0 +1,119 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-Ofast -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void NOINLINE
+emulate_absneg_ph (V512 * dest, V512 op1, int abs)
+{
+  V512 v1, v2, v3, v4;
+  int i;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+  unpack_ph_2twops(*dest, &v3, &v4);
+
+  for (i = 0; i != 16; i++) {
+    if (abs) {
+      v3.f32[i] = __builtin_fabsf (v1.f32[i]);
+      v4.f32[i] = __builtin_fabsf (v2.f32[i]);
+    }
+    else {
+      v3.f32[i] = -v1.f32[i];
+      v4.f32[i] = -v2.f32[i];
+    }
+  }
+  *dest = pack_twops_2ph(v3, v4);
+}
+
+void NOINLINE
+emulate_copysign_ph (V512 * dest, V512 op1, V512 op2, int xorsign)
+{
+  V512 v1, v2, v3, v4, v5, v6;
+  int i;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+  unpack_ph_2twops(op2, &v3, &v4);
+  unpack_ph_2twops(*dest, &v5, &v6);
+
+  for (i = 0; i != 16; i++) {
+    if (xorsign) {
+      v5.f32[i] = v1.f32[i] * __builtin_copysignf (1, v3.f32[i]);
+      v6.f32[i] = v2.f32[i] * __builtin_copysignf (1, v4.f32[i]);
+    }
+    else {
+      v5.f32[i] = __builtin_copysignf (v1.f32[i], v3.f32[i]);
+      v6.f32[i] = __builtin_copysignf (v2.f32[i], v4.f32[i]);
+    }
+  }
+  *dest = pack_twops_2ph(v5, v6);
+}
+
+
+void
+test_512 (void)
+{
+  V512 res, exp;
+
+  init_src ();
+
+  /* Abs for vector float16.  */
+  emulate_absneg_ph (&exp, src1, 1);
+  for (int i = 0; i != 8; i++)
+    res.f16[i] = __builtin_fabsf16 (src1.f16[i]);
+  check_results (&res, &exp, 8, "abs_m128h");
+
+  for (int i = 0; i != 16; i++)
+    res.f16[i] = __builtin_fabsf16 (src1.f16[i]);
+  check_results (&res, &exp, 16, "abs_m256h");
+
+  for (int i = 0; i != 32; i++)
+    res.f16[i] = __builtin_fabsf16 (src1.f16[i]);
+  check_results (&res, &exp, 32, "abs_m512h");
+
+  /* Neg for vector float16.  */
+  emulate_absneg_ph (&exp, src1, 0);
+  for (int i = 0; i != 8; i++)
+    res.f16[i] = -(src1.f16[i]);
+  check_results (&res, &exp, 8, "neg_m128h");
+
+  for (int i = 0; i != 16; i++)
+    res.f16[i] = -(src1.f16[i]);
+  check_results (&res, &exp, 16, "neg_m256h");
+
+  for (int i = 0; i != 32; i++)
+    res.f16[i] = -(src1.f16[i]);
+  check_results (&res, &exp, 32, "neg_m512h");
+
+  /* Copysign for vector float16.  */
+  emulate_copysign_ph (&exp, src1, src2, 0);
+  for (int i = 0; i != 8; i++)
+    res.f16[i] = __builtin_copysignf16 (src1.f16[i], src2.f16[i]);
+  check_results (&res, &exp, 8, "copysign_m128h");
+
+  for (int i = 0; i != 16; i++)
+    res.f16[i] = __builtin_copysignf16 (src1.f16[i], src2.f16[i]);
+  check_results (&res, &exp, 16, "copysign_m256h");
+
+  for (int i = 0; i != 32; i++)
+    res.f16[i] = __builtin_copysignf16 (src1.f16[i], src2.f16[i]);
+  check_results (&res, &exp, 32, "copysign_m512h");
+
+  /* Xorsign for vector float16.  */
+  emulate_copysign_ph (&exp, src1, src2, 1);
+  for (int i = 0; i != 8; i++)
+    res.f16[i] = src1.f16[i] * __builtin_copysignf16 (1, src2.f16[i]);
+  check_results (&res, &exp, 8, "xorsign_m128h");
+
+  for (int i = 0; i != 16; i++)
+    res.f16[i] = src1.f16[i] * __builtin_copysignf16 (1, src2.f16[i]);
+  check_results (&res, &exp, 16, "xorsign_m256h");
+
+  for (int i = 0; i != 32; i++)
+    res.f16[i] = src1.f16[i] * __builtin_copysignf16 (1, src2.f16[i]);
+  check_results (&res, &exp, 32, "xorsign_m512h");
+
+  if (n_errs != 0) {
+    abort ();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1a.c
new file mode 100644
index 00000000000..a40a0d88dd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1a.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\r\]*%xmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\r\]*%ymm0" 1 } } */
+#include<immintrin.h>
+
+__m128h
+neghf128 (__m128h a)
+{
+  return -a;
+}
+
+__m256h
+neghf256 (__m256h a)
+{
+  return -a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1b.c
new file mode 100644
index 00000000000..d8f65fb3f60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1b.c
@@ -0,0 +1,33 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+static void
+test_512 (void)
+{
+  V512 v1, v2, v3, v4, exp, res;
+  int i;
+  init_src();
+  
+  unpack_ph_2twops(src1, &v1, &v2);
+  v1.f32[0] = -v1.f32[0];
+  exp = pack_twops_2ph(v1, v2);
+  res.zmmh = src1.zmmh;
+  res.f16[0] = -res.f16[0];
+  check_results(&res, &exp, 32, "neg");
+
+  unpack_ph_2twops(src1, &v1, &v2);
+  for (i=0; i<16; i++)
+  {
+    v1.f32[i] = -v1.f32[i];  
+    v2.f32[i] = -v2.f32[i];  
+  }
+  exp = pack_twops_2ph(v1, v2);
+  res.zmmh = -src1.zmmh;
+  check_results(&res, &exp, 32, "neg");
+  if (n_errs != 0) {
+      abort ();
+  }
+}