diff mbox series

[pushed] aarch64: Support unpacked CNOT on SVE

Message ID mptlfd3em3w.fsf@arm.com
State New
Headers show
Series [pushed] aarch64: Support unpacked CNOT on SVE | expand

Commit Message

Richard Sandiford Jan. 8, 2021, 10:53 a.m. UTC
This patch adds unpacked support for unconditional and
conditional CNOT.  The type suffix has to be taken from
the element size rather than the container size.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
	* config/aarch64/aarch64-sve.md (*cnot<mode>): Extend from
	SVE_FULL_I to SVE_I.
	(*cond_cnot<mode>_2, *cond_cnot<mode>_any): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/cnot_2.c: New test.
	* gcc.target/aarch64/sve/cond_cnot_4.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_4_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_5.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_5_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_6.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_6_run.c: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md             | 36 +++++++++----------
 gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c | 29 +++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_4.c      | 32 +++++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_4_run.c  | 26 ++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_5.c      | 32 +++++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_5_run.c  | 26 ++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_6.c      | 31 ++++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_6_run.c  | 26 ++++++++++++++
 8 files changed, 220 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b83f9912cb6..2f5a5e3c914 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3227,16 +3227,16 @@  (define_expand "@aarch64_pred_cnot<mode>"
 )
 
 (define_insn "*cnot<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-	(unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_I
 	  [(unspec:<VPRED>
 	     [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
 	      (eq:<VPRED>
-		(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
-		(match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+		(match_operand:SVE_I 2 "register_operand" "0, w")
+		(match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
 	     UNSPEC_PRED_Z)
-	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+	   (match_operand:SVE_I 4 "aarch64_simd_imm_one")
 	   (match_dup 3)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
@@ -3274,19 +3274,19 @@  (define_expand "@cond_cnot<mode>"
 
 ;; Predicated logical inverse, merging with the first input.
 (define_insn_and_rewrite "*cond_cnot<mode>_2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-	(unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   ;; Logical inverse of operand 2 (as above).
-	   (unspec:SVE_FULL_I
+	   (unspec:SVE_I
 	     [(unspec:<VPRED>
 		[(match_operand 5)
 		 (const_int SVE_KNOWN_PTRUE)
 		 (eq:<VPRED>
-		   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
-		   (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+		   (match_operand:SVE_I 2 "register_operand" "0, w")
+		   (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
 		UNSPEC_PRED_Z)
-	      (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+	      (match_operand:SVE_I 4 "aarch64_simd_imm_one")
 	      (match_dup 3)]
 	     UNSPEC_SEL)
 	   (match_dup 2)]
@@ -3310,22 +3310,22 @@  (define_insn_and_rewrite "*cond_cnot<mode>_2"
 ;; as earlyclobber helps to make the instruction more regular to the
 ;; register allocator.
 (define_insn_and_rewrite "*cond_cnot<mode>_any"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
-	(unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
+	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   ;; Logical inverse of operand 2 (as above).
-	   (unspec:SVE_FULL_I
+	   (unspec:SVE_I
 	     [(unspec:<VPRED>
 		[(match_operand 5)
 		 (const_int SVE_KNOWN_PTRUE)
 		 (eq:<VPRED>
-		   (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
-		   (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+		   (match_operand:SVE_I 2 "register_operand" "w, w, w")
+		   (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
 		UNSPEC_PRED_Z)
-	      (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+	      (match_operand:SVE_I 4 "aarch64_simd_imm_one")
 	      (match_dup 3)]
 	     UNSPEC_SEL)
-	   (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+	   (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
 	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
   "@
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
new file mode 100644
index 00000000000..fe778234424
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)				\
+  void __attribute__ ((noipa))					\
+  test_##TYPE1##_##TYPE2 (TYPE2 *restrict r,			\
+			  TYPE1 *restrict pred,			\
+			  TYPE2 *restrict a)			\
+  {								\
+    for (int i = 0; i < COUNT; ++i)				\
+      if (pred[i])						\
+	r[i] = !a[i];						\
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
new file mode 100644
index 00000000000..729d3f4f2ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)				\
+  void __attribute__ ((noipa))					\
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r,			\
+			  TYPE2 *__restrict a,			\
+			  TYPE1 *__restrict pred)		\
+  {								\
+    for (int i = 0; i < COUNT; ++i)				\
+      r[i] = pred[i] ? !a[i] : a[i];				\
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
new file mode 100644
index 00000000000..de9c0a502e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_4.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N)				\
+  {								\
+    TYPE1 pred[N];						\
+    TYPE2 r[N], a[N];						\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = i & 1 ? 0 : 3 * (i + 1);				\
+	pred[i] = (i % 3 < 2);					\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE1##_##TYPE2 (r, a, pred);			\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE2) (pred[i] ? !a[i] : a[i]))		\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
new file mode 100644
index 00000000000..7318e108591
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)				\
+  void __attribute__ ((noipa))					\
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r,			\
+			  TYPE1 *__restrict a,			\
+			  TYPE2 *__restrict b)			\
+  {								\
+    for (int i = 0; i < COUNT; ++i)				\
+      r[i] = a[i] == 0 ? !b[i] : a[i];				\
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
new file mode 100644
index 00000000000..f8f277c32c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_5.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N)				\
+  {								\
+    TYPE1 a[N];							\
+    TYPE2 r[N], b[N];						\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = i % 3 < 2 ? 0 : i * 42;				\
+	b[i] = i & 1 ? 0 : 3 * (i + 1);				\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE1##_##TYPE2 (r, a, b);				\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : a[i]))		\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
new file mode 100644
index 00000000000..d44e357f44a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)				\
+  void __attribute__ ((noipa))					\
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r,			\
+			  TYPE1 *__restrict a,			\
+			  TYPE2 *__restrict b)			\
+  {								\
+    for (int i = 0; i < COUNT; ++i)				\
+      r[i] = a[i] == 0 ? !b[i] : 127;				\
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
new file mode 100644
index 00000000000..9e33616dc8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_6.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N)				\
+  {								\
+    TYPE1 a[N];							\
+    TYPE2 r[N], b[N];						\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = i % 3 < 2 ? 0 : i * 42;				\
+	b[i] = i & 1 ? 0 : 3 * (i + 1);				\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE1##_##TYPE2 (r, a, b);				\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : 127))		\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}