diff mbox series

[committed,AArch64] Add support for SVE CLS and CLZ

Message ID mptef1onmoh.fsf@arm.com
State New
Headers show
Series [committed,AArch64] Add support for SVE CLS and CLZ | expand

Commit Message

Richard Sandiford Aug. 14, 2019, 9:03 a.m. UTC
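This patch adds support for unpredicated SVE CLS and CLZ.  CLS is the SVE
mnemonic for the clrsb operation, which is why the iterator and optab
changes below refer to clrsb rather than cls.  A later patch will add
support for predicated unary integer arithmetic.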

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274437.

Richard


2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (SVE_INT_UNARY): Add clrsb and clz.
	(optab, sve_int_op): Handle them.
	* config/aarch64/aarch64-sve.md: Expand comment.

gcc/testsuite/
	* gcc.target/aarch64/vect-clz.c: Force SVE off.
	* gcc.target/aarch64/sve/clrsb_1.c: New test.
	* gcc.target/aarch64/sve/clrsb_1_run.c: Likewise.
	* gcc.target/aarch64/sve/clz_1.c: Likewise.
	* gcc.target/aarch64/sve/clz_1_run.c: Likewise.
diff mbox series

Patch

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2019-08-14 09:39:44.323282457 +0100
+++ gcc/config/aarch64/iterators.md	2019-08-14 10:00:45.485990851 +0100
@@ -1276,7 +1276,7 @@  (define_code_iterator UCOMPARISONS [ltu
 (define_code_iterator FAC_COMPARISONS [lt le ge gt])
 
 ;; SVE integer unary operations.
-(define_code_iterator SVE_INT_UNARY [abs neg not popcount])
+(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount])
 
 ;; SVE integer binary operations.
 (define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin
@@ -1307,6 +1307,8 @@  (define_code_attr optab [(ashift "ashl")
 			 (unsigned_fix "fixuns")
 			 (float "float")
 			 (unsigned_float "floatuns")
+			 (clrsb "clrsb")
+			 (clz "clz")
 			 (popcount "popcount")
 			 (and "and")
 			 (ior "ior")
@@ -1474,6 +1476,8 @@  (define_code_attr sve_int_op [(plus "add
 			      (ior "orr")
 			      (xor "eor")
 			      (not "not")
+			      (clrsb "cls")
+			      (clz "clz")
 			      (popcount "cnt")])
 
 (define_code_attr sve_int_op_rev [(plus "add")
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:58:35.914942337 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2019-08-14 10:00:45.485990851 +0100
@@ -1422,6 +1422,8 @@  (define_expand "vec_extract<vpred><Vel>"
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ABS
+;; - CLS (= clrsb)
+;; - CLZ
 ;; - CNT (= popcount)
 ;; - NEG
 ;; - NOT
Index: gcc/testsuite/gcc.target/aarch64/vect-clz.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/vect-clz.c	2019-03-08 18:14:30.068993639 +0000
+++ gcc/testsuite/gcc.target/aarch64/vect-clz.c	2019-08-14 10:00:45.485990851 +0100
@@ -1,6 +1,8 @@ 
 /* { dg-do run } */
 /* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 extern void abort ();
 
 void
Index: gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c	2019-08-14 10:00:45.485990851 +0100
@@ -0,0 +1,22 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+void __attribute__ ((noinline, noclone))
+clrsb_32 (unsigned int *restrict dst, uint32_t *restrict src, int size)
+{
+  for (int i = 0; i < size; ++i)
+    dst[i] = __builtin_clrsb (src[i]);
+}
+
+void __attribute__ ((noinline, noclone))
+clrsb_64 (unsigned int *restrict dst, uint64_t *restrict src, int size)
+{
+  for (int i = 0; i < size; ++i)
+    dst[i] = __builtin_clrsbll (src[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c	2019-08-14 10:00:45.485990851 +0100
@@ -0,0 +1,50 @@ 
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "clrsb_1.c"
+
+extern void abort (void) __attribute__ ((noreturn));
+
+unsigned int data[] = {
+  0xffffff80, 24,
+  0xffffffff, 31,
+  0x00000000, 31,
+  0x80000000, 0,
+  0x7fffffff, 0,
+  0x000003ff, 21,
+  0x1fffffff, 2,
+  0x0000ffff, 15,
+  0xffff0000, 15
+};
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  unsigned int count = sizeof (data) / sizeof (data[0]) / 2;
+
+  uint32_t in32[count];
+  unsigned int out32[count];
+  for (unsigned int i = 0; i < count; ++i)
+    {
+      in32[i] = data[i * 2];
+      asm volatile ("" ::: "memory");
+    }
+  clrsb_32 (out32, in32, count);
+  for (unsigned int i = 0; i < count; ++i)
+    if (out32[i] != data[i * 2 + 1])
+      abort ();
+
+  uint64_t in64[count];
+  unsigned int out64[count];
+  for (unsigned int i = 0; i < count; ++i)
+    {
+      in64[i] = (uint64_t) data[i * 2] << 32;
+      asm volatile ("" ::: "memory");
+    }
+  clrsb_64 (out64, in64, count);
+  for (unsigned int i = 0; i < count; ++i)
+    if (out64[i] != (data[i * 2] ? data[i * 2 + 1] : 63))
+      abort ();
+
+  return 0;
+}
Index: gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/clz_1.c	2019-08-14 10:00:45.485990851 +0100
@@ -0,0 +1,22 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+void __attribute__ ((noinline, noclone))
+clz_32 (unsigned int *restrict dst, uint32_t *restrict src, int size)
+{
+  for (int i = 0; i < size; ++i)
+    dst[i] = __builtin_clz (src[i]);
+}
+
+void __attribute__ ((noinline, noclone))
+clz_64 (unsigned int *restrict dst, uint64_t *restrict src, int size)
+{
+  for (int i = 0; i < size; ++i)
+    dst[i] = __builtin_clzll (src[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c	2019-08-14 10:00:45.485990851 +0100
@@ -0,0 +1,50 @@ 
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "clz_1.c"
+
+extern void abort (void) __attribute__ ((noreturn));
+
+unsigned int data[] = {
+  0xffffff80, 0,
+  0xffffffff, 0,
+  0x00000000, 32,
+  0x80000000, 0,
+  0x7fffffff, 1,
+  0x000003ff, 22,
+  0x1fffffff, 3,
+  0x0000ffff, 16,
+  0xffff0000, 0
+};
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  unsigned int count = sizeof (data) / sizeof (data[0]) / 2;
+
+  uint32_t in32[count];
+  unsigned int out32[count];
+  for (unsigned int i = 0; i < count; ++i)
+    {
+      in32[i] = data[i * 2];
+      asm volatile ("" ::: "memory");
+    }
+  clz_32 (out32, in32, count);
+  for (unsigned int i = 0; i < count; ++i)
+    if (out32[i] != data[i * 2 + 1])
+      abort ();
+
+  uint64_t in64[count];
+  unsigned int out64[count];
+  for (unsigned int i = 0; i < count; ++i)
+    {
+      in64[i] = (uint64_t) data[i * 2] << 10;
+      asm volatile ("" ::: "memory");
+    }
+  clz_64 (out64, in64, count);
+  for (unsigned int i = 0; i < count; ++i)
+    if (out64[i] != (data[i * 2] ? data[i * 2 + 1] + 22 : 64))
+      abort ();
+
+  return 0;
+}