diff mbox series

Avoid is_constant calls in vectorizable_bswap

Message ID 87pny9tppf.fsf@arm.com
State New
Headers show
Series Avoid is_constant calls in vectorizable_bswap | expand

Commit Message

Richard Sandiford Aug. 23, 2018, 9:08 a.m. UTC
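The "new" VEC_PERM_EXPR handling makes it easy to support bswap
for variable-length vectors.  Instead of requiring both the number of
elements and the number of bytes in the vector to be compile-time
constants (the old is_constant checks), vectorizable_bswap now only
requires the number of bytes to be a constant multiple of the number
of elements, which it checks with constant_multiple_p.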

Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
and x86_64-linux-gnu.  OK to install?

Richard


2018-08-23  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
	vectors.

gcc/testsuite/
	* gcc.target/aarch64/sve/bswap_1.c: New test.
	* gcc.target/aarch64/sve/bswap_2.c: Likewise.
	* gcc.target/aarch64/sve/bswap_3.c: Likewise.

Comments

Richard Biener Aug. 24, 2018, 12:51 p.m. UTC | #1
On Thu, Aug 23, 2018 at 11:09 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> The "new" VEC_PERM_EXPR handling makes it easy to support bswap
> for variable-length vectors.
>
> Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
> and x86_64-linux-gnu.  OK to install?

OK.

Richard.

> Richard
>
>
> 2018-08-23  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         * tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
>         vectors.
>
> gcc/testsuite/
>         * gcc.target/aarch64/sve/bswap_1.c: New test.
>         * gcc.target/aarch64/sve/bswap_2.c: Likewise.
>         * gcc.target/aarch64/sve/bswap_3.c: Likewise.
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       2018-08-23 09:59:35.245682525 +0100
> +++ gcc/tree-vect-stmts.c       2018-08-23 10:07:30.233601466 +0100
> @@ -2961,13 +2961,10 @@ vectorizable_bswap (stmt_vec_info stmt_i
>    vec_info *vinfo = stmt_info->vinfo;
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>    unsigned ncopies;
> -  unsigned HOST_WIDE_INT nunits, num_bytes;
>
>    op = gimple_call_arg (stmt, 0);
>    vectype = STMT_VINFO_VECTYPE (stmt_info);
> -
> -  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
> -    return false;
> +  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>
>    /* Multiple types in SLP are handled by creating the appropriate number of
>       vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
> @@ -2983,11 +2980,11 @@ vectorizable_bswap (stmt_vec_info stmt_i
>    if (! char_vectype)
>      return false;
>
> -  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
> +  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
> +  unsigned word_bytes;
> +  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
>      return false;
>
> -  unsigned word_bytes = num_bytes / nunits;
> -
>    /* The encoding uses one stepped pattern for each byte in the word.  */
>    vec_perm_builder elts (num_bytes, word_bytes, 3);
>    for (unsigned i = 0; i < 3; ++i)
> Index: gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c
> ===================================================================
> --- /dev/null   2018-07-26 10:26:13.137955424 +0100
> +++ gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c      2018-08-23 10:07:30.233601466 +0100
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +#include <stdint.h>
> +
> +void
> +f (uint16_t *a, uint16_t *b)
> +{
> +  for (int i = 0; i < 100; ++i)
> +    a[i] = __builtin_bswap16 (b[i]);
> +}
> +
> +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 { xfail aarch64_big_endian } } } */
> Index: gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c
> ===================================================================
> --- /dev/null   2018-07-26 10:26:13.137955424 +0100
> +++ gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c      2018-08-23 10:07:30.233601466 +0100
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +#include <stdint.h>
> +
> +void
> +f (uint32_t *a, uint32_t *b)
> +{
> +  for (int i = 0; i < 100; ++i)
> +    a[i] = __builtin_bswap32 (b[i]);
> +}
> +
> +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 { xfail aarch64_big_endian } } } */
> Index: gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c
> ===================================================================
> --- /dev/null   2018-07-26 10:26:13.137955424 +0100
> +++ gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c      2018-08-23 10:07:30.233601466 +0100
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +#include <stdint.h>
> +
> +void
> +f (uint64_t *a, uint64_t *b)
> +{
> +  for (int i = 0; i < 100; ++i)
> +    a[i] = __builtin_bswap64 (b[i]);
> +}
> +
> +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 { xfail aarch64_big_endian } } } */
diff mbox series

Patch

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	2018-08-23 09:59:35.245682525 +0100
+++ gcc/tree-vect-stmts.c	2018-08-23 10:07:30.233601466 +0100
@@ -2961,13 +2961,10 @@  vectorizable_bswap (stmt_vec_info stmt_i
   vec_info *vinfo = stmt_info->vinfo;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   unsigned ncopies;
-  unsigned HOST_WIDE_INT nunits, num_bytes;
 
   op = gimple_call_arg (stmt, 0);
   vectype = STMT_VINFO_VECTYPE (stmt_info);
-
-  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
-    return false;
+  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
@@ -2983,11 +2980,11 @@  vectorizable_bswap (stmt_vec_info stmt_i
   if (! char_vectype)
     return false;
 
-  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
+  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
+  unsigned word_bytes;
+  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
     return false;
 
-  unsigned word_bytes = num_bytes / nunits;
-
   /* The encoding uses one stepped pattern for each byte in the word.  */
   vec_perm_builder elts (num_bytes, word_bytes, 3);
   for (unsigned i = 0; i < 3; ++i)
Index: gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c
===================================================================
--- /dev/null	2018-07-26 10:26:13.137955424 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c	2018-08-23 10:07:30.233601466 +0100
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+void
+f (uint16_t *a, uint16_t *b)
+{
+  for (int i = 0; i < 100; ++i)
+    a[i] = __builtin_bswap16 (b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 { xfail aarch64_big_endian } } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c
===================================================================
--- /dev/null	2018-07-26 10:26:13.137955424 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c	2018-08-23 10:07:30.233601466 +0100
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+void
+f (uint32_t *a, uint32_t *b)
+{
+  for (int i = 0; i < 100; ++i)
+    a[i] = __builtin_bswap32 (b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 { xfail aarch64_big_endian } } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c
===================================================================
--- /dev/null	2018-07-26 10:26:13.137955424 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c	2018-08-23 10:07:30.233601466 +0100
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+void
+f (uint64_t *a, uint64_t *b)
+{
+  for (int i = 0; i < 100; ++i)
+    a[i] = __builtin_bswap64 (b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 { xfail aarch64_big_endian } } } */