diff mbox series

Tweak gcc.dg/vect/bb-slp-4[01].c (PR92366)

Message ID mpt7e4272il.fsf@arm.com
State New
Headers show
Series Tweak gcc.dg/vect/bb-slp-4[01].c (PR92366) | expand

Commit Message

Richard Sandiford Nov. 14, 2019, 6:10 p.m. UTC
gcc.dg/vect/bb-slp-40.c was failing on some targets because the
explicit dg-options overrode things like -maltivec.  This patch
uses dg-additional-options instead.

Also, it seems safer not to require exactly 1 instance of each message,
since that depends on the target vector length.

gcc.dg/vect/bb-slp-41.c contained invariant constructors that are
vectorised on AArch64 (foo) and constructors that aren't (bar).
This meant that the number of times we print "Found vectorizable
constructor" depended on how many vector sizes we try, since we'd
print it for each failed attempt.

In foo, we create invariant { b[0], ... } and { b[1], ... },
and the test is making sure that the two separate invariant vectors
can be fed from the same vector load at b.  This is a different case
from bb-slp-40.c, where the constructors are naturally separate.
(The expected count is 4 rather than 2 because we can vectorise the
epilogue too.)

However, due to limitations in the loop vectoriser, we still do the
addition of { b[0], ... } and { b[1], ... } in the loop.  Hopefully
that'll be fixed at some point, so this patch adds an alternative test
that directly needs 4 separate invariant constructors.  E.g. with Joel's
SLP optimisation, the new test generates:

        ldr     q4, [x1]
        dup     v7.4s, v4.s[0]
        dup     v6.4s, v4.s[1]
        dup     v5.4s, v4.s[2]
        dup     v4.4s, v4.s[3]

instead of the somewhat bizarre:

        ldp     s6, s5, [x1, 4]
        ldr     s4, [x1, 12]
        ld1r    {v7.4s}, [x1]
        dup     v6.4s, v6.s[0]
        dup     v5.4s, v5.s[0]
        dup     v4.4s, v4.s[0]

The patch then disables vectorisation of the original foo in
bb-slp-41.c, so that we get the same correctness testing
for bar but don't need to test for specific counts.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
OK to install?

Richard


2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/testsuite/
	PR testsuite/92366
	* gcc.dg/vect/bb-slp-40.c: Use dg-additional-options instead
	of dg-options.  Remove expected counts.
	* gcc.dg/vect/bb-slp-41.c: Remove dg-options and explicit
	dg-do run.  Suppress vectorization of foo.
	* gcc.dg/vect/bb-slp-42.c: New test.

Comments

Richard Biener Nov. 14, 2019, 6:42 p.m. UTC | #1
On November 14, 2019 7:10:10 PM GMT+01:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>gcc.dg/vect/bb-slp-40.c was failing on some targets because the
>explicit dg-options overrode things like -maltivec.  This patch
>uses dg-additional-options instead.
>
>Also, it seems safer not to require exactly 1 instance of each message,
>since that depends on the target vector length.
>
>gcc.dg/vect/bb-slp-41.c contained invariant constructors that are
>vectorised on AArch64 (foo) and constructors that aren't (bar).
>This meant that the number of times we print "Found vectorizable
>constructor" depended on how many vector sizes we try, since we'd
>print it for each failed attempt.
>
>In foo, we create invariant { b[0], ... } and { b[1], ... },
>and the test is making sure that the two separate invariant vectors
>can be fed from the same vector load at b.  This is a different case
>from bb-slp-40.c, where the constructors are naturally separate.
>(The expected count is 4 rather than 2 because we can vectorise the
>epilogue too.)
>
>However, due to limitations in the loop vectoriser, we still do the
>addition of { b[0], ... } and { b[1], ... } in the loop.  Hopefully
>that'll be fixed at some point, so this patch adds an alternative test
>that directly needs 4 separate invariant constructors.  E.g. with
>Joel's
>SLP optimisation, the new test generates:
>
>        ldr     q4, [x1]
>        dup     v7.4s, v4.s[0]
>        dup     v6.4s, v4.s[1]
>        dup     v5.4s, v4.s[2]
>        dup     v4.4s, v4.s[3]
>
>instead of the somewhat bizarre:
>
>        ldp     s6, s5, [x1, 4]
>        ldr     s4, [x1, 12]
>        ld1r    {v7.4s}, [x1]
>        dup     v6.4s, v6.s[0]
>        dup     v5.4s, v5.s[0]
>        dup     v4.4s, v4.s[0]
>
>The patch then disables vectorisation of the original foo in
>bb-slp-41.c, so that we get the same correctness testing
>for bar but don't need to test for specific counts.
>
>Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
>OK to install?

Ok. 

Richard. 

>Richard
>
>
>2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
>
>gcc/testsuite/
>	PR testsuite/92366
>	* gcc.dg/vect/bb-slp-40.c: Use dg-additional-options instead
>	of dg-options.  Remove expected counts.
>	* gcc.dg/vect/bb-slp-41.c: Remove dg-options and explicit
>	dg-do run.  Suppress vectorization of foo.
>	* gcc.dg/vect/bb-slp-42.c: New test.
>
>Index: gcc/testsuite/gcc.dg/vect/bb-slp-40.c
>===================================================================
>--- gcc/testsuite/gcc.dg/vect/bb-slp-40.c	2019-11-04 21:13:57.363758109
>+0000
>+++ gcc/testsuite/gcc.dg/vect/bb-slp-40.c	2019-11-14 18:08:36.323546916
>+0000
>@@ -1,5 +1,5 @@
> /* { dg-do compile } */
>-/* { dg-options "-O3 -fdump-tree-slp-all" } */
>+/* { dg-additional-options "-fvect-cost-model=dynamic" } */
> /* { dg-require-effective-target vect_int } */
> 
> char g_d[1024], g_s1[1024], g_s2[1024];
>@@ -30,5 +30,5 @@ void foo(void)
> }
> 
> /* See that we vectorize an SLP instance.  */
>-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor"
>1 "slp1" } } */
>-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1
>"slp1" } } */
>+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1"
>} } */
>+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" }
>} */
>Index: gcc/testsuite/gcc.dg/vect/bb-slp-41.c
>===================================================================
>--- gcc/testsuite/gcc.dg/vect/bb-slp-41.c	2019-11-04 21:13:57.363758109
>+0000
>+++ gcc/testsuite/gcc.dg/vect/bb-slp-41.c	2019-11-14 18:08:36.323546916
>+0000
>@@ -1,10 +1,9 @@
>-/* { dg-do run } */
>-/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */
> /* { dg-require-effective-target vect_int } */
> 
> #define ARR_SIZE 1000
> 
>-void foo (int *a, int *b)
>+void __attribute__((optimize (0)))
>+foo (int *a, int *b)
> {
>   int i;
>   for (i = 0; i < (ARR_SIZE - 2); ++i)
>@@ -56,6 +55,4 @@ int main ()
>   return 0;
> 
> }
>-/* See that we vectorize an SLP instance.  */
>-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor"
>12 "slp1" } } */
>-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4
>"slp1" } } */
>+/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP"
>"slp1" } } */
>Index: gcc/testsuite/gcc.dg/vect/bb-slp-42.c
>===================================================================
>--- /dev/null	2019-09-17 11:41:18.176664108 +0100
>+++ gcc/testsuite/gcc.dg/vect/bb-slp-42.c	2019-11-14 18:08:36.323546916
>+0000
>@@ -0,0 +1,49 @@
>+/* { dg-require-effective-target vect_int } */
>+/* { dg-require-effective-target vect_perm } */
>+
>+#include "tree-vect.h"
>+
>+#define ARR_SIZE 1024
>+
>+void __attribute__((noipa))
>+foo (int a[][ARR_SIZE], int *b)
>+{
>+  int i;
>+  for (i = 0; i < ARR_SIZE; ++i)
>+    {
>+      a[0][i] += b[0];
>+      a[1][i] += b[1];
>+      a[2][i] += b[2];
>+      a[3][i] += b[3];
>+    }
>+}
>+
>+int
>+main ()
>+{
>+  int a[4][ARR_SIZE];
>+  int b[4];
>+
>+  check_vect ();
>+
>+  for (int i = 0; i < 4; ++i)
>+    {
>+      b[i] = 20 * i;
>+      for (int j = 0; j < ARR_SIZE; ++j)
>+	a[i][j] = (i + 1) * ARR_SIZE - j;
>+    }
>+
>+  foo (a, b);
>+
>+  for (int i = 0; i < 4; ++i)
>+    for (int j = 0; j < ARR_SIZE; ++j)
>+      if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i)
>+	__builtin_abort ();
>+
>+  return 0;
>+
>+}
>+
>+/* See that we vectorize an SLP instance.  */
>+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1"
>{ target { ! vect_fully_masked } } } } */
>+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4
>"slp1" { target { ! vect_fully_masked } } } } */
Christophe Lyon Nov. 18, 2019, 12:37 p.m. UTC | #2
On Thu, 14 Nov 2019 at 19:10, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> gcc.dg/vect/bb-slp-40.c was failing on some targets because the
> explicit dg-options overrode things like -maltivec.  This patch
> uses dg-additional-options instead.
>
> Also, it seems safer not to require exactly 1 instance of each message,
> since that depends on the target vector length.
>
> gcc.dg/vect/bb-slp-41.c contained invariant constructors that are
> vectorised on AArch64 (foo) and constructors that aren't (bar).
> This meant that the number of times we print "Found vectorizable
> constructor" depended on how many vector sizes we try, since we'd
> print it for each failed attempt.
>
> In foo, we create invariant { b[0], ... } and { b[1], ... },
> and the test is making sure that the two separate invariant vectors
> can be fed from the same vector load at b.  This is a different case
> from bb-slp-40.c, where the constructors are naturally separate.
> (The expected count is 4 rather than 2 because we can vectorise the
> epilogue too.)
>
> However, due to limitations in the loop vectoriser, we still do the
> addition of { b[0], ... } and { b[1], ... } in the loop.  Hopefully
> that'll be fixed at some point, so this patch adds an alternative test
> that directly needs 4 separate invariant constructors.  E.g. with Joel's
> SLP optimisation, the new test generates:
>
>         ldr     q4, [x1]
>         dup     v7.4s, v4.s[0]
>         dup     v6.4s, v4.s[1]
>         dup     v5.4s, v4.s[2]
>         dup     v4.4s, v4.s[3]
>
> instead of the somewhat bizarre:
>
>         ldp     s6, s5, [x1, 4]
>         ldr     s4, [x1, 12]
>         ld1r    {v7.4s}, [x1]
>         dup     v6.4s, v6.s[0]
>         dup     v5.4s, v5.s[0]
>         dup     v4.4s, v4.s[0]
>
> The patch then disables vectorisation of the original foo in
> bb-slp-41.c, so that we get the same correctness testing
> for bar but don't need to test for specific counts.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
> OK to install?
>
> Richard
>
>
> 2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/testsuite/
>         PR testsuite/92366
>         * gcc.dg/vect/bb-slp-40.c: Use dg-additional-options instead
>         of dg-options.  Remove expected counts.
>         * gcc.dg/vect/bb-slp-41.c: Remove dg-options and explicit
>         dg-do run.  Suppress vectorization of foo.
>         * gcc.dg/vect/bb-slp-42.c: New test.
>

Hi Richard,

I've noticed that gcc.dg/vect/bb-slp-42.c fails on
armeb-linux-gnueabihf when GCC is configured --with-cpu cortex-a9
--with-fpu neon-fp16.

FAIL: gcc.dg/vect/bb-slp-42.c -flto -ffat-lto-objects  scan-tree-dump
slp1 "Found vectorizable constructor"
FAIL: gcc.dg/vect/bb-slp-42.c -flto -ffat-lto-objects
scan-tree-dump-times slp1 "vectorizing stmts using SLP" 4
FAIL: gcc.dg/vect/bb-slp-42.c scan-tree-dump slp1 "Found vectorizable
constructor"
FAIL: gcc.dg/vect/bb-slp-42.c scan-tree-dump-times slp1 "vectorizing
stmts using SLP" 4

This test is UNSUPPORTED when GCC is configured --with-fpu vfpv3-d16-fp16.

Not sure we want to bother since quite a few vectorization tests
already fail on armeb...

Christophe


> Index: gcc/testsuite/gcc.dg/vect/bb-slp-40.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/bb-slp-40.c       2019-11-04 21:13:57.363758109 +0000
> +++ gcc/testsuite/gcc.dg/vect/bb-slp-40.c       2019-11-14 18:08:36.323546916 +0000
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O3 -fdump-tree-slp-all" } */
> +/* { dg-additional-options "-fvect-cost-model=dynamic" } */
>  /* { dg-require-effective-target vect_int } */
>
>  char g_d[1024], g_s1[1024], g_s2[1024];
> @@ -30,5 +30,5 @@ void foo(void)
>  }
>
>  /* See that we vectorize an SLP instance.  */
> -/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */
> +/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" } } */
> +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
> Index: gcc/testsuite/gcc.dg/vect/bb-slp-41.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/bb-slp-41.c       2019-11-04 21:13:57.363758109 +0000
> +++ gcc/testsuite/gcc.dg/vect/bb-slp-41.c       2019-11-14 18:08:36.323546916 +0000
> @@ -1,10 +1,9 @@
> -/* { dg-do run } */
> -/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */
>  /* { dg-require-effective-target vect_int } */
>
>  #define ARR_SIZE 1000
>
> -void foo (int *a, int *b)
> +void __attribute__((optimize (0)))
> +foo (int *a, int *b)
>  {
>    int i;
>    for (i = 0; i < (ARR_SIZE - 2); ++i)
> @@ -56,6 +55,4 @@ int main ()
>    return 0;
>
>  }
> -/* See that we vectorize an SLP instance.  */
> -/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */
> +/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "slp1" } } */
> Index: gcc/testsuite/gcc.dg/vect/bb-slp-42.c
> ===================================================================
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/bb-slp-42.c       2019-11-14 18:08:36.323546916 +0000
> @@ -0,0 +1,49 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_perm } */
> +
> +#include "tree-vect.h"
> +
> +#define ARR_SIZE 1024
> +
> +void __attribute__((noipa))
> +foo (int a[][ARR_SIZE], int *b)
> +{
> +  int i;
> +  for (i = 0; i < ARR_SIZE; ++i)
> +    {
> +      a[0][i] += b[0];
> +      a[1][i] += b[1];
> +      a[2][i] += b[2];
> +      a[3][i] += b[3];
> +    }
> +}
> +
> +int
> +main ()
> +{
> +  int a[4][ARR_SIZE];
> +  int b[4];
> +
> +  check_vect ();
> +
> +  for (int i = 0; i < 4; ++i)
> +    {
> +      b[i] = 20 * i;
> +      for (int j = 0; j < ARR_SIZE; ++j)
> +       a[i][j] = (i + 1) * ARR_SIZE - j;
> +    }
> +
> +  foo (a, b);
> +
> +  for (int i = 0; i < 4; ++i)
> +    for (int j = 0; j < ARR_SIZE; ++j)
> +      if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i)
> +       __builtin_abort ();
> +
> +  return 0;
> +
> +}
> +
> +/* See that we vectorize an SLP instance.  */
> +/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" { target { ! vect_fully_masked } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" { target { ! vect_fully_masked } } } } */
diff mbox series

Patch

Index: gcc/testsuite/gcc.dg/vect/bb-slp-40.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/bb-slp-40.c	2019-11-04 21:13:57.363758109 +0000
+++ gcc/testsuite/gcc.dg/vect/bb-slp-40.c	2019-11-14 18:08:36.323546916 +0000
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-slp-all" } */
+/* { dg-additional-options "-fvect-cost-model=dynamic" } */
 /* { dg-require-effective-target vect_int } */
 
 char g_d[1024], g_s1[1024], g_s2[1024];
@@ -30,5 +30,5 @@  void foo(void)
 }
 
 /* See that we vectorize an SLP instance.  */
-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */
+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
Index: gcc/testsuite/gcc.dg/vect/bb-slp-41.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/bb-slp-41.c	2019-11-04 21:13:57.363758109 +0000
+++ gcc/testsuite/gcc.dg/vect/bb-slp-41.c	2019-11-14 18:08:36.323546916 +0000
@@ -1,10 +1,9 @@ 
-/* { dg-do run } */
-/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */
 /* { dg-require-effective-target vect_int } */
 
 #define ARR_SIZE 1000
 
-void foo (int *a, int *b)
+void __attribute__((optimize (0)))
+foo (int *a, int *b)
 {
   int i;
   for (i = 0; i < (ARR_SIZE - 2); ++i)
@@ -56,6 +55,4 @@  int main ()
   return 0;
 
 }
-/* See that we vectorize an SLP instance.  */
-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */
+/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "slp1" } } */
Index: gcc/testsuite/gcc.dg/vect/bb-slp-42.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.dg/vect/bb-slp-42.c	2019-11-14 18:08:36.323546916 +0000
@@ -0,0 +1,49 @@ 
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_perm } */
+
+#include "tree-vect.h"
+
+#define ARR_SIZE 1024
+
+void __attribute__((noipa))
+foo (int a[][ARR_SIZE], int *b)
+{
+  int i;
+  for (i = 0; i < ARR_SIZE; ++i)
+    {
+      a[0][i] += b[0];
+      a[1][i] += b[1];
+      a[2][i] += b[2];
+      a[3][i] += b[3];
+    }
+}
+
+int
+main ()
+{
+  int a[4][ARR_SIZE];
+  int b[4];
+
+  check_vect ();
+
+  for (int i = 0; i < 4; ++i)
+    {
+      b[i] = 20 * i;
+      for (int j = 0; j < ARR_SIZE; ++j)
+	a[i][j] = (i + 1) * ARR_SIZE - j;
+    }
+
+  foo (a, b);
+
+  for (int i = 0; i < 4; ++i)
+    for (int j = 0; j < ARR_SIZE; ++j)
+      if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i)
+	__builtin_abort ();
+
+  return 0;
+
+}
+
+/* See that we vectorize an SLP instance.  */
+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" { target { ! vect_fully_masked } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" { target { ! vect_fully_masked } } } } */