[committed,AArch64] Split gcc.target/aarch64/sve/reduc_strict_3.c
diff mbox series

Message ID mptwockygmj.fsf@arm.com
State New
Headers show
Series
  • [committed,AArch64] Split gcc.target/aarch64/sve/reduc_strict_3.c
Related show

Commit Message

Richard Sandiford Oct. 31, 2019, 5:22 p.m. UTC
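This patch splits gcc.target/aarch64/sve/reduc_strict_3.c into one
test per function, so that it's easier to see what each scan is
matching and also so that we no longer rely on the number of times
that each dump message is printed (the old counts assumed that each
message was reported once for SVE, once for 128-bit AdvSIMD and once
for 64-bit AdvSIMD).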

The patch also generalises the tests to work with scalable vectors
instead of fixed 256-bit vectors (-msve-vector-bits=256).  I think
the original test probably predates support for variable-length
loop-aware SLP.

Tested on aarch64-linux-gnu and applied as r277681.

Richard


2019-10-31  Richard Sandiford  <richard.sandiford@arm.com>

gcc/testsuite/
	* gcc.target/aarch64/sve/reduc_strict_3.c: Split all but the
	first function out into...
	* gcc.target/aarch64/sve/reduc_strict_4.c,
	* gcc.target/aarch64/sve/reduc_strict_5.c,
	* gcc.target/aarch64/sve/reduc_strict_6.c,
	* gcc.target/aarch64/sve/reduc_strict_7.c,
	* gcc.target/aarch64/sve/reduc_strict_8.c,
	* gcc.target/aarch64/sve/reduc_strict_9.c: ...these new tests.
	Test for scalable vectors instead of 256-bit vectors.

Patch
diff mbox series

Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c	2019-10-31 17:15:21.594544316 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c	2019-10-31 17:20:02.404591908 +0000
@@ -1,12 +1,7 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -msve-vector-bits=256 -fdump-tree-vect-details" } */
-/* Disabling epilogues until we find a better way to deal with scans.  */
-/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-options "-O2 -ftree-vectorize" } */
 
-double mat[100][4];
-double mat2[100][8];
-double mat3[100][12];
-double mat4[100][3];
+double mat[100][2];
 
 double
 slp_reduc_plus (int n)
@@ -16,116 +11,8 @@  slp_reduc_plus (int n)
     {
       tmp = tmp + mat[i][0];
       tmp = tmp + mat[i][1];
-      tmp = tmp + mat[i][2];
-      tmp = tmp + mat[i][3];
     }
   return tmp;
 }
 
-double
-slp_reduc_plus2 (int n)
-{
-  double tmp = 0.0;
-  for (int i = 0; i < n; i++)
-    {
-      tmp = tmp + mat2[i][0];
-      tmp = tmp + mat2[i][1];
-      tmp = tmp + mat2[i][2];
-      tmp = tmp + mat2[i][3];
-      tmp = tmp + mat2[i][4];
-      tmp = tmp + mat2[i][5];
-      tmp = tmp + mat2[i][6];
-      tmp = tmp + mat2[i][7];
-    }
-  return tmp;
-}
-
-double
-slp_reduc_plus3 (int n)
-{
-  double tmp = 0.0;
-  for (int i = 0; i < n; i++)
-    {
-      tmp = tmp + mat3[i][0];
-      tmp = tmp + mat3[i][1];
-      tmp = tmp + mat3[i][2];
-      tmp = tmp + mat3[i][3];
-      tmp = tmp + mat3[i][4];
-      tmp = tmp + mat3[i][5];
-      tmp = tmp + mat3[i][6];
-      tmp = tmp + mat3[i][7];
-      tmp = tmp + mat3[i][8];
-      tmp = tmp + mat3[i][9];
-      tmp = tmp + mat3[i][10];
-      tmp = tmp + mat3[i][11];
-    }
-  return tmp;
-}
-
-void
-slp_non_chained_reduc (int n, double * restrict out)
-{
-  for (int i = 0; i < 3; i++)
-    out[i] = 0;
-
-  for (int i = 0; i < n; i++)
-    {
-      out[0] = out[0] + mat4[i][0];
-      out[1] = out[1] + mat4[i][1];
-      out[2] = out[2] + mat4[i][2];
-    }
-}
-
-/* Strict FP reductions shouldn't be used for the outer loops, only the
-   inner loops.  */
-
-float
-double_reduc1 (float (*restrict i)[16])
-{
-  float l = 0;
-
-#pragma GCC unroll 0
-  for (int a = 0; a < 8; a++)
-    for (int b = 0; b < 8; b++)
-      l += i[b][a];
-  return l;
-}
-
-float
-double_reduc2 (float *restrict i)
-{
-  float l = 0;
-
-  for (int a = 0; a < 8; a++)
-    for (int b = 0; b < 16; b++)
-      {
-        l += i[b * 4];
-        l += i[b * 4 + 1];
-        l += i[b * 4 + 2];
-        l += i[b * 4 + 3];
-      }
-  return l;
-}
-
-float
-double_reduc3 (float *restrict i, float *restrict j)
-{
-  float k = 0, l = 0;
-
-  for (int a = 0; a < 8; a++)
-    for (int b = 0; b < 8; b++)
-      {
-        k += i[b];
-        l += j[b];
-      }
-  return l * k;
-}
-
-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 4 } } */
-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */
-/* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3.  Each one
-   is reported three times, once for SVE, once for 128-bit AdvSIMD and once
-   for 64-bit AdvSIMD.  */
-/* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */
-/* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3.  */
-/* { dg-final { scan-tree-dump-times "Detected reduction" 10 "vect" } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_4.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_4.c	2019-10-31 17:20:02.404591908 +0000
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+double mat[100][8];
+
+double
+slp_reduc_plus (int n)
+{
+  double tmp = 0.0;
+  for (int i = 0; i < n; i++)
+    {
+      tmp = tmp + mat[i][0];
+      tmp = tmp + mat[i][1];
+      tmp = tmp + mat[i][2];
+      tmp = tmp + mat[i][3];
+      tmp = tmp + mat[i][4];
+      tmp = tmp + mat[i][5];
+      tmp = tmp + mat[i][6];
+      tmp = tmp + mat[i][7];
+    }
+  return tmp;
+}
+
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 4 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_5.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_5.c	2019-10-31 17:20:02.404591908 +0000
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+double mat[100][12];
+
+double
+slp_reduc_plus (int n)
+{
+  double tmp = 0.0;
+  for (int i = 0; i < n; i++)
+    {
+      tmp = tmp + mat[i][0];
+      tmp = tmp + mat[i][1];
+      tmp = tmp + mat[i][2];
+      tmp = tmp + mat[i][3];
+      tmp = tmp + mat[i][4];
+      tmp = tmp + mat[i][5];
+      tmp = tmp + mat[i][6];
+      tmp = tmp + mat[i][7];
+      tmp = tmp + mat[i][8];
+      tmp = tmp + mat[i][9];
+      tmp = tmp + mat[i][10];
+      tmp = tmp + mat[i][11];
+    }
+  return tmp;
+}
+
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 6 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_6.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_6.c	2019-10-31 17:20:02.404591908 +0000
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+double mat[100][4];
+double mat2[100][8];
+double mat3[100][12];
+double mat4[100][3];
+
+void
+slp_non_chained_reduc (int n, double * restrict out)
+{
+  for (int i = 0; i < 3; i++)
+    out[i] = 0;
+
+  for (int i = 0; i < n; i++)
+    {
+      out[0] = out[0] + mat4[i][0];
+      out[1] = out[1] + mat4[i][1];
+      out[2] = out[2] + mat4[i][2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_7.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_7.c	2019-10-31 17:20:02.404591908 +0000
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+
+/* Strict FP reductions shouldn't be used for the outer loop, only the
+   inner loop.  */
+
+float
+double_reduc (float (*i)[16])
+{
+  float l = 0;
+
+#pragma GCC unroll 0
+  for (int a = 0; a < 8; a++)
+    for (int b = 0; b < 100; b++)
+      l += i[b][a];
+  return l;
+}
+
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_8.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_8.c	2019-10-31 17:20:02.404591908 +0000
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+
+float
+double_reduc (float *i)
+{
+  float l = 0;
+
+  for (int a = 0; a < 8; a++)
+    for (int b = 0; b < 16; b++)
+      {
+        l += i[b * 4];
+        l += i[b * 4 + 1];
+        l += i[b * 4 + 2];
+        l += i[b * 4 + 3];
+      }
+  return l;
+}
+
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_9.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_9.c	2019-10-31 17:20:02.404591908 +0000
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+
+float
+double_reduc (float *i, float *j)
+{
+  float k = 0, l = 0;
+
+  for (int a = 0; a < 8; a++)
+    for (int b = 0; b < 100; b++)
+      {
+        k += i[b];
+        l += j[b];
+      }
+  return l * k;
+}
+
+/* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */