Improve unrolling heuristics, PR91975
diff mbox series

Message ID nycvar.YFH.7.76.1910070947230.5566@zhemvz.fhfr.qr
State New
Headers show
Series
  • Improve unrolling heuristics, PR91975
Related show

Commit Message

Richard Biener Oct. 7, 2019, 7:51 a.m. UTC
Currently there's a surprising difference in unrolling size estimation
depending on how exactly you formulate your IV expressions.  The following
patch makes it less dependent on this, behaving like the more optimistic
treatment (&a + 1 being constant).  In the end it's still a heuristic
and in some sense the estimation of the original size now looks odd
(costing of a[i] vs. *(a + i * 4)).  I still think it's an improvement.

For testcase adjustments I generally tried to disable unrolling if
doing so would defeat the testcase's purpose (validate correctness
of vectorization for example).  I've verified that the unrolling we
now do results in no worse code for the cases (even if I ended up
disabling that unrolling).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

2019-10-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91975
	* tree-ssa-loop-ivcanon.c (constant_after_peeling): Consistently
	handle invariants.

	* g++.dg/tree-ssa/ivopts-3.C: Adjust.
	* gcc.dg/vect/vect-profile-1.c: Disable cunrolli.
	* gcc.dg/vect/vect-double-reduc-6.c: Disable unrolling of
	the innermost loop.
	* gcc.dg/vect/vect-93.c: Likewise.
	* gcc.dg/vect/vect-105.c: Likewise.
	* gcc.dg/vect/pr79920.c: Likewise.
	* gcc.dg/vect/no-vfa-vect-102.c: Likewise.
	* gcc.dg/vect/no-vfa-vect-101.c: Likewise.
	* gcc.dg/vect/pr83202-1.c: Operate on a larger array.
	* gfortran.dg/vect/vect-8.f90: Likewise.
	* gcc.dg/tree-ssa/cunroll-2.c: Scan early unrolling dump instead
	of late one.

Comments

Andreas Schwab Nov. 2, 2019, 5:55 p.m. UTC | #1
This breaks bootstrap on powerpc:

/daten/gcc/gcc-20191101/Build/./prev-gcc/xg++ -B/daten/gcc/gcc-20191101/Build/./prev-gcc/ -B/usr/powerpc64-suse-linux/bin/ -nostdinc++ -B/daten/gcc/gcc-20191101/Build/prev-powerpc64-suse-linux/libstdc++-v3/src/.libs -B/daten/gcc/gcc-20191101/Build/prev-powerpc64-suse-linux/libstdc++-v3/libsupc++/.libs  -I/daten/gcc/gcc-20191101/Build/prev-powerpc64-suse-linux/libstdc++-v3/include/powerpc64-suse-linux  -I/daten/gcc/gcc-20191101/Build/prev-powerpc64-suse-linux/libstdc++-v3/include  -I/daten/gcc/gcc-20191101/libstdc++-v3/libsupc++ -L/daten/gcc/gcc-20191101/Build/prev-powerpc64-suse-linux/libstdc++-v3/src/.libs -L/daten/gcc/gcc-20191101/Build/prev-powerpc64-suse-linux/libstdc++-v3/libsupc++/.libs -fno-PIE -c  -DIN_GCC_FRONTEND -DIN_GCC_FRONTEND -g -O2 -fno-checking -gtoggle -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-error=format-diag -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror   -DHAVE_CONFIG_H -I. -I. -I../../gcc -I../../gcc/. -I../../gcc/../include -I../../gcc/../libcpp/include  -I../../gcc/../libdecnumber -I../../gcc/../libdecnumber/dpd -I../libdecnumber -I../../gcc/../libbacktrace   -o rs6000-c.o -MT rs6000-c.o -MMD -MP -MF ./.deps/rs6000-c.TPo ../../gcc/config/rs6000/rs6000-c.c
In file included from ../../gcc/c-family/c-common.h:27,
                 from ../../gcc/config/rs6000/rs6000-c.c:29:
../../gcc/fold-const.h: In function 'tree_node* altivec_build_resolved_builtin(tree_node**, int, const altivec_builtin_types*)':
../../gcc/fold-const.h:74:21: error: 'arg_type[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized]
   74 |    fold_convert_loc (UNKNOWN_LOCATION, T1, T2)
      |                     ^
../../gcc/config/rs6000/rs6000-c.c:6062:8: note: 'arg_type[1]' was declared here
 6062 |   tree arg_type[3];
      |        ^~~~~~~~
In file included from ../../gcc/c-family/c-common.h:27,
                 from ../../gcc/config/rs6000/rs6000-c.c:29:
../../gcc/fold-const.h:74:21: error: 'arg_type[2]' may be used uninitialized in this function [-Werror=maybe-uninitialized]
   74 |    fold_convert_loc (UNKNOWN_LOCATION, T1, T2)
      |                     ^
../../gcc/config/rs6000/rs6000-c.c:6062:8: note: 'arg_type[2]' was declared here
 6062 |   tree arg_type[3];
      |        ^~~~~~~~
cc1plus: all warnings being treated as errors

Andreas.

Patch
diff mbox series

diff --git a/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C b/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C
index 07ff1b770f8..6760a5b1851 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C
@@ -70,6 +70,8 @@  int main ( int , char** ) {
     return 0;
 }
 
-// Verify that on x86_64 and i?86 we use a single IV for the innermost loop
+// Verify that on x86_64 and i?86 we unroll the innermost loop and
+// use three IVs for the then innermost loop
 
-// { dg-final { scan-tree-dump "Selected IV set for loop \[0-9\]* at \[^ \]*:64, 3 avg niters, 1 IVs" "ivopts" { target x86_64-*-* i?86-*-* } } }
+// { dg-final { scan-tree-dump "Selected IV set for loop \[0-9\]* at \[^ \]*:63, 127 avg niters, 3 IVs" "ivopts" { target x86_64-*-* i?86-*-* } } }
+// { dg-final { scan-tree-dump-not "Selected IV set for loop \[0-9\]* at \[^ \]*:64" "ivopts" { target x86_64-*-* i?86-*-* } } }
diff --git a/gcc/testsuite/gcc.c-torture/execute/loop-3.c b/gcc/testsuite/gcc.c-torture/execute/loop-3.c
index e314a01b1f1..33eb18826fd 100644
--- a/gcc/testsuite/gcc.c-torture/execute/loop-3.c
+++ b/gcc/testsuite/gcc.c-torture/execute/loop-3.c
@@ -13,7 +13,7 @@  f (m)
   i = m;
   do
     {
-      g (i * INT_MAX / 2);
+      g ((int)((unsigned)i * INT_MAX) / 2);
     }
   while (--i > 0);
 }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c
index b1d1c7d3d85..ae3fec99749 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-cunroll-details" } */
+/* { dg-options "-O3 -fdump-tree-cunrolli-details" } */
 int a[2];
 int test2 (void);
 void
@@ -14,4 +14,4 @@  test(int c)
     }
 }
 /* We are not able to get rid of the final conditional because the loop has two exits.  */
-/* { dg-final { scan-tree-dump "loop with 1 iterations completely unrolled" "cunroll"} } */
+/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunrolli"} } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
index 91eb28218bd..ce934279ddf 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
@@ -22,6 +22,7 @@  int main1 (int x, int y) {
   p = (struct extraction *) malloc (sizeof (struct extraction));
 
   /* Not vectorizable: different unknown offset.  */
+#pragma GCC unroll 0
   for (i = 0; i < N; i++)
     {
       *((int *)p + x + i) = a[i];
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
index 51f62788dbf..d9e0529e73f 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
@@ -28,6 +28,7 @@  int main1 (int x, int y) {
     }
 
   /* Not vectorizable: distance 1.  */
+#pragma GCC unroll 0
   for (i = 0; i < N - 1; i++)
     {
        *((int *)p + x + i + 1) = *((int *)p + x + i);
diff --git a/gcc/testsuite/gcc.dg/vect/pr79920.c b/gcc/testsuite/gcc.dg/vect/pr79920.c
index 276a2806f0c..38e0fef779a 100644
--- a/gcc/testsuite/gcc.dg/vect/pr79920.c
+++ b/gcc/testsuite/gcc.dg/vect/pr79920.c
@@ -14,6 +14,7 @@  compute_integral (double w_1[18])
 
   for (int ip_1 = 0; ip_1 < 2; ++ip_1)
     {
+#pragma GCC unroll 0
       for (int i_0 = 0; i_0 < 6; ++i_0)
 	t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0])
 			  + (w_1[i_0*3+2] * t32[ip_1][2]));
diff --git a/gcc/testsuite/gcc.dg/vect/pr83202-1.c b/gcc/testsuite/gcc.dg/vect/pr83202-1.c
index 33c83de29b8..ac12f079839 100644
--- a/gcc/testsuite/gcc.dg/vect/pr83202-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr83202-1.c
@@ -1,9 +1,9 @@ 
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_double } */
 
-void test(double data[8][8])
+void test(double data[16][16])
 {
-  for (int i = 0; i < 8; i++)
+  for (int i = 0; i < 16; i++)
     {
       for (int j = 0; j < i; j+=4)
 	{
diff --git a/gcc/testsuite/gcc.dg/vect/vect-105.c b/gcc/testsuite/gcc.dg/vect/vect-105.c
index 0024457f9e2..17b6e89d8f6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-105.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-105.c
@@ -35,6 +35,7 @@  int main1 (int x) {
   /* Vectorizable: distance > number of iterations.  */
   for (i = 1; i < N; i++)
   {
+#pragma GCC unroll 0
     for (j = 0; j < N; j++)
     {
        *((int *)p + x + i + j) = *((int *)p + x + i + j + 5);
diff --git a/gcc/testsuite/gcc.dg/vect/vect-93.c b/gcc/testsuite/gcc.dg/vect/vect-93.c
index 397c2ed05aa..c3e12783b2c 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-93.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-93.c
@@ -29,6 +29,7 @@  main1 (float *pa)
 	abort ();
     }
 
+#pragma GCC unroll 0
   for (i = 1; i <= N2; i++)
     {
       pa[i] = 3.0;
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c
index 3a4f087da81..85aec1bf609 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c
@@ -19,6 +19,7 @@  foo ()
     {
       sum = 1;
       for (j = 0; j < K; j++) 
+#pragma GCC unroll 0
         for (i = 0; i < K; i++)
           sum *= in[i+k][j];
       out[k] = sum;
diff --git a/gcc/testsuite/gcc.dg/vect/vect-profile-1.c b/gcc/testsuite/gcc.dg/vect/vect-profile-1.c
index 93d7ad13147..922f965806f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-profile-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-profile-1.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_int } */
-/* { dg-additional-options "-fdump-tree-vect-details-blocks" } */
+/* { dg-additional-options "-fdump-tree-vect-details-blocks -fdisable-tree-cunrolli" } */
 
 /* At least one of these should correspond to a full vector.  */
 
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
index e26cdf95e51..386f7de4a7e 100644
--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
@@ -247,7 +247,7 @@  nl1= 1
 nl2= 2
 fw= 2.000D0
   DO ky= 2,n
-DO kx= 2,3
+DO kx= 2,4
     du1ky= u1(kx,ky+1,nl1)-u1(kx,ky-1,nl1)
     du2ky= u2(kx,ky+1,nl1)-u2(kx,ky-1,nl1)
     du3ky= u3(kx,ky+1,nl1)-u3(kx,ky-1,nl1)
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 5952cad7bba..d38959c3aa2 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -195,9 +195,8 @@  constant_after_peeling (tree op, gimple *stmt, class loop *loop)
   /* Induction variables are constants when defined in loop.  */
   if (loop_containing_stmt (stmt) != loop)
     return false;
-  tree ev = analyze_scalar_evolution (loop, op);
-  if (chrec_contains_undetermined (ev)
-      || chrec_contains_symbols (ev))
+  tree ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, op));
+  if (chrec_contains_undetermined (ev))
     return false;
   return true;
 }