diff mbox

[PR34114/2] Prove no-overflow for loop with NE_EXPR exit condition and non-ONE step

Message ID HE1PR0801MB1755AB12BFEFE69E4D5354E5E7010@HE1PR0801MB1755.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Bin Cheng July 29, 2016, 3:35 p.m. UTC
Hi,
This patch fixes PR34114.  It proves no-overflow for loops with NE_EXPR exit condition and non-ONE step.  The patch contains detailed comment about when a loop like (IV=base; IV != FINAL; IV += step) doesn't overflow.

Bootstrap and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin

2016-07-27  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/34114
	* tree-ssa-loop-niter.c (number_of_iterations_ne): Prove no-overflow
	information for more control IVs.

gcc/testsuite/ChangeLog
2016-07-27  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/34114
	* gcc.dg/tree-ssa/loop-42.c: New test.

Comments

Jeff Law July 29, 2016, 3:54 p.m. UTC | #1
On 07/29/2016 09:35 AM, Bin Cheng wrote:
> Hi,
> This patch fixes PR34114.  It proves no-overflow for loops with NE_EXPR exit condition and non-ONE step.  The patch contains detailed comment about when a loop like (IV=base; IV != FINAL; IV += step) doesn't overflow.
>
> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> Thanks,
> bin
>
> 2016-07-27  Bin Cheng  <bin.cheng@arm.com>
>
> 	PR tree-optimization/34114
> 	* tree-ssa-loop-niter.c (number_of_iterations_ne): Prove no-overflow
> 	information for more control IVs.
>
> gcc/testsuite/ChangeLog
> 2016-07-27  Bin Cheng  <bin.cheng@arm.com>
>
> 	PR tree-optimization/34114
> 	* gcc.dg/tree-ssa/loop-42.c: New test.
OK.
jeff
diff mbox

Patch

diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b7d7c32..af35aaa 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -964,7 +964,6 @@  number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv,
   tree niter_type = unsigned_type_for (type);
   tree s, c, d, bits, assumption, tmp, bound;
   mpz_t max;
-  tree e;
 
   niter->control = *iv;
   niter->bound = final;
@@ -999,21 +998,76 @@  number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv,
 				 TYPE_SIGN (niter_type));
   mpz_clear (max);
 
-  /* Compute no-overflow information for the control iv.  Note we are
-     handling NE_EXPR, if iv base equals to final value, the loop exits
-     immediately, and the iv does not overflow.  */
-  if (tree_int_cst_sign_bit (iv->step))
-    e = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final);
-  else
-    e = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final);
-  e = simplify_using_initial_conditions (loop, e);
-  if (integer_onep (e)
-      && (integer_onep (s)
-	  || (TREE_CODE (c) == INTEGER_CST
-	      && TREE_CODE (s) == INTEGER_CST
-	      && wi::mod_trunc (c, s, TYPE_SIGN (type)) == 0)))
+  /* Compute no-overflow information for the control iv.  This can be
+     proven when below two conditions hold.
+
+       1) |FINAL - base| is an exact multiple of step.
+       2) IV evaluates toward FINAL at beginning, i.e:
+
+	    base <= FINAL ; step > 0
+	    base >= FINAL ; step < 0
+
+	  Note the first condition holds, the second can be then relaxed
+	  to below condition.
+
+	    base - step < FINAL ; step > 0
+				  && base - step doesn't underflow
+	    base - step > FINAL ; step < 0
+				  && base - step doesn't overflow
+
+	  The relaxation is important because after pass loop-ch, loop
+	  with exit condition (IV != FINAL) will usually be guarded by
+	  pre-condition (IV.base - IV.step != FINAL).  Please refer to
+	  PR34114 as an example.
+
+     Also note, for NE_EXPR, base equals to FINAL is a special case, in
+     which the loop exits immediately, and the iv does not overflow.  */
+  if (!niter->control.no_overflow
+      && (integer_onep (s) || multiple_of_p (type, c, s)))
     {
-      niter->control.no_overflow = true;
+      tree t, cond, relaxed_cond = boolean_false_node;
+
+      if (tree_int_cst_sign_bit (iv->step))
+	{
+	  cond = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final);
+	  if (TREE_CODE (type) == INTEGER_TYPE)
+	    {
+	      /* Only when base - step doesn't overflow.  */
+	      t = TYPE_MAX_VALUE (type);
+	      t = fold_build2 (PLUS_EXPR, type, t, iv->step);
+	      t = fold_build2 (GE_EXPR, boolean_type_node, t, iv->base);
+	      if (integer_nonzerop (t))
+		{
+		  t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
+		  relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node,
+					      t, final);
+		}
+	    }
+	}
+      else
+	{
+	  cond = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final);
+	  if (TREE_CODE (type) == INTEGER_TYPE)
+	    {
+	      /* Only when base - step doesn't underflow.  */
+	      t = TYPE_MIN_VALUE (type);
+	      t = fold_build2 (PLUS_EXPR, type, t, iv->step);
+	      t = fold_build2 (LE_EXPR, boolean_type_node, t, iv->base);
+	      if (integer_nonzerop (t))
+		{
+		  t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
+		  relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node,
+					      t, final);
+		}
+	    }
+	}
+
+      t = simplify_using_initial_conditions (loop, cond);
+      if (!t || !integer_onep (t))
+	t = simplify_using_initial_conditions (loop, relaxed_cond);
+
+      if (t && integer_onep (t))
+	niter->control.no_overflow = true;
     }
 
   /* First the trivial cases -- when the step is 1.  */
@@ -1022,6 +1076,11 @@  number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv,
       niter->niter = c;
       return true;
     }
+  if (niter->control.no_overflow && multiple_of_p (type, c, s))
+    {
+      niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, c, s);
+      return true;
+    }
 
   /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop
      is infinite.  Otherwise, the number of iterations is
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-42.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-42.c
new file mode 100644
index 0000000..3f9d91a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-42.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivcanon-details" } */
+
+void foo2 (unsigned int num, int *a)
+{
+  unsigned int i, n = (num - (num % 2));
+
+  for(i = 0; i != n; i += 2)
+    a[i] = 0;
+}
+
+void foo3 (unsigned int num, int *a)
+{
+  unsigned int i, n = (num - (num % 3));
+
+  for(i = 0; i != n; i += 3)
+    a[i] = 0;
+}
+
+void foo4 (unsigned int num, int *a)
+{
+  unsigned int i, n = (num - (num % 4));
+
+  for(i = 0; i != n; i += 4)
+    a[i] = 0;
+}
+
+void foo5 (unsigned int num, int *a)
+{
+  unsigned int i, n = (num - (num % 5));
+
+  for(i = 0; i != n; i += 5)
+    a[i] = 0;
+}
+
+/* { dg-final { scan-tree-dump-not "under assumptions " "ivcanon" } } */