diff mbox series

Loop-split improvements, part 3

Message ID ZMO7E9C9KBfbyJHH@kam.mff.cuni.cz
State New
Headers show
Series Loop-split improvements, part 3 | expand

Commit Message

Jan Hubicka July 28, 2023, 12:56 p.m. UTC
Hi,
This patch extends tree-ssa-loop-split to understand test of the form
 if (i==0)
and
 if (i!=0)
which triggers only during the first iteration.  Naturally we should
also be able to trigger last iteration or split into 3 cases if
the test indeed can fire in the middle of the loop.

Last iteration is bit trickier pattern matching so I want to do it
incrementally, but I implemented easy case using value range that handled
loops with constant iterations.

The testcase gets misupdated profile, I will also fix that incrementally.

Bootstrapped/regtested x86_64-linux, OK?

gcc/ChangeLog:

	PR middle-end/77689
	* tree-ssa-loop-split.cc: Include value-query.h.
	(split_at_bb_p): Analyze cases where EQ/NE can be turned
	into LT/LE/GT/GE; return updated guard code.
	(split_loop): Use guard code.

gcc/testsuite/ChangeLog:

	PR middle-end/77689
	* g++.dg/tree-ssa/loop-split-1.C: New test.

Comments

Richard Biener July 28, 2023, 1:15 p.m. UTC | #1
On Fri, Jul 28, 2023 at 2:57 PM Jan Hubicka via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
> This patch extends tree-ssa-loop-split to understand test of the form
>  if (i==0)
> and
>  if (i!=0)
> which triggers only during the first iteration.  Naturally we should
> also be able to trigger last iteration or split into 3 cases if
> the test indeed can fire in the middle of the loop.
>
> Last iteration is bit trickier pattern matching so I want to do it
> incrementally, but I implemented easy case using value range that handled
> loops with constant iterations.
>
> The testcase gets misupdated profile, I will also fix that incrementally.
>
> Bootstrapped/regtested x86_64-linux, OK?

OK, though I think we can handle more loops by simply conservatively peeling
one iteration at the beginning/end with such conditions and would be not subject
to all other limitations the loop splitting pass has?

Richard.

> gcc/ChangeLog:
>
>         PR middle-end/77689
>         * tree-ssa-loop-split.cc: Include value-query.h.
>         (split_at_bb_p): Analyze cases where EQ/NE can be turned
>         into LT/LE/GT/GE; return updated guard code.
>         (split_loop): Use guard code.
>
> gcc/testsuite/ChangeLog:
>
>         PR middle-end/77689
>         * g++.dg/tree-ssa/loop-split-1.C: New test.
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C
> new file mode 100644
> index 00000000000..9581438b536
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-lsplit-details -std=c++11" } */
> +#include <vector>
> +#include <cmath>
> +
> +constexpr unsigned s = 100000000;
> +
> +int main()
> +{
> +    std::vector<float> a, b, c;
> +    a.reserve(s);
> +    b.reserve(s);
> +    c.reserve(s);
> +
> +    for(unsigned i = 0; i < s; ++i)
> +    {
> +        if(i == 0)
> +            a[i] = b[i] * c[i];
> +        else
> +            a[i] = (b[i] + c[i]) * c[i-1] * std::log(i);
> +    }
> +}
> +/* { dg-final { scan-tree-dump-times "loop split" 1 "lsplit" } } */
> diff --git a/gcc/tree-ssa-loop-split.cc b/gcc/tree-ssa-loop-split.cc
> index 70cd0aaefa7..641346cba70 100644
> --- a/gcc/tree-ssa-loop-split.cc
> +++ b/gcc/tree-ssa-loop-split.cc
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple-fold.h"
>  #include "gimplify-me.h"
>  #include "print-tree.h"
> +#include "value-query.h"
>
>  /* This file implements two kinds of loop splitting.
>
> @@ -75,7 +76,8 @@ along with GCC; see the file COPYING3.  If not see
>     point in *BORDER and the comparison induction variable in IV.  */
>
>  static tree
> -split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
> +split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
> +              enum tree_code *guard_code)
>  {
>    gcond *stmt;
>    affine_iv iv2;
> @@ -87,19 +89,6 @@ split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
>
>    enum tree_code code = gimple_cond_code (stmt);
>
> -  /* Only handle relational comparisons, for equality and non-equality
> -     we'd have to split the loop into two loops and a middle statement.  */
> -  switch (code)
> -    {
> -      case LT_EXPR:
> -      case LE_EXPR:
> -      case GT_EXPR:
> -      case GE_EXPR:
> -       break;
> -      default:
> -       return NULL_TREE;
> -    }
> -
>    if (loop_exits_from_bb_p (loop, bb))
>      return NULL_TREE;
>
> @@ -129,6 +118,56 @@ split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
>    if (!iv->no_overflow)
>      return NULL_TREE;
>
> +  /* Only handle relational comparisons, for equality and non-equality
> +     we'd have to split the loop into two loops and a middle statement.  */
> +  switch (code)
> +    {
> +      case LT_EXPR:
> +      case LE_EXPR:
> +      case GT_EXPR:
> +      case GE_EXPR:
> +       break;
> +      case NE_EXPR:
> +      case EQ_EXPR:
> +       /* If the test check for first iteration, we can handle NE/EQ
> +          with only one split loop.  */
> +       if (operand_equal_p (iv->base, iv2.base, 0))
> +         {
> +           if (code == EQ_EXPR)
> +             code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
> +           else
> +             code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
> +           break;
> +         }
> +       /* Similarly when the test checks for minimal or maximal
> +          value range.  */
> +       else
> +         {
> +           int_range<2> r;
> +           get_global_range_query ()->range_of_expr (r, op0, stmt);
> +           if (!r.varying_p () && !r.undefined_p ()
> +               && TREE_CODE (op1) == INTEGER_CST)
> +             {
> +               wide_int val = wi::to_wide (op1);
> +               if (known_eq (val, r.lower_bound ()))
> +                 {
> +                   code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
> +                   break;
> +                 }
> +               else if (known_eq (val, r.upper_bound ()))
> +                 {
> +                   code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
> +                   break;
> +                 }
> +             }
> +         }
> +       /* TODO: We can compare with exit condition; it seems that testing for
> +          last iteration is common case.  */
> +       return NULL_TREE;
> +      default:
> +       return NULL_TREE;
> +    }
> +
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      {
>        fprintf (dump_file, "Found potential split point: ");
> @@ -143,6 +182,7 @@ split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
>      }
>
>    *border = iv2.base;
> +  *guard_code = code;
>    return op0;
>  }
>
> @@ -566,8 +606,9 @@ split_loop (class loop *loop1)
>      }
>
>    /* Find a splitting opportunity.  */
> +  enum tree_code guard_code;
>    for (i = 0; i < loop1->num_nodes; i++)
> -    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
> +    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
>        {
>         profile_count entry_count = loop_preheader_edge (loop1)->count ();
>         /* Handling opposite steps is not implemented yet.  Neither
> @@ -585,7 +626,6 @@ split_loop (class loop *loop1)
>         gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
>         tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
>                                                  loop_preheader_edge (loop1));
> -       enum tree_code guard_code = gimple_cond_code (guard_stmt);
>
>         /* Loop splitting is implemented by versioning the loop, placing
>            the new loop after the old loop, make the first loop iterate
Jan Hubicka July 28, 2023, 1:24 p.m. UTC | #2
> On Fri, Jul 28, 2023 at 2:57 PM Jan Hubicka via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi,
> > This patch extends tree-ssa-loop-split to understand test of the form
> >  if (i==0)
> > and
> >  if (i!=0)
> > which triggers only during the first iteration.  Naturally we should
> > also be able to trigger last iteration or split into 3 cases if
> > the test indeed can fire in the middle of the loop.
> >
> > Last iteration is bit trickier pattern matching so I want to do it
> > incrementally, but I implemented easy case using value range that handled
> > loops with constant iterations.
> >
> > The testcase gets misupdated profile, I will also fix that incrementally.
> >
> > Bootstrapped/regtested x86_64-linux, OK?
> 
> OK, though I think we can handle more loops by simply conservatively peeling
> one iteration at the beginning/end with such conditions and would be not subject
> to all other limitations the loop splitting pass has?

I was also thinking of extending loop peeling heuristics by this.
Loop-ch already can handle case where the static test exits loop, so we
could get this if I figure out how to merge the analysis.

To handle last iteration (like in hmmer), we would need to extend loop
peeling to support that.

Even with that tree-ssa-loop-split has chance to be more informed and
have better cost model.  Let me see how many restrictions can be dropped
it.

Honza
diff mbox series

Patch

diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C
new file mode 100644
index 00000000000..9581438b536
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-lsplit-details -std=c++11" } */
+#include <vector>
+#include <cmath>
+
+constexpr unsigned s = 100000000;
+
+int main()
+{
+    std::vector<float> a, b, c;
+    a.reserve(s);
+    b.reserve(s);
+    c.reserve(s);
+
+    for(unsigned i = 0; i < s; ++i)
+    {
+        if(i == 0)
+            a[i] = b[i] * c[i];
+        else
+            a[i] = (b[i] + c[i]) * c[i-1] * std::log(i);
+    }
+}
+/* { dg-final { scan-tree-dump-times "loop split" 1 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.cc b/gcc/tree-ssa-loop-split.cc
index 70cd0aaefa7..641346cba70 100644
--- a/gcc/tree-ssa-loop-split.cc
+++ b/gcc/tree-ssa-loop-split.cc
@@ -42,6 +42,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "gimplify-me.h"
 #include "print-tree.h"
+#include "value-query.h"
 
 /* This file implements two kinds of loop splitting.
 
@@ -75,7 +76,8 @@  along with GCC; see the file COPYING3.  If not see
    point in *BORDER and the comparison induction variable in IV.  */
 
 static tree
-split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
+split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
+	       enum tree_code *guard_code)
 {
   gcond *stmt;
   affine_iv iv2;
@@ -87,19 +89,6 @@  split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
 
   enum tree_code code = gimple_cond_code (stmt);
 
-  /* Only handle relational comparisons, for equality and non-equality
-     we'd have to split the loop into two loops and a middle statement.  */
-  switch (code)
-    {
-      case LT_EXPR:
-      case LE_EXPR:
-      case GT_EXPR:
-      case GE_EXPR:
-	break;
-      default:
-	return NULL_TREE;
-    }
-
   if (loop_exits_from_bb_p (loop, bb))
     return NULL_TREE;
 
@@ -129,6 +118,56 @@  split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
   if (!iv->no_overflow)
     return NULL_TREE;
 
+  /* Only handle relational comparisons, for equality and non-equality
+     we'd have to split the loop into two loops and a middle statement.  */
+  switch (code)
+    {
+      case LT_EXPR:
+      case LE_EXPR:
+      case GT_EXPR:
+      case GE_EXPR:
+	break;
+      case NE_EXPR:
+      case EQ_EXPR:
+	/* If the test check for first iteration, we can handle NE/EQ
+	   with only one split loop.  */
+	if (operand_equal_p (iv->base, iv2.base, 0))
+	  {
+	    if (code == EQ_EXPR)
+	      code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
+	    else
+	      code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
+	    break;
+	  }
+	/* Similarly when the test checks for minimal or maximal
+	   value range.  */
+	else
+	  {
+	    int_range<2> r;
+	    get_global_range_query ()->range_of_expr (r, op0, stmt);
+	    if (!r.varying_p () && !r.undefined_p ()
+		&& TREE_CODE (op1) == INTEGER_CST)
+	      {
+		wide_int val = wi::to_wide (op1);
+		if (known_eq (val, r.lower_bound ()))
+		  {
+		    code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
+		    break;
+		  }
+		else if (known_eq (val, r.upper_bound ()))
+		  {
+		    code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
+		    break;
+		  }
+	      }
+	  }
+	/* TODO: We can compare with exit condition; it seems that testing for
+	   last iteration is common case.  */
+	return NULL_TREE;
+      default:
+	return NULL_TREE;
+    }
+
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Found potential split point: ");
@@ -143,6 +182,7 @@  split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
     }
 
   *border = iv2.base;
+  *guard_code = code;
   return op0;
 }
 
@@ -566,8 +606,9 @@  split_loop (class loop *loop1)
     }
 
   /* Find a splitting opportunity.  */
+  enum tree_code guard_code;
   for (i = 0; i < loop1->num_nodes; i++)
-    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
+    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
       {
 	profile_count entry_count = loop_preheader_edge (loop1)->count ();
 	/* Handling opposite steps is not implemented yet.  Neither
@@ -585,7 +626,6 @@  split_loop (class loop *loop1)
 	gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
 						 loop_preheader_edge (loop1));
-	enum tree_code guard_code = gimple_cond_code (guard_stmt);
 
 	/* Loop splitting is implemented by versioning the loop, placing
 	   the new loop after the old loop, make the first loop iterate