diff mbox

Fix PR 50971 and PR 35629: Only one loop detected when there should be two

Message ID CA+=Sn1=H0k=YDiWxFT7qA-DUJ_t+8DR3A7HB3wcJAB0sDFkGCA@mail.gmail.com
State New
Headers show

Commit Message

Andrew Pinski March 13, 2012, 6:31 p.m. UTC
Ping?  Rebootstrapped on x86_64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

On Sat, Jan 21, 2012 at 1:21 PM, Andrew Pinski <pinskia@gmail.com> wrote:
> The problem with these two bug reports is that the cfgloop does not do
> a good job for disambiguating some loops.  This patch rewrites
> find_subloop_latch_edge_by_ivs to be better.  It is able to detect
> much more loops and gets the ones which are referenced in PR 50971 and
> PR 35629.  It does make sure the loops it finds are really loops and
> not ones where the continue would cause a loop not to be done.
>
> OK for 4.8 when stage 1 comes?  Bootstrapped and tested on
> x86_64-linux-gnu with no regressions.
>
> ChangeLog:
> cfgloop.c (skip_to_exit): New function.
> (find_subloop_latch_edge_by_ivs): Rewrite to better detect subloop latches by
> IVs.  Also look at the cfg for those IVs to check for a better choice.
>
> testsuite/ChangeLog:
> * gcc.dg/tree-ssa/loop-25.c: Remove xfails and remove "Found latch
> edge"/"Merged latch edges" tests.
> * gcc.dg/tree-ssa/loop-38.c: New testcase.

Comments

Richard Biener March 14, 2012, 9:47 a.m. UTC | #1
On Tue, Mar 13, 2012 at 7:31 PM, Andrew Pinski <pinskia@gmail.com> wrote:
> Ping?  Rebootstrapped on x86_64-linux-gnu with no regressions.

Zdenek, can you have a look here?  I think the patch is reasonable, but
you should have a better idea ;)

Thanks,
Richard.

> Thanks,
> Andrew Pinski
>
> On Sat, Jan 21, 2012 at 1:21 PM, Andrew Pinski <pinskia@gmail.com> wrote:
>> The problem with these two bug reports is that the cfgloop does not do
>> a good job for disambiguating some loops.  This patch rewrites
>> find_subloop_latch_edge_by_ivs to be better.  It is able to detect
>> much more loops and gets the ones which are referenced in PR 50971 and
>> PR 35629.  It does make sure the loops it finds are really loops and
>> not ones where the continue would cause a loop not to be done.
>>
>> OK for 4.8 when stage 1 comes?  Bootstrapped and tested on
>> x86_64-linux-gnu with no regressions.
>>
>> ChangeLog:
>> cfgloop.c (skip_to_exit): New function.
>> (find_subloop_latch_edge_by_ivs): Rewrite to better detect subloop latches by
>> IVs.  Also look at the cfg for those IVs to check for a better choice.
>>
>> testsuite/ChangeLog:
>> * gcc.dg/tree-ssa/loop-25.c: Remove xfails and remove "Found latch
>> edge"/"Merged latch edges" tests.
>> * gcc.dg/tree-ssa/loop-38.c: New testcase.
Zdenek Dvorak March 14, 2012, 10:33 a.m. UTC | #2
Hi,

> On Tue, Mar 13, 2012 at 7:31 PM, Andrew Pinski <pinskia@gmail.com> wrote:
> > Ping?  Rebootstrapped on x86_64-linux-gnu with no regressions.
> 
> Zdenek, can you have a look here?  I think the patch is reasonable, but
> you should have a better idea ;)

I do not understand the patch very well.  In the comment before the function,
please provide an explanation of what the heuristics mean (e.g., give examples
of what the particular cases are supposed to disambiguate).  Further
suggestions:

+/* Return the basic block where we might be doing an exit from a loop
+   if we go back up the cfg starting at basic block B skipping other loops
+   on the way and join points too.  */
+static basic_block
+skip_to_exit (basic_block b, struct loop *loop, unsigned succ_edge_count)

The comment describing this function is rather vague.  The succ_edge_count
parameter is not documented.  Also, skip_to_exit actually does not seem to do
anything with loop exits, so some less missleading name would be better.

+      /* There are multiple latches, we can't figure out the preheader,
+	 just return b. */
+      if (oloop->latch == NULL)
+	return NULL;

The code does not seem to match the comment.

+      if (c == NULL)
+	return NULL;
+      d = skip_to_exit (EDGE_PRED (b, 1)->src, loop, 1);
+      if (c == NULL)
+	return NULL;
+      if (c != d)
+	return NULL;

The second "c == NULL" check is redundant.

+ ... accessor of the latch ...

What do you mean by "accessor"? 

+  if (VEC_length (edge, latches) != 1)
+    {

this condition is redundant, find_subloop_latch_edge_by_ivs will only be
called (and it only makes sense to call it) when there is more than
one latch edge candidate.

+	fprintf (dump_file, "no latches for IV subloob.\n");

subloop

+	  fprintf (dump_file, "more one latches:");

"several latches:"

+      if (EDGE_COUNT (loop->header->succs) == 1)

single_succ_p
diff mbox

Patch

Index: testsuite/gcc.dg/tree-ssa/loop-25.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/loop-25.c	(revision 183364)
+++ testsuite/gcc.dg/tree-ssa/loop-25.c	(working copy)
@@ -120,10 +120,8 @@  void test5 (void)
 
 /* { dg-final { scan-tree-dump-times "Disambiguating loop" 5 "profile_estimate" } } */
 /* For the following xfail marks, see PR35629.  */
-/* { dg-final { scan-tree-dump-times "Found latch edge" 5 "profile_estimate" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "Merged latch edges" 2 "profile_estimate" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "4 loops found" 2 "profile_estimate" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "3 loops found" 2 "profile_estimate" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "2 loops found" 1 "profile_estimate" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "4 loops found" 2 "profile_estimate" } } */
+/* { dg-final { scan-tree-dump-times "3 loops found" 2 "profile_estimate" } } */
+/* { dg-final { scan-tree-dump-times "2 loops found" 1 "profile_estimate" } } */
 
 /* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: testsuite/gcc.dg/tree-ssa/loop-38.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-ch" } */
+
+typedef struct basket
+{
+    int *a;
+    int cost;
+    int abs_cost;
+} BASKET;
+BASKET *perm[50 +300 +1];
+
+
+int f(int a, int *b, int cut)
+{
+  do {
+  while (perm[a]->abs_cost > cut)
+    a++;
+  a++;
+  } while (b[a]);
+  return a;
+}
+
+/* { dg-final { scan-tree-dump-times "Disambiguating loop" 1 "ch" } } */
+/* { dg-final { scan-tree-dump-times "3 loops found" 1 "ch" } } */
+
+/* { dg-final { cleanup-tree-dump "ch" } } */
Index: cfgloop.c
===================================================================
--- cfgloop.c	(revision 183364)
+++ cfgloop.c	(working copy)
@@ -548,63 +548,200 @@  find_subloop_latch_edge_by_profile (VEC
   return me;
 }
 
+/* Return the basic block where we might be doing an exit from a loop
+   if we go back up the cfg starting at basic block B skipping other loops
+   on the way and join points too.  */
+static basic_block
+skip_to_exit (basic_block b, struct loop *loop, unsigned succ_edge_count)
+{
+  /* Skip to the begining of the loop if possible, we don't need to check
+     succ_edge count for loops. */
+  if (b->loop_father != loop)
+    {
+      edge e;
+      edge_iterator ei;
+      edge preheader_e = NULL;
+
+      struct loop *oloop = b->loop_father;
+      /* There are multiple latches, we can't figure out the preheader,
+	 just return b. */
+      if (oloop->latch == NULL)
+	return NULL;
+      FOR_EACH_EDGE (e, ei, oloop->header->preds)
+        if (e->src != oloop->latch && preheader_e == NULL)
+          preheader_e = e;
+	else if (e->src != oloop->latch && preheader_e != NULL)
+	  {
+	    preheader_e = NULL;
+	    break;
+	  }
+      /* Only one preheader edge.  */
+      if (preheader_e != NULL)
+        return skip_to_exit (preheader_e->src, loop, 1);
+      else
+	return NULL;
+    }
+  if (EDGE_COUNT (b->succs) != succ_edge_count)
+    return b;
+  /* Skip basic blocks where it is just a passthrough. */
+  if (single_pred_p (b))
+    return skip_to_exit (single_pred_edge (b)->src, loop, 1);
+  /* A join point, find the point where the join was from. */
+  /* FIXME should handle the case where we have more than two
+     predicates. */
+  if (EDGE_COUNT (b->preds) == 2)
+    {
+      basic_block c, d;
+      if (EDGE_PRED (b, 0)->flags & EDGE_ABNORMAL
+	  || EDGE_PRED (b, 1)->flags & EDGE_ABNORMAL)
+	return NULL;
+      c = skip_to_exit (EDGE_PRED (b, 0)->src, loop, 1);
+      if (c == NULL)
+	return NULL;
+      d = skip_to_exit (EDGE_PRED (b, 1)->src, loop, 1);
+      if (c == NULL)
+	return NULL;
+      if (c != d)
+	return NULL;
+      return skip_to_exit (d, loop, 2);
+    }
+  return b;
+}
+
 /* Among LATCHES, guesses a latch edge of LOOP corresponding to subloop, based
    on the structure of induction variables.  Returns this edge, or NULL if we
    do not find any.
 
-   We are quite conservative, and look just for an obvious simple innermost
-   loop (which is the case where we would lose the most performance by not
-   disambiguating the loop).  More precisely, we look for the following
-   situation: The source of the chosen latch edge dominates sources of all
-   the other latch edges.  Additionally, the header does not contain a phi node
-   such that the argument from the chosen edge is equal to the argument from
-   another edge.  */
+   We start by finding all the latches that might have an IV defined by them.
+   If there is only one of them chose that one.  If there is more than one find
+   the one where the accessor of the latch is the header.  If none match that
+   then find the one where the accessor of the latch is a different loop
+   but only do this if the header has only one successor.  */
 
 static edge
 find_subloop_latch_edge_by_ivs (struct loop *loop ATTRIBUTE_UNUSED, VEC (edge, heap) *latches)
 {
-  edge e, latch = VEC_index (edge, latches, 0);
-  unsigned i;
+  edge latch = NULL, e;
+  unsigned i, j;
   gimple phi;
   gimple_stmt_iterator psi;
   tree lop;
   basic_block bb;
+  VEC (edge, heap) *extra_latches = NULL;
 
-  /* Find the candidate for the latch edge.  */
-  for (i = 1; VEC_iterate (edge, latches, i, e); i++)
-    if (dominated_by_p (CDI_DOMINATORS, latch->src, e->src))
-      latch = e;
-
-  /* Verify that it dominates all the latch edges.  */
-  FOR_EACH_VEC_ELT (edge, latches, i, e)
-    if (!dominated_by_p (CDI_DOMINATORS, e->src, latch->src))
+  if (VEC_length (edge, latches) != 1)
+    {
+       if (dump_file)
+	{
+	  fprintf (dump_file, "trying latches:");
+	  FOR_EACH_VEC_ELT (edge, latches, i, e)
+	    fprintf (dump_file, " %d", e->src->index);
+	  fprintf (dump_file, " that might be a IV subloop.\n");
+	}
+    }
+
+  FOR_EACH_VEC_ELT (edge, latches, i, latch)
+    {
+      /* Check for a phi node that would deny that this is a latch edge of
+         a subloop.  */
+      for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
+	{
+	  phi = gsi_stmt (psi);
+	  lop = PHI_ARG_DEF_FROM_EDGE (phi, latch);
+
+	  /* Ignore the values that are not changed inside the subloop.  */
+	  if (TREE_CODE (lop) != SSA_NAME
+	      || SSA_NAME_DEF_STMT (lop) == phi)
+	    continue;
+	  if (lop == PHI_RESULT (phi))
+	    continue;
+	  bb = gimple_bb (SSA_NAME_DEF_STMT (lop));
+	  if (!bb || !flow_bb_inside_loop_p (loop, bb))
+	    continue;
+	  /* Ignore virtual operands PHIs as they will always
+	     be different. */
+	  if (!is_gimple_reg (lop))
+	    continue;
+	  FOR_EACH_VEC_ELT (edge, latches, j, e)
+	    if (e != latch
+		&& PHI_ARG_DEF_FROM_EDGE (phi, e) == lop)
+	      {
+		latch = NULL;
+		goto next_latch;
+	      }
+	}
+      next_latch:
+      if (latch != NULL)
+        VEC_safe_push (edge, heap, extra_latches, latch);
+    }
+  if (VEC_empty (edge, extra_latches))
+    {
+       if (dump_file)
+	fprintf (dump_file, "no latches for IV subloob.\n");
       return NULL;
+    }
 
-  /* Check for a phi node that would deny that this is a latch edge of
-     a subloop.  */
-  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
-    {
-      phi = gsi_stmt (psi);
-      lop = PHI_ARG_DEF_FROM_EDGE (phi, latch);
-
-      /* Ignore the values that are not changed inside the subloop.  */
-      if (TREE_CODE (lop) != SSA_NAME
-	  || SSA_NAME_DEF_STMT (lop) == phi)
-	continue;
-      bb = gimple_bb (SSA_NAME_DEF_STMT (lop));
-      if (!bb || !flow_bb_inside_loop_p (loop, bb))
-	continue;
-
-      FOR_EACH_VEC_ELT (edge, latches, i, e)
-	if (e != latch
-	    && PHI_ARG_DEF_FROM_EDGE (phi, e) == lop)
-	  return NULL;
+  if (VEC_length (edge, extra_latches) != 1)
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, "more one latches:");
+	  FOR_EACH_VEC_ELT (edge, extra_latches, i, e)
+	    fprintf (dump_file, " %d", e->src->index);
+	  fprintf (dump_file, " that might be a IV subloop.\n");
+	}
+      /* Try to look for the subloop where the accessor of the latch is
+	 the header.  */
+      FOR_EACH_VEC_ELT (edge, extra_latches, i, latch)
+	{
+	  basic_block b = latch->src;
+          b = skip_to_exit (b, loop, 1);
+	  if (b == NULL)
+	    continue;
+          if (b == loop->header)
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "Guessing %d as latch of the subloop.\n",
+			   latch->src->index);
+		  fprintf (dump_file, "Because its immediate accessor"
+			   " is the header.\n");
+		}
+	      return latch;
+	    }
+	}
+      /* Try to look for the subloop where the accessor of the latch
+	 is a different loop but only do this if the header has only one
+	 successor.  */
+      if (EDGE_COUNT (loop->header->succs) == 1)
+	FOR_EACH_VEC_ELT (edge, extra_latches, i, latch)
+	  {
+	    basic_block b = latch->src;
+	    b = skip_to_exit (b, loop, 1);
+	    if (b == NULL)
+	      continue;
+	    b = skip_to_exit (b, loop, 2);
+	    if (b == loop->header)
+	      {
+		if (dump_file)
+		  {
+		    fprintf (dump_file, "Guessing %d as latch of the subloop.\n",
+			     latch->src->index);
+		    fprintf (dump_file, "Because condition reaches the header.\n");
+		  }
+		return latch;
+	      }
+	  }
+      return NULL;
     }
 
+  latch = VEC_index (edge, extra_latches, 0);
+
   if (dump_file)
     fprintf (dump_file,
 	     "Found latch edge %d -> %d using iv structure.\n",
 	     latch->src->index, latch->dest->index);
+  VEC_free (edge, heap, extra_latches);
   return latch;
 }