diff mbox series

Fix PR83887, re-implement SESE region merging

Message ID alpine.LSU.2.20.1801171528430.32271@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR83887, re-implement SESE region merging | expand

Commit Message

Richard Biener Jan. 17, 2018, 2:36 p.m. UTC
This fixes PR83887 where current GRAPHITE SESE region merging sometimes
finds non-SESE regions as a result.

It re-implements SESE region merging by searching for the entry/exit
with a worklist based algorithm seeded by the entry/exit block of
the to be merged regions, walking predecessors and successors and
using dominator information to see whether we deal with a
(possible) entry or exit edge.

The patch misses optimization in that known SESE regions can be
skipped in the walk (and trivially known ones are single exit
loops).  While easily added during iteration I've yet think about
a way to handle the case where the initial seeds are entries of
such regions.

I probably also should add some comments.

Bootstrapped and tested on x86_64-unknown-linux-gnu, I've also built
SPEC CPU 2006 with -floop-nest-optimize [-fgraphite-identity] with
no new issues and 4 less optimized loop nests (maybe we did get away
with some invalid SESE regions - I did not investigate yet).

Comments welcome.

Thanks,
Richard.

2018-01-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83887
	* graphite-scop-detection.c
	(scop_detection::get_nearest_dom_with_single_entry): Remove.
	(scop_detection::get_nearest_pdom_with_single_exit): Likewise.
	(scop_detection::merge_sese): Re-implement with a flood-fill
	algorithm that properly finds a SESE region if it exists.

	* gcc.dg/graphite/pr83887.c: New testcase.
	* gfortran.dg/graphite/pr83887.f90: Likewise.
	* gfortran.dg/graphite/pr83887.f: Likewise.
diff mbox series

Patch

Index: gcc/graphite-scop-detection.c
===================================================================
--- gcc/graphite-scop-detection.c	(revision 256776)
+++ gcc/graphite-scop-detection.c	(working copy)
@@ -309,16 +309,6 @@  public:
 
   sese_l get_sese (loop_p loop);
 
-  /* Return the closest dominator with a single entry edge.  In case of a
-     back-loop the back-edge is not counted.  */
-
-  static edge get_nearest_dom_with_single_entry (basic_block dom);
-
-  /* Return the closest post-dominator with a single exit edge.  In case of a
-     back-loop the back-edge is not counted.  */
-
-  static edge get_nearest_pdom_with_single_exit (basic_block dom);
-
   /* Merge scops at same loop depth and returns the new sese.
      Returns a new SESE when merge was successful, INVALID_SESE otherwise.  */
 
@@ -441,85 +431,6 @@  scop_detection::get_sese (loop_p loop)
   return sese_l (scop_begin, scop_end);
 }
 
-/* Return the closest dominator with a single entry edge.  */
-
-edge
-scop_detection::get_nearest_dom_with_single_entry (basic_block dom)
-{
-  if (!dom->preds)
-    return NULL;
-
-  /* If any of the dominators has two predecessors but one of them is a back
-     edge, then that basic block also qualifies as a dominator with single
-     entry.  */
-  if (dom->preds->length () == 2)
-    {
-      /* If e1->src dominates e2->src then e1->src will also dominate dom.  */
-      edge e1 = (*dom->preds)[0];
-      edge e2 = (*dom->preds)[1];
-      loop_p l = dom->loop_father;
-      loop_p l1 = e1->src->loop_father;
-      loop_p l2 = e2->src->loop_father;
-      if (l != l1 && l == l2
-	  && dominated_by_p (CDI_DOMINATORS, e2->src, e1->src))
-	return e1;
-      if (l != l2 && l == l1
-	  && dominated_by_p (CDI_DOMINATORS, e1->src, e2->src))
-	return e2;
-    }
-
-  while (dom->preds->length () != 1)
-    {
-      if (dom->preds->length () < 1)
-	return NULL;
-      dom = get_immediate_dominator (CDI_DOMINATORS, dom);
-      if (!dom->preds)
-	return NULL;
-    }
-  return (*dom->preds)[0];
-}
-
-/* Return the closest post-dominator with a single exit edge.  In case of a
-   back-loop the back-edge is not counted.  */
-
-edge
-scop_detection::get_nearest_pdom_with_single_exit (basic_block pdom)
-{
-  if (!pdom->succs)
-    return NULL;
-
-  /* If any of the post-dominators has two successors but one of them is a back
-     edge, then that basic block also qualifies as a post-dominator with single
-     exit. */
-  if (pdom->succs->length () == 2)
-    {
-      /* If e1->dest post-dominates e2->dest then e1->dest will also
-	 post-dominate pdom.  */
-      edge e1 = (*pdom->succs)[0];
-      edge e2 = (*pdom->succs)[1];
-      loop_p l = pdom->loop_father;
-      loop_p l1 = e1->dest->loop_father;
-      loop_p l2 = e2->dest->loop_father;
-      if (l != l1 && l == l2
-	  && dominated_by_p (CDI_POST_DOMINATORS, e2->dest, e1->dest))
-	return e1;
-      if (l != l2 && l == l1
-	  && dominated_by_p (CDI_POST_DOMINATORS, e1->dest, e2->dest))
-	return e2;
-    }
-
-  while (pdom->succs->length () != 1)
-    {
-      if (pdom->succs->length () < 1)
-	return NULL;
-      pdom = get_immediate_dominator (CDI_POST_DOMINATORS, pdom);
-      if (!pdom->succs)
-	return NULL;
-    }
-
-  return (*pdom->succs)[0];
-}
-
 /* Merge scops at same loop depth and returns the new sese.
    Returns a new SESE when merge was successful, INVALID_SESE otherwise.  */
 
@@ -537,73 +448,78 @@  scop_detection::merge_sese (sese_l first
 	       dp << "[scop-detection] try merging sese s2: ";
 	       print_sese (dump_file, second));
 
-  /* Assumption: Both the sese's should be at the same loop depth or one scop
-     should subsume the other like in case of nested loops.  */
-
-  /* Find the common dominators for entry,
-     and common post-dominators for the exit.  */
-  basic_block dom = nearest_common_dominator (CDI_DOMINATORS,
-					      get_entry_bb (first),
-					      get_entry_bb (second));
-
-  edge entry = get_nearest_dom_with_single_entry (dom);
-
-  if (!entry || (entry->flags & EDGE_IRREDUCIBLE_LOOP))
-    return invalid_sese;
-
-  basic_block pdom = nearest_common_dominator (CDI_POST_DOMINATORS,
-					       get_exit_bb (first),
-					       get_exit_bb (second));
-  pdom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, pdom);
-
-  edge exit = get_nearest_pdom_with_single_exit (pdom);
-
-  if (!exit || (exit->flags & EDGE_IRREDUCIBLE_LOOP))
-    return invalid_sese;
-
-  sese_l combined (entry, exit);
-
-  DEBUG_PRINT (dp << "[scop-detection] checking combined sese: ";
-	       print_sese (dump_file, combined));
+  auto_bitmap worklist, in_sese_region;
+  bitmap_set_bit (worklist, get_entry_bb (first)->index);
+  bitmap_set_bit (worklist, get_exit_bb (first)->index);
+  bitmap_set_bit (worklist, get_entry_bb (second)->index);
+  bitmap_set_bit (worklist, get_exit_bb (second)->index);
+  edge entry = NULL, exit = NULL;
+
+  /* We can optimize the case of adding a loop entry dest or exit
+     src to the worklist (for single-exit loops) by skipping
+     directly to the exit dest / entry src.  in_sese_region
+     doesn't have to cover all blocks in the region but merely
+     its border it acts more like a visited bitmap.  */
+  do
+    {
+      int index = bitmap_first_set_bit (worklist);
+      bitmap_clear_bit (worklist, index);
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, index);
+      edge_iterator ei;
+      edge e;
 
-  /* FIXME: We could iterate to find the dom which dominates pdom, and pdom
-     which post-dominates dom, until it stabilizes.  Also, ENTRY->SRC and
-     EXIT->DEST should be in the same loop nest.  */
-  if (!dominated_by_p (CDI_DOMINATORS, pdom, dom)
-      || entry->src->loop_father != exit->dest->loop_father)
-    return invalid_sese;
-
-  /* For now we just bail out when there is a loop exit in the region
-     that is not also the exit of the region.  We could enlarge the
-     region to cover the loop that region exits to.  See PR79977.  */
-  if (loop_outer (entry->src->loop_father))
-    {
-      vec<edge> exits = get_loop_exit_edges (entry->src->loop_father);
-      for (unsigned i = 0; i < exits.length (); ++i)
+      /* With fake exit edges we can end up with no possible exit.  */
+      if (index == EXIT_BLOCK)
 	{
-	  if (exits[i] != exit
-	      && bb_in_region (exits[i]->src, entry->dest, exit->src))
-	    {
-	      DEBUG_PRINT (dp << "[scop-detection-fail] cannot merge seses.\n");
-	      exits.release ();
-	      return invalid_sese;
-	    }
+	  DEBUG_PRINT (dp << "[scop-detection-fail] cannot merge seses.\n");
+	  return invalid_sese;
 	}
-      exits.release ();
+
+      bitmap_set_bit (in_sese_region, bb->index);
+         
+      basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+      FOR_EACH_EDGE (e, ei, bb->preds)
+	if (e->src == dom
+	    && (! entry
+		|| dominated_by_p (CDI_DOMINATORS, entry->dest, bb)))
+	  {
+	    if (entry
+		&& ! bitmap_bit_p (in_sese_region, entry->src->index))
+	      bitmap_set_bit (worklist, entry->src->index);
+	    entry = e;
+	  }
+	else if (! bitmap_bit_p (in_sese_region, e->src->index))
+	  bitmap_set_bit (worklist, e->src->index);
+
+      basic_block pdom = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	if (e->dest == pdom
+	    && (! exit
+		|| dominated_by_p (CDI_POST_DOMINATORS, exit->src, bb)))
+	  {
+	    if (exit
+		&& ! bitmap_bit_p (in_sese_region, exit->dest->index))
+	      bitmap_set_bit (worklist, exit->dest->index);
+	    exit = e;
+	  }
+	else if (! bitmap_bit_p (in_sese_region, e->dest->index))
+	  bitmap_set_bit (worklist, e->dest->index);
     }
+  while (! bitmap_empty_p (worklist));
 
-  /* For now we just want to bail out when exit does not post-dominate entry.
-     TODO: We might just add a basic_block at the exit to make exit
-     post-dominate entry (the entire region).  */
-  if (!dominated_by_p (CDI_POST_DOMINATORS, get_entry_bb (combined),
-		       get_exit_bb (combined))
-      || !dominated_by_p (CDI_DOMINATORS, get_exit_bb (combined),
-			  get_entry_bb (combined)))
+  /* Include the BB with the loop-closed SSA PHI nodes.
+     canonicalize_loop_closed_ssa makes sure that is in proper shape.  */
+  if (exit->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
+      && loop_exit_edge_p (exit->src->loop_father, exit))
     {
-      DEBUG_PRINT (dp << "[scop-detection-fail] cannot merge seses.\n");
-      return invalid_sese;
+      gcc_assert (single_pred_p (exit->dest)
+		  && single_succ_p (exit->dest)
+		  && sese_trivially_empty_bb_p (exit->dest));
+      exit = single_succ_edge (exit->dest);
     }
 
+  sese_l combined (entry, exit);
+
   DEBUG_PRINT (dp << "[merged-sese] s1: "; print_sese (dump_file, combined));
 
   return combined;
Index: gcc/testsuite/gcc.dg/graphite/pr83887.c
===================================================================
--- gcc/testsuite/gcc.dg/graphite/pr83887.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/graphite/pr83887.c	(working copy)
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -floop-nest-optimize -fno-tree-loop-im" } */
+
+int z4, g7;
+
+void
+x3 (int my)
+{
+  while (my < 2)
+    {
+      for (z4 = 0; z4 < 2; ++z4)
+	{
+	}
+
+      if (my != 0)
+	for (g7 = 0; g7 < 2; ++g7)
+	  {
+	  }
+
+      ++my;
+    }
+}
Index: gcc/testsuite/gfortran.dg/graphite/pr83887.f90
===================================================================
--- gcc/testsuite/gfortran.dg/graphite/pr83887.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/graphite/pr83887.f90	(working copy)
@@ -0,0 +1,59 @@ 
+! { dg-do compile }
+! { dg-options "-O -floop-nest-optimize" }
+      SUBROUTINE ZTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, &
+                   B, LDB )
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      complex(kind((1.0d0,1.0d0)))         ALPHA
+      complex(kind((1.0d0,1.0d0)))         A( LDA, * ), B( LDB, * )
+      EXTERNAL           XERBLA
+      INTRINSIC          CONJG, MAX
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      complex(kind((1.0d0,1.0d0)))         TEMP
+      complex(kind((1.0d0,1.0d0)))         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      complex(kind((1.0d0,1.0d0)))         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+      LSIDE  =  scan( SIDE  , 'Ll' )>0
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ =  scan( TRANSA, 'Tt' )>0
+      NOUNIT =  scan( DIAG  , 'Nn' )>0
+      UPPER  =  scan( UPLO  , 'Uu' )>0
+      INFO   = 0
+      IF( N.EQ.0 ) &
+   RETURN
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+               DO 160, J = 1, N
+                  DO 150, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT ) &
+                     TEMP = TEMP*A( I, I )
+                        DO 130, K = I + 1, M
+                           TEMP = TEMP + A( K, I )*B( K, J )
+  130                   CONTINUE
+                     ELSE
+                        IF( NOUNIT ) &
+                     TEMP = TEMP*CONJG( A( I, I ) )
+                        DO 140, K = I + 1, M
+                           TEMP = TEMP + CONJG( A( K, I ) )*B( K, J )
+  140                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  150             CONTINUE
+  160          CONTINUE
+      RETURN
+      END
+
Index: gcc/testsuite/gfortran.dg/graphite/pr83887.f
===================================================================
--- gcc/testsuite/gfortran.dg/graphite/pr83887.f	(nonexistent)
+++ gcc/testsuite/gfortran.dg/graphite/pr83887.f	(working copy)
@@ -0,0 +1,23 @@ 
+! { dg-do compile }
+! { dg-options "-O2 -floop-nest-optimize" }
+      SUBROUTINE STONG(IGAUSS)
+      DIMENSION EXX(6)
+      PARAMETER (MXSH=1000, MXGTOT=5000)
+      COMMON /NSHEL / EX(MXGTOT),CS(MXGTOT),NSHELL
+  100 CONTINUE
+      NSHELL = NSHELL+1
+      IF(NSHELL.GT.MXSH) THEN
+         RETURN
+      END IF
+      DO 320 I = 1,IGAUSS
+         K = K1+I-1
+         EX(K) = EXX(I)*SCALE
+  320 CONTINUE
+      IF(TNORM.GT.TOLNRM) THEN
+         STOP
+      END IF
+      DO 460 IG = K1,K2
+         CS(IG) = FACS*CS(IG)
+  460 CONTINUE
+      GO TO 100
+      END