diff mbox series

[RFC] Fix CFG cleanup compile-time hog, PR85964

Message ID alpine.LSU.2.20.1805291646430.24704@zhemvz.fhfr.qr
State New
Headers show
Series [RFC] Fix CFG cleanup compile-time hog, PR85964 | expand

Commit Message

Richard Biener May 29, 2018, 2:54 p.m. UTC
The following fixes the situation where the initial sweep over the
CFG to remove trivially dead code regions causes excessive compile-time
because of using remove_edge_and_dominated_blocks and thus
iterate_fix_dominators.

The good thing is that I added cleanup_control_flow_pre doing this
initial sweep in PRE order.  So we can simply remove the entry edges
into the dead code regions and use the visited bitmap kept by the PRE
walk to remove unreachable blocks afterwards.

For dominators we then re-compute them from scratch which is way faster
for the testcase (a reduced one gets CFG cleanup time down from
19s to 0.16s).  The testcase still runs into a backwards jump-threading
scalability issue.

Note the patch will be slower in the case where only few edges are removed,
but it looks hard to find a good cut-off upfront.

Note we unfortunately cannot merge this with the unreachable block
removal because of the intermediate code that inserts loop entry
forwarders, which needs dominators to identify natural loop backedges.

Any opinions?

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Thanks,
Richard.

Comments

Richard Biener June 4, 2018, 3:30 p.m. UTC | #1
On Tue, 29 May 2018, Richard Biener wrote:

> 
> The following fixes the situation where the initial sweep over the
> CFG to remove trivially dead code regions causes excessive compile-time
> because of using remove_edge_and_dominated_blocks and thus
> iterate_fix_dominators.
> 
> The good thing is that I added cleanup_control_flow_pre doing this
> initial sweep in PRE order.  So we can simply remove the entry edges
> into the dead code regions and use the visited bitmap kept by the PRE
> walk to remove unreachable blocks afterwards.
> 
> For dominators we then re-compute them from scratch which is way faster
> for the testcase (a reduced one gets CFG cleanup time down from
> 19s to 0.16s).  The testcase still runs into a backwards jump-threading
> scalability issue.
> 
> Note the patch will be slower for the case of not many removed edges
> but it looks hard to find a good cut-off upfront.
> 
> Note we unfortunately cannot merge this with the unreachable block
> removal because of the intermediate code to insert loop entry
> forwarders which needs dominators to identify natural loop backedges.
> 
> Any opinions?

This is a less intrusive variant also factoring in the unreachable
block removal at the start of CFG cleanup.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I'll go ahead with this tomorrow unless there are any comments.

Richard.

2018-06-04  Richard Biener  <rguenther@suse.de>

	* tree-cfgcleanup.c (cleanup_control_flow_pre): For edge
	removal pretend DOM info isn't available so we do not update
	it and only remove edges, not dominated blocks.  Actually free
	DOM info in case we removed something.  Remove unreachable blocks.
	(mfb_keep_latches): Work with either DOM info or marked backedges.
	(cleanup_tree_cfg_noloop): Do not remove unreachable blocks
	first.  Mark backedges if DOM info isn't available.
	(Re-)compute DOM info after cleanup_control_flow_pre.

Index: gcc/tree-cfgcleanup.c
===================================================================
--- gcc/tree-cfgcleanup.c	(revision 261145)
+++ gcc/tree-cfgcleanup.c	(working copy)
@@ -684,8 +684,8 @@ want_merge_blocks_p (basic_block bb1, ba
 }
 
 
-/* Tries to cleanup cfg in basic block BB.  Returns true if anything
-   changes.  */
+/* Tries to cleanup cfg in basic block BB by merging blocks.  Returns
+   true if anything changes.  */
 
 static bool
 cleanup_tree_cfg_bb (basic_block bb)
@@ -725,6 +725,12 @@ cleanup_control_flow_pre ()
 {
   bool retval = false;
 
+  /* We want remove_edge_and_dominated_blocks to only remove edges,
+     not dominated blocks which it does when dom info isn't available.
+     Pretend so.  */
+  dom_state saved_state = dom_info_state (CDI_DOMINATORS);
+  set_dom_info_availability (CDI_DOMINATORS, DOM_NONE);
+
   auto_vec<edge_iterator, 20> stack (n_basic_blocks_for_fn (cfun) + 1);
   auto_sbitmap visited (last_basic_block_for_fn (cfun));
   bitmap_clear (visited);
@@ -741,6 +747,8 @@ cleanup_control_flow_pre ()
 	  && ! bitmap_bit_p (visited, dest->index))
 	{
 	  bitmap_set_bit (visited, dest->index);
+	  /* We only possibly remove edges from DEST here, leaving
+	     possibly unreachable code in the IL.  */
 	  retval |= cleanup_control_flow_bb (dest);
 	  if (EDGE_COUNT (dest->succs) > 0)
 	    stack.quick_push (ei_start (dest->succs));
@@ -754,13 +762,35 @@ cleanup_control_flow_pre ()
 	}
     }
 
+  set_dom_info_availability (CDI_DOMINATORS, saved_state);
+
+  /* We are deleting BBs in non-reverse dominator order, make sure
+     insert_debug_temps_for_defs is prepared for that.  */
+  if (retval)
+    free_dominance_info (CDI_DOMINATORS);
+
+  /* Remove all now (and previously) unreachable blocks.  */
+  for (int i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); ++i)
+    {
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
+      if (bb && !bitmap_bit_p (visited, bb->index))
+	{
+	  if (!retval)
+	    free_dominance_info (CDI_DOMINATORS);
+	  delete_basic_block (bb);
+	  retval = true;
+	}
+    }
+
   return retval;
 }
 
 static bool
 mfb_keep_latches (edge e)
 {
-  return ! dominated_by_p (CDI_DOMINATORS, e->src, e->dest);
+  return !((dom_info_available_p (CDI_DOMINATORS)
+	    && dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
+	   || (e->flags & EDGE_DFS_BACK));
 }
 
 /* Remove unreachable blocks and other miscellaneous clean up work.
@@ -769,23 +799,8 @@ mfb_keep_latches (edge e)
 static bool
 cleanup_tree_cfg_noloop (void)
 {
-  bool changed;
-
   timevar_push (TV_TREE_CLEANUP_CFG);
 
-  /* If dominance information is available, there cannot be any unreachable
-     blocks.  */
-  if (!dom_info_available_p (CDI_DOMINATORS))
-    {
-      changed = delete_unreachable_blocks ();
-      calculate_dominance_info (CDI_DOMINATORS);
-    }
-  else
-    {
-      checking_verify_dominators (CDI_DOMINATORS);
-      changed = false;
-    }
-
   /* Ensure that we have single entries into loop headers.  Otherwise
      if one of the entries is becoming a latch due to CFG cleanup
      (from formerly being part of an irreducible region) then we mess
@@ -795,6 +810,10 @@ cleanup_tree_cfg_noloop (void)
      we need to capture the dominance state before the pending transform.  */
   if (current_loops)
     {
+      /* This needs backedges or dominators.  */
+      if (!dom_info_available_p (CDI_DOMINATORS))
+	mark_dfs_back_edges ();
+
       loop_p loop;
       unsigned i;
       FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
@@ -816,7 +835,9 @@ cleanup_tree_cfg_noloop (void)
 		    any_abnormal = true;
 		    break;
 		  }
-		if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
+		if ((dom_info_available_p (CDI_DOMINATORS)
+		     && dominated_by_p (CDI_DOMINATORS, e->src, bb))
+		    || (e->flags & EDGE_DFS_BACK))
 		  {
 		    found_latch = true;
 		    continue;
@@ -850,12 +871,19 @@ cleanup_tree_cfg_noloop (void)
   /* Start by iterating over all basic blocks in PRE order looking for
      edge removal opportunities.  Do this first because incoming SSA form
      may be invalid and we want to avoid performing SSA related tasks such
-     as propgating out a PHI node during BB merging in that state.  */
-  changed |= cleanup_control_flow_pre ();
+     as propgating out a PHI node during BB merging in that state.  This
+     also gets rid of unreachable blocks.  */
+  bool changed = cleanup_control_flow_pre ();
 
   /* After doing the above SSA form should be valid (or an update SSA
      should be required).  */
 
+  /* Compute dominator info which we need for the iterative process below.  */
+  if (!dom_info_available_p (CDI_DOMINATORS))
+    calculate_dominance_info (CDI_DOMINATORS);
+  else
+    checking_verify_dominators (CDI_DOMINATORS);
+
   /* During forwarder block cleanup, we may redirect edges out of
      SWITCH_EXPRs, which can get expensive.  So we want to enable
      recording of edge to CASE_LABEL_EXPR.  */
@@ -884,6 +912,10 @@ cleanup_tree_cfg_noloop (void)
       if (!bb)
 	continue;
 
+      /* BB merging done by cleanup_tree_cfg_bb can end up propagating
+	 out single-argument PHIs which in turn can expose
+	 cleanup_control_flow_bb opportunities so we have to repeat
+	 that here.  */
       changed |= cleanup_control_flow_bb (bb);
       changed |= cleanup_tree_cfg_bb (bb);
     }
diff mbox series

Patch

diff --git a/gcc/tree-cfgcleanup.c b/gcc/tree-cfgcleanup.c
index 1bf7771dac1..4915d5e8f5f 100644
--- a/gcc/tree-cfgcleanup.c
+++ b/gcc/tree-cfgcleanup.c
@@ -57,7 +57,7 @@  bitmap cfgcleanup_altered_bbs;
 /* Remove any fallthru edge from EV.  Return true if an edge was removed.  */
 
 static bool
-remove_fallthru_edge (vec<edge, va_gc> *ev)
+remove_fallthru_edge (vec<edge, va_gc> *ev, bool only_edges_p)
 {
   edge_iterator ei;
   edge e;
@@ -68,7 +68,12 @@  remove_fallthru_edge (vec<edge, va_gc> *ev)
 	if (e->flags & EDGE_COMPLEX)
 	  e->flags &= ~EDGE_FALLTHRU;
 	else
-	  remove_edge_and_dominated_blocks (e);
+	  {
+	    if (only_edges_p)
+	      remove_edge (e);
+	    else
+	      remove_edge_and_dominated_blocks (e);
+	  }
 	return true;
       }
   return false;
@@ -122,7 +127,8 @@  convert_single_case_switch (gswitch *swtch, gimple_stmt_iterator &gsi)
    at block BB.  */
 
 static bool
-cleanup_control_expr_graph (basic_block bb, gimple_stmt_iterator gsi)
+cleanup_control_expr_graph (basic_block bb, gimple_stmt_iterator gsi,
+			    bool only_edges_p)
 {
   edge taken_edge;
   bool retval = false;
@@ -182,7 +188,10 @@  cleanup_control_expr_graph (basic_block bb, gimple_stmt_iterator gsi)
 		}
 
 	      taken_edge->probability += e->probability;
-	      remove_edge_and_dominated_blocks (e);
+	      if (only_edges_p)
+		remove_edge (e);
+	      else
+		remove_edge_and_dominated_blocks (e);
 	      retval = true;
 	    }
 	  else
@@ -222,7 +231,7 @@  cleanup_call_ctrl_altering_flag (gimple *bb_end)
    true if anything changes.  */
 
 static bool
-cleanup_control_flow_bb (basic_block bb)
+cleanup_control_flow_bb (basic_block bb, bool only_edges_p)
 {
   gimple_stmt_iterator gsi;
   bool retval = false;
@@ -230,7 +239,27 @@  cleanup_control_flow_bb (basic_block bb)
 
   /* If the last statement of the block could throw and now cannot,
      we need to prune cfg.  */
-  retval |= gimple_purge_dead_eh_edges (bb);
+  if (only_edges_p)
+    {
+      gimple *stmt = last_stmt (bb);
+      if (!(stmt && stmt_can_throw_internal (stmt)))
+	{
+	  edge e;
+	  edge_iterator ei;
+	  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
+	    {
+	      if (e->flags & EDGE_EH)
+		{
+		  remove_edge (e);
+		  retval = true;
+		}
+	      else
+		ei_next (&ei);
+	    }
+	}
+    }
+  else
+    retval |= gimple_purge_dead_eh_edges (bb);
 
   gsi = gsi_last_nondebug_bb (bb);
   if (gsi_end_p (gsi))
@@ -245,7 +274,7 @@  cleanup_control_flow_bb (basic_block bb)
       || gimple_code (stmt) == GIMPLE_SWITCH)
     {
       gcc_checking_assert (gsi_stmt (gsi_last_bb (bb)) == stmt);
-      retval |= cleanup_control_expr_graph (bb, gsi);
+      retval |= cleanup_control_expr_graph (bb, gsi, only_edges_p);
     }
   else if (gimple_code (stmt) == GIMPLE_GOTO
 	   && TREE_CODE (gimple_goto_dest (stmt)) == ADDR_EXPR
@@ -270,7 +299,12 @@  cleanup_control_flow_bb (basic_block bb)
       for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 	{
 	  if (e->dest != target_block)
-	    remove_edge_and_dominated_blocks (e);
+	    {
+	      if (only_edges_p)
+		remove_edge (e);
+	      else
+		remove_edge_and_dominated_blocks (e);
+	    }
 	  else
 	    {
 	      /* Turn off the EDGE_ABNORMAL flag.  */
@@ -300,7 +334,7 @@  cleanup_control_flow_bb (basic_block bb)
 	 now, they should be all unreachable anyway.  */
       for (gsi_next (&gsi); !gsi_end_p (gsi); )
 	gsi_remove (&gsi, true);
-      if (remove_fallthru_edge (bb->succs))
+      if (remove_fallthru_edge (bb->succs, only_edges_p))
 	retval = true;
     }
 
@@ -741,7 +775,7 @@  cleanup_control_flow_pre ()
 	  && ! bitmap_bit_p (visited, dest->index))
 	{
 	  bitmap_set_bit (visited, dest->index);
-	  retval |= cleanup_control_flow_bb (dest);
+	  retval |= cleanup_control_flow_bb (dest, true);
 	  if (EDGE_COUNT (dest->succs) > 0)
 	    stack.quick_push (ei_start (dest->succs));
 	}
@@ -754,6 +788,19 @@  cleanup_control_flow_pre ()
 	}
     }
 
+  /* If we removed any edge remove all now unreachable blocks.  */
+  if (retval)
+    {
+      free_dominance_info (CDI_DOMINATORS);
+      for (int i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); ++i)
+	{
+	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
+	  if (bb && !bitmap_bit_p (visited, bb->index))
+	    delete_basic_block (bb);
+	}
+      calculate_dominance_info (CDI_DOMINATORS);
+    }
+
   return retval;
 }
 
@@ -808,7 +855,7 @@  cleanup_tree_cfg_1 (void)
       if (!bb)
 	continue;
 
-      retval |= cleanup_control_flow_bb (bb);
+      retval |= cleanup_control_flow_bb (bb, false);
       retval |= cleanup_tree_cfg_bb (bb);
     }