Patchwork Fix memory exhaustion during cunrolli

login
register
mail settings
Submitter Eric Botcazou
Date Sept. 17, 2012, 10:52 a.m.
Message ID <2250451.AMjsrQ0SXf@polaris>
Download mbox | patch
Permalink /patch/184378/
State New
Headers show

Comments

Eric Botcazou - Sept. 17, 2012, 10:52 a.m.
> Yes - now cfg_cleanup does that (and it really shouldn't be its job).  That
> was the improvement I talked about - reducing the number of BBs a lot.

OK, I removed the code and compiled the testcase of PR 43186 with --param max-
completely-peel-loop-nest-depth=32 and got back the explosion.  Compilation is 
instantaneous again with my patch.

> Ah, indeed ;)  Or just push struct loop of changed loops onto a stack.

Thanks for the tip.  Revised patch attached, tested on x86_64-suse-linux.
OK for the mainline?  Or do you want me to remove max-completely-peel-loop-
nest-depth as well?


2012-09-17  Eric Botcazou  <ebotcazou@adacore.com>

	* tree-cfgcleanup. (cleanup_control_expr_graph) <GIMPLE_COND>: Remove
	code doing propagation from degenerate PHI nodes.
	* tree-ssa-loop-ivcanon.c (propagate_into_all_uses): New function.
	(propagate_constants_for_unrolling): Likewise.
	(tree_unroll_loops_completely): If the current loop has been unrolled
	and its father isn't the entire function, propagate constants within
	the new basic blocks by means of propagate_constants_for_unrolling.
Richard Guenther - Sept. 17, 2012, 12:28 p.m.
On Mon, Sep 17, 2012 at 12:52 PM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> Yes - now cfg_cleanup does that (and it really shouldn't be its job).  That
>> was the improvement I talked about - reducing the number of BBs a lot.
>
> OK, I removed the code and compiled the testcase of PR 43186 with --param max-
> completely-peel-loop-nest-depth=32 and got back the explosion.  Compilation is
> instantaneous again with my patch.
>
>> Ah, indeed ;)  Or just push struct loop of changed loops onto a stack.
>
> Thanks for the tip.  Revised patch attached, tested on x86_64-suse-linux.
> OK for the mainline?  Or do you want me to remove max-completely-peel-loop-
> nest-depth as well?

No, we can keep that.

Ok for trunk.

Thanks,
Richard.

>
> 2012-09-17  Eric Botcazou  <ebotcazou@adacore.com>
>
>         * tree-cfgcleanup. (cleanup_control_expr_graph) <GIMPLE_COND>: Remove
>         code doing propagation from degenerate PHI nodes.
>         * tree-ssa-loop-ivcanon.c (propagate_into_all_uses): New function.
>         (propagate_constants_for_unrolling): Likewise.
>         (tree_unroll_loops_completely): If the current loop has been unrolled
>         and its father isn't the entire function, propagate constants within
>         the new basic blocks by means of propagate_constants_for_unrolling.
>
>
> --
> Eric Botcazou

Patch

Index: tree-cfgcleanup.c
===================================================================
--- tree-cfgcleanup.c	(revision 191365)
+++ tree-cfgcleanup.c	(working copy)
@@ -88,40 +88,11 @@  cleanup_control_expr_graph (basic_block
       switch (gimple_code (stmt))
 	{
 	case GIMPLE_COND:
-	  {
-	    tree lhs = gimple_cond_lhs (stmt);
-	    tree rhs = gimple_cond_rhs (stmt);
-	    /* For conditions try harder and lookup single-argument
-	       PHI nodes.  Only do so from the same basic-block though
-	       as other basic-blocks may be dead already.  */
-	    if (TREE_CODE (lhs) == SSA_NAME
-		&& !name_registered_for_update_p (lhs))
-	      {
-		gimple def_stmt = SSA_NAME_DEF_STMT (lhs);
-		if (gimple_code (def_stmt) == GIMPLE_PHI
-		    && gimple_phi_num_args (def_stmt) == 1
-		    && gimple_bb (def_stmt) == gimple_bb (stmt)
-		    && (TREE_CODE (PHI_ARG_DEF (def_stmt, 0)) != SSA_NAME
-			|| !name_registered_for_update_p (PHI_ARG_DEF (def_stmt,
-								       0))))
-		  lhs = PHI_ARG_DEF (def_stmt, 0);
-	      }
-	    if (TREE_CODE (rhs) == SSA_NAME
-		&& !name_registered_for_update_p (rhs))
-	      {
-		gimple def_stmt = SSA_NAME_DEF_STMT (rhs);
-		if (gimple_code (def_stmt) == GIMPLE_PHI
-		    && gimple_phi_num_args (def_stmt) == 1
-		    && gimple_bb (def_stmt) == gimple_bb (stmt)
-		    && (TREE_CODE (PHI_ARG_DEF (def_stmt, 0)) != SSA_NAME
-			|| !name_registered_for_update_p (PHI_ARG_DEF (def_stmt,
-								       0))))
-		  rhs = PHI_ARG_DEF (def_stmt, 0);
-	      }
-	    val = fold_binary_loc (loc, gimple_cond_code (stmt),
-				   boolean_type_node, lhs, rhs);
-	    break;
-	  }
+	  val = fold_binary_loc (loc, gimple_cond_code (stmt),
+				 boolean_type_node,
+			         gimple_cond_lhs (stmt),
+				 gimple_cond_rhs (stmt));
+	  break;
 
 	case GIMPLE_SWITCH:
 	  val = gimple_switch_index (stmt);
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 191365)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -503,6 +503,80 @@  canonicalize_induction_variables (void)
   return 0;
 }
 
+/* Propagate VAL into all uses of SSA_NAME.  */
+
+static void
+propagate_into_all_uses (tree ssa_name, tree val)
+{
+  imm_use_iterator iter;
+  gimple use_stmt;
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
+    {
+      gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
+      use_operand_p use;
+
+      FOR_EACH_IMM_USE_ON_STMT (use, iter)
+	SET_USE (use, val);
+
+      if (is_gimple_assign (use_stmt)
+	  && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
+	     == GIMPLE_SINGLE_RHS)
+	{
+	  tree rhs = gimple_assign_rhs1 (use_stmt);
+
+	  if (TREE_CODE (rhs) == ADDR_EXPR)
+	    recompute_tree_invariant_for_addr_expr (rhs);
+	}
+
+      fold_stmt_inplace (&use_stmt_gsi);
+      update_stmt (use_stmt);
+    }
+}
+
+/* Propagate constant SSA_NAMEs defined in basic block BB.  */
+
+static void
+propagate_constants_for_unrolling (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+
+  /* Look for degenerate PHI nodes with constant argument.  */
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
+    {
+      gimple phi = gsi_stmt (gsi);
+      tree result = gimple_phi_result (phi);
+      tree arg = gimple_phi_arg_def (phi, 0);
+
+      if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
+	{
+	  propagate_into_all_uses (result, arg);
+	  gsi_remove (&gsi, true);
+	  release_ssa_name (result);
+	}
+      else
+	gsi_next (&gsi);
+    }
+
+  /* Look for assignments to SSA names with constant RHS.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
+    {
+      gimple stmt = gsi_stmt (gsi);
+      tree lhs;
+
+      if (is_gimple_assign (stmt)
+	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
+	  && gimple_assign_rhs_code (stmt) == INTEGER_CST)
+	{
+	  propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
+	  gsi_remove (&gsi, true);
+	  release_ssa_name (lhs);
+	}
+      else
+	gsi_next (&gsi);
+    }
+}
+
 /* Unroll LOOPS completely if they iterate just few times.  Unless
    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    size of the code does not increase.  */
@@ -510,6 +584,7 @@  canonicalize_induction_variables (void)
 unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
+  VEC(loop_p,stack) *father_stack = VEC_alloc (loop_p, stack, 16);
   loop_iterator li;
   struct loop *loop;
   bool changed;
@@ -522,22 +597,51 @@  tree_unroll_loops_completely (bool may_i
 
       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 	{
+	  struct loop *loop_father = loop_outer (loop);
+
 	  if (may_increase_size && optimize_loop_for_speed_p (loop)
 	      /* Unroll outermost loops only if asked to do so or they do
 		 not cause code growth.  */
-	      && (unroll_outer
-		  || loop_outer (loop_outer (loop))))
+	      && (unroll_outer || loop_outer (loop_father)))
 	    ul = UL_ALL;
 	  else
 	    ul = UL_NO_GROWTH;
-	  changed |= canonicalize_loop_induction_variables
-		       (loop, false, ul, !flag_tree_loop_ivcanon);
+
+	  if (canonicalize_loop_induction_variables (loop, false, ul,
+						     !flag_tree_loop_ivcanon))
+	    {
+	      changed = true;
+	      /* If we'll continue unrolling, we need to propagate constants
+		 within the new basic blocks to fold away induction variable
+		 computations; otherwise, the size might blow up before the
+		 iteration is complete and the IR eventually cleaned up.  */
+	      if (loop_outer (loop_father) && !loop_father->aux)
+	        {
+		  VEC_safe_push (loop_p, stack, father_stack, loop_father);
+		  loop_father->aux = loop_father;
+		}
+	    }
 	}
 
       if (changed)
 	{
+	  struct loop **iter;
+	  unsigned i;
+
 	  update_ssa (TODO_update_ssa);
 
+	  /* Propagate the constants within the new basic blocks.  */
+	  FOR_EACH_VEC_ELT (loop_p, father_stack, i, iter)
+	    {
+	      unsigned j;
+	      basic_block *body = get_loop_body_in_dom_order (*iter);
+	      for (j = 0; j < (*iter)->num_nodes; j++)
+		propagate_constants_for_unrolling (body[j]);
+	      free (body);
+	      (*iter)->aux = NULL;
+	    }
+	  VEC_truncate (loop_p, father_stack, 0);
+
 	  /* This will take care of removing completely unrolled loops
 	     from the loop structures so we can continue unrolling now
 	     innermost loops.  */
@@ -552,5 +656,7 @@  tree_unroll_loops_completely (bool may_i
   while (changed
 	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 
+  VEC_free (loop_p, stack, father_stack);
+
   return 0;
 }