Patchwork Reduce complete unrolling & peeling limits

login
register
mail settings
Submitter Jan Hubicka
Date Nov. 19, 2012, 12:46 p.m.
Message ID <20121119124644.GA7359@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/199998/
State New
Headers show

Comments

Jan Hubicka - Nov. 19, 2012, 12:46 p.m.
Hi,
this is a patch I will try to test once I have a chance :)
It simply prevents the unroller from analyzing loops when they are already too large.

	* tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Add UPPER_BOUND
	parameter.
	(try_unroll_loop_completely): Update.

Patch

Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 193598)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -1,5 +1,5 @@ 
-/* Induction variable canonicalization.
-   Copyright (C) 2004, 2005, 2007, 2008, 2010
+/* Induction variable canonicalization and loop peeling.
+   Copyright (C) 2004, 2005, 2007, 2008, 2010, 2012
    Free Software Foundation, Inc.
 
 This file is part of GCC.
@@ -29,9 +29,12 @@  along with GCC; see the file COPYING3.
    variables.  In that case the created optimization possibilities are likely
    to pay up.
 
-   Additionally in case we detect that it is beneficial to unroll the
-   loop completely, we do it right here to expose the optimization
-   possibilities to the following passes.  */
+   We also perform
+     - complete unrolling (or peeling) when the loop is rolling few enough
+       times
+     - simple peeling (i.e. copying few initial iterations prior to the loop)
+       when number of iteration estimate is known (typically by the profile
+       info).  */
 
 #include "config.h"
 #include "system.h"
@@ -207,10 +210,12 @@  constant_after_peeling (tree op, gimple
    iteration of the loop.
    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
    of loop.
-   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
+   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
+   Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 
-static void
-tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size)
+static bool
+tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size,
+			 int upper_bound)
 {
   basic_block *body = get_loop_body (loop);
   gimple_stmt_iterator gsi;
@@ -316,6 +321,12 @@  tree_estimate_loop_size (struct loop *lo
 	      if (likely_eliminated || likely_eliminated_last)
 		size->last_iteration_eliminated_by_peeling += num;
 	    }
+	  if ((size->overall - size->eliminated_by_peeling
+	      - size->last_iteration_eliminated_by_peeling) > upper_bound)
+	    {
+              free (body);
+	      return true;
+	    }
 	}
     }
   while (path.length ())
@@ -357,6 +368,7 @@  tree_estimate_loop_size (struct loop *lo
 	     size->last_iteration_eliminated_by_peeling);
 
   free (body);
+  return false;
 }
 
 /* Estimate number of insns of completely unrolled loop.
@@ -699,12 +711,23 @@  try_unroll_loop_completely (struct loop
       sbitmap wont_exit;
       edge e;
       unsigned i;
+      bool large;
       vec<edge> to_remove = vec<edge>();
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      tree_estimate_loop_size (loop, exit, edge_to_cancel, &size);
+      large = tree_estimate_loop_size
+		 (loop, exit, edge_to_cancel, &size,
+	          ul == UL_NO_GROWTH ? 0
+		  : PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) * 2);
       ninsns = size.overall;
+      if (large)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+		     loop->num);
+	  return false;
+	}
 
       unr_insns = estimated_unrolled_size (&size, n_unroll);
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -865,6 +888,133 @@  try_unroll_loop_completely (struct loop
   return true;
 }
 
+/* Return number of instructions after peeling.  */
+static unsigned HOST_WIDE_INT
+estimated_peeled_sequence_size (struct loop_size *size,
+			        unsigned HOST_WIDE_INT npeel)
+{
+  return MAX (npeel * (HOST_WIDE_INT) (size->overall
+			     	       - size->eliminated_by_peeling), 1);
+}
+
+/* If the loop is expected to iterate N times and is
+   small enough, duplicate the loop body N+1 times before
+   the loop itself.  This way the hot path will never
+   enter the loop.
+   Parameters are the same as for try_unroll_loop_completely.  */
+
+static bool
+try_peel_loop (struct loop *loop,
+	       edge exit, tree niter,
+	       HOST_WIDE_INT maxiter)
+{
+  int npeel;
+  struct loop_size size;
+  int peeled_size;
+  sbitmap wont_exit;
+  unsigned i;
+  vec<edge> to_remove = vec<edge>();
+  edge e;
+
+  /* If the iteration bound is known and large, then we can safely eliminate
+     the check in peeled copies.  */
+  if (TREE_CODE (niter) != INTEGER_CST)
+    exit = NULL;
+
+  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
+    return false;
+
+  /* Peel only innermost loops.  */
+  if (loop->inner)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: outer loop\n");
+      return false;
+    }
+
+  if (!optimize_loop_for_speed_p (loop))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: cold loop\n");
+      return false;
+    }
+
+  /* Check if there is an estimate on the number of iterations.  */
+  npeel = estimated_loop_iterations_int (loop);
+  if (npeel < 0)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: number of iterations is not "
+	         "estimated\n");
+      return false;
+    }
+  if (maxiter >= 0 && maxiter <= npeel)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: upper bound is known so can "
+		 "unroll complettely\n");
+      return false;
+    }
+
+  /* We want to peel estimated number of iterations + 1 (so we never
+     enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
+     and be sure to avoid overflows.  */
+  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: rolls too much "
+		 "(%i + 1 > --param max-peel-times)\n", npeel);
+      return false;
+    }
+  npeel++;
+
+  /* Check peeled loops size.  */
+  tree_estimate_loop_size (loop, exit, NULL, &size,
+			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
+  if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
+      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+		 "(%i insns > --param max-peel-insns)", peeled_size);
+      return false;
+    }
+
+  /* Duplicate possibly eliminating the exits.  */
+  initialize_original_copy_tables ();
+  wont_exit = sbitmap_alloc (npeel + 1);
+  bitmap_ones (wont_exit);
+  bitmap_clear_bit (wont_exit, 0);
+  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+					     npeel, wont_exit,
+					     exit, &to_remove,
+					     DLTHE_FLAG_UPDATE_FREQ
+					     | DLTHE_FLAG_COMPLETTE_PEEL))
+    {
+      free_original_copy_tables ();
+      free (wont_exit);
+      return false;
+    }
+  FOR_EACH_VEC_ELT (to_remove, i, e)
+    {
+      bool ok = remove_path (e);
+      gcc_assert (ok);
+    }
+  free (wont_exit);
+  free_original_copy_tables ();
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Peeled loop %d, %i times.\n",
+	       loop->num, npeel);
+    }
+  if (loop->any_upper_bound)
+    loop->nb_iterations_upper_bound -= double_int::from_uhwi (npeel);
+  loop->nb_iterations_estimate = double_int_zero;
+  /* Make sure to mark loop cold so we do not try to peel it more.  */
+  scale_loop_profile (loop, 1, 0);
+  loop->header->count = 0;
+  return true;
+}
 /* Adds a canonical induction variable to LOOP if suitable.
    CREATE_IV is true if we may create a new iv.  UL determines
    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
@@ -939,6 +1089,9 @@  canonicalize_loop_induction_variables (s
       && exit && just_once_each_iteration_p (loop, exit->src))
     create_canonical_iv (loop, exit, niter);
 
+  if (ul == UL_ALL)
+    modified |= try_peel_loop (loop, exit, niter, maxiter);
+
   return modified;
 }
 
@@ -981,8 +1134,10 @@  canonicalize_induction_variables (void)
     }
   BITMAP_FREE (loop_closed_ssa_invalidated);
 
+  /* Update virtuals because we possibly introduced __builtin_unreachable
+     call.  */
   if (changed)
-    return TODO_cleanup_cfg;
+    return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
   return 0;
 }