diff mbox

Fix profile updating in loop peeling

Message ID 20160530123840.GA82571@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka May 30, 2016, 12:38 p.m. UTC
Hi,
this patch fixes profile updates in loop peeling pass.  First it correctly
set wont_exit which can only be set when we know the number of iterations
tested by EXIT and this number is higher than maximal number of iterations
(an unlikely case which is usually removed by VRP pass or earlier cunroll).

Second problem is that we determine number of peelings as number of estimated
iterations + 1.  After peeling we currently underflow updating estimates
which makes them to be re-computed later to bogus values.

Last change is way we netermine profile for the new loop header.  We used to
drop the loop frequency 1/1000.  This patch makes use of the info on remaining
entry edges to the loop.

While working on this I noticed that try_peel_loop has tendency to iterate
and peel one loop many times.  I will prepare followup for that.  Also
testcases will come once I commit the change enabling loop peeling at -O3
by using likely estimates.

Bootstrapped/regtested x86_64-linux.

	* tree-ssa-loop-ivcanon.c (try_peel_loop): Correctly set wont_exit
	for peeled copies; avoid underflow when updating estimates; correctly
	scale loop profile.

Comments

Bernhard Reutner-Fischer June 2, 2016, 8:34 a.m. UTC | #1
On Mon, May 30, 2016 at 02:38:40PM +0200, Jan Hubicka wrote:
> Hi,
> this patch fixes profile updates in loop peeling pass.  First it correctly
> set wont_exit which can only be set when we know the number of iterations
> tested by EXIT and this number is higher than maximal number of iterations
> (an unlikely case which is usually removed by VRP pass or earlier cunroll).
> 
> Second problem is that we determine number of peelings as number of estimated
> iterations + 1.  After peeling we currently underflow updating estimates
> which makes them to be re-computed later to bogus values.
> 
> Last change is way we netermine profile for the new loop header.  We used to
> drop the loop frequency 1/1000.  This patch makes use of the info on remaining
> entry edges to the loop.
> 
> While working on this I noticed that try_peel_loop has tendency to iterate
> and peel one loop many times.  I will prepare followup for that.  Also
> testcases will come once I commit the change enabling loop peeling at -O3
> by using likely estimates.
> 
> Bootstrapped/regtested x86_64-linux.
> 
> 	* tree-ssa-loop-ivcanon.c (try_peel_loop): Correctly set wont_exit
> 	for peeled copies; avoid underflow when updating estimates; correctly
> 	scale loop profile.
> 
> Index: tree-ssa-loop-ivcanon.c
> ===================================================================
> --- tree-ssa-loop-ivcanon.c	(revision 236874)
> +++ tree-ssa-loop-ivcanon.c	(working copy)

> @@ -1053,14 +1065,48 @@ try_peel_loop (struct loop *loop,
>        fprintf (dump_file, "Peeled loop %d, %i times.\n",
>  	       loop->num, (int) npeel);
>      }
> +  if (loop->any_estimate)
> +    {
> +      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
> +        loop->nb_iterations_estimate -= npeel;
> +      else
> +	loop->nb_iterations_estimate = 0;
> +    }
>    if (loop->any_upper_bound)
> -    loop->nb_iterations_upper_bound -= npeel;
> +    {
> +      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))

shouldn't this compare loop->nb_iterations_upper_bound to npeel instead
of nb_iterations_estimate ?

> +        loop->nb_iterations_upper_bound -= npeel;
> +      else
> +        loop->nb_iterations_upper_bound = 0;
> +    }
>    if (loop->any_likely_upper_bound)
> -    loop->nb_iterations_likely_upper_bound -= npeel;
> -  loop->nb_iterations_estimate = 0;
> -  /* Make sure to mark loop cold so we do not try to peel it more.  */
> -  scale_loop_profile (loop, 1, 0);
> -  loop->header->count = 0;
> +    {
> +      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))

nb_iterations_likely_upper_bound ?
What am i missing?

> +	loop->nb_iterations_likely_upper_bound -= npeel;
> +      else
> +	{
> +	  loop->any_estimate = true;
> +	  loop->nb_iterations_estimate = 0;
> +	  loop->nb_iterations_likely_upper_bound = 0;

I don't immediately understand this else branch but didn't try hard.
Can you please explain or maybe add a comment?

thanks,
> +	}
> +    }
> +  gcov_type entry_count = 0;
> +  int entry_freq = 0;
> +
> +  edge_iterator ei;
> +  FOR_EACH_EDGE (e, ei, loop->header->preds)
> +    if (e->src != loop->latch)
> +      {
> +	entry_count += e->src->count;
> +	entry_freq += e->src->frequency;
> +	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
> +      }
> +  int scale = 1;
> +  if (loop->header->count)
> +    scale = RDIV (entry_count * REG_BR_PROB_BASE, loop->header->count);
> +  else if (loop->header->frequency)
> +    scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
> +  scale_loop_profile (loop, scale, 0);
>    return true;
>  }
>  /* Adds a canonical induction variable to LOOP if suitable.
diff mbox

Patch

Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 236874)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -970,7 +970,9 @@  try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
-  /* Peel only innermost loops.  */
+  /* Peel only innermost loops.
+     While the code is perfectly capable of peeling non-innermost loops,
+     the heuristics would probably need some improvements. */
   if (loop->inner)
     {
       if (dump_file)
@@ -1029,13 +1031,23 @@  try_peel_loop (struct loop *loop,
   /* Duplicate possibly eliminating the exits.  */
   initialize_original_copy_tables ();
   wont_exit = sbitmap_alloc (npeel + 1);
-  bitmap_ones (wont_exit);
-  bitmap_clear_bit (wont_exit, 0);
+  if (exit && niter
+      && TREE_CODE (niter) == INTEGER_CST
+      && wi::leu_p (npeel, wi::to_widest (niter)))
+    {
+      bitmap_ones (wont_exit);
+      if (wi::eq_p (wi::to_widest (niter), npeel))
+        bitmap_clear_bit (wont_exit, 0);
+    }
+  else
+    {
+      exit = NULL;
+      bitmap_clear (wont_exit);
+    }
   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 					     npeel, wont_exit,
 					     exit, &to_remove,
-					     DLTHE_FLAG_UPDATE_FREQ
-					     | DLTHE_FLAG_COMPLETTE_PEEL))
+					     DLTHE_FLAG_UPDATE_FREQ))
     {
       free_original_copy_tables ();
       free (wont_exit);
@@ -1053,14 +1065,48 @@  try_peel_loop (struct loop *loop,
       fprintf (dump_file, "Peeled loop %d, %i times.\n",
 	       loop->num, (int) npeel);
     }
+  if (loop->any_estimate)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
+        loop->nb_iterations_estimate -= npeel;
+      else
+	loop->nb_iterations_estimate = 0;
+    }
   if (loop->any_upper_bound)
-    loop->nb_iterations_upper_bound -= npeel;
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
+        loop->nb_iterations_upper_bound -= npeel;
+      else
+        loop->nb_iterations_upper_bound = 0;
+    }
   if (loop->any_likely_upper_bound)
-    loop->nb_iterations_likely_upper_bound -= npeel;
-  loop->nb_iterations_estimate = 0;
-  /* Make sure to mark loop cold so we do not try to peel it more.  */
-  scale_loop_profile (loop, 1, 0);
-  loop->header->count = 0;
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
+	loop->nb_iterations_likely_upper_bound -= npeel;
+      else
+	{
+	  loop->any_estimate = true;
+	  loop->nb_iterations_estimate = 0;
+	  loop->nb_iterations_likely_upper_bound = 0;
+	}
+    }
+  gcov_type entry_count = 0;
+  int entry_freq = 0;
+
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    if (e->src != loop->latch)
+      {
+	entry_count += e->src->count;
+	entry_freq += e->src->frequency;
+	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
+      }
+  int scale = 1;
+  if (loop->header->count)
+    scale = RDIV (entry_count * REG_BR_PROB_BASE, loop->header->count);
+  else if (loop->header->frequency)
+    scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
+  scale_loop_profile (loop, scale, 0);
   return true;
 }
 /* Adds a canonical induction variable to LOOP if suitable.