Patchwork Non-dominating loop bounds in tree-ssa-loop-niter 2/4

login
register
mail settings
Submitter Jan Hubicka
Date Oct. 30, 2012, 6:29 p.m.
Message ID <20121030182949.GA9056@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/195582/
State New
Headers show

Comments

Jan Hubicka - Oct. 30, 2012, 6:29 p.m.
Hi,
this patch implements the second part of planned change - to determine loop bounds
based by shortest path discovery.  This allows to bound number of iterations
on loops with bounds in statements that do not dominate the latch.

I originally planned to implement this as part of maybe_lower_iteration_bound,
but I found doing it separately is more readable.  While both performs walk of
the loop body, both do that for different reasons.

discover_iteration_bound_by_body_walk walks from header to latch, while
maybe_lower_iteration_bound walks from header to first statement with side
effects looking if there is exit on the way.

Both walks can be skipped for many loops, but each with different reasons.

Bootstrapped/regtested x86_64-linux, OK?

	* tree-ssa-loop-niter.c (double_int_cmp, bound_index,
	discover_iteration_bound_by_body_walk): New functions.
	(discover_iteration_bound_by_body_walk): Use it.

	* gcc.dg/tree-ssa/loop-38.c: New testcase.
Richard Guenther - Oct. 31, 2012, 9:08 a.m.
On Tue, 30 Oct 2012, Jan Hubicka wrote:

> Hi,
> this patch implements the second part of planned change - to determine loop bounds
> based by shortest path discovery.  This allows to bound number of iterations
> on loops with bounds in statements that do not dominate the latch.
> 
> I originally planned to implement this as part of maybe_lower_iteration_bound,
> but I found doing it separately is more readable.  While both performs walk of
> the loop body, both do that for different reasons.
> 
> discover_iteration_bound_by_body_walk walks from header to latch, while
> maybe_lower_iteration_bound walks from header to first statement with side
> effects looking if there is exit on the way.
> 
> Both walks can be skipped for many loops, but each with different reasons.
> 
> Bootstrapped/regtested x86_64-linux, OK?
> 
> 	* tree-ssa-loop-niter.c (double_int_cmp, bound_index,
> 	discover_iteration_bound_by_body_walk): New functions.
> 	(discover_iteration_bound_by_body_walk): Use it.
> 
> 	* gcc.dg/tree-ssa/loop-38.c: New testcase.
> 
> Index: tree-ssa-loop-niter.c
> ===================================================================
> --- tree-ssa-loop-niter.c	(revision 192989)
> +++ tree-ssa-loop-niter.c	(working copy)
> @@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val)
>    return ret;
>  }
>  
> +/* Compare double ints, callback for qsort.  */
> +
> +int
> +double_int_cmp (const void *p1, const void *p2)
> +{
> +  const double_int *d1 = (const double_int *)p1;
> +  const double_int *d2 = (const double_int *)p2;
> +  if (*d1 == *d2)
> +    return 0;
> +  if (d1->ult (*d2))
> +    return -1;
> +  return 1;
> +}
> +
> +/* Return index of BOUND in BOUNDS array sorted in increasing order.
> +   Lookup by binary search.  */
> +
> +int
> +bound_index (VEC (double_int, heap) *bounds, double_int bound)
> +{
> +  unsigned int end = VEC_length (double_int, bounds);
> +  unsigned int begin = 0;
> +
> +  /* Find a matching index by means of a binary search.  */
> +  while (begin != end)
> +    {
> +      unsigned int middle = (begin + end) / 2;
> +      double_int index = VEC_index (double_int, bounds, middle);
> +
> +      if (index == bound)
> +	return middle;
> +      else if (index.ult (bound))
> +	begin = middle + 1;
> +      else
> +	end = middle;
> +    }
> +  gcc_unreachable ();
> +}
> +
> +/* Used to hold vector of queues of basic blocks bellow.  */
> +typedef VEC (basic_block, heap) *bb_queue;
> +DEF_VEC_P(bb_queue);
> +DEF_VEC_ALLOC_P(bb_queue,heap);
> +
> +/* We recorded loop bounds only for statements dominating loop latch (and thus
> +   executed each loop iteration).  If there are any bounds on statements not
> +   dominating the loop latch we can improve the estimate by walking the loop
> +   body and seeing if every path from loop header to loop latch contains
> +   some bounded statement.  */
> +
> +static void
> +discover_iteration_bound_by_body_walk (struct loop *loop)
> +{
> +  pointer_map_t *bb_bounds;
> +  struct nb_iter_bound *elt;
> +  VEC (double_int, heap) *bounds = NULL;
> +  VEC (bb_queue, heap) *queues = NULL;
> +  bb_queue queue = NULL;
> +  ptrdiff_t queue_index;
> +  ptrdiff_t latch_index = 0;
> +  pointer_map_t *visited;
> +
> +  /* Discover what bounds may interest us.  */
> +  for (elt = loop->bounds; elt; elt = elt->next)
> +    {
> +      double_int bound = elt->bound;
> +
> +      /* Exit terminates loop at given iteration, while non-exits produce undefined
> +	 effect on the next iteration.  */
> +      if (!elt->is_exit)
> +	{
> +	  bound += double_int_one;
> +	  /* Overflow, give up on this bound.  */
> +	  if (bound == double_int_zero)
> +	    continue;
> +	}
> +
> +      if (!loop->any_upper_bound
> +	  || bound.ult (loop->nb_iterations_upper_bound))
> +        VEC_safe_push (double_int, heap, bounds, bound);
> +    }
> +
> +  /* Exit early if there is nothing to do.  */
> +  if (!bounds)
> +    return;
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
> +
> +  /* Sort the bounds in decreasing order.  */
> +  qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds),
> +	 sizeof (double_int), double_int_cmp);
> +
> +  /* For every basic block record the lowest bound that is guaranteed to
> +     terminate the loop.  */
> +
> +  bb_bounds = pointer_map_create ();
> +  for (elt = loop->bounds; elt; elt = elt->next)
> +    {
> +      double_int bound = elt->bound;
> +      if (!elt->is_exit)
> +	{
> +	  bound += double_int_one;
> +	  /* Overflow, give up on this bound.  */
> +	  if (bound == double_int_zero)
> +	    continue;
> +	}
> +
> +      if (!loop->any_upper_bound
> +	  || bound.ult (loop->nb_iterations_upper_bound))
> +	{
> +	  ptrdiff_t index = bound_index (bounds, bound);
> +	  void **entry = pointer_map_contains (bb_bounds,
> +					       gimple_bb (elt->stmt));
> +	  if (!entry)
> +	    *pointer_map_insert (bb_bounds,
> +				 gimple_bb (elt->stmt)) = (void *)index;
> +	  else if ((ptrdiff_t)*entry > index)
> +	    *entry = (void *)index;
> +	}
> +    }
> +
> +  visited = pointer_map_create ();

visited is a poor name for a map ...

Otherwise looks ok.

Thanks,
Richard.

> +
> +  /* Perform shortest path discovery loop->header ... loop->latch.
> +
> +     The "distance" is given by the smallest loop bound of basic block
> +     present in the path and we look for path with largest smallest bound
> +     on it.
> +
> +     To avoid the need for fibonaci heap on double ints we simply compress
> +     double ints into indexes to BOUNDS array and then represent the queue
> +     as arrays of queues for every index.
> +     Index of VEC_length (BOUNDS) means that the execution of given BB has
> +     no bounds determined.
> +
> +     VISITED is a pointer map translating basic block into smallest index
> +     it was inserted into the priority queue with.  */
> +  latch_index = -1;
> +
> +  /* Start walk in loop header with index set to infinite bound.  */
> +  queue_index = VEC_length (double_int, bounds);
> +  VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1);
> +  VEC_safe_push (basic_block, heap, queue, loop->header);
> +  VEC_replace (bb_queue, queues, queue_index, queue);
> +  *pointer_map_insert (visited, loop->header) = (void *)queue_index;
> +
> +  for (; queue_index >= 0; queue_index--)
> +    {
> +      if (latch_index < queue_index)
> +	{
> +	  while (VEC_length (basic_block,
> +			     VEC_index (bb_queue, queues, queue_index)))
> +	    {
> +	      basic_block bb;
> +	      ptrdiff_t bound_index = queue_index;
> +	      void **entry;
> +              edge e;
> +              edge_iterator ei;
> +
> +	      queue = VEC_index (bb_queue, queues, queue_index);
> +	      bb = VEC_pop (basic_block, queue);
> +
> +	      /* OK, we later increased BB priority, skip it.  */
> +	      if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index)
> +		continue;
> +
> +	      /* See if we can improve the bound.  */
> +	      entry = pointer_map_contains (bb_bounds, bb);
> +	      if (entry && (ptrdiff_t)*entry < bound_index)
> +		bound_index = (ptrdiff_t)*entry;
> +
> +	      /* Insert succesors into the queue, watch for latch edge
> +		 and record greatest index we saw.  */
> +	      FOR_EACH_EDGE (e, ei, bb->succs)
> +		{
> +		  bool insert = false;
> +		  void **entry;
> +
> +		  if (loop_exit_edge_p (loop, e))
> +		    continue;
> +
> +		  if (e == loop_latch_edge (loop)
> +		      && latch_index < bound_index)
> +		    latch_index = bound_index;
> +		  else if (!(entry = pointer_map_contains (visited, e->dest)))
> +		    {
> +		      insert = true;
> +		      *pointer_map_insert (visited, e->dest) = (void *)bound_index;
> +		    }
> +		  else if ((ptrdiff_t)*entry < bound_index)
> +		    {
> +		      insert = true;
> +		      *entry = (void *)bound_index;
> +		    }
> +		    
> +		  if (insert)
> +		    {
> +		      bb_queue queue2 = VEC_index (bb_queue, queues, bound_index);
> +		      VEC_safe_push (basic_block, heap, queue2, e->dest);
> +		      VEC_replace (bb_queue, queues, bound_index, queue2);
> +		    }
> +		}
> +	    }
> +	}
> +      else
> +	VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index));
> +    }
> +
> +  gcc_assert (latch_index >= 0);
> +  if (latch_index < VEC_length (double_int, bounds))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +	{
> +	  fprintf (dump_file, "Found better loop bound ");
> +	  dump_double_int (dump_file,
> +			   VEC_index (double_int, bounds, latch_index), true);
> +	  fprintf (dump_file, "\n");
> +	}
> +      record_niter_bound (loop, VEC_index (double_int, bounds, latch_index),
> +			  false, true);
> +    }
> +
> +  VEC_free (bb_queue, heap, queues);
> +  pointer_map_destroy (bb_bounds);
> +  pointer_map_destroy (visited);
> +}
> +
>  /* See if every path cross the loop goes through a statement that is known
>     to not execute at the last iteration. In that case we can decrese iteration
>     count by 1.  */
> @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str
>  
>    infer_loop_bounds_from_undefined (loop);
>  
> +  discover_iteration_bound_by_body_walk (loop);
> +
>    maybe_lower_iteration_bound (loop);
>  
>    /* If we have a measured profile, use it to estimate the number of
> Index: testsuite/gcc.dg/tree-ssa/loop-38.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> +++ testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
> +int a[10];
> +int b[11];
> +t(int n)
> +{
> +   int i;
> +   int sum = 0;
> +   for (i=0;i<n;i++)
> +     if (q())
> +	sum+=a[i];
> +     else
> +	sum+=b[i];
> +  return sum;
> +}
> +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */
> +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */
> +/* { dg-final { cleanup-tree-dump "cunrolli" } } */
> 
>
Jan Hubicka - Oct. 31, 2012, 10:09 a.m.
> 
> visited is a poor name for a map ...
Hmm, visited_with_priority?

Thanks,
Honza
> 
> Otherwise looks ok.
> 
> Thanks,
> Richard.
> 
> > +
> > +  /* Perform shortest path discovery loop->header ... loop->latch.
> > +
> > +     The "distance" is given by the smallest loop bound of basic block
> > +     present in the path and we look for path with largest smallest bound
> > +     on it.
> > +
> > +     To avoid the need for fibonaci heap on double ints we simply compress
> > +     double ints into indexes to BOUNDS array and then represent the queue
> > +     as arrays of queues for every index.
> > +     Index of VEC_length (BOUNDS) means that the execution of given BB has
> > +     no bounds determined.
> > +
> > +     VISITED is a pointer map translating basic block into smallest index
> > +     it was inserted into the priority queue with.  */
> > +  latch_index = -1;
> > +
> > +  /* Start walk in loop header with index set to infinite bound.  */
> > +  queue_index = VEC_length (double_int, bounds);
> > +  VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1);
> > +  VEC_safe_push (basic_block, heap, queue, loop->header);
> > +  VEC_replace (bb_queue, queues, queue_index, queue);
> > +  *pointer_map_insert (visited, loop->header) = (void *)queue_index;
> > +
> > +  for (; queue_index >= 0; queue_index--)
> > +    {
> > +      if (latch_index < queue_index)
> > +	{
> > +	  while (VEC_length (basic_block,
> > +			     VEC_index (bb_queue, queues, queue_index)))
> > +	    {
> > +	      basic_block bb;
> > +	      ptrdiff_t bound_index = queue_index;
> > +	      void **entry;
> > +              edge e;
> > +              edge_iterator ei;
> > +
> > +	      queue = VEC_index (bb_queue, queues, queue_index);
> > +	      bb = VEC_pop (basic_block, queue);
> > +
> > +	      /* OK, we later increased BB priority, skip it.  */
> > +	      if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index)
> > +		continue;
> > +
> > +	      /* See if we can improve the bound.  */
> > +	      entry = pointer_map_contains (bb_bounds, bb);
> > +	      if (entry && (ptrdiff_t)*entry < bound_index)
> > +		bound_index = (ptrdiff_t)*entry;
> > +
> > +	      /* Insert succesors into the queue, watch for latch edge
> > +		 and record greatest index we saw.  */
> > +	      FOR_EACH_EDGE (e, ei, bb->succs)
> > +		{
> > +		  bool insert = false;
> > +		  void **entry;
> > +
> > +		  if (loop_exit_edge_p (loop, e))
> > +		    continue;
> > +
> > +		  if (e == loop_latch_edge (loop)
> > +		      && latch_index < bound_index)
> > +		    latch_index = bound_index;
> > +		  else if (!(entry = pointer_map_contains (visited, e->dest)))
> > +		    {
> > +		      insert = true;
> > +		      *pointer_map_insert (visited, e->dest) = (void *)bound_index;
> > +		    }
> > +		  else if ((ptrdiff_t)*entry < bound_index)
> > +		    {
> > +		      insert = true;
> > +		      *entry = (void *)bound_index;
> > +		    }
> > +		    
> > +		  if (insert)
> > +		    {
> > +		      bb_queue queue2 = VEC_index (bb_queue, queues, bound_index);
> > +		      VEC_safe_push (basic_block, heap, queue2, e->dest);
> > +		      VEC_replace (bb_queue, queues, bound_index, queue2);
> > +		    }
> > +		}
> > +	    }
> > +	}
> > +      else
> > +	VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index));
> > +    }
> > +
> > +  gcc_assert (latch_index >= 0);
> > +  if (latch_index < VEC_length (double_int, bounds))
> > +    {
> > +      if (dump_file && (dump_flags & TDF_DETAILS))
> > +	{
> > +	  fprintf (dump_file, "Found better loop bound ");
> > +	  dump_double_int (dump_file,
> > +			   VEC_index (double_int, bounds, latch_index), true);
> > +	  fprintf (dump_file, "\n");
> > +	}
> > +      record_niter_bound (loop, VEC_index (double_int, bounds, latch_index),
> > +			  false, true);
> > +    }
> > +
> > +  VEC_free (bb_queue, heap, queues);
> > +  pointer_map_destroy (bb_bounds);
> > +  pointer_map_destroy (visited);
> > +}
> > +
> >  /* See if every path cross the loop goes through a statement that is known
> >     to not execute at the last iteration. In that case we can decrese iteration
> >     count by 1.  */
> > @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str
> >  
> >    infer_loop_bounds_from_undefined (loop);
> >  
> > +  discover_iteration_bound_by_body_walk (loop);
> > +
> >    maybe_lower_iteration_bound (loop);
> >  
> >    /* If we have a measured profile, use it to estimate the number of
> > Index: testsuite/gcc.dg/tree-ssa/loop-38.c
> > ===================================================================
> > --- testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> > +++ testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
> > +int a[10];
> > +int b[11];
> > +t(int n)
> > +{
> > +   int i;
> > +   int sum = 0;
> > +   for (i=0;i<n;i++)
> > +     if (q())
> > +	sum+=a[i];
> > +     else
> > +	sum+=b[i];
> > +  return sum;
> > +}
> > +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */
> > +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */
> > +/* { dg-final { cleanup-tree-dump "cunrolli" } } */
> > 
> > 
> 
> -- 
> Richard Biener <rguenther@suse.de>
> SUSE / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imend
Richard Guenther - Oct. 31, 2012, 10:10 a.m.
On Wed, 31 Oct 2012, Jan Hubicka wrote:

> > 
> > visited is a poor name for a map ...
> Hmm, visited_with_priority?

Just block_priority?

Richard.

> Thanks,
> Honza
> > 
> > Otherwise looks ok.
> > 
> > Thanks,
> > Richard.
> > 
> > > +
> > > +  /* Perform shortest path discovery loop->header ... loop->latch.
> > > +
> > > +     The "distance" is given by the smallest loop bound of basic block
> > > +     present in the path and we look for path with largest smallest bound
> > > +     on it.
> > > +
> > > +     To avoid the need for fibonaci heap on double ints we simply compress
> > > +     double ints into indexes to BOUNDS array and then represent the queue
> > > +     as arrays of queues for every index.
> > > +     Index of VEC_length (BOUNDS) means that the execution of given BB has
> > > +     no bounds determined.
> > > +
> > > +     VISITED is a pointer map translating basic block into smallest index
> > > +     it was inserted into the priority queue with.  */
> > > +  latch_index = -1;
> > > +
> > > +  /* Start walk in loop header with index set to infinite bound.  */
> > > +  queue_index = VEC_length (double_int, bounds);
> > > +  VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1);
> > > +  VEC_safe_push (basic_block, heap, queue, loop->header);
> > > +  VEC_replace (bb_queue, queues, queue_index, queue);
> > > +  *pointer_map_insert (visited, loop->header) = (void *)queue_index;
> > > +
> > > +  for (; queue_index >= 0; queue_index--)
> > > +    {
> > > +      if (latch_index < queue_index)
> > > +	{
> > > +	  while (VEC_length (basic_block,
> > > +			     VEC_index (bb_queue, queues, queue_index)))
> > > +	    {
> > > +	      basic_block bb;
> > > +	      ptrdiff_t bound_index = queue_index;
> > > +	      void **entry;
> > > +              edge e;
> > > +              edge_iterator ei;
> > > +
> > > +	      queue = VEC_index (bb_queue, queues, queue_index);
> > > +	      bb = VEC_pop (basic_block, queue);
> > > +
> > > +	      /* OK, we later increased BB priority, skip it.  */
> > > +	      if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index)
> > > +		continue;
> > > +
> > > +	      /* See if we can improve the bound.  */
> > > +	      entry = pointer_map_contains (bb_bounds, bb);
> > > +	      if (entry && (ptrdiff_t)*entry < bound_index)
> > > +		bound_index = (ptrdiff_t)*entry;
> > > +
> > > +	      /* Insert succesors into the queue, watch for latch edge
> > > +		 and record greatest index we saw.  */
> > > +	      FOR_EACH_EDGE (e, ei, bb->succs)
> > > +		{
> > > +		  bool insert = false;
> > > +		  void **entry;
> > > +
> > > +		  if (loop_exit_edge_p (loop, e))
> > > +		    continue;
> > > +
> > > +		  if (e == loop_latch_edge (loop)
> > > +		      && latch_index < bound_index)
> > > +		    latch_index = bound_index;
> > > +		  else if (!(entry = pointer_map_contains (visited, e->dest)))
> > > +		    {
> > > +		      insert = true;
> > > +		      *pointer_map_insert (visited, e->dest) = (void *)bound_index;
> > > +		    }
> > > +		  else if ((ptrdiff_t)*entry < bound_index)
> > > +		    {
> > > +		      insert = true;
> > > +		      *entry = (void *)bound_index;
> > > +		    }
> > > +		    
> > > +		  if (insert)
> > > +		    {
> > > +		      bb_queue queue2 = VEC_index (bb_queue, queues, bound_index);
> > > +		      VEC_safe_push (basic_block, heap, queue2, e->dest);
> > > +		      VEC_replace (bb_queue, queues, bound_index, queue2);
> > > +		    }
> > > +		}
> > > +	    }
> > > +	}
> > > +      else
> > > +	VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index));
> > > +    }
> > > +
> > > +  gcc_assert (latch_index >= 0);
> > > +  if (latch_index < VEC_length (double_int, bounds))
> > > +    {
> > > +      if (dump_file && (dump_flags & TDF_DETAILS))
> > > +	{
> > > +	  fprintf (dump_file, "Found better loop bound ");
> > > +	  dump_double_int (dump_file,
> > > +			   VEC_index (double_int, bounds, latch_index), true);
> > > +	  fprintf (dump_file, "\n");
> > > +	}
> > > +      record_niter_bound (loop, VEC_index (double_int, bounds, latch_index),
> > > +			  false, true);
> > > +    }
> > > +
> > > +  VEC_free (bb_queue, heap, queues);
> > > +  pointer_map_destroy (bb_bounds);
> > > +  pointer_map_destroy (visited);
> > > +}
> > > +
> > >  /* See if every path cross the loop goes through a statement that is known
> > >     to not execute at the last iteration. In that case we can decrese iteration
> > >     count by 1.  */
> > > @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str
> > >  
> > >    infer_loop_bounds_from_undefined (loop);
> > >  
> > > +  discover_iteration_bound_by_body_walk (loop);
> > > +
> > >    maybe_lower_iteration_bound (loop);
> > >  
> > >    /* If we have a measured profile, use it to estimate the number of
> > > Index: testsuite/gcc.dg/tree-ssa/loop-38.c
> > > ===================================================================
> > > --- testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> > > +++ testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> > > @@ -0,0 +1,18 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
> > > +int a[10];
> > > +int b[11];
> > > +t(int n)
> > > +{
> > > +   int i;
> > > +   int sum = 0;
> > > +   for (i=0;i<n;i++)
> > > +     if (q())
> > > +	sum+=a[i];
> > > +     else
> > > +	sum+=b[i];
> > > +  return sum;
> > > +}
> > > +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */
> > > +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */
> > > +/* { dg-final { cleanup-tree-dump "cunrolli" } } */
> > > 
> > > 
> > 
> > -- 
> > Richard Biener <rguenther@suse.de>
> > SUSE / SUSE Labs
> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> > GF: Jeff Hawn, Jennifer Guild, Felix Imend
> 
>

Patch

Index: tree-ssa-loop-niter.c
===================================================================
--- tree-ssa-loop-niter.c	(revision 192989)
+++ tree-ssa-loop-niter.c	(working copy)
@@ -2955,6 +2955,234 @@  gcov_type_to_double_int (gcov_type val)
   return ret;
 }
 
+/* Compare double ints, callback for qsort.  */
+
+int
+double_int_cmp (const void *p1, const void *p2)
+{
+  const double_int *d1 = (const double_int *)p1;
+  const double_int *d2 = (const double_int *)p2;
+  if (*d1 == *d2)
+    return 0;
+  if (d1->ult (*d2))
+    return -1;
+  return 1;
+}
+
+/* Return index of BOUND in BOUNDS array sorted in increasing order.
+   Lookup by binary search.  */
+
+int
+bound_index (VEC (double_int, heap) *bounds, double_int bound)
+{
+  unsigned int end = VEC_length (double_int, bounds);
+  unsigned int begin = 0;
+
+  /* Find a matching index by means of a binary search.  */
+  while (begin != end)
+    {
+      unsigned int middle = (begin + end) / 2;
+      double_int index = VEC_index (double_int, bounds, middle);
+
+      if (index == bound)
+	return middle;
+      else if (index.ult (bound))
+	begin = middle + 1;
+      else
+	end = middle;
+    }
+  gcc_unreachable ();
+}
+
+/* Used to hold vector of queues of basic blocks bellow.  */
+typedef VEC (basic_block, heap) *bb_queue;
+DEF_VEC_P(bb_queue);
+DEF_VEC_ALLOC_P(bb_queue,heap);
+
+/* We recorded loop bounds only for statements dominating loop latch (and thus
+   executed each loop iteration).  If there are any bounds on statements not
+   dominating the loop latch we can improve the estimate by walking the loop
+   body and seeing if every path from loop header to loop latch contains
+   some bounded statement.  */
+
+static void
+discover_iteration_bound_by_body_walk (struct loop *loop)
+{
+  pointer_map_t *bb_bounds;
+  struct nb_iter_bound *elt;
+  VEC (double_int, heap) *bounds = NULL;
+  VEC (bb_queue, heap) *queues = NULL;
+  bb_queue queue = NULL;
+  ptrdiff_t queue_index;
+  ptrdiff_t latch_index = 0;
+  pointer_map_t *visited;
+
+  /* Discover what bounds may interest us.  */
+  for (elt = loop->bounds; elt; elt = elt->next)
+    {
+      double_int bound = elt->bound;
+
+      /* Exit terminates loop at given iteration, while non-exits produce undefined
+	 effect on the next iteration.  */
+      if (!elt->is_exit)
+	{
+	  bound += double_int_one;
+	  /* Overflow, give up on this bound.  */
+	  if (bound == double_int_zero)
+	    continue;
+	}
+
+      if (!loop->any_upper_bound
+	  || bound.ult (loop->nb_iterations_upper_bound))
+        VEC_safe_push (double_int, heap, bounds, bound);
+    }
+
+  /* Exit early if there is nothing to do.  */
+  if (!bounds)
+    return;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
+
+  /* Sort the bounds in decreasing order.  */
+  qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds),
+	 sizeof (double_int), double_int_cmp);
+
+  /* For every basic block record the lowest bound that is guaranteed to
+     terminate the loop.  */
+
+  bb_bounds = pointer_map_create ();
+  for (elt = loop->bounds; elt; elt = elt->next)
+    {
+      double_int bound = elt->bound;
+      if (!elt->is_exit)
+	{
+	  bound += double_int_one;
+	  /* Overflow, give up on this bound.  */
+	  if (bound == double_int_zero)
+	    continue;
+	}
+
+      if (!loop->any_upper_bound
+	  || bound.ult (loop->nb_iterations_upper_bound))
+	{
+	  ptrdiff_t index = bound_index (bounds, bound);
+	  void **entry = pointer_map_contains (bb_bounds,
+					       gimple_bb (elt->stmt));
+	  if (!entry)
+	    *pointer_map_insert (bb_bounds,
+				 gimple_bb (elt->stmt)) = (void *)index;
+	  else if ((ptrdiff_t)*entry > index)
+	    *entry = (void *)index;
+	}
+    }
+
+  visited = pointer_map_create ();
+
+  /* Perform shortest path discovery loop->header ... loop->latch.
+
+     The "distance" is given by the smallest loop bound of basic block
+     present in the path and we look for path with largest smallest bound
+     on it.
+
+     To avoid the need for fibonaci heap on double ints we simply compress
+     double ints into indexes to BOUNDS array and then represent the queue
+     as arrays of queues for every index.
+     Index of VEC_length (BOUNDS) means that the execution of given BB has
+     no bounds determined.
+
+     VISITED is a pointer map translating basic block into smallest index
+     it was inserted into the priority queue with.  */
+  latch_index = -1;
+
+  /* Start walk in loop header with index set to infinite bound.  */
+  queue_index = VEC_length (double_int, bounds);
+  VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1);
+  VEC_safe_push (basic_block, heap, queue, loop->header);
+  VEC_replace (bb_queue, queues, queue_index, queue);
+  *pointer_map_insert (visited, loop->header) = (void *)queue_index;
+
+  for (; queue_index >= 0; queue_index--)
+    {
+      if (latch_index < queue_index)
+	{
+	  while (VEC_length (basic_block,
+			     VEC_index (bb_queue, queues, queue_index)))
+	    {
+	      basic_block bb;
+	      ptrdiff_t bound_index = queue_index;
+	      void **entry;
+              edge e;
+              edge_iterator ei;
+
+	      queue = VEC_index (bb_queue, queues, queue_index);
+	      bb = VEC_pop (basic_block, queue);
+
+	      /* OK, we later increased BB priority, skip it.  */
+	      if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index)
+		continue;
+
+	      /* See if we can improve the bound.  */
+	      entry = pointer_map_contains (bb_bounds, bb);
+	      if (entry && (ptrdiff_t)*entry < bound_index)
+		bound_index = (ptrdiff_t)*entry;
+
+	      /* Insert succesors into the queue, watch for latch edge
+		 and record greatest index we saw.  */
+	      FOR_EACH_EDGE (e, ei, bb->succs)
+		{
+		  bool insert = false;
+		  void **entry;
+
+		  if (loop_exit_edge_p (loop, e))
+		    continue;
+
+		  if (e == loop_latch_edge (loop)
+		      && latch_index < bound_index)
+		    latch_index = bound_index;
+		  else if (!(entry = pointer_map_contains (visited, e->dest)))
+		    {
+		      insert = true;
+		      *pointer_map_insert (visited, e->dest) = (void *)bound_index;
+		    }
+		  else if ((ptrdiff_t)*entry < bound_index)
+		    {
+		      insert = true;
+		      *entry = (void *)bound_index;
+		    }
+		    
+		  if (insert)
+		    {
+		      bb_queue queue2 = VEC_index (bb_queue, queues, bound_index);
+		      VEC_safe_push (basic_block, heap, queue2, e->dest);
+		      VEC_replace (bb_queue, queues, bound_index, queue2);
+		    }
+		}
+	    }
+	}
+      else
+	VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index));
+    }
+
+  gcc_assert (latch_index >= 0);
+  if (latch_index < VEC_length (double_int, bounds))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Found better loop bound ");
+	  dump_double_int (dump_file,
+			   VEC_index (double_int, bounds, latch_index), true);
+	  fprintf (dump_file, "\n");
+	}
+      record_niter_bound (loop, VEC_index (double_int, bounds, latch_index),
+			  false, true);
+    }
+
+  VEC_free (bb_queue, heap, queues);
+  pointer_map_destroy (bb_bounds);
+  pointer_map_destroy (visited);
+}
+
 /* See if every path cross the loop goes through a statement that is known
    to not execute at the last iteration. In that case we can decrese iteration
    count by 1.  */
@@ -3102,6 +3330,8 @@  estimate_numbers_of_iterations_loop (str
 
   infer_loop_bounds_from_undefined (loop);
 
+  discover_iteration_bound_by_body_walk (loop);
+
   maybe_lower_iteration_bound (loop);
 
   /* If we have a measured profile, use it to estimate the number of
Index: testsuite/gcc.dg/tree-ssa/loop-38.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
+int a[10];
+int b[11];
+t(int n)
+{
+   int i;
+   int sum = 0;
+   for (i=0;i<n;i++)
+     if (q())
+	sum+=a[i];
+     else
+	sum+=b[i];
+  return sum;
+}
+/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */
+/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */