Message ID | 20121030182949.GA9056@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
On Tue, 30 Oct 2012, Jan Hubicka wrote: > Hi, > this patch implements the second part of planned change - to determine loop bounds > based by shortest path discovery. This allows to bound number of iterations > on loops with bounds in statements that do not dominate the latch. > > I originally planned to implement this as part of maybe_lower_iteration_bound, > but I found doing it separately is more readable. While both performs walk of > the loop body, both do that for different reasons. > > discover_iteration_bound_by_body_walk walks from header to latch, while > maybe_lower_iteration_bound walks from header to first statement with side > effects looking if there is exit on the way. > > Both walks can be skipped for many loops, but each with different reasons. > > Bootstrapped/regtested x86_64-linux, OK? > > * tree-ssa-loop-niter.c (double_int_cmp, bound_index, > discover_iteration_bound_by_body_walk): New functions. > (discover_iteration_bound_by_body_walk): Use it. > > * gcc.dg/tree-ssa/loop-38.c: New testcase. > > Index: tree-ssa-loop-niter.c > =================================================================== > --- tree-ssa-loop-niter.c (revision 192989) > +++ tree-ssa-loop-niter.c (working copy) > @@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val) > return ret; > } > > +/* Compare double ints, callback for qsort. */ > + > +int > +double_int_cmp (const void *p1, const void *p2) > +{ > + const double_int *d1 = (const double_int *)p1; > + const double_int *d2 = (const double_int *)p2; > + if (*d1 == *d2) > + return 0; > + if (d1->ult (*d2)) > + return -1; > + return 1; > +} > + > +/* Return index of BOUND in BOUNDS array sorted in increasing order. > + Lookup by binary search. */ > + > +int > +bound_index (VEC (double_int, heap) *bounds, double_int bound) > +{ > + unsigned int end = VEC_length (double_int, bounds); > + unsigned int begin = 0; > + > + /* Find a matching index by means of a binary search. */ > + while (begin != end) > + { > + unsigned int middle = (begin + end) / 2; > + double_int index = VEC_index (double_int, bounds, middle); > + > + if (index == bound) > + return middle; > + else if (index.ult (bound)) > + begin = middle + 1; > + else > + end = middle; > + } > + gcc_unreachable (); > +} > + > +/* Used to hold vector of queues of basic blocks bellow. */ > +typedef VEC (basic_block, heap) *bb_queue; > +DEF_VEC_P(bb_queue); > +DEF_VEC_ALLOC_P(bb_queue,heap); > + > +/* We recorded loop bounds only for statements dominating loop latch (and thus > + executed each loop iteration). If there are any bounds on statements not > + dominating the loop latch we can improve the estimate by walking the loop > + body and seeing if every path from loop header to loop latch contains > + some bounded statement. */ > + > +static void > +discover_iteration_bound_by_body_walk (struct loop *loop) > +{ > + pointer_map_t *bb_bounds; > + struct nb_iter_bound *elt; > + VEC (double_int, heap) *bounds = NULL; > + VEC (bb_queue, heap) *queues = NULL; > + bb_queue queue = NULL; > + ptrdiff_t queue_index; > + ptrdiff_t latch_index = 0; > + pointer_map_t *visited; > + > + /* Discover what bounds may interest us. */ > + for (elt = loop->bounds; elt; elt = elt->next) > + { > + double_int bound = elt->bound; > + > + /* Exit terminates loop at given iteration, while non-exits produce undefined > + effect on the next iteration. */ > + if (!elt->is_exit) > + { > + bound += double_int_one; > + /* Overflow, give up on this bound. */ > + if (bound == double_int_zero) > + continue; > + } > + > + if (!loop->any_upper_bound > + || bound.ult (loop->nb_iterations_upper_bound)) > + VEC_safe_push (double_int, heap, bounds, bound); > + } > + > + /* Exit early if there is nothing to do. */ > + if (!bounds) > + return; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n"); > + > + /* Sort the bounds in decreasing order. */ > + qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds), > + sizeof (double_int), double_int_cmp); > + > + /* For every basic block record the lowest bound that is guaranteed to > + terminate the loop. */ > + > + bb_bounds = pointer_map_create (); > + for (elt = loop->bounds; elt; elt = elt->next) > + { > + double_int bound = elt->bound; > + if (!elt->is_exit) > + { > + bound += double_int_one; > + /* Overflow, give up on this bound. */ > + if (bound == double_int_zero) > + continue; > + } > + > + if (!loop->any_upper_bound > + || bound.ult (loop->nb_iterations_upper_bound)) > + { > + ptrdiff_t index = bound_index (bounds, bound); > + void **entry = pointer_map_contains (bb_bounds, > + gimple_bb (elt->stmt)); > + if (!entry) > + *pointer_map_insert (bb_bounds, > + gimple_bb (elt->stmt)) = (void *)index; > + else if ((ptrdiff_t)*entry > index) > + *entry = (void *)index; > + } > + } > + > + visited = pointer_map_create (); visited is a poor name for a map ... Otherwise looks ok. Thanks, Richard. > + > + /* Perform shortest path discovery loop->header ... loop->latch. > + > + The "distance" is given by the smallest loop bound of basic block > + present in the path and we look for path with largest smallest bound > + on it. > + > + To avoid the need for fibonaci heap on double ints we simply compress > + double ints into indexes to BOUNDS array and then represent the queue > + as arrays of queues for every index. > + Index of VEC_length (BOUNDS) means that the execution of given BB has > + no bounds determined. > + > + VISITED is a pointer map translating basic block into smallest index > + it was inserted into the priority queue with. */ > + latch_index = -1; > + > + /* Start walk in loop header with index set to infinite bound. */ > + queue_index = VEC_length (double_int, bounds); > + VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1); > + VEC_safe_push (basic_block, heap, queue, loop->header); > + VEC_replace (bb_queue, queues, queue_index, queue); > + *pointer_map_insert (visited, loop->header) = (void *)queue_index; > + > + for (; queue_index >= 0; queue_index--) > + { > + if (latch_index < queue_index) > + { > + while (VEC_length (basic_block, > + VEC_index (bb_queue, queues, queue_index))) > + { > + basic_block bb; > + ptrdiff_t bound_index = queue_index; > + void **entry; > + edge e; > + edge_iterator ei; > + > + queue = VEC_index (bb_queue, queues, queue_index); > + bb = VEC_pop (basic_block, queue); > + > + /* OK, we later increased BB priority, skip it. */ > + if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index) > + continue; > + > + /* See if we can improve the bound. */ > + entry = pointer_map_contains (bb_bounds, bb); > + if (entry && (ptrdiff_t)*entry < bound_index) > + bound_index = (ptrdiff_t)*entry; > + > + /* Insert succesors into the queue, watch for latch edge > + and record greatest index we saw. */ > + FOR_EACH_EDGE (e, ei, bb->succs) > + { > + bool insert = false; > + void **entry; > + > + if (loop_exit_edge_p (loop, e)) > + continue; > + > + if (e == loop_latch_edge (loop) > + && latch_index < bound_index) > + latch_index = bound_index; > + else if (!(entry = pointer_map_contains (visited, e->dest))) > + { > + insert = true; > + *pointer_map_insert (visited, e->dest) = (void *)bound_index; > + } > + else if ((ptrdiff_t)*entry < bound_index) > + { > + insert = true; > + *entry = (void *)bound_index; > + } > + > + if (insert) > + { > + bb_queue queue2 = VEC_index (bb_queue, queues, bound_index); > + VEC_safe_push (basic_block, heap, queue2, e->dest); > + VEC_replace (bb_queue, queues, bound_index, queue2); > + } > + } > + } > + } > + else > + VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index)); > + } > + > + gcc_assert (latch_index >= 0); > + if (latch_index < VEC_length (double_int, bounds)) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Found better loop bound "); > + dump_double_int (dump_file, > + VEC_index (double_int, bounds, latch_index), true); > + fprintf (dump_file, "\n"); > + } > + record_niter_bound (loop, VEC_index (double_int, bounds, latch_index), > + false, true); > + } > + > + VEC_free (bb_queue, heap, queues); > + pointer_map_destroy (bb_bounds); > + pointer_map_destroy (visited); > +} > + > /* See if every path cross the loop goes through a statement that is known > to not execute at the last iteration. In that case we can decrese iteration > count by 1. */ > @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str > > infer_loop_bounds_from_undefined (loop); > > + discover_iteration_bound_by_body_walk (loop); > + > maybe_lower_iteration_bound (loop); > > /* If we have a measured profile, use it to estimate the number of > Index: testsuite/gcc.dg/tree-ssa/loop-38.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */ > +int a[10]; > +int b[11]; > +t(int n) > +{ > + int i; > + int sum = 0; > + for (i=0;i<n;i++) > + if (q()) > + sum+=a[i]; > + else > + sum+=b[i]; > + return sum; > +} > +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */ > +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */ > +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ > >
> > visited is a poor name for a map ... Hmm, visited_with_priority? Thanks, Honza > > Otherwise looks ok. > > Thanks, > Richard. > > > + > > + /* Perform shortest path discovery loop->header ... loop->latch. > > + > > + The "distance" is given by the smallest loop bound of basic block > > + present in the path and we look for path with largest smallest bound > > + on it. > > + > > + To avoid the need for fibonaci heap on double ints we simply compress > > + double ints into indexes to BOUNDS array and then represent the queue > > + as arrays of queues for every index. > > + Index of VEC_length (BOUNDS) means that the execution of given BB has > > + no bounds determined. > > + > > + VISITED is a pointer map translating basic block into smallest index > > + it was inserted into the priority queue with. */ > > + latch_index = -1; > > + > > + /* Start walk in loop header with index set to infinite bound. */ > > + queue_index = VEC_length (double_int, bounds); > > + VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1); > > + VEC_safe_push (basic_block, heap, queue, loop->header); > > + VEC_replace (bb_queue, queues, queue_index, queue); > > + *pointer_map_insert (visited, loop->header) = (void *)queue_index; > > + > > + for (; queue_index >= 0; queue_index--) > > + { > > + if (latch_index < queue_index) > > + { > > + while (VEC_length (basic_block, > > + VEC_index (bb_queue, queues, queue_index))) > > + { > > + basic_block bb; > > + ptrdiff_t bound_index = queue_index; > > + void **entry; > > + edge e; > > + edge_iterator ei; > > + > > + queue = VEC_index (bb_queue, queues, queue_index); > > + bb = VEC_pop (basic_block, queue); > > + > > + /* OK, we later increased BB priority, skip it. */ > > + if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index) > > + continue; > > + > > + /* See if we can improve the bound. */ > > + entry = pointer_map_contains (bb_bounds, bb); > > + if (entry && (ptrdiff_t)*entry < bound_index) > > + bound_index = (ptrdiff_t)*entry; > > + > > + /* Insert succesors into the queue, watch for latch edge > > + and record greatest index we saw. */ > > + FOR_EACH_EDGE (e, ei, bb->succs) > > + { > > + bool insert = false; > > + void **entry; > > + > > + if (loop_exit_edge_p (loop, e)) > > + continue; > > + > > + if (e == loop_latch_edge (loop) > > + && latch_index < bound_index) > > + latch_index = bound_index; > > + else if (!(entry = pointer_map_contains (visited, e->dest))) > > + { > > + insert = true; > > + *pointer_map_insert (visited, e->dest) = (void *)bound_index; > > + } > > + else if ((ptrdiff_t)*entry < bound_index) > > + { > > + insert = true; > > + *entry = (void *)bound_index; > > + } > > + > > + if (insert) > > + { > > + bb_queue queue2 = VEC_index (bb_queue, queues, bound_index); > > + VEC_safe_push (basic_block, heap, queue2, e->dest); > > + VEC_replace (bb_queue, queues, bound_index, queue2); > > + } > > + } > > + } > > + } > > + else > > + VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index)); > > + } > > + > > + gcc_assert (latch_index >= 0); > > + if (latch_index < VEC_length (double_int, bounds)) > > + { > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "Found better loop bound "); > > + dump_double_int (dump_file, > > + VEC_index (double_int, bounds, latch_index), true); > > + fprintf (dump_file, "\n"); > > + } > > + record_niter_bound (loop, VEC_index (double_int, bounds, latch_index), > > + false, true); > > + } > > + > > + VEC_free (bb_queue, heap, queues); > > + pointer_map_destroy (bb_bounds); > > + pointer_map_destroy (visited); > > +} > > + > > /* See if every path cross the loop goes through a statement that is known > > to not execute at the last iteration. In that case we can decrese iteration > > count by 1. */ > > @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str > > > > infer_loop_bounds_from_undefined (loop); > > > > + discover_iteration_bound_by_body_walk (loop); > > + > > maybe_lower_iteration_bound (loop); > > > > /* If we have a measured profile, use it to estimate the number of > > Index: testsuite/gcc.dg/tree-ssa/loop-38.c > > =================================================================== > > --- testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > > +++ testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */ > > +int a[10]; > > +int b[11]; > > +t(int n) > > +{ > > + int i; > > + int sum = 0; > > + for (i=0;i<n;i++) > > + if (q()) > > + sum+=a[i]; > > + else > > + sum+=b[i]; > > + return sum; > > +} > > +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */ > > +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */ > > +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ > > > > > > -- > Richard Biener <rguenther@suse.de> > SUSE / SUSE Labs > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > GF: Jeff Hawn, Jennifer Guild, Felix Imend
On Wed, 31 Oct 2012, Jan Hubicka wrote: > > > > visited is a poor name for a map ... > Hmm, visited_with_priority? Just block_priority? Richard. > Thanks, > Honza > > > > Otherwise looks ok. > > > > Thanks, > > Richard. > > > > > + > > > + /* Perform shortest path discovery loop->header ... loop->latch. > > > + > > > + The "distance" is given by the smallest loop bound of basic block > > > + present in the path and we look for path with largest smallest bound > > > + on it. > > > + > > > + To avoid the need for fibonaci heap on double ints we simply compress > > > + double ints into indexes to BOUNDS array and then represent the queue > > > + as arrays of queues for every index. > > > + Index of VEC_length (BOUNDS) means that the execution of given BB has > > > + no bounds determined. > > > + > > > + VISITED is a pointer map translating basic block into smallest index > > > + it was inserted into the priority queue with. */ > > > + latch_index = -1; > > > + > > > + /* Start walk in loop header with index set to infinite bound. */ > > > + queue_index = VEC_length (double_int, bounds); > > > + VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1); > > > + VEC_safe_push (basic_block, heap, queue, loop->header); > > > + VEC_replace (bb_queue, queues, queue_index, queue); > > > + *pointer_map_insert (visited, loop->header) = (void *)queue_index; > > > + > > > + for (; queue_index >= 0; queue_index--) > > > + { > > > + if (latch_index < queue_index) > > > + { > > > + while (VEC_length (basic_block, > > > + VEC_index (bb_queue, queues, queue_index))) > > > + { > > > + basic_block bb; > > > + ptrdiff_t bound_index = queue_index; > > > + void **entry; > > > + edge e; > > > + edge_iterator ei; > > > + > > > + queue = VEC_index (bb_queue, queues, queue_index); > > > + bb = VEC_pop (basic_block, queue); > > > + > > > + /* OK, we later increased BB priority, skip it. */ > > > + if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index) > > > + continue; > > > + > > > + /* See if we can improve the bound. */ > > > + entry = pointer_map_contains (bb_bounds, bb); > > > + if (entry && (ptrdiff_t)*entry < bound_index) > > > + bound_index = (ptrdiff_t)*entry; > > > + > > > + /* Insert succesors into the queue, watch for latch edge > > > + and record greatest index we saw. */ > > > + FOR_EACH_EDGE (e, ei, bb->succs) > > > + { > > > + bool insert = false; > > > + void **entry; > > > + > > > + if (loop_exit_edge_p (loop, e)) > > > + continue; > > > + > > > + if (e == loop_latch_edge (loop) > > > + && latch_index < bound_index) > > > + latch_index = bound_index; > > > + else if (!(entry = pointer_map_contains (visited, e->dest))) > > > + { > > > + insert = true; > > > + *pointer_map_insert (visited, e->dest) = (void *)bound_index; > > > + } > > > + else if ((ptrdiff_t)*entry < bound_index) > > > + { > > > + insert = true; > > > + *entry = (void *)bound_index; > > > + } > > > + > > > + if (insert) > > > + { > > > + bb_queue queue2 = VEC_index (bb_queue, queues, bound_index); > > > + VEC_safe_push (basic_block, heap, queue2, e->dest); > > > + VEC_replace (bb_queue, queues, bound_index, queue2); > > > + } > > > + } > > > + } > > > + } > > > + else > > > + VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index)); > > > + } > > > + > > > + gcc_assert (latch_index >= 0); > > > + if (latch_index < VEC_length (double_int, bounds)) > > > + { > > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > > + { > > > + fprintf (dump_file, "Found better loop bound "); > > > + dump_double_int (dump_file, > > > + VEC_index (double_int, bounds, latch_index), true); > > > + fprintf (dump_file, "\n"); > > > + } > > > + record_niter_bound (loop, VEC_index (double_int, bounds, latch_index), > > > + false, true); > > > + } > > > + > > > + VEC_free (bb_queue, heap, queues); > > > + pointer_map_destroy (bb_bounds); > > > + pointer_map_destroy (visited); > > > +} > > > + > > > /* See if every path cross the loop goes through a statement that is known > > > to not execute at the last iteration. In that case we can decrese iteration > > > count by 1. */ > > > @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str > > > > > > infer_loop_bounds_from_undefined (loop); > > > > > > + discover_iteration_bound_by_body_walk (loop); > > > + > > > maybe_lower_iteration_bound (loop); > > > > > > /* If we have a measured profile, use it to estimate the number of > > > Index: testsuite/gcc.dg/tree-ssa/loop-38.c > > > =================================================================== > > > --- testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > > > +++ testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) > > > @@ -0,0 +1,18 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */ > > > +int a[10]; > > > +int b[11]; > > > +t(int n) > > > +{ > > > + int i; > > > + int sum = 0; > > > + for (i=0;i<n;i++) > > > + if (q()) > > > + sum+=a[i]; > > > + else > > > + sum+=b[i]; > > > + return sum; > > > +} > > > +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */ > > > +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */ > > > +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ > > > > > > > > > > -- > > Richard Biener <rguenther@suse.de> > > SUSE / SUSE Labs > > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > > GF: Jeff Hawn, Jennifer Guild, Felix Imend > >
Index: tree-ssa-loop-niter.c =================================================================== --- tree-ssa-loop-niter.c (revision 192989) +++ tree-ssa-loop-niter.c (working copy) @@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val) return ret; } +/* Compare double ints, callback for qsort. */ + +int +double_int_cmp (const void *p1, const void *p2) +{ + const double_int *d1 = (const double_int *)p1; + const double_int *d2 = (const double_int *)p2; + if (*d1 == *d2) + return 0; + if (d1->ult (*d2)) + return -1; + return 1; +} + +/* Return index of BOUND in BOUNDS array sorted in increasing order. + Lookup by binary search. */ + +int +bound_index (VEC (double_int, heap) *bounds, double_int bound) +{ + unsigned int end = VEC_length (double_int, bounds); + unsigned int begin = 0; + + /* Find a matching index by means of a binary search. */ + while (begin != end) + { + unsigned int middle = (begin + end) / 2; + double_int index = VEC_index (double_int, bounds, middle); + + if (index == bound) + return middle; + else if (index.ult (bound)) + begin = middle + 1; + else + end = middle; + } + gcc_unreachable (); +} + +/* Used to hold vector of queues of basic blocks bellow. */ +typedef VEC (basic_block, heap) *bb_queue; +DEF_VEC_P(bb_queue); +DEF_VEC_ALLOC_P(bb_queue,heap); + +/* We recorded loop bounds only for statements dominating loop latch (and thus + executed each loop iteration). If there are any bounds on statements not + dominating the loop latch we can improve the estimate by walking the loop + body and seeing if every path from loop header to loop latch contains + some bounded statement. */ + +static void +discover_iteration_bound_by_body_walk (struct loop *loop) +{ + pointer_map_t *bb_bounds; + struct nb_iter_bound *elt; + VEC (double_int, heap) *bounds = NULL; + VEC (bb_queue, heap) *queues = NULL; + bb_queue queue = NULL; + ptrdiff_t queue_index; + ptrdiff_t latch_index = 0; + pointer_map_t *visited; + + /* Discover what bounds may interest us. */ + for (elt = loop->bounds; elt; elt = elt->next) + { + double_int bound = elt->bound; + + /* Exit terminates loop at given iteration, while non-exits produce undefined + effect on the next iteration. */ + if (!elt->is_exit) + { + bound += double_int_one; + /* Overflow, give up on this bound. */ + if (bound == double_int_zero) + continue; + } + + if (!loop->any_upper_bound + || bound.ult (loop->nb_iterations_upper_bound)) + VEC_safe_push (double_int, heap, bounds, bound); + } + + /* Exit early if there is nothing to do. */ + if (!bounds) + return; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n"); + + /* Sort the bounds in decreasing order. */ + qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds), + sizeof (double_int), double_int_cmp); + + /* For every basic block record the lowest bound that is guaranteed to + terminate the loop. */ + + bb_bounds = pointer_map_create (); + for (elt = loop->bounds; elt; elt = elt->next) + { + double_int bound = elt->bound; + if (!elt->is_exit) + { + bound += double_int_one; + /* Overflow, give up on this bound. */ + if (bound == double_int_zero) + continue; + } + + if (!loop->any_upper_bound + || bound.ult (loop->nb_iterations_upper_bound)) + { + ptrdiff_t index = bound_index (bounds, bound); + void **entry = pointer_map_contains (bb_bounds, + gimple_bb (elt->stmt)); + if (!entry) + *pointer_map_insert (bb_bounds, + gimple_bb (elt->stmt)) = (void *)index; + else if ((ptrdiff_t)*entry > index) + *entry = (void *)index; + } + } + + visited = pointer_map_create (); + + /* Perform shortest path discovery loop->header ... loop->latch. + + The "distance" is given by the smallest loop bound of basic block + present in the path and we look for path with largest smallest bound + on it. + + To avoid the need for fibonaci heap on double ints we simply compress + double ints into indexes to BOUNDS array and then represent the queue + as arrays of queues for every index. + Index of VEC_length (BOUNDS) means that the execution of given BB has + no bounds determined. + + VISITED is a pointer map translating basic block into smallest index + it was inserted into the priority queue with. */ + latch_index = -1; + + /* Start walk in loop header with index set to infinite bound. */ + queue_index = VEC_length (double_int, bounds); + VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1); + VEC_safe_push (basic_block, heap, queue, loop->header); + VEC_replace (bb_queue, queues, queue_index, queue); + *pointer_map_insert (visited, loop->header) = (void *)queue_index; + + for (; queue_index >= 0; queue_index--) + { + if (latch_index < queue_index) + { + while (VEC_length (basic_block, + VEC_index (bb_queue, queues, queue_index))) + { + basic_block bb; + ptrdiff_t bound_index = queue_index; + void **entry; + edge e; + edge_iterator ei; + + queue = VEC_index (bb_queue, queues, queue_index); + bb = VEC_pop (basic_block, queue); + + /* OK, we later increased BB priority, skip it. */ + if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index) + continue; + + /* See if we can improve the bound. */ + entry = pointer_map_contains (bb_bounds, bb); + if (entry && (ptrdiff_t)*entry < bound_index) + bound_index = (ptrdiff_t)*entry; + + /* Insert succesors into the queue, watch for latch edge + and record greatest index we saw. */ + FOR_EACH_EDGE (e, ei, bb->succs) + { + bool insert = false; + void **entry; + + if (loop_exit_edge_p (loop, e)) + continue; + + if (e == loop_latch_edge (loop) + && latch_index < bound_index) + latch_index = bound_index; + else if (!(entry = pointer_map_contains (visited, e->dest))) + { + insert = true; + *pointer_map_insert (visited, e->dest) = (void *)bound_index; + } + else if ((ptrdiff_t)*entry < bound_index) + { + insert = true; + *entry = (void *)bound_index; + } + + if (insert) + { + bb_queue queue2 = VEC_index (bb_queue, queues, bound_index); + VEC_safe_push (basic_block, heap, queue2, e->dest); + VEC_replace (bb_queue, queues, bound_index, queue2); + } + } + } + } + else + VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index)); + } + + gcc_assert (latch_index >= 0); + if (latch_index < VEC_length (double_int, bounds)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Found better loop bound "); + dump_double_int (dump_file, + VEC_index (double_int, bounds, latch_index), true); + fprintf (dump_file, "\n"); + } + record_niter_bound (loop, VEC_index (double_int, bounds, latch_index), + false, true); + } + + VEC_free (bb_queue, heap, queues); + pointer_map_destroy (bb_bounds); + pointer_map_destroy (visited); +} + /* See if every path cross the loop goes through a statement that is known to not execute at the last iteration. In that case we can decrese iteration count by 1. */ @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str infer_loop_bounds_from_undefined (loop); + discover_iteration_bound_by_body_walk (loop); + maybe_lower_iteration_bound (loop); /* If we have a measured profile, use it to estimate the number of Index: testsuite/gcc.dg/tree-ssa/loop-38.c =================================================================== --- testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0) @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */ +int a[10]; +int b[11]; +t(int n) +{ + int i; + int sum = 0; + for (i=0;i<n;i++) + if (q()) + sum+=a[i]; + else + sum+=b[i]; + return sum; +} +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */ +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */ +/* { dg-final { cleanup-tree-dump "cunrolli" } } */