From patchwork Tue Oct 30 18:29:49 2012
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: Non-dominating loop bounds in tree-ssa-loop-niter 2/4
From: Jan Hubicka
X-Patchwork-Id: 195582
Message-Id: <20121030182949.GA9056@kam.mff.cuni.cz>
To: gcc-patches@gcc.gnu.org, rguenther@suse.de
Date: Tue, 30 Oct 2012 19:29:49 +0100
Hi,
this patch implements the second part of planned change - to determine loop bounds
based by shortest path discovery. This allows to bound number of iterations
on loops with bounds in statements that do not dominate the latch.
I originally planned to implement this as part of maybe_lower_iteration_bound,
but I found doing it separately is more readable. While both performs walk of
the loop body, both do that for different reasons.
discover_iteration_bound_by_body_walk walks from header to latch, while
maybe_lower_iteration_bound walks from header to first statement with side
effects looking if there is exit on the way.
Both walks can be skipped for many loops, but each with different reasons.
Bootstrapped/regtested x86_64-linux, OK?
* tree-ssa-loop-niter.c (double_int_cmp, bound_index,
discover_iteration_bound_by_body_walk): New functions.
(discover_iteration_bound_by_body_walk): Use it.
* gcc.dg/tree-ssa/loop-38.c: New testcase.
Index: tree-ssa-loop-niter.c
===================================================================
--- tree-ssa-loop-niter.c (revision 192989)
+++ tree-ssa-loop-niter.c (working copy)
@@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val)
return ret;
}
+/* Compare double ints, callback for qsort. */
+
+int
+double_int_cmp (const void *p1, const void *p2)
+{
+ const double_int *d1 = (const double_int *)p1;
+ const double_int *d2 = (const double_int *)p2;
+ if (*d1 == *d2)
+ return 0;
+ if (d1->ult (*d2))
+ return -1;
+ return 1;
+}
+
+/* Return index of BOUND in BOUNDS array sorted in increasing order.
+ Lookup by binary search. */
+
+int
+bound_index (VEC (double_int, heap) *bounds, double_int bound)
+{
+ unsigned int end = VEC_length (double_int, bounds);
+ unsigned int begin = 0;
+
+ /* Find a matching index by means of a binary search. */
+ while (begin != end)
+ {
+ unsigned int middle = (begin + end) / 2;
+ double_int index = VEC_index (double_int, bounds, middle);
+
+ if (index == bound)
+ return middle;
+ else if (index.ult (bound))
+ begin = middle + 1;
+ else
+ end = middle;
+ }
+ gcc_unreachable ();
+}
+
+/* Used to hold vector of queues of basic blocks bellow. */
+typedef VEC (basic_block, heap) *bb_queue;
+DEF_VEC_P(bb_queue);
+DEF_VEC_ALLOC_P(bb_queue,heap);
+
+/* We recorded loop bounds only for statements dominating loop latch (and thus
+ executed each loop iteration). If there are any bounds on statements not
+ dominating the loop latch we can improve the estimate by walking the loop
+ body and seeing if every path from loop header to loop latch contains
+ some bounded statement. */
+
+static void
+discover_iteration_bound_by_body_walk (struct loop *loop)
+{
+ pointer_map_t *bb_bounds;
+ struct nb_iter_bound *elt;
+ VEC (double_int, heap) *bounds = NULL;
+ VEC (bb_queue, heap) *queues = NULL;
+ bb_queue queue = NULL;
+ ptrdiff_t queue_index;
+ ptrdiff_t latch_index = 0;
+ pointer_map_t *visited;
+
+ /* Discover what bounds may interest us. */
+ for (elt = loop->bounds; elt; elt = elt->next)
+ {
+ double_int bound = elt->bound;
+
+ /* Exit terminates loop at given iteration, while non-exits produce undefined
+ effect on the next iteration. */
+ if (!elt->is_exit)
+ {
+ bound += double_int_one;
+ /* Overflow, give up on this bound. */
+ if (bound == double_int_zero)
+ continue;
+ }
+
+ if (!loop->any_upper_bound
+ || bound.ult (loop->nb_iterations_upper_bound))
+ VEC_safe_push (double_int, heap, bounds, bound);
+ }
+
+ /* Exit early if there is nothing to do. */
+ if (!bounds)
+ return;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
+
+ /* Sort the bounds in decreasing order. */
+ qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds),
+ sizeof (double_int), double_int_cmp);
+
+ /* For every basic block record the lowest bound that is guaranteed to
+ terminate the loop. */
+
+ bb_bounds = pointer_map_create ();
+ for (elt = loop->bounds; elt; elt = elt->next)
+ {
+ double_int bound = elt->bound;
+ if (!elt->is_exit)
+ {
+ bound += double_int_one;
+ /* Overflow, give up on this bound. */
+ if (bound == double_int_zero)
+ continue;
+ }
+
+ if (!loop->any_upper_bound
+ || bound.ult (loop->nb_iterations_upper_bound))
+ {
+ ptrdiff_t index = bound_index (bounds, bound);
+ void **entry = pointer_map_contains (bb_bounds,
+ gimple_bb (elt->stmt));
+ if (!entry)
+ *pointer_map_insert (bb_bounds,
+ gimple_bb (elt->stmt)) = (void *)index;
+ else if ((ptrdiff_t)*entry > index)
+ *entry = (void *)index;
+ }
+ }
+
+ visited = pointer_map_create ();
+
+ /* Perform shortest path discovery loop->header ... loop->latch.
+
+ The "distance" is given by the smallest loop bound of basic block
+ present in the path and we look for path with largest smallest bound
+ on it.
+
+ To avoid the need for fibonaci heap on double ints we simply compress
+ double ints into indexes to BOUNDS array and then represent the queue
+ as arrays of queues for every index.
+ Index of VEC_length (BOUNDS) means that the execution of given BB has
+ no bounds determined.
+
+ VISITED is a pointer map translating basic block into smallest index
+ it was inserted into the priority queue with. */
+ latch_index = -1;
+
+ /* Start walk in loop header with index set to infinite bound. */
+ queue_index = VEC_length (double_int, bounds);
+ VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1);
+ VEC_safe_push (basic_block, heap, queue, loop->header);
+ VEC_replace (bb_queue, queues, queue_index, queue);
+ *pointer_map_insert (visited, loop->header) = (void *)queue_index;
+
+ for (; queue_index >= 0; queue_index--)
+ {
+ if (latch_index < queue_index)
+ {
+ while (VEC_length (basic_block,
+ VEC_index (bb_queue, queues, queue_index)))
+ {
+ basic_block bb;
+ ptrdiff_t bound_index = queue_index;
+ void **entry;
+ edge e;
+ edge_iterator ei;
+
+ queue = VEC_index (bb_queue, queues, queue_index);
+ bb = VEC_pop (basic_block, queue);
+
+ /* OK, we later increased BB priority, skip it. */
+ if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index)
+ continue;
+
+ /* See if we can improve the bound. */
+ entry = pointer_map_contains (bb_bounds, bb);
+ if (entry && (ptrdiff_t)*entry < bound_index)
+ bound_index = (ptrdiff_t)*entry;
+
+ /* Insert succesors into the queue, watch for latch edge
+ and record greatest index we saw. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ bool insert = false;
+ void **entry;
+
+ if (loop_exit_edge_p (loop, e))
+ continue;
+
+ if (e == loop_latch_edge (loop)
+ && latch_index < bound_index)
+ latch_index = bound_index;
+ else if (!(entry = pointer_map_contains (visited, e->dest)))
+ {
+ insert = true;
+ *pointer_map_insert (visited, e->dest) = (void *)bound_index;
+ }
+ else if ((ptrdiff_t)*entry < bound_index)
+ {
+ insert = true;
+ *entry = (void *)bound_index;
+ }
+
+ if (insert)
+ {
+ bb_queue queue2 = VEC_index (bb_queue, queues, bound_index);
+ VEC_safe_push (basic_block, heap, queue2, e->dest);
+ VEC_replace (bb_queue, queues, bound_index, queue2);
+ }
+ }
+ }
+ }
+ else
+ VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index));
+ }
+
+ gcc_assert (latch_index >= 0);
+ if (latch_index < VEC_length (double_int, bounds))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Found better loop bound ");
+ dump_double_int (dump_file,
+ VEC_index (double_int, bounds, latch_index), true);
+ fprintf (dump_file, "\n");
+ }
+ record_niter_bound (loop, VEC_index (double_int, bounds, latch_index),
+ false, true);
+ }
+
+ VEC_free (bb_queue, heap, queues);
+ pointer_map_destroy (bb_bounds);
+ pointer_map_destroy (visited);
+}
+
/* See if every path cross the loop goes through a statement that is known
to not execute at the last iteration. In that case we can decrese iteration
count by 1. */
@@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str
infer_loop_bounds_from_undefined (loop);
+ discover_iteration_bound_by_body_walk (loop);
+
maybe_lower_iteration_bound (loop);
/* If we have a measured profile, use it to estimate the number of
Index: testsuite/gcc.dg/tree-ssa/loop-38.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0)
+++ testsuite/gcc.dg/tree-ssa/loop-38.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
+int a[10];
+int b[11];
+t(int n)
+{
+ int i;
+ int sum = 0;
+ for (i=0;i