===================================================================
@@ -587,14 +587,14 @@ unroll_loop_constant_iterations (struct loop *loop
/* Now unroll the loop. */
opt_info_start_duplication (opt_info);
+
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
max_unroll,
wont_exit, desc->out_edge,
&remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
+ opt_info
? DLTHE_RECORD_COPY_NUMBER
- : 0));
+ : 0);
gcc_assert (ok);
if (opt_info)
@@ -876,6 +876,7 @@ unroll_loop_runtime_iterations (struct loop *loop)
auto_vec<basic_block> dom_bbs;
body = get_loop_body (loop);
+
for (i = 0; i < loop->num_nodes; i++)
{
vec<basic_block> ldom;
@@ -943,6 +944,7 @@ unroll_loop_runtime_iterations (struct loop *loop)
&& !desc->noloop_assumptions)
bitmap_set_bit (wont_exit, 1);
ezc_swtch = loop_preheader_edge (loop)->src;
+
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
&remove_edges,
@@ -952,6 +954,10 @@ unroll_loop_runtime_iterations (struct loop *loop)
/* Record the place where switch will be built for preconditioning. */
swtch = split_edge (loop_preheader_edge (loop));
+ int iter_freq, new_freq;
+ iter_freq = new_freq = swtch->frequency / (n_peel+1);
+ swtch->frequency = new_freq;
+
for (i = 0; i < n_peel; i++)
{
/* Peel the copy. */
@@ -958,12 +964,19 @@ unroll_loop_runtime_iterations (struct loop *loop)
bitmap_clear (wont_exit);
if (i != n_peel - 1 || !last_may_exit)
bitmap_set_bit (wont_exit, 1);
+ int saved_header_frequency = loop->header->frequency;
+ zero_loop_frequencies (loop);
+
+ int new_header_freq = (saved_header_frequency / (n_peel + 1)) * (i + 1);
+ increment_loop_frequencies (loop, loop->header, new_header_freq);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ);
+ zero_loop_frequencies (loop);
+ increment_loop_frequencies (loop, loop->header, saved_header_frequency);
gcc_assert (ok);
-
+
/* Create item for switch. */
j = n_peel - i - (extra_zero_check ? 0 : 1);
p = REG_BR_PROB_BASE / (i + 2);
@@ -979,11 +992,24 @@ unroll_loop_runtime_iterations (struct loop *loop)
swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
- single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
+ single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
+
+ new_freq = new_freq + iter_freq;
+ swtch->frequency = new_freq;
+
+ int prehead_frequency = 0;
+ edge_iterator ei;
+ edge an_edge;
+ FOR_EACH_EDGE (an_edge, ei, preheader->preds)
+ {
+ int the_edge_frequency = EDGE_FREQUENCY (an_edge);
+ prehead_frequency += the_edge_frequency;
+ }
+ preheader->frequency = prehead_frequency;
}
if (extra_zero_check)
@@ -1015,14 +1041,44 @@ unroll_loop_runtime_iterations (struct loop *loop)
bitmap_clear_bit (wont_exit, may_exit_copy);
opt_info_start_duplication (opt_info);
+ {
+ /* Recompute the loop body frequencies. */
+ zero_loop_frequencies (loop);
+
+ basic_block my_header = loop->header;
+ int sum_incoming_frequencies = 0;
+
+ edge_iterator ei;
+ edge predecessor;
+ FOR_EACH_EDGE (predecessor, ei, my_header->preds)
+ {
+ if (!in_loop_p (predecessor->src, loop))
+ sum_incoming_frequencies += EDGE_FREQUENCY (predecessor);
+ }
+ /* Scale the incoming frequencies according to the heuristic that
+ * the loop frequency is the incoming edge frequency divided by
+ * 0.09. This heuristic applies only to loops that iterate over a
+ * run-time value that is not known at compile time. Note that
+ * 1/.09 equals 11.1111. We'll use integer arithmetic on ten
+ * thousandths, and then divide by 10,000 after we've "rounded".
+ */
+
+ sum_incoming_frequencies *= 111111; /* multiply by 11.1111 */
+ sum_incoming_frequencies += 5000; /* round by adding 0.5 */
+ sum_incoming_frequencies /= 10000; /* convert ten thousandths
+ to ones
+ */
+
+ increment_loop_frequencies (loop, my_header, sum_incoming_frequencies);
+ }
+
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
max_unroll,
wont_exit, desc->out_edge,
&remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
+ opt_info
? DLTHE_RECORD_COPY_NUMBER
- : 0));
+ : 0);
gcc_assert (ok);
if (opt_info)
===================================================================
@@ -34,6 +34,17 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-loop-manip.h"
#include "dumpfile.h"
+/* Define ENABLE_CHECKING to enforce the following run-time checks.
+ *
+ * a. The sum of outgoing edge frequencies for the loop equals the
+ * sum of incoming edge frequencies for the loop header block.
+ *
+ * b. The sum of predecessor edge frequencies for every block
+ * in the loop equals the frequency of that block.
+ *
+ * This may report false-positive errors due to round-off errors.
+ */
+
static void copy_loops_to (struct loop **, int,
struct loop *);
static void loop_redirect_edge (edge, basic_block);
@@ -44,6 +55,543 @@ static void fix_loop_placements (struct loop *, bo
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *, bitmap);
+/*
+ * Return true iff block is considered to reside within the loop
+ * represented by loop_ptr.
+ */
+bool
+in_loop_p (basic_block block, struct loop *loop_ptr)
+{
+ basic_block *bbs = get_loop_body (loop_ptr);
+ bool result = false;
+
+ for (unsigned int i = 0; i < loop_ptr->num_nodes; i++)
+ {
+ if (bbs[i] == block)
+ result = true;
+ }
+ free (bbs);
+ return result;
+}
+
+/*
+ * Zero all frequencies associated with this loop.
+ */
+void
+zero_loop_frequencies (struct loop *loop_ptr)
+{
+ basic_block *bbs = get_loop_body (loop_ptr);
+ for (unsigned i = 0; i < loop_ptr->num_nodes; ++i)
+ {
+ bbs[i]->frequency = 0;
+ }
+ free (bbs);
+}
+
+/* A list of block_ladder_rung structs is used to keep track of all the
+ * blocks visited in a depth-first recursive traversal of a control-flow
+ * graph. This list is used to detect and prevent attempts to revisit
+ * a block that is already being visited in the recursive traversal.
+ */
+typedef struct block_ladder_rung {
+ basic_block block;
+ struct block_ladder_rung *lower_rung;
+} *ladder_rung_p;
+
+/* Return true iff an_edge represents the same source and destination
+ * blocks as another_edge.
+ */
+static bool
+same_edge_p (edge an_edge, edge another_edge)
+{
+ return ((an_edge->src == another_edge->src)
+ && (an_edge->dest == another_edge->dest));
+}
+
+/* Return true iff an_edge matches one of the nodes that is already
+ * present within set_of_edges.
+ */
+static bool
+in_edge_set_p (edge an_edge, vec<edge> set_of_edges)
+{
+ unsigned int j;
+ edge e;
+
+ FOR_EACH_VEC_ELT (set_of_edges, j, e)
+ {
+ if (same_edge_p (e, an_edge))
+ return true;
+ }
+ return false;
+}
+
+/* Return true iff an_edge->dest is already represented within
+ * the ladder_rung list.
+ */
+static bool
+in_call_chain_p (edge an_edge, ladder_rung_p ladder_rung)
+{
+ while (ladder_rung != NULL)
+ {
+ if (an_edge->dest == ladder_rung->block)
+ return true;
+ else
+ ladder_rung = ladder_rung->lower_rung;
+ }
+ return FALSE;
+}
+
+/* This recursive function visits all of the blocks contained within the
+ * loop represented by loop_ptr and reachable from incoming_edge,
+ * and zeroes the frequency field of each block. The recursion
+ * terminates if incoming_edge is known to exit this loop, or
+ * if the destination of incoming edge has already been visited
+ * in this recursive traversal, or if the destination of incoming_edge
+ * is the loop header.
+ */
+static void
+recursively_zero_frequency (struct loop *loop_ptr, vec<edge> exit_edges,
+ ladder_rung_p ladder_rung,
+ edge incoming_edge)
+{
+ if (incoming_edge->dest == loop_ptr->header)
+ return;
+ else if (in_edge_set_p (incoming_edge, exit_edges))
+ return;
+ else if (in_call_chain_p (incoming_edge, ladder_rung))
+ return;
+ else
+ {
+ struct block_ladder_rung a_rung;
+ basic_block block = incoming_edge->dest;
+
+ a_rung.block = block;
+ a_rung.lower_rung = ladder_rung;
+ block->frequency = 0;
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ recursively_zero_frequency (loop_ptr, exit_edges,
+ &a_rung, successor);
+ }
+ }
+}
+
+/* Return true iff the candidate block is found within the linked
+ * list represented by lower_steps, which would indicate that this
+ * block has already been visited by a recursive traversal.
+ */
+static bool
+recursion_detected_p (basic_block candidate, ladder_rung_p lower_steps) {
+ while (lower_steps != NULL)
+ {
+ if (lower_steps->block == candidate)
+ return true;
+ lower_steps = lower_steps->lower_rung;
+ }
+ /* we iterated through the entire list and did not find candidate */
+ return false;
+}
+
+/* Return true iff candidate is contained within the loop represented
+ * by loop_header and loop_latch.
+ *
+ * We consider the block to be within the loop if there exists a path
+ * within the control flow graph from this node to the loop's latch
+ * which does not pass through the loop's header. (If all paths to
+ * the latch pass through the loop header, then the node is contained
+ * within an outer-nested loop but not within this loop.)
+ *
+ * Note that if a candidate's successor is in the loop and the successor
+ * is not the loop header, then the candidate itself is also in the loop.
+ * If none of the successors of a candidate are in the loop, then the
+ * candidate itself is not in the loop.
+ */
+static bool
+in_loop_of_header_p (basic_block candidate, basic_block loop_header,
+ basic_block loop_latch, bool start_of_recursion,
+ ladder_rung_p lower_steps)
+{
+ if (candidate == loop_latch)
+ return true;
+ else if (candidate == loop_header)
+ return start_of_recursion;
+ else if (!start_of_recursion
+ && recursion_detected_p (candidate, lower_steps))
+ {
+ /* if recursion revisits a node already visited and the loop latch
+ * was not visited in the call chain, then we are traversing an
+ * iterative path that belongs to an outer-nested loop.
+ */
+ return false;
+ }
+ else
+ {
+ struct block_ladder_rung new_step;
+
+ new_step.block = candidate;
+ new_step.lower_rung = lower_steps;
+
+ edge_iterator ei;
+ edge successor_edge;
+ FOR_EACH_EDGE (successor_edge, ei, candidate->succs)
+ {
+ basic_block successor = successor_edge->dest;
+ if (in_loop_of_header_p (successor, loop_header,
+ loop_latch, false, &new_step))
+ return true;
+ }
+ return false; /* None of the successors was in loop */
+ }
+}
+
+/* Add candidate into the results vector if candidate
+ * is in the loop and it is not already contained within the results
+ * vector.
+ *
+ * We consider the block to be within the loop if there exists a path
+ * within the control flow graph from this node to the loop's latch
+ * which does not pass through the loop's header. (If all paths to
+ * the latch pass through the loop header, then the node is contained
+ * within an outer-nested loop but not within this loop.)
+ *
+ * If and only if candidate is added to the results vector, recursively
+ * do the same for each successor of candidate block.
+ *
+ * Return the potentially modified results vector.
+ */
+static vec<basic_block>
+recursively_get_loop_blocks (basic_block candidate, vec<basic_block> results,
+ basic_block loop_header, basic_block loop_latch)
+{
+ basic_block bb;
+ unsigned int u;
+
+ /* if candidate is already in the results vector, then we're done */
+ FOR_EACH_VEC_ELT (results, u, bb)
+ {
+ if (bb == candidate)
+ return results;
+ }
+
+ if (in_loop_of_header_p (candidate, loop_header, loop_latch, true, NULL))
+ {
+ results.safe_push (candidate);
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, candidate->succs)
+ {
+ if (successor->probability != 0)
+ {
+ results = recursively_get_loop_blocks (successor->dest, results,
+ loop_header, loop_latch);
+ }
+ }
+ }
+ return results;
+}
+
+/* Return a vector containing all of the blocks contained within the
+ * loop identified by loop_ptr.
+ */
+static vec<basic_block>
+get_loop_blocks (struct loop *loop_ptr)
+{
+ vec<basic_block> results;
+
+ results = vNULL;
+ results = recursively_get_loop_blocks (loop_ptr->header, results,
+ loop_ptr->header, loop_ptr->latch);
+ return results;
+}
+
+/* Return true iff block is an element of the block_set vector.
+ */
+static bool
+in_block_set_p (basic_block block, vec<basic_block> block_set)
+{
+ basic_block bb;
+ unsigned int u;
+ FOR_EACH_VEC_ELT (block_set, u, bb)
+ {
+ if (bb == block)
+ return true;
+ }
+ return false;
+}
+
+/* Return a vector containing all of the edges that exit the loop
+ * represented by the loop_blocks vector.
+ */
+static vec<edge>
+get_exit_edges_from_loop_blocks (vec<basic_block> loop_blocks) {
+ basic_block bb;
+ unsigned int u;
+ vec<edge> results = vNULL;
+
+ FOR_EACH_VEC_ELT (loop_blocks, u, bb)
+ {
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, bb->succs)
+ {
+ basic_block edge_dest = successor->dest;
+
+ if (!in_block_set_p (edge_dest, loop_blocks))
+ {
+ results.safe_push (successor);
+ }
+ }
+ }
+ return results;
+}
+
+/*
+ * Zero all frequencies for all blocks contained within the loop
+ * represented by loop_ptr which are reachable from block without
+ * passing through the block header. If block is not within the loop,
+ * this has no effect. The behavior is as outlined by the following
+ * algorithm:
+ *
+ * If block is contained within loop:
+ * Set block's frequency to zero
+ * Using a depth-first traversal, do the same for each successor
+ * transitively, stopping the recursive traversal if:
+ * the current block is the loop header, or
+ * the current block resides outside the loop, or
+ * the current block has already been visited in this depth-first
+ * traversal.
+ */
+static void
+zero_partial_loop_frequencies (struct loop *loop_ptr, basic_block block)
+{
+ /* When zero_partial_loop_frequencies is invoked, the *loop_ptr
+ * object is not entirely coherent, so the library service
+ * get_loop_exit_edges (loop_p) cannot be called from this context.
+ * Instead, we use get_loop_blocks (loop_p) and
+ * get_exit_edges_from_loop_blocks (vec<basic_block>) functions
+ * which assume only the validity of loop_ptr->loop_header,
+ * loop_ptr->loop_latch, and valid successor and predecessor
+ * information for each block contained within the loop.
+ */
+ vec<basic_block> loop_blocks = get_loop_blocks (loop_ptr);
+ if (in_block_set_p (block, loop_blocks))
+ {
+ struct block_ladder_rung ladder_rung;
+ ladder_rung.block = block;
+ ladder_rung.lower_rung = NULL;
+
+ vec<edge> exit_edges = get_exit_edges_from_loop_blocks (loop_blocks);
+ block->frequency = 0;
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ if (successor->probability != 0)
+ {
+ recursively_zero_frequency (loop_ptr, exit_edges,
+ &ladder_rung, successor);
+ }
+ }
+ exit_edges.release ();
+ }
+ loop_blocks.release ();
+}
+
+/* This recursive function visits all of the blocks contained within the
+ * loop represented by loop_ptr and reachable from incoming_edge,
+ * and increments the frequency field of each block by an
+ * appropriate scaling of frequency_increment. The
+ * frequency_increment value is scaled in recursive invocations of
+ * this function by the probability associated with the edge
+ * corresponding to the particular recursive call. The recursion
+ * terminates if incoming_edge is known to exit this loop, or
+ * if the destination of incoming edge has already been visited
+ * in this recursive traversal, or if the destination of incoming_edge
+ * is the loop header.
+ */
+static void
+recursively_increment_frequency (struct loop *loop_ptr, vec<edge> exit_edges,
+ ladder_rung_p ladder_rung,
+ edge incoming_edge,
+ int frequency_increment)
+{
+ if (incoming_edge->dest == loop_ptr->header)
+ return;
+ else if (in_edge_set_p (incoming_edge, exit_edges))
+ return;
+ else if (in_call_chain_p (incoming_edge, ladder_rung))
+ return;
+ else
+ {
+ struct block_ladder_rung a_rung;
+ basic_block block = incoming_edge->dest;
+
+ a_rung.block = block;
+ a_rung.lower_rung = ladder_rung;
+ block->frequency += frequency_increment;
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ int successor_increment =
+ (frequency_increment * successor->probability) / REG_BR_PROB_BASE;
+ recursively_increment_frequency (loop_ptr, exit_edges,
+ &a_rung, successor,
+ successor_increment);
+ }
+ }
+}
+
+/*
+ * If block is contained within loop, we do the following:
+ * Add incremental_frequency (which may be negative) to
+ * block->frequency and propogate this change to all successors of
+ * block that reside within the loop, transitively. Use a depth-first
+ * tree traversal, stopping the recursion at the loop header, at any
+ * successor block that resides outside the loop, and at any block
+ * that is already part of the current depth-first traversal.
+ */
+void
+increment_loop_frequencies (struct loop *loop_ptr, basic_block block,
+ int frequency_increment)
+{
+ vec<basic_block> loop_blocks = get_loop_blocks (loop_ptr);
+
+ if (in_block_set_p (block, loop_blocks))
+ {
+ struct block_ladder_rung ladder_rung;
+ ladder_rung.block = block;
+ ladder_rung.lower_rung = NULL;
+
+ vec<edge> exit_edges = get_exit_edges_from_loop_blocks (loop_blocks);
+ block->frequency += frequency_increment;
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ if (successor->probability != 0)
+ {
+ int successor_increment =
+ ((frequency_increment * successor->probability)
+ / REG_BR_PROB_BASE);
+ recursively_increment_frequency (loop_ptr, exit_edges,
+ &ladder_rung, successor,
+ successor_increment);
+ }
+ }
+ exit_edges.release ();
+ }
+ loop_blocks.release ();
+}
+
+#ifdef ENABLE_CHECKING
+/**
+ * Issue a fatal error message and abort program execution.
+ */
+static void
+internal (const char *msg)
+{
+ fprintf (stderr, "Fatal internal error: %s\n", msg);
+ exit (-1);
+}
+
+/*
+ * check_loop_frequency_integrity enforces that:
+ *
+ * a. The sum of outgoing edge frequencies for the loop equals the
+ * sum of incoming edge frequencies for the loop header block.
+ *
+ * b. The sum of predecessor edge frequencies for every block
+ * in the loop equals the frequency of that block.
+ *
+ * The integrity check is problematic due to round-off errors. Though
+ * it hasn't been tested with max-unroll-times greater than 4, we
+ * suspect that unrolling complex control structures contained within a
+ * loop that is unrolled more than 4 times may result in erroneous
+ * integrity check failures due to round-off errors.
+ */
+static void
+check_loop_frequency_integrity (struct loop *loop_ptr)
+{
+ unsigned int i, k;
+ basic_block a_block;
+
+ vec<basic_block> loop_body = get_loop_blocks (loop_ptr);
+ basic_block header;
+
+ FOR_EACH_VEC_ELT (loop_body, k, a_block)
+ {
+ int delta;
+ int predecessor_frequencies = 0;
+
+ edge_iterator ei;
+ edge a_predecessor;
+ FOR_EACH_EDGE (a_predecessor, ei, a_block->preds)
+ {
+ predecessor_frequencies += EDGE_FREQUENCY (a_predecessor);
+ }
+ delta = predecessor_frequencies - a_block->frequency;
+
+ /* Enforce tolerance to within 0.2%. */
+ int tolerance = predecessor_frequencies / 500;
+ if (tolerance < 10)
+ tolerance = 10;
+
+ if (delta < 0)
+ delta = -delta;
+
+ if (delta > tolerance)
+ {
+ internal ("Predecessor frequencies confused while unrolling loop.");
+ }
+ }
+
+ header = loop_ptr->header;
+ int incoming_frequency = 0;
+
+ edge_iterator ei;
+ edge a_predecessor;
+ FOR_EACH_EDGE (a_predecessor, ei, header->preds)
+ {
+ if (!in_block_set_p (a_predecessor->src, loop_body))
+ {
+ incoming_frequency += EDGE_FREQUENCY (a_predecessor);
+ }
+ }
+
+ int outgoing_frequency = 0;
+ vec<edge> exit_edges = get_exit_edges_from_loop_blocks (loop_body);
+ edge edge;
+ FOR_EACH_VEC_ELT (exit_edges, i, edge)
+ {
+ outgoing_frequency += EDGE_FREQUENCY (edge);
+ }
+
+ /* enforce tolerance to within 0.2% */
+ int tolerance = incoming_frequency / 500;
+ if (tolerance < 10)
+ tolerance = 10;
+ int delta = incoming_frequency - outgoing_frequency;
+
+ if (delta < 0)
+ delta = -delta;
+
+ if (delta > tolerance)
+ {
+ internal ("Incoherent frequencies while unrolling loop.");
+ }
+ loop_body.release ();
+ exit_edges.release ();
+}
+#endif /* ENABLE_CHECKING */
+
/* Checks whether basic block BB is dominated by DATA. */
static bool
rpe_enum_p (const_basic_block bb, const void *data)
@@ -1101,7 +1649,7 @@ can_duplicate_loop_p (const struct loop *loop)
is redistributed evenly to the remaining edges coming from E->src. */
static void
-set_zero_probability (edge e)
+set_zero_probability (struct loop *loop_ptr, edge e)
{
basic_block bb = e->src;
edge_iterator ei;
@@ -1109,6 +1657,10 @@ static void
unsigned n = EDGE_COUNT (bb->succs);
gcov_type cnt = e->count, cnt1;
unsigned prob = e->probability, prob1;
+ int original_edge_frequency;
+ int new_edge_frequency;
+ int change_in_edge_frequency;
+ bool edge_originates_in_loop = in_loop_p (bb, loop_ptr);
gcc_assert (n > 1);
cnt1 = cnt / (n - 1);
@@ -1118,18 +1670,63 @@ static void
{
if (ae == e)
continue;
-
- ae->probability += prob1;
- ae->count += cnt1;
+
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY (ae);
+ ae->probability += prob1;
+ ae->count += cnt1;
+ new_edge_frequency = EDGE_FREQUENCY (ae);
+ change_in_edge_frequency =
+ new_edge_frequency - original_edge_frequency;
+
+ increment_loop_frequencies (loop_ptr, ae->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ ae->probability += prob1;
+ ae->count += cnt1;
+ }
last = ae;
}
-
+
/* Move the rest to one of the edges. */
- last->probability += prob % (n - 1);
- last->count += cnt % (n - 1);
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY (last);
+ last->probability += prob % (n - 1);
+ last->count += cnt % (n - 1);
+ new_edge_frequency = EDGE_FREQUENCY (last);
+ change_in_edge_frequency = new_edge_frequency - original_edge_frequency;
+ if (change_in_edge_frequency != 0)
+ {
+ increment_loop_frequencies (loop_ptr, last->dest,
+ change_in_edge_frequency);
+ }
+ }
+ else
+ {
+ last->probability += prob % (n - 1);
+ last->count += cnt % (n - 1);
+ }
- e->probability = 0;
- e->count = 0;
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY (e);
+ e->probability = 0;
+ e->count = 0;
+ new_edge_frequency = EDGE_FREQUENCY (e);
+ change_in_edge_frequency =
+ new_edge_frequency - original_edge_frequency;
+ increment_loop_frequencies (loop_ptr, e->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ e->probability = 0;
+ e->count = 0;
+ }
}
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
@@ -1170,6 +1767,41 @@ duplicate_loop_to_header_edge (struct loop *loop,
bitmap bbs_to_scale = NULL;
bitmap_iterator bi;
+ /* Remember the initial ratio between frequency
+ * of edge into loop header and the frequency of the loop header.
+ * Preserve this ratio when we make adjustments within the loop.
+ * This distinction is necessary because different flavors of loops
+ * are subject to different heuristics. In particular, loops
+ * bounded by run-time constants assume that branches exiting a loop
+ * have probability 9%. Loops bounded by compile-time constants
+ * assume branches exiting a loop have probability 1%. There may be
+ * other circumstances that assume different behaviors.
+ *
+ * KELVIN_TODO:
+ * For loops that have a single exit, the exit ratio is the same as
+ * the ratio between the sum of the frequency of the header's
+ * incoming edges and the frequency of the header itself. For loops
+ * that have multiple exits, we still need to investigate.
+ */
+ int header_frequency = header->frequency;
+ int preheader_frequency = 0;
+
+ /* Sum the EDGE frequencies for all predecessor edges that
+ * originate outside the loop.
+ */
+
+ edge_iterator ei;
+ edge predecessor;
+ FOR_EACH_EDGE (predecessor, ei, header->preds)
+ {
+ if (!in_loop_p (predecessor->src, loop))
+ {
+ preheader_frequency += EDGE_FREQUENCY (predecessor);
+ }
+ }
+
+ int exit_ratio = (header_frequency * 10000 - 5000) / preheader_frequency;
+
gcc_assert (e->dest == loop->header);
gcc_assert (ndupl > 0);
@@ -1236,7 +1868,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
}
scale_step = XNEWVEC (int, ndupl);
-
+
for (i = 1; i <= ndupl; i++)
scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
? prob_pass_wont_exit
@@ -1293,7 +1925,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
/* Loop the new bbs will belong to. */
target = e->src->loop_father;
-
+
/* Original loops. */
n_orig_loops = 0;
for (aloop = loop->inner; aloop; aloop = aloop->next)
@@ -1301,7 +1933,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
orig_loops = XNEWVEC (struct loop *, n_orig_loops);
for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
orig_loops[i] = aloop;
-
+
set_loop_copy (loop, target);
first_active = XNEWVEC (basic_block, n);
@@ -1310,10 +1942,30 @@ duplicate_loop_to_header_edge (struct loop *loop,
memcpy (first_active, bbs, n * sizeof (basic_block));
first_active_latch = latch;
}
-
+
spec_edges[SE_ORIG] = orig;
spec_edges[SE_LATCH] = latch_edge;
+
+ /* Recompute the loop body frequencies. */
+ zero_loop_frequencies (loop);
+ basic_block my_header = loop->header;
+ int sum_incoming_frequencies = 0;
+
+ /* ei and predecessor declared above */
+ FOR_EACH_EDGE(predecessor, ei, my_header->preds)
+ {
+ /* exit_ratio is computed based on remembered circumstances upon
+ * entry into this function. Note that loops bounded by a compile-time
+ * constant have different exit ratio than loops bounded by a run-time
+ * value.
+ */
+ if (!in_loop_p (predecessor->src, loop))
+ sum_incoming_frequencies +=
+ (int) (EDGE_FREQUENCY (predecessor) * exit_ratio + 5000) / 10000;
+ }
+ increment_loop_frequencies (loop, my_header, sum_incoming_frequencies);
+
place_after = e->src;
for (j = 0; j < ndupl; j++)
{
@@ -1322,7 +1974,10 @@ duplicate_loop_to_header_edge (struct loop *loop,
/* Copy bbs. */
copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
- place_after, true);
+ place_after, true);
+
+ int place_after_frequency = place_after->frequency;
+ basic_block saved_place_after = place_after;
place_after = new_spec_edges[SE_LATCH]->src;
if (flags & DLTHE_RECORD_COPY_NUMBER)
@@ -1352,8 +2007,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
}
for (i = 0; i < n; i++)
new_bbs[i]->flags &= ~BB_DUPLICATED;
- }
-
+ }
/* Redirect the special edges. */
if (is_latch)
{
@@ -1373,13 +2027,18 @@ duplicate_loop_to_header_edge (struct loop *loop,
e = new_spec_edges[SE_LATCH];
}
+ zero_partial_loop_frequencies (loop, saved_place_after);
+ increment_loop_frequencies (loop,
+ saved_place_after, place_after_frequency);
+
/* Record exit edge in this copy. */
if (orig && bitmap_bit_p (wont_exit, j + 1))
{
if (to_remove)
- to_remove->safe_push (new_spec_edges[SE_ORIG]);
- set_zero_probability (new_spec_edges[SE_ORIG]);
-
+ {
+ to_remove->safe_push (new_spec_edges[SE_ORIG]);
+ }
+ set_zero_probability (loop, new_spec_edges[SE_ORIG]);
/* Scale the frequencies of the blocks dominated by the exit. */
if (bbs_to_scale)
{
@@ -1413,8 +2072,10 @@ duplicate_loop_to_header_edge (struct loop *loop,
if (orig && bitmap_bit_p (wont_exit, 0))
{
if (to_remove)
- to_remove->safe_push (orig);
- set_zero_probability (orig);
+ {
+ to_remove->safe_push (orig);
+ }
+ set_zero_probability (loop, orig);
/* Scale the frequencies of the blocks dominated by the exit. */
if (bbs_to_scale)
@@ -1462,6 +2123,13 @@ duplicate_loop_to_header_edge (struct loop *loop,
free (bbs);
BITMAP_FREE (bbs_to_scale);
+#ifdef ENABLE_CHECKING
+ /* This function call is strictly paranoia. it makes no changes
+ * to the data structures.
+ */
+ check_loop_frequency_integrity (loop);
+#endif
+
return true;
}
===================================================================
@@ -60,4 +60,8 @@ extern void force_single_succ_latches (void);
struct loop * loop_version (struct loop *, void *,
basic_block *, unsigned, unsigned, unsigned, bool);
+extern bool in_loop_p (basic_block, struct loop *);
+extern void zero_loop_frequencies (struct loop *);
+extern void increment_loop_frequencies (struct loop *, basic_block, int);
+
#endif /* GCC_CFGLOOPMANIP_H */