===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i;
+
+ for (i=0;i<n;i++)
+ d[i*2] = 0.0;
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+#include <stdio.h>
+
+volatile int a;
+
+extern int baz (int arg);
+
+/* loop with multiple exits and constant iterations */
+void foo (double *d, unsigned long int n, int a, double d1, double d2)
+{
+ unsigned long int i;
+ volatile unsigned int j;
+ for (i=0;i<1000001;i++)
+ {
+ double dd;
+ dd = d1;
+ if (a > 5)
+ dd = d2;
+ else if (a < 0)
+ break;
+ else
+ a = baz (a);
+ d[i] = dd;
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+#include <stdio.h>
+
+volatile int a;
+
+extern int baz (int arg);
+
+/* loop with multiple exits and runtime iterations */
+void foo (double *d, unsigned long int n, int a, double d1, double d2)
+{
+ unsigned long int i;
+ volatile unsigned int j;
+ for (i=0;i<n;i++)
+ {
+ double dd;
+ dd = d1;
+ if (a > 5)
+ dd = d2;
+ else if (a < 0)
+ break;
+ else
+ a = baz (a);
+ d[i] = dd;
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+volatile int a;
+
+/* loop with multiple entries constant iterations, probably
+ should not unroll */
+void foo (double *d, unsigned long int n, double d1, double d2)
+{
+ unsigned long int i;
+
+ if (a <= 0)
+ {
+ goto middle_of_loop;
+ }
+
+ for (i=0;i<1000007;i++)
+ {
+ if (a > 5)
+ d[i] = d1;
+ else
+ d[i] = d2;
+
+ middle_of_loop:
+ if (a == 0)
+ {
+ double td;
+ td = d1;
+ d1 = d2;
+ d2 = td;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+volatile int a;
+
+/* Loop with multiple entries runtime iterations, probably
+ should not unroll. */
+void foo (double *d, unsigned long int n, double d1, double d2)
+{
+ unsigned long int i;
+
+ if (a <= 0)
+ {
+ goto middle_of_loop;
+ }
+
+ for (i=0;i<n;i++)
+ {
+ if (a > 5)
+ d[i] = d1;
+ else
+ d[i] = d2;
+
+ middle_of_loop:
+ if (a == 0)
+ {
+ double td;
+ td = d1;
+ d1 = d2;
+ d2 = td;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+/* Loop within loop (only inner-most loop should be unrolled). Both loops
+ are run-time bounded. */
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i, j;
+
+ for (j = 0; j < n*2; j++)
+ {
+ for (i=0;i<n;i++)
+ {
+ d[i*2] = 0.0;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+/* Loop within loop (only inner-most loop should be unrolled). Both loops
+ are constant bounded. */
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i, j;
+
+ for (j = 0; j < 1000000; j++)
+ {
+ for (i=0;i < 1000003;i++)
+ {
+ d[i*2] = 0.0;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+volatile int a;
+
+void foo (double *d, unsigned long int n, double d1, double d2)
+{
+ unsigned long int i;
+
+ for (i=0;i<n;i++)
+ {
+ double dd;
+ dd = d1;
+ if (a > 5)
+ dd = d2;
+ d[i] = dd;
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i;
+ volatile unsigned int j;
+ for (i=0;i<1000000;i++)
+ d[j] = 0.0;
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+#include <stdio.h>
+
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i;
+ volatile unsigned int j;
+ for (i=0;i<1000000;i++)
+ {
+ switch (i % 5)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ /* frequency of this block should be 4 times the frequency
+ of case 4, but that might be too much to expect from
+ the compiler's analysis. */
+ fprintf (stderr, "value %d has remainder < 4\n", i);
+ break;
+
+ case 4:
+ fprintf (stderr, "value %d has remainder 4\n", i);
+ break;
+
+ default:
+ fprintf (stderr, "this code should not be reached\n");
+ break;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+/* Constant iterations with iterations not divisible by 4. */
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i;
+ volatile unsigned int j;
+ for (i=0;i<1000003;i++)
+ d[j] = 0.0;
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
+
===================================================================
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+#include <stdio.h>
+
+/* Switch statement embedded within a runtime iterations loop. */
+void foo (double *d, unsigned long int n) {
+ unsigned long int i;
+
+ for (i=0;i<n;i++) {
+ switch (i % 5) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ /* Frequency of this block should be 4 times the frequency
+ of case 4, but that might be too much to expect from
+ the compiler's analysis. */
+ fprintf (stderr, "value %d has remainder < 4\n", i);
+ break;
+
+ case 4:
+ fprintf (stderr, "value %d has remainder 4\n", i);
+ break;
+
+ default:
+ fprintf (stderr, "this code should not be reached\n");
+ break;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+#include <stdio.h>
+
+volatile int a;
+
+/* if-then-else statement embedded within a constant iterations loop,
+ where constant number of iterations is not divisible by 4. */
+void foo (double *d, unsigned long int n, double d1, double d2)
+{
+ unsigned long int i;
+ volatile unsigned int j;
+ for (i=0;i<1000001;i++)
+ {
+ double dd;
+ dd = d1;
+ if (a > 5)
+ dd = d2;
+ d[i] = dd;
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+/* Loop within loop (only inner-most loop should be unrolled). Inner loop
+ is run-time bounded. */
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i, j;
+
+ for (j = 0; j < 10000002; j++)
+ {
+ for (i=0;i<n;i++)
+ {
+ d[i*2] = 0.0;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-loop2_unroll" } */
+
+/* Loop within loop (only inner-most loop should be unrolled). Inner loop
+ is constant bounded. */
+void foo (double *d, unsigned long int n)
+{
+ unsigned long int i, j;
+
+ for (i=0;i<n;i++)
+ {
+ for (j = 0; j < 10000002; j++)
+ {
+ d[j*2] = 0.0;
+ }
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
===================================================================
@@ -33,7 +33,15 @@ along with GCC; see the file COPYING3. If not see
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "dumpfile.h"
+#include "diagnostic-core.h"
+/* Use a command-line option to set the flag_checking option to enforce
+ the following run-time checks.
+ a. The sum of outgoing edge frequencies for the loop equals the
+ sum of incoming edge frequencies for the loop header block.
+ b. The sum of predecessor edge frequencies for every block
+ in the loop equals the frequency of that block. */
+
static void copy_loops_to (struct loop **, int,
struct loop *);
static void loop_redirect_edge (edge, basic_block);
@@ -44,6 +52,319 @@ static void fix_loop_placements (struct loop *, bo
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *, bitmap);
+/* Return true iff BLOCK is considered to reside within the loop
+ represented by LOOP_PTR. */
+bool
+in_loop_p (basic_block block, struct loop *loop_ptr)
+{
+ return (block->loop_father == loop_ptr)
+ || flow_loop_nested_p (loop_ptr, block->loop_father);
+}
+
+/* Zero all frequencies associated with loop LOOP_PTR. */
+void
+zero_loop_frequencies (struct loop *loop_ptr)
+{
+ basic_block *bbs = get_loop_body (loop_ptr);
+ for (unsigned i = 0; i < loop_ptr->num_nodes; ++i)
+ {
+ bbs[i]->frequency = 0;
+ }
+ free (bbs);
+}
+
+/* A list of block_ladder_rung structs is used to keep track of all
+ the blocks visited in a depth-first recursive traversal of a control-flow
+ graph. This list is used to detect and prevent attempts to revisit
+ a block that is already being visited in the recursive traversal. */
+typedef struct block_ladder_rung {
+ basic_block block;
+ struct block_ladder_rung *lower_rung;
+} *ladder_rung_p;
+
+/* Return true iff AN_EDGE matches one of the nodes that is already
+ present within SET_OF_EDGES. */
+static bool
+in_edge_set_p (edge an_edge, vec<edge> set_of_edges)
+{
+ unsigned int j;
+ edge e;
+
+ FOR_EACH_VEC_ELT (set_of_edges, j, e)
+ {
+ if (e == an_edge)
+ return true;
+ }
+ return false;
+}
+
+/* Return true iff AN_EDGE->dest is already represented within
+ the LADDER_RUNG list. */
+static bool
+in_call_chain_p (edge an_edge, ladder_rung_p ladder_rung)
+{
+ while (ladder_rung != NULL)
+ {
+ if (an_edge->dest == ladder_rung->block)
+ return true;
+ else
+ ladder_rung = ladder_rung->lower_rung;
+ }
+ return FALSE;
+}
+
+/* This recursive function visits the blocks contained within the
+ loop represented by LOOP_PTR and reachable from INCOMING_EDGE
+ without leaving the loop and zeroes the frequency field of each
+ block. The recursion terminates if INCOMING_EDGE is known to exit
+ this loop, or if the destination of INCOMING_EDGE has already been
+ visited in this recursive traversal, or if the destination of
+ INCOMING_EDGE is the loop header.
+
+ Note that depth of recursion is limited to the maximum number of
+ blocks visited on a non-iterative traversal of the blocks contained
+ within the loop. */
+static void
+recursively_zero_frequency (struct loop *loop_ptr, vec<edge> exit_edges,
+ ladder_rung_p ladder_rung,
+ edge incoming_edge)
+{
+ if (incoming_edge->dest == loop_ptr->header)
+ return;
+ else if (in_edge_set_p (incoming_edge, exit_edges))
+ return;
+ else if (in_call_chain_p (incoming_edge, ladder_rung))
+ return;
+ else
+ {
+ struct block_ladder_rung a_rung;
+ basic_block block = incoming_edge->dest;
+
+ a_rung.block = block;
+ a_rung.lower_rung = ladder_rung;
+ block->frequency = 0;
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ recursively_zero_frequency (loop_ptr, exit_edges,
+ &a_rung, successor);
+ }
+}
+
+/* Return true iff the CANDIDATE block is found within the linked
+ list represented by LOWER_STEPS, which would indicate that this
+ block has already been visited by a recursive traversal. */
+static bool
+recursion_detected_p (basic_block candidate, ladder_rung_p lower_steps)
+{
+ while (lower_steps != NULL)
+ {
+ if (lower_steps->block == candidate)
+ return true;
+ lower_steps = lower_steps->lower_rung;
+ }
+ /* we iterated through the entire list and did not find candidate */
+ return false;
+}
+
+/* Zero all frequencies for all blocks contained within the loop
+ represented by LOOP_PTR which are reachable from BLOCK without
+ passing through the loop header. If BLOCK is not within the loop,
+ this has no effect. The behavior is as outlined by the following
+ algorithm:
+
+ If BLOCK is contained within loop:
+ Set BLOCK's frequency to zero
+ Using a depth-first traversal, do the same for each successor
+ transitively, stopping the recursive traversal if:
+ the current block is the loop header, or
+ the current block resides outside the loop, or
+ the current block has already been visited in this depth-first
+ traversal. */
+static void
+zero_partial_loop_frequencies (struct loop *loop_ptr, basic_block block)
+{
+ if (in_loop_p (block, loop_ptr))
+ {
+ struct block_ladder_rung ladder_rung;
+ ladder_rung.block = block;
+ ladder_rung.lower_rung = NULL;
+ vec<edge> exit_edges = get_loop_exit_edges (loop_ptr);
+ block->frequency = 0;
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ if (successor->probability != 0)
+ {
+ recursively_zero_frequency (loop_ptr, exit_edges,
+ &ladder_rung, successor);
+ }
+ }
+ exit_edges.release ();
+ }
+}
+
+/* This recursive function visits all of the blocks contained within the
+ loop represented by LOOP_PTR and reachable from INCOMING_EDGE,
+ and increments the frequency field of each block by an
+ appropriate scaling of FREQUENCY_INCREMENT. The
+ FREQUENCY_INCREMENT value is scaled in recursive invocations of
+ this function by the probability associated with the edge
+ corresponding to the particular recursive call. The recursion
+ terminates if INCOMING_EDGE is known to exit this loop, or
+ if the destination of INCOMING_EDGE has already been visited
+ in this recursive traversal, or if the destination of INCOMING_EDGE
+ is the loop header.
+
+ Note that depth of recursion is limited to the maximum number of
+ blocks visited on a non-iterative traversal of the blocks contained
+ within the loop. */
+static void
+recursively_increment_frequency (struct loop *loop_ptr, vec<edge> exit_edges,
+ ladder_rung_p ladder_rung, edge incoming_edge,
+ int frequency_increment)
+{
+ if (incoming_edge->dest == loop_ptr->header)
+ return;
+ else if (in_edge_set_p (incoming_edge, exit_edges))
+ return;
+ else if (in_call_chain_p (incoming_edge, ladder_rung))
+ return;
+ else
+ {
+ struct block_ladder_rung a_rung;
+ basic_block block = incoming_edge->dest;
+
+ a_rung.block = block;
+ a_rung.lower_rung = ladder_rung;
+ block->frequency += frequency_increment;
+
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ int successor_increment =
+ (frequency_increment * successor->probability) / REG_BR_PROB_BASE;
+ recursively_increment_frequency (loop_ptr, exit_edges,
+ &a_rung, successor,
+ successor_increment);
+ }
+ }
+}
+
+/* If BLOCK is contained within loop LOOP_PTR, we do the following:
+ Add INCREMENTAL_FREQUENCY (which may be negative) to
+ BLOCK->frequency and propogate this change to all successors of
+ BLOCK that reside within the loop, transitively. The
+ FREQUENCY_INCREMENT value is scaled before adding the value
+ to successor nodes by the probability factor associated with
+ the edges along all paths to the successor. Use a depth-first
+ tree traversal, stopping the recursion at the loop header, at any
+ successor block that resides outside the loop, and at any block
+ that is already part of the current depth-first traversal. */
+void
+increment_loop_frequencies (struct loop *loop_ptr, basic_block block,
+ int frequency_increment)
+{
+ if (in_loop_p (block, loop_ptr))
+ {
+ struct block_ladder_rung ladder_rung;
+ ladder_rung.block = block;
+ ladder_rung.lower_rung = NULL;
+
+ vec<edge> exit_edges = get_loop_exit_edges (loop_ptr);
+ block->frequency += frequency_increment;
+ edge_iterator ei;
+ edge successor;
+ FOR_EACH_EDGE (successor, ei, block->succs)
+ {
+ if (successor->probability != 0)
+ {
+ int successor_increment =
+ ((frequency_increment * successor->probability)
+ / REG_BR_PROB_BASE);
+ recursively_increment_frequency (loop_ptr, exit_edges,
+ &ladder_rung, successor,
+ successor_increment);
+ }
+ }
+ exit_edges.release ();
+ }
+}
+
+/* check_loop_frequency_integrity enforces that:
+ a. The sum of outgoing edge frequencies for loop LOOP_PTR equals the
+ sum of incoming edge frequencies for the loop's header block.
+ b. The sum of predecessor edge frequencies for every block
+ in the loop equals the frequency of that block.
+ Consistency of edge frequencies is enforced to within a programmed
+ tolerance value. The objective of allowing some variance from
+ strict enforcement of equality is to allow for the accumulation of
+ round-off errors. */
+static void
+check_loop_frequency_integrity (struct loop *loop_ptr)
+{
+ unsigned int i, k;
+ basic_block a_block;
+ basic_block* loop_body = get_loop_body (loop_ptr);
+ basic_block header;
+
+ for (unsigned k = 0; k < loop_ptr->num_nodes; k++)
+ {
+ int delta;
+ int predecessor_frequencies = 0;
+ edge_iterator ei;
+ edge a_predecessor;
+
+ a_block = loop_body[k];
+ FOR_EACH_EDGE (a_predecessor, ei, a_block->preds)
+ predecessor_frequencies += EDGE_FREQUENCY (a_predecessor);
+ delta = predecessor_frequencies - a_block->frequency;
+
+ /* Enforce tolerance to within 0.2%. */
+ int tolerance = predecessor_frequencies / 500;
+ if (tolerance < 10)
+ tolerance = 10;
+ if (delta < 0)
+ delta = -delta;
+ if (delta > tolerance)
+ fatal_error (input_location,
+ "Inconsistent predecessor frequencies "
+ " while unrolling loop.");
+ }
+ free(loop_body);
+
+ header = loop_ptr->header;
+ int incoming_frequency = 0;
+
+ edge_iterator ei;
+ edge a_predecessor;
+ FOR_EACH_EDGE (a_predecessor, ei, header->preds)
+ if (!in_loop_p (a_predecessor->src, loop_ptr))
+ incoming_frequency += EDGE_FREQUENCY (a_predecessor);
+
+ int outgoing_frequency = 0;
+ vec<edge> exit_edges = get_loop_exit_edges (loop_ptr);
+ edge edge;
+ FOR_EACH_VEC_ELT (exit_edges, i, edge)
+ outgoing_frequency += EDGE_FREQUENCY (edge);
+
+ /* enforce tolerance to within 0.2% */
+ int tolerance = incoming_frequency / 500;
+ if (tolerance < 10)
+ tolerance = 10;
+ int delta = incoming_frequency - outgoing_frequency;
+ if (delta < 0)
+ delta = -delta;
+ if (delta > tolerance)
+ fatal_error (input_location,
+ "Inconsistent enter/exit frequencies "
+ "while unrolling loop.");
+ exit_edges.release ();
+}
+
/* Checks whether basic block BB is dominated by DATA. */
static bool
rpe_enum_p (const_basic_block bb, const void *data)
@@ -1097,11 +1418,12 @@ can_duplicate_loop_p (const struct loop *loop)
return ret;
}
+static void
/* Sets probability and count of edge E to zero. The probability and count
- is redistributed evenly to the remaining edges coming from E->src. */
-
-static void
-set_zero_probability (edge e)
+ is redistributed evenly to the remaining edges coming from E->src
+ and is propagated transitively to all nodes contained within the
+ loop identified by LOOP_PTR and reachable from E->src. */
+set_zero_probability (struct loop* loop_ptr, edge e)
{
basic_block bb = e->src;
edge_iterator ei;
@@ -1109,6 +1431,10 @@ can_duplicate_loop_p (const struct loop *loop)
unsigned n = EDGE_COUNT (bb->succs);
gcov_type cnt = e->count, cnt1;
unsigned prob = e->probability, prob1;
+ int original_edge_frequency;
+ int new_edge_frequency;
+ int change_in_edge_frequency;
+ bool edge_originates_in_loop = in_loop_p (bb, loop_ptr);
gcc_assert (n > 1);
cnt1 = cnt / (n - 1);
@@ -1119,17 +1445,58 @@ can_duplicate_loop_p (const struct loop *loop)
if (ae == e)
continue;
- ae->probability += prob1;
- ae->count += cnt1;
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY (ae);
+ ae->probability += prob1;
+ ae->count += cnt1;
+ new_edge_frequency = EDGE_FREQUENCY (ae);
+ change_in_edge_frequency =
+ new_edge_frequency - original_edge_frequency;
+ increment_loop_frequencies (loop_ptr, ae->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ ae->probability += prob1;
+ ae->count += cnt1;
+ }
last = ae;
}
-
+
/* Move the rest to one of the edges. */
- last->probability += prob % (n - 1);
- last->count += cnt % (n - 1);
-
- e->probability = 0;
- e->count = 0;
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY (last);
+ last->probability += prob % (n - 1);
+ last->count += cnt % (n - 1);
+ new_edge_frequency = EDGE_FREQUENCY (last);
+ change_in_edge_frequency = new_edge_frequency - original_edge_frequency;
+ if (change_in_edge_frequency != 0)
+ increment_loop_frequencies (loop_ptr, last->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ last->probability += prob % (n - 1);
+ last->count += cnt % (n - 1);
+ }
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY (e);
+ e->probability = 0;
+ e->count = 0;
+ new_edge_frequency = EDGE_FREQUENCY (e);
+ change_in_edge_frequency =
+ new_edge_frequency - original_edge_frequency;
+ increment_loop_frequencies (loop_ptr, e->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ e->probability = 0;
+ e->count = 0;
+ }
}
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
@@ -1170,6 +1537,31 @@ duplicate_loop_to_header_edge (struct loop *loop,
bitmap bbs_to_scale = NULL;
bitmap_iterator bi;
+ /* Remember the initial ratio between frequency of edge into loop
+ header and the frequency of the loop header. Preserve this ratio
+ when we make adjustments within the loop. This distinction is
+ necessary because different flavors of loops are subject to
+ different heuristics. In particular, loops bounded by run-time
+ constants assume that branches exiting a loop have probability
+ 9%. Loops bounded by compile-time constants assume branches
+ exiting a loop have probability 1%. There may be other
+ circumstances that assume different behaviors.
+
+ TODO: For loops that have a single exit, the exit ratio is the
+ same as the ratio between the sum of the frequency of the
+ header's incoming edges and the frequency of the header itself.
+ For loops that have multiple exits, investigate. */
+ int header_frequency = header->frequency;
+ int preheader_frequency = 0;
+
+ /* Sum the EDGE frequencies for all predecessor edges that originate
+ outside the loop. */
+ edge_iterator ei;
+ edge predecessor;
+ FOR_EACH_EDGE (predecessor, ei, header->preds)
+ if (!in_loop_p (predecessor->src, loop))
+ preheader_frequency += EDGE_FREQUENCY (predecessor);
+ int exit_ratio = (header_frequency * 10000 - 5000) / preheader_frequency;
gcc_assert (e->dest == loop->header);
gcc_assert (ndupl > 0);
@@ -1314,6 +1706,22 @@ duplicate_loop_to_header_edge (struct loop *loop,
spec_edges[SE_ORIG] = orig;
spec_edges[SE_LATCH] = latch_edge;
+ /* Recompute the loop body frequencies. */
+ basic_block my_header = loop->header;
+ int sum_incoming_frequencies = 0;
+
+ zero_loop_frequencies (loop);
+ FOR_EACH_EDGE(predecessor, ei, my_header->preds)
+ {
+ /* exit_ratio is computed based on remembered circumstances upon
+ entry into this function. Note that loops bounded by a
+ compile-time constant have different exit ratio than loops
+ bounded by a run-time value. */
+ if (!in_loop_p (predecessor->src, loop))
+ sum_incoming_frequencies +=
+ (int) (EDGE_FREQUENCY (predecessor) * exit_ratio + 5000) / 10000;
+ }
+ increment_loop_frequencies (loop, my_header, sum_incoming_frequencies);
place_after = e->src;
for (j = 0; j < ndupl; j++)
{
@@ -1322,7 +1730,10 @@ duplicate_loop_to_header_edge (struct loop *loop,
/* Copy bbs. */
copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
- place_after, true);
+ place_after, true);
+
+ int place_after_frequency = place_after->frequency;
+ basic_block saved_place_after = place_after;
place_after = new_spec_edges[SE_LATCH]->src;
if (flags & DLTHE_RECORD_COPY_NUMBER)
@@ -1353,7 +1764,6 @@ duplicate_loop_to_header_edge (struct loop *loop,
for (i = 0; i < n; i++)
new_bbs[i]->flags &= ~BB_DUPLICATED;
}
-
/* Redirect the special edges. */
if (is_latch)
{
@@ -1373,13 +1783,15 @@ duplicate_loop_to_header_edge (struct loop *loop,
e = new_spec_edges[SE_LATCH];
}
+ zero_partial_loop_frequencies (loop, saved_place_after);
+ increment_loop_frequencies (loop,
+ saved_place_after, place_after_frequency);
/* Record exit edge in this copy. */
if (orig && bitmap_bit_p (wont_exit, j + 1))
{
if (to_remove)
to_remove->safe_push (new_spec_edges[SE_ORIG]);
- set_zero_probability (new_spec_edges[SE_ORIG]);
-
+ set_zero_probability (loop, new_spec_edges[SE_ORIG]);
/* Scale the frequencies of the blocks dominated by the exit. */
if (bbs_to_scale)
{
@@ -1414,16 +1826,14 @@ duplicate_loop_to_header_edge (struct loop *loop,
{
if (to_remove)
to_remove->safe_push (orig);
- set_zero_probability (orig);
+ set_zero_probability (loop, orig);
/* Scale the frequencies of the blocks dominated by the exit. */
if (bbs_to_scale)
{
EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
- {
- scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
- REG_BR_PROB_BASE);
- }
+ scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
+ REG_BR_PROB_BASE);
}
}
@@ -1462,6 +1872,11 @@ duplicate_loop_to_header_edge (struct loop *loop,
free (bbs);
BITMAP_FREE (bbs_to_scale);
+ /* The call to check_loop_frequency_integrity checks for consistency
+ of predecessor frequencies with the frequency of the node they
+ precede. */
+ if (flag_checking)
+ check_loop_frequency_integrity (loop);
return true;
}
===================================================================
@@ -60,4 +60,7 @@ extern void force_single_succ_latches (void);
struct loop * loop_version (struct loop *, void *,
basic_block *, unsigned, unsigned, unsigned, bool);
+extern bool in_loop_p (basic_block, struct loop *);
+extern void zero_loop_frequencies (struct loop *);
+extern void increment_loop_frequencies (struct loop *, basic_block, int);
#endif /* GCC_CFGLOOPMANIP_H */
===================================================================
@@ -591,10 +591,9 @@ unroll_loop_constant_iterations (struct loop *loop
max_unroll,
wont_exit, desc->out_edge,
&remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
+ opt_info
? DLTHE_RECORD_COPY_NUMBER
- : 0));
+ : 0);
gcc_assert (ok);
if (opt_info)
@@ -867,7 +866,22 @@ unroll_loop_runtime_iterations (struct loop *loop)
bool exit_at_end = loop_exit_at_end_p (loop);
struct opt_info *opt_info = NULL;
bool ok;
+ int header_frequency;
+ int sum_incoming_frequencies;
+ int exit_multiplier;
+ edge_iterator ei;
+ edge predecessor;
+ header_frequency = loop->header->frequency;
+ sum_incoming_frequencies = 0;
+ FOR_EACH_EDGE (predecessor, ei, loop->header->preds)
+ {
+ if (!in_loop_p (predecessor->src, loop))
+ sum_incoming_frequencies += EDGE_FREQUENCY (predecessor);
+ }
+
+ exit_multiplier = (header_frequency * 10000) / sum_incoming_frequencies;
+
if (flag_split_ivs_in_unroller
|| flag_variable_expansion_in_unroller)
opt_info = analyze_insns_in_loop (loop);
@@ -952,6 +966,9 @@ unroll_loop_runtime_iterations (struct loop *loop)
/* Record the place where switch will be built for preconditioning. */
swtch = split_edge (loop_preheader_edge (loop));
+ int iter_freq, new_freq;
+ iter_freq = new_freq = swtch->frequency / (n_peel+1);
+ swtch->frequency = new_freq;
for (i = 0; i < n_peel; i++)
{
/* Peel the copy. */
@@ -958,10 +975,17 @@ unroll_loop_runtime_iterations (struct loop *loop)
bitmap_clear (wont_exit);
if (i != n_peel - 1 || !last_may_exit)
bitmap_set_bit (wont_exit, 1);
+ int saved_header_frequency = loop->header->frequency;
+ zero_loop_frequencies (loop);
+
+ int new_header_freq = (saved_header_frequency / (n_peel + 1)) * (i + 1);
+ increment_loop_frequencies (loop, loop->header, new_header_freq);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ);
+ zero_loop_frequencies (loop);
+ increment_loop_frequencies (loop, loop->header, saved_header_frequency);
gcc_assert (ok);
/* Create item for switch. */
@@ -979,11 +1003,23 @@ unroll_loop_runtime_iterations (struct loop *loop)
swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
- single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
+ single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
+ new_freq = new_freq + iter_freq;
+ swtch->frequency = new_freq;
+
+ int prehead_frequency = 0;
+ edge_iterator ei;
+ edge an_edge;
+ FOR_EACH_EDGE (an_edge, ei, preheader->preds)
+ {
+ int the_edge_frequency = EDGE_FREQUENCY (an_edge);
+ prehead_frequency += the_edge_frequency;
+ }
+ preheader->frequency = prehead_frequency;
}
if (extra_zero_check)
@@ -1015,14 +1051,61 @@ unroll_loop_runtime_iterations (struct loop *loop)
bitmap_clear_bit (wont_exit, may_exit_copy);
opt_info_start_duplication (opt_info);
+ {
+ /* Recompute the loop body frequencies. */
+ zero_loop_frequencies (loop);
+
+ basic_block my_header = loop->header;
+ sum_incoming_frequencies = 0;
+
+ edge_iterator ei;
+ edge predecessor;
+ FOR_EACH_EDGE (predecessor, ei, my_header->preds)
+ {
+ if (!in_loop_p (predecessor->src, loop))
+ sum_incoming_frequencies += EDGE_FREQUENCY (predecessor);
+ }
+
+ /* Normally, a loop whose iterations are bounded by a value that
+ cannot be computed until run time is assumed to have an
+ exit probability of 0.09. (In other words, a conditional exit
+ from the loop has 9% probability of exiting the loop and 91%
+ probability of remaining in the loop.) This is just a
+ heuristic. It's not clear how well this rule of thumb correlates
+ with real-world behavior. In any case, for a loop that is so
+ characterized, the frequency of the loop header would be the
+ sum of incoming frequencies divided by 0.09.
+
+ There are situations, however, when 9% is not the right exit
+ probability. For example, if this loop was originally
+ processed as if its iteration count was bounded by a
+ compile-time constant, but the loop was subsequently not
+ recognized as constant bounded, causing control to flow into
+ this function, then the "right ratio" for this loop might be
+ something different, like 1%. If the loop was originally
+ generated with a ratio of 1%, then it is important to use this
+ same ratio when we compute the header ratio. Otherwise, there
+ will be inconsistency between the sum of incoming edge
+ frequencies and the sum of outgoing edge frequencies.
+
+ The value of exit_multiplier represents the number of
+ ten-thousandths by which to multiply sum_incoming_frequencies.
+ After multiplying by this quantity, we add 5,000 ten
+ thousandths in order to force integer rounding during the next
+ step, when we divide ten thousandths by 10000 in order to get
+ "ones". */
+ sum_incoming_frequencies *= exit_multiplier;
+ sum_incoming_frequencies += 5000;
+ sum_incoming_frequencies /= 10000;
+ increment_loop_frequencies (loop, my_header, sum_incoming_frequencies);
+ }
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
max_unroll,
wont_exit, desc->out_edge,
&remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
+ opt_info
? DLTHE_RECORD_COPY_NUMBER
- : 0));
+ : 0);
gcc_assert (ok);
if (opt_info)