===================================================================
@@ -478,11 +478,10 @@ struct edge_list
#define BRANCH_EDGE(bb) (EDGE_SUCC ((bb), 0)->flags & EDGE_FALLTHRU \
? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0))
+#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
/* Return expected execution frequency of the edge E. */
-#define EDGE_FREQUENCY(e) (((e)->src->frequency \
- * (e)->probability \
- + REG_BR_PROB_BASE / 2) \
- / REG_BR_PROB_BASE)
+#define EDGE_FREQUENCY(e) RDIV ((e)->src->frequency * (e)->probability, \
+ REG_BR_PROB_BASE)
/* Return nonzero if edge is critical. */
#define EDGE_CRITICAL_P(e) (EDGE_COUNT ((e)->src->succs) >= 2 \
@@ -910,4 +909,40 @@ extern void default_rtl_profile (void);
/* In profile.c. */
extern gcov_working_set_t *find_working_set(unsigned pct_times_10);
+/* Check tha probability is sane. */
+
+static inline void
+check_probability (int prob)
+{
+ gcc_checking_assert (prob >= 0 && prob < REG_BR_PROB_BASE);
+}
+
+/* Given PROB1 and PROB2, return PROB1*PROB2/REG_BR_PROB_BASE.
+ Used to combine BB probabilities. */
+
+static inline int
+combine_probabilities (int prob1, int prob2)
+{
+ check_probability (prob1);
+ check_probability (prob2);
+ return RDIV (prob1 * prob2, REG_BR_PROB_BASE);
+}
+
+/* Apply probability PROB on frequency or count FREQ. */
+
+static inline gcov_type
+apply_probability (gcov_type freq, int prob)
+{
+ check_probability (prob);
+ return RDIV (freq * prob, REG_BR_PROB_BASE);
+}
+
+/* Return inverse probability for PROB. */
+
+static inline int
+inverse_probability (int prob1)
+{
+ check_probability (prob1);
+ return REG_BR_PROB_BASE - prob1;
+}
#endif /* GCC_BASIC_BLOCK_H */
===================================================================
@@ -1666,3 +1666,121 @@ loop_exits_from_bb_p (struct loop *loop,
return false;
}
+
+/* Scale the profile estiamte within loop by SCALE.
+ If ITERATION_BOUND is non-zero, scale even further if loop is predicted
+ to iterate too many times. */
+void
+scale_loop_profile (struct loop *loop, int scale, int iteration_bound)
+{
+ gcov_type iterations = expected_loop_iterations_unbounded (loop);
+ basic_block *bbs;
+ unsigned int i;
+ edge e;
+ edge_iterator ei;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, ";; Scaling loop %i with scale %f, "
+ "bounding iterations to %i from guessed %i\n",
+ loop->num, (double)scale / REG_BR_PROB_BASE,
+ iteration_bound, (int)iterations);
+
+ /* See if loop is predicted to iterate too many times. */
+ if (iteration_bound && iterations > 0
+ && RDIV (iterations * scale, REG_BR_PROB_BASE) > iteration_bound)
+ {
+ /* Fixing loop profile for different trip count is not trivial; the exit
+ probabilities has to be updated to match and frequencies propagated down
+ to the loop body.
+
+ We fully update only the simple case of loop with single exit that is
+ either from the latch or BB just before latch and leads from BB with
+ simple conditional jump. This is OK for use in vectorizer. */
+ e = single_exit (loop);
+ if (e)
+ {
+ edge other_e;
+ int freq_delta;
+ gcov_type count_delta;
+
+ FOR_EACH_EDGE (other_e, ei, e->src->succs)
+ if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
+ && e != other_e)
+ break;
+
+ /* Probability of exit must be 1/iterations. */
+ freq_delta = EDGE_FREQUENCY (e);
+ e->probability = REG_BR_PROB_BASE / iteration_bound;
+ other_e->probability = inverse_probability (e->probability);
+ freq_delta -= EDGE_FREQUENCY (e);
+
+ /* Adjust counts accordingly. */
+ count_delta = e->count;
+ e->count = apply_probability (e->src->count, e->probability);
+ other_e->count = apply_probability (e->src->count, other_e->probability);
+ count_delta -= e->count;
+
+ /* If latch exists, change its frequency and count, since we changed
+ probability of exit. Theoretically we should update everything from
+ source of exit edge to latch, but for vectorizer this is enough. */
+ if (loop->latch
+ && loop->latch != e->src)
+ {
+ loop->latch->frequency += freq_delta;
+ if (loop->latch->frequency < 0)
+ loop->latch->frequency = 0;
+ loop->latch->count += count_delta;
+ if (loop->latch->count < 0)
+ loop->latch->count = 0;
+ }
+ }
+
+ /* Roughly speaking we want to reduce the loop body profile by the
+ the difference of loop iterations. We however can do better if
+ we look at the actual profile, if it is available. */
+ scale = RDIV (iteration_bound * scale, iterations);
+ if (loop->header->count)
+ {
+ gcov_type count_in = 0;
+
+ FOR_EACH_EDGE (e, ei, loop->header->preds)
+ if (e->src != loop->latch)
+ count_in += e->count;
+
+ if (count_in != 0)
+ scale = RDIV (count_in * iteration_bound * REG_BR_PROB_BASE, loop->header->count);
+ }
+ else if (loop->header->frequency)
+ {
+ int freq_in = 0;
+
+ FOR_EACH_EDGE (e, ei, loop->header->preds)
+ if (e->src != loop->latch)
+ freq_in += EDGE_FREQUENCY (e);
+
+ if (freq_in != 0)
+ scale = RDIV (freq_in * iteration_bound * REG_BR_PROB_BASE, loop->header->frequency);
+ }
+ if (!scale)
+ scale = 1;
+ }
+
+ if (scale == REG_BR_PROB_BASE)
+ return;
+
+ /* Scale the actual probabilities. */
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+
+ bb->count = RDIV (bb->count * scale, REG_BR_PROB_BASE);
+ bb->frequency = RDIV (bb->frequency * scale, REG_BR_PROB_BASE);
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->count = RDIV (e->count * scale, REG_BR_PROB_BASE);
+ }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, ";; guessed iterations are now %i\n",
+ (int)expected_loop_iterations_unbounded (loop));
+ free (bbs);
+}
===================================================================
@@ -713,5 +713,6 @@ extern void unroll_and_peel_loops (int);
extern void doloop_optimize_loops (void);
extern void move_loop_invariants (void);
extern bool finite_loop_p (struct loop *);
+extern void scale_loop_profile (struct loop *loop, int scale, int iteration_bound);
#endif /* GCC_CFGLOOP_H */
===================================================================
@@ -931,7 +932,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
gimple_seq cond_expr_stmt_list,
- basic_block exit_bb, basic_block dom_bb)
+ basic_block exit_bb, basic_block dom_bb,
+ int probability)
{
gimple_stmt_iterator gsi;
edge new_e, enter_e;
@@ -956,6 +958,12 @@ slpeel_add_loop_guard (basic_block guard
/* Add new edge to connect guard block to the merge/loop-exit block. */
new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
+
+ new_e->count = guard_bb->count;
+ new_e->probability = probability;
+ new_e->count = apply_probability (enter_e->count, probability);
+ enter_e->count -= new_e->count;
+ enter_e->probability = inverse_probability (probability);
set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
return new_e;
}
@@ -1038,7 +1046,8 @@ static void
set_prologue_iterations (basic_block bb_before_first_loop,
tree *first_niters,
struct loop *loop,
- unsigned int th)
+ unsigned int th,
+ int probability)
{
edge e;
basic_block cond_bb, then_bb;
@@ -1067,7 +1076,15 @@ set_prologue_iterations (basic_block bb_
e_true->flags &= ~EDGE_FALLTHRU;
e_true->flags |= EDGE_TRUE_VALUE;
+ e_true->probability = probability;
+ e_false->probability = inverse_probability (probability);
+ e_true->count = apply_probability (cond_bb->count, probability);
+ e_false->count = cond_bb->count - e_true->count;
+ then_bb->frequency = EDGE_FREQUENCY (e_true);
+ then_bb->count = e_true->count;
+
e_fallthru = EDGE_SUCC (then_bb, 0);
+ e_fallthru->count = then_bb->count;
gsi = gsi_last_bb (cond_bb);
cost_pre_condition =
@@ -1126,6 +1143,8 @@ set_prologue_iterations (basic_block bb_
prologue generation or whether cost model check
has not occurred during prologue generation and hence
needs to occur during epilogue generation.
+ - BOUND1 is the upper bound on number of iterations of the first loop (if known)
+ - BOUND2 is the upper bound on number of iterations of the second loop (if known)
Output:
@@ -1153,7 +1172,8 @@ slpeel_tree_peel_loop_to_edge (struct lo
edge e, tree *first_niters,
tree niters, bool update_first_loop_count,
unsigned int th, bool check_profitability,
- tree cond_expr, gimple_seq cond_expr_stmt_list)
+ tree cond_expr, gimple_seq cond_expr_stmt_list,
+ int bound1, int bound2)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
@@ -1166,6 +1186,13 @@ slpeel_tree_peel_loop_to_edge (struct lo
edge exit_e = single_exit (loop);
LOC loop_loc;
tree cost_pre_condition = NULL_TREE;
+ /* There are many aspects to how likely the first loop is going to be executed.
+ Without histogram we can't really do good job. Simply set it to
+ 2/3, so the first loop is not reordered to the end of function and
+ the hot path through stays short. */
+ int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
+ int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
+ int probability_of_second_loop;
if (!slpeel_can_duplicate_loop_p (loop, e))
return NULL;
@@ -1341,6 +1368,21 @@ slpeel_tree_peel_loop_to_edge (struct lo
bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
bb_before_second_loop = split_edge (single_exit (first_loop));
+ probability_of_second_loop = (inverse_probability (first_guard_probability)
+ + combine_probabilities (second_guard_probability,
+ first_guard_probability));
+ /* Theoretically preheader edge of first loop and exit edge should have
+ same frequencies. Loop exit probablities are however easy to get wrong.
+ It is safer to copy value from original loop entry. */
+ bb_before_second_loop->frequency
+ = apply_probability (bb_before_first_loop->frequency,
+ probability_of_second_loop);
+ bb_before_second_loop->count
+ = apply_probability (bb_before_first_loop->count,
+ probability_of_second_loop);
+ single_succ_edge (bb_before_second_loop)->count
+ = bb_before_second_loop->count;
+
/* Epilogue peeling. */
if (!update_first_loop_count)
{
@@ -1374,7 +1416,7 @@ slpeel_tree_peel_loop_to_edge (struct lo
{
if (check_profitability)
set_prologue_iterations (bb_before_first_loop, first_niters,
- loop, th);
+ loop, th, first_guard_probability);
pre_condition =
fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
@@ -1383,7 +1425,10 @@ slpeel_tree_peel_loop_to_edge (struct lo
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
cond_expr_stmt_list,
- bb_before_second_loop, bb_before_first_loop);
+ bb_before_second_loop, bb_before_first_loop,
+ inverse_probability (first_guard_probability));
+ scale_loop_profile (first_loop, first_guard_probability,
+ check_profitability && (int)th > bound1 ? th : bound1);
slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
first_loop == new_loop,
&new_exit_bb);
@@ -1421,7 +1466,9 @@ slpeel_tree_peel_loop_to_edge (struct lo
pre_condition =
fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
- bb_after_second_loop, bb_before_first_loop);
+ bb_after_second_loop, bb_before_first_loop,
+ inverse_probability (second_guard_probability));
+ scale_loop_profile (second_loop, probability_of_second_loop, bound2);
slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
second_loop == new_loop, &new_exit_bb);
@@ -1882,7 +1930,8 @@ vect_do_peeling_for_loop_bound (loop_vec
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
&ratio_mult_vf_name, ni_name, false,
th, check_profitability,
- cond_expr, cond_expr_stmt_list);
+ cond_expr, cond_expr_stmt_list,
+ 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
@@ -1951,7 +2000,7 @@ vect_do_peeling_for_loop_bound (loop_vec
use TYPE_VECTOR_SUBPARTS. */
static tree
-vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -1977,6 +2026,7 @@ vect_gen_niters_for_prolog_loop (loop_ve
fprintf (vect_dump, "known peeling = %d.", npeel);
iters = build_int_cst (niters_type, npeel);
+ *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
}
else
{
@@ -2015,6 +2065,7 @@ vect_gen_niters_for_prolog_loop (loop_ve
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters);
+ *bound = nelements;
}
/* Create: prolog_loop_niters = min (iters, loop_niters) */
@@ -2107,6 +2158,7 @@ vect_do_peeling_for_alignment (loop_vec_
tree wide_prolog_niters;
struct loop *new_loop;
int max_iter;
+ int bound = 0;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
@@ -2115,13 +2167,16 @@ vect_do_peeling_for_alignment (loop_vec_
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
- ni_name);
+ ni_name,
+ &bound);
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
&niters_of_prolog_loop, ni_name, true,
- th, check_profitability, NULL_TREE, NULL);
+ th, check_profitability, NULL_TREE, NULL,
+ bound,
+ 0);
gcc_assert (new_loop);
#ifdef ENABLE_CHECKING