Message ID | 20171102153225.GA53964@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
Series | Drop frequencies from basic blocks | expand |
Hi Honza. Thanks for the huge patch. I'm willing to help you with testing, but I can't apply the patch on top of r254348: ../../gcc/profile.c: In function ‘void compute_branch_probabilities(unsigned int, unsigned int)’: ../../gcc/profile.c:794:11: error: ‘flag_guess_branch_probability’ was not declared in this scope || !flag_guess_branch_probability) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../../gcc/profile.c:794:11: note: suggested alternative: ‘OPT_fguess_branch_probability’ || !flag_guess_branch_probability) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OPT_fguess_branch_probability ../../gcc/profile.c:801:21: error: no match for ‘operator!=’ (operand types are ‘profile_count’ and ‘profile_count’) if (bb->count != profile_count::zero ()) ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ In file included from ../../gcc/coretypes.h:397:0, from ../../gcc/profile.c:52: ../../gcc/wide-int.h:3158:19: note: candidate: template<class T1, class T2> typename wi::binary_traits<T1, T2>::predicate_result operator!=(const T1&, const T2&) BINARY_PREDICATE (operator !=, ne_p) ^ ../../gcc/wide-int.h:3142:3: note: in definition of macro ‘BINARY_PREDICATE’ OP (const T1 &x, const T2 &y) \ ^~ ../../gcc/wide-int.h:3158:19: note: template argument deduction/substitution failed: BINARY_PREDICATE (operator !=, ne_p) ^ ../../gcc/wide-int.h:3142:3: note: in definition of macro ‘BINARY_PREDICATE’ OP (const T1 &x, const T2 &y) \ ^~ ../../gcc/wide-int.h: In substitution of ‘template<class T1, class T2> typename wi::binary_traits<T1, T2>::predicate_result operator!=(const T1&, const T2&) [with T1 = profile_count; T2 = profile_count]’: ../../gcc/profile.c:801:45: required from here ../../gcc/wide-int.h:3158:19: error: incomplete type ‘wi::int_traits<profile_count>’ used in nested name specifier BINARY_PREDICATE (operator !=, ne_p) ^ ../../gcc/wide-int.h:3142:3: note: in definition of macro ‘BINARY_PREDICATE’ OP (const T1 &x, const T2 &y) \ ^~ Can you please check that? Thanks, Martin
> Hi Honza. > > Thanks for the huge patch. I'm willing to help you with testing, but I can't > apply the patch on top of r254348: Sorry, I must have used older diff file, because it is one of unfinished chnages I made today. I am attaching correct diff. Index: asan.c =================================================================== --- asan.c (revision 254348) +++ asan.c (working copy) @@ -1801,6 +1801,7 @@ create_cond_insert_point (gimple_stmt_it ? profile_probability::very_unlikely () : profile_probability::very_likely (); e->probability = fallthrough_probability.invert (); + then_bb->count = e->count (); if (create_then_fallthru_edge) make_single_succ_edge (then_bb, fallthru_bb, EDGE_FALLTHRU); Index: basic-block.h =================================================================== --- basic-block.h (revision 254348) +++ basic-block.h (working copy) @@ -148,9 +148,6 @@ struct GTY((chain_next ("%h.next_bb"), c /* Expected number of executions: calculated in profile.c. */ profile_count count; - /* Expected frequency. Normalized to be in range 0 to BB_FREQ_MAX. */ - int frequency; - /* The discriminator for this block. The discriminator distinguishes among several basic blocks that share a common locus, allowing for more accurate sample-based profiling. */ @@ -301,7 +298,7 @@ enum cfg_bb_flags ? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0)) /* Return expected execution frequency of the edge E. */ -#define EDGE_FREQUENCY(e) e->probability.apply (e->src->frequency) +#define EDGE_FREQUENCY(e) e->count ().to_frequency (cfun) /* Compute a scale factor (or probability) suitable for scaling of gcov_type values via apply_probability() and apply_scale(). 
*/ Index: bb-reorder.c =================================================================== --- bb-reorder.c (revision 254348) +++ bb-reorder.c (working copy) @@ -256,8 +256,8 @@ push_to_next_round_p (const_basic_block there_exists_another_round = round < number_of_rounds - 1; - block_not_hot_enough = (bb->frequency < exec_th - || bb->count < count_th + block_not_hot_enough = (bb->count.to_frequency (cfun) < exec_th + || bb->count.ipa () < count_th || probably_never_executed_bb_p (cfun, bb)); if (there_exists_another_round @@ -293,9 +293,9 @@ find_traces (int *n_traces, struct trace { bbd[e->dest->index].heap = heap; bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest); - if (e->dest->frequency > max_entry_frequency) - max_entry_frequency = e->dest->frequency; - if (e->dest->count.initialized_p () && e->dest->count > max_entry_count) + if (e->dest->count.to_frequency (cfun) > max_entry_frequency) + max_entry_frequency = e->dest->count.to_frequency (cfun); + if (e->dest->count.ipa_p () && e->dest->count > max_entry_count) max_entry_count = e->dest->count; } @@ -329,8 +329,10 @@ find_traces (int *n_traces, struct trace for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux) - fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency); - fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency); + fprintf (dump_file, "%d [%d] ", bb->index, + bb->count.to_frequency (cfun)); + fprintf (dump_file, "%d [%d]\n", bb->index, + bb->count.to_frequency (cfun)); } fflush (dump_file); } @@ -551,7 +553,7 @@ find_traces_1_round (int branch_th, int continue; prob = e->probability; - freq = e->dest->frequency; + freq = e->dest->count.to_frequency (cfun); /* The only sensible preference for a call instruction is the fallthru edge. Don't bother selecting anything else. 
*/ @@ -573,7 +575,7 @@ find_traces_1_round (int branch_th, int || !prob.initialized_p () || ((prob.to_reg_br_prob_base () < branch_th || EDGE_FREQUENCY (e) < exec_th - || e->count () < count_th) && (!for_size))) + || e->count ().ipa () < count_th) && (!for_size))) continue; if (better_edge_p (bb, e, prob, freq, best_prob, best_freq, @@ -671,7 +673,7 @@ find_traces_1_round (int branch_th, int || !prob.initialized_p () || prob.to_reg_br_prob_base () < branch_th || freq < exec_th - || e->count () < count_th) + || e->count ().ipa () < count_th) { /* When partitioning hot/cold basic blocks, make sure the cold blocks (and only the cold blocks) all get @@ -706,7 +708,7 @@ find_traces_1_round (int branch_th, int if (best_edge->dest != bb) { if (EDGE_FREQUENCY (best_edge) - > 4 * best_edge->dest->frequency / 5) + > 4 * best_edge->dest->count.to_frequency (cfun) / 5) { /* The loop has at least 4 iterations. If the loop header is not the first block of the function @@ -783,8 +785,8 @@ find_traces_1_round (int branch_th, int & EDGE_CAN_FALLTHRU) && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX) && single_succ (e->dest) == best_edge->dest - && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge) - || for_size)) + && (2 * e->dest->count.to_frequency (cfun) + >= EDGE_FREQUENCY (best_edge) || for_size)) { best_edge = e; if (dump_file) @@ -945,9 +947,9 @@ bb_to_key (basic_block bb) if (priority) /* The block with priority should have significantly lower key. 
*/ - return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency); + return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun)); - return -bb->frequency; + return -bb->count.to_frequency (cfun); } /* Return true when the edge E from basic block BB is better than the temporary @@ -1290,7 +1292,7 @@ connect_traces (int n_traces, struct tra && !connected[bbd[di].start_of_trace] && BB_PARTITION (e2->dest) == current_partition && EDGE_FREQUENCY (e2) >= freq_threshold - && e2->count () >= count_threshold + && e2->count ().ipa () >= count_threshold && (!best2 || e2->probability > best2->probability || (e2->probability == best2->probability @@ -1317,7 +1319,7 @@ connect_traces (int n_traces, struct tra optimize_edge_for_speed_p (best) && EDGE_FREQUENCY (best) >= freq_threshold && (!best->count ().initialized_p () - || best->count () >= count_threshold))) + || best->count ().ipa () >= count_threshold))) { basic_block new_bb; @@ -1375,7 +1377,7 @@ copy_bb_p (const_basic_block bb, int cod int max_size = uncond_jump_length; rtx_insn *insn; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return false; if (EDGE_COUNT (bb->preds) < 2) return false; @@ -1459,7 +1461,6 @@ fix_up_crossing_landing_pad (eh_landing_ last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb; new_bb = create_basic_block (new_label, jump, last_bb); new_bb->aux = last_bb->aux; - new_bb->frequency = post_bb->frequency; new_bb->count = post_bb->count; last_bb->aux = new_bb; @@ -1517,7 +1518,6 @@ sanitize_hot_paths (bool walk_up, unsign edge_iterator ei; profile_probability highest_probability = profile_probability::uninitialized (); - int highest_freq = 0; profile_count highest_count = profile_count::uninitialized (); bool found = false; @@ -1544,11 +1544,8 @@ sanitize_hot_paths (bool walk_up, unsign /* The following loop will look for the hottest edge via the edge count, if it is non-zero, then fallback to the edge frequency and finally the edge probability. 
*/ - if (!highest_count.initialized_p () || e->count () > highest_count) + if (!(e->count () > highest_count)) highest_count = e->count (); - int edge_freq = EDGE_FREQUENCY (e); - if (edge_freq > highest_freq) - highest_freq = edge_freq; if (!highest_probability.initialized_p () || e->probability > highest_probability) highest_probability = e->probability; @@ -1573,17 +1570,12 @@ sanitize_hot_paths (bool walk_up, unsign /* Select the hottest edge using the edge count, if it is non-zero, then fallback to the edge frequency and finally the edge probability. */ - if (highest_count > 0) + if (highest_count.initialized_p ()) { - if (e->count () < highest_count) + if (!(e->count () >= highest_count)) continue; } - else if (highest_freq) - { - if (EDGE_FREQUENCY (e) < highest_freq) - continue; - } - else if (e->probability < highest_probability) + else if (!(e->probability >= highest_probability)) continue; basic_block reach_bb = walk_up ? e->src : e->dest; Index: bt-load.c =================================================================== --- bt-load.c (revision 254348) +++ bt-load.c (working copy) @@ -185,7 +185,7 @@ static int first_btr, last_btr; static int basic_block_freq (const_basic_block bb) { - return bb->frequency; + return bb->count.to_frequency (cfun); } /* If the rtx at *XP references (sets or reads) any branch target Index: cfg.c =================================================================== --- cfg.c (revision 254348) +++ cfg.c (working copy) @@ -68,6 +68,7 @@ init_flow (struct function *the_fun) if (!the_fun->cfg) the_fun->cfg = ggc_cleared_alloc<control_flow_graph> (); n_edges_for_fn (the_fun) = 0; + the_fun->cfg->count_max = profile_count::uninitialized (); ENTRY_BLOCK_PTR_FOR_FN (the_fun) = alloc_block (); ENTRY_BLOCK_PTR_FOR_FN (the_fun)->index = ENTRY_BLOCK; @@ -447,13 +448,18 @@ check_bb_profile (basic_block bb, FILE * } if (bb != ENTRY_BLOCK_PTR_FOR_FN (fun)) { - int sum = 0; + profile_count sum = profile_count::zero (); FOR_EACH_EDGE (e, ei, 
bb->preds) - sum += EDGE_FREQUENCY (e); - if (abs (sum - bb->frequency) > 100) - fprintf (file, - ";; %sInvalid sum of incoming frequencies %i, should be %i\n", - s_indent, sum, bb->frequency); + sum += e->count (); + if (sum.differs_from_p (bb->count)) + { + fprintf (file, ";; %sInvalid sum of incoming counts ", + s_indent); + sum.dump (file); + fprintf (file, ", should be "); + bb->count.dump (file); + fprintf (file, "\n"); + } } if (BB_PARTITION (bb) == BB_COLD_PARTITION) { @@ -751,7 +757,6 @@ dump_bb_info (FILE *outf, basic_block bb fputs (", count ", outf); bb->count.dump (outf); } - fprintf (outf, ", freq %i", bb->frequency); if (maybe_hot_bb_p (fun, bb)) fputs (", maybe hot", outf); if (probably_never_executed_bb_p (fun, bb)) @@ -843,15 +848,15 @@ brief_dump_cfg (FILE *file, dump_flags_t } } -/* An edge originally destinating BB of FREQUENCY and COUNT has been proved to +/* An edge originally destinating BB of COUNT has been proved to leave the block by TAKEN_EDGE. Update profile of BB such that edge E can be redirected to destination of TAKEN_EDGE. This function may leave the profile inconsistent in the case TAKEN_EDGE - frequency or count is believed to be lower than FREQUENCY or COUNT + frequency or count is believed to be lower than COUNT respectively. */ void -update_bb_profile_for_threading (basic_block bb, int edge_frequency, +update_bb_profile_for_threading (basic_block bb, profile_count count, edge taken_edge) { edge c; @@ -866,16 +871,10 @@ update_bb_profile_for_threading (basic_b } bb->count -= count; - bb->frequency -= edge_frequency; - if (bb->frequency < 0) - bb->frequency = 0; - /* Compute the probability of TAKEN_EDGE being reached via threaded edge. Watch for overflows. */ - if (bb->frequency) - /* FIXME: We should get edge frequency as count. 
*/ - prob = profile_probability::probability_in_gcov_type - (edge_frequency, bb->frequency); + if (bb->count.nonzero_p ()) + prob = count.probability_in (bb->count); else prob = profile_probability::never (); if (prob > taken_edge->probability) @@ -899,9 +898,9 @@ update_bb_profile_for_threading (basic_b if (prob == profile_probability::never ()) { if (dump_file) - fprintf (dump_file, "Edge frequencies of bb %i has been reset, " - "frequency of block should end up being 0, it is %i\n", - bb->index, bb->frequency); + fprintf (dump_file, "Edge probabilities of bb %i has been reset, " + "count of block should end up being 0, it is non-zero\n", + bb->index); EDGE_SUCC (bb, 0)->probability = profile_probability::guessed_always (); ei = ei_start (bb->succs); ei_next (&ei); @@ -942,18 +941,10 @@ scale_bbs_frequencies_int (basic_block * for (i = 0; i < nbbs; i++) { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - /* Make sure the frequencies do not grow over BB_FREQ_MAX. */ - if (bbs[i]->frequency > BB_FREQ_MAX) - bbs[i]->frequency = BB_FREQ_MAX; bbs[i]->count = bbs[i]->count.apply_scale (num, den); } } -/* numbers smaller than this value are safe to multiply without getting - 64bit overflow. */ -#define MAX_SAFE_MULTIPLIER (1 << (sizeof (int64_t) * 4 - 1)) - /* Multiply all frequencies of basic blocks in array BBS of length NBBS by NUM/DEN, in gcov_type arithmetic. More accurate than previous function but considerably slower. 
*/ @@ -962,28 +953,9 @@ scale_bbs_frequencies_gcov_type (basic_b gcov_type den) { int i; - gcov_type fraction = RDIV (num * 65536, den); - - gcc_assert (fraction >= 0); - if (num < MAX_SAFE_MULTIPLIER) - for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) - bbs[i]->count = bbs[i]->count.apply_scale (num, den); - else - bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536); - } - else - for (i = 0; i < nbbs; i++) - { - if (sizeof (gcov_type) > sizeof (int)) - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - else - bbs[i]->frequency = RDIV (bbs[i]->frequency * fraction, 65536); - bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536); - } + for (i = 0; i < nbbs; i++) + bbs[i]->count = bbs[i]->count.apply_scale (num, den); } /* Multiply all frequencies of basic blocks in array BBS of length NBBS @@ -994,13 +966,9 @@ scale_bbs_frequencies_profile_count (bas profile_count num, profile_count den) { int i; - - for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num.to_gcov_type (), - den.to_gcov_type ()); + if (num == profile_count::zero () || den.nonzero_p ()) + for (i = 0; i < nbbs; i++) bbs[i]->count = bbs[i]->count.apply_scale (num, den); - } } /* Multiply all frequencies of basic blocks in array BBS of length NBBS @@ -1013,10 +981,7 @@ scale_bbs_frequencies (basic_block *bbs, int i; for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = p.apply (bbs[i]->frequency); - bbs[i]->count = bbs[i]->count.apply_probability (p); - } + bbs[i]->count = bbs[i]->count.apply_probability (p); } /* Helper types for hash tables. */ Index: cfg.h =================================================================== --- cfg.h (revision 254348) +++ cfg.h (working copy) @@ -71,6 +71,9 @@ struct GTY(()) control_flow_graph { /* Maximal number of entities in the single jumptable. Used to estimate final flowgraph size. 
*/ int max_jumptable_ents; + + /* Maximal count of BB in function. */ + profile_count count_max; }; @@ -103,7 +106,7 @@ extern void debug_bb (basic_block); extern basic_block debug_bb_n (int); extern void dump_bb_info (FILE *, basic_block, int, dump_flags_t, bool, bool); extern void brief_dump_cfg (FILE *, dump_flags_t); -extern void update_bb_profile_for_threading (basic_block, int, profile_count, edge); +extern void update_bb_profile_for_threading (basic_block, profile_count, edge); extern void scale_bbs_frequencies_int (basic_block *, int, int, int); extern void scale_bbs_frequencies_gcov_type (basic_block *, int, gcov_type, gcov_type); Index: cfgbuild.c =================================================================== --- cfgbuild.c (revision 254348) +++ cfgbuild.c (working copy) @@ -499,7 +499,6 @@ find_bb_boundaries (basic_block bb) remove_edge (fallthru); /* BB is unreachable at this point - we need to determine its profile once edges are built. */ - bb->frequency = 0; bb->count = profile_count::uninitialized (); flow_transfer_insn = NULL; debug_insn = NULL; @@ -669,7 +668,6 @@ find_many_sub_basic_blocks (sbitmap bloc { bool initialized_src = false, uninitialized_src = false; bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) { if (e->count ().initialized_p ()) @@ -679,8 +677,6 @@ find_many_sub_basic_blocks (sbitmap bloc } else uninitialized_src = true; - if (e->probability.initialized_p ()) - bb->frequency += EDGE_FREQUENCY (e); } /* When some edges are missing with read profile, this is most likely because RTL expansion introduced loop. @@ -692,7 +688,7 @@ find_many_sub_basic_blocks (sbitmap bloc precisely once. */ if (!initialized_src || (uninitialized_src - && profile_status_for_fn (cfun) != PROFILE_READ)) + && profile_status_for_fn (cfun) < PROFILE_GUESSED)) bb->count = profile_count::uninitialized (); } /* If nothing changed, there is no need to create new BBs. 
*/ Index: cfgcleanup.c =================================================================== --- cfgcleanup.c (revision 254348) +++ cfgcleanup.c (working copy) @@ -559,8 +559,6 @@ try_forward_edges (int mode, basic_block { /* Save the values now, as the edge may get removed. */ profile_count edge_count = e->count (); - profile_probability edge_probability = e->probability; - int edge_frequency; int n = 0; e->goto_locus = goto_locus; @@ -585,8 +583,6 @@ try_forward_edges (int mode, basic_block /* We successfully forwarded the edge. Now update profile data: for each edge we traversed in the chain, remove the original edge's execution count. */ - edge_frequency = edge_probability.apply (b->frequency); - do { edge t; @@ -596,16 +592,12 @@ try_forward_edges (int mode, basic_block gcc_assert (n < nthreaded_edges); t = threaded_edges [n++]; gcc_assert (t->src == first); - update_bb_profile_for_threading (first, edge_frequency, - edge_count, t); + update_bb_profile_for_threading (first, edge_count, t); update_br_prob_note (first); } else { first->count -= edge_count; - first->frequency -= edge_frequency; - if (first->frequency < 0) - first->frequency = 0; /* It is possible that as the result of threading we've removed edge as it is threaded to the fallthru edge. Avoid @@ -2109,7 +2101,7 @@ try_crossjump_to_edge (int mode, edge e1 else redirect_edges_to = osrc2; - /* Recompute the frequencies and counts of outgoing edges. */ + /* Recompute the counts of destinations of outgoing edges. */ FOR_EACH_EDGE (s, ei, redirect_edges_to->succs) { edge s2; @@ -2132,24 +2124,19 @@ try_crossjump_to_edge (int mode, edge e1 that there is no more than one in the chain, so we can't run into infinite loop. 
*/ if (FORWARDER_BLOCK_P (s->dest)) - { - s->dest->frequency += EDGE_FREQUENCY (s); - } + s->dest->count += s->count (); if (FORWARDER_BLOCK_P (s2->dest)) - { - s2->dest->frequency -= EDGE_FREQUENCY (s); - if (s2->dest->frequency < 0) - s2->dest->frequency = 0; - } + s2->dest->count -= s->count (); - if (!redirect_edges_to->frequency && !src1->frequency) + /* FIXME: Is this correct? Should be rewritten to count API. */ + if (redirect_edges_to->count.nonzero_p () && src1->count.nonzero_p ()) s->probability = s->probability.combine_with_freq - (redirect_edges_to->frequency, - s2->probability, src1->frequency); + (redirect_edges_to->count.to_frequency (cfun), + s2->probability, src1->count.to_frequency (cfun)); } - /* Adjust count and frequency for the block. An earlier jump + /* Adjust count for the block. An earlier jump threading pass may have left the profile in an inconsistent state (see update_bb_profile_for_threading) so we must be prepared for overflows. */ @@ -2157,9 +2144,6 @@ try_crossjump_to_edge (int mode, edge e1 do { tmp->count += src1->count; - tmp->frequency += src1->frequency; - if (tmp->frequency > BB_FREQ_MAX) - tmp->frequency = BB_FREQ_MAX; if (tmp == redirect_edges_to) break; tmp = find_fallthru_edge (tmp->succs)->dest; Index: cfgexpand.c =================================================================== --- cfgexpand.c (revision 254348) +++ cfgexpand.c (working copy) @@ -2516,7 +2516,6 @@ expand_gimple_cond (basic_block bb, gcon redirect_edge_succ (false_edge, new_bb); false_edge->flags |= EDGE_FALLTHRU; new_bb->count = false_edge->count (); - new_bb->frequency = EDGE_FREQUENCY (false_edge); loop_p loop = find_common_loop (bb->loop_father, dest->loop_father); add_bb_to_loop (new_bb, loop); if (loop->latch == bb @@ -3847,11 +3846,7 @@ expand_gimple_tailcall (basic_block bb, if (!(e->flags & (EDGE_ABNORMAL | EDGE_EH))) { if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - e->dest->frequency -= EDGE_FREQUENCY (e); - if (e->dest->frequency < 0) - 
e->dest->frequency = 0; - } + e->dest->count -= e->count (); probability += e->probability; remove_edge (e); } @@ -5860,7 +5855,6 @@ construct_init_block (void) init_block = create_basic_block (NEXT_INSN (get_insns ()), get_last_insn (), ENTRY_BLOCK_PTR_FOR_FN (cfun)); - init_block->frequency = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; init_block->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; add_bb_to_loop (init_block, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father); if (e) @@ -5924,7 +5918,7 @@ construct_exit_block (void) while (NEXT_INSN (head) && NOTE_P (NEXT_INSN (head))) head = NEXT_INSN (head); /* But make sure exit_block starts with RETURN_LABEL, otherwise the - bb frequency counting will be confused. Any instructions before that + bb count counting will be confused. Any instructions before that label are emitted for the case where PREV_BB falls through into the exit block, so append those instructions to prev_bb in that case. */ if (NEXT_INSN (head) != return_label) @@ -5937,7 +5931,6 @@ construct_exit_block (void) } } exit_block = create_basic_block (NEXT_INSN (head), end, prev_bb); - exit_block->frequency = EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency; exit_block->count = EXIT_BLOCK_PTR_FOR_FN (cfun)->count; add_bb_to_loop (exit_block, EXIT_BLOCK_PTR_FOR_FN (cfun)->loop_father); @@ -5957,10 +5950,7 @@ construct_exit_block (void) if (e2 != e) { exit_block->count -= e2->count (); - exit_block->frequency -= EDGE_FREQUENCY (e2); } - if (exit_block->frequency < 0) - exit_block->frequency = 0; update_bb_for_insn (exit_block); } Index: cfghooks.c =================================================================== --- cfghooks.c (revision 254348) +++ cfghooks.c (working copy) @@ -146,10 +146,12 @@ verify_flow_info (void) error ("verify_flow_info: Wrong count of block %i", bb->index); err = 1; } - if (bb->frequency < 0) + /* FIXME: Graphite and SLJL and target code still tends to produce + edges with no probablity. 
*/ + if (profile_status_for_fn (cfun) >= PROFILE_GUESSED + && !bb->count.initialized_p () && !flag_graphite && 0) { - error ("verify_flow_info: Wrong frequency of block %i %i", - bb->index, bb->frequency); + error ("verify_flow_info: Missing count of block %i", bb->index); err = 1; } @@ -164,7 +166,7 @@ verify_flow_info (void) /* FIXME: Graphite and SLJL and target code still tends to produce edges with no probablity. */ if (profile_status_for_fn (cfun) >= PROFILE_GUESSED - && !e->probability.initialized_p () && 0) + && !e->probability.initialized_p () && !flag_graphite && 0) { error ("Uninitialized probability of edge %i->%i", e->src->index, e->dest->index); @@ -315,7 +317,6 @@ dump_bb_for_graph (pretty_printer *pp, b /* TODO: Add pretty printer for counter. */ if (bb->count.initialized_p ()) pp_printf (pp, "COUNT:" "%" PRId64, bb->count.to_gcov_type ()); - pp_printf (pp, " FREQ:%i |", bb->frequency); pp_write_text_to_stream (pp); if (!(dump_flags & TDF_SLIM)) cfg_hooks->dump_bb_for_graph (pp, bb); @@ -513,7 +514,6 @@ split_block_1 (basic_block bb, void *i) return NULL; new_bb->count = bb->count; - new_bb->frequency = bb->frequency; new_bb->discriminator = bb->discriminator; if (dom_info_available_p (CDI_DOMINATORS)) @@ -626,7 +626,6 @@ split_edge (edge e) { basic_block ret; profile_count count = e->count (); - int freq = EDGE_FREQUENCY (e); edge f; bool irr = (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0; struct loop *loop; @@ -640,7 +639,6 @@ split_edge (edge e) ret = cfg_hooks->split_edge (e); ret->count = count; - ret->frequency = freq; single_succ_edge (ret)->probability = profile_probability::always (); if (irr) @@ -869,7 +867,6 @@ make_forwarder_block (basic_block bb, bo fallthru = split_block_after_labels (bb); dummy = fallthru->src; dummy->count = profile_count::zero (); - dummy->frequency = 0; bb = fallthru->dest; /* Redirect back edges we want to keep. 
*/ @@ -879,10 +876,6 @@ make_forwarder_block (basic_block bb, bo if (redirect_edge_p (e)) { - dummy->frequency += EDGE_FREQUENCY (e); - if (dummy->frequency > BB_FREQ_MAX) - dummy->frequency = BB_FREQ_MAX; - dummy->count += e->count (); ei_next (&ei); continue; @@ -1101,19 +1094,10 @@ duplicate_block (basic_block bb, edge e, new_bb->count = new_count; bb->count -= new_count; - new_bb->frequency = EDGE_FREQUENCY (e); - bb->frequency -= EDGE_FREQUENCY (e); - redirect_edge_and_branch_force (e, new_bb); - - if (bb->frequency < 0) - bb->frequency = 0; } else - { - new_bb->count = bb->count; - new_bb->frequency = bb->frequency; - } + new_bb->count = bb->count; set_bb_original (new_bb, bb); set_bb_copy (bb, new_bb); @@ -1463,13 +1447,6 @@ account_profile_record (struct profile_r if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && profile_status_for_fn (cfun) != PROFILE_ABSENT) { - int sum = 0; - FOR_EACH_EDGE (e, ei, bb->preds) - sum += EDGE_FREQUENCY (e); - if (abs (sum - bb->frequency) > 100 - || (MAX (sum, bb->frequency) > 10 - && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10)) - record->num_mismatched_freq_in[after_pass]++; profile_count lsum = profile_count::zero (); FOR_EACH_EDGE (e, ei, bb->preds) lsum += e->count (); Index: cfgloop.c =================================================================== --- cfgloop.c (revision 254348) +++ cfgloop.c (working copy) @@ -607,7 +607,7 @@ find_subloop_latch_edge_by_profile (vec< tcount += e->count(); } - if (!tcount.initialized_p () || tcount < HEAVY_EDGE_MIN_SAMPLES + if (!tcount.initialized_p () || !(tcount.ipa () > HEAVY_EDGE_MIN_SAMPLES) || (tcount - mcount).apply_scale (HEAVY_EDGE_RATIO, 1) > tcount) return NULL; Index: cfgloopanal.c =================================================================== --- cfgloopanal.c (revision 254348) +++ cfgloopanal.c (working copy) @@ -213,9 +213,10 @@ average_num_loop_insns (const struct loo if (NONDEBUG_INSN_P (insn)) binsns++; - ratio = loop->header->frequency 
== 0 + ratio = loop->header->count.to_frequency (cfun) == 0 ? BB_FREQ_MAX - : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; + : (bb->count.to_frequency (cfun) * BB_FREQ_MAX) + / loop->header->count.to_frequency (cfun); ninsns += binsns * ratio; } free (bbs); @@ -245,8 +246,8 @@ expected_loop_iterations_unbounded (cons /* If we have no profile at all, use AVG_LOOP_NITER. */ if (profile_status_for_fn (cfun) == PROFILE_ABSENT) expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); - else if (loop->latch && (loop->latch->count.reliable_p () - || loop->header->count.reliable_p ())) + else if (loop->latch && (loop->latch->count.initialized_p () + || loop->header->count.initialized_p ())) { profile_count count_in = profile_count::zero (), count_latch = profile_count::zero (); @@ -258,45 +259,25 @@ expected_loop_iterations_unbounded (cons count_in += e->count (); if (!count_latch.initialized_p ()) - ; - else if (!(count_in > profile_count::zero ())) + expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); + else if (!count_in.nonzero_p ()) expected = count_latch.to_gcov_type () * 2; else { expected = (count_latch.to_gcov_type () + count_in.to_gcov_type () - 1) / count_in.to_gcov_type (); - if (read_profile_p) + if (read_profile_p + && count_latch.reliable_p () && count_in.reliable_p ()) *read_profile_p = true; } } - if (expected == -1) - { - int freq_in, freq_latch; - - freq_in = 0; - freq_latch = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (flow_bb_inside_loop_p (loop, e->src)) - freq_latch += EDGE_FREQUENCY (e); - else - freq_in += EDGE_FREQUENCY (e); - - if (freq_in == 0) - { - /* If we have no profile at all, use AVG_LOOP_NITER iterations. 
*/ - if (!freq_latch) - expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); - else - expected = freq_latch * 2; - } - else - expected = (freq_latch + freq_in - 1) / freq_in; - } + else + expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); HOST_WIDE_INT max = get_max_loop_iterations_int (loop); if (max != -1 && max < expected) return max; + return expected; } Index: cfgloopmanip.c =================================================================== --- cfgloopmanip.c (revision 254348) +++ cfgloopmanip.c (working copy) @@ -536,7 +536,6 @@ scale_loop_profile (struct loop *loop, p if (e) { edge other_e; - int freq_delta; profile_count count_delta; FOR_EACH_EDGE (other_e, ei, e->src->succs) @@ -545,23 +544,18 @@ scale_loop_profile (struct loop *loop, p break; /* Probability of exit must be 1/iterations. */ - freq_delta = EDGE_FREQUENCY (e); count_delta = e->count (); e->probability = profile_probability::always () .apply_scale (1, iteration_bound); other_e->probability = e->probability.invert (); - freq_delta -= EDGE_FREQUENCY (e); count_delta -= e->count (); - /* If latch exists, change its frequency and count, since we changed + /* If latch exists, change its count, since we changed probability of exit. Theoretically we should update everything from source of exit edge to latch, but for vectorizer this is enough. */ if (loop->latch && loop->latch != e->src) { - loop->latch->frequency += freq_delta; - if (loop->latch->frequency < 0) - loop->latch->frequency = 0; loop->latch->count += count_delta; } } @@ -571,7 +565,6 @@ scale_loop_profile (struct loop *loop, p we look at the actual profile, if it is available. 
*/ p = p.apply_scale (iteration_bound, iterations); - bool determined = false; if (loop->header->count.initialized_p ()) { profile_count count_in = profile_count::zero (); @@ -584,21 +577,8 @@ scale_loop_profile (struct loop *loop, p { p = count_in.probability_in (loop->header->count.apply_scale (iteration_bound, 1)); - determined = true; } } - if (!determined && loop->header->frequency) - { - int freq_in = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (e->src != loop->latch) - freq_in += EDGE_FREQUENCY (e); - - if (freq_in != 0) - p = profile_probability::probability_in_gcov_type - (freq_in * iteration_bound, loop->header->frequency); - } if (!(p > profile_probability::never ())) p = profile_probability::very_unlikely (); } @@ -800,7 +780,7 @@ create_empty_loop_on_edge (edge entry_ed loop->latch = loop_latch; add_loop (loop, outer); - /* TODO: Fix frequencies and counts. */ + /* TODO: Fix counts. */ scale_loop_frequencies (loop, profile_probability::even ()); /* Update dominators. */ @@ -866,13 +846,11 @@ loopify (edge latch_edge, edge header_ed basic_block pred_bb = header_edge->src; struct loop *loop = alloc_loop (); struct loop *outer = loop_outer (succ_bb->loop_father); - int freq; profile_count cnt; loop->header = header_edge->dest; loop->latch = latch_edge->src; - freq = EDGE_FREQUENCY (header_edge); cnt = header_edge->count (); /* Redirect edges. */ @@ -901,10 +879,9 @@ loopify (edge latch_edge, edge header_ed remove_bb_from_loops (switch_bb); add_bb_to_loop (switch_bb, outer); - /* Fix frequencies. */ + /* Fix counts. */ if (redirect_all_edges) { - switch_bb->frequency = freq; switch_bb->count = cnt; } scale_loop_frequencies (loop, false_scale); @@ -1167,7 +1144,7 @@ duplicate_loop_to_header_edge (struct lo { /* Calculate coefficients by that we have to scale frequencies of duplicated loop bodies. 
*/ - freq_in = header->frequency; + freq_in = header->count.to_frequency (cfun); freq_le = EDGE_FREQUENCY (latch_edge); if (freq_in == 0) freq_in = 1; Index: cfgrtl.c =================================================================== --- cfgrtl.c (revision 254348) +++ cfgrtl.c (working copy) @@ -1533,6 +1533,7 @@ force_nonfallthru_and_redirect (edge e, basic_block bb = create_basic_block (BB_HEAD (e->dest), NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun)); + bb->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; /* Change the existing edge's source to be the new block, and add a new edge from the entry block to the new block. */ @@ -1628,7 +1629,6 @@ force_nonfallthru_and_redirect (edge e, jump_block = create_basic_block (new_head, NULL, e->src); jump_block->count = count; - jump_block->frequency = EDGE_FREQUENCY (e); /* Make sure new block ends up in correct hot/cold section. */ @@ -1652,7 +1652,6 @@ force_nonfallthru_and_redirect (edge e, { new_edge->probability = new_edge->probability.apply_scale (1, 2); jump_block->count = jump_block->count.apply_scale (1, 2); - jump_block->frequency /= 2; edge new_edge2 = make_edge (new_edge->src, target, e->flags & ~EDGE_FALLTHRU); new_edge2->probability = probability - new_edge->probability; @@ -2245,9 +2244,23 @@ void update_br_prob_note (basic_block bb) { rtx note; - if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ()) - return; note = find_reg_note (BB_END (bb), REG_BR_PROB, NULL_RTX); + if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ()) + { + if (note) + { + rtx *note_link, this_rtx; + + note_link = ®_NOTES (BB_END (bb)); + for (this_rtx = *note_link; this_rtx; this_rtx = XEXP (this_rtx, 1)) + if (this_rtx == note) + { + *note_link = XEXP (this_rtx, 1); + break; + } + } + return; + } if (!note || XINT (note, 0) == BRANCH_EDGE (bb)->probability.to_reg_br_prob_note ()) return; @@ -3623,7 +3636,6 @@ relink_block_chain (bool stay_in_cfglayo fprintf (dump_file, "compensation "); else 
fprintf (dump_file, "bb %i ", bb->index); - fprintf (dump_file, " [%i]\n", bb->frequency); } } @@ -5034,7 +5046,7 @@ rtl_account_profile_record (basic_block += insn_cost (insn, true) * bb->count.to_gcov_type (); else if (profile_status_for_fn (cfun) == PROFILE_GUESSED) record->time[after_pass] - += insn_cost (insn, true) * bb->frequency; + += insn_cost (insn, true) * bb->count.to_frequency (cfun); } } Index: cgraph.c =================================================================== --- cgraph.c (revision 254348) +++ cgraph.c (working copy) @@ -862,7 +862,7 @@ symbol_table::create_edge (cgraph_node * edge->next_callee = NULL; edge->lto_stmt_uid = 0; - edge->count = count; + edge->count = count.ipa (); edge->frequency = freq; gcc_checking_assert (freq >= 0); gcc_checking_assert (freq <= CGRAPH_FREQ_MAX); @@ -1308,7 +1308,7 @@ cgraph_edge::redirect_call_stmt_to_calle /* We are producing the final function body and will throw away the callgraph edges really soon. Reset the counts/frequencies to keep verifier happy in the case of roundoff errors. */ - e->count = gimple_bb (e->call_stmt)->count; + e->count = gimple_bb (e->call_stmt)->count.ipa (); e->frequency = compute_call_stmt_bb_frequency (e->caller->decl, gimple_bb (e->call_stmt)); } @@ -1338,7 +1338,7 @@ cgraph_edge::redirect_call_stmt_to_calle prob = profile_probability::even (); new_stmt = gimple_ic (e->call_stmt, dyn_cast<cgraph_node *> (ref->referred), - prob, e->count, e->count + e2->count); + prob); e->speculative = false; e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt, false); @@ -1644,7 +1644,7 @@ cgraph_update_edges_for_call_stmt_node ( /* Otherwise remove edge and create new one; we can't simply redirect since function has changed, so inline plan and other information attached to edge is invalid. 
*/ - count = e->count; + count = e->count.ipa (); frequency = e->frequency; if (e->indirect_unknown_callee || e->inline_failed) e->remove (); @@ -1655,7 +1655,7 @@ cgraph_update_edges_for_call_stmt_node ( { /* We are seeing new direct call; compute profile info based on BB. */ basic_block bb = gimple_bb (new_stmt); - count = bb->count; + count = bb->count.ipa (); frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); } @@ -3082,9 +3082,14 @@ bool cgraph_edge::verify_count_and_frequency () { bool error_found = false; - if (count < 0) + if (!count.verify ()) { - error ("caller edge count is negative"); + error ("caller edge count invalid"); + error_found = true; + } + if (!count.ipa_p ()) + { + error ("caller edge count is local"); error_found = true; } if (frequency < 0) @@ -3183,9 +3188,14 @@ cgraph_node::verify_node (void) identifier_to_locale (e->callee->name ())); error_found = true; } - if (count < 0) + if (!count.verify ()) + { + error ("cgraph count invalid"); + error_found = true; + } + if (!count.ipa_p ()) { - error ("execution count is negative"); + error ("cgraph count is local"); error_found = true; } if (global.inlined_to && same_comdat_group) @@ -3269,7 +3279,9 @@ cgraph_node::verify_node (void) { if (e->verify_count_and_frequency ()) error_found = true; + /* FIXME: re-enable once cgraph is converted to counts. */ if (gimple_has_body_p (e->caller->decl) + && 0 && !e->caller->global.inlined_to && !e->speculative /* Optimized out calls are redirected to __builtin_unreachable. */ @@ -3292,9 +3304,11 @@ cgraph_node::verify_node (void) { if (e->verify_count_and_frequency ()) error_found = true; + /* FIXME: re-enable once cgraph is converted to counts. 
*/ if (gimple_has_body_p (e->caller->decl) && !e->caller->global.inlined_to && !e->speculative + && 0 && (e->frequency != compute_call_stmt_bb_frequency (e->caller->decl, gimple_bb (e->call_stmt)))) Index: cgraphbuild.c =================================================================== --- cgraphbuild.c (revision 254348) +++ cgraphbuild.c (working copy) @@ -190,21 +190,8 @@ record_eh_tables (cgraph_node *node, fun int compute_call_stmt_bb_frequency (tree decl, basic_block bb) { - int entry_freq = ENTRY_BLOCK_PTR_FOR_FN - (DECL_STRUCT_FUNCTION (decl))->frequency; - int freq = bb->frequency; - - if (profile_status_for_fn (DECL_STRUCT_FUNCTION (decl)) == PROFILE_ABSENT) - return CGRAPH_FREQ_BASE; - - if (!entry_freq) - entry_freq = 1, freq++; - - freq = freq * CGRAPH_FREQ_BASE / entry_freq; - if (freq > CGRAPH_FREQ_MAX) - freq = CGRAPH_FREQ_MAX; - - return freq; + return bb->count.to_cgraph_frequency + (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (decl))->count); } /* Mark address taken in STMT. */ @@ -415,7 +402,7 @@ cgraph_edge::rebuild_edges (void) node->remove_callees (); node->remove_all_references (); - node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); FOR_EACH_BB_FN (bb, cfun) { Index: cgraphunit.c =================================================================== --- cgraphunit.c (revision 254348) +++ cgraphunit.c (working copy) @@ -1601,12 +1601,9 @@ init_lowered_empty_function (tree decl, /* Create BB for body of the function and connect it properly. 
*/ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX; EXIT_BLOCK_PTR_FOR_FN (cfun)->count = count; - EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX; bb = create_basic_block (NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun)); bb->count = count; - bb->frequency = BB_FREQ_MAX; e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), bb, EDGE_FALLTHRU); e->probability = profile_probability::always (); e = make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -1852,8 +1849,12 @@ cgraph_node::expand_thunk (bool output_a else resdecl = DECL_RESULT (thunk_fndecl); + profile_count cfg_count = count; + if (!cfg_count.initialized_p ()) + cfg_count = profile_count::from_gcov_type (BB_FREQ_MAX).guessed_local (); + bb = then_bb = else_bb = return_bb - = init_lowered_empty_function (thunk_fndecl, true, count); + = init_lowered_empty_function (thunk_fndecl, true, cfg_count); bsi = gsi_start_bb (bb); @@ -1966,14 +1967,11 @@ cgraph_node::expand_thunk (bool output_a adjustment, because that's why we're emitting a thunk. */ then_bb = create_basic_block (NULL, bb); - then_bb->count = count - count.apply_scale (1, 16); - then_bb->frequency = BB_FREQ_MAX - BB_FREQ_MAX / 16; + then_bb->count = cfg_count - cfg_count.apply_scale (1, 16); return_bb = create_basic_block (NULL, then_bb); - return_bb->count = count; - return_bb->frequency = BB_FREQ_MAX; + return_bb->count = cfg_count; else_bb = create_basic_block (NULL, else_bb); - then_bb->count = count.apply_scale (1, 16); - then_bb->frequency = BB_FREQ_MAX / 16; + else_bb->count = cfg_count.apply_scale (1, 16); add_bb_to_loop (then_bb, bb->loop_father); add_bb_to_loop (return_bb, bb->loop_father); add_bb_to_loop (else_bb, bb->loop_father); @@ -2028,8 +2026,10 @@ cgraph_node::expand_thunk (bool output_a } cfun->gimple_df->in_ssa_p = true; + counts_to_freqs (); profile_status_for_fn (cfun) - = count.initialized_p () ? PROFILE_READ : PROFILE_GUESSED; + = cfg_count.initialized_p () && cfg_count.ipa_p () + ? 
PROFILE_READ : PROFILE_GUESSED; /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */ TREE_ASM_WRITTEN (thunk_fndecl) = false; delete_unreachable_blocks (); Index: except.c =================================================================== --- except.c (revision 254348) +++ except.c (working copy) @@ -1003,7 +1003,6 @@ dw2_build_landing_pads (void) bb = emit_to_new_bb_before (seq, label_rtx (lp->post_landing_pad)); bb->count = bb->next_bb->count; - bb->frequency = bb->next_bb->frequency; make_single_succ_edge (bb, bb->next_bb, e_flags); if (current_loops) { Index: final.c =================================================================== --- final.c (revision 254348) +++ final.c (working copy) @@ -694,8 +694,8 @@ compute_alignments (void) } loop_optimizer_init (AVOID_CFG_MODIFICATIONS); FOR_EACH_BB_FN (bb, cfun) - if (bb->frequency > freq_max) - freq_max = bb->frequency; + if (bb->count.to_frequency (cfun) > freq_max) + freq_max = bb->count.to_frequency (cfun); freq_threshold = freq_max / PARAM_VALUE (PARAM_ALIGN_THRESHOLD); if (dump_file) @@ -713,7 +713,8 @@ compute_alignments (void) if (dump_file) fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth %2i skipped.\n", - bb->index, bb->frequency, bb->loop_father->num, + bb->index, bb->count.to_frequency (cfun), + bb->loop_father->num, bb_loop_depth (bb)); continue; } @@ -731,7 +732,7 @@ compute_alignments (void) { fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth" " %2i fall %4i branch %4i", - bb->index, bb->frequency, bb->loop_father->num, + bb->index, bb->count.to_frequency (cfun), bb->loop_father->num, bb_loop_depth (bb), fallthru_frequency, branch_frequency); if (!bb->loop_father->inner && bb->loop_father->num) @@ -753,9 +754,10 @@ compute_alignments (void) if (!has_fallthru && (branch_frequency > freq_threshold - || (bb->frequency > bb->prev_bb->frequency * 10 - && (bb->prev_bb->frequency - <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency / 2)))) + || (bb->count.to_frequency (cfun) + > 
bb->prev_bb->count.to_frequency (cfun) * 10 + && (bb->prev_bb->count.to_frequency (cfun) + <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) / 2)))) { log = JUMP_ALIGN (label); if (dump_file) @@ -1942,8 +1944,6 @@ dump_basic_block_info (FILE *file, rtx_i edge_iterator ei; fprintf (file, "%s BLOCK %d", ASM_COMMENT_START, bb->index); - if (bb->frequency) - fprintf (file, " freq:%d", bb->frequency); if (bb->count.initialized_p ()) { fprintf (file, ", count:"); Index: gimple-pretty-print.c =================================================================== --- gimple-pretty-print.c (revision 254348) +++ gimple-pretty-print.c (working copy) @@ -82,21 +82,17 @@ debug_gimple_stmt (gimple *gs) by xstrdup_for_dump. */ static const char * -dump_profile (int frequency, profile_count &count) +dump_profile (profile_count &count) { - float minimum = 0.01f; - - gcc_assert (0 <= frequency && frequency <= REG_BR_PROB_BASE); - float fvalue = frequency * 100.0f / REG_BR_PROB_BASE; - if (fvalue < minimum && frequency > 0) - return "[0.01%]"; - char *buf; - if (count.initialized_p ()) - buf = xasprintf ("[%.2f%%] [count: %" PRId64 "]", fvalue, + if (!count.initialized_p ()) + return NULL; + if (count.ipa_p ()) + buf = xasprintf ("[count: %" PRId64 "]", + count.to_gcov_type ()); + else if (count.initialized_p ()) + buf = xasprintf ("[local count: %" PRId64 "]", count.to_gcov_type ()); - else - buf = xasprintf ("[%.2f%%] [count: INV]", fvalue); const char *ret = xstrdup_for_dump (buf); free (buf); @@ -2695,8 +2691,7 @@ dump_gimple_bb_header (FILE *outf, basic fprintf (outf, "%*sbb_%d:\n", indent, "", bb->index); else fprintf (outf, "%*s<bb %d> %s:\n", - indent, "", bb->index, dump_profile (bb->frequency, - bb->count)); + indent, "", bb->index, dump_profile (bb->count)); } } Index: gimple-ssa-isolate-paths.c =================================================================== --- gimple-ssa-isolate-paths.c (revision 254348) +++ gimple-ssa-isolate-paths.c (working copy) @@ -154,7 
+154,6 @@ isolate_path (basic_block bb, basic_bloc if (!duplicate) { duplicate = duplicate_block (bb, NULL, NULL); - bb->frequency = 0; bb->count = profile_count::zero (); if (!ret_zero) for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); ) @@ -168,7 +167,7 @@ isolate_path (basic_block bb, basic_bloc flush_pending_stmts (e2); /* Update profile only when redirection is really processed. */ - bb->frequency += EDGE_FREQUENCY (e); + bb->count += e->count (); } /* There may be more than one statement in DUPLICATE which exhibits Index: gimple-streamer-in.c =================================================================== --- gimple-streamer-in.c (revision 254348) +++ gimple-streamer-in.c (working copy) @@ -266,7 +266,6 @@ input_bb (struct lto_input_block *ib, en bb->count = profile_count::stream_in (ib).apply_scale (count_materialization_scale, REG_BR_PROB_BASE); - bb->frequency = streamer_read_hwi (ib); bb->flags = streamer_read_hwi (ib); /* LTO_bb1 has statements. LTO_bb0 does not. */ Index: gimple-streamer-out.c =================================================================== --- gimple-streamer-out.c (revision 254348) +++ gimple-streamer-out.c (working copy) @@ -210,7 +210,6 @@ output_bb (struct output_block *ob, basi streamer_write_uhwi (ob, bb->index); bb->count.stream_out (ob); - streamer_write_hwi (ob, bb->frequency); streamer_write_hwi (ob, bb->flags); if (!gsi_end_p (bsi) || phi_nodes (bb)) Index: haifa-sched.c =================================================================== --- haifa-sched.c (revision 254348) +++ haifa-sched.c (working copy) @@ -3917,8 +3917,8 @@ sched_pressure_start_bb (basic_block bb) - call_saved_regs_num[cl]). 
*/ { int i; - int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; - int bb_freq = bb->frequency; + int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); + int bb_freq = bb->count.to_frequency (cfun); if (bb_freq == 0) { @@ -8141,8 +8141,6 @@ init_before_recovery (basic_block *befor single->count = last->count; empty->count = last->count; - single->frequency = last->frequency; - empty->frequency = last->frequency; BB_COPY_PARTITION (single, last); BB_COPY_PARTITION (empty, last); @@ -8236,7 +8234,6 @@ sched_create_recovery_edges (basic_block in sel-sched.c `check_ds' in create_speculation_check. */ e->probability = profile_probability::very_unlikely (); rec->count = e->count (); - rec->frequency = EDGE_FREQUENCY (e); e2->probability = e->probability.invert (); rtx_code_label *label = block_label (second_bb); Index: hsa-gen.c =================================================================== --- hsa-gen.c (revision 254348) +++ hsa-gen.c (working copy) @@ -6374,7 +6374,7 @@ convert_switch_statements (void) edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); next_edge->probability = new_edge->probability.invert (); - next_bb->frequency = EDGE_FREQUENCY (next_edge); + next_bb->count = next_edge->count (); cur_bb = next_bb; } else /* Link last IF statement and default label Index: ipa-cp.c =================================================================== --- ipa-cp.c (revision 254348) +++ ipa-cp.c (working copy) @@ -3257,6 +3257,8 @@ ipcp_propagate_stage (struct ipa_topo_in if (dump_file) fprintf (dump_file, "\n Propagating constants:\n\n"); + max_count = profile_count::uninitialized (); + FOR_EACH_DEFINED_FUNCTION (node) { struct ipa_node_params *info = IPA_NODE_REF (node); @@ -3270,8 +3272,7 @@ ipcp_propagate_stage (struct ipa_topo_in } if (node->definition && !node->alias) overall_size += ipa_fn_summaries->get (node)->self_size; - if (node->count > max_count) - max_count = node->count; + max_count = max_count.max 
(node->count); } max_new_size = overall_size; @@ -5125,7 +5126,7 @@ make_pass_ipa_cp (gcc::context *ctxt) void ipa_cp_c_finalize (void) { - max_count = profile_count::zero (); + max_count = profile_count::uninitialized (); overall_size = 0; max_new_size = 0; } Index: ipa-fnsummary.c =================================================================== --- ipa-fnsummary.c (revision 254348) +++ ipa-fnsummary.c (working copy) @@ -1608,7 +1608,7 @@ static basic_block get_minimal_bb (basic_block init_bb, basic_block use_bb) { struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); - if (l && l->header->frequency < init_bb->frequency) + if (l && l->header->count < init_bb->count) return l->header; return init_bb; } @@ -1664,20 +1664,21 @@ param_change_prob (gimple *stmt, int i) { int init_freq; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return REG_BR_PROB_BASE; if (SSA_NAME_IS_DEFAULT_DEF (base)) - init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); else init_freq = get_minimal_bb (gimple_bb (SSA_NAME_DEF_STMT (base)), - gimple_bb (stmt))->frequency; + gimple_bb (stmt))->count.to_frequency (cfun); if (!init_freq) init_freq = 1; - if (init_freq < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1); + if (init_freq < bb->count.to_frequency (cfun)) + return MAX (GCOV_COMPUTE_SCALE (init_freq, + bb->count.to_frequency (cfun)), 1); else return REG_BR_PROB_BASE; } @@ -1692,7 +1693,7 @@ param_change_prob (gimple *stmt, int i) if (init != error_mark_node) return 0; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return REG_BR_PROB_BASE; ao_ref_init (&refd, op); info.stmt = stmt; @@ -1708,17 +1709,17 @@ param_change_prob (gimple *stmt, int i) /* Assume that every memory is initialized at entry. TODO: Can we easilly determine if value is always defined and thus we may skip entry block? 
*/ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) - max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) + max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); else max = 1; EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) - max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency); + max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->count.to_frequency (cfun)); BITMAP_FREE (info.bb_set); - if (max < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1); + if (max < bb->count.to_frequency (cfun)) + return MAX (GCOV_COMPUTE_SCALE (max, bb->count.to_frequency (cfun)), 1); else return REG_BR_PROB_BASE; } Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 254348) +++ ipa-inline.c (working copy) @@ -640,8 +640,8 @@ compute_uninlined_call_time (struct cgra ? edge->caller->global.inlined_to : edge->caller); - if (edge->count > profile_count::zero () - && caller->count > profile_count::zero ()) + if (edge->count.nonzero_p () + && caller->count.nonzero_p ()) uninlined_call_time *= (sreal)edge->count.to_gcov_type () / caller->count.to_gcov_type (); if (edge->frequency) @@ -665,8 +665,8 @@ compute_inlined_call_time (struct cgraph : edge->caller); sreal caller_time = ipa_fn_summaries->get (caller)->time; - if (edge->count > profile_count::zero () - && caller->count > profile_count::zero ()) + if (edge->count.nonzero_p () + && caller->count.nonzero_p ()) time *= (sreal)edge->count.to_gcov_type () / caller->count.to_gcov_type (); if (edge->frequency) time *= cgraph_freq_base_rec * edge->frequency; @@ -733,7 +733,7 @@ want_inline_small_function_p (struct cgr want_inline = false; } else if ((DECL_DECLARED_INLINE_P (callee->decl) - || e->count > profile_count::zero ()) + || e->count.nonzero_p ()) && ipa_fn_summaries->get (callee)->min_size - ipa_call_summaries->get (e)->call_stmt_size > 16 * MAX_INLINE_INSNS_SINGLE) @@ 
-843,7 +843,7 @@ want_inline_self_recursive_call_p (struc reason = "recursive call is cold"; want_inline = false; } - else if (outer_node->count == profile_count::zero ()) + else if (!outer_node->count.nonzero_p ()) { reason = "not executed in profile"; want_inline = false; @@ -881,7 +881,7 @@ want_inline_self_recursive_call_p (struc int i; for (i = 1; i < depth; i++) max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE; - if (max_count > profile_count::zero () && edge->count > profile_count::zero () + if (max_count.nonzero_p () && edge->count.nonzero_p () && (edge->count.to_gcov_type () * CGRAPH_FREQ_BASE / outer_node->count.to_gcov_type () >= max_prob)) @@ -889,7 +889,7 @@ want_inline_self_recursive_call_p (struc reason = "profile of recursive call is too large"; want_inline = false; } - if (max_count == profile_count::zero () + if (!max_count.nonzero_p () && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq >= max_prob)) { @@ -915,7 +915,7 @@ want_inline_self_recursive_call_p (struc methods. */ else { - if (max_count > profile_count::zero () && edge->count.initialized_p () + if (max_count.nonzero_p () && edge->count.initialized_p () && (edge->count.to_gcov_type () * 100 / outer_node->count.to_gcov_type () <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) @@ -923,7 +923,7 @@ want_inline_self_recursive_call_p (struc reason = "profile of recursive call is too small"; want_inline = false; } - else if ((max_count == profile_count::zero () + else if ((!max_count.nonzero_p () || !edge->count.initialized_p ()) && (edge->frequency * 100 / caller_freq <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) @@ -1070,7 +1070,7 @@ edge_badness (struct cgraph_edge *edge, then calls without. 
*/ else if (opt_for_fn (caller->decl, flag_guess_branch_prob) - || caller->count > profile_count::zero ()) + || caller->count.nonzero_p ()) { sreal numerator, denominator; int overall_growth; @@ -1080,7 +1080,7 @@ edge_badness (struct cgraph_edge *edge, - inlined_time); if (numerator == 0) numerator = ((sreal) 1 >> 8); - if (caller->count > profile_count::zero ()) + if (caller->count.nonzero_p ()) numerator *= caller->count.to_gcov_type (); else if (caller->count.initialized_p ()) numerator = numerator >> 11; @@ -1521,7 +1521,7 @@ recursive_inlining (struct cgraph_edge * { fprintf (dump_file, " Inlining call of depth %i", depth); - if (node->count > profile_count::zero ()) + if (node->count.nonzero_p ()) { fprintf (dump_file, " called approx. %.2f times per call", (double)curr->count.to_gcov_type () @@ -1789,8 +1789,7 @@ inline_small_functions (void) } for (edge = node->callers; edge; edge = edge->next_caller) - if (!(max_count >= edge->count)) - max_count = edge->count; + max_count = max_count.max (edge->count); } ipa_free_postorder_info (); initialize_growth_caches (); @@ -2049,7 +2048,7 @@ inline_small_functions (void) update_caller_keys (&edge_heap, where, updated_nodes, NULL); /* Offline copy count has possibly changed, recompute if profile is available. 
*/ - if (max_count > profile_count::zero ()) + if (max_count.nonzero_p ()) { struct cgraph_node *n = cgraph_node::get (edge->callee->decl); if (n != edge->callee && n->analyzed) Index: ipa-profile.c =================================================================== --- ipa-profile.c (revision 254348) +++ ipa-profile.c (working copy) @@ -179,53 +179,54 @@ ipa_profile_generate_summary (void) hash_table<histogram_hash> hashtable (10); FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) - FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - { - int time = 0; - int size = 0; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_CALL - && !gimple_call_fndecl (stmt)) - { - histogram_value h; - h = gimple_histogram_value_of_type - (DECL_STRUCT_FUNCTION (node->decl), - stmt, HIST_TYPE_INDIR_CALL); - /* No need to do sanity check: gimple_ic_transform already - takes away bad histograms. */ - if (h) - { - /* counter 0 is target, counter 1 is number of execution we called target, - counter 2 is total number of executions. 
*/ - if (h->hvalue.counters[2]) - { - struct cgraph_edge * e = node->get_edge (stmt); - if (e && !e->indirect_unknown_callee) - continue; - e->indirect_info->common_target_id - = h->hvalue.counters [0]; - e->indirect_info->common_target_probability - = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); - if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) - { - if (dump_file) - fprintf (dump_file, "Probability capped to 1\n"); - e->indirect_info->common_target_probability = REG_BR_PROB_BASE; - } - } - gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), - stmt, h); - } - } - time += estimate_num_insns (stmt, &eni_time_weights); - size += estimate_num_insns (stmt, &eni_size_weights); - } - if (bb->count.initialized_p ()) - account_time_size (&hashtable, histogram, bb->count.to_gcov_type (), - time, size); - } + if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) + { + int time = 0; + int size = 0; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_fndecl (stmt)) + { + histogram_value h; + h = gimple_histogram_value_of_type + (DECL_STRUCT_FUNCTION (node->decl), + stmt, HIST_TYPE_INDIR_CALL); + /* No need to do sanity check: gimple_ic_transform already + takes away bad histograms. */ + if (h) + { + /* counter 0 is target, counter 1 is number of execution we called target, + counter 2 is total number of executions. 
*/ + if (h->hvalue.counters[2]) + { + struct cgraph_edge * e = node->get_edge (stmt); + if (e && !e->indirect_unknown_callee) + continue; + e->indirect_info->common_target_id + = h->hvalue.counters [0]; + e->indirect_info->common_target_probability + = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); + if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) + { + if (dump_file) + fprintf (dump_file, "Probability capped to 1\n"); + e->indirect_info->common_target_probability = REG_BR_PROB_BASE; + } + } + gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), + stmt, h); + } + } + time += estimate_num_insns (stmt, &eni_time_weights); + size += estimate_num_insns (stmt, &eni_size_weights); + } + if (bb->count.ipa_p () && bb->count.initialized_p ()) + account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (), + time, size); + } histogram.qsort (cmp_counts); } Index: ipa-split.c =================================================================== --- ipa-split.c (revision 254348) +++ ipa-split.c (working copy) @@ -444,7 +444,7 @@ consider_split (struct split_point *curr /* Do not split when we would end up calling function anyway. */ if (incoming_freq - >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency + >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100)) { /* When profile is guessed, we can not expect it to give us @@ -454,13 +454,14 @@ consider_split (struct split_point *curr is likely noticeable win. 
*/ if (back_edge && profile_status_for_fn (cfun) != PROFILE_READ - && incoming_freq < ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) + && incoming_freq + < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Split before loop, accepting despite low frequencies %i %i.\n", incoming_freq, - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)); } else { @@ -714,8 +715,10 @@ consider_split (struct split_point *curr out smallest size of header. In future we might re-consider this heuristics. */ if (!best_split_point.split_bbs - || best_split_point.entry_bb->frequency > current->entry_bb->frequency - || (best_split_point.entry_bb->frequency == current->entry_bb->frequency + || best_split_point.entry_bb->count.to_frequency (cfun) + > current->entry_bb->count.to_frequency (cfun) + || (best_split_point.entry_bb->count.to_frequency (cfun) + == current->entry_bb->count.to_frequency (cfun) && best_split_point.split_size < current->split_size)) { @@ -1285,7 +1288,7 @@ split_function (basic_block return_bb, s FOR_EACH_EDGE (e, ei, return_bb->preds) if (bitmap_bit_p (split_point->split_bbs, e->src->index)) { - new_return_bb->frequency += EDGE_FREQUENCY (e); + new_return_bb->count += e->count (); redirect_edge_and_branch (e, new_return_bb); redirected = true; break; Index: ira-build.c =================================================================== --- ira-build.c (revision 254348) +++ ira-build.c (working copy) @@ -2202,7 +2202,8 @@ loop_compare_func (const void *v1p, cons return -1; if (! 
l1->to_remove_p && l2->to_remove_p) return 1; - if ((diff = l1->loop->header->frequency - l2->loop->header->frequency) != 0) + if ((diff = l1->loop->header->count.to_frequency (cfun) + - l2->loop->header->count.to_frequency (cfun)) != 0) return diff; if ((diff = (int) loop_depth (l1->loop) - (int) loop_depth (l2->loop)) != 0) return diff; @@ -2260,7 +2261,7 @@ mark_loops_for_removal (void) (ira_dump_file, " Mark loop %d (header %d, freq %d, depth %d) for removal (%s)\n", sorted_loops[i]->loop_num, sorted_loops[i]->loop->header->index, - sorted_loops[i]->loop->header->frequency, + sorted_loops[i]->loop->header->count.to_frequency (cfun), loop_depth (sorted_loops[i]->loop), low_pressure_loop_node_p (sorted_loops[i]->parent) && low_pressure_loop_node_p (sorted_loops[i]) @@ -2293,7 +2294,7 @@ mark_all_loops_for_removal (void) " Mark loop %d (header %d, freq %d, depth %d) for removal\n", ira_loop_nodes[i].loop_num, ira_loop_nodes[i].loop->header->index, - ira_loop_nodes[i].loop->header->frequency, + ira_loop_nodes[i].loop->header->count.to_frequency (cfun), loop_depth (ira_loop_nodes[i].loop)); } } Index: loop-doloop.c =================================================================== --- loop-doloop.c (revision 254348) +++ loop-doloop.c (working copy) @@ -506,7 +506,6 @@ doloop_modify (struct loop *loop, struct set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); set_zero->count = profile_count::uninitialized (); - set_zero->frequency = 0; te = single_succ_edge (preheader); for (; ass; ass = XEXP (ass, 1)) @@ -522,7 +521,6 @@ doloop_modify (struct loop *loop, struct also be very hard to show that it is impossible, so we must handle this case. 
*/ set_zero->count = preheader->count; - set_zero->frequency = preheader->frequency; } if (EDGE_COUNT (set_zero->preds) == 0) Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 254348) +++ loop-unroll.c (working copy) @@ -863,7 +863,7 @@ unroll_loop_runtime_iterations (struct l unsigned i, j; profile_probability p; basic_block preheader, *body, swtch, ezc_swtch = NULL; - int may_exit_copy, iter_freq, new_freq; + int may_exit_copy; profile_count iter_count, new_count; unsigned n_peel; edge e; @@ -970,12 +970,10 @@ unroll_loop_runtime_iterations (struct l /* Record the place where switch will be built for preconditioning. */ swtch = split_edge (loop_preheader_edge (loop)); - /* Compute frequency/count increments for each switch block and initialize + /* Compute count increments for each switch block and initialize innermost switch block. Switch blocks and peeled loop copies are built from innermost outward. */ - iter_freq = new_freq = swtch->frequency / (max_unroll + 1); iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1); - swtch->frequency = new_freq; swtch->count = new_count; for (i = 0; i < n_peel; i++) @@ -995,8 +993,7 @@ unroll_loop_runtime_iterations (struct l p = profile_probability::always ().apply_scale (1, i + 2); preheader = split_edge (loop_preheader_edge (loop)); - /* Add in frequency/count of edge from switch block. */ - preheader->frequency += iter_freq; + /* Add in count of edge from switch block. 
*/ preheader->count += iter_count; branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, block_label (preheader), p, @@ -1009,9 +1006,7 @@ unroll_loop_runtime_iterations (struct l swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); single_succ_edge (swtch)->probability = p.invert (); - new_freq += iter_freq; new_count += iter_count; - swtch->frequency = new_freq; swtch->count = new_count; e = make_edge (swtch, preheader, single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); @@ -1024,12 +1019,10 @@ unroll_loop_runtime_iterations (struct l p = profile_probability::always ().apply_scale (1, max_unroll + 1); swtch = ezc_swtch; preheader = split_edge (loop_preheader_edge (loop)); - /* Recompute frequency/count adjustments since initial peel copy may + /* Recompute count adjustments since initial peel copy may have exited and reduced those values that were computed above. */ - iter_freq = swtch->frequency / (max_unroll + 1); iter_count = swtch->count.apply_scale (1, max_unroll + 1); - /* Add in frequency/count of edge from switch block. */ - preheader->frequency += iter_freq; + /* Add in count of edge from switch block. 
*/ preheader->count += iter_count; branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, block_label (preheader), p, Index: lto-streamer-in.c =================================================================== --- lto-streamer-in.c (revision 254348) +++ lto-streamer-in.c (working copy) @@ -1192,6 +1192,7 @@ input_function (tree fn_decl, struct dat gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest)); } + counts_to_freqs (); fixup_call_stmt_edges (node, stmts); execute_all_ipa_stmt_fixups (node, stmts); Index: omp-expand.c =================================================================== --- omp-expand.c (revision 254348) +++ omp-expand.c (working copy) @@ -1399,6 +1399,7 @@ expand_omp_taskreg (struct omp_region *r if (optimize) optimize_omp_library_calls (entry_stmt); + counts_to_freqs (); cgraph_edge::rebuild_edges (); /* Some EH regions might become dead, see PR34608. If Index: omp-simd-clone.c =================================================================== --- omp-simd-clone.c (revision 254348) +++ omp-simd-clone.c (working copy) @@ -1132,6 +1132,7 @@ simd_clone_adjust (struct cgraph_node *n { basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src; incr_bb = create_empty_bb (orig_exit); + incr_bb->count = profile_count::zero (); add_bb_to_loop (incr_bb, body_bb->loop_father); /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty flag. Set it now to be a FALLTHRU_EDGE. 
*/ @@ -1142,11 +1143,13 @@ simd_clone_adjust (struct cgraph_node *n { edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i); redirect_edge_succ (e, incr_bb); + incr_bb->count += e->count (); } } else if (node->simdclone->inbranch) { incr_bb = create_empty_bb (entry_bb); + incr_bb->count = profile_count::zero (); add_bb_to_loop (incr_bb, body_bb->loop_father); } @@ -1243,6 +1246,7 @@ simd_clone_adjust (struct cgraph_node *n gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::unlikely ().guessed (); + incr_bb->count += e->count (); edge fallthru = FALLTHRU_EDGE (loop->header); fallthru->flags = EDGE_FALSE_VALUE; fallthru->probability = profile_probability::likely ().guessed (); Index: predict.c =================================================================== --- predict.c (revision 254348) +++ predict.c (working copy) @@ -137,12 +137,12 @@ maybe_hot_frequency_p (struct function * if (profile_status_for_fn (fun) == PROFILE_ABSENT) return true; if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE - && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3)) + && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (cfun) * 2 / 3)) return false; if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) return false; if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) - < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency) + < ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (cfun)) return false; return true; } @@ -175,10 +175,14 @@ set_hot_bb_threshold (gcov_type min) /* Return TRUE if frequency FREQ is considered to be hot. */ bool -maybe_hot_count_p (struct function *, profile_count count) +maybe_hot_count_p (struct function *fun, profile_count count) { if (!count.initialized_p ()) return true; + if (!count.ipa_p ()) + return maybe_hot_frequency_p (fun, count.to_frequency (fun)); + if (count.ipa () == profile_count::zero ()) + return false; /* Code executed at most once is not hot. 
*/ if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) return false; @@ -192,9 +196,7 @@ bool maybe_hot_bb_p (struct function *fun, const_basic_block bb) { gcc_checking_assert (fun); - if (!maybe_hot_count_p (fun, bb->count)) - return false; - return maybe_hot_frequency_p (fun, bb->frequency); + return maybe_hot_count_p (fun, bb->count); } /* Return true in case BB can be CPU intensive and should be optimized @@ -203,9 +205,7 @@ maybe_hot_bb_p (struct function *fun, co bool maybe_hot_edge_p (edge e) { - if (!maybe_hot_count_p (cfun, e->count ())) - return false; - return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e)); + return maybe_hot_count_p (cfun, e->count ()); } /* Return true if profile COUNT and FREQUENCY, or function FUN static @@ -213,7 +213,7 @@ maybe_hot_edge_p (edge e) static bool probably_never_executed (struct function *fun, - profile_count count, int) + profile_count count) { gcc_checking_assert (fun); if (count == profile_count::zero ()) @@ -238,7 +238,7 @@ probably_never_executed (struct function bool probably_never_executed_bb_p (struct function *fun, const_basic_block bb) { - return probably_never_executed (fun, bb->count, bb->frequency); + return probably_never_executed (fun, bb->count); } @@ -259,7 +259,7 @@ probably_never_executed_edge_p (struct f { if (unlikely_executed_edge_p (e)) return true; - return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e)); + return probably_never_executed (fun, e->count ()); } /* Return true when current function should always be optimized for size. 
*/ @@ -1289,7 +1289,8 @@ combine_predictions_for_bb (basic_block } clear_bb_predictions (bb); - if (!bb->count.initialized_p () && !dry_run) + if ((!bb->count.nonzero_p () || !first->probability.initialized_p ()) + && !dry_run) { first->probability = profile_probability::from_reg_br_prob_base (combined_probability); @@ -3014,10 +3015,7 @@ propagate_freq (basic_block head, bitmap BLOCK_INFO (bb)->npredecessors = count; /* When function never returns, we will never process exit block. */ if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - bb->count = profile_count::zero (); - bb->frequency = 0; - } + bb->count = profile_count::zero (); } BLOCK_INFO (head)->frequency = 1; @@ -3050,7 +3048,10 @@ propagate_freq (basic_block head, bitmap * BLOCK_INFO (e->src)->frequency / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? + e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (e->src)->frequency; tmp *= real_inv_br_prob_base; frequency += tmp; @@ -3082,7 +3083,10 @@ propagate_freq (basic_block head, bitmap = ((e->probability * BLOCK_INFO (bb)->frequency) / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? 
+ e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (bb)->frequency; EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; } @@ -3196,10 +3200,26 @@ drop_profile (struct cgraph_node *node, } basic_block bb; - FOR_ALL_BB_FN (bb, fn) + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + if (flag_guess_branch_prob) { - bb->count = profile_count::uninitialized (); + bool clear_zeros + = ENTRY_BLOCK_PTR_FOR_FN + (DECL_STRUCT_FUNCTION (node->decl))->count.nonzero_p (); + FOR_ALL_BB_FN (bb, fn) + if (clear_zeros || !(bb->count == profile_count::zero ())) + bb->count = bb->count.guessed_local (); + DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max = + DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max.guessed_local (); } + else + { + FOR_ALL_BB_FN (bb, fn) + bb->count = profile_count::uninitialized (); + DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max + = profile_count::uninitialized (); + } + pop_cfun (); struct cgraph_edge *e; for (e = node->callees; e; e = e->next_caller) @@ -3300,33 +3320,16 @@ handle_missing_profiles (void) bool counts_to_freqs (void) { - gcov_type count_max; - profile_count true_count_max = profile_count::zero (); + profile_count true_count_max = profile_count::uninitialized (); basic_block bb; - /* Don't overwrite the estimated frequencies when the profile for - the function is missing. We may drop this function PROFILE_GUESSED - later in drop_profile (). */ - if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () - || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) - return false; - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > true_count_max) - true_count_max = bb->count; + if (!(bb->count < true_count_max)) + true_count_max = true_count_max.max (bb->count); - /* If we have no counts to base frequencies on, keep those that are - already there. 
*/ - if (!(true_count_max > 0)) - return false; - - count_max = true_count_max.to_gcov_type (); + cfun->cfg->count_max = true_count_max; - FOR_ALL_BB_FN (bb, cfun) - if (bb->count.initialized_p ()) - bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max); - - return true; + return true_count_max.nonzero_p (); } /* Return true if function is likely to be expensive, so there is no point to @@ -3348,11 +3351,11 @@ expensive_function_p (int threshold) /* Frequencies are out of range. This either means that function contains internal loop executing more than BB_FREQ_MAX times or profile feedback is available and function has not been executed at all. */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) == 0) return true; /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ - limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; + limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * threshold; FOR_EACH_BB_FN (bb, cfun) { rtx_insn *insn; @@ -3360,7 +3363,7 @@ expensive_function_p (int threshold) FOR_BB_INSNS (bb, insn) if (active_insn_p (insn)) { - sum += bb->frequency; + sum += bb->count.to_frequency (cfun); if (sum > limit) return true; } @@ -3409,7 +3412,6 @@ propagate_unlikely_bbs_forward (void) "Basic block %i is marked unlikely by forward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; } else bb->aux = NULL; @@ -3440,9 +3442,6 @@ determine_unlikely_bbs () bb->count = profile_count::zero (); } - if (bb->count == profile_count::zero ()) - bb->frequency = 0; - FOR_EACH_EDGE (e, ei, bb->succs) if (!(e->probability == profile_probability::never ()) && unlikely_executed_edge_p (e)) @@ -3497,7 +3496,6 @@ determine_unlikely_bbs () "Basic block %i is marked unlikely by backward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) if (!(e->probability == profile_probability::never 
())) { @@ -3554,8 +3552,13 @@ estimate_bb_frequencies (bool force) FOR_EACH_EDGE (e, ei, bb->succs) { - EDGE_INFO (e)->back_edge_prob - = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + if (e->probability.initialized_p ()) + EDGE_INFO (e)->back_edge_prob + = e->probability.to_reg_br_prob_base (); + else + EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; } } @@ -3564,16 +3567,28 @@ estimate_bb_frequencies (bool force) to outermost to examine frequencies for back edges. */ estimate_loops (); + bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p (); + freq_max = 0; FOR_EACH_BB_FN (bb, cfun) if (freq_max < BLOCK_INFO (bb)->frequency) freq_max = BLOCK_INFO (bb)->frequency; freq_max = real_bb_freq_max / freq_max; + cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; - bb->frequency = tmp.to_int (); + profile_count count = profile_count::from_gcov_type (tmp.to_int ()); + + /* If we have profile feedback in which this function was never + executed, then preserve this info. 
*/ + if (global0) + bb->count = count.global0 (); + else if (!(bb->count == profile_count::zero ())) + bb->count = count.guessed_local (); + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); } free_aux_for_blocks (); @@ -3598,7 +3613,8 @@ compute_function_frequency (void) if (profile_status_for_fn (cfun) != PROFILE_READ) { int flags = flags_from_decl_or_type (current_function_decl); - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero () + if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) != NULL) { @@ -3717,7 +3733,7 @@ pass_profile::execute (function *fun) { struct loop *loop; FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) - if (loop->header->frequency) + if (loop->header->count.initialized_p ()) fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", loop->num, (int)expected_loop_iterations_unbounded (loop)); @@ -3843,15 +3859,12 @@ rebuild_frequencies (void) which may also lead to frequencies incorrectly reduced to 0. There is less precision in the probabilities, so we only do this for small max counts. */ - profile_count count_max = profile_count::zero (); + cfun->cfg->count_max = profile_count::uninitialized (); basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > count_max) - count_max = bb->count; + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); - if (profile_status_for_fn (cfun) == PROFILE_GUESSED - || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ - && count_max < REG_BR_PROB_BASE / 10)) + if (profile_status_for_fn (cfun) == PROFILE_GUESSED) { loop_optimizer_init (0); add_noreturn_fake_exit_edges (); @@ -4017,17 +4030,19 @@ force_edge_cold (edge e, bool impossible after loop transforms. 
*/ if (!(prob_sum > profile_probability::never ()) && count_sum == profile_count::zero () - && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1)) + && single_pred_p (e->src) && e->src->count.to_frequency (cfun) + > (impossible ? 0 : 1)) { - int old_frequency = e->src->frequency; + int old_frequency = e->src->count.to_frequency (cfun); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Making bb %i %s.\n", e->src->index, impossible ? "impossible" : "cold"); - e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); + int new_frequency = MIN (e->src->count.to_frequency (cfun), + impossible ? 0 : 1); if (impossible) e->src->count = profile_count::zero (); else - e->src->count = e->count ().apply_scale (e->src->frequency, + e->src->count = e->count ().apply_scale (new_frequency, old_frequency); force_edge_cold (single_pred_edge (e->src), impossible); } Index: profile-count.c =================================================================== --- profile-count.c (revision 254348) +++ profile-count.c (working copy) @@ -42,7 +42,11 @@ profile_count::dump (FILE *f) const else { fprintf (f, "%" PRId64, m_val); - if (m_quality == profile_adjusted) + if (m_quality == profile_guessed_local) + fprintf (f, " (estimated locally)"); + else if (m_quality == profile_guessed_global0) + fprintf (f, " (estimated locally, globally 0)"); + else if (m_quality == profile_adjusted) fprintf (f, " (adjusted)"); else if (m_quality == profile_afdo) fprintf (f, " (auto FDO)"); @@ -65,6 +69,7 @@ profile_count::debug () const bool profile_count::differs_from_p (profile_count other) const { + gcc_checking_assert (compatible_p (other)); if (!initialized_p () || !other.initialized_p ()) return false; if ((uint64_t)m_val - (uint64_t)other.m_val < 100 @@ -213,3 +218,40 @@ slow_safe_scale_64bit (uint64_t a, uint6 *res = (uint64_t) -1; return false; } + +/* Return count as frequency within FUN scaled in range 0 to REG_FREQ_MAX + Used for legacy code and should not 
be used anymore. */ + +int +profile_count::to_frequency (struct function *fun) const +{ + if (!initialized_p ()) + return BB_FREQ_MAX; + if (*this == profile_count::zero ()) + return 0; + gcc_assert (REG_BR_PROB_BASE == BB_FREQ_MAX + && fun->cfg->count_max.initialized_p ()); + profile_probability prob = probability_in (fun->cfg->count_max); + if (!prob.initialized_p ()) + return REG_BR_PROB_BASE; + return prob.to_reg_br_prob_base (); +} + +/* Return count as frequency within FUN scaled in range 0 to CGRAPH_FREQ_MAX + where CGRAPH_FREQ_BASE means that count equals to entry block count. + Used for legacy code and should not be used anymore. */ + +int +profile_count::to_cgraph_frequency (profile_count entry_bb_count) const +{ + if (!initialized_p ()) + return CGRAPH_FREQ_BASE; + if (*this == profile_count::zero ()) + return 0; + gcc_checking_assert (entry_bb_count.initialized_p ()); + uint64_t scale; + if (!safe_scale_64bit (!entry_bb_count.m_val ? m_val + 1 : m_val, + CGRAPH_FREQ_BASE, MAX (1, entry_bb_count.m_val), &scale)) + return CGRAPH_FREQ_MAX; + return MIN (scale, CGRAPH_FREQ_MAX); +} Index: profile-count.h =================================================================== --- profile-count.h (revision 254348) +++ profile-count.h (working copy) @@ -21,21 +21,37 @@ along with GCC; see the file COPYING3. #ifndef GCC_PROFILE_COUNT_H #define GCC_PROFILE_COUNT_H +struct function; + /* Quality of the profile count. Because gengtype does not support enums inside of classes, this is in global namespace. */ enum profile_quality { + /* Profile is based on static branch prediction heuristics and may + or may not match reality. It is local to function and can not be compared + inter-procedurally. Never used by probabilities (they are always local). + */ + profile_guessed_local = 0, + /* Profile was read by feedback and was 0, we used local heuristics to guess + better. This is the case of functions not run in profile fedback. + Never used by probabilities. 
*/ + profile_guessed_global0 = 1, + + /* Profile is based on static branch prediction heuristics. It may or may - not reflect the reality. */ - profile_guessed = 0, + not reflect the reality but it can be compared interprocedurally + (for example, we inlined function w/o profile feedback into function + with feedback and propagated from that). + Never used by probablities. */ + profile_guessed = 2, /* Profile was determined by autofdo. */ - profile_afdo = 1, + profile_afdo = 3, /* Profile was originally based on feedback but it was adjusted by code duplicating optimization. It may not precisely reflect the particular code path. */ - profile_adjusted = 2, + profile_adjusted = 4, /* Profile was read from profile feedback or determined by accurate static method. */ - profile_precise = 3 + profile_precise = 5 }; /* The base value for branch probability notes and edge probabilities. */ @@ -114,15 +130,15 @@ safe_scale_64bit (uint64_t a, uint64_t b class GTY((user)) profile_probability { - static const int n_bits = 30; + static const int n_bits = 29; /* We can technically use ((uint32_t) 1 << (n_bits - 1)) - 2 but that will lead to harder multiplication sequences. 
*/ static const uint32_t max_probability = (uint32_t) 1 << (n_bits - 2); static const uint32_t uninitialized_probability = ((uint32_t) 1 << (n_bits - 1)) - 1; - uint32_t m_val : 30; - enum profile_quality m_quality : 2; + uint32_t m_val : 29; + enum profile_quality m_quality : 3; friend class profile_count; public: @@ -226,14 +242,14 @@ public: static profile_probability from_reg_br_prob_note (int v) { profile_probability ret; - ret.m_val = ((unsigned int)v) / 4; - ret.m_quality = (enum profile_quality)(v & 3); + ret.m_val = ((unsigned int)v) / 8; + ret.m_quality = (enum profile_quality)(v & 7); return ret; } int to_reg_br_prob_note () const { gcc_checking_assert (initialized_p ()); - int ret = m_val * 4 + m_quality; + int ret = m_val * 8 + m_quality; gcc_checking_assert (profile_probability::from_reg_br_prob_note (ret) == *this); return ret; @@ -489,8 +505,9 @@ public: { if (m_val == uninitialized_probability) return m_quality == profile_guessed; - else - return m_val <= max_probability; + else if (m_quality < profile_guessed) + return false; + return m_val <= max_probability; } /* Comparsions are three-state and conservative. False is returned if @@ -530,9 +547,32 @@ public: void stream_out (struct lto_output_stream *); }; -/* Main data type to hold profile counters in GCC. In most cases profile - counts originate from profile feedback. They are 64bit integers - representing number of executions during the train run. +/* Main data type to hold profile counters in GCC. Profile counts originate + either from profile feedback, static profile estimation or both. We do not + perform whole program profile propagation and thus profile estimation + counters are often local to function, while counters from profile feedback + (or special cases of profile estimation) can be used inter-procedurally. + + There are 3 basic types + 1) local counters which are result of intra-procedural static profile + estimation. 
+ 2) ipa counters which are result of profile feedback or special case + of static profile estimation (such as in function main). + 3) counters which counts as 0 inter-procedurally (beause given function + was never run in train feedback) but they hold local static profile + estimate. + + Counters of type 1 and 3 can not be mixed with counters of different type + within operation (because whole function should use one type of counter) + with exception that global zero mix in most operations where outcome is + well defined. + + To take local counter and use it inter-procedurally use ipa member function + which strips information irelevant at the inter-procedural level. + + Counters are 61bit integers representing number of executions during the + train run or normalized frequency within the function. + As the profile is maintained during the compilation, many adjustments are made. Not all transformations can be made precisely, most importantly when code is being duplicated. It also may happen that part of CFG has @@ -567,12 +607,25 @@ class GTY(()) profile_count 64bit. Although a counter cannot be negative, we use a signed type to hold various extra stages. */ - static const int n_bits = 62; + static const int n_bits = 61; static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2; static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1; uint64_t m_val : n_bits; - enum profile_quality m_quality : 2; + enum profile_quality m_quality : 3; + + /* Return true if both values can meaningfully appear in single function + body. We have either all counters in function local or global, otherwise + operations between them are not really defined well. 
*/ + bool compatible_p (const profile_count other) const + { + if (!initialized_p () || !other.initialized_p ()) + return true; + if (*this == profile_count::zero () + || other == profile_count::zero ()) + return true; + return ipa_p () == other.ipa_p (); + } public: /* Used for counters which are expected to be never executed. */ @@ -597,7 +650,7 @@ public: { profile_count c; c.m_val = uninitialized_count; - c.m_quality = profile_guessed; + c.m_quality = profile_guessed_local; return c; } @@ -630,6 +683,11 @@ public: { return m_quality >= profile_adjusted; } + /* Return true if vlaue can be operated inter-procedurally. */ + bool ipa_p () const + { + return !initialized_p () || m_quality >= profile_guessed_global0; + } /* When merging basic blocks, the two different profile counts are unified. Return true if this can be done without losing info about profile. @@ -671,6 +729,7 @@ public: return profile_count::uninitialized (); profile_count ret; + gcc_checking_assert (compatible_p (other)); ret.m_val = m_val + other.m_val; ret.m_quality = MIN (m_quality, other.m_quality); return ret; @@ -688,6 +747,7 @@ public: return *this = profile_count::uninitialized (); else { + gcc_checking_assert (compatible_p (other)); m_val += other.m_val; m_quality = MIN (m_quality, other.m_quality); } @@ -699,6 +759,7 @@ public: return *this; if (!initialized_p () || !other.initialized_p ()) return profile_count::uninitialized (); + gcc_checking_assert (compatible_p (other)); profile_count ret; ret.m_val = m_val >= other.m_val ? m_val - other.m_val : 0; ret.m_quality = MIN (m_quality, other.m_quality); @@ -712,6 +773,7 @@ public: return *this = profile_count::uninitialized (); else { + gcc_checking_assert (compatible_p (other)); m_val = m_val >= other.m_val ? m_val - other.m_val: 0; m_quality = MIN (m_quality, other.m_quality); } @@ -721,48 +783,115 @@ public: /* Return false if profile_count is bogus. 
*/ bool verify () const { - return m_val != uninitialized_count || m_quality == profile_guessed; + return m_val != uninitialized_count || m_quality == profile_guessed_local; } /* Comparsions are three-state and conservative. False is returned if the inequality can not be decided. */ bool operator< (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val < other.m_val; + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return !(other == profile_count::zero ()); + if (other == profile_count::zero ()) + return false; + gcc_checking_assert (compatible_p (other)); + return m_val < other.m_val; } bool operator> (const profile_count &other) const { + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return false; + if (other == profile_count::zero ()) + return !(*this == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); return initialized_p () && other.initialized_p () && m_val > other.m_val; } bool operator< (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val < (uint64_t) other; } bool operator> (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val > (uint64_t) other; } bool operator<= (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val <= other.m_val; + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return true; + if (other == profile_count::zero ()) + return (*this == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); + return m_val <= other.m_val; } bool operator>= (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val >= other.m_val; + if (!initialized_p () || 
!other.initialized_p ()) + return false; + if (other == profile_count::zero ()) + return true; + if (*this == profile_count::zero ()) + return !(other == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); + return m_val >= other.m_val; } bool operator<= (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val <= (uint64_t) other; } bool operator>= (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val >= (uint64_t) other; } + /* Return true when value is not zero and can be used for scaling. + This is different from *this > 0 because that requires counter to + be IPA. */ + bool nonzero_p () const + { + return initialized_p () && m_val != 0; + } + + /* Make counter forcingly nonzero. */ + profile_count force_nonzero () const + { + if (!initialized_p ()) + return *this; + profile_count ret = *this; + if (ret.m_val == 0) + ret.m_val = 1; + return ret; + } + + profile_count max (profile_count other) const + { + if (!initialized_p ()) + return other; + if (!other.initialized_p ()) + return *this; + if (*this == profile_count::zero ()) + return other; + if (other == profile_count::zero ()) + return *this; + gcc_checking_assert (compatible_p (other)); + if (m_val < other.m_val || (m_val == other.m_val + && m_quality < other.m_quality)) + return other; + return *this; + } /* PROB is a probability in scale 0...REG_BR_PROB_BASE. Scale counter accordingly. 
*/ @@ -814,13 +943,13 @@ public: } profile_count apply_scale (profile_count num, profile_count den) const { - if (m_val == 0) + if (*this == profile_count::zero ()) return *this; - if (num.m_val == 0) + if (num == profile_count::zero ()) return num; if (!initialized_p () || !num.initialized_p () || !den.initialized_p ()) return profile_count::uninitialized (); - gcc_checking_assert (den > 0); + gcc_checking_assert (den.m_val); if (num == den) return *this; @@ -828,7 +957,30 @@ public: uint64_t val; safe_scale_64bit (m_val, num.m_val, den.m_val, &val); ret.m_val = MIN (val, max_count); - ret.m_quality = MIN (m_quality, profile_adjusted); + ret.m_quality = MIN (MIN (MIN (m_quality, profile_adjusted), + num.m_quality), den.m_quality); + if (num.ipa_p () && !ret.ipa_p ()) + ret.m_quality = MIN (num.m_quality, profile_guessed); + return ret; + } + + /* Return THIS with quality dropped to GUESSED_LOCAL. */ + profile_count guessed_local () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_local; + return ret; + } + + /* We know that profile is globally0 but keep local profile if present. */ + profile_count global0 () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_global0; return ret; } @@ -836,10 +988,21 @@ public: profile_count guessed () const { profile_count ret = *this; - ret.m_quality = profile_guessed; + ret.m_quality = MIN (ret.m_quality, profile_guessed); return ret; } + /* Return variant of profile counte which is always safe to compare + acorss functions. */ + profile_count ipa () const + { + if (m_quality > profile_guessed_global0) + return *this; + if (m_quality == profile_guessed_global0) + return profile_count::zero (); + return profile_count::uninitialized (); + } + /* Return THIS with quality dropped to AFDO. */ profile_count afdo () const { @@ -852,21 +1015,26 @@ public: OVERALL. 
*/ profile_probability probability_in (const profile_count overall) const { - if (!m_val) + if (*this == profile_count::zero ()) return profile_probability::never (); if (!initialized_p () || !overall.initialized_p () || !overall.m_val) return profile_probability::uninitialized (); profile_probability ret; - if (overall < m_val) + gcc_checking_assert (compatible_p (overall)); + + if (overall.m_val < m_val) ret.m_val = profile_probability::max_probability; else ret.m_val = RDIV (m_val * profile_probability::max_probability, overall.m_val); - ret.m_quality = MIN (m_quality, overall.m_quality); + ret.m_quality = MAX (MIN (m_quality, overall.m_quality), profile_guessed); return ret; } + int to_frequency (struct function *fun) const; + int to_cgraph_frequency (profile_count entry_bb_count) const; + /* Output THIS to F. */ void dump (FILE *f) const; Index: profile.c =================================================================== --- profile.c (revision 254348) +++ profile.c (working copy) @@ -476,38 +476,6 @@ read_profile_edge_counts (gcov_type *exe return num_edges; } -#define OVERLAP_BASE 10000 - -/* Compare the static estimated profile to the actual profile, and - return the "degree of overlap" measure between them. - - Degree of overlap is a number between 0 and OVERLAP_BASE. It is - the sum of each basic block's minimum relative weights between - two profiles. And overlap of OVERLAP_BASE means two profiles are - identical. 
*/ - -static int -compute_frequency_overlap (void) -{ - gcov_type count_total = 0, freq_total = 0; - int overlap = 0; - basic_block bb; - - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - { - count_total += bb_gcov_count (bb); - freq_total += bb->frequency; - } - - if (count_total == 0 || freq_total == 0) - return 0; - - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - overlap += MIN (bb_gcov_count (bb) * OVERLAP_BASE / count_total, - bb->frequency * OVERLAP_BASE / freq_total); - - return overlap; -} /* Compute the branch probabilities for the various branches. Annotate them accordingly. @@ -676,14 +644,6 @@ compute_branch_probabilities (unsigned c } } } - if (dump_file) - { - int overlap = compute_frequency_overlap (); - gimple_dump_cfg (dump_file, dump_flags); - fprintf (dump_file, "Static profile overlap: %d.%d%%\n", - overlap / (OVERLAP_BASE / 100), - overlap % (OVERLAP_BASE / 100)); - } total_num_passes += passes; if (dump_file) @@ -829,10 +789,18 @@ compute_branch_probabilities (unsigned c } } - FOR_ALL_BB_FN (bb, cfun) - { + /* If we have real data, use them! */ + if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun)) + || !flag_guess_branch_prob) + FOR_ALL_BB_FN (bb, cfun) bb->count = profile_count::from_gcov_type (bb_gcov_count (bb)); - } + /* If function was not trained, preserve local estimates including statically + determined zero counts. */ + else + FOR_ALL_BB_FN (bb, cfun) + if (!(bb->count == profile_count::zero ())) + bb->count = bb->count.global0 (); + bb_gcov_counts.release (); delete edge_gcov_counts; edge_gcov_counts = NULL; Index: regs.h =================================================================== --- regs.h (revision 254348) +++ regs.h (working copy) @@ -130,8 +130,10 @@ extern size_t reg_info_p_size; frequency. */ #define REG_FREQ_FROM_BB(bb) (optimize_function_for_size_p (cfun) \ ? REG_FREQ_MAX \ - : ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ - ? 
((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ + : ((bb)->count.to_frequency (cfun) \ + * REG_FREQ_MAX / BB_FREQ_MAX) \ + ? ((bb)->count.to_frequency (cfun) \ + * REG_FREQ_MAX / BB_FREQ_MAX) \ : 1) /* Indexed by N, gives number of insns in which register N dies. Index: sched-ebb.c =================================================================== --- sched-ebb.c (revision 254348) +++ sched-ebb.c (working copy) @@ -231,11 +231,9 @@ rank (rtx_insn *insn1, rtx_insn *insn2) basic_block bb1 = BLOCK_FOR_INSN (insn1); basic_block bb2 = BLOCK_FOR_INSN (insn2); - if (bb1->count > bb2->count - || bb1->frequency > bb2->frequency) + if (bb1->count > bb2->count) return -1; - if (bb1->count < bb2->count - || bb1->frequency < bb2->frequency) + if (bb1->count < bb2->count) return 1; return 0; } Index: shrink-wrap.c =================================================================== --- shrink-wrap.c (revision 254348) +++ shrink-wrap.c (working copy) @@ -561,7 +561,7 @@ handle_simple_exit (edge e) BB_END (old_bb) = end; redirect_edge_succ (e, new_bb); - new_bb->frequency = EDGE_FREQUENCY (e); + new_bb->count = e->count (); e->flags |= EDGE_FALLTHRU; e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -887,7 +887,7 @@ try_shrink_wrapping (edge *entry_edge, r if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) { num += EDGE_FREQUENCY (e); - den += e->src->frequency; + den += e->src->count.to_frequency (cfun); } if (den == 0) @@ -920,8 +920,6 @@ try_shrink_wrapping (edge *entry_edge, r if (dump_file) fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index); - bb->frequency = RDIV (num * bb->frequency, den); - dup->frequency -= bb->frequency; bb->count = bb->count.apply_scale (num, den); dup->count -= bb->count; } @@ -995,8 +993,7 @@ try_shrink_wrapping (edge *entry_edge, r continue; } - new_bb->count += e->src->count.apply_probability (e->probability); - new_bb->frequency += EDGE_FREQUENCY (e); + new_bb->count += e->count (); 
redirect_edge_and_branch_force (e, new_bb); if (dump_file) @@ -1181,7 +1178,7 @@ place_prologue_for_one_component (unsign work: this does not always add up to the block frequency at all, and even if it does, rounding error makes for bad decisions. */ - SW (bb)->own_cost = bb->frequency; + SW (bb)->own_cost = bb->count.to_frequency (cfun); edge e; edge_iterator ei; Index: testsuite/gcc.dg/no-strict-overflow-3.c =================================================================== --- testsuite/gcc.dg/no-strict-overflow-3.c (revision 254348) +++ testsuite/gcc.dg/no-strict-overflow-3.c (working copy) @@ -9,7 +9,7 @@ int foo (int i, int j) { - return i + 100 < j + 1000; + return i + 100 < j + 1234; } -/* { dg-final { scan-tree-dump "1000" "optimized" } } */ +/* { dg-final { scan-tree-dump "1234" "optimized" } } */ Index: testsuite/gcc.dg/strict-overflow-3.c =================================================================== --- testsuite/gcc.dg/strict-overflow-3.c (revision 254348) +++ testsuite/gcc.dg/strict-overflow-3.c (working copy) @@ -9,7 +9,7 @@ int foo (int i, int j) { - return i + 100 < j + 1000; + return i + 100 < j + 1234; } -/* { dg-final { scan-tree-dump-not "1000" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "1234" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c (working copy) @@ -290,7 +290,7 @@ RNG (0, 6, 8, "%s%ls", "1", L"2"); /* Only conditional calls to must_not_eliminate must be made (with any probability): - { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } } - { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! 
lp64 } } } } } + { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } } + { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! lp64 } } } } } No unconditional calls to abort should be made: { dg-final { scan-tree-dump-not ";\n *must_not_eliminate" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/dump-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/dump-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/dump-2.c (working copy) @@ -6,4 +6,4 @@ int f(void) return 0; } -/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[100\\\.00%\\\] \\\[count: INV\\\]:" "optimized" } } */ +/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: 10000\\\]:" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-10.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-10.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-10.c (working copy) @@ -26,5 +26,5 @@ int foo (int x, int n) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-11.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-11.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-11.c (working copy) @@ -24,5 +24,4 @@ int foo (float *x) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-12.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-12.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-12.c (working copy) @@ -29,6 +29,5 @@ int foo (int x) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c (working copy) @@ -39,4 +39,4 @@ int main1 () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c (working copy) @@ -43,5 +43,4 @@ void foo(const int * __restrict__ zr_in, which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-5.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-5.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-5.c (working copy) @@ -27,4 +27,4 @@ dct_unquantize_h263_inter_c (short *bloc which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-8.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-8.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-8.c (working copy) @@ -22,5 +22,4 @@ void test () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-9.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-9.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-9.c (working copy) @@ -26,4 +26,4 @@ int foo (int x, int n) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-cd.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-cd.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-cd.c (working copy) @@ -32,5 +32,4 @@ void foo (int *x1, int *x2, int *x3, int which is folded by vectorizer. 
Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr56541.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr56541.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr56541.c (working copy) @@ -29,5 +29,4 @@ void foo() which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr68583.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (working copy) @@ -26,5 +26,5 @@ void foo (long *a) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c (working copy) @@ -20,5 +20,4 @@ void foo (int a[], int b[]) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c (working copy) @@ -21,4 +21,4 @@ foo (const char *u, const char *v, long which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.target/i386/pr61403.c =================================================================== --- testsuite/gcc.target/i386/pr61403.c (revision 254348) +++ testsuite/gcc.target/i386/pr61403.c (working copy) @@ -23,4 +23,4 @@ norm (struct XYZ *in, struct XYZ *out, i } } -/* { dg-final { scan-assembler "blend" } } */ +/* { dg-final { scan-assembler "rsqrtps" } } */ Index: tracer.c =================================================================== --- tracer.c (revision 254348) +++ tracer.c (working copy) @@ -179,7 +179,7 @@ find_best_predecessor (basic_block bb) if (!best || ignore_bb_p (best->src)) return NULL; if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE - < bb->frequency * branch_ratio_cutoff) + < bb->count.to_frequency (cfun) * branch_ratio_cutoff) return NULL; return best; } @@ -194,7 +194,7 @@ find_trace (basic_block bb, basic_block edge e; if (dump_file) - fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); + fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun)); while ((e = find_best_predecessor (bb)) != NULL) { @@ -203,11 +203,11 @@ find_trace (basic_block bb, basic_block || find_best_successor (bb2) != e) break; if (dump_file) - fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); + fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); bb = bb2; } if (dump_file) - fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); + fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun)); trace[i++] = bb; /* Follow the trace in forward direction. 
*/ @@ -218,7 +218,7 @@ find_trace (basic_block bb, basic_block || find_best_predecessor (bb) != e) break; if (dump_file) - fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); + fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); trace[i++] = bb; } if (dump_file) @@ -282,11 +282,11 @@ tail_duplicate (void) { int n = count_insns (bb); if (!ignore_bb_p (bb)) - blocks[bb->index] = heap.insert (-bb->frequency, bb); + blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb); counts [bb->index] = n; ninsns += n; - weighted_insns += n * bb->frequency; + weighted_insns += n * bb->count.to_frequency (cfun); } if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) @@ -314,7 +314,7 @@ tail_duplicate (void) n = find_trace (bb, trace); bb = trace[0]; - traced_insns += bb->frequency * counts [bb->index]; + traced_insns += bb->count.to_frequency (cfun) * counts [bb->index]; if (blocks[bb->index]) { heap.delete_node (blocks[bb->index]); @@ -330,7 +330,7 @@ tail_duplicate (void) heap.delete_node (blocks[bb2->index]); blocks[bb2->index] = NULL; } - traced_insns += bb2->frequency * counts [bb2->index]; + traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index]; if (EDGE_COUNT (bb2->preds) > 1 && can_duplicate_block_p (bb2) /* We have the tendency to duplicate the loop header @@ -345,11 +345,11 @@ tail_duplicate (void) /* Reconsider the original copy of block we've duplicated. Removing the most common predecessor may make it to be head. 
*/ - blocks[bb2->index] = heap.insert (-bb2->frequency, bb2); + blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2); if (dump_file) fprintf (dump_file, "Duplicated %i as %i [%i]\n", - bb2->index, copy->index, copy->frequency); + bb2->index, copy->index, copy->count.to_frequency (cfun)); bb2 = copy; changed = true; Index: trans-mem.c =================================================================== --- trans-mem.c (revision 254348) +++ trans-mem.c (working copy) @@ -2932,7 +2932,6 @@ expand_transaction (struct tm_region *re edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE); redirect_edge_pred (fallthru_edge, join_bb); - join_bb->frequency = test_bb->frequency = transaction_bb->frequency; join_bb->count = test_bb->count = transaction_bb->count; ei->probability = profile_probability::always (); @@ -2940,7 +2939,6 @@ expand_transaction (struct tm_region *re ef->probability = profile_probability::unlikely (); code_bb->count = et->count (); - code_bb->frequency = EDGE_FREQUENCY (et); transaction_bb = join_bb; } @@ -2964,7 +2962,6 @@ expand_transaction (struct tm_region *re gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU); - test_bb->frequency = transaction_bb->frequency; test_bb->count = transaction_bb->count; ei->probability = profile_probability::always (); @@ -3006,7 +3003,6 @@ expand_transaction (struct tm_region *re edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags); e->probability = fallthru_edge->probability; test_bb->count = fallthru_edge->count (); - test_bb->frequency = EDGE_FREQUENCY (e); // Now update the edges to the inst/uninist implementations. // For now assume that the paths are equally likely. 
When using HTM, Index: tree-call-cdce.c =================================================================== --- tree-call-cdce.c (revision 254348) +++ tree-call-cdce.c (working copy) @@ -906,7 +906,6 @@ shrink_wrap_one_built_in_call_with_conds Here we take the second approach because it's slightly simpler and because it's easy to see that it doesn't lose profile counts. */ bi_call_bb->count = profile_count::zero (); - bi_call_bb->frequency = 0; while (!edges.is_empty ()) { edge_pair e = edges.pop (); @@ -919,16 +918,10 @@ shrink_wrap_one_built_in_call_with_conds nocall_edge->probability = profile_probability::always () - call_edge->probability; - unsigned int call_frequency - = call_edge->probability.apply (src_bb->frequency); - bi_call_bb->count += call_edge->count (); - bi_call_bb->frequency += call_frequency; if (nocall_edge->dest != join_tgt_bb) - { - nocall_edge->dest->frequency = src_bb->frequency - call_frequency; - } + nocall_edge->dest->count = src_bb->count - bi_call_bb->count; } if (dom_info_available_p (CDI_DOMINATORS)) Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 254348) +++ tree-cfg.c (working copy) @@ -1071,7 +1071,6 @@ gimple_find_sub_bbs (gimple_seq seq, gim tree_guess_outgoing_edge_probabilities (bb); if (all || profile_status_for_fn (cfun) == PROFILE_READ) bb->count = cnt; - bb->frequency = freq; bb = bb->next_bb; } @@ -2081,7 +2080,6 @@ gimple_merge_blocks (basic_block a, basi if (a->loop_father == b->loop_father) { a->count = a->count.merge (b->count); - a->frequency = MAX (a->frequency, b->frequency); } /* Merge the sequences. 
*/ @@ -2840,7 +2838,6 @@ gimple_split_edge (edge edge_in) after_bb = split_edge_bb_loc (edge_in); new_bb = create_empty_bb (after_bb); - new_bb->frequency = EDGE_FREQUENCY (edge_in); new_bb->count = edge_in->count (); e = redirect_edge_and_branch (edge_in, new_bb); @@ -6306,9 +6303,8 @@ gimple_duplicate_sese_region (edge entry bool free_region_copy = false, copying_header = false; struct loop *loop = entry->dest->loop_father; edge exit_copy; - vec<basic_block> doms; + vec<basic_block> doms = vNULL; edge redirected; - int total_freq = 0, entry_freq = 0; profile_count total_count = profile_count::uninitialized (); profile_count entry_count = profile_count::uninitialized (); @@ -6376,21 +6372,10 @@ gimple_duplicate_sese_region (edge entry if (entry_count > total_count) entry_count = total_count; } - if (!(total_count > 0) || !(entry_count > 0)) - { - total_freq = entry->dest->frequency; - entry_freq = EDGE_FREQUENCY (entry); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. 
*/ - if (total_freq == 0) - total_freq = 1; - else if (entry_freq > total_freq) - entry_freq = total_freq; - } copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, split_edge_bb_loc (entry), update_dominance); - if (total_count > 0 && entry_count > 0) + if (total_count.initialized_p () && entry_count.initialized_p ()) { scale_bbs_frequencies_profile_count (region, n_region, total_count - entry_count, @@ -6398,12 +6383,6 @@ gimple_duplicate_sese_region (edge entry scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count, total_count); } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); - } if (copying_header) { @@ -6492,7 +6471,6 @@ gimple_duplicate_sese_tail (edge entry, struct loop *orig_loop = entry->dest->loop_father; basic_block switch_bb, entry_bb, nentry_bb; vec<basic_block> doms; - int total_freq = 0, exit_freq = 0; profile_count total_count = profile_count::uninitialized (), exit_count = profile_count::uninitialized (); edge exits[2], nexits[2], e; @@ -6537,30 +6515,16 @@ gimple_duplicate_sese_tail (edge entry, inside. */ doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region); - if (exit->src->count > 0) - { - total_count = exit->src->count; - exit_count = exit->count (); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (exit_count > total_count) - exit_count = total_count; - } - else - { - total_freq = exit->src->frequency; - exit_freq = EDGE_FREQUENCY (exit); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (total_freq == 0) - total_freq = 1; - if (exit_freq > total_freq) - exit_freq = total_freq; - } + total_count = exit->src->count; + exit_count = exit->count (); + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. 
*/ + if (exit_count > total_count) + exit_count = total_count; copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop, split_edge_bb_loc (exit), true); - if (total_count.initialized_p ()) + if (total_count.initialized_p () && exit_count.initialized_p ()) { scale_bbs_frequencies_profile_count (region, n_region, total_count - exit_count, @@ -6568,12 +6532,6 @@ gimple_duplicate_sese_tail (edge entry, scale_bbs_frequencies_profile_count (region_copy, n_region, exit_count, total_count); } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq); - } /* Create the switch block, and put the exit condition to it. */ entry_bb = entry->dest; @@ -7614,9 +7572,15 @@ move_sese_region_to_fn (struct function FIXME, this is silly. The CFG ought to become a parameter to these helpers. */ push_cfun (dest_cfun); - make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = entry_bb->count; + make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU); if (exit_bb) - make_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + { + make_single_succ_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = exit_bb->count; + } + else + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero (); pop_cfun (); /* Back in the original function, the SESE region has disappeared, @@ -8691,7 +8655,7 @@ gimple_account_profile_record (basic_blo else if (profile_status_for_fn (cfun) == PROFILE_GUESSED) record->time[after_pass] += estimate_num_insns (gsi_stmt (i), - &eni_time_weights) * bb->frequency; + &eni_time_weights) * bb->count.to_frequency (cfun); } } @@ -8843,7 +8807,6 @@ insert_cond_bb (basic_block bb, gimple * edge e = make_edge (bb, new_bb, EDGE_TRUE_VALUE); e->probability = prob; new_bb->count = e->count (); - new_bb->frequency = prob.apply (bb->frequency); 
make_single_succ_edge (new_bb, fall->dest, EDGE_FALLTHRU); /* Fix edge for split bb. */ Index: tree-complex.c =================================================================== --- tree-complex.c (revision 254348) +++ tree-complex.c (working copy) @@ -1191,7 +1191,6 @@ expand_complex_div_wide (gimple_stmt_ite bb_join = e->dest; bb_true = create_empty_bb (bb_cond); bb_false = create_empty_bb (bb_true); - bb_true->frequency = bb_false->frequency = bb_cond->frequency / 2; bb_true->count = bb_false->count = bb_cond->count.apply_probability (profile_probability::even ()); Index: tree-eh.c =================================================================== --- tree-eh.c (revision 254348) +++ tree-eh.c (working copy) @@ -3224,6 +3224,7 @@ lower_resx (basic_block bb, gresx *stmt, gimple_stmt_iterator gsi2; new_bb = create_empty_bb (bb); + new_bb->count = bb->count; add_bb_to_loop (new_bb, bb->loop_father); lab = gimple_block_label (new_bb); gsi2 = gsi_start_bb (new_bb); Index: tree-inline.c =================================================================== --- tree-inline.c (revision 254348) +++ tree-inline.c (working copy) @@ -1763,16 +1763,15 @@ remap_gimple_stmt (gimple *stmt, copy_bo later */ static basic_block -copy_bb (copy_body_data *id, basic_block bb, int frequency_scale, +copy_bb (copy_body_data *id, basic_block bb, profile_count num, profile_count den) { gimple_stmt_iterator gsi, copy_gsi, seq_gsi; basic_block copy_basic_block; tree decl; - gcov_type freq; basic_block prev; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()); + bool scale = !num.initialized_p () + || (den.nonzero_p () || num == profile_count::zero ()); /* Search for previous copied basic block. 
*/ prev = bb->prev_bb; @@ -1784,15 +1783,8 @@ copy_bb (copy_body_data *id, basic_block copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux); if (scale) copy_basic_block->count = bb->count.apply_scale (num, den); - - /* We are going to rebuild frequencies from scratch. These values - have just small importance to drive canonicalize_loop_headers. */ - freq = apply_scale ((gcov_type)bb->frequency, frequency_scale); - - /* We recompute frequencies after inlining, so this is quite safe. */ - if (freq > BB_FREQ_MAX) - freq = BB_FREQ_MAX; - copy_basic_block->frequency = freq; + else if (num.initialized_p ()) + copy_basic_block->count = bb->count; copy_gsi = gsi_start_bb (copy_basic_block); @@ -2068,8 +2060,8 @@ copy_bb (copy_body_data *id, basic_block fprintf (dump_file, "Orig bb: %i, orig bb freq %i, new bb freq %i\n", bb->index, - bb->frequency, - copy_basic_block->frequency); + bb->count.to_frequency (cfun), + copy_basic_block->count.to_frequency (cfun)); } } } @@ -2507,11 +2499,8 @@ initialize_cfun (tree new_fndecl, tree c profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); - /* FIXME: When all counts are known to be zero, scaling is also meaningful. 
- */ if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p () - && count.initialized_p () - && ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ()) + && count.ipa ().initialized_p ()) { ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, @@ -2520,10 +2509,13 @@ initialize_cfun (tree new_fndecl, tree c EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); } - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency - = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->frequency; - EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = - EXIT_BLOCK_PTR_FOR_FN (src_cfun)->frequency; + else + { + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; + EXIT_BLOCK_PTR_FOR_FN (cfun)->count + = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count; + } if (src_cfun->eh) init_eh_for_function (); @@ -2680,27 +2672,11 @@ redirect_all_calls (copy_body_data * id, } } -/* Convert estimated frequencies into counts for NODE, scaling COUNT - with each bb's frequency. Used when NODE has a 0-weight entry - but we are about to inline it into a non-zero count call bb. - See the comments for handle_missing_profiles() in predict.c for - when this can happen for COMDATs. */ - -void -freqs_to_counts (struct cgraph_node *node, profile_count count) -{ - basic_block bb; - struct function *fn = DECL_STRUCT_FUNCTION (node->decl); - - FOR_ALL_BB_FN(bb, fn) - bb->count = count.apply_scale (bb->frequency, BB_FREQ_MAX); -} - /* Make a copy of the body of FN so that it can be inserted inline in another function. Walks FN via CFG, returns new fndecl. 
*/ static tree -copy_cfg_body (copy_body_data * id, profile_count count, int frequency_scale, +copy_cfg_body (copy_body_data * id, profile_count, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -2712,31 +2688,10 @@ copy_cfg_body (copy_body_data * id, prof tree new_fndecl = NULL; bool need_debug_cleanup = false; int last; - int incoming_frequency = 0; - profile_count incoming_count = profile_count::zero (); - profile_count num = count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()); + profile_count num = entry_block_map->count; - /* This can happen for COMDAT routines that end up with 0 counts - despite being called (see the comments for handle_missing_profiles() - in predict.c as to why). Apply counts to the blocks in the callee - before inlining, using the guessed edge frequencies, so that we don't - end up with a 0-count inline body which can confuse downstream - optimizations such as function splitting. */ - if (!(ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count > 0) && count > 0) - { - /* Apply the larger of the call bb count and the total incoming - call edge count to the callee. */ - profile_count in_count = profile_count::zero (); - struct cgraph_edge *in_edge; - for (in_edge = id->src_node->callers; in_edge; - in_edge = in_edge->next_caller) - if (in_edge->count.initialized_p ()) - in_count += in_edge->count; - freqs_to_counts (id->src_node, count > in_count ? count : in_count); - } + cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); /* Register specific tree functions. 
*/ gimple_register_cfg_hooks (); @@ -2750,25 +2705,18 @@ copy_cfg_body (copy_body_data * id, prof { edge e; edge_iterator ei; + den = profile_count::zero (); FOR_EACH_EDGE (e, ei, new_entry->preds) if (!e->src->aux) - incoming_frequency += EDGE_FREQUENCY (e); - if (scale) - incoming_count = incoming_count.apply_scale (num, den); - else - incoming_count = profile_count::uninitialized (); - incoming_frequency - = apply_scale ((gcov_type)incoming_frequency, frequency_scale); - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = incoming_count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = incoming_frequency; + den += e->count (); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den; } /* Must have a CFG here at this point. */ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (callee_fndecl))); - cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); ENTRY_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = entry_block_map; EXIT_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = exit_block_map; @@ -2784,7 +2732,7 @@ copy_cfg_body (copy_body_data * id, prof FOR_EACH_BB_FN (bb, cfun_to_copy) if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index)) { - basic_block new_bb = copy_bb (id, bb, frequency_scale, num, den); + basic_block new_bb = copy_bb (id, bb, num, den); bb->aux = new_bb; new_bb->aux = bb; new_bb->loop_father = entry_block_map->loop_father; @@ -3011,7 +2959,7 @@ copy_tree_body (copy_body_data *id) another function. */ static tree -copy_body (copy_body_data *id, profile_count count, int frequency_scale, +copy_body (copy_body_data *id, profile_count count, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -3020,7 +2968,7 @@ copy_body (copy_body_data *id, profile_c /* If this body has a CFG, walk CFG and copy. 
*/ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl))); - body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map, + body = copy_cfg_body (id, count, entry_block_map, exit_block_map, new_entry); copy_debug_stmts (id); @@ -4771,7 +4719,6 @@ expand_call_inline (basic_block bb, gimp a self-referential call; if we're calling ourselves, we need to duplicate our body before altering anything. */ copy_body (id, cg_edge->callee->count, - GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE), bb, return_block, NULL); reset_debug_bindings (id, stmt_gsi); @@ -5146,6 +5093,7 @@ optimize_inline_calls (tree fn) } /* Fold queued statements. */ + counts_to_freqs (); fold_marked_statements (last, id.statements_to_fold); delete id.statements_to_fold; @@ -6090,7 +6038,7 @@ tree_function_versioning (tree old_decl, } /* Copy the Function's body. */ - copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE, + copy_body (&id, old_entry_block->count, ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun), new_entry); @@ -6122,6 +6070,7 @@ tree_function_versioning (tree old_decl, free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); + counts_to_freqs (); fold_marked_statements (0, id.statements_to_fold); delete id.statements_to_fold; delete_unreachable_blocks_update_callgraph (&id); @@ -6141,20 +6090,20 @@ tree_function_versioning (tree old_decl, struct cgraph_edge *e; rebuild_frequencies (); - new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); for (e = new_version_node->callees; e; e = e->next_callee) { basic_block bb = gimple_bb (e->call_stmt); e->frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); - e->count = bb->count; + e->count = bb->count.ipa (); } for (e = new_version_node->indirect_calls; e; e = e->next_callee) { basic_block bb = gimple_bb (e->call_stmt); e->frequency = 
compute_call_stmt_bb_frequency (current_function_decl, bb); - e->count = bb->count; + e->count = bb->count.ipa (); } } Index: tree-ssa-coalesce.c =================================================================== --- tree-ssa-coalesce.c (revision 254348) +++ tree-ssa-coalesce.c (working copy) @@ -164,7 +164,7 @@ coalesce_cost (int frequency, bool optim static inline int coalesce_cost_bb (basic_block bb) { - return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb)); + return coalesce_cost (bb->count.to_frequency (cfun), optimize_bb_for_size_p (bb)); } Index: tree-ssa-ifcombine.c =================================================================== --- tree-ssa-ifcombine.c (revision 254348) +++ tree-ssa-ifcombine.c (working copy) @@ -366,7 +366,6 @@ update_profile_after_ifcombine (basic_bl - inner_taken->probability; outer_to_inner->probability = profile_probability::always (); - inner_cond_bb->frequency = outer_cond_bb->frequency; outer2->probability = profile_probability::never (); } Index: tree-ssa-loop-im.c =================================================================== --- tree-ssa-loop-im.c (revision 254348) +++ tree-ssa-loop-im.c (working copy) @@ -1803,7 +1803,7 @@ execute_sm_if_changed (edge ex, tree mem for (hash_set<basic_block>::iterator it = flag_bbs->begin (); it != flag_bbs->end (); ++it) { - freq_sum += (*it)->frequency; + freq_sum += (*it)->count.to_frequency (cfun); if ((*it)->count.initialized_p ()) count_sum += (*it)->count, ncount ++; if (dominated_by_p (CDI_DOMINATORS, ex->src, *it)) @@ -1815,20 +1815,15 @@ execute_sm_if_changed (edge ex, tree mem if (flag_probability.initialized_p ()) ; - else if (ncount == nbbs && count_sum > 0 && preheader->count () >= count_sum) + else if (ncount == nbbs + && preheader->count () >= count_sum && preheader->count ().nonzero_p ()) { flag_probability = count_sum.probability_in (preheader->count ()); if (flag_probability > cap) flag_probability = cap; } - else if (freq_sum > 0 && EDGE_FREQUENCY 
(preheader) >= freq_sum) - { - flag_probability = profile_probability::from_reg_br_prob_base - (GCOV_COMPUTE_SCALE (freq_sum, EDGE_FREQUENCY (preheader))); - if (flag_probability > cap) - flag_probability = cap; - } - else + + if (!flag_probability.initialized_p ()) flag_probability = cap; /* ?? Insert store after previous store if applicable. See note @@ -1861,7 +1856,6 @@ execute_sm_if_changed (edge ex, tree mem old_dest = ex->dest; new_bb = split_edge (ex); then_bb = create_empty_bb (new_bb); - then_bb->frequency = flag_probability.apply (new_bb->frequency); then_bb->count = new_bb->count.apply_probability (flag_probability); if (irr) then_bb->flags = BB_IRREDUCIBLE_LOOP; Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 254348) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -647,7 +647,6 @@ unloop_loops (bitmap loop_closed_ssa_inv add_bb_to_loop (latch_edge->dest, current_loops->tree_root); latch_edge->dest->count = profile_count::zero (); - latch_edge->dest->frequency = 0; set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); gsi = gsi_start_bb (latch_edge->dest); @@ -1090,7 +1089,6 @@ try_peel_loop (struct loop *loop, } } profile_count entry_count = profile_count::zero (); - int entry_freq = 0; edge e; edge_iterator ei; @@ -1099,15 +1097,10 @@ try_peel_loop (struct loop *loop, { if (e->src->count.initialized_p ()) entry_count = e->src->count + e->src->count; - entry_freq += e->src->frequency; gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); } profile_probability p = profile_probability::very_unlikely (); - if (loop->header->count > 0) - p = entry_count.probability_in (loop->header->count); - else if (loop->header->frequency) - p = profile_probability::probability_in_gcov_type - (entry_freq, loop->header->frequency); + p = entry_count.probability_in (loop->header->count); scale_loop_profile (loop, p, 0); bitmap_set_bit (peeled_loops, loop->num); 
return true; Index: tree-ssa-loop-ivopts.c =================================================================== --- tree-ssa-loop-ivopts.c (revision 254348) +++ tree-ssa-loop-ivopts.c (working copy) @@ -4457,8 +4457,8 @@ get_address_cost (struct ivopts_data *da static comp_cost get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) { - int loop_freq = data->current_loop->header->frequency; - int bb_freq = gimple_bb (at)->frequency; + int loop_freq = data->current_loop->header->count.to_frequency (cfun); + int bb_freq = gimple_bb (at)->count.to_frequency (cfun); if (loop_freq != 0) { gcc_assert (cost.scratch <= cost.cost); Index: tree-ssa-loop-manip.c =================================================================== --- tree-ssa-loop-manip.c (revision 254348) +++ tree-ssa-loop-manip.c (working copy) @@ -1122,6 +1122,9 @@ niter_for_unrolled_loop (struct loop *lo converts back. */ gcov_type new_est_niter = est_niter / factor; + if (est_niter == -1) + return -1; + /* Without profile feedback, loops for which we do not know a better estimate are assumed to roll 10 times. When we unroll such loop, it appears to roll too little, and it may even seem to be cold. To avoid this, we @@ -1370,14 +1373,7 @@ tree_transform_and_unroll_loop (struct l freq_h = loop->header->count; freq_e = (loop_preheader_edge (loop))->count (); - /* Use frequency only if counts are zero. */ - if (!(freq_h > 0) && !(freq_e > 0)) - { - freq_h = profile_count::from_gcov_type (loop->header->frequency); - freq_e = profile_count::from_gcov_type - (EDGE_FREQUENCY (loop_preheader_edge (loop))); - } - if (freq_h > 0) + if (freq_h.nonzero_p ()) { /* Avoid dropping loop body profile counter to 0 because of zero count in loop's preheader. 
*/ @@ -1392,7 +1388,6 @@ tree_transform_and_unroll_loop (struct l .apply_scale (1, new_est_niter + 1); rest->count += new_exit->count (); - rest->frequency += EDGE_FREQUENCY (new_exit); new_nonexit = single_pred_edge (loop->latch); prob = new_nonexit->probability; Index: tree-ssa-loop-niter.c =================================================================== --- tree-ssa-loop-niter.c (revision 254348) +++ tree-ssa-loop-niter.c (working copy) @@ -3901,7 +3901,7 @@ estimate_numbers_of_iterations (struct l recomputing iteration bounds later in the compilation process will just introduce random roundoff errors. */ if (!loop->any_estimate - && loop->header->count > 0) + && loop->header->count.reliable_p ()) { gcov_type nit = expected_loop_iterations_unbounded (loop); bound = gcov_type_to_wide_int (nit); Index: tree-ssa-loop-unswitch.c =================================================================== --- tree-ssa-loop-unswitch.c (revision 254348) +++ tree-ssa-loop-unswitch.c (working copy) @@ -852,7 +852,7 @@ hoist_guard (struct loop *loop, edge gua /* Determine the probability that we skip the loop. Assume that loop has same average number of iterations regardless outcome of guard. */ new_edge->probability = guard->probability; - profile_count skip_count = guard->src->count > 0 + profile_count skip_count = guard->src->count.nonzero_p () ? guard->count ().apply_scale (pre_header->count, guard->src->count) : guard->count ().apply_probability (new_edge->probability); @@ -875,7 +875,6 @@ hoist_guard (struct loop *loop, edge gua to loop header... */ e->probability = new_edge->probability.invert (); e->dest->count = e->count (); - e->dest->frequency = EDGE_FREQUENCY (e); /* ... now update profile to represent that original guard will be optimized away ... 
*/ Index: tree-ssa-sink.c =================================================================== --- tree-ssa-sink.c (revision 254348) +++ tree-ssa-sink.c (working copy) @@ -226,7 +226,8 @@ select_best_block (basic_block early_bb, /* If BEST_BB is at the same nesting level, then require it to have significantly lower execution frequency to avoid gratutious movement. */ if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb) - && best_bb->frequency < (early_bb->frequency * threshold / 100.0)) + && best_bb->count.to_frequency (cfun) + < (early_bb->count.to_frequency (cfun) * threshold / 100.0)) return best_bb; /* No better block found, so return EARLY_BB, which happens to be the Index: tree-ssa-tail-merge.c =================================================================== --- tree-ssa-tail-merge.c (revision 254348) +++ tree-ssa-tail-merge.c (working copy) @@ -1530,8 +1530,6 @@ static void replace_block_by (basic_block bb1, basic_block bb2) { edge pred_edge; - edge e1, e2; - edge_iterator ei; unsigned int i; gphi *bb2_phi; @@ -1560,9 +1558,13 @@ replace_block_by (basic_block bb1, basic bb2->count += bb1->count; + /* FIXME: Fix merging of probabilities. They need to be redistributed + according to the relative counts of merged BBs. */ +#if 0 /* Merge the outgoing edge counts from bb1 onto bb2. */ profile_count out_sum = profile_count::zero (); int out_freq_sum = 0; + edge e1, e2; /* Recompute the edge probabilities from the new merged edge count. 
Use the sum of the new merged edge counts computed above instead @@ -1580,7 +1582,6 @@ replace_block_by (basic_block bb1, basic out_sum += e1->count (); out_freq_sum += EDGE_FREQUENCY (e1); } - FOR_EACH_EDGE (e1, ei, bb1->succs) { e2 = find_edge (bb2, e1->dest); @@ -1589,9 +1590,9 @@ replace_block_by (basic_block bb1, basic { e2->probability = e2->count ().probability_in (bb2->count); } - else if (bb1->frequency && bb2->frequency) + else if (bb1->count.to_frequency (cfun) && bb2->count.to_frequency (cfun)) e2->probability = e1->probability; - else if (bb2->frequency && !bb1->frequency) + else if (bb2->count.to_frequency (cfun) && !bb1->count.to_frequency (cfun)) ; else if (out_freq_sum) e2->probability = profile_probability::from_reg_br_prob_base @@ -1600,9 +1601,7 @@ replace_block_by (basic_block bb1, basic out_freq_sum)); out_sum += e2->count (); } - bb2->frequency += bb1->frequency; - if (bb2->frequency > BB_FREQ_MAX) - bb2->frequency = BB_FREQ_MAX; +#endif /* Move over any user labels from bb1 after the bb2 labels. */ gimple_stmt_iterator gsi1 = gsi_start_bb (bb1); Index: tree-ssa-threadupdate.c =================================================================== --- tree-ssa-threadupdate.c (revision 254348) +++ tree-ssa-threadupdate.c (working copy) @@ -339,7 +339,6 @@ create_block_for_threading (basic_block e->aux = NULL; /* Zero out the profile, since the block is unreachable for now. 
*/ - rd->dup_blocks[count]->frequency = 0; rd->dup_blocks[count]->count = profile_count::uninitialized (); if (duplicate_blocks) bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index); @@ -590,7 +589,7 @@ any_remaining_duplicated_blocks (vec<jum } -/* Compute the amount of profile count/frequency coming into the jump threading +/* Compute the amount of profile count coming into the jump threading path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to @@ -598,7 +597,7 @@ any_remaining_duplicated_blocks (vec<jum edges that need to be ignored in the analysis. Return true if path contains a joiner, false otherwise. - In the non-joiner case, this is straightforward - all the counts/frequency + In the non-joiner case, this is straightforward - all the counts flowing into the jump threading path should flow through the duplicated block and out of the duplicated path. @@ -851,16 +850,14 @@ compute_path_counts (struct redirection_ /* Update the counts and frequencies for both an original path edge EPATH and its duplicate EDUP. The duplicate source block - will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ, + will get a count of PATH_IN_COUNT and PATH_IN_FREQ, and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */ static void update_profile (edge epath, edge edup, profile_count path_in_count, - profile_count path_out_count, int path_in_freq) + profile_count path_out_count) { - if (!(path_in_count > 0)) - return; - /* First update the duplicated block's count / frequency. */ + /* First update the duplicated block's count. 
*/ if (edup) { basic_block dup_block = edup->src; @@ -894,167 +891,54 @@ update_profile (edge epath, edge edup, p if (esucc != edup) esucc->probability *= scale; } - edup->probability = edup_prob; + if (edup_prob.initialized_p ()) + edup->probability = edup_prob; - /* FIXME once freqs_to_counts is dropped re-enable this check. */ - gcc_assert (!dup_block->count.initialized_p () || 1); - gcc_assert (dup_block->frequency == 0); + gcc_assert (!dup_block->count.initialized_p ()); dup_block->count = path_in_count; - dup_block->frequency = path_in_freq; } + if (path_in_count == profile_count::zero ()) + return; + profile_count final_count = epath->count () - path_out_count; - /* Now update the original block's count and frequency in the + /* Now update the original block's count in the opposite manner - remove the counts/freq that will flow into the duplicated block. Handle underflow due to precision/ rounding issues. */ epath->src->count -= path_in_count; - epath->src->frequency -= path_in_freq; - if (epath->src->frequency < 0) - epath->src->frequency = 0; /* Next update this path edge's original and duplicated counts. We know that the duplicated path will have path_out_count flowing out of it (in the joiner case this is the count along the duplicated path out of the duplicated joiner). This count can then be removed from the original path edge. 
*/ - if (epath->src->count > 0) - { - edge esucc; - edge_iterator ei; - profile_probability epath_prob = final_count.probability_in (epath->src->count); - - if (epath->probability > epath_prob) - { - profile_probability rev_scale - = (profile_probability::always () - epath->probability) - / (profile_probability::always () - epath_prob); - FOR_EACH_EDGE (esucc, ei, epath->src->succs) - if (esucc != epath) - esucc->probability /= rev_scale; - } - else if (epath->probability < epath_prob) - { - profile_probability scale - = (profile_probability::always () - epath_prob) - / (profile_probability::always () - epath->probability); - FOR_EACH_EDGE (esucc, ei, epath->src->succs) - if (esucc != epath) - esucc->probability *= scale; - } - epath->probability = epath_prob; - } -} - -/* Check if the paths through RD all have estimated frequencies but zero - profile counts. This is more accurate than checking the entry block - for a zero profile count, since profile insanities sometimes creep in. */ - -static bool -estimated_freqs_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - edge ein; + edge esucc; edge_iterator ei; - bool non_zero_freq = false; - FOR_EACH_EDGE (ein, ei, e->dest->preds) - { - if (ein->count () > 0) - return false; - non_zero_freq |= ein->src->frequency != 0; - } + profile_probability epath_prob = final_count.probability_in (epath->src->count); - for (unsigned int i = 1; i < path->length (); i++) + if (epath->probability > epath_prob) { - edge epath = (*path)[i]->e; - if (epath->src->count > 0) - return false; - non_zero_freq |= epath->src->frequency != 0; - edge esucc; + profile_probability rev_scale + = (profile_probability::always () - epath->probability) + / (profile_probability::always () - epath_prob); + FOR_EACH_EDGE (esucc, ei, epath->src->succs) + if (esucc != epath) + esucc->probability /= rev_scale; + } + else if (epath->probability < epath_prob) + { + profile_probability 
scale + = (profile_probability::always () - epath_prob) + / (profile_probability::always () - epath->probability); FOR_EACH_EDGE (esucc, ei, epath->src->succs) - { - if (esucc->count () > 0) - return false; - non_zero_freq |= esucc->src->frequency != 0; - } - } - return non_zero_freq; -} - - -/* Invoked for routines that have guessed frequencies and no profile - counts to record the block and edge frequencies for paths through RD - in the profile count fields of those blocks and edges. This is because - ssa_fix_duplicate_block_edges incrementally updates the block and - edge counts as edges are redirected, and it is difficult to do that - for edge frequencies which are computed on the fly from the source - block frequency and probability. When a block frequency is updated - its outgoing edge frequencies are affected and become difficult to - adjust. */ - -static void -freqs_to_counts_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - edge ein; - edge_iterator ei; - - FOR_EACH_EDGE (ein, ei, e->dest->preds) - ein->src->count = profile_count::from_gcov_type - (ein->src->frequency * REG_BR_PROB_BASE); - for (unsigned int i = 1; i < path->length (); i++) - { - edge epath = (*path)[i]->e; - /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding - errors applying the edge probability when the frequencies are very - small. */ - epath->src->count = - profile_count::from_gcov_type - (epath->src->frequency * REG_BR_PROB_BASE); - } -} - - -/* For routines that have guessed frequencies and no profile counts, where we - used freqs_to_counts_path to record block and edge frequencies for paths - through RD, we clear the counts after completing all updates for RD. - The updates in ssa_fix_duplicate_block_edges are based off the count fields, - but the block frequencies and edge probabilities were updated as well, - so we can simply clear the count fields. 
*/ - -static void -clear_counts_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - profile_count val = profile_count::uninitialized (); - if (profile_status_for_fn (cfun) == PROFILE_READ) - val = profile_count::zero (); - - edge ein; - edge_iterator ei; - - FOR_EACH_EDGE (ein, ei, e->dest->preds) - ein->src->count = val; - - /* First clear counts along original path. */ - for (unsigned int i = 1; i < path->length (); i++) - { - edge epath = (*path)[i]->e; - epath->src->count = val; - } - /* Also need to clear the counts along duplicated path. */ - for (unsigned int i = 0; i < 2; i++) - { - basic_block dup = rd->dup_blocks[i]; - if (!dup) - continue; - dup->count = val; + if (esucc != epath) + esucc->probability *= scale; } + if (epath_prob.initialized_p ()) + epath->probability = epath_prob; } /* Wire up the outgoing edges from the duplicate blocks and @@ -1072,20 +956,6 @@ ssa_fix_duplicate_block_edges (struct re profile_count path_out_count = profile_count::zero (); int path_in_freq = 0; - /* This routine updates profile counts, frequencies, and probabilities - incrementally. Since it is difficult to do the incremental updates - using frequencies/probabilities alone, for routines without profile - data we first take a snapshot of the existing block and edge frequencies - by copying them into the empty profile count fields. These counts are - then used to do the incremental updates, and cleared at the end of this - routine. If the function is marked as having a profile, we still check - to see if the paths through RD are using estimated frequencies because - the routine had zero profile counts. */ - bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ - || estimated_freqs_path (rd)); - if (do_freqs_to_counts) - freqs_to_counts_path (rd); - /* First determine how much profile count to move from original path to the duplicate path. 
This is tricky in the presence of a joiner (see comments for compute_path_counts), where some portion @@ -1096,7 +966,6 @@ ssa_fix_duplicate_block_edges (struct re &path_in_count, &path_out_count, &path_in_freq); - int cur_path_freq = path_in_freq; for (unsigned int count = 0, i = 1; i < path->length (); i++) { edge epath = (*path)[i]->e; @@ -1162,19 +1031,14 @@ ssa_fix_duplicate_block_edges (struct re } } - /* Update the counts and frequency of both the original block + /* Update the counts of both the original block and path edge, and the duplicates. The path duplicate's - incoming count and frequency are the totals for all edges + incoming count are the totals for all edges incoming to this jump threading path computed earlier. And we know that the duplicated path will have path_out_count flowing out of it (i.e. along the duplicated path out of the duplicated joiner). */ - update_profile (epath, e2, path_in_count, path_out_count, - path_in_freq); - - /* Record the frequency flowing to the downstream duplicated - path blocks. */ - cur_path_freq = EDGE_FREQUENCY (e2); + update_profile (epath, e2, path_in_count, path_out_count); } else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK) { @@ -1184,7 +1048,7 @@ ssa_fix_duplicate_block_edges (struct re if (count == 1) single_succ_edge (rd->dup_blocks[1])->aux = NULL; - /* Update the counts and frequency of both the original block + /* Update the counts of both the original block and path edge, and the duplicates. Since we are now after any joiner that may have existed on the path, the count flowing along the duplicated threaded path is path_out_count. @@ -1194,7 +1058,7 @@ ssa_fix_duplicate_block_edges (struct re been updated at the end of that handling to the edge frequency along the duplicated joiner path edge. 
*/ update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0), - path_out_count, path_out_count, cur_path_freq); + path_out_count, path_out_count); } else { @@ -1211,8 +1075,7 @@ ssa_fix_duplicate_block_edges (struct re thread path (path_in_freq). If we had a joiner, it would have been updated at the end of that handling to the edge frequency along the duplicated joiner path edge. */ - update_profile (epath, NULL, path_out_count, path_out_count, - cur_path_freq); + update_profile (epath, NULL, path_out_count, path_out_count); } /* Increment the index into the duplicated path when we processed @@ -1223,11 +1086,6 @@ ssa_fix_duplicate_block_edges (struct re count++; } } - - /* Done with all profile and frequency updates, clear counts if they - were copied. */ - if (do_freqs_to_counts) - clear_counts_path (rd); } /* Hash table traversal callback routine to create duplicate blocks. */ @@ -2137,7 +1995,6 @@ duplicate_thread_path (edge entry, edge struct loop *loop = entry->dest->loop_father; edge exit_copy; edge redirected; - int curr_freq; profile_count curr_count; if (!can_copy_bbs_p (region, n_region)) @@ -2170,7 +2027,6 @@ duplicate_thread_path (edge entry, edge the jump-thread path in order. */ curr_count = entry->count (); - curr_freq = EDGE_FREQUENCY (entry); for (i = 0; i < n_region; i++) { @@ -2181,10 +2037,8 @@ duplicate_thread_path (edge entry, edge /* Watch inconsistent profile. */ if (curr_count > region[i]->count) curr_count = region[i]->count; - if (curr_freq > region[i]->frequency) - curr_freq = region[i]->frequency; /* Scale current BB. */ - if (region[i]->count > 0 && curr_count.initialized_p ()) + if (region[i]->count.nonzero_p () && curr_count.initialized_p ()) { /* In the middle of the path we only scale the frequencies. 
In last BB we need to update probabilities of outgoing edges @@ -2195,24 +2049,11 @@ duplicate_thread_path (edge entry, edge region[i]->count); else update_bb_profile_for_threading (region[i], - curr_freq, curr_count, + curr_count, exit); scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count, region_copy[i]->count); } - else if (region[i]->frequency) - { - if (i + 1 != n_region) - scale_bbs_frequencies_int (region + i, 1, - region[i]->frequency - curr_freq, - region[i]->frequency); - else - update_bb_profile_for_threading (region[i], - curr_freq, curr_count, - exit); - scale_bbs_frequencies_int (region_copy + i, 1, curr_freq, - region_copy[i]->frequency); - } if (single_succ_p (bb)) { @@ -2221,7 +2062,6 @@ duplicate_thread_path (edge entry, edge || region_copy[i + 1] == single_succ_edge (bb)->dest); if (i + 1 != n_region) { - curr_freq = EDGE_FREQUENCY (single_succ_edge (bb)); curr_count = single_succ_edge (bb)->count (); } continue; @@ -2252,7 +2092,6 @@ duplicate_thread_path (edge entry, edge } else { - curr_freq = EDGE_FREQUENCY (e); curr_count = e->count (); } } Index: tree-switch-conversion.c =================================================================== --- tree-switch-conversion.c (revision 254348) +++ tree-switch-conversion.c (working copy) @@ -1443,10 +1443,10 @@ gen_inbound_check (gswitch *swtch, struc } /* frequencies of the new BBs */ - bb1->frequency = EDGE_FREQUENCY (e01); - bb2->frequency = EDGE_FREQUENCY (e02); + bb1->count = e01->count (); + bb2->count = e02->count (); if (!info->default_case_nonstandard) - bbf->frequency = EDGE_FREQUENCY (e1f) + EDGE_FREQUENCY (e2f); + bbf->count = e1f->count () + e2f->count (); /* Tidy blocks that have become unreachable. 
*/ prune_bbs (bbd, info->final_bb, Index: tree-tailcall.c =================================================================== --- tree-tailcall.c (revision 254348) +++ tree-tailcall.c (working copy) @@ -805,12 +805,9 @@ adjust_return_value (basic_block bb, tre /* Subtract COUNT and FREQUENCY from the basic block and it's outgoing edge. */ static void -decrease_profile (basic_block bb, profile_count count, int frequency) +decrease_profile (basic_block bb, profile_count count) { bb->count = bb->count - count; - bb->frequency -= frequency; - if (bb->frequency < 0) - bb->frequency = 0; if (!single_succ_p (bb)) { gcc_assert (!EDGE_COUNT (bb->succs)); @@ -892,11 +889,10 @@ eliminate_tail_call (struct tailcall *t) /* Number of executions of function has reduced by the tailcall. */ e = single_succ_edge (gsi_bb (t->call_gsi)); - decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count (), EDGE_FREQUENCY (e)); - decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count (), - EDGE_FREQUENCY (e)); + decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count ()); + decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count ()); if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - decrease_profile (e->dest, e->count (), EDGE_FREQUENCY (e)); + decrease_profile (e->dest, e->count ()); /* Replace the call by a jump to the start of function. */ e = redirect_edge_and_branch (single_succ_edge (gsi_bb (t->call_gsi)), Index: tree-vect-loop-manip.c =================================================================== --- tree-vect-loop-manip.c (revision 254348) +++ tree-vect-loop-manip.c (working copy) @@ -1843,7 +1843,6 @@ vect_do_peeling (loop_vec_info loop_vinf /* Simply propagate profile info from guard_bb to guard_to which is a merge point of control flow. */ - guard_to->frequency = guard_bb->frequency; guard_to->count = guard_bb->count; /* Scale probability of epilog loop back. FIXME: We should avoid scaling down and back up. 
Profile may Index: tree-vect-loop.c =================================================================== --- tree-vect-loop.c (revision 254348) +++ tree-vect-loop.c (working copy) @@ -7229,20 +7229,14 @@ scale_profile_for_vect_loop (struct loop gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf); profile_count freq_h = loop->header->count, freq_e = preheader->count (); - /* Use frequency only if counts are zero. */ - if (!(freq_h > 0) && !(freq_e > 0)) - { - freq_h = profile_count::from_gcov_type (loop->header->frequency); - freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader)); - } - if (freq_h > 0) + if (freq_h.nonzero_p ()) { profile_probability p; /* Avoid dropping loop body profile counter to 0 because of zero count in loop's preheader. */ - if (!(freq_e > profile_count::from_gcov_type (1))) - freq_e = profile_count::from_gcov_type (1); + if (!(freq_e == profile_count::zero ())) + freq_e = freq_e.force_nonzero (); p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); scale_loop_frequencies (loop, p); } @@ -7781,7 +7775,7 @@ optimize_mask_stores (struct loop *loop) efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); /* Put STORE_BB to likely part. */ efalse->probability = profile_probability::unlikely (); - store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse); + store_bb->count = efalse->count (); make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); if (dom_info_available_p (CDI_DOMINATORS)) set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 254348) +++ tree-vect-stmts.c (working copy) @@ -3221,7 +3221,7 @@ vectorizable_simd_clone_call (gimple *st vec<tree> vargs = vNULL; size_t i, nargs; tree lhs, rtype, ratype; - vec<constructor_elt, va_gc> *ret_ctor_elts; + vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; /* Is STMT a vectorizable call? 
*/ if (!is_gimple_call (stmt)) Index: ubsan.c =================================================================== --- ubsan.c (revision 254348) +++ ubsan.c (working copy) @@ -804,6 +804,7 @@ ubsan_expand_null_ifn (gimple_stmt_itera this edge is unlikely taken, so set up the probability accordingly. */ e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); /* Connect 'then block' with the 'else block'. This is needed as the ubsan routines we call in the 'then block' are not noreturn. @@ -1085,6 +1086,7 @@ ubsan_expand_ptr_ifn (gimple_stmt_iterat accordingly. */ e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); } else { @@ -1098,12 +1100,14 @@ ubsan_expand_ptr_ifn (gimple_stmt_iterat e = make_edge (cond_neg_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); cond_pos_bb = create_empty_bb (cond_bb); add_bb_to_loop (cond_pos_bb, cond_bb->loop_father); e = make_edge (cond_bb, cond_pos_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::even (); + cond_pos_bb->count = e->count (); e = make_edge (cond_pos_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); Index: value-prof.c =================================================================== --- value-prof.c (revision 254348) +++ value-prof.c (working copy) @@ -1299,7 +1299,7 @@ check_ic_target (gcall *call_stmt, struc gcall * gimple_ic (gcall *icall_stmt, struct cgraph_node *direct_call, - profile_probability prob, profile_count count, profile_count all) + profile_probability prob) { gcall *dcall_stmt; gassign *load_stmt; @@ -1354,11 +1354,11 @@ gimple_ic (gcall *icall_stmt, struct cgr /* Edge e_cd connects cond_bb to dcall_bb, etc; note the first letters. 
*/ e_cd = split_block (cond_bb, cond_stmt); dcall_bb = e_cd->dest; - dcall_bb->count = count; + dcall_bb->count = cond_bb->count.apply_probability (prob); e_di = split_block (dcall_bb, dcall_stmt); icall_bb = e_di->dest; - icall_bb->count = all - count; + icall_bb->count = cond_bb->count - dcall_bb->count; /* Do not disturb existing EH edges from the indirect call. */ if (!stmt_ends_bb_p (icall_stmt)) @@ -1376,7 +1376,7 @@ gimple_ic (gcall *icall_stmt, struct cgr if (e_ij != NULL) { join_bb = e_ij->dest; - join_bb->count = all; + join_bb->count = cond_bb->count; } e_cd->flags = (e_cd->flags & ~EDGE_FALLTHRU) | EDGE_TRUE_VALUE; Index: value-prof.h =================================================================== --- value-prof.h (revision 254348) +++ value-prof.h (working copy) @@ -90,8 +90,7 @@ void gimple_move_stmt_histograms (struct void verify_histograms (void); void free_histograms (function *); void stringop_block_profile (gimple *, unsigned int *, HOST_WIDE_INT *); -gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability, - profile_count, profile_count); +gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability); bool check_ic_target (gcall *, struct cgraph_node *);
On 11/02/2017 08:06 PM, Jan Hubicka wrote: > Sorry, I must have used older diff file, because it is one of unfinished chnages I made today. > I am attaching correct diff. Thank you. This one works for me, however I see various errors for postgres PGO: cd src/backend/replication/ marxin@marxinbox:~/Programming/postgres/src/backend/replication> gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fprofile-use -I. -I. -I../../../src/include -D_GNU_SOURCE -c -o walsender.o walsender.c In file included from walsender.c:56:0: walsender.c: In function ‘XLogRead’: ../../../src/include/access/xlog_internal.h:188:26: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (7531) (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ walsender.c:2367:4: note: in expansion of macro ‘XLogFilePath’ XLogFilePath(path, curFileTimeLine, sendSegNo, wal_segment_size); ^~~~~~~~~~~~ Problem is following: Here we estimate count: Old value = -1 New value = 7531 0x0000000000aecb52 in estimate_bb_frequencies (force=force@entry=false) at ../../gcc/predict.c:3590 3590 bb->count = count.guessed_local (); (gdb) bt #0 0x0000000000aecb52 in estimate_bb_frequencies (force=force@entry=false) at ../../gcc/predict.c:3590 #1 0x0000000000af05a4 in tree_estimate_probability (dry_run=dry_run@entry=false) at ../../gcc/predict.c:2828 #2 0x0000000000af0b9c in (anonymous namespace)::pass_profile::execute (this=<optimized out>, fun=0x7ffff5b9e790) at ../../gcc/predict.c:3722 #3 0x0000000000ad23f1 in execute_one_pass (pass=pass@entry=0x210faf0) at ../../gcc/passes.c:2497 #4 0x0000000000ad2cb1 in execute_pass_list_1 (pass=0x210faf0) at ../../gcc/passes.c:2586 #5 0x0000000000ad2cc3 in execute_pass_list_1 (pass=0x210f470) at ../../gcc/passes.c:2587 #6 
0x0000000000ad2d05 in execute_pass_list (fn=<optimized out>, pass=<optimized out>) at ../../gcc/passes.c:2597 #7 0x0000000000ad1631 in do_per_function_toporder (callback=callback@entry=0xad2cf0 <execute_pass_list(function*, opt_pass*)>, data=0x210f2f0) at ../../gcc/passes.c:1739 #8 0x0000000000ad3387 in execute_ipa_pass_list (pass=0x210f290) at ../../gcc/passes.c:2937 #9 0x000000000078fb42 in ipa_passes () at ../../gcc/cgraphunit.c:2423 #10 symbol_table::compile (this=this@entry=0x7ffff6817100) at ../../gcc/cgraphunit.c:2558 #11 0x00000000007923c7 in symbol_table::compile (this=0x7ffff6817100) at ../../gcc/cgraphunit.c:2719 #12 symbol_table::finalize_compilation_unit (this=0x7ffff6817100) at ../../gcc/cgraphunit.c:2716 #13 0x0000000000bb20d3 in compile_file () at ../../gcc/toplev.c:479 #14 0x00000000005d7fc5 in do_compile () at ../../gcc/toplev.c:2059 #15 toplev::main (this=this@entry=0x7fffffffd85e, argc=<optimized out>, argc@entry=38, argv=<optimized out>, argv@entry=0x7fffffffd958) at ../../gcc/toplev.c:2194 #16 0x00000000005da46b in main (argc=38, argv=0x7fffffffd958) at ../../gcc/main.c:39 And later on we check it with real value of an interval counter: Breakpoint 1, error_at (loc=2147497451, gmsgid=0x1605208 "corrupted value profile: %s profile counter (%d out of %d) inconsistent with basic-block count (%d)") at ../../gcc/diagnostic.c:1354 1354 { (gdb) bt #0 error_at (loc=2147497451, gmsgid=0x1605208 "corrupted value profile: %s profile counter (%d out of %d) inconsistent with basic-block count (%d)") at ../../gcc/diagnostic.c:1354 #1 0x0000000000eae5b9 in check_counter (stmt=0x7ffff58359f8, name=0x16052fc "interval", count=0x7fffffffd5a8, all=0x7fffffffd5c0, bb_count_d=...) 
at ../../gcc/value-prof.c:607 #2 0x0000000000eafe95 in gimple_mod_subtract_transform (si=0x7fffffffd640) at ../../gcc/value-prof.c:1133 #3 0x0000000000eae709 in gimple_value_profile_transformations () at ../../gcc/value-prof.c:658 #4 0x0000000000ca712c in tree_profiling () at ../../gcc/tree-profile.c:687 #5 (anonymous namespace)::pass_ipa_tree_profile::execute (this=<optimized out>) at ../../gcc/tree-profile.c:780 #6 0x0000000000ad23f1 in execute_one_pass (pass=pass@entry=0x2110350) at ../../gcc/passes.c:2497 #7 0x0000000000ad33f2 in execute_ipa_pass_list (pass=0x2110350) at ../../gcc/passes.c:2932 #8 0x000000000078fb42 in ipa_passes () at ../../gcc/cgraphunit.c:2423 #9 symbol_table::compile (this=this@entry=0x7ffff6817100) at ../../gcc/cgraphunit.c:2558 #10 0x00000000007923c7 in symbol_table::compile (this=0x7ffff6817100) at ../../gcc/cgraphunit.c:2719 #11 symbol_table::finalize_compilation_unit (this=0x7ffff6817100) at ../../gcc/cgraphunit.c:2716 #12 0x0000000000bb20d3 in compile_file () at ../../gcc/toplev.c:479 #13 0x00000000005d7fc5 in do_compile () at ../../gcc/toplev.c:2059 #14 toplev::main (this=this@entry=0x7fffffffd85e, argc=<optimized out>, argc@entry=38, argv=<optimized out>, argv@entry=0x7fffffffd958) at ../../gcc/toplev.c:2194 #15 0x00000000005da46b in main (argc=38, argv=0x7fffffffd958) at ../../gcc/main.c:39 Can you please take a look? Or will you need a reproducer? Thank you, Martin
Can be also seen in GCC PGO: checking for ssize_t... ../../libdecnumber/decNumber.c: In function ‘decDecap’: ../../libdecnumber/decNumber.c:7640:25: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (4356) if (cut!=DECDPUN) *msu%=powers[cut]; /* clear left digits */ ^~ no checking unwind.h usability... ../../libdecnumber/decNumber.c: In function ‘decNumberRotate’: ../../libdecnumber/decNumber.c:2526:9: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (8) uInt save=res->lsu[0]%powers[shift]; /* save low digit(s) */ ^~~~ ../../libdecnumber/decNumber.c:2529:11: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (4) uInt rem=save%powers[shift-msudigits];/* split save */ ^~~ ../../libdecnumber/decNumber.c:2546:11: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (2) uInt save=res->lsu[0]%powers[shift]; /* save low digit(s) */ ^~~~
Apart from that I also see this GCC PGO: ../../libiberty/pex-unix.c:789:1: warning: Missing counts for called function pex_child_error.isra.1/69 } ^ during IPA pass: inline ../../libiberty/pex-unix.c: In function ‘pex_child_error.isra.1’: ../../libiberty/pex-unix.c:373:1: internal compiler error: in operator>, at profile-count.h:821 pex_child_error (struct pex_obj *obj, const char *executable, ^~~~~~~~~~~~~~~ 0x13b2e53 profile_count::operator>(long) const ../../gcc/profile-count.h:821 0x13b2e53 inline_transform(cgraph_node*) ../../gcc/ipa-inline-transform.c:680 0x5d8dae execute_one_ipa_transform_pass ../../gcc/passes.c:2239 0x5d8dae execute_all_ipa_transforms() ../../gcc/passes.c:2281 0x894e4f cgraph_node::expand() ../../gcc/cgraphunit.c:2132 0x8961e0 expand_all_functions ../../gcc/cgraphunit.c:2275 0x8961e0 symbol_table::compile() ../../gcc/cgraphunit.c:2623 0x898ac6 symbol_table::compile() ../../gcc/cgraphunit.c:2719 0x898ac6 symbol_table::finalize_compilation_unit() ../../gcc/cgraphunit.c:2716 Feel free to ask me about details if necessary. Martin
> Can be also seen in GCC PGO: > > checking for ssize_t... ../../libdecnumber/decNumber.c: In function ‘decDecap’: > ../../libdecnumber/decNumber.c:7640:25: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (4356) > if (cut!=DECDPUN) *msu%=powers[cut]; /* clear left digits */ Hmm, I have restarted profiledbootstrap and it also reproduces to me. It is odd that those counters are 0 in basic block that is executed. Having small reproducer would be cool, but I will try to debug it from libdecnumber. That other ICE is another issue with Theresa's code for dropping profiles that are mismatched (it is only place we mismatch profile in GCC). I will take a look what went wrong here. Thanks, Honza > ^~ > no > checking unwind.h usability... ../../libdecnumber/decNumber.c: In function ‘decNumberRotate’: > ../../libdecnumber/decNumber.c:2526:9: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (8) > uInt save=res->lsu[0]%powers[shift]; /* save low digit(s) */ > ^~~~ > ../../libdecnumber/decNumber.c:2529:11: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (4) > uInt rem=save%powers[shift-msudigits];/* split save */ > ^~~ > ../../libdecnumber/decNumber.c:2546:11: error: corrupted value profile: interval profile counter (0 out of 0) inconsistent with basic-block count (2) > uInt save=res->lsu[0]%powers[shift]; /* save low digit(s) */ > ^~~~
Hello, this is updated patch which I have comitted after profiledbootstrapping x86-64 2017-11-02 Jan Hubicka <hubicka@ucw.cz> * asan.c (create_cond_insert_point): Maintain profile. * ipa-utils.c (ipa_merge_profiles): Be sure only IPA profiles are merged. * basic-block.h (struct basic_block_def): Remove frequency. (EDGE_FREQUENCY): Use to_frequency * bb-reorder.c (push_to_next_round_p): Use only IPA counts for global heuristics. (find_traces): Update to use to_frequency. (find_traces_1_round): Likewise; use only IPA counts. (bb_to_key): Likewise. (connect_traces): Use IPA counts only. (copy_bb_p): Update to use to_frequency. (fix_up_crossing_landing_pad): Likewise. (sanitize_hot_paths): Likewise. * bt-load.c (basic_block_freq): Likewise. * cfg.c (init_flow): Set count_max to uninitialized. (check_bb_profile): Remove frequencies; check counts. (dump_bb_info): Do not dump frequencies. (update_bb_profile_for_threading): Update counts only. (scale_bbs_frequencies_int): Likewise. (MAX_SAFE_MULTIPLIER): Remove. (scale_bbs_frequencies_gcov_type): Update counts only. (scale_bbs_frequencies_profile_count): Update counts only. (scale_bbs_frequencies): Update counts only. * cfg.h (struct control_flow_graph): Add count-max. (update_bb_profile_for_threading): Update prototype. * cfgbuild.c (find_bb_boundaries): Do not update frequencies. (find_many_sub_basic_blocks): Likewise. * cfgcleanup.c (try_forward_edges): Likewise. (try_crossjump_to_edge): Likewise. * cfgexpand.c (expand_gimple_cond): Likewise. (expand_gimple_tailcall): Likewise. (construct_init_block): Likewise. (construct_exit_block): Likewise. * cfghooks.c (verify_flow_info): Check consistency of counts. (dump_bb_for_graph): Do not dump frequencies. (split_block_1): Do not update frequencies. (split_edge): Do not update frequencies. (make_forwarder_block): Do not update frequencies. (duplicate_block): Do not update frequencies. (account_profile_record): Do not update frequencies. 
* cfgloop.c (find_subloop_latch_edge_by_profile): Use IPA counts for global heuristics. * cfgloopanal.c (average_num_loop_insns): Update to use to_frequency. (expected_loop_iterations_unbounded): Use counts only. * cfgloopmanip.c (scale_loop_profile): Simplify. (create_empty_loop_on_edge): Simplify (loopify): Simplify (duplicate_loop_to_header_edge): Simplify * cfgrtl.c (force_nonfallthru_and_redirect): Update profile. (update_br_prob_note): Take care of removing note when profile becomes undefined. (relink_block_chain): Do not dump frequency. (rtl_account_profile_record): Use to_frequency. * cgraph.c (symbol_table::create_edge): Convert count to ipa count. (cgraph_edge::redirect_call_stmt_to_calle): Conver tcount to ipa count. (cgraph_update_edges_for_call_stmt_node): Likewise. (cgraph_edge::verify_count_and_frequency): Update. (cgraph_node::verify_node): Temporarily disable frequency verification. * cgraphbuild.c (compute_call_stmt_bb_frequency): Use to_cgraph_frequency. (cgraph_edge::rebuild_edges): Convert to ipa counts. * cgraphunit.c (init_lowered_empty_function): Do not initialize frequencies. (cgraph_node::expand_thunk): Update profile. * except.c (dw2_build_landing_pads): Do not update frequency. * final.c (compute_alignments): Use to_frequency. (dump_basic_block_info): Do not dump frequency. * gimple-pretty-print.c (dump_profile): Do not dump frequency. (dump_gimple_bb_header): Do not dump frequency. * gimple-ssa-isolate-paths.c (isolate_path): Do not update frequency; do update count. * gimple-streamer-in.c (input_bb): Do not stream frequency. * gimple-streamer-out.c (output_bb): Do not stream frequency. * haifa-sched.c (sched_pressure_start_bb): Use to_freuqency. (init_before_recovery): Do not update frequency. (sched_create_recovery_edges): Do not update frequency. * hsa-gen.c (convert_switch_statements): Do not update frequency. * ipa-cp.c (ipcp_propagate_stage): Update search for max_count. (ipa_cp_c_finalize): Set max_count to uninitialized. 
* ipa-fnsummary.c (get_minimal_bb): Use counts. (param_change_prob): Use counts. * ipa-profile.c (ipa_profile_generate_summary): Do not summarize local profiles. * ipa-split.c (consider_split): Use to_frequency. (split_function): Use to_frequency. * ira-build.c (loop_compare_func): Likewise. (mark_loops_for_removal): Likewise. (mark_all_loops_for_removal): Likewise. * loop-doloop.c (doloop_modify): Do not update frequency. * loop-unroll.c (unroll_loop_runtime_iterations): Do not update frequency. * lto-streamer-in.c (input_function): Update count_max. * omp-expand.c (expand_omp_taskreg): Update count_max. * omp-simd-clone.c (simd_clone_adjust): Update profile. * predict.c (maybe_hot_frequency_p): Use to_frequency. (maybe_hot_count_p): Use ipa counts only. (maybe_hot_bb_p): Simplify. (maybe_hot_edge_p): Simplify. (probably_never_executed): Do not take frequency argument. (probably_never_executed_bb_p): Do not pass frequency. (probably_never_executed_edge_p): Likewise. (combine_predictions_for_bb): Check that profile is nonzero. (propagate_freq): Do not set frequency. (drop_profile): Simplify. (counts_to_freqs): Simplify. (expensive_function_p): Use to_frequency. (propagate_unlikely_bbs_forward): Simplify. (determine_unlikely_bbs): Simplify. (estimate_bb_frequencies): Add hack to silence graphite issues. (compute_function_frequency): Use ipa counts. (pass_profile::execute): Update. (rebuild_frequencies): Use counts only. (force_edge_cold): Use counts only. * profile-count.c (profile_count::dump): Dump new count types. (profile_count::differs_from_p): Check compatiblity. (profile_count::to_frequency): New function. (profile_count::to_cgraph_frequency): New function. * profile-count.h (struct function): Declare. (enum profile_quality): Add profile_guessed_local and profile_guessed_global0. (class profile_proability): Decrease number of bits to 29; update from_reg_br_prob_note and to_reg_br_prob_note. (class profile_count: Update comment; decrease number of bits to 61. 
Check compatibility. (profile_count::compatible_p): New private member function. (profile_count::ipa_p): New member function. (profile_count::operator<): Handle global zero correctly. (profile_count::operator>): Handle global zero correctly. (profile_count::operator<=): Handle global zero correctly. (profile_count::operator>=): Handle global zero correctly. (profile_count::nonzero_p): New member function. (profile_count::force_nonzero): New member function. (profile_count::max): New member function. (profile_count::apply_scale): Handle IPA scalling. (profile_count::guessed_local): New member function. (profile_count::global0): New member function. (profile_count::ipa): New member function. (profile_count::to_frequency): Declare. (profile_count::to_cgraph_frequency): Declare. * profile.c (OVERLAP_BASE): Delete. (compute_frequency_overlap): Delete. (compute_branch_probabilities): Do not use compute_frequency_overlap. * regs.h (REG_FREQ_FROM_BB): Use to_frequency. * sched-ebb.c (rank): Use counts only. * shrink-wrap.c (handle_simple_exit): Use counts only. (try_shrink_wrapping): Use counts only. (place_prologue_for_one_component): Use counts only. * tracer.c (find_best_predecessor): Use to_frequency. (find_trace): Use to_frequency. (tail_duplicate): Use to_frequency. * trans-mem.c (expand_transaction): Do not update frequency. * tree-call-cdce.c: Do not update frequency. * tree-cfg.c (gimple_find_sub_bbs): Likewise. (gimple_merge_blocks): Likewise. (gimple_split_edge): Likewise. (gimple_duplicate_sese_region): Likewise. (gimple_duplicate_sese_tail): Likewise. (move_sese_region_to_fn): Likewise. (gimple_account_profile_record): Likewise. (insert_cond_bb): Likewise. * tree-complex.c (expand_complex_div_wide): Likewise. * tree-eh.c (lower_resx): Update profile. * tree-inline.c (copy_bb): Simplify count scaling; do not scale frequencies. (initialize_cfun): Do not initialize frequencies (freqs_to_counts): Delete. (copy_cfg_body): Ignore count parameter. 
(copy_body): Update. (expand_call_inline): Update count_max. (optimize_inline_calls): Update count_max. (tree_function_versioning): Update count_max. * tree-ssa-coalesce.c (coalesce_cost_bb): Use to_frequency. * tree-ssa-ifcombine.c (update_profile_after_ifcombine): Do not update frequency. * tree-ssa-loop-im.c (execute_sm_if_changed): Use counts only. * tree-ssa-loop-ivcanon.c (unloop_loops): Do not update freuqency. (try_peel_loop): Likewise. * tree-ssa-loop-ivopts.c (get_scaled_computation_cost_at): Use to_frequency. * tree-ssa-loop-manip.c (niter_for_unrolled_loop): Pass -1. (tree_transform_and_unroll_loop): Do not use frequencies * tree-ssa-loop-niter.c (estimate_numbers_of_iterations): Use reliable prediction only. * tree-ssa-loop-unswitch.c (hoist_guard): Do not use frequencies. * tree-ssa-sink.c (select_best_block): Use to_frequency. * tree-ssa-tail-merge.c (replace_block_by): Temporarily disable probability scaling. * tree-ssa-threadupdate.c (create_block_for_threading): Do not update frequency (any_remaining_duplicated_blocks): Likewise. (update_profile): Likewise. (estimated_freqs_path): Delete. (freqs_to_counts_path): Delete. (clear_counts_path): Delete. (ssa_fix_duplicate_block_edges): Likewise. (duplicate_thread_path): Likewise. * tree-switch-conversion.c (gen_inbound_check): Use counts. * tree-tailcall.c (decrease_profile): Do not update frequency. (eliminate_tail_call): Likewise. * tree-vect-loop-manip.c (vect_do_peeling): Likewise. * tree-vect-loop.c (scale_profile_for_vect_loop): Likewise. (optimize_mask_stores): Likewise. * tree-vect-stmts.c (vectorizable_simd_clone_call): Likewise. * ubsan.c (ubsan_expand_null_ifn): Update profile. (ubsan_expand_ptr_ifn): Update profile. * value-prof.c (gimple_ic): Simplify. * value-prof.h (gimple_ic): Update prototype. * ipa-inline-transform.c (inline_transform): Fix scaling conditoins. * ipa-inline.c (compute_uninlined_call_time): Be sure that counts are nonzero. (want_inline_self_recursive_call_p): Likewise. 
(resolve_noninline_speculation): Only cummulate defined counts. (inline_small_functions): Use nonzero_p. (ipa_inline): Do not access freed node. Unknown ChangeLog: 2017-11-02 Jan Hubicka <hubicka@ucw.cz> * testsuite/gcc.dg/no-strict-overflow-3.c (foo): Update magic value to not clash with frequency. * testsuite/gcc.dg/strict-overflow-3.c (foo): Likewise. * testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c: Update template. * testsuite/gcc.dg/tree-ssa/dump-2.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-10.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-11.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-12.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-5.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-8.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-9.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-cd.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-pr56541.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-pr68583.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c: Update template. * testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c: Update template. * testsuite/gcc.target/i386/pr61403.c: Update template. Index: asan.c =================================================================== --- asan.c (revision 254348) +++ asan.c (working copy) @@ -1801,6 +1801,7 @@ create_cond_insert_point (gimple_stmt_it ? 
profile_probability::very_unlikely () : profile_probability::very_likely (); e->probability = fallthrough_probability.invert (); + then_bb->count = e->count (); if (create_then_fallthru_edge) make_single_succ_edge (then_bb, fallthru_bb, EDGE_FALLTHRU); Index: basic-block.h =================================================================== --- basic-block.h (revision 254348) +++ basic-block.h (working copy) @@ -148,9 +148,6 @@ struct GTY((chain_next ("%h.next_bb"), c /* Expected number of executions: calculated in profile.c. */ profile_count count; - /* Expected frequency. Normalized to be in range 0 to BB_FREQ_MAX. */ - int frequency; - /* The discriminator for this block. The discriminator distinguishes among several basic blocks that share a common locus, allowing for more accurate sample-based profiling. */ @@ -301,7 +298,7 @@ enum cfg_bb_flags ? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0)) /* Return expected execution frequency of the edge E. */ -#define EDGE_FREQUENCY(e) e->probability.apply (e->src->frequency) +#define EDGE_FREQUENCY(e) e->count ().to_frequency (cfun) /* Compute a scale factor (or probability) suitable for scaling of gcov_type values via apply_probability() and apply_scale(). 
*/ Index: bb-reorder.c =================================================================== --- bb-reorder.c (revision 254348) +++ bb-reorder.c (working copy) @@ -256,8 +256,8 @@ push_to_next_round_p (const_basic_block there_exists_another_round = round < number_of_rounds - 1; - block_not_hot_enough = (bb->frequency < exec_th - || bb->count < count_th + block_not_hot_enough = (bb->count.to_frequency (cfun) < exec_th + || bb->count.ipa () < count_th || probably_never_executed_bb_p (cfun, bb)); if (there_exists_another_round @@ -293,9 +293,9 @@ find_traces (int *n_traces, struct trace { bbd[e->dest->index].heap = heap; bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest); - if (e->dest->frequency > max_entry_frequency) - max_entry_frequency = e->dest->frequency; - if (e->dest->count.initialized_p () && e->dest->count > max_entry_count) + if (e->dest->count.to_frequency (cfun) > max_entry_frequency) + max_entry_frequency = e->dest->count.to_frequency (cfun); + if (e->dest->count.ipa_p () && e->dest->count > max_entry_count) max_entry_count = e->dest->count; } @@ -329,8 +329,10 @@ find_traces (int *n_traces, struct trace for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux) - fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency); - fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency); + fprintf (dump_file, "%d [%d] ", bb->index, + bb->count.to_frequency (cfun)); + fprintf (dump_file, "%d [%d]\n", bb->index, + bb->count.to_frequency (cfun)); } fflush (dump_file); } @@ -551,7 +553,7 @@ find_traces_1_round (int branch_th, int continue; prob = e->probability; - freq = e->dest->frequency; + freq = e->dest->count.to_frequency (cfun); /* The only sensible preference for a call instruction is the fallthru edge. Don't bother selecting anything else. 
*/ @@ -573,7 +575,7 @@ find_traces_1_round (int branch_th, int || !prob.initialized_p () || ((prob.to_reg_br_prob_base () < branch_th || EDGE_FREQUENCY (e) < exec_th - || e->count () < count_th) && (!for_size))) + || e->count ().ipa () < count_th) && (!for_size))) continue; if (better_edge_p (bb, e, prob, freq, best_prob, best_freq, @@ -671,7 +673,7 @@ find_traces_1_round (int branch_th, int || !prob.initialized_p () || prob.to_reg_br_prob_base () < branch_th || freq < exec_th - || e->count () < count_th) + || e->count ().ipa () < count_th) { /* When partitioning hot/cold basic blocks, make sure the cold blocks (and only the cold blocks) all get @@ -706,7 +708,7 @@ find_traces_1_round (int branch_th, int if (best_edge->dest != bb) { if (EDGE_FREQUENCY (best_edge) - > 4 * best_edge->dest->frequency / 5) + > 4 * best_edge->dest->count.to_frequency (cfun) / 5) { /* The loop has at least 4 iterations. If the loop header is not the first block of the function @@ -783,8 +785,8 @@ find_traces_1_round (int branch_th, int & EDGE_CAN_FALLTHRU) && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX) && single_succ (e->dest) == best_edge->dest - && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge) - || for_size)) + && (2 * e->dest->count.to_frequency (cfun) + >= EDGE_FREQUENCY (best_edge) || for_size)) { best_edge = e; if (dump_file) @@ -945,9 +947,9 @@ bb_to_key (basic_block bb) if (priority) /* The block with priority should have significantly lower key. 
*/ - return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency); + return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun)); - return -bb->frequency; + return -bb->count.to_frequency (cfun); } /* Return true when the edge E from basic block BB is better than the temporary @@ -1290,7 +1292,7 @@ connect_traces (int n_traces, struct tra && !connected[bbd[di].start_of_trace] && BB_PARTITION (e2->dest) == current_partition && EDGE_FREQUENCY (e2) >= freq_threshold - && e2->count () >= count_threshold + && e2->count ().ipa () >= count_threshold && (!best2 || e2->probability > best2->probability || (e2->probability == best2->probability @@ -1317,7 +1319,7 @@ connect_traces (int n_traces, struct tra optimize_edge_for_speed_p (best) && EDGE_FREQUENCY (best) >= freq_threshold && (!best->count ().initialized_p () - || best->count () >= count_threshold))) + || best->count ().ipa () >= count_threshold))) { basic_block new_bb; @@ -1375,7 +1377,7 @@ copy_bb_p (const_basic_block bb, int cod int max_size = uncond_jump_length; rtx_insn *insn; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return false; if (EDGE_COUNT (bb->preds) < 2) return false; @@ -1459,7 +1461,6 @@ fix_up_crossing_landing_pad (eh_landing_ last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb; new_bb = create_basic_block (new_label, jump, last_bb); new_bb->aux = last_bb->aux; - new_bb->frequency = post_bb->frequency; new_bb->count = post_bb->count; last_bb->aux = new_bb; @@ -1517,7 +1518,6 @@ sanitize_hot_paths (bool walk_up, unsign edge_iterator ei; profile_probability highest_probability = profile_probability::uninitialized (); - int highest_freq = 0; profile_count highest_count = profile_count::uninitialized (); bool found = false; @@ -1544,11 +1544,8 @@ sanitize_hot_paths (bool walk_up, unsign /* The following loop will look for the hottest edge via the edge count, if it is non-zero, then fallback to the edge frequency and finally the edge probability. 
*/ - if (!highest_count.initialized_p () || e->count () > highest_count) + if (!(e->count () > highest_count)) highest_count = e->count (); - int edge_freq = EDGE_FREQUENCY (e); - if (edge_freq > highest_freq) - highest_freq = edge_freq; if (!highest_probability.initialized_p () || e->probability > highest_probability) highest_probability = e->probability; @@ -1573,17 +1570,12 @@ sanitize_hot_paths (bool walk_up, unsign /* Select the hottest edge using the edge count, if it is non-zero, then fallback to the edge frequency and finally the edge probability. */ - if (highest_count > 0) + if (highest_count.initialized_p ()) { - if (e->count () < highest_count) + if (!(e->count () >= highest_count)) continue; } - else if (highest_freq) - { - if (EDGE_FREQUENCY (e) < highest_freq) - continue; - } - else if (e->probability < highest_probability) + else if (!(e->probability >= highest_probability)) continue; basic_block reach_bb = walk_up ? e->src : e->dest; Index: bt-load.c =================================================================== --- bt-load.c (revision 254348) +++ bt-load.c (working copy) @@ -185,7 +185,7 @@ static int first_btr, last_btr; static int basic_block_freq (const_basic_block bb) { - return bb->frequency; + return bb->count.to_frequency (cfun); } /* If the rtx at *XP references (sets or reads) any branch target Index: cfg.c =================================================================== --- cfg.c (revision 254348) +++ cfg.c (working copy) @@ -68,6 +68,7 @@ init_flow (struct function *the_fun) if (!the_fun->cfg) the_fun->cfg = ggc_cleared_alloc<control_flow_graph> (); n_edges_for_fn (the_fun) = 0; + the_fun->cfg->count_max = profile_count::uninitialized (); ENTRY_BLOCK_PTR_FOR_FN (the_fun) = alloc_block (); ENTRY_BLOCK_PTR_FOR_FN (the_fun)->index = ENTRY_BLOCK; @@ -447,13 +448,18 @@ check_bb_profile (basic_block bb, FILE * } if (bb != ENTRY_BLOCK_PTR_FOR_FN (fun)) { - int sum = 0; + profile_count sum = profile_count::zero (); FOR_EACH_EDGE (e, ei, 
bb->preds) - sum += EDGE_FREQUENCY (e); - if (abs (sum - bb->frequency) > 100) - fprintf (file, - ";; %sInvalid sum of incoming frequencies %i, should be %i\n", - s_indent, sum, bb->frequency); + sum += e->count (); + if (sum.differs_from_p (bb->count)) + { + fprintf (file, ";; %sInvalid sum of incoming counts ", + s_indent); + sum.dump (file); + fprintf (file, ", should be "); + bb->count.dump (file); + fprintf (file, "\n"); + } } if (BB_PARTITION (bb) == BB_COLD_PARTITION) { @@ -751,7 +757,6 @@ dump_bb_info (FILE *outf, basic_block bb fputs (", count ", outf); bb->count.dump (outf); } - fprintf (outf, ", freq %i", bb->frequency); if (maybe_hot_bb_p (fun, bb)) fputs (", maybe hot", outf); if (probably_never_executed_bb_p (fun, bb)) @@ -843,15 +848,15 @@ brief_dump_cfg (FILE *file, dump_flags_t } } -/* An edge originally destinating BB of FREQUENCY and COUNT has been proved to +/* An edge originally destinating BB of COUNT has been proved to leave the block by TAKEN_EDGE. Update profile of BB such that edge E can be redirected to destination of TAKEN_EDGE. This function may leave the profile inconsistent in the case TAKEN_EDGE - frequency or count is believed to be lower than FREQUENCY or COUNT + frequency or count is believed to be lower than COUNT respectively. */ void -update_bb_profile_for_threading (basic_block bb, int edge_frequency, +update_bb_profile_for_threading (basic_block bb, profile_count count, edge taken_edge) { edge c; @@ -866,16 +871,10 @@ update_bb_profile_for_threading (basic_b } bb->count -= count; - bb->frequency -= edge_frequency; - if (bb->frequency < 0) - bb->frequency = 0; - /* Compute the probability of TAKEN_EDGE being reached via threaded edge. Watch for overflows. */ - if (bb->frequency) - /* FIXME: We should get edge frequency as count. 
*/ - prob = profile_probability::probability_in_gcov_type - (edge_frequency, bb->frequency); + if (bb->count.nonzero_p ()) + prob = count.probability_in (bb->count); else prob = profile_probability::never (); if (prob > taken_edge->probability) @@ -899,9 +898,9 @@ update_bb_profile_for_threading (basic_b if (prob == profile_probability::never ()) { if (dump_file) - fprintf (dump_file, "Edge frequencies of bb %i has been reset, " - "frequency of block should end up being 0, it is %i\n", - bb->index, bb->frequency); + fprintf (dump_file, "Edge probabilities of bb %i has been reset, " + "count of block should end up being 0, it is non-zero\n", + bb->index); EDGE_SUCC (bb, 0)->probability = profile_probability::guessed_always (); ei = ei_start (bb->succs); ei_next (&ei); @@ -942,18 +941,10 @@ scale_bbs_frequencies_int (basic_block * for (i = 0; i < nbbs; i++) { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - /* Make sure the frequencies do not grow over BB_FREQ_MAX. */ - if (bbs[i]->frequency > BB_FREQ_MAX) - bbs[i]->frequency = BB_FREQ_MAX; bbs[i]->count = bbs[i]->count.apply_scale (num, den); } } -/* numbers smaller than this value are safe to multiply without getting - 64bit overflow. */ -#define MAX_SAFE_MULTIPLIER (1 << (sizeof (int64_t) * 4 - 1)) - /* Multiply all frequencies of basic blocks in array BBS of length NBBS by NUM/DEN, in gcov_type arithmetic. More accurate than previous function but considerably slower. 
*/ @@ -962,28 +953,9 @@ scale_bbs_frequencies_gcov_type (basic_b gcov_type den) { int i; - gcov_type fraction = RDIV (num * 65536, den); - - gcc_assert (fraction >= 0); - if (num < MAX_SAFE_MULTIPLIER) - for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) - bbs[i]->count = bbs[i]->count.apply_scale (num, den); - else - bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536); - } - else - for (i = 0; i < nbbs; i++) - { - if (sizeof (gcov_type) > sizeof (int)) - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - else - bbs[i]->frequency = RDIV (bbs[i]->frequency * fraction, 65536); - bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536); - } + for (i = 0; i < nbbs; i++) + bbs[i]->count = bbs[i]->count.apply_scale (num, den); } /* Multiply all frequencies of basic blocks in array BBS of length NBBS @@ -994,13 +966,9 @@ scale_bbs_frequencies_profile_count (bas profile_count num, profile_count den) { int i; - - for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num.to_gcov_type (), - den.to_gcov_type ()); + if (num == profile_count::zero () || den.nonzero_p ()) + for (i = 0; i < nbbs; i++) bbs[i]->count = bbs[i]->count.apply_scale (num, den); - } } /* Multiply all frequencies of basic blocks in array BBS of length NBBS @@ -1013,10 +981,7 @@ scale_bbs_frequencies (basic_block *bbs, int i; for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = p.apply (bbs[i]->frequency); - bbs[i]->count = bbs[i]->count.apply_probability (p); - } + bbs[i]->count = bbs[i]->count.apply_probability (p); } /* Helper types for hash tables. */ Index: cfg.h =================================================================== --- cfg.h (revision 254348) +++ cfg.h (working copy) @@ -71,6 +71,9 @@ struct GTY(()) control_flow_graph { /* Maximal number of entities in the single jumptable. Used to estimate final flowgraph size. 
*/ int max_jumptable_ents; + + /* Maximal count of BB in function. */ + profile_count count_max; }; @@ -103,7 +106,7 @@ extern void debug_bb (basic_block); extern basic_block debug_bb_n (int); extern void dump_bb_info (FILE *, basic_block, int, dump_flags_t, bool, bool); extern void brief_dump_cfg (FILE *, dump_flags_t); -extern void update_bb_profile_for_threading (basic_block, int, profile_count, edge); +extern void update_bb_profile_for_threading (basic_block, profile_count, edge); extern void scale_bbs_frequencies_int (basic_block *, int, int, int); extern void scale_bbs_frequencies_gcov_type (basic_block *, int, gcov_type, gcov_type); Index: cfgbuild.c =================================================================== --- cfgbuild.c (revision 254348) +++ cfgbuild.c (working copy) @@ -499,7 +499,6 @@ find_bb_boundaries (basic_block bb) remove_edge (fallthru); /* BB is unreachable at this point - we need to determine its profile once edges are built. */ - bb->frequency = 0; bb->count = profile_count::uninitialized (); flow_transfer_insn = NULL; debug_insn = NULL; @@ -669,7 +668,6 @@ find_many_sub_basic_blocks (sbitmap bloc { bool initialized_src = false, uninitialized_src = false; bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) { if (e->count ().initialized_p ()) @@ -679,8 +677,6 @@ find_many_sub_basic_blocks (sbitmap bloc } else uninitialized_src = true; - if (e->probability.initialized_p ()) - bb->frequency += EDGE_FREQUENCY (e); } /* When some edges are missing with read profile, this is most likely because RTL expansion introduced loop. @@ -692,7 +688,7 @@ find_many_sub_basic_blocks (sbitmap bloc precisely once. */ if (!initialized_src || (uninitialized_src - && profile_status_for_fn (cfun) != PROFILE_READ)) + && profile_status_for_fn (cfun) < PROFILE_GUESSED)) bb->count = profile_count::uninitialized (); } /* If nothing changed, there is no need to create new BBs. 
*/ Index: cfgcleanup.c =================================================================== --- cfgcleanup.c (revision 254348) +++ cfgcleanup.c (working copy) @@ -559,8 +559,6 @@ try_forward_edges (int mode, basic_block { /* Save the values now, as the edge may get removed. */ profile_count edge_count = e->count (); - profile_probability edge_probability = e->probability; - int edge_frequency; int n = 0; e->goto_locus = goto_locus; @@ -585,8 +583,6 @@ try_forward_edges (int mode, basic_block /* We successfully forwarded the edge. Now update profile data: for each edge we traversed in the chain, remove the original edge's execution count. */ - edge_frequency = edge_probability.apply (b->frequency); - do { edge t; @@ -596,16 +592,12 @@ try_forward_edges (int mode, basic_block gcc_assert (n < nthreaded_edges); t = threaded_edges [n++]; gcc_assert (t->src == first); - update_bb_profile_for_threading (first, edge_frequency, - edge_count, t); + update_bb_profile_for_threading (first, edge_count, t); update_br_prob_note (first); } else { first->count -= edge_count; - first->frequency -= edge_frequency; - if (first->frequency < 0) - first->frequency = 0; /* It is possible that as the result of threading we've removed edge as it is threaded to the fallthru edge. Avoid @@ -2109,7 +2101,7 @@ try_crossjump_to_edge (int mode, edge e1 else redirect_edges_to = osrc2; - /* Recompute the frequencies and counts of outgoing edges. */ + /* Recompute the counts of destinations of outgoing edges. */ FOR_EACH_EDGE (s, ei, redirect_edges_to->succs) { edge s2; @@ -2132,24 +2124,19 @@ try_crossjump_to_edge (int mode, edge e1 that there is no more than one in the chain, so we can't run into infinite loop. 
*/ if (FORWARDER_BLOCK_P (s->dest)) - { - s->dest->frequency += EDGE_FREQUENCY (s); - } + s->dest->count += s->count (); if (FORWARDER_BLOCK_P (s2->dest)) - { - s2->dest->frequency -= EDGE_FREQUENCY (s); - if (s2->dest->frequency < 0) - s2->dest->frequency = 0; - } + s2->dest->count -= s->count (); - if (!redirect_edges_to->frequency && !src1->frequency) + /* FIXME: Is this correct? Should be rewritten to count API. */ + if (redirect_edges_to->count.nonzero_p () && src1->count.nonzero_p ()) s->probability = s->probability.combine_with_freq - (redirect_edges_to->frequency, - s2->probability, src1->frequency); + (redirect_edges_to->count.to_frequency (cfun), + s2->probability, src1->count.to_frequency (cfun)); } - /* Adjust count and frequency for the block. An earlier jump + /* Adjust count for the block. An earlier jump threading pass may have left the profile in an inconsistent state (see update_bb_profile_for_threading) so we must be prepared for overflows. */ @@ -2157,9 +2144,6 @@ try_crossjump_to_edge (int mode, edge e1 do { tmp->count += src1->count; - tmp->frequency += src1->frequency; - if (tmp->frequency > BB_FREQ_MAX) - tmp->frequency = BB_FREQ_MAX; if (tmp == redirect_edges_to) break; tmp = find_fallthru_edge (tmp->succs)->dest; Index: cfgexpand.c =================================================================== --- cfgexpand.c (revision 254348) +++ cfgexpand.c (working copy) @@ -2516,7 +2516,6 @@ expand_gimple_cond (basic_block bb, gcon redirect_edge_succ (false_edge, new_bb); false_edge->flags |= EDGE_FALLTHRU; new_bb->count = false_edge->count (); - new_bb->frequency = EDGE_FREQUENCY (false_edge); loop_p loop = find_common_loop (bb->loop_father, dest->loop_father); add_bb_to_loop (new_bb, loop); if (loop->latch == bb @@ -3847,11 +3846,7 @@ expand_gimple_tailcall (basic_block bb, if (!(e->flags & (EDGE_ABNORMAL | EDGE_EH))) { if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - e->dest->frequency -= EDGE_FREQUENCY (e); - if (e->dest->frequency < 0) - 
e->dest->frequency = 0; - } + e->dest->count -= e->count (); probability += e->probability; remove_edge (e); } @@ -5860,7 +5855,6 @@ construct_init_block (void) init_block = create_basic_block (NEXT_INSN (get_insns ()), get_last_insn (), ENTRY_BLOCK_PTR_FOR_FN (cfun)); - init_block->frequency = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; init_block->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; add_bb_to_loop (init_block, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father); if (e) @@ -5924,7 +5918,7 @@ construct_exit_block (void) while (NEXT_INSN (head) && NOTE_P (NEXT_INSN (head))) head = NEXT_INSN (head); /* But make sure exit_block starts with RETURN_LABEL, otherwise the - bb frequency counting will be confused. Any instructions before that + bb count counting will be confused. Any instructions before that label are emitted for the case where PREV_BB falls through into the exit block, so append those instructions to prev_bb in that case. */ if (NEXT_INSN (head) != return_label) @@ -5937,7 +5931,6 @@ construct_exit_block (void) } } exit_block = create_basic_block (NEXT_INSN (head), end, prev_bb); - exit_block->frequency = EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency; exit_block->count = EXIT_BLOCK_PTR_FOR_FN (cfun)->count; add_bb_to_loop (exit_block, EXIT_BLOCK_PTR_FOR_FN (cfun)->loop_father); @@ -5957,10 +5950,7 @@ construct_exit_block (void) if (e2 != e) { exit_block->count -= e2->count (); - exit_block->frequency -= EDGE_FREQUENCY (e2); } - if (exit_block->frequency < 0) - exit_block->frequency = 0; update_bb_for_insn (exit_block); } Index: cfghooks.c =================================================================== --- cfghooks.c (revision 254348) +++ cfghooks.c (working copy) @@ -146,10 +146,12 @@ verify_flow_info (void) error ("verify_flow_info: Wrong count of block %i", bb->index); err = 1; } - if (bb->frequency < 0) + /* FIXME: Graphite and SLJL and target code still tends to produce + edges with no probablity. 
*/ + if (profile_status_for_fn (cfun) >= PROFILE_GUESSED + && !bb->count.initialized_p () && !flag_graphite && 0) { - error ("verify_flow_info: Wrong frequency of block %i %i", - bb->index, bb->frequency); + error ("verify_flow_info: Missing count of block %i", bb->index); err = 1; } @@ -164,7 +166,7 @@ verify_flow_info (void) /* FIXME: Graphite and SLJL and target code still tends to produce edges with no probablity. */ if (profile_status_for_fn (cfun) >= PROFILE_GUESSED - && !e->probability.initialized_p () && 0) + && !e->probability.initialized_p () && !flag_graphite && 0) { error ("Uninitialized probability of edge %i->%i", e->src->index, e->dest->index); @@ -315,7 +317,6 @@ dump_bb_for_graph (pretty_printer *pp, b /* TODO: Add pretty printer for counter. */ if (bb->count.initialized_p ()) pp_printf (pp, "COUNT:" "%" PRId64, bb->count.to_gcov_type ()); - pp_printf (pp, " FREQ:%i |", bb->frequency); pp_write_text_to_stream (pp); if (!(dump_flags & TDF_SLIM)) cfg_hooks->dump_bb_for_graph (pp, bb); @@ -513,7 +514,6 @@ split_block_1 (basic_block bb, void *i) return NULL; new_bb->count = bb->count; - new_bb->frequency = bb->frequency; new_bb->discriminator = bb->discriminator; if (dom_info_available_p (CDI_DOMINATORS)) @@ -626,7 +626,6 @@ split_edge (edge e) { basic_block ret; profile_count count = e->count (); - int freq = EDGE_FREQUENCY (e); edge f; bool irr = (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0; struct loop *loop; @@ -640,7 +639,6 @@ split_edge (edge e) ret = cfg_hooks->split_edge (e); ret->count = count; - ret->frequency = freq; single_succ_edge (ret)->probability = profile_probability::always (); if (irr) @@ -869,7 +867,6 @@ make_forwarder_block (basic_block bb, bo fallthru = split_block_after_labels (bb); dummy = fallthru->src; dummy->count = profile_count::zero (); - dummy->frequency = 0; bb = fallthru->dest; /* Redirect back edges we want to keep. 
*/ @@ -879,10 +876,6 @@ make_forwarder_block (basic_block bb, bo if (redirect_edge_p (e)) { - dummy->frequency += EDGE_FREQUENCY (e); - if (dummy->frequency > BB_FREQ_MAX) - dummy->frequency = BB_FREQ_MAX; - dummy->count += e->count (); ei_next (&ei); continue; @@ -1101,19 +1094,10 @@ duplicate_block (basic_block bb, edge e, new_bb->count = new_count; bb->count -= new_count; - new_bb->frequency = EDGE_FREQUENCY (e); - bb->frequency -= EDGE_FREQUENCY (e); - redirect_edge_and_branch_force (e, new_bb); - - if (bb->frequency < 0) - bb->frequency = 0; } else - { - new_bb->count = bb->count; - new_bb->frequency = bb->frequency; - } + new_bb->count = bb->count; set_bb_original (new_bb, bb); set_bb_copy (bb, new_bb); @@ -1463,13 +1447,6 @@ account_profile_record (struct profile_r if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && profile_status_for_fn (cfun) != PROFILE_ABSENT) { - int sum = 0; - FOR_EACH_EDGE (e, ei, bb->preds) - sum += EDGE_FREQUENCY (e); - if (abs (sum - bb->frequency) > 100 - || (MAX (sum, bb->frequency) > 10 - && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10)) - record->num_mismatched_freq_in[after_pass]++; profile_count lsum = profile_count::zero (); FOR_EACH_EDGE (e, ei, bb->preds) lsum += e->count (); Index: cfgloop.c =================================================================== --- cfgloop.c (revision 254348) +++ cfgloop.c (working copy) @@ -607,7 +607,7 @@ find_subloop_latch_edge_by_profile (vec< tcount += e->count(); } - if (!tcount.initialized_p () || tcount < HEAVY_EDGE_MIN_SAMPLES + if (!tcount.initialized_p () || !(tcount.ipa () > HEAVY_EDGE_MIN_SAMPLES) || (tcount - mcount).apply_scale (HEAVY_EDGE_RATIO, 1) > tcount) return NULL; Index: cfgloopanal.c =================================================================== --- cfgloopanal.c (revision 254348) +++ cfgloopanal.c (working copy) @@ -213,9 +213,10 @@ average_num_loop_insns (const struct loo if (NONDEBUG_INSN_P (insn)) binsns++; - ratio = loop->header->frequency 
== 0 + ratio = loop->header->count.to_frequency (cfun) == 0 ? BB_FREQ_MAX - : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; + : (bb->count.to_frequency (cfun) * BB_FREQ_MAX) + / loop->header->count.to_frequency (cfun); ninsns += binsns * ratio; } free (bbs); @@ -245,8 +246,8 @@ expected_loop_iterations_unbounded (cons /* If we have no profile at all, use AVG_LOOP_NITER. */ if (profile_status_for_fn (cfun) == PROFILE_ABSENT) expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); - else if (loop->latch && (loop->latch->count.reliable_p () - || loop->header->count.reliable_p ())) + else if (loop->latch && (loop->latch->count.initialized_p () + || loop->header->count.initialized_p ())) { profile_count count_in = profile_count::zero (), count_latch = profile_count::zero (); @@ -258,45 +259,25 @@ expected_loop_iterations_unbounded (cons count_in += e->count (); if (!count_latch.initialized_p ()) - ; - else if (!(count_in > profile_count::zero ())) + expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); + else if (!count_in.nonzero_p ()) expected = count_latch.to_gcov_type () * 2; else { expected = (count_latch.to_gcov_type () + count_in.to_gcov_type () - 1) / count_in.to_gcov_type (); - if (read_profile_p) + if (read_profile_p + && count_latch.reliable_p () && count_in.reliable_p ()) *read_profile_p = true; } } - if (expected == -1) - { - int freq_in, freq_latch; - - freq_in = 0; - freq_latch = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (flow_bb_inside_loop_p (loop, e->src)) - freq_latch += EDGE_FREQUENCY (e); - else - freq_in += EDGE_FREQUENCY (e); - - if (freq_in == 0) - { - /* If we have no profile at all, use AVG_LOOP_NITER iterations. 
*/ - if (!freq_latch) - expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); - else - expected = freq_latch * 2; - } - else - expected = (freq_latch + freq_in - 1) / freq_in; - } + else + expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); HOST_WIDE_INT max = get_max_loop_iterations_int (loop); if (max != -1 && max < expected) return max; + return expected; } Index: cfgloopmanip.c =================================================================== --- cfgloopmanip.c (revision 254348) +++ cfgloopmanip.c (working copy) @@ -536,7 +536,6 @@ scale_loop_profile (struct loop *loop, p if (e) { edge other_e; - int freq_delta; profile_count count_delta; FOR_EACH_EDGE (other_e, ei, e->src->succs) @@ -545,23 +544,18 @@ scale_loop_profile (struct loop *loop, p break; /* Probability of exit must be 1/iterations. */ - freq_delta = EDGE_FREQUENCY (e); count_delta = e->count (); e->probability = profile_probability::always () .apply_scale (1, iteration_bound); other_e->probability = e->probability.invert (); - freq_delta -= EDGE_FREQUENCY (e); count_delta -= e->count (); - /* If latch exists, change its frequency and count, since we changed + /* If latch exists, change its count, since we changed probability of exit. Theoretically we should update everything from source of exit edge to latch, but for vectorizer this is enough. */ if (loop->latch && loop->latch != e->src) { - loop->latch->frequency += freq_delta; - if (loop->latch->frequency < 0) - loop->latch->frequency = 0; loop->latch->count += count_delta; } } @@ -571,7 +565,6 @@ scale_loop_profile (struct loop *loop, p we look at the actual profile, if it is available. 
*/ p = p.apply_scale (iteration_bound, iterations); - bool determined = false; if (loop->header->count.initialized_p ()) { profile_count count_in = profile_count::zero (); @@ -584,21 +577,8 @@ scale_loop_profile (struct loop *loop, p { p = count_in.probability_in (loop->header->count.apply_scale (iteration_bound, 1)); - determined = true; } } - if (!determined && loop->header->frequency) - { - int freq_in = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (e->src != loop->latch) - freq_in += EDGE_FREQUENCY (e); - - if (freq_in != 0) - p = profile_probability::probability_in_gcov_type - (freq_in * iteration_bound, loop->header->frequency); - } if (!(p > profile_probability::never ())) p = profile_probability::very_unlikely (); } @@ -800,7 +780,7 @@ create_empty_loop_on_edge (edge entry_ed loop->latch = loop_latch; add_loop (loop, outer); - /* TODO: Fix frequencies and counts. */ + /* TODO: Fix counts. */ scale_loop_frequencies (loop, profile_probability::even ()); /* Update dominators. */ @@ -866,13 +846,11 @@ loopify (edge latch_edge, edge header_ed basic_block pred_bb = header_edge->src; struct loop *loop = alloc_loop (); struct loop *outer = loop_outer (succ_bb->loop_father); - int freq; profile_count cnt; loop->header = header_edge->dest; loop->latch = latch_edge->src; - freq = EDGE_FREQUENCY (header_edge); cnt = header_edge->count (); /* Redirect edges. */ @@ -901,10 +879,9 @@ loopify (edge latch_edge, edge header_ed remove_bb_from_loops (switch_bb); add_bb_to_loop (switch_bb, outer); - /* Fix frequencies. */ + /* Fix counts. */ if (redirect_all_edges) { - switch_bb->frequency = freq; switch_bb->count = cnt; } scale_loop_frequencies (loop, false_scale); @@ -1167,7 +1144,7 @@ duplicate_loop_to_header_edge (struct lo { /* Calculate coefficients by that we have to scale frequencies of duplicated loop bodies. 
*/ - freq_in = header->frequency; + freq_in = header->count.to_frequency (cfun); freq_le = EDGE_FREQUENCY (latch_edge); if (freq_in == 0) freq_in = 1; Index: cfgrtl.c =================================================================== --- cfgrtl.c (revision 254348) +++ cfgrtl.c (working copy) @@ -1533,6 +1533,7 @@ force_nonfallthru_and_redirect (edge e, basic_block bb = create_basic_block (BB_HEAD (e->dest), NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun)); + bb->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; /* Change the existing edge's source to be the new block, and add a new edge from the entry block to the new block. */ @@ -1628,7 +1629,6 @@ force_nonfallthru_and_redirect (edge e, jump_block = create_basic_block (new_head, NULL, e->src); jump_block->count = count; - jump_block->frequency = EDGE_FREQUENCY (e); /* Make sure new block ends up in correct hot/cold section. */ @@ -1652,7 +1652,6 @@ force_nonfallthru_and_redirect (edge e, { new_edge->probability = new_edge->probability.apply_scale (1, 2); jump_block->count = jump_block->count.apply_scale (1, 2); - jump_block->frequency /= 2; edge new_edge2 = make_edge (new_edge->src, target, e->flags & ~EDGE_FALLTHRU); new_edge2->probability = probability - new_edge->probability; @@ -2245,9 +2244,23 @@ void update_br_prob_note (basic_block bb) { rtx note; - if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ()) - return; note = find_reg_note (BB_END (bb), REG_BR_PROB, NULL_RTX); + if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ()) + { + if (note) + { + rtx *note_link, this_rtx; + + note_link = ®_NOTES (BB_END (bb)); + for (this_rtx = *note_link; this_rtx; this_rtx = XEXP (this_rtx, 1)) + if (this_rtx == note) + { + *note_link = XEXP (this_rtx, 1); + break; + } + } + return; + } if (!note || XINT (note, 0) == BRANCH_EDGE (bb)->probability.to_reg_br_prob_note ()) return; @@ -3623,7 +3636,6 @@ relink_block_chain (bool stay_in_cfglayo fprintf (dump_file, "compensation "); else 
fprintf (dump_file, "bb %i ", bb->index); - fprintf (dump_file, " [%i]\n", bb->frequency); } } @@ -5034,7 +5046,7 @@ rtl_account_profile_record (basic_block += insn_cost (insn, true) * bb->count.to_gcov_type (); else if (profile_status_for_fn (cfun) == PROFILE_GUESSED) record->time[after_pass] - += insn_cost (insn, true) * bb->frequency; + += insn_cost (insn, true) * bb->count.to_frequency (cfun); } } Index: cgraph.c =================================================================== --- cgraph.c (revision 254348) +++ cgraph.c (working copy) @@ -862,7 +862,7 @@ symbol_table::create_edge (cgraph_node * edge->next_callee = NULL; edge->lto_stmt_uid = 0; - edge->count = count; + edge->count = count.ipa (); edge->frequency = freq; gcc_checking_assert (freq >= 0); gcc_checking_assert (freq <= CGRAPH_FREQ_MAX); @@ -1308,7 +1308,7 @@ cgraph_edge::redirect_call_stmt_to_calle /* We are producing the final function body and will throw away the callgraph edges really soon. Reset the counts/frequencies to keep verifier happy in the case of roundoff errors. */ - e->count = gimple_bb (e->call_stmt)->count; + e->count = gimple_bb (e->call_stmt)->count.ipa (); e->frequency = compute_call_stmt_bb_frequency (e->caller->decl, gimple_bb (e->call_stmt)); } @@ -1338,7 +1338,7 @@ cgraph_edge::redirect_call_stmt_to_calle prob = profile_probability::even (); new_stmt = gimple_ic (e->call_stmt, dyn_cast<cgraph_node *> (ref->referred), - prob, e->count, e->count + e2->count); + prob); e->speculative = false; e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt, false); @@ -1644,7 +1644,7 @@ cgraph_update_edges_for_call_stmt_node ( /* Otherwise remove edge and create new one; we can't simply redirect since function has changed, so inline plan and other information attached to edge is invalid. 
*/ - count = e->count; + count = e->count.ipa (); frequency = e->frequency; if (e->indirect_unknown_callee || e->inline_failed) e->remove (); @@ -1655,7 +1655,7 @@ cgraph_update_edges_for_call_stmt_node ( { /* We are seeing new direct call; compute profile info based on BB. */ basic_block bb = gimple_bb (new_stmt); - count = bb->count; + count = bb->count.ipa (); frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); } @@ -3082,9 +3082,14 @@ bool cgraph_edge::verify_count_and_frequency () { bool error_found = false; - if (count < 0) + if (!count.verify ()) { - error ("caller edge count is negative"); + error ("caller edge count invalid"); + error_found = true; + } + if (count.initialized_p () && !(count.ipa () == count)) + { + error ("caller edge count is local"); error_found = true; } if (frequency < 0) @@ -3183,9 +3188,14 @@ cgraph_node::verify_node (void) identifier_to_locale (e->callee->name ())); error_found = true; } - if (count < 0) + if (!count.verify ()) + { + error ("cgraph count invalid"); + error_found = true; + } + if (count.initialized_p () && !(count.ipa () == count)) { - error ("execution count is negative"); + error ("cgraph count is local"); error_found = true; } if (global.inlined_to && same_comdat_group) @@ -3269,7 +3279,9 @@ cgraph_node::verify_node (void) { if (e->verify_count_and_frequency ()) error_found = true; + /* FIXME: re-enable once cgraph is converted to counts. */ if (gimple_has_body_p (e->caller->decl) + && 0 && !e->caller->global.inlined_to && !e->speculative /* Optimized out calls are redirected to __builtin_unreachable. */ @@ -3292,9 +3304,11 @@ cgraph_node::verify_node (void) { if (e->verify_count_and_frequency ()) error_found = true; + /* FIXME: re-enable once cgraph is converted to counts. 
*/ if (gimple_has_body_p (e->caller->decl) && !e->caller->global.inlined_to && !e->speculative + && 0 && (e->frequency != compute_call_stmt_bb_frequency (e->caller->decl, gimple_bb (e->call_stmt)))) Index: cgraphbuild.c =================================================================== --- cgraphbuild.c (revision 254348) +++ cgraphbuild.c (working copy) @@ -190,21 +190,8 @@ record_eh_tables (cgraph_node *node, fun int compute_call_stmt_bb_frequency (tree decl, basic_block bb) { - int entry_freq = ENTRY_BLOCK_PTR_FOR_FN - (DECL_STRUCT_FUNCTION (decl))->frequency; - int freq = bb->frequency; - - if (profile_status_for_fn (DECL_STRUCT_FUNCTION (decl)) == PROFILE_ABSENT) - return CGRAPH_FREQ_BASE; - - if (!entry_freq) - entry_freq = 1, freq++; - - freq = freq * CGRAPH_FREQ_BASE / entry_freq; - if (freq > CGRAPH_FREQ_MAX) - freq = CGRAPH_FREQ_MAX; - - return freq; + return bb->count.to_cgraph_frequency + (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (decl))->count); } /* Mark address taken in STMT. */ @@ -415,7 +402,7 @@ cgraph_edge::rebuild_edges (void) node->remove_callees (); node->remove_all_references (); - node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); FOR_EACH_BB_FN (bb, cfun) { Index: cgraphunit.c =================================================================== --- cgraphunit.c (revision 254348) +++ cgraphunit.c (working copy) @@ -1601,12 +1601,9 @@ init_lowered_empty_function (tree decl, /* Create BB for body of the function and connect it properly. 
*/ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX; EXIT_BLOCK_PTR_FOR_FN (cfun)->count = count; - EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX; bb = create_basic_block (NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun)); bb->count = count; - bb->frequency = BB_FREQ_MAX; e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), bb, EDGE_FALLTHRU); e->probability = profile_probability::always (); e = make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -1852,8 +1849,12 @@ cgraph_node::expand_thunk (bool output_a else resdecl = DECL_RESULT (thunk_fndecl); + profile_count cfg_count = count; + if (!cfg_count.initialized_p ()) + cfg_count = profile_count::from_gcov_type (BB_FREQ_MAX).guessed_local (); + bb = then_bb = else_bb = return_bb - = init_lowered_empty_function (thunk_fndecl, true, count); + = init_lowered_empty_function (thunk_fndecl, true, cfg_count); bsi = gsi_start_bb (bb); @@ -1966,14 +1967,11 @@ cgraph_node::expand_thunk (bool output_a adjustment, because that's why we're emitting a thunk. */ then_bb = create_basic_block (NULL, bb); - then_bb->count = count - count.apply_scale (1, 16); - then_bb->frequency = BB_FREQ_MAX - BB_FREQ_MAX / 16; + then_bb->count = cfg_count - cfg_count.apply_scale (1, 16); return_bb = create_basic_block (NULL, then_bb); - return_bb->count = count; - return_bb->frequency = BB_FREQ_MAX; + return_bb->count = cfg_count; else_bb = create_basic_block (NULL, else_bb); - then_bb->count = count.apply_scale (1, 16); - then_bb->frequency = BB_FREQ_MAX / 16; + else_bb->count = cfg_count.apply_scale (1, 16); add_bb_to_loop (then_bb, bb->loop_father); add_bb_to_loop (return_bb, bb->loop_father); add_bb_to_loop (else_bb, bb->loop_father); @@ -2028,8 +2026,10 @@ cgraph_node::expand_thunk (bool output_a } cfun->gimple_df->in_ssa_p = true; + counts_to_freqs (); profile_status_for_fn (cfun) - = count.initialized_p () ? PROFILE_READ : PROFILE_GUESSED; + = cfg_count.initialized_p () && cfg_count.ipa_p () + ? 
PROFILE_READ : PROFILE_GUESSED; /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */ TREE_ASM_WRITTEN (thunk_fndecl) = false; delete_unreachable_blocks (); Index: except.c =================================================================== --- except.c (revision 254348) +++ except.c (working copy) @@ -1003,7 +1003,6 @@ dw2_build_landing_pads (void) bb = emit_to_new_bb_before (seq, label_rtx (lp->post_landing_pad)); bb->count = bb->next_bb->count; - bb->frequency = bb->next_bb->frequency; make_single_succ_edge (bb, bb->next_bb, e_flags); if (current_loops) { Index: final.c =================================================================== --- final.c (revision 254348) +++ final.c (working copy) @@ -694,8 +694,8 @@ compute_alignments (void) } loop_optimizer_init (AVOID_CFG_MODIFICATIONS); FOR_EACH_BB_FN (bb, cfun) - if (bb->frequency > freq_max) - freq_max = bb->frequency; + if (bb->count.to_frequency (cfun) > freq_max) + freq_max = bb->count.to_frequency (cfun); freq_threshold = freq_max / PARAM_VALUE (PARAM_ALIGN_THRESHOLD); if (dump_file) @@ -713,7 +713,8 @@ compute_alignments (void) if (dump_file) fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth %2i skipped.\n", - bb->index, bb->frequency, bb->loop_father->num, + bb->index, bb->count.to_frequency (cfun), + bb->loop_father->num, bb_loop_depth (bb)); continue; } @@ -731,7 +732,7 @@ compute_alignments (void) { fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth" " %2i fall %4i branch %4i", - bb->index, bb->frequency, bb->loop_father->num, + bb->index, bb->count.to_frequency (cfun), bb->loop_father->num, bb_loop_depth (bb), fallthru_frequency, branch_frequency); if (!bb->loop_father->inner && bb->loop_father->num) @@ -753,9 +754,10 @@ compute_alignments (void) if (!has_fallthru && (branch_frequency > freq_threshold - || (bb->frequency > bb->prev_bb->frequency * 10 - && (bb->prev_bb->frequency - <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency / 2)))) + || (bb->count.to_frequency (cfun) + > 
bb->prev_bb->count.to_frequency (cfun) * 10 + && (bb->prev_bb->count.to_frequency (cfun) + <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) / 2)))) { log = JUMP_ALIGN (label); if (dump_file) @@ -1942,8 +1944,6 @@ dump_basic_block_info (FILE *file, rtx_i edge_iterator ei; fprintf (file, "%s BLOCK %d", ASM_COMMENT_START, bb->index); - if (bb->frequency) - fprintf (file, " freq:%d", bb->frequency); if (bb->count.initialized_p ()) { fprintf (file, ", count:"); Index: gimple-pretty-print.c =================================================================== --- gimple-pretty-print.c (revision 254348) +++ gimple-pretty-print.c (working copy) @@ -82,21 +82,17 @@ debug_gimple_stmt (gimple *gs) by xstrdup_for_dump. */ static const char * -dump_profile (int frequency, profile_count &count) +dump_profile (profile_count &count) { - float minimum = 0.01f; - - gcc_assert (0 <= frequency && frequency <= REG_BR_PROB_BASE); - float fvalue = frequency * 100.0f / REG_BR_PROB_BASE; - if (fvalue < minimum && frequency > 0) - return "[0.01%]"; - char *buf; - if (count.initialized_p ()) - buf = xasprintf ("[%.2f%%] [count: %" PRId64 "]", fvalue, + if (!count.initialized_p ()) + return NULL; + if (count.ipa_p ()) + buf = xasprintf ("[count: %" PRId64 "]", + count.to_gcov_type ()); + else if (count.initialized_p ()) + buf = xasprintf ("[local count: %" PRId64 "]", count.to_gcov_type ()); - else - buf = xasprintf ("[%.2f%%] [count: INV]", fvalue); const char *ret = xstrdup_for_dump (buf); free (buf); @@ -2695,8 +2691,7 @@ dump_gimple_bb_header (FILE *outf, basic fprintf (outf, "%*sbb_%d:\n", indent, "", bb->index); else fprintf (outf, "%*s<bb %d> %s:\n", - indent, "", bb->index, dump_profile (bb->frequency, - bb->count)); + indent, "", bb->index, dump_profile (bb->count)); } } Index: gimple-ssa-isolate-paths.c =================================================================== --- gimple-ssa-isolate-paths.c (revision 254348) +++ gimple-ssa-isolate-paths.c (working copy) @@ -154,7 
+154,6 @@ isolate_path (basic_block bb, basic_bloc if (!duplicate) { duplicate = duplicate_block (bb, NULL, NULL); - bb->frequency = 0; bb->count = profile_count::zero (); if (!ret_zero) for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); ) @@ -168,7 +167,7 @@ isolate_path (basic_block bb, basic_bloc flush_pending_stmts (e2); /* Update profile only when redirection is really processed. */ - bb->frequency += EDGE_FREQUENCY (e); + bb->count += e->count (); } /* There may be more than one statement in DUPLICATE which exhibits Index: gimple-streamer-in.c =================================================================== --- gimple-streamer-in.c (revision 254348) +++ gimple-streamer-in.c (working copy) @@ -266,7 +266,6 @@ input_bb (struct lto_input_block *ib, en bb->count = profile_count::stream_in (ib).apply_scale (count_materialization_scale, REG_BR_PROB_BASE); - bb->frequency = streamer_read_hwi (ib); bb->flags = streamer_read_hwi (ib); /* LTO_bb1 has statements. LTO_bb0 does not. */ Index: gimple-streamer-out.c =================================================================== --- gimple-streamer-out.c (revision 254348) +++ gimple-streamer-out.c (working copy) @@ -210,7 +210,6 @@ output_bb (struct output_block *ob, basi streamer_write_uhwi (ob, bb->index); bb->count.stream_out (ob); - streamer_write_hwi (ob, bb->frequency); streamer_write_hwi (ob, bb->flags); if (!gsi_end_p (bsi) || phi_nodes (bb)) Index: haifa-sched.c =================================================================== --- haifa-sched.c (revision 254348) +++ haifa-sched.c (working copy) @@ -3917,8 +3917,8 @@ sched_pressure_start_bb (basic_block bb) - call_saved_regs_num[cl]). 
*/ { int i; - int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; - int bb_freq = bb->frequency; + int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); + int bb_freq = bb->count.to_frequency (cfun); if (bb_freq == 0) { @@ -8141,8 +8141,6 @@ init_before_recovery (basic_block *befor single->count = last->count; empty->count = last->count; - single->frequency = last->frequency; - empty->frequency = last->frequency; BB_COPY_PARTITION (single, last); BB_COPY_PARTITION (empty, last); @@ -8236,7 +8234,6 @@ sched_create_recovery_edges (basic_block in sel-sched.c `check_ds' in create_speculation_check. */ e->probability = profile_probability::very_unlikely (); rec->count = e->count (); - rec->frequency = EDGE_FREQUENCY (e); e2->probability = e->probability.invert (); rtx_code_label *label = block_label (second_bb); Index: hsa-gen.c =================================================================== --- hsa-gen.c (revision 254348) +++ hsa-gen.c (working copy) @@ -6374,7 +6374,7 @@ convert_switch_statements (void) edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); next_edge->probability = new_edge->probability.invert (); - next_bb->frequency = EDGE_FREQUENCY (next_edge); + next_bb->count = next_edge->count (); cur_bb = next_bb; } else /* Link last IF statement and default label Index: ipa-cp.c =================================================================== --- ipa-cp.c (revision 254348) +++ ipa-cp.c (working copy) @@ -3257,6 +3257,8 @@ ipcp_propagate_stage (struct ipa_topo_in if (dump_file) fprintf (dump_file, "\n Propagating constants:\n\n"); + max_count = profile_count::uninitialized (); + FOR_EACH_DEFINED_FUNCTION (node) { struct ipa_node_params *info = IPA_NODE_REF (node); @@ -3270,8 +3272,7 @@ ipcp_propagate_stage (struct ipa_topo_in } if (node->definition && !node->alias) overall_size += ipa_fn_summaries->get (node)->self_size; - if (node->count > max_count) - max_count = node->count; + max_count = max_count.max 
(node->count); } max_new_size = overall_size; @@ -5125,7 +5126,7 @@ make_pass_ipa_cp (gcc::context *ctxt) void ipa_cp_c_finalize (void) { - max_count = profile_count::zero (); + max_count = profile_count::uninitialized (); overall_size = 0; max_new_size = 0; } Index: ipa-fnsummary.c =================================================================== --- ipa-fnsummary.c (revision 254348) +++ ipa-fnsummary.c (working copy) @@ -1608,7 +1608,7 @@ static basic_block get_minimal_bb (basic_block init_bb, basic_block use_bb) { struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); - if (l && l->header->frequency < init_bb->frequency) + if (l && l->header->count < init_bb->count) return l->header; return init_bb; } @@ -1664,20 +1664,21 @@ param_change_prob (gimple *stmt, int i) { int init_freq; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return REG_BR_PROB_BASE; if (SSA_NAME_IS_DEFAULT_DEF (base)) - init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); else init_freq = get_minimal_bb (gimple_bb (SSA_NAME_DEF_STMT (base)), - gimple_bb (stmt))->frequency; + gimple_bb (stmt))->count.to_frequency (cfun); if (!init_freq) init_freq = 1; - if (init_freq < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1); + if (init_freq < bb->count.to_frequency (cfun)) + return MAX (GCOV_COMPUTE_SCALE (init_freq, + bb->count.to_frequency (cfun)), 1); else return REG_BR_PROB_BASE; } @@ -1692,7 +1693,7 @@ param_change_prob (gimple *stmt, int i) if (init != error_mark_node) return 0; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return REG_BR_PROB_BASE; ao_ref_init (&refd, op); info.stmt = stmt; @@ -1708,17 +1709,17 @@ param_change_prob (gimple *stmt, int i) /* Assume that every memory is initialized at entry. TODO: Can we easilly determine if value is always defined and thus we may skip entry block? 
*/ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) - max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) + max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); else max = 1; EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) - max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency); + max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->count.to_frequency (cfun)); BITMAP_FREE (info.bb_set); - if (max < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1); + if (max < bb->count.to_frequency (cfun)) + return MAX (GCOV_COMPUTE_SCALE (max, bb->count.to_frequency (cfun)), 1); else return REG_BR_PROB_BASE; } Index: ipa-inline-transform.c =================================================================== --- ipa-inline-transform.c (revision 254348) +++ ipa-inline-transform.c (working copy) @@ -676,9 +676,9 @@ inline_transform (struct cgraph_node *no { profile_count num = node->count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()) - && !(num == den); + bool scale = num.initialized_p () && den.ipa_p () + && (den.nonzero_p () || num == profile_count::zero ()) + && !(num == den.ipa ()); if (scale) { if (dump_file) Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 254348) +++ ipa-inline.c (working copy) @@ -640,8 +640,8 @@ compute_uninlined_call_time (struct cgra ? 
edge->caller->global.inlined_to : edge->caller); - if (edge->count > profile_count::zero () - && caller->count > profile_count::zero ()) + if (edge->count.nonzero_p () + && caller->count.nonzero_p ()) uninlined_call_time *= (sreal)edge->count.to_gcov_type () / caller->count.to_gcov_type (); if (edge->frequency) @@ -665,8 +665,8 @@ compute_inlined_call_time (struct cgraph : edge->caller); sreal caller_time = ipa_fn_summaries->get (caller)->time; - if (edge->count > profile_count::zero () - && caller->count > profile_count::zero ()) + if (edge->count.nonzero_p () + && caller->count.nonzero_p ()) time *= (sreal)edge->count.to_gcov_type () / caller->count.to_gcov_type (); if (edge->frequency) time *= cgraph_freq_base_rec * edge->frequency; @@ -733,7 +733,7 @@ want_inline_small_function_p (struct cgr want_inline = false; } else if ((DECL_DECLARED_INLINE_P (callee->decl) - || e->count > profile_count::zero ()) + || e->count.nonzero_p ()) && ipa_fn_summaries->get (callee)->min_size - ipa_call_summaries->get (e)->call_stmt_size > 16 * MAX_INLINE_INSNS_SINGLE) @@ -843,7 +843,7 @@ want_inline_self_recursive_call_p (struc reason = "recursive call is cold"; want_inline = false; } - else if (outer_node->count == profile_count::zero ()) + else if (!outer_node->count.nonzero_p ()) { reason = "not executed in profile"; want_inline = false; @@ -881,7 +881,7 @@ want_inline_self_recursive_call_p (struc int i; for (i = 1; i < depth; i++) max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE; - if (max_count > profile_count::zero () && edge->count > profile_count::zero () + if (max_count.nonzero_p () && edge->count.nonzero_p () && (edge->count.to_gcov_type () * CGRAPH_FREQ_BASE / outer_node->count.to_gcov_type () >= max_prob)) @@ -889,7 +889,7 @@ want_inline_self_recursive_call_p (struc reason = "profile of recursive call is too large"; want_inline = false; } - if (max_count == profile_count::zero () + if (!max_count.nonzero_p () && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq >= 
max_prob)) { @@ -915,7 +915,7 @@ want_inline_self_recursive_call_p (struc methods. */ else { - if (max_count > profile_count::zero () && edge->count.initialized_p () + if (max_count.nonzero_p () && edge->count.initialized_p () && (edge->count.to_gcov_type () * 100 / outer_node->count.to_gcov_type () <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) @@ -923,7 +923,7 @@ want_inline_self_recursive_call_p (struc reason = "profile of recursive call is too small"; want_inline = false; } - else if ((max_count == profile_count::zero () + else if ((!max_count.nonzero_p () || !edge->count.initialized_p ()) && (edge->frequency * 100 / caller_freq <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) @@ -1070,7 +1070,7 @@ edge_badness (struct cgraph_edge *edge, then calls without. */ else if (opt_for_fn (caller->decl, flag_guess_branch_prob) - || caller->count > profile_count::zero ()) + || caller->count.nonzero_p ()) { sreal numerator, denominator; int overall_growth; @@ -1080,7 +1080,7 @@ edge_badness (struct cgraph_edge *edge, - inlined_time); if (numerator == 0) numerator = ((sreal) 1 >> 8); - if (caller->count > profile_count::zero ()) + if (caller->count.nonzero_p ()) numerator *= caller->count.to_gcov_type (); else if (caller->count.initialized_p ()) numerator = numerator >> 11; @@ -1521,7 +1521,7 @@ recursive_inlining (struct cgraph_edge * { fprintf (dump_file, " Inlining call of depth %i", depth); - if (node->count > profile_count::zero ()) + if (node->count.nonzero_p ()) { fprintf (dump_file, " called approx. %.2f times per call", (double)curr->count.to_gcov_type () @@ -1684,7 +1684,8 @@ resolve_noninline_speculation (edge_heap ? 
node->global.inlined_to : node; auto_bitmap updated_nodes; - spec_rem += edge->count; + if (edge->count.initialized_p ()) + spec_rem += edge->count; edge->resolve_speculation (); reset_edge_caches (where); ipa_update_overall_fn_summary (where); @@ -1789,8 +1790,7 @@ inline_small_functions (void) } for (edge = node->callers; edge; edge = edge->next_caller) - if (!(max_count >= edge->count)) - max_count = edge->count; + max_count = max_count.max (edge->count); } ipa_free_postorder_info (); initialize_growth_caches (); @@ -2049,7 +2049,7 @@ inline_small_functions (void) update_caller_keys (&edge_heap, where, updated_nodes, NULL); /* Offline copy count has possibly changed, recompute if profile is available. */ - if (max_count > profile_count::zero ()) + if (max_count.nonzero_p ()) { struct cgraph_node *n = cgraph_node::get (edge->callee->decl); if (n != edge->callee && n->analyzed) @@ -2392,6 +2392,7 @@ ipa_inline (void) ipa_dump_fn_summaries (dump_file); nnodes = ipa_reverse_postorder (order); + spec_rem = profile_count::zero (); FOR_EACH_FUNCTION (node) { @@ -2487,8 +2488,9 @@ ipa_inline (void) next = edge->next_callee; if (edge->speculative && !speculation_useful_p (edge, false)) { + if (edge->count.initialized_p ()) + spec_rem += edge->count; edge->resolve_speculation (); - spec_rem += edge->count; update = true; remove_functions = true; } Index: ipa-profile.c =================================================================== --- ipa-profile.c (revision 254348) +++ ipa-profile.c (working copy) @@ -179,53 +179,54 @@ ipa_profile_generate_summary (void) hash_table<histogram_hash> hashtable (10); FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) - FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - { - int time = 0; - int size = 0; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_CALL - && !gimple_call_fndecl (stmt)) - { - histogram_value h; - h = gimple_histogram_value_of_type - 
(DECL_STRUCT_FUNCTION (node->decl), - stmt, HIST_TYPE_INDIR_CALL); - /* No need to do sanity check: gimple_ic_transform already - takes away bad histograms. */ - if (h) - { - /* counter 0 is target, counter 1 is number of execution we called target, - counter 2 is total number of executions. */ - if (h->hvalue.counters[2]) - { - struct cgraph_edge * e = node->get_edge (stmt); - if (e && !e->indirect_unknown_callee) - continue; - e->indirect_info->common_target_id - = h->hvalue.counters [0]; - e->indirect_info->common_target_probability - = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); - if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) - { - if (dump_file) - fprintf (dump_file, "Probability capped to 1\n"); - e->indirect_info->common_target_probability = REG_BR_PROB_BASE; - } - } - gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), - stmt, h); - } - } - time += estimate_num_insns (stmt, &eni_time_weights); - size += estimate_num_insns (stmt, &eni_size_weights); - } - if (bb->count.initialized_p ()) - account_time_size (&hashtable, histogram, bb->count.to_gcov_type (), - time, size); - } + if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) + { + int time = 0; + int size = 0; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_fndecl (stmt)) + { + histogram_value h; + h = gimple_histogram_value_of_type + (DECL_STRUCT_FUNCTION (node->decl), + stmt, HIST_TYPE_INDIR_CALL); + /* No need to do sanity check: gimple_ic_transform already + takes away bad histograms. */ + if (h) + { + /* counter 0 is target, counter 1 is number of execution we called target, + counter 2 is total number of executions. 
*/ + if (h->hvalue.counters[2]) + { + struct cgraph_edge * e = node->get_edge (stmt); + if (e && !e->indirect_unknown_callee) + continue; + e->indirect_info->common_target_id + = h->hvalue.counters [0]; + e->indirect_info->common_target_probability + = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); + if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) + { + if (dump_file) + fprintf (dump_file, "Probability capped to 1\n"); + e->indirect_info->common_target_probability = REG_BR_PROB_BASE; + } + } + gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), + stmt, h); + } + } + time += estimate_num_insns (stmt, &eni_time_weights); + size += estimate_num_insns (stmt, &eni_size_weights); + } + if (bb->count.ipa_p () && bb->count.initialized_p ()) + account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (), + time, size); + } histogram.qsort (cmp_counts); } Index: ipa-split.c =================================================================== --- ipa-split.c (revision 254348) +++ ipa-split.c (working copy) @@ -444,7 +444,7 @@ consider_split (struct split_point *curr /* Do not split when we would end up calling function anyway. */ if (incoming_freq - >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency + >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100)) { /* When profile is guessed, we can not expect it to give us @@ -454,13 +454,14 @@ consider_split (struct split_point *curr is likely noticeable win. 
*/ if (back_edge && profile_status_for_fn (cfun) != PROFILE_READ - && incoming_freq < ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) + && incoming_freq + < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Split before loop, accepting despite low frequencies %i %i.\n", incoming_freq, - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)); } else { @@ -714,8 +715,10 @@ consider_split (struct split_point *curr out smallest size of header. In future we might re-consider this heuristics. */ if (!best_split_point.split_bbs - || best_split_point.entry_bb->frequency > current->entry_bb->frequency - || (best_split_point.entry_bb->frequency == current->entry_bb->frequency + || best_split_point.entry_bb->count.to_frequency (cfun) + > current->entry_bb->count.to_frequency (cfun) + || (best_split_point.entry_bb->count.to_frequency (cfun) + == current->entry_bb->count.to_frequency (cfun) && best_split_point.split_size < current->split_size)) { @@ -1285,7 +1288,7 @@ split_function (basic_block return_bb, s FOR_EACH_EDGE (e, ei, return_bb->preds) if (bitmap_bit_p (split_point->split_bbs, e->src->index)) { - new_return_bb->frequency += EDGE_FREQUENCY (e); + new_return_bb->count += e->count (); redirect_edge_and_branch (e, new_return_bb); redirected = true; break; Index: ipa-utils.c =================================================================== --- ipa-utils.c (revision 254348) +++ ipa-utils.c (working copy) @@ -524,7 +524,14 @@ ipa_merge_profiles (struct cgraph_node * unsigned int i; dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index); - if (!dstbb->count.initialized_p ()) + + /* Either sum the profiles if both are IPA and not global0, or + pick more informative one (that is nonzero IPA if other is + uninitialized, guessed or global0). 
*/ + if (!dstbb->count.ipa ().initialized_p () + || (dstbb->count.ipa () == profile_count::zero () + && (srcbb->count.ipa ().initialized_p () + && !(srcbb->count.ipa () == profile_count::zero ())))) { dstbb->count = srcbb->count; for (i = 0; i < EDGE_COUNT (srcbb->succs); i++) @@ -535,7 +542,8 @@ ipa_merge_profiles (struct cgraph_node * dste->probability = srce->probability; } } - else if (srcbb->count.initialized_p ()) + else if (srcbb->count.ipa ().initialized_p () + && !(srcbb->count.ipa () == profile_count::zero ())) { for (i = 0; i < EDGE_COUNT (srcbb->succs); i++) { @@ -556,7 +564,7 @@ ipa_merge_profiles (struct cgraph_node * { if (e->speculative) continue; - e->count = gimple_bb (e->call_stmt)->count; + e->count = gimple_bb (e->call_stmt)->count.ipa (); e->frequency = compute_call_stmt_bb_frequency (dst->decl, gimple_bb (e->call_stmt)); @@ -634,7 +642,7 @@ ipa_merge_profiles (struct cgraph_node * ipa_ref *ref; e2->speculative_call_info (direct, indirect, ref); - e->count = count; + e->count = count.ipa (); e->frequency = freq; int prob = direct->count.probability_in (e->count) .to_reg_br_prob_base (); @@ -643,7 +651,7 @@ ipa_merge_profiles (struct cgraph_node * } else { - e->count = count; + e->count = count.ipa (); e->frequency = freq; } } Index: ira-build.c =================================================================== --- ira-build.c (revision 254348) +++ ira-build.c (working copy) @@ -2202,7 +2202,8 @@ loop_compare_func (const void *v1p, cons return -1; if (! 
l1->to_remove_p && l2->to_remove_p) return 1; - if ((diff = l1->loop->header->frequency - l2->loop->header->frequency) != 0) + if ((diff = l1->loop->header->count.to_frequency (cfun) + - l2->loop->header->count.to_frequency (cfun)) != 0) return diff; if ((diff = (int) loop_depth (l1->loop) - (int) loop_depth (l2->loop)) != 0) return diff; @@ -2260,7 +2261,7 @@ mark_loops_for_removal (void) (ira_dump_file, " Mark loop %d (header %d, freq %d, depth %d) for removal (%s)\n", sorted_loops[i]->loop_num, sorted_loops[i]->loop->header->index, - sorted_loops[i]->loop->header->frequency, + sorted_loops[i]->loop->header->count.to_frequency (cfun), loop_depth (sorted_loops[i]->loop), low_pressure_loop_node_p (sorted_loops[i]->parent) && low_pressure_loop_node_p (sorted_loops[i]) @@ -2293,7 +2294,7 @@ mark_all_loops_for_removal (void) " Mark loop %d (header %d, freq %d, depth %d) for removal\n", ira_loop_nodes[i].loop_num, ira_loop_nodes[i].loop->header->index, - ira_loop_nodes[i].loop->header->frequency, + ira_loop_nodes[i].loop->header->count.to_frequency (cfun), loop_depth (ira_loop_nodes[i].loop)); } } Index: loop-doloop.c =================================================================== --- loop-doloop.c (revision 254348) +++ loop-doloop.c (working copy) @@ -506,7 +506,6 @@ doloop_modify (struct loop *loop, struct set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); set_zero->count = profile_count::uninitialized (); - set_zero->frequency = 0; te = single_succ_edge (preheader); for (; ass; ass = XEXP (ass, 1)) @@ -522,7 +521,6 @@ doloop_modify (struct loop *loop, struct also be very hard to show that it is impossible, so we must handle this case. 
*/ set_zero->count = preheader->count; - set_zero->frequency = preheader->frequency; } if (EDGE_COUNT (set_zero->preds) == 0) Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 254348) +++ loop-unroll.c (working copy) @@ -863,7 +863,7 @@ unroll_loop_runtime_iterations (struct l unsigned i, j; profile_probability p; basic_block preheader, *body, swtch, ezc_swtch = NULL; - int may_exit_copy, iter_freq, new_freq; + int may_exit_copy; profile_count iter_count, new_count; unsigned n_peel; edge e; @@ -970,12 +970,10 @@ unroll_loop_runtime_iterations (struct l /* Record the place where switch will be built for preconditioning. */ swtch = split_edge (loop_preheader_edge (loop)); - /* Compute frequency/count increments for each switch block and initialize + /* Compute count increments for each switch block and initialize innermost switch block. Switch blocks and peeled loop copies are built from innermost outward. */ - iter_freq = new_freq = swtch->frequency / (max_unroll + 1); iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1); - swtch->frequency = new_freq; swtch->count = new_count; for (i = 0; i < n_peel; i++) @@ -995,8 +993,7 @@ unroll_loop_runtime_iterations (struct l p = profile_probability::always ().apply_scale (1, i + 2); preheader = split_edge (loop_preheader_edge (loop)); - /* Add in frequency/count of edge from switch block. */ - preheader->frequency += iter_freq; + /* Add in count of edge from switch block. 
*/ preheader->count += iter_count; branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, block_label (preheader), p, @@ -1009,9 +1006,7 @@ unroll_loop_runtime_iterations (struct l swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); single_succ_edge (swtch)->probability = p.invert (); - new_freq += iter_freq; new_count += iter_count; - swtch->frequency = new_freq; swtch->count = new_count; e = make_edge (swtch, preheader, single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); @@ -1024,12 +1019,10 @@ unroll_loop_runtime_iterations (struct l p = profile_probability::always ().apply_scale (1, max_unroll + 1); swtch = ezc_swtch; preheader = split_edge (loop_preheader_edge (loop)); - /* Recompute frequency/count adjustments since initial peel copy may + /* Recompute count adjustments since initial peel copy may have exited and reduced those values that were computed above. */ - iter_freq = swtch->frequency / (max_unroll + 1); iter_count = swtch->count.apply_scale (1, max_unroll + 1); - /* Add in frequency/count of edge from switch block. */ - preheader->frequency += iter_freq; + /* Add in count of edge from switch block. 
*/ preheader->count += iter_count; branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, block_label (preheader), p, Index: lto-streamer-in.c =================================================================== --- lto-streamer-in.c (revision 254348) +++ lto-streamer-in.c (working copy) @@ -1192,6 +1192,7 @@ input_function (tree fn_decl, struct dat gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest)); } + counts_to_freqs (); fixup_call_stmt_edges (node, stmts); execute_all_ipa_stmt_fixups (node, stmts); Index: omp-expand.c =================================================================== --- omp-expand.c (revision 254348) +++ omp-expand.c (working copy) @@ -1399,6 +1399,7 @@ expand_omp_taskreg (struct omp_region *r if (optimize) optimize_omp_library_calls (entry_stmt); + counts_to_freqs (); cgraph_edge::rebuild_edges (); /* Some EH regions might become dead, see PR34608. If Index: omp-simd-clone.c =================================================================== --- omp-simd-clone.c (revision 254348) +++ omp-simd-clone.c (working copy) @@ -1132,6 +1132,7 @@ simd_clone_adjust (struct cgraph_node *n { basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src; incr_bb = create_empty_bb (orig_exit); + incr_bb->count = profile_count::zero (); add_bb_to_loop (incr_bb, body_bb->loop_father); /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty flag. Set it now to be a FALLTHRU_EDGE. 
*/ @@ -1142,11 +1143,13 @@ simd_clone_adjust (struct cgraph_node *n { edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i); redirect_edge_succ (e, incr_bb); + incr_bb->count += e->count (); } } else if (node->simdclone->inbranch) { incr_bb = create_empty_bb (entry_bb); + incr_bb->count = profile_count::zero (); add_bb_to_loop (incr_bb, body_bb->loop_father); } @@ -1243,6 +1246,7 @@ simd_clone_adjust (struct cgraph_node *n gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::unlikely ().guessed (); + incr_bb->count += e->count (); edge fallthru = FALLTHRU_EDGE (loop->header); fallthru->flags = EDGE_FALSE_VALUE; fallthru->probability = profile_probability::likely ().guessed (); Index: predict.c =================================================================== --- predict.c (revision 254348) +++ predict.c (working copy) @@ -137,12 +137,12 @@ maybe_hot_frequency_p (struct function * if (profile_status_for_fn (fun) == PROFILE_ABSENT) return true; if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE - && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3)) + && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (cfun) * 2 / 3)) return false; if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) return false; if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) - < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency) + < ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (cfun)) return false; return true; } @@ -175,10 +175,14 @@ set_hot_bb_threshold (gcov_type min) /* Return TRUE if frequency FREQ is considered to be hot. */ bool -maybe_hot_count_p (struct function *, profile_count count) +maybe_hot_count_p (struct function *fun, profile_count count) { if (!count.initialized_p ()) return true; + if (!count.ipa_p ()) + return maybe_hot_frequency_p (fun, count.to_frequency (fun)); + if (count.ipa () == profile_count::zero ()) + return false; /* Code executed at most once is not hot. 
*/ if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) return false; @@ -192,9 +196,7 @@ bool maybe_hot_bb_p (struct function *fun, const_basic_block bb) { gcc_checking_assert (fun); - if (!maybe_hot_count_p (fun, bb->count)) - return false; - return maybe_hot_frequency_p (fun, bb->frequency); + return maybe_hot_count_p (fun, bb->count); } /* Return true in case BB can be CPU intensive and should be optimized @@ -203,9 +205,7 @@ maybe_hot_bb_p (struct function *fun, co bool maybe_hot_edge_p (edge e) { - if (!maybe_hot_count_p (cfun, e->count ())) - return false; - return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e)); + return maybe_hot_count_p (cfun, e->count ()); } /* Return true if profile COUNT and FREQUENCY, or function FUN static @@ -213,7 +213,7 @@ maybe_hot_edge_p (edge e) static bool probably_never_executed (struct function *fun, - profile_count count, int) + profile_count count) { gcc_checking_assert (fun); if (count == profile_count::zero ()) @@ -238,7 +238,7 @@ probably_never_executed (struct function bool probably_never_executed_bb_p (struct function *fun, const_basic_block bb) { - return probably_never_executed (fun, bb->count, bb->frequency); + return probably_never_executed (fun, bb->count); } @@ -259,7 +259,7 @@ probably_never_executed_edge_p (struct f { if (unlikely_executed_edge_p (e)) return true; - return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e)); + return probably_never_executed (fun, e->count ()); } /* Return true when current function should always be optimized for size. 
*/ @@ -1289,7 +1289,8 @@ combine_predictions_for_bb (basic_block } clear_bb_predictions (bb); - if (!bb->count.initialized_p () && !dry_run) + if ((!bb->count.nonzero_p () || !first->probability.initialized_p ()) + && !dry_run) { first->probability = profile_probability::from_reg_br_prob_base (combined_probability); @@ -3014,10 +3015,7 @@ propagate_freq (basic_block head, bitmap BLOCK_INFO (bb)->npredecessors = count; /* When function never returns, we will never process exit block. */ if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - bb->count = profile_count::zero (); - bb->frequency = 0; - } + bb->count = profile_count::zero (); } BLOCK_INFO (head)->frequency = 1; @@ -3050,7 +3048,10 @@ propagate_freq (basic_block head, bitmap * BLOCK_INFO (e->src)->frequency / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? + e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (e->src)->frequency; tmp *= real_inv_br_prob_base; frequency += tmp; @@ -3082,7 +3083,10 @@ propagate_freq (basic_block head, bitmap = ((e->probability * BLOCK_INFO (bb)->frequency) / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? 
+ e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (bb)->frequency; EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; } @@ -3196,10 +3200,26 @@ drop_profile (struct cgraph_node *node, } basic_block bb; - FOR_ALL_BB_FN (bb, fn) + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + if (flag_guess_branch_prob) { - bb->count = profile_count::uninitialized (); + bool clear_zeros + = ENTRY_BLOCK_PTR_FOR_FN + (DECL_STRUCT_FUNCTION (node->decl))->count.nonzero_p (); + FOR_ALL_BB_FN (bb, fn) + if (clear_zeros || !(bb->count == profile_count::zero ())) + bb->count = bb->count.guessed_local (); + DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max = + DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max.guessed_local (); } + else + { + FOR_ALL_BB_FN (bb, fn) + bb->count = profile_count::uninitialized (); + DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max + = profile_count::uninitialized (); + } + pop_cfun (); struct cgraph_edge *e; for (e = node->callees; e; e = e->next_caller) @@ -3300,33 +3320,16 @@ handle_missing_profiles (void) bool counts_to_freqs (void) { - gcov_type count_max; - profile_count true_count_max = profile_count::zero (); + profile_count true_count_max = profile_count::uninitialized (); basic_block bb; - /* Don't overwrite the estimated frequencies when the profile for - the function is missing. We may drop this function PROFILE_GUESSED - later in drop_profile (). */ - if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () - || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) - return false; - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > true_count_max) - true_count_max = bb->count; + if (!(bb->count < true_count_max)) + true_count_max = true_count_max.max (bb->count); - /* If we have no counts to base frequencies on, keep those that are - already there. 
*/ - if (!(true_count_max > 0)) - return false; - - count_max = true_count_max.to_gcov_type (); + cfun->cfg->count_max = true_count_max; - FOR_ALL_BB_FN (bb, cfun) - if (bb->count.initialized_p ()) - bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max); - - return true; + return true_count_max.nonzero_p (); } /* Return true if function is likely to be expensive, so there is no point to @@ -3348,11 +3351,11 @@ expensive_function_p (int threshold) /* Frequencies are out of range. This either means that function contains internal loop executing more than BB_FREQ_MAX times or profile feedback is available and function has not been executed at all. */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) == 0) return true; /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ - limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; + limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * threshold; FOR_EACH_BB_FN (bb, cfun) { rtx_insn *insn; @@ -3360,7 +3363,7 @@ expensive_function_p (int threshold) FOR_BB_INSNS (bb, insn) if (active_insn_p (insn)) { - sum += bb->frequency; + sum += bb->count.to_frequency (cfun); if (sum > limit) return true; } @@ -3409,7 +3412,6 @@ propagate_unlikely_bbs_forward (void) "Basic block %i is marked unlikely by forward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; } else bb->aux = NULL; @@ -3440,9 +3442,6 @@ determine_unlikely_bbs () bb->count = profile_count::zero (); } - if (bb->count == profile_count::zero ()) - bb->frequency = 0; - FOR_EACH_EDGE (e, ei, bb->succs) if (!(e->probability == profile_probability::never ()) && unlikely_executed_edge_p (e)) @@ -3497,7 +3496,6 @@ determine_unlikely_bbs () "Basic block %i is marked unlikely by backward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) if (!(e->probability == profile_probability::never 
())) { @@ -3554,8 +3552,13 @@ estimate_bb_frequencies (bool force) FOR_EACH_EDGE (e, ei, bb->succs) { - EDGE_INFO (e)->back_edge_prob - = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + if (e->probability.initialized_p ()) + EDGE_INFO (e)->back_edge_prob + = e->probability.to_reg_br_prob_base (); + else + EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; } } @@ -3564,16 +3567,28 @@ estimate_bb_frequencies (bool force) to outermost to examine frequencies for back edges. */ estimate_loops (); + bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p (); + freq_max = 0; FOR_EACH_BB_FN (bb, cfun) if (freq_max < BLOCK_INFO (bb)->frequency) freq_max = BLOCK_INFO (bb)->frequency; freq_max = real_bb_freq_max / freq_max; + cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; - bb->frequency = tmp.to_int (); + profile_count count = profile_count::from_gcov_type (tmp.to_int ()); + + /* If we have profile feedback in which this function was never + executed, then preserve this info. 
*/ + if (global0) + bb->count = count.global0 (); + else if (!(bb->count == profile_count::zero ())) + bb->count = count.guessed_local (); + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); } free_aux_for_blocks (); @@ -3598,7 +3613,8 @@ compute_function_frequency (void) if (profile_status_for_fn (cfun) != PROFILE_READ) { int flags = flags_from_decl_or_type (current_function_decl); - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero () + if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) != NULL) { @@ -3717,7 +3733,7 @@ pass_profile::execute (function *fun) { struct loop *loop; FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) - if (loop->header->frequency) + if (loop->header->count.initialized_p ()) fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", loop->num, (int)expected_loop_iterations_unbounded (loop)); @@ -3843,15 +3859,12 @@ rebuild_frequencies (void) which may also lead to frequencies incorrectly reduced to 0. There is less precision in the probabilities, so we only do this for small max counts. */ - profile_count count_max = profile_count::zero (); + cfun->cfg->count_max = profile_count::uninitialized (); basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > count_max) - count_max = bb->count; + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); - if (profile_status_for_fn (cfun) == PROFILE_GUESSED - || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ - && count_max < REG_BR_PROB_BASE / 10)) + if (profile_status_for_fn (cfun) == PROFILE_GUESSED) { loop_optimizer_init (0); add_noreturn_fake_exit_edges (); @@ -4017,17 +4030,19 @@ force_edge_cold (edge e, bool impossible after loop transforms. 
*/ if (!(prob_sum > profile_probability::never ()) && count_sum == profile_count::zero () - && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1)) + && single_pred_p (e->src) && e->src->count.to_frequency (cfun) + > (impossible ? 0 : 1)) { - int old_frequency = e->src->frequency; + int old_frequency = e->src->count.to_frequency (cfun); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Making bb %i %s.\n", e->src->index, impossible ? "impossible" : "cold"); - e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); + int new_frequency = MIN (e->src->count.to_frequency (cfun), + impossible ? 0 : 1); if (impossible) e->src->count = profile_count::zero (); else - e->src->count = e->count ().apply_scale (e->src->frequency, + e->src->count = e->count ().apply_scale (new_frequency, old_frequency); force_edge_cold (single_pred_edge (e->src), impossible); } Index: profile-count.c =================================================================== --- profile-count.c (revision 254348) +++ profile-count.c (working copy) @@ -42,7 +42,11 @@ profile_count::dump (FILE *f) const else { fprintf (f, "%" PRId64, m_val); - if (m_quality == profile_adjusted) + if (m_quality == profile_guessed_local) + fprintf (f, " (estimated locally)"); + else if (m_quality == profile_guessed_global0) + fprintf (f, " (estimated locally, globally 0)"); + else if (m_quality == profile_adjusted) fprintf (f, " (adjusted)"); else if (m_quality == profile_afdo) fprintf (f, " (auto FDO)"); @@ -65,6 +69,7 @@ profile_count::debug () const bool profile_count::differs_from_p (profile_count other) const { + gcc_checking_assert (compatible_p (other)); if (!initialized_p () || !other.initialized_p ()) return false; if ((uint64_t)m_val - (uint64_t)other.m_val < 100 @@ -213,3 +218,40 @@ slow_safe_scale_64bit (uint64_t a, uint6 *res = (uint64_t) -1; return false; } + +/* Return count as frequency within FUN scaled in range 0 to REG_FREQ_MAX + Used for legacy code and should not 
be used anymore. */ + +int +profile_count::to_frequency (struct function *fun) const +{ + if (!initialized_p ()) + return BB_FREQ_MAX; + if (*this == profile_count::zero ()) + return 0; + gcc_assert (REG_BR_PROB_BASE == BB_FREQ_MAX + && fun->cfg->count_max.initialized_p ()); + profile_probability prob = probability_in (fun->cfg->count_max); + if (!prob.initialized_p ()) + return REG_BR_PROB_BASE; + return prob.to_reg_br_prob_base (); +} + +/* Return count as frequency within FUN scaled in range 0 to CGRAPH_FREQ_MAX + where CGRAPH_FREQ_BASE means that count equals to entry block count. + Used for legacy code and should not be used anymore. */ + +int +profile_count::to_cgraph_frequency (profile_count entry_bb_count) const +{ + if (!initialized_p ()) + return CGRAPH_FREQ_BASE; + if (*this == profile_count::zero ()) + return 0; + gcc_checking_assert (entry_bb_count.initialized_p ()); + uint64_t scale; + if (!safe_scale_64bit (!entry_bb_count.m_val ? m_val + 1 : m_val, + CGRAPH_FREQ_BASE, MAX (1, entry_bb_count.m_val), &scale)) + return CGRAPH_FREQ_MAX; + return MIN (scale, CGRAPH_FREQ_MAX); +} Index: profile-count.h =================================================================== --- profile-count.h (revision 254348) +++ profile-count.h (working copy) @@ -21,21 +21,37 @@ along with GCC; see the file COPYING3. #ifndef GCC_PROFILE_COUNT_H #define GCC_PROFILE_COUNT_H +struct function; + /* Quality of the profile count. Because gengtype does not support enums inside of classes, this is in global namespace. */ enum profile_quality { + /* Profile is based on static branch prediction heuristics and may + or may not match reality. It is local to function and can not be compared + inter-procedurally. Never used by probabilities (they are always local). + */ + profile_guessed_local = 0, + /* Profile was read by feedback and was 0, we used local heuristics to guess + better. This is the case of functions not run in profile fedback. + Never used by probabilities. 
*/ + profile_guessed_global0 = 1, + + /* Profile is based on static branch prediction heuristics. It may or may - not reflect the reality. */ - profile_guessed = 0, + not reflect the reality but it can be compared interprocedurally + (for example, we inlined function w/o profile feedback into function + with feedback and propagated from that). + Never used by probabilities. */ + profile_guessed = 2, /* Profile was determined by autofdo. */ - profile_afdo = 1, + profile_afdo = 3, /* Profile was originally based on feedback but it was adjusted by code duplicating optimization. It may not precisely reflect the particular code path. */ - profile_adjusted = 2, + profile_adjusted = 4, /* Profile was read from profile feedback or determined by accurate static method. */ - profile_precise = 3 + profile_precise = 5 }; /* The base value for branch probability notes and edge probabilities. */ @@ -114,15 +130,15 @@ safe_scale_64bit (uint64_t a, uint64_t b class GTY((user)) profile_probability { - static const int n_bits = 30; + static const int n_bits = 29; /* We can technically use ((uint32_t) 1 << (n_bits - 1)) - 2 but that will lead to harder multiplication sequences. 
*/ static const uint32_t max_probability = (uint32_t) 1 << (n_bits - 2); static const uint32_t uninitialized_probability = ((uint32_t) 1 << (n_bits - 1)) - 1; - uint32_t m_val : 30; - enum profile_quality m_quality : 2; + uint32_t m_val : 29; + enum profile_quality m_quality : 3; friend class profile_count; public: @@ -226,14 +242,14 @@ public: static profile_probability from_reg_br_prob_note (int v) { profile_probability ret; - ret.m_val = ((unsigned int)v) / 4; - ret.m_quality = (enum profile_quality)(v & 3); + ret.m_val = ((unsigned int)v) / 8; + ret.m_quality = (enum profile_quality)(v & 7); return ret; } int to_reg_br_prob_note () const { gcc_checking_assert (initialized_p ()); - int ret = m_val * 4 + m_quality; + int ret = m_val * 8 + m_quality; gcc_checking_assert (profile_probability::from_reg_br_prob_note (ret) == *this); return ret; @@ -489,8 +505,9 @@ public: { if (m_val == uninitialized_probability) return m_quality == profile_guessed; - else - return m_val <= max_probability; + else if (m_quality < profile_guessed) + return false; + return m_val <= max_probability; } /* Comparsions are three-state and conservative. False is returned if @@ -530,9 +547,32 @@ public: void stream_out (struct lto_output_stream *); }; -/* Main data type to hold profile counters in GCC. In most cases profile - counts originate from profile feedback. They are 64bit integers - representing number of executions during the train run. +/* Main data type to hold profile counters in GCC. Profile counts originate + either from profile feedback, static profile estimation or both. We do not + perform whole program profile propagation and thus profile estimation + counters are often local to function, while counters from profile feedback + (or special cases of profile estimation) can be used inter-procedurally. + + There are 3 basic types + 1) local counters which are result of intra-procedural static profile + estimation. 
+ 2) ipa counters which are result of profile feedback or special case + of static profile estimation (such as in function main). + 3) counters which count as 0 inter-procedurally (because given function + was never run in train feedback) but they hold local static profile + estimate. + + Counters of type 1 and 3 can not be mixed with counters of different type + within operation (because whole function should use one type of counter) + with exception that global zero mix in most operations where outcome is + well defined. + + To take local counter and use it inter-procedurally use ipa member function + which strips information irrelevant at the inter-procedural level. + + Counters are 61bit integers representing number of executions during the + train run or normalized frequency within the function. + As the profile is maintained during the compilation, many adjustments are made. Not all transformations can be made precisely, most importantly when code is being duplicated. It also may happen that part of CFG has @@ -567,12 +607,25 @@ class GTY(()) profile_count 64bit. Although a counter cannot be negative, we use a signed type to hold various extra stages. */ - static const int n_bits = 62; + static const int n_bits = 61; static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2; static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1; uint64_t m_val : n_bits; - enum profile_quality m_quality : 2; + enum profile_quality m_quality : 3; + + /* Return true if both values can meaningfully appear in single function + body. We have either all counters in function local or global, otherwise + operations between them are not really defined well. 
*/ + bool compatible_p (const profile_count other) const + { + if (!initialized_p () || !other.initialized_p ()) + return true; + if (*this == profile_count::zero () + || other == profile_count::zero ()) + return true; + return ipa_p () == other.ipa_p (); + } public: /* Used for counters which are expected to be never executed. */ @@ -597,7 +650,7 @@ public: { profile_count c; c.m_val = uninitialized_count; - c.m_quality = profile_guessed; + c.m_quality = profile_guessed_local; return c; } @@ -630,6 +683,11 @@ public: { return m_quality >= profile_adjusted; } + /* Return true if value can be operated inter-procedurally. */ + bool ipa_p () const + { + return !initialized_p () || m_quality >= profile_guessed_global0; + } /* When merging basic blocks, the two different profile counts are unified. Return true if this can be done without losing info about profile. @@ -671,6 +729,7 @@ public: return profile_count::uninitialized (); profile_count ret; + gcc_checking_assert (compatible_p (other)); ret.m_val = m_val + other.m_val; ret.m_quality = MIN (m_quality, other.m_quality); return ret; @@ -688,6 +747,7 @@ public: return *this = profile_count::uninitialized (); else { + gcc_checking_assert (compatible_p (other)); m_val += other.m_val; m_quality = MIN (m_quality, other.m_quality); } @@ -699,6 +759,7 @@ public: return *this; if (!initialized_p () || !other.initialized_p ()) return profile_count::uninitialized (); + gcc_checking_assert (compatible_p (other)); profile_count ret; ret.m_val = m_val >= other.m_val ? m_val - other.m_val : 0; ret.m_quality = MIN (m_quality, other.m_quality); @@ -712,6 +773,7 @@ public: return *this = profile_count::uninitialized (); else { + gcc_checking_assert (compatible_p (other)); m_val = m_val >= other.m_val ? m_val - other.m_val: 0; m_quality = MIN (m_quality, other.m_quality); } @@ -721,48 +783,115 @@ public: /* Return false if profile_count is bogus. 
*/ bool verify () const { - return m_val != uninitialized_count || m_quality == profile_guessed; + return m_val != uninitialized_count || m_quality == profile_guessed_local; } /* Comparsions are three-state and conservative. False is returned if the inequality can not be decided. */ bool operator< (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val < other.m_val; + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return !(other == profile_count::zero ()); + if (other == profile_count::zero ()) + return false; + gcc_checking_assert (compatible_p (other)); + return m_val < other.m_val; } bool operator> (const profile_count &other) const { + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return false; + if (other == profile_count::zero ()) + return !(*this == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); return initialized_p () && other.initialized_p () && m_val > other.m_val; } bool operator< (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val < (uint64_t) other; } bool operator> (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val > (uint64_t) other; } bool operator<= (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val <= other.m_val; + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return true; + if (other == profile_count::zero ()) + return (*this == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); + return m_val <= other.m_val; } bool operator>= (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val >= other.m_val; + if (!initialized_p () || 
!other.initialized_p ()) + return false; + if (other == profile_count::zero ()) + return true; + if (*this == profile_count::zero ()) + return !(other == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); + return m_val >= other.m_val; } bool operator<= (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val <= (uint64_t) other; } bool operator>= (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val >= (uint64_t) other; } + /* Return true when value is not zero and can be used for scaling. + This is different from *this > 0 because that requires counter to + be IPA. */ + bool nonzero_p () const + { + return initialized_p () && m_val != 0; + } + + /* Make counter forcingly nonzero. */ + profile_count force_nonzero () const + { + if (!initialized_p ()) + return *this; + profile_count ret = *this; + if (ret.m_val == 0) + ret.m_val = 1; + return ret; + } + + profile_count max (profile_count other) const + { + if (!initialized_p ()) + return other; + if (!other.initialized_p ()) + return *this; + if (*this == profile_count::zero ()) + return other; + if (other == profile_count::zero ()) + return *this; + gcc_checking_assert (compatible_p (other)); + if (m_val < other.m_val || (m_val == other.m_val + && m_quality < other.m_quality)) + return other; + return *this; + } /* PROB is a probability in scale 0...REG_BR_PROB_BASE. Scale counter accordingly. 
*/ @@ -814,13 +943,13 @@ public: } profile_count apply_scale (profile_count num, profile_count den) const { - if (m_val == 0) + if (*this == profile_count::zero ()) return *this; - if (num.m_val == 0) + if (num == profile_count::zero ()) return num; if (!initialized_p () || !num.initialized_p () || !den.initialized_p ()) return profile_count::uninitialized (); - gcc_checking_assert (den > 0); + gcc_checking_assert (den.m_val); if (num == den) return *this; @@ -828,7 +957,30 @@ public: uint64_t val; safe_scale_64bit (m_val, num.m_val, den.m_val, &val); ret.m_val = MIN (val, max_count); - ret.m_quality = MIN (m_quality, profile_adjusted); + ret.m_quality = MIN (MIN (MIN (m_quality, profile_adjusted), + num.m_quality), den.m_quality); + if (num.ipa_p () && !ret.ipa_p ()) + ret.m_quality = MIN (num.m_quality, profile_guessed); + return ret; + } + + /* Return THIS with quality dropped to GUESSED_LOCAL. */ + profile_count guessed_local () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_local; + return ret; + } + + /* We know that profile is globally0 but keep local profile if present. */ + profile_count global0 () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_global0; return ret; } @@ -836,10 +988,21 @@ public: profile_count guessed () const { profile_count ret = *this; - ret.m_quality = profile_guessed; + ret.m_quality = MIN (ret.m_quality, profile_guessed); return ret; } + /* Return variant of profile count which is always safe to compare + across functions. */ + profile_count ipa () const + { + if (m_quality > profile_guessed_global0) + return *this; + if (m_quality == profile_guessed_global0) + return profile_count::zero (); + return profile_count::uninitialized (); + } + /* Return THIS with quality dropped to AFDO. */ profile_count afdo () const { @@ -852,21 +1015,26 @@ public: OVERALL. 
*/ profile_probability probability_in (const profile_count overall) const { - if (!m_val) + if (*this == profile_count::zero ()) return profile_probability::never (); if (!initialized_p () || !overall.initialized_p () || !overall.m_val) return profile_probability::uninitialized (); profile_probability ret; - if (overall < m_val) + gcc_checking_assert (compatible_p (overall)); + + if (overall.m_val < m_val) ret.m_val = profile_probability::max_probability; else ret.m_val = RDIV (m_val * profile_probability::max_probability, overall.m_val); - ret.m_quality = MIN (m_quality, overall.m_quality); + ret.m_quality = MAX (MIN (m_quality, overall.m_quality), profile_guessed); return ret; } + int to_frequency (struct function *fun) const; + int to_cgraph_frequency (profile_count entry_bb_count) const; + /* Output THIS to F. */ void dump (FILE *f) const; Index: profile.c =================================================================== --- profile.c (revision 254348) +++ profile.c (working copy) @@ -476,38 +476,6 @@ read_profile_edge_counts (gcov_type *exe return num_edges; } -#define OVERLAP_BASE 10000 - -/* Compare the static estimated profile to the actual profile, and - return the "degree of overlap" measure between them. - - Degree of overlap is a number between 0 and OVERLAP_BASE. It is - the sum of each basic block's minimum relative weights between - two profiles. And overlap of OVERLAP_BASE means two profiles are - identical. 
*/ - -static int -compute_frequency_overlap (void) -{ - gcov_type count_total = 0, freq_total = 0; - int overlap = 0; - basic_block bb; - - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - { - count_total += bb_gcov_count (bb); - freq_total += bb->frequency; - } - - if (count_total == 0 || freq_total == 0) - return 0; - - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - overlap += MIN (bb_gcov_count (bb) * OVERLAP_BASE / count_total, - bb->frequency * OVERLAP_BASE / freq_total); - - return overlap; -} /* Compute the branch probabilities for the various branches. Annotate them accordingly. @@ -676,14 +644,6 @@ compute_branch_probabilities (unsigned c } } } - if (dump_file) - { - int overlap = compute_frequency_overlap (); - gimple_dump_cfg (dump_file, dump_flags); - fprintf (dump_file, "Static profile overlap: %d.%d%%\n", - overlap / (OVERLAP_BASE / 100), - overlap % (OVERLAP_BASE / 100)); - } total_num_passes += passes; if (dump_file) @@ -829,10 +789,18 @@ compute_branch_probabilities (unsigned c } } - FOR_ALL_BB_FN (bb, cfun) - { + /* If we have real data, use them! */ + if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun)) + || !flag_guess_branch_prob) + FOR_ALL_BB_FN (bb, cfun) bb->count = profile_count::from_gcov_type (bb_gcov_count (bb)); - } + /* If function was not trained, preserve local estimates including statically + determined zero counts. */ + else + FOR_ALL_BB_FN (bb, cfun) + if (!(bb->count == profile_count::zero ())) + bb->count = bb->count.global0 (); + bb_gcov_counts.release (); delete edge_gcov_counts; edge_gcov_counts = NULL; Index: regs.h =================================================================== --- regs.h (revision 254348) +++ regs.h (working copy) @@ -130,8 +130,10 @@ extern size_t reg_info_p_size; frequency. */ #define REG_FREQ_FROM_BB(bb) (optimize_function_for_size_p (cfun) \ ? REG_FREQ_MAX \ - : ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ - ? 
((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ + : ((bb)->count.to_frequency (cfun) \ + * REG_FREQ_MAX / BB_FREQ_MAX) \ + ? ((bb)->count.to_frequency (cfun) \ + * REG_FREQ_MAX / BB_FREQ_MAX) \ : 1) /* Indexed by N, gives number of insns in which register N dies. Index: sched-ebb.c =================================================================== --- sched-ebb.c (revision 254348) +++ sched-ebb.c (working copy) @@ -231,11 +231,9 @@ rank (rtx_insn *insn1, rtx_insn *insn2) basic_block bb1 = BLOCK_FOR_INSN (insn1); basic_block bb2 = BLOCK_FOR_INSN (insn2); - if (bb1->count > bb2->count - || bb1->frequency > bb2->frequency) + if (bb1->count > bb2->count) return -1; - if (bb1->count < bb2->count - || bb1->frequency < bb2->frequency) + if (bb1->count < bb2->count) return 1; return 0; } Index: shrink-wrap.c =================================================================== --- shrink-wrap.c (revision 254348) +++ shrink-wrap.c (working copy) @@ -561,7 +561,7 @@ handle_simple_exit (edge e) BB_END (old_bb) = end; redirect_edge_succ (e, new_bb); - new_bb->frequency = EDGE_FREQUENCY (e); + new_bb->count = e->count (); e->flags |= EDGE_FALLTHRU; e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -887,7 +887,7 @@ try_shrink_wrapping (edge *entry_edge, r if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) { num += EDGE_FREQUENCY (e); - den += e->src->frequency; + den += e->src->count.to_frequency (cfun); } if (den == 0) @@ -920,8 +920,6 @@ try_shrink_wrapping (edge *entry_edge, r if (dump_file) fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index); - bb->frequency = RDIV (num * bb->frequency, den); - dup->frequency -= bb->frequency; bb->count = bb->count.apply_scale (num, den); dup->count -= bb->count; } @@ -995,8 +993,7 @@ try_shrink_wrapping (edge *entry_edge, r continue; } - new_bb->count += e->src->count.apply_probability (e->probability); - new_bb->frequency += EDGE_FREQUENCY (e); + new_bb->count += e->count (); 
redirect_edge_and_branch_force (e, new_bb); if (dump_file) @@ -1181,7 +1178,7 @@ place_prologue_for_one_component (unsign work: this does not always add up to the block frequency at all, and even if it does, rounding error makes for bad decisions. */ - SW (bb)->own_cost = bb->frequency; + SW (bb)->own_cost = bb->count.to_frequency (cfun); edge e; edge_iterator ei; Index: testsuite/gcc.dg/no-strict-overflow-3.c =================================================================== --- testsuite/gcc.dg/no-strict-overflow-3.c (revision 254348) +++ testsuite/gcc.dg/no-strict-overflow-3.c (working copy) @@ -9,7 +9,7 @@ int foo (int i, int j) { - return i + 100 < j + 1000; + return i + 100 < j + 1234; } -/* { dg-final { scan-tree-dump "1000" "optimized" } } */ +/* { dg-final { scan-tree-dump "1234" "optimized" } } */ Index: testsuite/gcc.dg/strict-overflow-3.c =================================================================== --- testsuite/gcc.dg/strict-overflow-3.c (revision 254348) +++ testsuite/gcc.dg/strict-overflow-3.c (working copy) @@ -9,7 +9,7 @@ int foo (int i, int j) { - return i + 100 < j + 1000; + return i + 100 < j + 1234; } -/* { dg-final { scan-tree-dump-not "1000" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "1234" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c (working copy) @@ -290,7 +290,7 @@ RNG (0, 6, 8, "%s%ls", "1", L"2"); /* Only conditional calls to must_not_eliminate must be made (with any probability): - { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } } - { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! 
lp64 } } } } } + { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } } + { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! lp64 } } } } } No unconditional calls to abort should be made: { dg-final { scan-tree-dump-not ";\n *must_not_eliminate" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/dump-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/dump-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/dump-2.c (working copy) @@ -6,4 +6,4 @@ int f(void) return 0; } -/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[100\\\.00%\\\] \\\[count: INV\\\]:" "optimized" } } */ +/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: 10000\\\]:" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-10.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-10.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-10.c (working copy) @@ -26,5 +26,5 @@ int foo (int x, int n) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-11.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-11.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-11.c (working copy) @@ -24,5 +24,4 @@ int foo (float *x) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-12.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-12.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-12.c (working copy) @@ -29,6 +29,5 @@ int foo (int x) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c (working copy) @@ -39,4 +39,4 @@ int main1 () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c (working copy) @@ -43,5 +43,4 @@ void foo(const int * __restrict__ zr_in, which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-5.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-5.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-5.c (working copy) @@ -27,4 +27,4 @@ dct_unquantize_h263_inter_c (short *bloc which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-8.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-8.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-8.c (working copy) @@ -22,5 +22,4 @@ void test () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-9.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-9.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-9.c (working copy) @@ -26,4 +26,4 @@ int foo (int x, int n) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-cd.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-cd.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-cd.c (working copy) @@ -32,5 +32,4 @@ void foo (int *x1, int *x2, int *x3, int which is folded by vectorizer. 
Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr56541.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr56541.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr56541.c (working copy) @@ -29,5 +29,4 @@ void foo() which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr68583.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (working copy) @@ -26,5 +26,5 @@ void foo (long *a) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c (working copy) @@ -20,5 +20,4 @@ void foo (int a[], int b[]) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c (revision 254348) +++ testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c (working copy) @@ -21,4 +21,4 @@ foo (const char *u, const char *v, long which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.target/i386/pr61403.c =================================================================== --- testsuite/gcc.target/i386/pr61403.c (revision 254348) +++ testsuite/gcc.target/i386/pr61403.c (working copy) @@ -23,4 +23,4 @@ norm (struct XYZ *in, struct XYZ *out, i } } -/* { dg-final { scan-assembler "blend" } } */ +/* { dg-final { scan-assembler "rsqrtps" } } */ Index: tracer.c =================================================================== --- tracer.c (revision 254348) +++ tracer.c (working copy) @@ -179,7 +179,7 @@ find_best_predecessor (basic_block bb) if (!best || ignore_bb_p (best->src)) return NULL; if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE - < bb->frequency * branch_ratio_cutoff) + < bb->count.to_frequency (cfun) * branch_ratio_cutoff) return NULL; return best; } @@ -194,7 +194,7 @@ find_trace (basic_block bb, basic_block edge e; if (dump_file) - fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); + fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun)); while ((e = find_best_predecessor (bb)) != NULL) { @@ -203,11 +203,11 @@ find_trace (basic_block bb, basic_block || find_best_successor (bb2) != e) break; if (dump_file) - fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); + fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); bb = bb2; } if (dump_file) - fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); + fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun)); trace[i++] = bb; /* Follow the trace in forward direction. 
*/ @@ -218,7 +218,7 @@ find_trace (basic_block bb, basic_block || find_best_predecessor (bb) != e) break; if (dump_file) - fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); + fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); trace[i++] = bb; } if (dump_file) @@ -282,11 +282,11 @@ tail_duplicate (void) { int n = count_insns (bb); if (!ignore_bb_p (bb)) - blocks[bb->index] = heap.insert (-bb->frequency, bb); + blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb); counts [bb->index] = n; ninsns += n; - weighted_insns += n * bb->frequency; + weighted_insns += n * bb->count.to_frequency (cfun); } if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) @@ -314,7 +314,7 @@ tail_duplicate (void) n = find_trace (bb, trace); bb = trace[0]; - traced_insns += bb->frequency * counts [bb->index]; + traced_insns += bb->count.to_frequency (cfun) * counts [bb->index]; if (blocks[bb->index]) { heap.delete_node (blocks[bb->index]); @@ -330,7 +330,7 @@ tail_duplicate (void) heap.delete_node (blocks[bb2->index]); blocks[bb2->index] = NULL; } - traced_insns += bb2->frequency * counts [bb2->index]; + traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index]; if (EDGE_COUNT (bb2->preds) > 1 && can_duplicate_block_p (bb2) /* We have the tendency to duplicate the loop header @@ -345,11 +345,11 @@ tail_duplicate (void) /* Reconsider the original copy of block we've duplicated. Removing the most common predecessor may make it to be head. 
*/ - blocks[bb2->index] = heap.insert (-bb2->frequency, bb2); + blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2); if (dump_file) fprintf (dump_file, "Duplicated %i as %i [%i]\n", - bb2->index, copy->index, copy->frequency); + bb2->index, copy->index, copy->count.to_frequency (cfun)); bb2 = copy; changed = true; Index: trans-mem.c =================================================================== --- trans-mem.c (revision 254348) +++ trans-mem.c (working copy) @@ -2932,7 +2932,6 @@ expand_transaction (struct tm_region *re edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE); redirect_edge_pred (fallthru_edge, join_bb); - join_bb->frequency = test_bb->frequency = transaction_bb->frequency; join_bb->count = test_bb->count = transaction_bb->count; ei->probability = profile_probability::always (); @@ -2940,7 +2939,6 @@ expand_transaction (struct tm_region *re ef->probability = profile_probability::unlikely (); code_bb->count = et->count (); - code_bb->frequency = EDGE_FREQUENCY (et); transaction_bb = join_bb; } @@ -2964,7 +2962,6 @@ expand_transaction (struct tm_region *re gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU); - test_bb->frequency = transaction_bb->frequency; test_bb->count = transaction_bb->count; ei->probability = profile_probability::always (); @@ -3006,7 +3003,6 @@ expand_transaction (struct tm_region *re edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags); e->probability = fallthru_edge->probability; test_bb->count = fallthru_edge->count (); - test_bb->frequency = EDGE_FREQUENCY (e); // Now update the edges to the inst/uninist implementations. // For now assume that the paths are equally likely. 
When using HTM, Index: tree-call-cdce.c =================================================================== --- tree-call-cdce.c (revision 254348) +++ tree-call-cdce.c (working copy) @@ -906,7 +906,6 @@ shrink_wrap_one_built_in_call_with_conds Here we take the second approach because it's slightly simpler and because it's easy to see that it doesn't lose profile counts. */ bi_call_bb->count = profile_count::zero (); - bi_call_bb->frequency = 0; while (!edges.is_empty ()) { edge_pair e = edges.pop (); @@ -919,16 +918,10 @@ shrink_wrap_one_built_in_call_with_conds nocall_edge->probability = profile_probability::always () - call_edge->probability; - unsigned int call_frequency - = call_edge->probability.apply (src_bb->frequency); - bi_call_bb->count += call_edge->count (); - bi_call_bb->frequency += call_frequency; if (nocall_edge->dest != join_tgt_bb) - { - nocall_edge->dest->frequency = src_bb->frequency - call_frequency; - } + nocall_edge->dest->count = src_bb->count - bi_call_bb->count; } if (dom_info_available_p (CDI_DOMINATORS)) Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 254348) +++ tree-cfg.c (working copy) @@ -1071,7 +1071,6 @@ gimple_find_sub_bbs (gimple_seq seq, gim tree_guess_outgoing_edge_probabilities (bb); if (all || profile_status_for_fn (cfun) == PROFILE_READ) bb->count = cnt; - bb->frequency = freq; bb = bb->next_bb; } @@ -2081,7 +2080,6 @@ gimple_merge_blocks (basic_block a, basi if (a->loop_father == b->loop_father) { a->count = a->count.merge (b->count); - a->frequency = MAX (a->frequency, b->frequency); } /* Merge the sequences. 
*/ @@ -2840,7 +2838,6 @@ gimple_split_edge (edge edge_in) after_bb = split_edge_bb_loc (edge_in); new_bb = create_empty_bb (after_bb); - new_bb->frequency = EDGE_FREQUENCY (edge_in); new_bb->count = edge_in->count (); e = redirect_edge_and_branch (edge_in, new_bb); @@ -6306,9 +6303,8 @@ gimple_duplicate_sese_region (edge entry bool free_region_copy = false, copying_header = false; struct loop *loop = entry->dest->loop_father; edge exit_copy; - vec<basic_block> doms; + vec<basic_block> doms = vNULL; edge redirected; - int total_freq = 0, entry_freq = 0; profile_count total_count = profile_count::uninitialized (); profile_count entry_count = profile_count::uninitialized (); @@ -6376,21 +6372,10 @@ gimple_duplicate_sese_region (edge entry if (entry_count > total_count) entry_count = total_count; } - if (!(total_count > 0) || !(entry_count > 0)) - { - total_freq = entry->dest->frequency; - entry_freq = EDGE_FREQUENCY (entry); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. 
*/ - if (total_freq == 0) - total_freq = 1; - else if (entry_freq > total_freq) - entry_freq = total_freq; - } copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, split_edge_bb_loc (entry), update_dominance); - if (total_count > 0 && entry_count > 0) + if (total_count.initialized_p () && entry_count.initialized_p ()) { scale_bbs_frequencies_profile_count (region, n_region, total_count - entry_count, @@ -6398,12 +6383,6 @@ gimple_duplicate_sese_region (edge entry scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count, total_count); } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); - } if (copying_header) { @@ -6492,7 +6471,6 @@ gimple_duplicate_sese_tail (edge entry, struct loop *orig_loop = entry->dest->loop_father; basic_block switch_bb, entry_bb, nentry_bb; vec<basic_block> doms; - int total_freq = 0, exit_freq = 0; profile_count total_count = profile_count::uninitialized (), exit_count = profile_count::uninitialized (); edge exits[2], nexits[2], e; @@ -6537,30 +6515,16 @@ gimple_duplicate_sese_tail (edge entry, inside. */ doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region); - if (exit->src->count > 0) - { - total_count = exit->src->count; - exit_count = exit->count (); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (exit_count > total_count) - exit_count = total_count; - } - else - { - total_freq = exit->src->frequency; - exit_freq = EDGE_FREQUENCY (exit); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (total_freq == 0) - total_freq = 1; - if (exit_freq > total_freq) - exit_freq = total_freq; - } + total_count = exit->src->count; + exit_count = exit->count (); + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. 
*/ + if (exit_count > total_count) + exit_count = total_count; copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop, split_edge_bb_loc (exit), true); - if (total_count.initialized_p ()) + if (total_count.initialized_p () && exit_count.initialized_p ()) { scale_bbs_frequencies_profile_count (region, n_region, total_count - exit_count, @@ -6568,12 +6532,6 @@ gimple_duplicate_sese_tail (edge entry, scale_bbs_frequencies_profile_count (region_copy, n_region, exit_count, total_count); } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq); - } /* Create the switch block, and put the exit condition to it. */ entry_bb = entry->dest; @@ -7614,9 +7572,15 @@ move_sese_region_to_fn (struct function FIXME, this is silly. The CFG ought to become a parameter to these helpers. */ push_cfun (dest_cfun); - make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = entry_bb->count; + make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU); if (exit_bb) - make_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + { + make_single_succ_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = exit_bb->count; + } + else + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero (); pop_cfun (); /* Back in the original function, the SESE region has disappeared, @@ -8691,7 +8655,7 @@ gimple_account_profile_record (basic_blo else if (profile_status_for_fn (cfun) == PROFILE_GUESSED) record->time[after_pass] += estimate_num_insns (gsi_stmt (i), - &eni_time_weights) * bb->frequency; + &eni_time_weights) * bb->count.to_frequency (cfun); } } @@ -8843,7 +8807,6 @@ insert_cond_bb (basic_block bb, gimple * edge e = make_edge (bb, new_bb, EDGE_TRUE_VALUE); e->probability = prob; new_bb->count = e->count (); - new_bb->frequency = prob.apply (bb->frequency); 
make_single_succ_edge (new_bb, fall->dest, EDGE_FALLTHRU); /* Fix edge for split bb. */ @@ -9264,9 +9227,9 @@ execute_fixup_cfg (void) cgraph_node *node = cgraph_node::get (current_function_decl); profile_count num = node->count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()) - && !(num == den); + bool scale = num.initialized_p () && den.ipa_p () + && (den.nonzero_p () || num == profile_count::zero ()) + && !(num == den.ipa ()); if (scale) { Index: tree-complex.c =================================================================== --- tree-complex.c (revision 254348) +++ tree-complex.c (working copy) @@ -1191,7 +1191,6 @@ expand_complex_div_wide (gimple_stmt_ite bb_join = e->dest; bb_true = create_empty_bb (bb_cond); bb_false = create_empty_bb (bb_true); - bb_true->frequency = bb_false->frequency = bb_cond->frequency / 2; bb_true->count = bb_false->count = bb_cond->count.apply_probability (profile_probability::even ()); Index: tree-eh.c =================================================================== --- tree-eh.c (revision 254348) +++ tree-eh.c (working copy) @@ -3224,6 +3224,7 @@ lower_resx (basic_block bb, gresx *stmt, gimple_stmt_iterator gsi2; new_bb = create_empty_bb (bb); + new_bb->count = bb->count; add_bb_to_loop (new_bb, bb->loop_father); lab = gimple_block_label (new_bb); gsi2 = gsi_start_bb (new_bb); Index: tree-inline.c =================================================================== --- tree-inline.c (revision 254348) +++ tree-inline.c (working copy) @@ -1763,16 +1763,15 @@ remap_gimple_stmt (gimple *stmt, copy_bo later */ static basic_block -copy_bb (copy_body_data *id, basic_block bb, int frequency_scale, +copy_bb (copy_body_data *id, basic_block bb, profile_count num, profile_count den) { gimple_stmt_iterator gsi, copy_gsi, seq_gsi; basic_block copy_basic_block; tree decl; - gcov_type freq; basic_block prev; - bool scale = num.initialized_p () - && 
(den > 0 || num == profile_count::zero ()); + bool scale = !num.initialized_p () + || (den.nonzero_p () || num == profile_count::zero ()); /* Search for previous copied basic block. */ prev = bb->prev_bb; @@ -1784,15 +1783,8 @@ copy_bb (copy_body_data *id, basic_block copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux); if (scale) copy_basic_block->count = bb->count.apply_scale (num, den); - - /* We are going to rebuild frequencies from scratch. These values - have just small importance to drive canonicalize_loop_headers. */ - freq = apply_scale ((gcov_type)bb->frequency, frequency_scale); - - /* We recompute frequencies after inlining, so this is quite safe. */ - if (freq > BB_FREQ_MAX) - freq = BB_FREQ_MAX; - copy_basic_block->frequency = freq; + else if (num.initialized_p ()) + copy_basic_block->count = bb->count; copy_gsi = gsi_start_bb (copy_basic_block); @@ -2068,8 +2060,8 @@ copy_bb (copy_body_data *id, basic_block fprintf (dump_file, "Orig bb: %i, orig bb freq %i, new bb freq %i\n", bb->index, - bb->frequency, - copy_basic_block->frequency); + bb->count.to_frequency (cfun), + copy_basic_block->count.to_frequency (cfun)); } } } @@ -2507,11 +2499,8 @@ initialize_cfun (tree new_fndecl, tree c profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); - /* FIXME: When all counts are known to be zero, scaling is also meaningful. 
- */ if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p () - && count.initialized_p () - && ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ()) + && count.ipa ().initialized_p ()) { ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, @@ -2520,10 +2509,13 @@ initialize_cfun (tree new_fndecl, tree c EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); } - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency - = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->frequency; - EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = - EXIT_BLOCK_PTR_FOR_FN (src_cfun)->frequency; + else + { + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; + EXIT_BLOCK_PTR_FOR_FN (cfun)->count + = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count; + } if (src_cfun->eh) init_eh_for_function (); @@ -2680,27 +2672,11 @@ redirect_all_calls (copy_body_data * id, } } -/* Convert estimated frequencies into counts for NODE, scaling COUNT - with each bb's frequency. Used when NODE has a 0-weight entry - but we are about to inline it into a non-zero count call bb. - See the comments for handle_missing_profiles() in predict.c for - when this can happen for COMDATs. */ - -void -freqs_to_counts (struct cgraph_node *node, profile_count count) -{ - basic_block bb; - struct function *fn = DECL_STRUCT_FUNCTION (node->decl); - - FOR_ALL_BB_FN(bb, fn) - bb->count = count.apply_scale (bb->frequency, BB_FREQ_MAX); -} - /* Make a copy of the body of FN so that it can be inserted inline in another function. Walks FN via CFG, returns new fndecl. 
*/ static tree -copy_cfg_body (copy_body_data * id, profile_count count, int frequency_scale, +copy_cfg_body (copy_body_data * id, profile_count, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -2712,31 +2688,10 @@ copy_cfg_body (copy_body_data * id, prof tree new_fndecl = NULL; bool need_debug_cleanup = false; int last; - int incoming_frequency = 0; - profile_count incoming_count = profile_count::zero (); - profile_count num = count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()); + profile_count num = entry_block_map->count; - /* This can happen for COMDAT routines that end up with 0 counts - despite being called (see the comments for handle_missing_profiles() - in predict.c as to why). Apply counts to the blocks in the callee - before inlining, using the guessed edge frequencies, so that we don't - end up with a 0-count inline body which can confuse downstream - optimizations such as function splitting. */ - if (!(ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count > 0) && count > 0) - { - /* Apply the larger of the call bb count and the total incoming - call edge count to the callee. */ - profile_count in_count = profile_count::zero (); - struct cgraph_edge *in_edge; - for (in_edge = id->src_node->callers; in_edge; - in_edge = in_edge->next_caller) - if (in_edge->count.initialized_p ()) - in_count += in_edge->count; - freqs_to_counts (id->src_node, count > in_count ? count : in_count); - } + cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); /* Register specific tree functions. 
*/ gimple_register_cfg_hooks (); @@ -2750,25 +2705,18 @@ copy_cfg_body (copy_body_data * id, prof { edge e; edge_iterator ei; + den = profile_count::zero (); FOR_EACH_EDGE (e, ei, new_entry->preds) if (!e->src->aux) - incoming_frequency += EDGE_FREQUENCY (e); - if (scale) - incoming_count = incoming_count.apply_scale (num, den); - else - incoming_count = profile_count::uninitialized (); - incoming_frequency - = apply_scale ((gcov_type)incoming_frequency, frequency_scale); - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = incoming_count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = incoming_frequency; + den += e->count (); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den; } /* Must have a CFG here at this point. */ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (callee_fndecl))); - cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); ENTRY_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = entry_block_map; EXIT_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = exit_block_map; @@ -2784,7 +2732,7 @@ copy_cfg_body (copy_body_data * id, prof FOR_EACH_BB_FN (bb, cfun_to_copy) if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index)) { - basic_block new_bb = copy_bb (id, bb, frequency_scale, num, den); + basic_block new_bb = copy_bb (id, bb, num, den); bb->aux = new_bb; new_bb->aux = bb; new_bb->loop_father = entry_block_map->loop_father; @@ -3011,7 +2959,7 @@ copy_tree_body (copy_body_data *id) another function. */ static tree -copy_body (copy_body_data *id, profile_count count, int frequency_scale, +copy_body (copy_body_data *id, profile_count count, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -3020,7 +2968,7 @@ copy_body (copy_body_data *id, profile_c /* If this body has a CFG, walk CFG and copy. 
*/ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl))); - body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map, + body = copy_cfg_body (id, count, entry_block_map, exit_block_map, new_entry); copy_debug_stmts (id); @@ -4771,7 +4719,6 @@ expand_call_inline (basic_block bb, gimp a self-referential call; if we're calling ourselves, we need to duplicate our body before altering anything. */ copy_body (id, cg_edge->callee->count, - GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE), bb, return_block, NULL); reset_debug_bindings (id, stmt_gsi); @@ -5146,6 +5093,7 @@ optimize_inline_calls (tree fn) } /* Fold queued statements. */ + counts_to_freqs (); fold_marked_statements (last, id.statements_to_fold); delete id.statements_to_fold; @@ -6090,7 +6038,7 @@ tree_function_versioning (tree old_decl, } /* Copy the Function's body. */ - copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE, + copy_body (&id, old_entry_block->count, ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun), new_entry); @@ -6122,6 +6070,7 @@ tree_function_versioning (tree old_decl, free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); + counts_to_freqs (); fold_marked_statements (0, id.statements_to_fold); delete id.statements_to_fold; delete_unreachable_blocks_update_callgraph (&id); @@ -6141,20 +6090,20 @@ tree_function_versioning (tree old_decl, struct cgraph_edge *e; rebuild_frequencies (); - new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); for (e = new_version_node->callees; e; e = e->next_callee) { basic_block bb = gimple_bb (e->call_stmt); e->frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); - e->count = bb->count; + e->count = bb->count.ipa (); } for (e = new_version_node->indirect_calls; e; e = e->next_callee) { basic_block bb = gimple_bb (e->call_stmt); e->frequency = 
compute_call_stmt_bb_frequency (current_function_decl, bb); - e->count = bb->count; + e->count = bb->count.ipa (); } } Index: tree-ssa-coalesce.c =================================================================== --- tree-ssa-coalesce.c (revision 254348) +++ tree-ssa-coalesce.c (working copy) @@ -164,7 +164,7 @@ coalesce_cost (int frequency, bool optim static inline int coalesce_cost_bb (basic_block bb) { - return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb)); + return coalesce_cost (bb->count.to_frequency (cfun), optimize_bb_for_size_p (bb)); } Index: tree-ssa-ifcombine.c =================================================================== --- tree-ssa-ifcombine.c (revision 254348) +++ tree-ssa-ifcombine.c (working copy) @@ -366,7 +366,6 @@ update_profile_after_ifcombine (basic_bl - inner_taken->probability; outer_to_inner->probability = profile_probability::always (); - inner_cond_bb->frequency = outer_cond_bb->frequency; outer2->probability = profile_probability::never (); } Index: tree-ssa-loop-im.c =================================================================== --- tree-ssa-loop-im.c (revision 254348) +++ tree-ssa-loop-im.c (working copy) @@ -1803,7 +1803,7 @@ execute_sm_if_changed (edge ex, tree mem for (hash_set<basic_block>::iterator it = flag_bbs->begin (); it != flag_bbs->end (); ++it) { - freq_sum += (*it)->frequency; + freq_sum += (*it)->count.to_frequency (cfun); if ((*it)->count.initialized_p ()) count_sum += (*it)->count, ncount ++; if (dominated_by_p (CDI_DOMINATORS, ex->src, *it)) @@ -1815,20 +1815,15 @@ execute_sm_if_changed (edge ex, tree mem if (flag_probability.initialized_p ()) ; - else if (ncount == nbbs && count_sum > 0 && preheader->count () >= count_sum) + else if (ncount == nbbs + && preheader->count () >= count_sum && preheader->count ().nonzero_p ()) { flag_probability = count_sum.probability_in (preheader->count ()); if (flag_probability > cap) flag_probability = cap; } - else if (freq_sum > 0 && EDGE_FREQUENCY 
(preheader) >= freq_sum) - { - flag_probability = profile_probability::from_reg_br_prob_base - (GCOV_COMPUTE_SCALE (freq_sum, EDGE_FREQUENCY (preheader))); - if (flag_probability > cap) - flag_probability = cap; - } - else + + if (!flag_probability.initialized_p ()) flag_probability = cap; /* ?? Insert store after previous store if applicable. See note @@ -1861,7 +1856,6 @@ execute_sm_if_changed (edge ex, tree mem old_dest = ex->dest; new_bb = split_edge (ex); then_bb = create_empty_bb (new_bb); - then_bb->frequency = flag_probability.apply (new_bb->frequency); then_bb->count = new_bb->count.apply_probability (flag_probability); if (irr) then_bb->flags = BB_IRREDUCIBLE_LOOP; Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 254348) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -647,7 +647,6 @@ unloop_loops (bitmap loop_closed_ssa_inv add_bb_to_loop (latch_edge->dest, current_loops->tree_root); latch_edge->dest->count = profile_count::zero (); - latch_edge->dest->frequency = 0; set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); gsi = gsi_start_bb (latch_edge->dest); @@ -1090,7 +1089,6 @@ try_peel_loop (struct loop *loop, } } profile_count entry_count = profile_count::zero (); - int entry_freq = 0; edge e; edge_iterator ei; @@ -1099,15 +1097,10 @@ try_peel_loop (struct loop *loop, { if (e->src->count.initialized_p ()) entry_count = e->src->count + e->src->count; - entry_freq += e->src->frequency; gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); } profile_probability p = profile_probability::very_unlikely (); - if (loop->header->count > 0) - p = entry_count.probability_in (loop->header->count); - else if (loop->header->frequency) - p = profile_probability::probability_in_gcov_type - (entry_freq, loop->header->frequency); + p = entry_count.probability_in (loop->header->count); scale_loop_profile (loop, p, 0); bitmap_set_bit (peeled_loops, loop->num); 
return true; Index: tree-ssa-loop-ivopts.c =================================================================== --- tree-ssa-loop-ivopts.c (revision 254348) +++ tree-ssa-loop-ivopts.c (working copy) @@ -4457,8 +4457,8 @@ get_address_cost (struct ivopts_data *da static comp_cost get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) { - int loop_freq = data->current_loop->header->frequency; - int bb_freq = gimple_bb (at)->frequency; + int loop_freq = data->current_loop->header->count.to_frequency (cfun); + int bb_freq = gimple_bb (at)->count.to_frequency (cfun); if (loop_freq != 0) { gcc_assert (cost.scratch <= cost.cost); Index: tree-ssa-loop-manip.c =================================================================== --- tree-ssa-loop-manip.c (revision 254348) +++ tree-ssa-loop-manip.c (working copy) @@ -1122,6 +1122,9 @@ niter_for_unrolled_loop (struct loop *lo converts back. */ gcov_type new_est_niter = est_niter / factor; + if (est_niter == -1) + return -1; + /* Without profile feedback, loops for which we do not know a better estimate are assumed to roll 10 times. When we unroll such loop, it appears to roll too little, and it may even seem to be cold. To avoid this, we @@ -1370,14 +1373,7 @@ tree_transform_and_unroll_loop (struct l freq_h = loop->header->count; freq_e = (loop_preheader_edge (loop))->count (); - /* Use frequency only if counts are zero. */ - if (!(freq_h > 0) && !(freq_e > 0)) - { - freq_h = profile_count::from_gcov_type (loop->header->frequency); - freq_e = profile_count::from_gcov_type - (EDGE_FREQUENCY (loop_preheader_edge (loop))); - } - if (freq_h > 0) + if (freq_h.nonzero_p ()) { /* Avoid dropping loop body profile counter to 0 because of zero count in loop's preheader. 
*/ @@ -1392,7 +1388,6 @@ tree_transform_and_unroll_loop (struct l .apply_scale (1, new_est_niter + 1); rest->count += new_exit->count (); - rest->frequency += EDGE_FREQUENCY (new_exit); new_nonexit = single_pred_edge (loop->latch); prob = new_nonexit->probability; Index: tree-ssa-loop-niter.c =================================================================== --- tree-ssa-loop-niter.c (revision 254348) +++ tree-ssa-loop-niter.c (working copy) @@ -3901,7 +3901,7 @@ estimate_numbers_of_iterations (struct l recomputing iteration bounds later in the compilation process will just introduce random roundoff errors. */ if (!loop->any_estimate - && loop->header->count > 0) + && loop->header->count.reliable_p ()) { gcov_type nit = expected_loop_iterations_unbounded (loop); bound = gcov_type_to_wide_int (nit); Index: tree-ssa-loop-unswitch.c =================================================================== --- tree-ssa-loop-unswitch.c (revision 254348) +++ tree-ssa-loop-unswitch.c (working copy) @@ -852,7 +852,7 @@ hoist_guard (struct loop *loop, edge gua /* Determine the probability that we skip the loop. Assume that loop has same average number of iterations regardless outcome of guard. */ new_edge->probability = guard->probability; - profile_count skip_count = guard->src->count > 0 + profile_count skip_count = guard->src->count.nonzero_p () ? guard->count ().apply_scale (pre_header->count, guard->src->count) : guard->count ().apply_probability (new_edge->probability); @@ -875,7 +875,6 @@ hoist_guard (struct loop *loop, edge gua to loop header... */ e->probability = new_edge->probability.invert (); e->dest->count = e->count (); - e->dest->frequency = EDGE_FREQUENCY (e); /* ... now update profile to represent that original guard will be optimized away ... 
*/ Index: tree-ssa-sink.c =================================================================== --- tree-ssa-sink.c (revision 254348) +++ tree-ssa-sink.c (working copy) @@ -226,7 +226,8 @@ select_best_block (basic_block early_bb, /* If BEST_BB is at the same nesting level, then require it to have significantly lower execution frequency to avoid gratutious movement. */ if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb) - && best_bb->frequency < (early_bb->frequency * threshold / 100.0)) + && best_bb->count.to_frequency (cfun) + < (early_bb->count.to_frequency (cfun) * threshold / 100.0)) return best_bb; /* No better block found, so return EARLY_BB, which happens to be the Index: tree-ssa-tail-merge.c =================================================================== --- tree-ssa-tail-merge.c (revision 254348) +++ tree-ssa-tail-merge.c (working copy) @@ -1530,8 +1530,6 @@ static void replace_block_by (basic_block bb1, basic_block bb2) { edge pred_edge; - edge e1, e2; - edge_iterator ei; unsigned int i; gphi *bb2_phi; @@ -1560,9 +1558,13 @@ replace_block_by (basic_block bb1, basic bb2->count += bb1->count; + /* FIXME: Fix merging of probabilities. They need to be redistributed + according to the relative counts of merged BBs. */ +#if 0 /* Merge the outgoing edge counts from bb1 onto bb2. */ profile_count out_sum = profile_count::zero (); int out_freq_sum = 0; + edge e1, e2; /* Recompute the edge probabilities from the new merged edge count. 
Use the sum of the new merged edge counts computed above instead @@ -1580,7 +1582,6 @@ replace_block_by (basic_block bb1, basic out_sum += e1->count (); out_freq_sum += EDGE_FREQUENCY (e1); } - FOR_EACH_EDGE (e1, ei, bb1->succs) { e2 = find_edge (bb2, e1->dest); @@ -1589,9 +1590,9 @@ replace_block_by (basic_block bb1, basic { e2->probability = e2->count ().probability_in (bb2->count); } - else if (bb1->frequency && bb2->frequency) + else if (bb1->count.to_frequency (cfun) && bb2->count.to_frequency (cfun)) e2->probability = e1->probability; - else if (bb2->frequency && !bb1->frequency) + else if (bb2->count.to_frequency (cfun) && !bb1->count.to_frequency (cfun)) ; else if (out_freq_sum) e2->probability = profile_probability::from_reg_br_prob_base @@ -1600,9 +1601,7 @@ replace_block_by (basic_block bb1, basic out_freq_sum)); out_sum += e2->count (); } - bb2->frequency += bb1->frequency; - if (bb2->frequency > BB_FREQ_MAX) - bb2->frequency = BB_FREQ_MAX; +#endif /* Move over any user labels from bb1 after the bb2 labels. */ gimple_stmt_iterator gsi1 = gsi_start_bb (bb1); Index: tree-ssa-threadupdate.c =================================================================== --- tree-ssa-threadupdate.c (revision 254348) +++ tree-ssa-threadupdate.c (working copy) @@ -339,7 +339,6 @@ create_block_for_threading (basic_block e->aux = NULL; /* Zero out the profile, since the block is unreachable for now. 
*/ - rd->dup_blocks[count]->frequency = 0; rd->dup_blocks[count]->count = profile_count::uninitialized (); if (duplicate_blocks) bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index); @@ -590,7 +589,7 @@ any_remaining_duplicated_blocks (vec<jum } -/* Compute the amount of profile count/frequency coming into the jump threading +/* Compute the amount of profile count coming into the jump threading path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to @@ -598,7 +597,7 @@ any_remaining_duplicated_blocks (vec<jum edges that need to be ignored in the analysis. Return true if path contains a joiner, false otherwise. - In the non-joiner case, this is straightforward - all the counts/frequency + In the non-joiner case, this is straightforward - all the counts flowing into the jump threading path should flow through the duplicated block and out of the duplicated path. @@ -851,16 +850,14 @@ compute_path_counts (struct redirection_ /* Update the counts and frequencies for both an original path edge EPATH and its duplicate EDUP. The duplicate source block - will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ, + will get a count of PATH_IN_COUNT and PATH_IN_FREQ, and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */ static void update_profile (edge epath, edge edup, profile_count path_in_count, - profile_count path_out_count, int path_in_freq) + profile_count path_out_count) { - if (!(path_in_count > 0)) - return; - /* First update the duplicated block's count / frequency. */ + /* First update the duplicated block's count. 
*/ if (edup) { basic_block dup_block = edup->src; @@ -894,167 +891,54 @@ update_profile (edge epath, edge edup, p if (esucc != edup) esucc->probability *= scale; } - edup->probability = edup_prob; + if (edup_prob.initialized_p ()) + edup->probability = edup_prob; - /* FIXME once freqs_to_counts is dropped re-enable this check. */ - gcc_assert (!dup_block->count.initialized_p () || 1); - gcc_assert (dup_block->frequency == 0); + gcc_assert (!dup_block->count.initialized_p ()); dup_block->count = path_in_count; - dup_block->frequency = path_in_freq; } + if (path_in_count == profile_count::zero ()) + return; + profile_count final_count = epath->count () - path_out_count; - /* Now update the original block's count and frequency in the + /* Now update the original block's count in the opposite manner - remove the counts/freq that will flow into the duplicated block. Handle underflow due to precision/ rounding issues. */ epath->src->count -= path_in_count; - epath->src->frequency -= path_in_freq; - if (epath->src->frequency < 0) - epath->src->frequency = 0; /* Next update this path edge's original and duplicated counts. We know that the duplicated path will have path_out_count flowing out of it (in the joiner case this is the count along the duplicated path out of the duplicated joiner). This count can then be removed from the original path edge. 
*/ - if (epath->src->count > 0) - { - edge esucc; - edge_iterator ei; - profile_probability epath_prob = final_count.probability_in (epath->src->count); - - if (epath->probability > epath_prob) - { - profile_probability rev_scale - = (profile_probability::always () - epath->probability) - / (profile_probability::always () - epath_prob); - FOR_EACH_EDGE (esucc, ei, epath->src->succs) - if (esucc != epath) - esucc->probability /= rev_scale; - } - else if (epath->probability < epath_prob) - { - profile_probability scale - = (profile_probability::always () - epath_prob) - / (profile_probability::always () - epath->probability); - FOR_EACH_EDGE (esucc, ei, epath->src->succs) - if (esucc != epath) - esucc->probability *= scale; - } - epath->probability = epath_prob; - } -} - -/* Check if the paths through RD all have estimated frequencies but zero - profile counts. This is more accurate than checking the entry block - for a zero profile count, since profile insanities sometimes creep in. */ - -static bool -estimated_freqs_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - edge ein; + edge esucc; edge_iterator ei; - bool non_zero_freq = false; - FOR_EACH_EDGE (ein, ei, e->dest->preds) - { - if (ein->count () > 0) - return false; - non_zero_freq |= ein->src->frequency != 0; - } + profile_probability epath_prob = final_count.probability_in (epath->src->count); - for (unsigned int i = 1; i < path->length (); i++) + if (epath->probability > epath_prob) { - edge epath = (*path)[i]->e; - if (epath->src->count > 0) - return false; - non_zero_freq |= epath->src->frequency != 0; - edge esucc; + profile_probability rev_scale + = (profile_probability::always () - epath->probability) + / (profile_probability::always () - epath_prob); + FOR_EACH_EDGE (esucc, ei, epath->src->succs) + if (esucc != epath) + esucc->probability /= rev_scale; + } + else if (epath->probability < epath_prob) + { + profile_probability 
scale + = (profile_probability::always () - epath_prob) + / (profile_probability::always () - epath->probability); FOR_EACH_EDGE (esucc, ei, epath->src->succs) - { - if (esucc->count () > 0) - return false; - non_zero_freq |= esucc->src->frequency != 0; - } - } - return non_zero_freq; -} - - -/* Invoked for routines that have guessed frequencies and no profile - counts to record the block and edge frequencies for paths through RD - in the profile count fields of those blocks and edges. This is because - ssa_fix_duplicate_block_edges incrementally updates the block and - edge counts as edges are redirected, and it is difficult to do that - for edge frequencies which are computed on the fly from the source - block frequency and probability. When a block frequency is updated - its outgoing edge frequencies are affected and become difficult to - adjust. */ - -static void -freqs_to_counts_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - edge ein; - edge_iterator ei; - - FOR_EACH_EDGE (ein, ei, e->dest->preds) - ein->src->count = profile_count::from_gcov_type - (ein->src->frequency * REG_BR_PROB_BASE); - for (unsigned int i = 1; i < path->length (); i++) - { - edge epath = (*path)[i]->e; - /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding - errors applying the edge probability when the frequencies are very - small. */ - epath->src->count = - profile_count::from_gcov_type - (epath->src->frequency * REG_BR_PROB_BASE); - } -} - - -/* For routines that have guessed frequencies and no profile counts, where we - used freqs_to_counts_path to record block and edge frequencies for paths - through RD, we clear the counts after completing all updates for RD. - The updates in ssa_fix_duplicate_block_edges are based off the count fields, - but the block frequencies and edge probabilities were updated as well, - so we can simply clear the count fields. 
*/ - -static void -clear_counts_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - profile_count val = profile_count::uninitialized (); - if (profile_status_for_fn (cfun) == PROFILE_READ) - val = profile_count::zero (); - - edge ein; - edge_iterator ei; - - FOR_EACH_EDGE (ein, ei, e->dest->preds) - ein->src->count = val; - - /* First clear counts along original path. */ - for (unsigned int i = 1; i < path->length (); i++) - { - edge epath = (*path)[i]->e; - epath->src->count = val; - } - /* Also need to clear the counts along duplicated path. */ - for (unsigned int i = 0; i < 2; i++) - { - basic_block dup = rd->dup_blocks[i]; - if (!dup) - continue; - dup->count = val; + if (esucc != epath) + esucc->probability *= scale; } + if (epath_prob.initialized_p ()) + epath->probability = epath_prob; } /* Wire up the outgoing edges from the duplicate blocks and @@ -1072,20 +956,6 @@ ssa_fix_duplicate_block_edges (struct re profile_count path_out_count = profile_count::zero (); int path_in_freq = 0; - /* This routine updates profile counts, frequencies, and probabilities - incrementally. Since it is difficult to do the incremental updates - using frequencies/probabilities alone, for routines without profile - data we first take a snapshot of the existing block and edge frequencies - by copying them into the empty profile count fields. These counts are - then used to do the incremental updates, and cleared at the end of this - routine. If the function is marked as having a profile, we still check - to see if the paths through RD are using estimated frequencies because - the routine had zero profile counts. */ - bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ - || estimated_freqs_path (rd)); - if (do_freqs_to_counts) - freqs_to_counts_path (rd); - /* First determine how much profile count to move from original path to the duplicate path. 
This is tricky in the presence of a joiner (see comments for compute_path_counts), where some portion @@ -1096,7 +966,6 @@ ssa_fix_duplicate_block_edges (struct re &path_in_count, &path_out_count, &path_in_freq); - int cur_path_freq = path_in_freq; for (unsigned int count = 0, i = 1; i < path->length (); i++) { edge epath = (*path)[i]->e; @@ -1162,19 +1031,14 @@ ssa_fix_duplicate_block_edges (struct re } } - /* Update the counts and frequency of both the original block + /* Update the counts of both the original block and path edge, and the duplicates. The path duplicate's - incoming count and frequency are the totals for all edges + incoming count are the totals for all edges incoming to this jump threading path computed earlier. And we know that the duplicated path will have path_out_count flowing out of it (i.e. along the duplicated path out of the duplicated joiner). */ - update_profile (epath, e2, path_in_count, path_out_count, - path_in_freq); - - /* Record the frequency flowing to the downstream duplicated - path blocks. */ - cur_path_freq = EDGE_FREQUENCY (e2); + update_profile (epath, e2, path_in_count, path_out_count); } else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK) { @@ -1184,7 +1048,7 @@ ssa_fix_duplicate_block_edges (struct re if (count == 1) single_succ_edge (rd->dup_blocks[1])->aux = NULL; - /* Update the counts and frequency of both the original block + /* Update the counts of both the original block and path edge, and the duplicates. Since we are now after any joiner that may have existed on the path, the count flowing along the duplicated threaded path is path_out_count. @@ -1194,7 +1058,7 @@ ssa_fix_duplicate_block_edges (struct re been updated at the end of that handling to the edge frequency along the duplicated joiner path edge. 
*/ update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0), - path_out_count, path_out_count, cur_path_freq); + path_out_count, path_out_count); } else { @@ -1211,8 +1075,7 @@ ssa_fix_duplicate_block_edges (struct re thread path (path_in_freq). If we had a joiner, it would have been updated at the end of that handling to the edge frequency along the duplicated joiner path edge. */ - update_profile (epath, NULL, path_out_count, path_out_count, - cur_path_freq); + update_profile (epath, NULL, path_out_count, path_out_count); } /* Increment the index into the duplicated path when we processed @@ -1223,11 +1086,6 @@ ssa_fix_duplicate_block_edges (struct re count++; } } - - /* Done with all profile and frequency updates, clear counts if they - were copied. */ - if (do_freqs_to_counts) - clear_counts_path (rd); } /* Hash table traversal callback routine to create duplicate blocks. */ @@ -2137,7 +1995,6 @@ duplicate_thread_path (edge entry, edge struct loop *loop = entry->dest->loop_father; edge exit_copy; edge redirected; - int curr_freq; profile_count curr_count; if (!can_copy_bbs_p (region, n_region)) @@ -2170,7 +2027,6 @@ duplicate_thread_path (edge entry, edge the jump-thread path in order. */ curr_count = entry->count (); - curr_freq = EDGE_FREQUENCY (entry); for (i = 0; i < n_region; i++) { @@ -2181,10 +2037,8 @@ duplicate_thread_path (edge entry, edge /* Watch inconsistent profile. */ if (curr_count > region[i]->count) curr_count = region[i]->count; - if (curr_freq > region[i]->frequency) - curr_freq = region[i]->frequency; /* Scale current BB. */ - if (region[i]->count > 0 && curr_count.initialized_p ()) + if (region[i]->count.nonzero_p () && curr_count.initialized_p ()) { /* In the middle of the path we only scale the frequencies. 
In last BB we need to update probabilities of outgoing edges @@ -2195,24 +2049,11 @@ duplicate_thread_path (edge entry, edge region[i]->count); else update_bb_profile_for_threading (region[i], - curr_freq, curr_count, + curr_count, exit); scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count, region_copy[i]->count); } - else if (region[i]->frequency) - { - if (i + 1 != n_region) - scale_bbs_frequencies_int (region + i, 1, - region[i]->frequency - curr_freq, - region[i]->frequency); - else - update_bb_profile_for_threading (region[i], - curr_freq, curr_count, - exit); - scale_bbs_frequencies_int (region_copy + i, 1, curr_freq, - region_copy[i]->frequency); - } if (single_succ_p (bb)) { @@ -2221,7 +2062,6 @@ duplicate_thread_path (edge entry, edge || region_copy[i + 1] == single_succ_edge (bb)->dest); if (i + 1 != n_region) { - curr_freq = EDGE_FREQUENCY (single_succ_edge (bb)); curr_count = single_succ_edge (bb)->count (); } continue; @@ -2252,7 +2092,6 @@ duplicate_thread_path (edge entry, edge } else { - curr_freq = EDGE_FREQUENCY (e); curr_count = e->count (); } } Index: tree-switch-conversion.c =================================================================== --- tree-switch-conversion.c (revision 254348) +++ tree-switch-conversion.c (working copy) @@ -1443,10 +1443,10 @@ gen_inbound_check (gswitch *swtch, struc } /* frequencies of the new BBs */ - bb1->frequency = EDGE_FREQUENCY (e01); - bb2->frequency = EDGE_FREQUENCY (e02); + bb1->count = e01->count (); + bb2->count = e02->count (); if (!info->default_case_nonstandard) - bbf->frequency = EDGE_FREQUENCY (e1f) + EDGE_FREQUENCY (e2f); + bbf->count = e1f->count () + e2f->count (); /* Tidy blocks that have become unreachable. 
*/ prune_bbs (bbd, info->final_bb, Index: tree-tailcall.c =================================================================== --- tree-tailcall.c (revision 254348) +++ tree-tailcall.c (working copy) @@ -805,12 +805,9 @@ adjust_return_value (basic_block bb, tre /* Subtract COUNT and FREQUENCY from the basic block and it's outgoing edge. */ static void -decrease_profile (basic_block bb, profile_count count, int frequency) +decrease_profile (basic_block bb, profile_count count) { bb->count = bb->count - count; - bb->frequency -= frequency; - if (bb->frequency < 0) - bb->frequency = 0; if (!single_succ_p (bb)) { gcc_assert (!EDGE_COUNT (bb->succs)); @@ -892,11 +889,10 @@ eliminate_tail_call (struct tailcall *t) /* Number of executions of function has reduced by the tailcall. */ e = single_succ_edge (gsi_bb (t->call_gsi)); - decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count (), EDGE_FREQUENCY (e)); - decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count (), - EDGE_FREQUENCY (e)); + decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count ()); + decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count ()); if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - decrease_profile (e->dest, e->count (), EDGE_FREQUENCY (e)); + decrease_profile (e->dest, e->count ()); /* Replace the call by a jump to the start of function. */ e = redirect_edge_and_branch (single_succ_edge (gsi_bb (t->call_gsi)), Index: tree-vect-loop-manip.c =================================================================== --- tree-vect-loop-manip.c (revision 254348) +++ tree-vect-loop-manip.c (working copy) @@ -1843,7 +1843,6 @@ vect_do_peeling (loop_vec_info loop_vinf /* Simply propagate profile info from guard_bb to guard_to which is a merge point of control flow. */ - guard_to->frequency = guard_bb->frequency; guard_to->count = guard_bb->count; /* Scale probability of epilog loop back. FIXME: We should avoid scaling down and back up. 
Profile may Index: tree-vect-loop.c =================================================================== --- tree-vect-loop.c (revision 254348) +++ tree-vect-loop.c (working copy) @@ -7229,20 +7229,14 @@ scale_profile_for_vect_loop (struct loop gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf); profile_count freq_h = loop->header->count, freq_e = preheader->count (); - /* Use frequency only if counts are zero. */ - if (!(freq_h > 0) && !(freq_e > 0)) - { - freq_h = profile_count::from_gcov_type (loop->header->frequency); - freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader)); - } - if (freq_h > 0) + if (freq_h.nonzero_p ()) { profile_probability p; /* Avoid dropping loop body profile counter to 0 because of zero count in loop's preheader. */ - if (!(freq_e > profile_count::from_gcov_type (1))) - freq_e = profile_count::from_gcov_type (1); + if (!(freq_e == profile_count::zero ())) + freq_e = freq_e.force_nonzero (); p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); scale_loop_frequencies (loop, p); } @@ -7781,7 +7775,7 @@ optimize_mask_stores (struct loop *loop) efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); /* Put STORE_BB to likely part. */ efalse->probability = profile_probability::unlikely (); - store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse); + store_bb->count = efalse->count (); make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); if (dom_info_available_p (CDI_DOMINATORS)) set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 254348) +++ tree-vect-stmts.c (working copy) @@ -3221,7 +3221,7 @@ vectorizable_simd_clone_call (gimple *st vec<tree> vargs = vNULL; size_t i, nargs; tree lhs, rtype, ratype; - vec<constructor_elt, va_gc> *ret_ctor_elts; + vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; /* Is STMT a vectorizable call? 
*/ if (!is_gimple_call (stmt)) Index: ubsan.c =================================================================== --- ubsan.c (revision 254348) +++ ubsan.c (working copy) @@ -804,6 +804,7 @@ ubsan_expand_null_ifn (gimple_stmt_itera this edge is unlikely taken, so set up the probability accordingly. */ e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); /* Connect 'then block' with the 'else block'. This is needed as the ubsan routines we call in the 'then block' are not noreturn. @@ -1085,6 +1086,7 @@ ubsan_expand_ptr_ifn (gimple_stmt_iterat accordingly. */ e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); } else { @@ -1098,12 +1100,14 @@ ubsan_expand_ptr_ifn (gimple_stmt_iterat e = make_edge (cond_neg_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); cond_pos_bb = create_empty_bb (cond_bb); add_bb_to_loop (cond_pos_bb, cond_bb->loop_father); e = make_edge (cond_bb, cond_pos_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::even (); + cond_pos_bb->count = e->count (); e = make_edge (cond_pos_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); Index: value-prof.c =================================================================== --- value-prof.c (revision 254348) +++ value-prof.c (working copy) @@ -583,7 +583,7 @@ static bool check_counter (gimple *stmt, const char * name, gcov_type *count, gcov_type *all, profile_count bb_count_d) { - gcov_type bb_count = bb_count_d.to_gcov_type (); + gcov_type bb_count = bb_count_d.ipa ().to_gcov_type (); if (*all != bb_count || *count > *all) { location_t locus; @@ -1299,7 +1299,7 @@ check_ic_target (gcall *call_stmt, struc gcall * gimple_ic (gcall *icall_stmt, struct cgraph_node *direct_call, - profile_probability prob, profile_count count, 
profile_count all) + profile_probability prob) { gcall *dcall_stmt; gassign *load_stmt; @@ -1354,11 +1354,11 @@ gimple_ic (gcall *icall_stmt, struct cgr /* Edge e_cd connects cond_bb to dcall_bb, etc; note the first letters. */ e_cd = split_block (cond_bb, cond_stmt); dcall_bb = e_cd->dest; - dcall_bb->count = count; + dcall_bb->count = cond_bb->count.apply_probability (prob); e_di = split_block (dcall_bb, dcall_stmt); icall_bb = e_di->dest; - icall_bb->count = all - count; + icall_bb->count = cond_bb->count - dcall_bb->count; /* Do not disturb existing EH edges from the indirect call. */ if (!stmt_ends_bb_p (icall_stmt)) @@ -1376,7 +1376,7 @@ gimple_ic (gcall *icall_stmt, struct cgr if (e_ij != NULL) { join_bb = e_ij->dest; - join_bb->count = all; + join_bb->count = cond_bb->count; } e_cd->flags = (e_cd->flags & ~EDGE_FALLTHRU) | EDGE_TRUE_VALUE; @@ -1518,7 +1518,7 @@ gimple_ic_transform (gimple_stmt_iterato count = histogram->hvalue.counters [1]; all = histogram->hvalue.counters [2]; - bb_all = gimple_bb (stmt)->count.to_gcov_type (); + bb_all = gimple_bb (stmt)->count.ipa ().to_gcov_type (); /* The order of CHECK_COUNTER calls is important - since check_counter can correct the third parameter and we want to make count <= all <= bb_all. */ Index: value-prof.h =================================================================== --- value-prof.h (revision 254348) +++ value-prof.h (working copy) @@ -90,8 +90,7 @@ void gimple_move_stmt_histograms (struct void verify_histograms (void); void free_histograms (function *); void stringop_block_profile (gimple *, unsigned int *, HOST_WIDE_INT *); -gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability, - profile_count, profile_count); +gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability); bool check_ic_target (gcall *, struct cgraph_node *);
This breaks gfortran.dg/bounds_check_15.f90 with -O3 -funroll-loops -fbounds-check on ia64: Error: qsort comparator non-negative on sorted output: 1 during RTL pass: mach ../gcc/testsuite/gfortran.dg/bounds_check_15.f90:32:0: internal compiler error: qsort checking failed 0x40000000001cc1ff qsort_chk_error ../../gcc/vec.c:222 0x400000000223130f qsort_chk(void*, unsigned long, unsigned long, int (*)(void const*, void const*)) ../../gcc/vec.c:274 0x400000000114ba4f vec<_expr*, va_heap, vl_embed>::qsort(int (*)(void const*, void const*)) ../../gcc/vec.h:973 0x400000000114ba4f vec<_expr*, va_heap, vl_ptr>::qsort(int (*)(void const*, void const*)) ../../gcc/vec.h:1735 0x400000000114ba4f fill_vec_av_set ../../gcc/sel-sched.c:3725 0x40000000011517df fill_ready_list ../../gcc/sel-sched.c:4022 0x40000000011517df find_best_expr ../../gcc/sel-sched.c:4382 0x40000000011517df fill_insns ../../gcc/sel-sched.c:5539 0x40000000011517df schedule_on_fences ../../gcc/sel-sched.c:7356 0x40000000011517df sel_sched_region_2 ../../gcc/sel-sched.c:7494 0x4000000001158d0f sel_sched_region_1 ../../gcc/sel-sched.c:7536 0x4000000001158d0f sel_sched_region(int) ../../gcc/sel-sched.c:7637 0x400000000115a0af run_selective_scheduling() ../../gcc/sel-sched.c:7713 0x4000000001a50a5f ia64_reorg ../../gcc/config/ia64/ia64.c:9854 0x40000000010c2a8f execute ../../gcc/reorg.c:3947 Andreas.
On 2017.11.03 at 16:48 +0100, Jan Hubicka wrote:
> this is the updated patch which I have committed after profiled-bootstrapping x86-64
Unfortunately, compiling tramp3d-v4.cpp is 6-7% slower after this patch.
This happens with an LTO/PGO bootstrapped gcc using --enable-checking=release.
On X86_64:
Before:
Performance counter stats for 'g++ -w -Ofast tramp3d-v4.cpp' (4 runs):
25040.360183 task-clock (msec) # 1.000 CPUs utilized ( +- 0.25% )
650 context-switches # 0.026 K/sec ( +- 76.87% )
2 cpu-migrations # 0.000 K/sec ( +- 28.87% )
268,141 page-faults # 0.011 M/sec ( +- 0.01% )
80,210,085,167 cycles # 3.203 GHz ( +- 0.26% ) (66.67%)
21,061,765,388 stalled-cycles-frontend # 26.26% frontend cycles idle ( +- 0.37% ) (66.67%)
24,699,976,439 stalled-cycles-backend # 30.79% backend cycles idle ( +- 0.57% ) (66.68%)
69,167,169,243 instructions # 0.86 insn per cycle
# 0.36 stalled cycles per insn ( +- 0.05% ) (66.68%)
15,230,229,662 branches # 608.227 M/sec ( +- 0.06% ) (66.68%)
986,612,296 branch-misses # 6.48% of all branches ( +- 0.07% ) (66.68%)
25.046439011 seconds time elapsed ( +- 0.25% )
After:
Performance counter stats for 'g++ -w -Ofast tramp3d-v4.cpp' (4 runs):
26710.577065 task-clock (msec) # 1.000 CPUs utilized ( +- 0.27% )
199 context-switches # 0.007 K/sec ( +- 21.12% )
2 cpu-migrations # 0.000 K/sec ( +- 14.29% )
267,676 page-faults # 0.010 M/sec ( +- 0.01% )
85,561,962,974 cycles # 3.203 GHz ( +- 0.26% ) (66.66%)
19,581,827,643 stalled-cycles-frontend # 22.89% frontend cycles idle ( +- 0.30% ) (66.66%)
26,056,535,726 stalled-cycles-backend # 30.45% backend cycles idle ( +- 0.65% ) (66.68%)
77,222,167,966 instructions # 0.90 insn per cycle
# 0.34 stalled cycles per insn ( +- 0.04% ) (66.68%)
17,471,652,187 branches # 654.110 M/sec ( +- 0.05% ) (66.69%)
1,082,141,013 branch-misses # 6.19% of all branches ( +- 0.04% ) (66.69%)
26.713823720 seconds time elapsed ( +- 0.27% )
==================================================================================================================
On PPC64le:
Before:
Performance counter stats for 'g++ -w -Ofast tramp3d-v4.cpp' (4 runs):
24281.894597 task-clock (msec) # 0.989 CPUs utilized ( +- 1.85% )
166 context-switches # 0.007 K/sec ( +- 2.46% )
5 cpu-migrations # 0.000 K/sec ( +- 18.03% )
52,908 page-faults # 0.002 M/sec ( +- 11.61% )
84,939,354,171 cycles # 3.498 GHz ( +- 1.82% ) (66.71%)
4,680,693,343 stalled-cycles-frontend # 5.51% frontend cycles idle ( +- 8.75% ) (49.98%)
46,697,372,688 stalled-cycles-backend # 54.98% backend cycles idle ( +- 2.06% ) (50.05%)
94,990,460,746 instructions # 1.12 insn per cycle
# 0.49 stalled cycles per insn ( +- 0.10% ) (66.72%)
19,562,344,992 branches # 805.635 M/sec ( +- 0.07% ) (50.06%)
807,701,262 branch-misses # 4.13% of all branches ( +- 0.45% ) (50.05%)
24.550558669 seconds time elapsed ( +- 1.83% )
After:
Performance counter stats for 'g++ -w -Ofast tramp3d-v4.cpp' (4 runs):
26383.472582 task-clock (msec) # 0.995 CPUs utilized ( +- 1.83% )
202 context-switches # 0.008 K/sec ( +- 1.68% )
5 cpu-migrations # 0.000 K/sec ( +- 14.29% )
53,114 page-faults # 0.002 M/sec ( +- 17.86% )
92,099,443,793 cycles # 3.491 GHz ( +- 0.96% ) (66.68%)
3,706,147,243 stalled-cycles-frontend # 4.02% frontend cycles idle ( +- 8.31% ) (50.00%)
51,376,299,749 stalled-cycles-backend # 55.78% backend cycles idle ( +- 0.83% ) (50.05%)
105,872,124,981 instructions # 1.15 insn per cycle
# 0.49 stalled cycles per insn ( +- 0.05% ) (66.74%)
22,348,839,937 branches # 847.077 M/sec ( +- 0.16% ) (50.04%)
847,288,219 branch-misses # 3.79% of all branches ( +- 0.06% ) (50.02%)
26.511790685 seconds time elapsed ( +- 1.84% )
--
Markus
> On 2017.11.03 at 16:48 +0100, Jan Hubicka wrote: > > this is the updated patch which I have committed after profiled bootstrapping x86-64 > > Unfortunately, compiling tramp3d-v4.cpp is 6-7% slower after this patch. > This happens with an LTO/PGO bootstrapped gcc using --enable-checking=release. Our periodic testers have also picked up the change and there is no compile time regression reported for tramp3d: https://gcc.opensuse.org/gcc-old/c++bench-czerny/tramp3d/ so I would conclude that it is a regression in the LTO+PGO bootstrap. I am fixing one checking bug that may cause it (where we mix local and global profiles), so perhaps it will go away afterwards. The patch has changed a lot of details because I had to merge FDO and non-FDO paths everywhere and also decide on code that seemed buggy and did not translate fluently into the new API. Looking at today's results there are no significant performance implications of this patch on x86-64 nor Itanium, but there are significant code size reductions which I am not quite sure where they come from: https://gcc.opensuse.org/gcc-old/c++bench-czerny/ My overall plan is the following: 1) Fix checking-enabled fallout of the cfg patch 2) finish the conversion by dropping frequencies from the callgraph 3) audit code for bugs and profile-updating insanities. The situation is not bad - we have fewer insanities than GCC 7 on the tramp3d -Ofast build, but it is worse than before the conversion, so I may have dragged in some bugs (again, the numbers are not fully comparable) 4) do full retuning of inliner metrics and FDO params in stage 3. This stage1 we got a lot of changes that affect IPA metrics - the conversion to sreals, context-sensitive time estimates, new profile maintenance. Also FDO metrics will hopefully become more realistic as we have fixed many instances of dropped profiles. So I hope to get those 7% back for sure :) I will also try oprofile to see if I spot something obvious if today's fixes do not cover this regression. Honza
On 2017.11.05 at 11:55 +0100, Jan Hubicka wrote: > > On 2017.11.03 at 16:48 +0100, Jan Hubicka wrote: > > > this is updated patch which I have comitted after profiledbootstrapping x86-64 > > > > Unfortunately, compiling tramp3d-v4.cpp is 6-7% slower after this patch. > > This happens with an LTO/PGO bootstrapped gcc using --enable-checking=release. > > our periodic testers has also picked up the change and there is no compile time > regression reported for tramp3d. > https://gcc.opensuse.org/gcc-old/c++bench-czerny/tramp3d/ > so I would conclude that it is regression in LTO+PGO bootstrap. I am fixing one checking > bug that may cause it (where we mix local and global profiles) so perhaps it will go away > afterwards. Just to confirm: pure PGO bootstrap is fine, e.g. on Ryzen: (LTO/PGO) 17.65 sec ( +- 0.68% ) (PGO) 15.74 sec ( +- 0.27% )
> On 2017.11.05 at 11:55 +0100, Jan Hubicka wrote: > > > On 2017.11.03 at 16:48 +0100, Jan Hubicka wrote: > > > > this is the updated patch which I have committed after profiled bootstrapping x86-64 > > > > > > Unfortunately, compiling tramp3d-v4.cpp is 6-7% slower after this patch. > > > This happens with an LTO/PGO bootstrapped gcc using --enable-checking=release. > > > > our periodic testers have also picked up the change and there is no compile time > > regression reported for tramp3d. > > https://gcc.opensuse.org/gcc-old/c++bench-czerny/tramp3d/ > > so I would conclude that it is a regression in the LTO+PGO bootstrap. I am fixing one checking > > bug that may cause it (where we mix local and global profiles) so perhaps it will go away > > afterwards. > > Just to confirm: pure PGO bootstrap is fine, e.g. on Ryzen: > (LTO/PGO) 17.65 sec ( +- 0.68% ) > (PGO) 15.74 sec ( +- 0.27% ) Thanks. I have committed the patch for the inlining profile update bug, so with some luck LTO/PGO may be fine again. I will look for other insanities after chasing out the ICEs. The callgraph currently still has frequencies, which I plan to drop next. With that, the profile updating will become a bit more consistent over the whole program. IPA profile updates will be fun with LTO though - even for gcc we link libbackend into multiple binaries and thus the whole-program profile is not really whole program after all. It is a bit of the common libbackend profile combined with frontend profiles and thus it can easily accumulate false zeros. Honza > > -- > Markus
On 2017.11.07 at 00:12 +0100, Jan Hubicka wrote: > > On 2017.11.05 at 11:55 +0100, Jan Hubicka wrote: > > > > On 2017.11.03 at 16:48 +0100, Jan Hubicka wrote: > > > > > this is updated patch which I have comitted after profiledbootstrapping x86-64 > > > > > > > > Unfortunately, compiling tramp3d-v4.cpp is 6-7% slower after this patch. > > > > This happens with an LTO/PGO bootstrapped gcc using --enable-checking=release. > > > > > > our periodic testers has also picked up the change and there is no compile time > > > regression reported for tramp3d. > > > https://gcc.opensuse.org/gcc-old/c++bench-czerny/tramp3d/ > > > so I would conclude that it is regression in LTO+PGO bootstrap. I am fixing one checking > > > bug that may cause it (where we mix local and global profiles) so perhaps it will go away > > > afterwards. > > > > Just to confirm: pure PGO bootstrap is fine, e.g. on Ryzen: > > (LTO/PGO) 17.65 sec ( +- 0.68% ) > > (PGO) 15.74 sec ( +- 0.27% ) > > Thanks. I have comitted the patch for inlining profile update bug, so with some > luck LTO/PGO may be fine again. 
It got worse, unfortunately: Pure PGO: Performance counter stats for '/home/trippels/gcc_8/usr/local/bin/g++ -w -Ofast tramp3d-v4.cpp' (4 runs): 16213.529306 task-clock (msec) # 0.999 CPUs utilized ( +- 0.25% ) 1,387 context-switches # 0.086 K/sec ( +- 0.17% ) 4 cpu-migrations # 0.000 K/sec ( +- 14.80% ) 261,764 page-faults # 0.016 M/sec ( +- 0.03% ) 62,633,457,222 cycles # 3.863 GHz ( +- 0.20% ) (83.32%) 13,990,050,204 stalled-cycles-frontend # 22.34% frontend cycles idle ( +- 0.51% ) (83.33%) 13,189,755,888 stalled-cycles-backend # 21.06% backend cycles idle ( +- 0.04% ) (83.31%) 75,194,592,630 instructions # 1.20 insn per cycle # 0.19 stalled cycles per insn ( +- 0.03% ) (83.35%) 17,113,639,942 branches # 1055.516 M/sec ( +- 0.02% ) (83.38%) 634,471,544 branch-misses # 3.71% of all branches ( +- 0.07% ) (83.34%) 16.226375499 seconds time elapsed ( +- 0.24% ) LTO/PGO: Performance counter stats for '/home/trippels/gcc_8/usr/local/bin/g++ -w -Ofast tramp3d-v4.cpp' (4 runs): 18622.496264 task-clock (msec) # 0.999 CPUs utilized ( +- 0.35% ) 1,592 context-switches # 0.086 K/sec ( +- 0.32% ) 4 cpu-migrations # 0.000 K/sec ( +- 14.43% ) 261,370 page-faults # 0.014 M/sec ( +- 0.12% ) 71,849,030,564 cycles # 3.858 GHz ( +- 0.08% ) (83.34%) 15,987,209,604 stalled-cycles-frontend # 22.25% frontend cycles idle ( +- 0.47% ) (83.32%) 14,336,345,458 stalled-cycles-backend # 19.95% backend cycles idle ( +- 0.05% ) (83.33%) 87,674,608,740 instructions # 1.22 insn per cycle # 0.18 stalled cycles per insn ( +- 0.01% ) (83.36%) 20,610,950,144 branches # 1106.777 M/sec ( +- 0.01% ) (83.35%) 638,454,497 branch-misses # 3.10% of all branches ( +- 0.08% ) (83.35%) 18.644370559 seconds time elapsed ( +- 0.38% ) -- Markus
Index: asan.c =================================================================== --- asan.c (revision 254266) +++ asan.c (working copy) @@ -1801,6 +1801,7 @@ create_cond_insert_point (gimple_stmt_it ? profile_probability::very_unlikely () : profile_probability::very_likely (); e->probability = fallthrough_probability.invert (); + then_bb->count = e->count (); if (create_then_fallthru_edge) make_single_succ_edge (then_bb, fallthru_bb, EDGE_FALLTHRU); Index: basic-block.h =================================================================== --- basic-block.h (revision 254266) +++ basic-block.h (working copy) @@ -148,9 +148,6 @@ struct GTY((chain_next ("%h.next_bb"), c /* Expected number of executions: calculated in profile.c. */ profile_count count; - /* Expected frequency. Normalized to be in range 0 to BB_FREQ_MAX. */ - int frequency; - /* The discriminator for this block. The discriminator distinguishes among several basic blocks that share a common locus, allowing for more accurate sample-based profiling. */ @@ -301,7 +298,7 @@ enum cfg_bb_flags ? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0)) /* Return expected execution frequency of the edge E. */ -#define EDGE_FREQUENCY(e) e->probability.apply (e->src->frequency) +#define EDGE_FREQUENCY(e) e->count ().to_frequency (cfun) /* Compute a scale factor (or probability) suitable for scaling of gcov_type values via apply_probability() and apply_scale(). 
*/ Index: bb-reorder.c =================================================================== --- bb-reorder.c (revision 254266) +++ bb-reorder.c (working copy) @@ -256,8 +256,8 @@ push_to_next_round_p (const_basic_block there_exists_another_round = round < number_of_rounds - 1; - block_not_hot_enough = (bb->frequency < exec_th - || bb->count < count_th + block_not_hot_enough = (bb->count.to_frequency (cfun) < exec_th + || bb->count.ipa () < count_th || probably_never_executed_bb_p (cfun, bb)); if (there_exists_another_round @@ -293,9 +293,9 @@ find_traces (int *n_traces, struct trace { bbd[e->dest->index].heap = heap; bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest); - if (e->dest->frequency > max_entry_frequency) - max_entry_frequency = e->dest->frequency; - if (e->dest->count.initialized_p () && e->dest->count > max_entry_count) + if (e->dest->count.to_frequency (cfun) > max_entry_frequency) + max_entry_frequency = e->dest->count.to_frequency (cfun); + if (e->dest->count.ipa_p () && e->dest->count > max_entry_count) max_entry_count = e->dest->count; } @@ -329,8 +329,10 @@ find_traces (int *n_traces, struct trace for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux) - fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency); - fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency); + fprintf (dump_file, "%d [%d] ", bb->index, + bb->count.to_frequency (cfun)); + fprintf (dump_file, "%d [%d]\n", bb->index, + bb->count.to_frequency (cfun)); } fflush (dump_file); } @@ -551,7 +553,7 @@ find_traces_1_round (int branch_th, int continue; prob = e->probability; - freq = e->dest->frequency; + freq = e->dest->count.to_frequency (cfun); /* The only sensible preference for a call instruction is the fallthru edge. Don't bother selecting anything else. 
*/ @@ -573,7 +575,7 @@ find_traces_1_round (int branch_th, int || !prob.initialized_p () || ((prob.to_reg_br_prob_base () < branch_th || EDGE_FREQUENCY (e) < exec_th - || e->count () < count_th) && (!for_size))) + || e->count ().ipa () < count_th) && (!for_size))) continue; if (better_edge_p (bb, e, prob, freq, best_prob, best_freq, @@ -671,7 +673,7 @@ find_traces_1_round (int branch_th, int || !prob.initialized_p () || prob.to_reg_br_prob_base () < branch_th || freq < exec_th - || e->count () < count_th) + || e->count ().ipa () < count_th) { /* When partitioning hot/cold basic blocks, make sure the cold blocks (and only the cold blocks) all get @@ -706,7 +708,7 @@ find_traces_1_round (int branch_th, int if (best_edge->dest != bb) { if (EDGE_FREQUENCY (best_edge) - > 4 * best_edge->dest->frequency / 5) + > 4 * best_edge->dest->count.to_frequency (cfun) / 5) { /* The loop has at least 4 iterations. If the loop header is not the first block of the function @@ -783,8 +785,8 @@ find_traces_1_round (int branch_th, int & EDGE_CAN_FALLTHRU) && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX) && single_succ (e->dest) == best_edge->dest - && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge) - || for_size)) + && (2 * e->dest->count.to_frequency (cfun) + >= EDGE_FREQUENCY (best_edge) || for_size)) { best_edge = e; if (dump_file) @@ -945,9 +947,9 @@ bb_to_key (basic_block bb) if (priority) /* The block with priority should have significantly lower key. 
*/ - return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency); + return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun)); - return -bb->frequency; + return -bb->count.to_frequency (cfun); } /* Return true when the edge E from basic block BB is better than the temporary @@ -1290,7 +1292,7 @@ connect_traces (int n_traces, struct tra && !connected[bbd[di].start_of_trace] && BB_PARTITION (e2->dest) == current_partition && EDGE_FREQUENCY (e2) >= freq_threshold - && e2->count () >= count_threshold + && e2->count ().ipa () >= count_threshold && (!best2 || e2->probability > best2->probability || (e2->probability == best2->probability @@ -1317,7 +1319,7 @@ connect_traces (int n_traces, struct tra optimize_edge_for_speed_p (best) && EDGE_FREQUENCY (best) >= freq_threshold && (!best->count ().initialized_p () - || best->count () >= count_threshold))) + || best->count ().ipa () >= count_threshold))) { basic_block new_bb; @@ -1375,7 +1377,7 @@ copy_bb_p (const_basic_block bb, int cod int max_size = uncond_jump_length; rtx_insn *insn; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return false; if (EDGE_COUNT (bb->preds) < 2) return false; @@ -1459,7 +1461,6 @@ fix_up_crossing_landing_pad (eh_landing_ last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb; new_bb = create_basic_block (new_label, jump, last_bb); new_bb->aux = last_bb->aux; - new_bb->frequency = post_bb->frequency; new_bb->count = post_bb->count; last_bb->aux = new_bb; @@ -1517,7 +1518,6 @@ sanitize_hot_paths (bool walk_up, unsign edge_iterator ei; profile_probability highest_probability = profile_probability::uninitialized (); - int highest_freq = 0; profile_count highest_count = profile_count::uninitialized (); bool found = false; @@ -1544,11 +1544,8 @@ sanitize_hot_paths (bool walk_up, unsign /* The following loop will look for the hottest edge via the edge count, if it is non-zero, then fallback to the edge frequency and finally the edge probability. 
*/ - if (!highest_count.initialized_p () || e->count () > highest_count) + if (!(e->count () > highest_count)) highest_count = e->count (); - int edge_freq = EDGE_FREQUENCY (e); - if (edge_freq > highest_freq) - highest_freq = edge_freq; if (!highest_probability.initialized_p () || e->probability > highest_probability) highest_probability = e->probability; @@ -1573,17 +1570,12 @@ sanitize_hot_paths (bool walk_up, unsign /* Select the hottest edge using the edge count, if it is non-zero, then fallback to the edge frequency and finally the edge probability. */ - if (highest_count > 0) + if (highest_count.initialized_p ()) { - if (e->count () < highest_count) + if (!(e->count () >= highest_count)) continue; } - else if (highest_freq) - { - if (EDGE_FREQUENCY (e) < highest_freq) - continue; - } - else if (e->probability < highest_probability) + else if (!(e->probability >= highest_probability)) continue; basic_block reach_bb = walk_up ? e->src : e->dest; Index: bt-load.c =================================================================== --- bt-load.c (revision 254266) +++ bt-load.c (working copy) @@ -185,7 +185,7 @@ static int first_btr, last_btr; static int basic_block_freq (const_basic_block bb) { - return bb->frequency; + return bb->count.to_frequency (cfun); } /* If the rtx at *XP references (sets or reads) any branch target Index: cfg.c =================================================================== --- cfg.c (revision 254266) +++ cfg.c (working copy) @@ -68,6 +68,7 @@ init_flow (struct function *the_fun) if (!the_fun->cfg) the_fun->cfg = ggc_cleared_alloc<control_flow_graph> (); n_edges_for_fn (the_fun) = 0; + the_fun->cfg->count_max = profile_count::uninitialized (); ENTRY_BLOCK_PTR_FOR_FN (the_fun) = alloc_block (); ENTRY_BLOCK_PTR_FOR_FN (the_fun)->index = ENTRY_BLOCK; @@ -447,13 +448,18 @@ check_bb_profile (basic_block bb, FILE * } if (bb != ENTRY_BLOCK_PTR_FOR_FN (fun)) { - int sum = 0; + profile_count sum = profile_count::zero (); FOR_EACH_EDGE (e, ei, 
bb->preds) - sum += EDGE_FREQUENCY (e); - if (abs (sum - bb->frequency) > 100) - fprintf (file, - ";; %sInvalid sum of incoming frequencies %i, should be %i\n", - s_indent, sum, bb->frequency); + sum += e->count (); + if (sum.differs_from_p (bb->count)) + { + fprintf (file, ";; %sInvalid sum of incoming counts ", + s_indent); + sum.dump (file); + fprintf (file, ", should be "); + bb->count.dump (file); + fprintf (file, "\n"); + } } if (BB_PARTITION (bb) == BB_COLD_PARTITION) { @@ -751,7 +757,6 @@ dump_bb_info (FILE *outf, basic_block bb fputs (", count ", outf); bb->count.dump (outf); } - fprintf (outf, ", freq %i", bb->frequency); if (maybe_hot_bb_p (fun, bb)) fputs (", maybe hot", outf); if (probably_never_executed_bb_p (fun, bb)) @@ -843,15 +848,15 @@ brief_dump_cfg (FILE *file, dump_flags_t } } -/* An edge originally destinating BB of FREQUENCY and COUNT has been proved to +/* An edge originally destinating BB of COUNT has been proved to leave the block by TAKEN_EDGE. Update profile of BB such that edge E can be redirected to destination of TAKEN_EDGE. This function may leave the profile inconsistent in the case TAKEN_EDGE - frequency or count is believed to be lower than FREQUENCY or COUNT + frequency or count is believed to be lower than COUNT respectively. */ void -update_bb_profile_for_threading (basic_block bb, int edge_frequency, +update_bb_profile_for_threading (basic_block bb, profile_count count, edge taken_edge) { edge c; @@ -866,16 +871,10 @@ update_bb_profile_for_threading (basic_b } bb->count -= count; - bb->frequency -= edge_frequency; - if (bb->frequency < 0) - bb->frequency = 0; - /* Compute the probability of TAKEN_EDGE being reached via threaded edge. Watch for overflows. */ - if (bb->frequency) - /* FIXME: We should get edge frequency as count. 
*/ - prob = profile_probability::probability_in_gcov_type - (edge_frequency, bb->frequency); + if (bb->count.nonzero_p ()) + prob = count.probability_in (bb->count); else prob = profile_probability::never (); if (prob > taken_edge->probability) @@ -899,9 +898,9 @@ update_bb_profile_for_threading (basic_b if (prob == profile_probability::never ()) { if (dump_file) - fprintf (dump_file, "Edge frequencies of bb %i has been reset, " - "frequency of block should end up being 0, it is %i\n", - bb->index, bb->frequency); + fprintf (dump_file, "Edge probabilities of bb %i has been reset, " + "count of block should end up being 0, it is non-zero\n", + bb->index); EDGE_SUCC (bb, 0)->probability = profile_probability::guessed_always (); ei = ei_start (bb->succs); ei_next (&ei); @@ -942,18 +941,10 @@ scale_bbs_frequencies_int (basic_block * for (i = 0; i < nbbs; i++) { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - /* Make sure the frequencies do not grow over BB_FREQ_MAX. */ - if (bbs[i]->frequency > BB_FREQ_MAX) - bbs[i]->frequency = BB_FREQ_MAX; bbs[i]->count = bbs[i]->count.apply_scale (num, den); } } -/* numbers smaller than this value are safe to multiply without getting - 64bit overflow. */ -#define MAX_SAFE_MULTIPLIER (1 << (sizeof (int64_t) * 4 - 1)) - /* Multiply all frequencies of basic blocks in array BBS of length NBBS by NUM/DEN, in gcov_type arithmetic. More accurate than previous function but considerably slower. 
*/ @@ -962,28 +953,9 @@ scale_bbs_frequencies_gcov_type (basic_b gcov_type den) { int i; - gcov_type fraction = RDIV (num * 65536, den); - - gcc_assert (fraction >= 0); - if (num < MAX_SAFE_MULTIPLIER) - for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) - bbs[i]->count = bbs[i]->count.apply_scale (num, den); - else - bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536); - } - else - for (i = 0; i < nbbs; i++) - { - if (sizeof (gcov_type) > sizeof (int)) - bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); - else - bbs[i]->frequency = RDIV (bbs[i]->frequency * fraction, 65536); - bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536); - } + for (i = 0; i < nbbs; i++) + bbs[i]->count = bbs[i]->count.apply_scale (num, den); } /* Multiply all frequencies of basic blocks in array BBS of length NBBS @@ -994,13 +966,9 @@ scale_bbs_frequencies_profile_count (bas profile_count num, profile_count den) { int i; - - for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = RDIV (bbs[i]->frequency * num.to_gcov_type (), - den.to_gcov_type ()); + if (num == profile_count::zero () || den.nonzero_p ()) + for (i = 0; i < nbbs; i++) bbs[i]->count = bbs[i]->count.apply_scale (num, den); - } } /* Multiply all frequencies of basic blocks in array BBS of length NBBS @@ -1013,10 +981,7 @@ scale_bbs_frequencies (basic_block *bbs, int i; for (i = 0; i < nbbs; i++) - { - bbs[i]->frequency = p.apply (bbs[i]->frequency); - bbs[i]->count = bbs[i]->count.apply_probability (p); - } + bbs[i]->count = bbs[i]->count.apply_probability (p); } /* Helper types for hash tables. */ Index: cfg.h =================================================================== --- cfg.h (revision 254266) +++ cfg.h (working copy) @@ -71,6 +71,9 @@ struct GTY(()) control_flow_graph { /* Maximal number of entities in the single jumptable. Used to estimate final flowgraph size. 
*/ int max_jumptable_ents; + + /* Maximal count of BB in function. */ + profile_count count_max; }; @@ -103,7 +106,7 @@ extern void debug_bb (basic_block); extern basic_block debug_bb_n (int); extern void dump_bb_info (FILE *, basic_block, int, dump_flags_t, bool, bool); extern void brief_dump_cfg (FILE *, dump_flags_t); -extern void update_bb_profile_for_threading (basic_block, int, profile_count, edge); +extern void update_bb_profile_for_threading (basic_block, profile_count, edge); extern void scale_bbs_frequencies_int (basic_block *, int, int, int); extern void scale_bbs_frequencies_gcov_type (basic_block *, int, gcov_type, gcov_type); Index: cfgbuild.c =================================================================== --- cfgbuild.c (revision 254266) +++ cfgbuild.c (working copy) @@ -499,7 +499,6 @@ find_bb_boundaries (basic_block bb) remove_edge (fallthru); /* BB is unreachable at this point - we need to determine its profile once edges are built. */ - bb->frequency = 0; bb->count = profile_count::uninitialized (); flow_transfer_insn = NULL; debug_insn = NULL; @@ -669,7 +668,6 @@ find_many_sub_basic_blocks (sbitmap bloc { bool initialized_src = false, uninitialized_src = false; bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) { if (e->count ().initialized_p ()) @@ -679,8 +677,6 @@ find_many_sub_basic_blocks (sbitmap bloc } else uninitialized_src = true; - if (e->probability.initialized_p ()) - bb->frequency += EDGE_FREQUENCY (e); } /* When some edges are missing with read profile, this is most likely because RTL expansion introduced loop. @@ -692,7 +688,7 @@ find_many_sub_basic_blocks (sbitmap bloc precisely once. */ if (!initialized_src || (uninitialized_src - && profile_status_for_fn (cfun) != PROFILE_READ)) + && profile_status_for_fn (cfun) < PROFILE_GUESSED)) bb->count = profile_count::uninitialized (); } /* If nothing changed, there is no need to create new BBs. 
*/ Index: cfgcleanup.c =================================================================== --- cfgcleanup.c (revision 254266) +++ cfgcleanup.c (working copy) @@ -559,8 +559,6 @@ try_forward_edges (int mode, basic_block { /* Save the values now, as the edge may get removed. */ profile_count edge_count = e->count (); - profile_probability edge_probability = e->probability; - int edge_frequency; int n = 0; e->goto_locus = goto_locus; @@ -585,8 +583,6 @@ try_forward_edges (int mode, basic_block /* We successfully forwarded the edge. Now update profile data: for each edge we traversed in the chain, remove the original edge's execution count. */ - edge_frequency = edge_probability.apply (b->frequency); - do { edge t; @@ -596,16 +592,12 @@ try_forward_edges (int mode, basic_block gcc_assert (n < nthreaded_edges); t = threaded_edges [n++]; gcc_assert (t->src == first); - update_bb_profile_for_threading (first, edge_frequency, - edge_count, t); + update_bb_profile_for_threading (first, edge_count, t); update_br_prob_note (first); } else { first->count -= edge_count; - first->frequency -= edge_frequency; - if (first->frequency < 0) - first->frequency = 0; /* It is possible that as the result of threading we've removed edge as it is threaded to the fallthru edge. Avoid @@ -2109,7 +2101,7 @@ try_crossjump_to_edge (int mode, edge e1 else redirect_edges_to = osrc2; - /* Recompute the frequencies and counts of outgoing edges. */ + /* Recompute the counts of destinations of outgoing edges. */ FOR_EACH_EDGE (s, ei, redirect_edges_to->succs) { edge s2; @@ -2132,24 +2124,19 @@ try_crossjump_to_edge (int mode, edge e1 that there is no more than one in the chain, so we can't run into infinite loop. 
*/ if (FORWARDER_BLOCK_P (s->dest)) - { - s->dest->frequency += EDGE_FREQUENCY (s); - } + s->dest->count += s->count (); if (FORWARDER_BLOCK_P (s2->dest)) - { - s2->dest->frequency -= EDGE_FREQUENCY (s); - if (s2->dest->frequency < 0) - s2->dest->frequency = 0; - } + s2->dest->count -= s->count (); - if (!redirect_edges_to->frequency && !src1->frequency) + /* FIXME: Is this correct? Should be rewritten to count API. */ + if (redirect_edges_to->count.nonzero_p () && src1->count.nonzero_p ()) s->probability = s->probability.combine_with_freq - (redirect_edges_to->frequency, - s2->probability, src1->frequency); + (redirect_edges_to->count.to_frequency (cfun), + s2->probability, src1->count.to_frequency (cfun)); } - /* Adjust count and frequency for the block. An earlier jump + /* Adjust count for the block. An earlier jump threading pass may have left the profile in an inconsistent state (see update_bb_profile_for_threading) so we must be prepared for overflows. */ @@ -2157,9 +2144,6 @@ try_crossjump_to_edge (int mode, edge e1 do { tmp->count += src1->count; - tmp->frequency += src1->frequency; - if (tmp->frequency > BB_FREQ_MAX) - tmp->frequency = BB_FREQ_MAX; if (tmp == redirect_edges_to) break; tmp = find_fallthru_edge (tmp->succs)->dest; Index: cfgexpand.c =================================================================== --- cfgexpand.c (revision 254266) +++ cfgexpand.c (working copy) @@ -2516,7 +2516,6 @@ expand_gimple_cond (basic_block bb, gcon redirect_edge_succ (false_edge, new_bb); false_edge->flags |= EDGE_FALLTHRU; new_bb->count = false_edge->count (); - new_bb->frequency = EDGE_FREQUENCY (false_edge); loop_p loop = find_common_loop (bb->loop_father, dest->loop_father); add_bb_to_loop (new_bb, loop); if (loop->latch == bb @@ -3847,11 +3846,7 @@ expand_gimple_tailcall (basic_block bb, if (!(e->flags & (EDGE_ABNORMAL | EDGE_EH))) { if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - e->dest->frequency -= EDGE_FREQUENCY (e); - if (e->dest->frequency < 0) - 
e->dest->frequency = 0; - } + e->dest->count -= e->count (); probability += e->probability; remove_edge (e); } @@ -5860,7 +5855,6 @@ construct_init_block (void) init_block = create_basic_block (NEXT_INSN (get_insns ()), get_last_insn (), ENTRY_BLOCK_PTR_FOR_FN (cfun)); - init_block->frequency = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; init_block->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; add_bb_to_loop (init_block, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father); if (e) @@ -5924,7 +5918,7 @@ construct_exit_block (void) while (NEXT_INSN (head) && NOTE_P (NEXT_INSN (head))) head = NEXT_INSN (head); /* But make sure exit_block starts with RETURN_LABEL, otherwise the - bb frequency counting will be confused. Any instructions before that + bb count counting will be confused. Any instructions before that label are emitted for the case where PREV_BB falls through into the exit block, so append those instructions to prev_bb in that case. */ if (NEXT_INSN (head) != return_label) @@ -5937,7 +5931,6 @@ construct_exit_block (void) } } exit_block = create_basic_block (NEXT_INSN (head), end, prev_bb); - exit_block->frequency = EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency; exit_block->count = EXIT_BLOCK_PTR_FOR_FN (cfun)->count; add_bb_to_loop (exit_block, EXIT_BLOCK_PTR_FOR_FN (cfun)->loop_father); @@ -5957,10 +5950,7 @@ construct_exit_block (void) if (e2 != e) { exit_block->count -= e2->count (); - exit_block->frequency -= EDGE_FREQUENCY (e2); } - if (exit_block->frequency < 0) - exit_block->frequency = 0; update_bb_for_insn (exit_block); } Index: cfghooks.c =================================================================== --- cfghooks.c (revision 254266) +++ cfghooks.c (working copy) @@ -146,10 +146,12 @@ verify_flow_info (void) error ("verify_flow_info: Wrong count of block %i", bb->index); err = 1; } - if (bb->frequency < 0) + /* FIXME: Graphite and SLJL and target code still tends to produce + edges with no probablity. 
*/ + if (profile_status_for_fn (cfun) >= PROFILE_GUESSED + && !bb->count.initialized_p () && !flag_graphite) { - error ("verify_flow_info: Wrong frequency of block %i %i", - bb->index, bb->frequency); + error ("verify_flow_info: Missing count of block %i", bb->index); err = 1; } @@ -164,7 +166,7 @@ verify_flow_info (void) /* FIXME: Graphite and SLJL and target code still tends to produce edges with no probablity. */ if (profile_status_for_fn (cfun) >= PROFILE_GUESSED - && !e->probability.initialized_p () && 0) + && !e->probability.initialized_p () && !flag_graphite) { error ("Uninitialized probability of edge %i->%i", e->src->index, e->dest->index); @@ -315,7 +317,6 @@ dump_bb_for_graph (pretty_printer *pp, b /* TODO: Add pretty printer for counter. */ if (bb->count.initialized_p ()) pp_printf (pp, "COUNT:" "%" PRId64, bb->count.to_gcov_type ()); - pp_printf (pp, " FREQ:%i |", bb->frequency); pp_write_text_to_stream (pp); if (!(dump_flags & TDF_SLIM)) cfg_hooks->dump_bb_for_graph (pp, bb); @@ -513,7 +514,6 @@ split_block_1 (basic_block bb, void *i) return NULL; new_bb->count = bb->count; - new_bb->frequency = bb->frequency; new_bb->discriminator = bb->discriminator; if (dom_info_available_p (CDI_DOMINATORS)) @@ -626,7 +626,6 @@ split_edge (edge e) { basic_block ret; profile_count count = e->count (); - int freq = EDGE_FREQUENCY (e); edge f; bool irr = (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0; struct loop *loop; @@ -640,7 +639,6 @@ split_edge (edge e) ret = cfg_hooks->split_edge (e); ret->count = count; - ret->frequency = freq; single_succ_edge (ret)->probability = profile_probability::always (); if (irr) @@ -869,7 +867,6 @@ make_forwarder_block (basic_block bb, bo fallthru = split_block_after_labels (bb); dummy = fallthru->src; dummy->count = profile_count::zero (); - dummy->frequency = 0; bb = fallthru->dest; /* Redirect back edges we want to keep. 
*/ @@ -879,10 +876,6 @@ make_forwarder_block (basic_block bb, bo if (redirect_edge_p (e)) { - dummy->frequency += EDGE_FREQUENCY (e); - if (dummy->frequency > BB_FREQ_MAX) - dummy->frequency = BB_FREQ_MAX; - dummy->count += e->count (); ei_next (&ei); continue; @@ -1101,19 +1094,10 @@ duplicate_block (basic_block bb, edge e, new_bb->count = new_count; bb->count -= new_count; - new_bb->frequency = EDGE_FREQUENCY (e); - bb->frequency -= EDGE_FREQUENCY (e); - redirect_edge_and_branch_force (e, new_bb); - - if (bb->frequency < 0) - bb->frequency = 0; } else - { - new_bb->count = bb->count; - new_bb->frequency = bb->frequency; - } + new_bb->count = bb->count; set_bb_original (new_bb, bb); set_bb_copy (bb, new_bb); @@ -1463,13 +1447,6 @@ account_profile_record (struct profile_r if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && profile_status_for_fn (cfun) != PROFILE_ABSENT) { - int sum = 0; - FOR_EACH_EDGE (e, ei, bb->preds) - sum += EDGE_FREQUENCY (e); - if (abs (sum - bb->frequency) > 100 - || (MAX (sum, bb->frequency) > 10 - && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10)) - record->num_mismatched_freq_in[after_pass]++; profile_count lsum = profile_count::zero (); FOR_EACH_EDGE (e, ei, bb->preds) lsum += e->count (); Index: cfgloop.c =================================================================== --- cfgloop.c (revision 254266) +++ cfgloop.c (working copy) @@ -607,7 +607,7 @@ find_subloop_latch_edge_by_profile (vec< tcount += e->count(); } - if (!tcount.initialized_p () || tcount < HEAVY_EDGE_MIN_SAMPLES + if (!tcount.initialized_p () || !(tcount.ipa () > HEAVY_EDGE_MIN_SAMPLES) || (tcount - mcount).apply_scale (HEAVY_EDGE_RATIO, 1) > tcount) return NULL; Index: cfgloopanal.c =================================================================== --- cfgloopanal.c (revision 254266) +++ cfgloopanal.c (working copy) @@ -213,9 +213,10 @@ average_num_loop_insns (const struct loo if (NONDEBUG_INSN_P (insn)) binsns++; - ratio = loop->header->frequency 
== 0 + ratio = loop->header->count.to_frequency (cfun) == 0 ? BB_FREQ_MAX - : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; + : (bb->count.to_frequency (cfun) * BB_FREQ_MAX) + / loop->header->count.to_frequency (cfun); ninsns += binsns * ratio; } free (bbs); @@ -245,8 +246,8 @@ expected_loop_iterations_unbounded (cons /* If we have no profile at all, use AVG_LOOP_NITER. */ if (profile_status_for_fn (cfun) == PROFILE_ABSENT) expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); - else if (loop->latch && (loop->latch->count.reliable_p () - || loop->header->count.reliable_p ())) + else if (loop->latch && (loop->latch->count.initialized_p () + || loop->header->count.initialized_p ())) { profile_count count_in = profile_count::zero (), count_latch = profile_count::zero (); @@ -258,45 +259,25 @@ expected_loop_iterations_unbounded (cons count_in += e->count (); if (!count_latch.initialized_p ()) - ; - else if (!(count_in > profile_count::zero ())) + expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); + else if (!count_in.nonzero_p ()) expected = count_latch.to_gcov_type () * 2; else { expected = (count_latch.to_gcov_type () + count_in.to_gcov_type () - 1) / count_in.to_gcov_type (); - if (read_profile_p) + if (read_profile_p + && count_latch.reliable_p () && count_in.reliable_p ()) *read_profile_p = true; } } - if (expected == -1) - { - int freq_in, freq_latch; - - freq_in = 0; - freq_latch = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (flow_bb_inside_loop_p (loop, e->src)) - freq_latch += EDGE_FREQUENCY (e); - else - freq_in += EDGE_FREQUENCY (e); - - if (freq_in == 0) - { - /* If we have no profile at all, use AVG_LOOP_NITER iterations. 
*/ - if (!freq_latch) - expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); - else - expected = freq_latch * 2; - } - else - expected = (freq_latch + freq_in - 1) / freq_in; - } + else + expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER); HOST_WIDE_INT max = get_max_loop_iterations_int (loop); if (max != -1 && max < expected) return max; + return expected; } Index: cfgloopmanip.c =================================================================== --- cfgloopmanip.c (revision 254266) +++ cfgloopmanip.c (working copy) @@ -536,7 +536,6 @@ scale_loop_profile (struct loop *loop, p if (e) { edge other_e; - int freq_delta; profile_count count_delta; FOR_EACH_EDGE (other_e, ei, e->src->succs) @@ -545,23 +544,18 @@ scale_loop_profile (struct loop *loop, p break; /* Probability of exit must be 1/iterations. */ - freq_delta = EDGE_FREQUENCY (e); count_delta = e->count (); e->probability = profile_probability::always () .apply_scale (1, iteration_bound); other_e->probability = e->probability.invert (); - freq_delta -= EDGE_FREQUENCY (e); count_delta -= e->count (); - /* If latch exists, change its frequency and count, since we changed + /* If latch exists, change its count, since we changed probability of exit. Theoretically we should update everything from source of exit edge to latch, but for vectorizer this is enough. */ if (loop->latch && loop->latch != e->src) { - loop->latch->frequency += freq_delta; - if (loop->latch->frequency < 0) - loop->latch->frequency = 0; loop->latch->count += count_delta; } } @@ -571,7 +565,6 @@ scale_loop_profile (struct loop *loop, p we look at the actual profile, if it is available. 
*/ p = p.apply_scale (iteration_bound, iterations); - bool determined = false; if (loop->header->count.initialized_p ()) { profile_count count_in = profile_count::zero (); @@ -584,21 +577,8 @@ scale_loop_profile (struct loop *loop, p { p = count_in.probability_in (loop->header->count.apply_scale (iteration_bound, 1)); - determined = true; } } - if (!determined && loop->header->frequency) - { - int freq_in = 0; - - FOR_EACH_EDGE (e, ei, loop->header->preds) - if (e->src != loop->latch) - freq_in += EDGE_FREQUENCY (e); - - if (freq_in != 0) - p = profile_probability::probability_in_gcov_type - (freq_in * iteration_bound, loop->header->frequency); - } if (!(p > profile_probability::never ())) p = profile_probability::very_unlikely (); } @@ -800,7 +780,7 @@ create_empty_loop_on_edge (edge entry_ed loop->latch = loop_latch; add_loop (loop, outer); - /* TODO: Fix frequencies and counts. */ + /* TODO: Fix counts. */ scale_loop_frequencies (loop, profile_probability::even ()); /* Update dominators. */ @@ -866,13 +846,11 @@ loopify (edge latch_edge, edge header_ed basic_block pred_bb = header_edge->src; struct loop *loop = alloc_loop (); struct loop *outer = loop_outer (succ_bb->loop_father); - int freq; profile_count cnt; loop->header = header_edge->dest; loop->latch = latch_edge->src; - freq = EDGE_FREQUENCY (header_edge); cnt = header_edge->count (); /* Redirect edges. */ @@ -901,10 +879,9 @@ loopify (edge latch_edge, edge header_ed remove_bb_from_loops (switch_bb); add_bb_to_loop (switch_bb, outer); - /* Fix frequencies. */ + /* Fix counts. */ if (redirect_all_edges) { - switch_bb->frequency = freq; switch_bb->count = cnt; } scale_loop_frequencies (loop, false_scale); @@ -1167,7 +1144,7 @@ duplicate_loop_to_header_edge (struct lo { /* Calculate coefficients by that we have to scale frequencies of duplicated loop bodies. 
*/ - freq_in = header->frequency; + freq_in = header->count.to_frequency (cfun); freq_le = EDGE_FREQUENCY (latch_edge); if (freq_in == 0) freq_in = 1; Index: cfgrtl.c =================================================================== --- cfgrtl.c (revision 254266) +++ cfgrtl.c (working copy) @@ -1533,6 +1533,7 @@ force_nonfallthru_and_redirect (edge e, basic_block bb = create_basic_block (BB_HEAD (e->dest), NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun)); + bb->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; /* Change the existing edge's source to be the new block, and add a new edge from the entry block to the new block. */ @@ -1628,7 +1629,6 @@ force_nonfallthru_and_redirect (edge e, jump_block = create_basic_block (new_head, NULL, e->src); jump_block->count = count; - jump_block->frequency = EDGE_FREQUENCY (e); /* Make sure new block ends up in correct hot/cold section. */ @@ -1652,7 +1652,6 @@ force_nonfallthru_and_redirect (edge e, { new_edge->probability = new_edge->probability.apply_scale (1, 2); jump_block->count = jump_block->count.apply_scale (1, 2); - jump_block->frequency /= 2; edge new_edge2 = make_edge (new_edge->src, target, e->flags & ~EDGE_FALLTHRU); new_edge2->probability = probability - new_edge->probability; @@ -2245,9 +2244,23 @@ void update_br_prob_note (basic_block bb) { rtx note; - if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ()) - return; note = find_reg_note (BB_END (bb), REG_BR_PROB, NULL_RTX); + if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ()) + { + if (note) + { + rtx *note_link, this_rtx; + + note_link = ®_NOTES (BB_END (bb)); + for (this_rtx = *note_link; this_rtx; this_rtx = XEXP (this_rtx, 1)) + if (this_rtx == note) + { + *note_link = XEXP (this_rtx, 1); + break; + } + } + return; + } if (!note || XINT (note, 0) == BRANCH_EDGE (bb)->probability.to_reg_br_prob_note ()) return; @@ -3623,7 +3636,6 @@ relink_block_chain (bool stay_in_cfglayo fprintf (dump_file, "compensation "); else 
fprintf (dump_file, "bb %i ", bb->index); - fprintf (dump_file, " [%i]\n", bb->frequency); } } @@ -5034,7 +5046,7 @@ rtl_account_profile_record (basic_block += insn_cost (insn, true) * bb->count.to_gcov_type (); else if (profile_status_for_fn (cfun) == PROFILE_GUESSED) record->time[after_pass] - += insn_cost (insn, true) * bb->frequency; + += insn_cost (insn, true) * bb->count.to_frequency (cfun); } } Index: cgraph.c =================================================================== --- cgraph.c (revision 254266) +++ cgraph.c (working copy) @@ -862,7 +862,7 @@ symbol_table::create_edge (cgraph_node * edge->next_callee = NULL; edge->lto_stmt_uid = 0; - edge->count = count; + edge->count = count.ipa (); edge->frequency = freq; gcc_checking_assert (freq >= 0); gcc_checking_assert (freq <= CGRAPH_FREQ_MAX); @@ -1308,7 +1308,7 @@ cgraph_edge::redirect_call_stmt_to_calle /* We are producing the final function body and will throw away the callgraph edges really soon. Reset the counts/frequencies to keep verifier happy in the case of roundoff errors. */ - e->count = gimple_bb (e->call_stmt)->count; + e->count = gimple_bb (e->call_stmt)->count.ipa (); e->frequency = compute_call_stmt_bb_frequency (e->caller->decl, gimple_bb (e->call_stmt)); } @@ -1338,7 +1338,7 @@ cgraph_edge::redirect_call_stmt_to_calle prob = profile_probability::even (); new_stmt = gimple_ic (e->call_stmt, dyn_cast<cgraph_node *> (ref->referred), - prob, e->count, e->count + e2->count); + prob); e->speculative = false; e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt, false); @@ -1644,7 +1644,7 @@ cgraph_update_edges_for_call_stmt_node ( /* Otherwise remove edge and create new one; we can't simply redirect since function has changed, so inline plan and other information attached to edge is invalid. 
*/ - count = e->count; + count = e->count.ipa (); frequency = e->frequency; if (e->indirect_unknown_callee || e->inline_failed) e->remove (); @@ -1655,7 +1655,7 @@ cgraph_update_edges_for_call_stmt_node ( { /* We are seeing new direct call; compute profile info based on BB. */ basic_block bb = gimple_bb (new_stmt); - count = bb->count; + count = bb->count.ipa (); frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); } @@ -3082,9 +3082,14 @@ bool cgraph_edge::verify_count_and_frequency () { bool error_found = false; - if (count < 0) + if (!count.verify ()) { - error ("caller edge count is negative"); + error ("caller edge count invalid"); + error_found = true; + } + if (!count.ipa_p ()) + { + error ("caller edge count is local"); error_found = true; } if (frequency < 0) @@ -3183,9 +3188,14 @@ cgraph_node::verify_node (void) identifier_to_locale (e->callee->name ())); error_found = true; } - if (count < 0) + if (!count.verify ()) + { + error ("cgraph count invalid"); + error_found = true; + } + if (!count.ipa_p ()) { - error ("execution count is negative"); + error ("cgraph count is local"); error_found = true; } if (global.inlined_to && same_comdat_group) @@ -3269,7 +3279,9 @@ cgraph_node::verify_node (void) { if (e->verify_count_and_frequency ()) error_found = true; + /* FIXME: re-enable once cgraph is converted to counts. */ if (gimple_has_body_p (e->caller->decl) + && 0 && !e->caller->global.inlined_to && !e->speculative /* Optimized out calls are redirected to __builtin_unreachable. 
*/ Index: cgraphbuild.c =================================================================== --- cgraphbuild.c (revision 254266) +++ cgraphbuild.c (working copy) @@ -190,21 +190,8 @@ record_eh_tables (cgraph_node *node, fun int compute_call_stmt_bb_frequency (tree decl, basic_block bb) { - int entry_freq = ENTRY_BLOCK_PTR_FOR_FN - (DECL_STRUCT_FUNCTION (decl))->frequency; - int freq = bb->frequency; - - if (profile_status_for_fn (DECL_STRUCT_FUNCTION (decl)) == PROFILE_ABSENT) - return CGRAPH_FREQ_BASE; - - if (!entry_freq) - entry_freq = 1, freq++; - - freq = freq * CGRAPH_FREQ_BASE / entry_freq; - if (freq > CGRAPH_FREQ_MAX) - freq = CGRAPH_FREQ_MAX; - - return freq; + return bb->count.to_cgraph_frequency + (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (decl))->count); } /* Mark address taken in STMT. */ @@ -415,7 +402,7 @@ cgraph_edge::rebuild_edges (void) node->remove_callees (); node->remove_all_references (); - node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); FOR_EACH_BB_FN (bb, cfun) { Index: cgraphunit.c =================================================================== --- cgraphunit.c (revision 254266) +++ cgraphunit.c (working copy) @@ -1601,12 +1601,9 @@ init_lowered_empty_function (tree decl, /* Create BB for body of the function and connect it properly. 
*/ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX; EXIT_BLOCK_PTR_FOR_FN (cfun)->count = count; - EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX; bb = create_basic_block (NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun)); bb->count = count; - bb->frequency = BB_FREQ_MAX; e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), bb, EDGE_FALLTHRU); e->probability = profile_probability::always (); e = make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -1852,8 +1849,12 @@ cgraph_node::expand_thunk (bool output_a else resdecl = DECL_RESULT (thunk_fndecl); + profile_count cfg_count = count; + if (!cfg_count.initialized_p ()) + cfg_count = profile_count::from_gcov_type (BB_FREQ_MAX).guessed_local (); + bb = then_bb = else_bb = return_bb - = init_lowered_empty_function (thunk_fndecl, true, count); + = init_lowered_empty_function (thunk_fndecl, true, cfg_count); bsi = gsi_start_bb (bb); @@ -1966,14 +1967,11 @@ cgraph_node::expand_thunk (bool output_a adjustment, because that's why we're emitting a thunk. */ then_bb = create_basic_block (NULL, bb); - then_bb->count = count - count.apply_scale (1, 16); - then_bb->frequency = BB_FREQ_MAX - BB_FREQ_MAX / 16; + then_bb->count = cfg_count - cfg_count.apply_scale (1, 16); return_bb = create_basic_block (NULL, then_bb); - return_bb->count = count; - return_bb->frequency = BB_FREQ_MAX; + return_bb->count = cfg_count; else_bb = create_basic_block (NULL, else_bb); - then_bb->count = count.apply_scale (1, 16); - then_bb->frequency = BB_FREQ_MAX / 16; + else_bb->count = cfg_count.apply_scale (1, 16); add_bb_to_loop (then_bb, bb->loop_father); add_bb_to_loop (return_bb, bb->loop_father); add_bb_to_loop (else_bb, bb->loop_father); @@ -2028,8 +2026,10 @@ cgraph_node::expand_thunk (bool output_a } cfun->gimple_df->in_ssa_p = true; + counts_to_freqs (); profile_status_for_fn (cfun) - = count.initialized_p () ? PROFILE_READ : PROFILE_GUESSED; + = cfg_count.initialized_p () && cfg_count.ipa_p () + ? 
PROFILE_READ : PROFILE_GUESSED; /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */ TREE_ASM_WRITTEN (thunk_fndecl) = false; delete_unreachable_blocks (); Index: except.c =================================================================== --- except.c (revision 254266) +++ except.c (working copy) @@ -1003,7 +1003,6 @@ dw2_build_landing_pads (void) bb = emit_to_new_bb_before (seq, label_rtx (lp->post_landing_pad)); bb->count = bb->next_bb->count; - bb->frequency = bb->next_bb->frequency; make_single_succ_edge (bb, bb->next_bb, e_flags); if (current_loops) { Index: final.c =================================================================== --- final.c (revision 254266) +++ final.c (working copy) @@ -694,8 +694,8 @@ compute_alignments (void) } loop_optimizer_init (AVOID_CFG_MODIFICATIONS); FOR_EACH_BB_FN (bb, cfun) - if (bb->frequency > freq_max) - freq_max = bb->frequency; + if (bb->count.to_frequency (cfun) > freq_max) + freq_max = bb->count.to_frequency (cfun); freq_threshold = freq_max / PARAM_VALUE (PARAM_ALIGN_THRESHOLD); if (dump_file) @@ -713,7 +713,8 @@ compute_alignments (void) if (dump_file) fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth %2i skipped.\n", - bb->index, bb->frequency, bb->loop_father->num, + bb->index, bb->count.to_frequency (cfun), + bb->loop_father->num, bb_loop_depth (bb)); continue; } @@ -731,7 +732,7 @@ compute_alignments (void) { fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth" " %2i fall %4i branch %4i", - bb->index, bb->frequency, bb->loop_father->num, + bb->index, bb->count.to_frequency (cfun), bb->loop_father->num, bb_loop_depth (bb), fallthru_frequency, branch_frequency); if (!bb->loop_father->inner && bb->loop_father->num) @@ -753,9 +754,10 @@ compute_alignments (void) if (!has_fallthru && (branch_frequency > freq_threshold - || (bb->frequency > bb->prev_bb->frequency * 10 - && (bb->prev_bb->frequency - <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency / 2)))) + || (bb->count.to_frequency (cfun) + > 
bb->prev_bb->count.to_frequency (cfun) * 10 + && (bb->prev_bb->count.to_frequency (cfun) + <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) / 2)))) { log = JUMP_ALIGN (label); if (dump_file) @@ -1942,8 +1944,6 @@ dump_basic_block_info (FILE *file, rtx_i edge_iterator ei; fprintf (file, "%s BLOCK %d", ASM_COMMENT_START, bb->index); - if (bb->frequency) - fprintf (file, " freq:%d", bb->frequency); if (bb->count.initialized_p ()) { fprintf (file, ", count:"); Index: gimple-pretty-print.c =================================================================== --- gimple-pretty-print.c (revision 254266) +++ gimple-pretty-print.c (working copy) @@ -82,21 +82,17 @@ debug_gimple_stmt (gimple *gs) by xstrdup_for_dump. */ static const char * -dump_profile (int frequency, profile_count &count) +dump_profile (profile_count &count) { - float minimum = 0.01f; - - gcc_assert (0 <= frequency && frequency <= REG_BR_PROB_BASE); - float fvalue = frequency * 100.0f / REG_BR_PROB_BASE; - if (fvalue < minimum && frequency > 0) - return "[0.01%]"; - char *buf; - if (count.initialized_p ()) - buf = xasprintf ("[%.2f%%] [count: %" PRId64 "]", fvalue, + if (!count.initialized_p ()) + return NULL; + if (count.ipa_p ()) + buf = xasprintf ("[count: %" PRId64 "]", + count.to_gcov_type ()); + else if (count.initialized_p ()) + buf = xasprintf ("[local count: %" PRId64 "]", count.to_gcov_type ()); - else - buf = xasprintf ("[%.2f%%] [count: INV]", fvalue); const char *ret = xstrdup_for_dump (buf); free (buf); @@ -2695,8 +2691,7 @@ dump_gimple_bb_header (FILE *outf, basic fprintf (outf, "%*sbb_%d:\n", indent, "", bb->index); else fprintf (outf, "%*s<bb %d> %s:\n", - indent, "", bb->index, dump_profile (bb->frequency, - bb->count)); + indent, "", bb->index, dump_profile (bb->count)); } } Index: gimple-ssa-isolate-paths.c =================================================================== --- gimple-ssa-isolate-paths.c (revision 254266) +++ gimple-ssa-isolate-paths.c (working copy) @@ -154,7 
+154,6 @@ isolate_path (basic_block bb, basic_bloc if (!duplicate) { duplicate = duplicate_block (bb, NULL, NULL); - bb->frequency = 0; bb->count = profile_count::zero (); if (!ret_zero) for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); ) @@ -168,7 +167,7 @@ isolate_path (basic_block bb, basic_bloc flush_pending_stmts (e2); /* Update profile only when redirection is really processed. */ - bb->frequency += EDGE_FREQUENCY (e); + bb->count += e->count (); } /* There may be more than one statement in DUPLICATE which exhibits Index: gimple-streamer-in.c =================================================================== --- gimple-streamer-in.c (revision 254266) +++ gimple-streamer-in.c (working copy) @@ -266,7 +266,6 @@ input_bb (struct lto_input_block *ib, en bb->count = profile_count::stream_in (ib).apply_scale (count_materialization_scale, REG_BR_PROB_BASE); - bb->frequency = streamer_read_hwi (ib); bb->flags = streamer_read_hwi (ib); /* LTO_bb1 has statements. LTO_bb0 does not. */ Index: gimple-streamer-out.c =================================================================== --- gimple-streamer-out.c (revision 254266) +++ gimple-streamer-out.c (working copy) @@ -210,7 +210,6 @@ output_bb (struct output_block *ob, basi streamer_write_uhwi (ob, bb->index); bb->count.stream_out (ob); - streamer_write_hwi (ob, bb->frequency); streamer_write_hwi (ob, bb->flags); if (!gsi_end_p (bsi) || phi_nodes (bb)) Index: haifa-sched.c =================================================================== --- haifa-sched.c (revision 254266) +++ haifa-sched.c (working copy) @@ -3917,8 +3917,8 @@ sched_pressure_start_bb (basic_block bb) - call_saved_regs_num[cl]). 
*/ { int i; - int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; - int bb_freq = bb->frequency; + int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); + int bb_freq = bb->count.to_frequency (cfun); if (bb_freq == 0) { @@ -8141,8 +8141,6 @@ init_before_recovery (basic_block *befor single->count = last->count; empty->count = last->count; - single->frequency = last->frequency; - empty->frequency = last->frequency; BB_COPY_PARTITION (single, last); BB_COPY_PARTITION (empty, last); @@ -8236,7 +8234,6 @@ sched_create_recovery_edges (basic_block in sel-sched.c `check_ds' in create_speculation_check. */ e->probability = profile_probability::very_unlikely (); rec->count = e->count (); - rec->frequency = EDGE_FREQUENCY (e); e2->probability = e->probability.invert (); rtx_code_label *label = block_label (second_bb); Index: hsa-gen.c =================================================================== --- hsa-gen.c (revision 254266) +++ hsa-gen.c (working copy) @@ -6374,7 +6374,7 @@ convert_switch_statements (void) edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); next_edge->probability = new_edge->probability.invert (); - next_bb->frequency = EDGE_FREQUENCY (next_edge); + next_bb->count = next_edge->count (); cur_bb = next_bb; } else /* Link last IF statement and default label Index: ipa-cp.c =================================================================== --- ipa-cp.c (revision 254266) +++ ipa-cp.c (working copy) @@ -3257,6 +3257,8 @@ ipcp_propagate_stage (struct ipa_topo_in if (dump_file) fprintf (dump_file, "\n Propagating constants:\n\n"); + max_count = profile_count::uninitialized (); + FOR_EACH_DEFINED_FUNCTION (node) { struct ipa_node_params *info = IPA_NODE_REF (node); @@ -3270,7 +3272,7 @@ ipcp_propagate_stage (struct ipa_topo_in } if (node->definition && !node->alias) overall_size += ipa_fn_summaries->get (node)->self_size; - if (node->count > max_count) + if (!(node->count > max_count)) max_count = node->count; } @@ 
-5125,7 +5127,7 @@ make_pass_ipa_cp (gcc::context *ctxt) void ipa_cp_c_finalize (void) { - max_count = profile_count::zero (); + max_count = profile_count::uninitialized (); overall_size = 0; max_new_size = 0; } Index: ipa-fnsummary.c =================================================================== --- ipa-fnsummary.c (revision 254266) +++ ipa-fnsummary.c (working copy) @@ -1608,7 +1608,7 @@ static basic_block get_minimal_bb (basic_block init_bb, basic_block use_bb) { struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); - if (l && l->header->frequency < init_bb->frequency) + if (l && l->header->count < init_bb->count) return l->header; return init_bb; } @@ -1664,20 +1664,21 @@ param_change_prob (gimple *stmt, int i) { int init_freq; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return REG_BR_PROB_BASE; if (SSA_NAME_IS_DEFAULT_DEF (base)) - init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); else init_freq = get_minimal_bb (gimple_bb (SSA_NAME_DEF_STMT (base)), - gimple_bb (stmt))->frequency; + gimple_bb (stmt))->count.to_frequency (cfun); if (!init_freq) init_freq = 1; - if (init_freq < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1); + if (init_freq < bb->count.to_frequency (cfun)) + return MAX (GCOV_COMPUTE_SCALE (init_freq, + bb->count.to_frequency (cfun)), 1); else return REG_BR_PROB_BASE; } @@ -1692,7 +1693,7 @@ param_change_prob (gimple *stmt, int i) if (init != error_mark_node) return 0; - if (!bb->frequency) + if (!bb->count.to_frequency (cfun)) return REG_BR_PROB_BASE; ao_ref_init (&refd, op); info.stmt = stmt; @@ -1708,17 +1709,17 @@ param_change_prob (gimple *stmt, int i) /* Assume that every memory is initialized at entry. TODO: Can we easilly determine if value is always defined and thus we may skip entry block? 
*/ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) - max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency; + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) + max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun); else max = 1; EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) - max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency); + max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->count.to_frequency (cfun)); BITMAP_FREE (info.bb_set); - if (max < bb->frequency) - return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1); + if (max < bb->count.to_frequency (cfun)) + return MAX (GCOV_COMPUTE_SCALE (max, bb->count.to_frequency (cfun)), 1); else return REG_BR_PROB_BASE; } Index: ipa-profile.c =================================================================== --- ipa-profile.c (revision 254266) +++ ipa-profile.c (working copy) @@ -179,53 +179,54 @@ ipa_profile_generate_summary (void) hash_table<histogram_hash> hashtable (10); FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) - FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - { - int time = 0; - int size = 0; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_CALL - && !gimple_call_fndecl (stmt)) - { - histogram_value h; - h = gimple_histogram_value_of_type - (DECL_STRUCT_FUNCTION (node->decl), - stmt, HIST_TYPE_INDIR_CALL); - /* No need to do sanity check: gimple_ic_transform already - takes away bad histograms. */ - if (h) - { - /* counter 0 is target, counter 1 is number of execution we called target, - counter 2 is total number of executions. 
*/ - if (h->hvalue.counters[2]) - { - struct cgraph_edge * e = node->get_edge (stmt); - if (e && !e->indirect_unknown_callee) - continue; - e->indirect_info->common_target_id - = h->hvalue.counters [0]; - e->indirect_info->common_target_probability - = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); - if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) - { - if (dump_file) - fprintf (dump_file, "Probability capped to 1\n"); - e->indirect_info->common_target_probability = REG_BR_PROB_BASE; - } - } - gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), - stmt, h); - } - } - time += estimate_num_insns (stmt, &eni_time_weights); - size += estimate_num_insns (stmt, &eni_size_weights); - } - if (bb->count.initialized_p ()) - account_time_size (&hashtable, histogram, bb->count.to_gcov_type (), - time, size); - } + if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) + { + int time = 0; + int size = 0; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_fndecl (stmt)) + { + histogram_value h; + h = gimple_histogram_value_of_type + (DECL_STRUCT_FUNCTION (node->decl), + stmt, HIST_TYPE_INDIR_CALL); + /* No need to do sanity check: gimple_ic_transform already + takes away bad histograms. */ + if (h) + { + /* counter 0 is target, counter 1 is number of execution we called target, + counter 2 is total number of executions. 
*/ + if (h->hvalue.counters[2]) + { + struct cgraph_edge * e = node->get_edge (stmt); + if (e && !e->indirect_unknown_callee) + continue; + e->indirect_info->common_target_id + = h->hvalue.counters [0]; + e->indirect_info->common_target_probability + = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]); + if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE) + { + if (dump_file) + fprintf (dump_file, "Probability capped to 1\n"); + e->indirect_info->common_target_probability = REG_BR_PROB_BASE; + } + } + gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl), + stmt, h); + } + } + time += estimate_num_insns (stmt, &eni_time_weights); + size += estimate_num_insns (stmt, &eni_size_weights); + } + if (bb->count.ipa_p () && bb->count.initialized_p ()) + account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (), + time, size); + } histogram.qsort (cmp_counts); } Index: ipa-split.c =================================================================== --- ipa-split.c (revision 254266) +++ ipa-split.c (working copy) @@ -444,7 +444,7 @@ consider_split (struct split_point *curr /* Do not split when we would end up calling function anyway. */ if (incoming_freq - >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency + >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100)) { /* When profile is guessed, we can not expect it to give us @@ -454,13 +454,14 @@ consider_split (struct split_point *curr is likely noticeable win. 
*/ if (back_edge && profile_status_for_fn (cfun) != PROFILE_READ - && incoming_freq < ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency) + && incoming_freq + < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Split before loop, accepting despite low frequencies %i %i.\n", incoming_freq, - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)); } else { @@ -714,8 +715,10 @@ consider_split (struct split_point *curr out smallest size of header. In future we might re-consider this heuristics. */ if (!best_split_point.split_bbs - || best_split_point.entry_bb->frequency > current->entry_bb->frequency - || (best_split_point.entry_bb->frequency == current->entry_bb->frequency + || best_split_point.entry_bb->count.to_frequency (cfun) + > current->entry_bb->count.to_frequency (cfun) + || (best_split_point.entry_bb->count.to_frequency (cfun) + == current->entry_bb->count.to_frequency (cfun) && best_split_point.split_size < current->split_size)) { @@ -1285,7 +1288,7 @@ split_function (basic_block return_bb, s FOR_EACH_EDGE (e, ei, return_bb->preds) if (bitmap_bit_p (split_point->split_bbs, e->src->index)) { - new_return_bb->frequency += EDGE_FREQUENCY (e); + new_return_bb->count += e->count (); redirect_edge_and_branch (e, new_return_bb); redirected = true; break; Index: ira-build.c =================================================================== --- ira-build.c (revision 254266) +++ ira-build.c (working copy) @@ -2202,7 +2202,8 @@ loop_compare_func (const void *v1p, cons return -1; if (! 
l1->to_remove_p && l2->to_remove_p) return 1; - if ((diff = l1->loop->header->frequency - l2->loop->header->frequency) != 0) + if ((diff = l1->loop->header->count.to_frequency (cfun) + - l2->loop->header->count.to_frequency (cfun)) != 0) return diff; if ((diff = (int) loop_depth (l1->loop) - (int) loop_depth (l2->loop)) != 0) return diff; @@ -2260,7 +2261,7 @@ mark_loops_for_removal (void) (ira_dump_file, " Mark loop %d (header %d, freq %d, depth %d) for removal (%s)\n", sorted_loops[i]->loop_num, sorted_loops[i]->loop->header->index, - sorted_loops[i]->loop->header->frequency, + sorted_loops[i]->loop->header->count.to_frequency (cfun), loop_depth (sorted_loops[i]->loop), low_pressure_loop_node_p (sorted_loops[i]->parent) && low_pressure_loop_node_p (sorted_loops[i]) @@ -2293,7 +2294,7 @@ mark_all_loops_for_removal (void) " Mark loop %d (header %d, freq %d, depth %d) for removal\n", ira_loop_nodes[i].loop_num, ira_loop_nodes[i].loop->header->index, - ira_loop_nodes[i].loop->header->frequency, + ira_loop_nodes[i].loop->header->count.to_frequency (cfun), loop_depth (ira_loop_nodes[i].loop)); } } Index: loop-doloop.c =================================================================== --- loop-doloop.c (revision 254266) +++ loop-doloop.c (working copy) @@ -506,7 +506,6 @@ doloop_modify (struct loop *loop, struct set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); set_zero->count = profile_count::uninitialized (); - set_zero->frequency = 0; te = single_succ_edge (preheader); for (; ass; ass = XEXP (ass, 1)) @@ -522,7 +521,6 @@ doloop_modify (struct loop *loop, struct also be very hard to show that it is impossible, so we must handle this case. 
*/ set_zero->count = preheader->count; - set_zero->frequency = preheader->frequency; } if (EDGE_COUNT (set_zero->preds) == 0) Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 254266) +++ loop-unroll.c (working copy) @@ -863,7 +863,7 @@ unroll_loop_runtime_iterations (struct l unsigned i, j; profile_probability p; basic_block preheader, *body, swtch, ezc_swtch = NULL; - int may_exit_copy, iter_freq, new_freq; + int may_exit_copy; profile_count iter_count, new_count; unsigned n_peel; edge e; @@ -970,12 +970,10 @@ unroll_loop_runtime_iterations (struct l /* Record the place where switch will be built for preconditioning. */ swtch = split_edge (loop_preheader_edge (loop)); - /* Compute frequency/count increments for each switch block and initialize + /* Compute count increments for each switch block and initialize innermost switch block. Switch blocks and peeled loop copies are built from innermost outward. */ - iter_freq = new_freq = swtch->frequency / (max_unroll + 1); iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1); - swtch->frequency = new_freq; swtch->count = new_count; for (i = 0; i < n_peel; i++) @@ -995,8 +993,7 @@ unroll_loop_runtime_iterations (struct l p = profile_probability::always ().apply_scale (1, i + 2); preheader = split_edge (loop_preheader_edge (loop)); - /* Add in frequency/count of edge from switch block. */ - preheader->frequency += iter_freq; + /* Add in count of edge from switch block. 
*/ preheader->count += iter_count; branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, block_label (preheader), p, @@ -1009,9 +1006,7 @@ unroll_loop_runtime_iterations (struct l swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); single_succ_edge (swtch)->probability = p.invert (); - new_freq += iter_freq; new_count += iter_count; - swtch->frequency = new_freq; swtch->count = new_count; e = make_edge (swtch, preheader, single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); @@ -1024,12 +1019,10 @@ unroll_loop_runtime_iterations (struct l p = profile_probability::always ().apply_scale (1, max_unroll + 1); swtch = ezc_swtch; preheader = split_edge (loop_preheader_edge (loop)); - /* Recompute frequency/count adjustments since initial peel copy may + /* Recompute count adjustments since initial peel copy may have exited and reduced those values that were computed above. */ - iter_freq = swtch->frequency / (max_unroll + 1); iter_count = swtch->count.apply_scale (1, max_unroll + 1); - /* Add in frequency/count of edge from switch block. */ - preheader->frequency += iter_freq; + /* Add in count of edge from switch block. 
*/ preheader->count += iter_count; branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, block_label (preheader), p, Index: lto-streamer-in.c =================================================================== --- lto-streamer-in.c (revision 254266) +++ lto-streamer-in.c (working copy) @@ -1192,6 +1192,7 @@ input_function (tree fn_decl, struct dat gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest)); } + counts_to_freqs (); fixup_call_stmt_edges (node, stmts); execute_all_ipa_stmt_fixups (node, stmts); Index: omp-expand.c =================================================================== --- omp-expand.c (revision 254266) +++ omp-expand.c (working copy) @@ -1399,6 +1399,7 @@ expand_omp_taskreg (struct omp_region *r if (optimize) optimize_omp_library_calls (entry_stmt); + counts_to_freqs (); cgraph_edge::rebuild_edges (); /* Some EH regions might become dead, see PR34608. If Index: omp-simd-clone.c =================================================================== --- omp-simd-clone.c (revision 254266) +++ omp-simd-clone.c (working copy) @@ -1132,6 +1132,7 @@ simd_clone_adjust (struct cgraph_node *n { basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src; incr_bb = create_empty_bb (orig_exit); + incr_bb->count = profile_count::zero (); add_bb_to_loop (incr_bb, body_bb->loop_father); /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty flag. Set it now to be a FALLTHRU_EDGE. 
*/ @@ -1142,11 +1143,13 @@ simd_clone_adjust (struct cgraph_node *n { edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i); redirect_edge_succ (e, incr_bb); + incr_bb->count += e->count (); } } else if (node->simdclone->inbranch) { incr_bb = create_empty_bb (entry_bb); + incr_bb->count = profile_count::zero (); add_bb_to_loop (incr_bb, body_bb->loop_father); } @@ -1243,6 +1246,7 @@ simd_clone_adjust (struct cgraph_node *n gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::unlikely ().guessed (); + incr_bb->count += e->count (); edge fallthru = FALLTHRU_EDGE (loop->header); fallthru->flags = EDGE_FALSE_VALUE; fallthru->probability = profile_probability::likely ().guessed (); Index: predict.c =================================================================== --- predict.c (revision 254266) +++ predict.c (working copy) @@ -137,12 +137,12 @@ maybe_hot_frequency_p (struct function * if (profile_status_for_fn (fun) == PROFILE_ABSENT) return true; if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE - && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3)) + && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (cfun) * 2 / 3)) return false; if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0) return false; if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) - < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency) + < ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (cfun)) return false; return true; } @@ -175,10 +175,14 @@ set_hot_bb_threshold (gcov_type min) /* Return TRUE if frequency FREQ is considered to be hot. */ bool -maybe_hot_count_p (struct function *, profile_count count) +maybe_hot_count_p (struct function *fun, profile_count count) { if (!count.initialized_p ()) return true; + if (!count.ipa_p ()) + return maybe_hot_frequency_p (fun, count.to_frequency (fun)); + if (count.ipa () == profile_count::zero ()) + return false; /* Code executed at most once is not hot. 
*/ if (count <= MAX (profile_info ? profile_info->runs : 1, 1)) return false; @@ -192,9 +196,7 @@ bool maybe_hot_bb_p (struct function *fun, const_basic_block bb) { gcc_checking_assert (fun); - if (!maybe_hot_count_p (fun, bb->count)) - return false; - return maybe_hot_frequency_p (fun, bb->frequency); + return maybe_hot_count_p (fun, bb->count); } /* Return true in case BB can be CPU intensive and should be optimized @@ -203,9 +205,7 @@ maybe_hot_bb_p (struct function *fun, co bool maybe_hot_edge_p (edge e) { - if (!maybe_hot_count_p (cfun, e->count ())) - return false; - return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e)); + return maybe_hot_count_p (cfun, e->count ()); } /* Return true if profile COUNT and FREQUENCY, or function FUN static @@ -213,7 +213,7 @@ maybe_hot_edge_p (edge e) static bool probably_never_executed (struct function *fun, - profile_count count, int) + profile_count count) { gcc_checking_assert (fun); if (count == profile_count::zero ()) @@ -238,7 +238,7 @@ probably_never_executed (struct function bool probably_never_executed_bb_p (struct function *fun, const_basic_block bb) { - return probably_never_executed (fun, bb->count, bb->frequency); + return probably_never_executed (fun, bb->count); } @@ -259,7 +259,7 @@ probably_never_executed_edge_p (struct f { if (unlikely_executed_edge_p (e)) return true; - return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e)); + return probably_never_executed (fun, e->count ()); } /* Return true when current function should always be optimized for size. 
*/ @@ -1289,7 +1289,8 @@ combine_predictions_for_bb (basic_block } clear_bb_predictions (bb); - if (!bb->count.initialized_p () && !dry_run) + if ((!bb->count.nonzero_p () || !first->probability.initialized_p ()) + && !dry_run) { first->probability = profile_probability::from_reg_br_prob_base (combined_probability); @@ -3014,10 +3015,7 @@ propagate_freq (basic_block head, bitmap BLOCK_INFO (bb)->npredecessors = count; /* When function never returns, we will never process exit block. */ if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - bb->count = profile_count::zero (); - bb->frequency = 0; - } + bb->count = profile_count::zero (); } BLOCK_INFO (head)->frequency = 1; @@ -3050,7 +3048,10 @@ propagate_freq (basic_block head, bitmap * BLOCK_INFO (e->src)->frequency / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? + e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (e->src)->frequency; tmp *= real_inv_br_prob_base; frequency += tmp; @@ -3082,7 +3083,10 @@ propagate_freq (basic_block head, bitmap = ((e->probability * BLOCK_INFO (bb)->frequency) / REG_BR_PROB_BASE); */ - sreal tmp = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. */ + sreal tmp = e->probability.initialized_p () ? 
+ e->probability.to_reg_br_prob_base () : 0; tmp *= BLOCK_INFO (bb)->frequency; EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; } @@ -3196,10 +3200,20 @@ drop_profile (struct cgraph_node *node, } basic_block bb; - FOR_ALL_BB_FN (bb, fn) + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + if (flag_guess_branch_prob) { - bb->count = profile_count::uninitialized (); + FOR_ALL_BB_FN (bb, fn) + bb->count = bb->count.guessed_local (); } + else + { + FOR_ALL_BB_FN (bb, fn) + { + bb->count = profile_count::uninitialized (); + } + } + pop_cfun (); struct cgraph_edge *e; for (e = node->callees; e; e = e->next_caller) @@ -3300,33 +3314,16 @@ handle_missing_profiles (void) bool counts_to_freqs (void) { - gcov_type count_max; - profile_count true_count_max = profile_count::zero (); + profile_count true_count_max = profile_count::uninitialized (); basic_block bb; - /* Don't overwrite the estimated frequencies when the profile for - the function is missing. We may drop this function PROFILE_GUESSED - later in drop_profile (). */ - if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () - || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()) - return false; - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > true_count_max) - true_count_max = bb->count; - - /* If we have no counts to base frequencies on, keep those that are - already there. */ - if (!(true_count_max > 0)) - return false; + if (!(bb->count < true_count_max)) + true_count_max = true_count_max.max (bb->count); - count_max = true_count_max.to_gcov_type (); + cfun->cfg->count_max = true_count_max; - FOR_ALL_BB_FN (bb, cfun) - if (bb->count.initialized_p ()) - bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max); - - return true; + return true_count_max.nonzero_p (); } /* Return true if function is likely to be expensive, so there is no point to @@ -3348,11 +3345,11 @@ expensive_function_p (int threshold) /* Frequencies are out of range. 
This either means that function contains internal loop executing more than BB_FREQ_MAX times or profile feedback is available and function has not been executed at all. */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) == 0) return true; /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ - limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; + limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * threshold; FOR_EACH_BB_FN (bb, cfun) { rtx_insn *insn; @@ -3360,7 +3357,7 @@ expensive_function_p (int threshold) FOR_BB_INSNS (bb, insn) if (active_insn_p (insn)) { - sum += bb->frequency; + sum += bb->count.to_frequency (cfun); if (sum > limit) return true; } @@ -3409,7 +3406,6 @@ propagate_unlikely_bbs_forward (void) "Basic block %i is marked unlikely by forward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; } else bb->aux = NULL; @@ -3440,9 +3436,6 @@ determine_unlikely_bbs () bb->count = profile_count::zero (); } - if (bb->count == profile_count::zero ()) - bb->frequency = 0; - FOR_EACH_EDGE (e, ei, bb->succs) if (!(e->probability == profile_probability::never ()) && unlikely_executed_edge_p (e)) @@ -3497,7 +3490,6 @@ determine_unlikely_bbs () "Basic block %i is marked unlikely by backward prop\n", bb->index); bb->count = profile_count::zero (); - bb->frequency = 0; FOR_EACH_EDGE (e, ei, bb->preds) if (!(e->probability == profile_probability::never ())) { @@ -3554,8 +3546,13 @@ estimate_bb_frequencies (bool force) FOR_EACH_EDGE (e, ei, bb->succs) { - EDGE_INFO (e)->back_edge_prob - = e->probability.to_reg_br_prob_base (); + /* FIXME: Graphite is producing edges with no profile. Once + this is fixed, drop this. 
*/ + if (e->probability.initialized_p ()) + EDGE_INFO (e)->back_edge_prob + = e->probability.to_reg_br_prob_base (); + else + EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; } } @@ -3564,16 +3561,28 @@ estimate_bb_frequencies (bool force) to outermost to examine frequencies for back edges. */ estimate_loops (); + bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p (); + freq_max = 0; FOR_EACH_BB_FN (bb, cfun) if (freq_max < BLOCK_INFO (bb)->frequency) freq_max = BLOCK_INFO (bb)->frequency; freq_max = real_bb_freq_max / freq_max; + cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; - bb->frequency = tmp.to_int (); + profile_count count = profile_count::from_gcov_type (tmp.to_int ()); + + /* If we have profile feedback in which this function was never + executed, then preserve this info. 
*/ + if (global0) + bb->count = count.global0 (); + else if (!(bb->count == profile_count::zero ())) + bb->count = count.guessed_local (); + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); } free_aux_for_blocks (); @@ -3598,7 +3607,8 @@ compute_function_frequency (void) if (profile_status_for_fn (cfun) != PROFILE_READ) { int flags = flags_from_decl_or_type (current_function_decl); - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero () + if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p () + && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ()) || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) != NULL) { @@ -3717,7 +3727,7 @@ pass_profile::execute (function *fun) { struct loop *loop; FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) - if (loop->header->frequency) + if (loop->header->count.initialized_p ()) fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", loop->num, (int)expected_loop_iterations_unbounded (loop)); @@ -3843,15 +3853,12 @@ rebuild_frequencies (void) which may also lead to frequencies incorrectly reduced to 0. There is less precision in the probabilities, so we only do this for small max counts. */ - profile_count count_max = profile_count::zero (); + cfun->cfg->count_max = profile_count::uninitialized (); basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - if (bb->count > count_max) - count_max = bb->count; + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); - if (profile_status_for_fn (cfun) == PROFILE_GUESSED - || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ - && count_max < REG_BR_PROB_BASE / 10)) + if (profile_status_for_fn (cfun) == PROFILE_GUESSED) { loop_optimizer_init (0); add_noreturn_fake_exit_edges (); @@ -4017,17 +4024,19 @@ force_edge_cold (edge e, bool impossible after loop transforms. 
*/ if (!(prob_sum > profile_probability::never ()) && count_sum == profile_count::zero () - && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1)) + && single_pred_p (e->src) && e->src->count.to_frequency (cfun) + > (impossible ? 0 : 1)) { - int old_frequency = e->src->frequency; + int old_frequency = e->src->count.to_frequency (cfun); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Making bb %i %s.\n", e->src->index, impossible ? "impossible" : "cold"); - e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); + int new_frequency = MIN (e->src->count.to_frequency (cfun), + impossible ? 0 : 1); if (impossible) e->src->count = profile_count::zero (); else - e->src->count = e->count ().apply_scale (e->src->frequency, + e->src->count = e->count ().apply_scale (new_frequency, old_frequency); force_edge_cold (single_pred_edge (e->src), impossible); } Index: profile-count.c =================================================================== --- profile-count.c (revision 254266) +++ profile-count.c (working copy) @@ -42,7 +42,11 @@ profile_count::dump (FILE *f) const else { fprintf (f, "%" PRId64, m_val); - if (m_quality == profile_adjusted) + if (m_quality == profile_guessed_local) + fprintf (f, " (estimated locally)"); + else if (m_quality == profile_guessed_global0) + fprintf (f, " (estimated locally, globally 0)"); + else if (m_quality == profile_adjusted) fprintf (f, " (adjusted)"); else if (m_quality == profile_afdo) fprintf (f, " (auto FDO)"); @@ -65,6 +69,7 @@ profile_count::debug () const bool profile_count::differs_from_p (profile_count other) const { + gcc_checking_assert (compatible_p (other)); if (!initialized_p () || !other.initialized_p ()) return false; if ((uint64_t)m_val - (uint64_t)other.m_val < 100 @@ -213,3 +218,40 @@ slow_safe_scale_64bit (uint64_t a, uint6 *res = (uint64_t) -1; return false; } + +/* Return count as frequency within FUN scaled in range 0 to REG_FREQ_MAX + Used for legacy code and should not 
be used anymore. */ + +int +profile_count::to_frequency (struct function *fun) const +{ + if (!initialized_p ()) + return BB_FREQ_MAX; + if (*this == profile_count::zero ()) + return 0; + gcc_assert (REG_BR_PROB_BASE == BB_FREQ_MAX + && fun->cfg->count_max.initialized_p ()); + profile_probability prob = probability_in (fun->cfg->count_max); + if (!prob.initialized_p ()) + return REG_BR_PROB_BASE; + return prob.to_reg_br_prob_base (); +} + +/* Return count as frequency within FUN scaled in range 0 to CGRAPH_FREQ_MAX + where CGRAPH_FREQ_BASE means that count equals to entry block count. + Used for legacy code and should not be used anymore. */ + +int +profile_count::to_cgraph_frequency (profile_count entry_bb_count) const +{ + if (!initialized_p ()) + return CGRAPH_FREQ_BASE; + if (*this == profile_count::zero ()) + return 0; + gcc_checking_assert (entry_bb_count.initialized_p ()); + uint64_t scale; + if (!safe_scale_64bit (!entry_bb_count.m_val ? m_val + 1 : m_val, + CGRAPH_FREQ_BASE, MAX (1, entry_bb_count.m_val), &scale)) + return CGRAPH_FREQ_MAX; + return MIN (scale, CGRAPH_FREQ_MAX); +} Index: profile-count.h =================================================================== --- profile-count.h (revision 254266) +++ profile-count.h (working copy) @@ -21,21 +21,37 @@ along with GCC; see the file COPYING3. #ifndef GCC_PROFILE_COUNT_H #define GCC_PROFILE_COUNT_H +struct function; + /* Quality of the profile count. Because gengtype does not support enums inside of classes, this is in global namespace. */ enum profile_quality { + /* Profile is based on static branch prediction heuristics and may + or may not match reality. It is local to function and can not be compared + inter-procedurally. Never used by probabilities (they are always local). + */ + profile_guessed_local = 0, + /* Profile was read by feedback and was 0, we used local heuristics to guess + better. This is the case of functions not run in profile feedback. + Never used by probabilities. 
*/ + profile_guessed_global0 = 1, + + /* Profile is based on static branch prediction heuristics. It may or may - not reflect the reality. */ - profile_guessed = 0, + not reflect the reality but it can be compared interprocedurally + (for example, we inlined function w/o profile feedback into function + with feedback and propagated from that). + Never used by probabilities. */ + profile_guessed = 2, /* Profile was determined by autofdo. */ - profile_afdo = 1, + profile_afdo = 3, /* Profile was originally based on feedback but it was adjusted by code duplicating optimization. It may not precisely reflect the particular code path. */ - profile_adjusted = 2, + profile_adjusted = 4, /* Profile was read from profile feedback or determined by accurate static method. */ - profile_precise = 3 + profile_precise = 5 }; /* The base value for branch probability notes and edge probabilities. */ @@ -114,15 +130,15 @@ safe_scale_64bit (uint64_t a, uint64_t b class GTY((user)) profile_probability { - static const int n_bits = 30; + static const int n_bits = 29; /* We can technically use ((uint32_t) 1 << (n_bits - 1)) - 2 but that will lead to harder multiplication sequences. 
*/ static const uint32_t max_probability = (uint32_t) 1 << (n_bits - 2); static const uint32_t uninitialized_probability = ((uint32_t) 1 << (n_bits - 1)) - 1; - uint32_t m_val : 30; - enum profile_quality m_quality : 2; + uint32_t m_val : 29; + enum profile_quality m_quality : 3; friend class profile_count; public: @@ -226,14 +242,14 @@ public: static profile_probability from_reg_br_prob_note (int v) { profile_probability ret; - ret.m_val = ((unsigned int)v) / 4; - ret.m_quality = (enum profile_quality)(v & 3); + ret.m_val = ((unsigned int)v) / 8; + ret.m_quality = (enum profile_quality)(v & 7); return ret; } int to_reg_br_prob_note () const { gcc_checking_assert (initialized_p ()); - int ret = m_val * 4 + m_quality; + int ret = m_val * 8 + m_quality; gcc_checking_assert (profile_probability::from_reg_br_prob_note (ret) == *this); return ret; @@ -489,8 +505,9 @@ public: { if (m_val == uninitialized_probability) return m_quality == profile_guessed; - else - return m_val <= max_probability; + else if (m_quality < profile_guessed) + return false; + return m_val <= max_probability; } /* Comparsions are three-state and conservative. False is returned if @@ -530,9 +547,32 @@ public: void stream_out (struct lto_output_stream *); }; -/* Main data type to hold profile counters in GCC. In most cases profile - counts originate from profile feedback. They are 64bit integers - representing number of executions during the train run. +/* Main data type to hold profile counters in GCC. Profile counts originate + either from profile feedback, static profile estimation or both. We do not + perform whole program profile propagation and thus profile estimation + counters are often local to function, while counters from profile feedback + (or special cases of profile estimation) can be used inter-procedurally. + + There are 3 basic types + 1) local counters which are result of intra-procedural static profile + estimation. 
+ 2) ipa counters which are result of profile feedback or special case + of static profile estimation (such as in function main). + 3) counters which count as 0 inter-procedurally (because given function + was never run in train feedback) but they hold local static profile + estimate. + + Counters of type 1 and 3 can not be mixed with counters of different type + within operation (because whole function should use one type of counter) + with exception that global zero mix in most operations where outcome is + well defined. + + To take local counter and use it inter-procedurally use ipa member function + which strips information irrelevant at the inter-procedural level. + + Counters are 61bit integers representing number of executions during the + train run or normalized frequency within the function. + As the profile is maintained during the compilation, many adjustments are made. Not all transformations can be made precisely, most importantly when code is being duplicated. It also may happen that part of CFG has @@ -567,12 +607,25 @@ class GTY(()) profile_count 64bit. Although a counter cannot be negative, we use a signed type to hold various extra stages. */ - static const int n_bits = 62; + static const int n_bits = 61; static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2; static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1; uint64_t m_val : n_bits; - enum profile_quality m_quality : 2; + enum profile_quality m_quality : 3; + + /* Return true if both values can meaningfully appear in single function + body. We have either all counters in function local or global, otherwise + operations between them are not really defined well. 
*/ + bool compatible_p (const profile_count other) const + { + if (!initialized_p () || !other.initialized_p ()) + return true; + if (*this == profile_count::zero () + || other == profile_count::zero ()) + return true; + return ipa_p () == other.ipa_p (); + } public: /* Used for counters which are expected to be never executed. */ @@ -597,7 +650,7 @@ public: { profile_count c; c.m_val = uninitialized_count; - c.m_quality = profile_guessed; + c.m_quality = profile_guessed_local; return c; } @@ -630,6 +683,11 @@ public: { return m_quality >= profile_adjusted; } + /* Return true if value can be operated inter-procedurally. */ + bool ipa_p () const + { + return !initialized_p () || m_quality >= profile_guessed_global0; + } /* When merging basic blocks, the two different profile counts are unified. Return true if this can be done without losing info about profile. @@ -671,6 +729,7 @@ public: return profile_count::uninitialized (); profile_count ret; + gcc_checking_assert (compatible_p (other)); ret.m_val = m_val + other.m_val; ret.m_quality = MIN (m_quality, other.m_quality); return ret; @@ -688,6 +747,7 @@ public: return *this = profile_count::uninitialized (); else { + gcc_checking_assert (compatible_p (other)); m_val += other.m_val; m_quality = MIN (m_quality, other.m_quality); } @@ -699,6 +759,7 @@ public: return *this; if (!initialized_p () || !other.initialized_p ()) return profile_count::uninitialized (); + gcc_checking_assert (compatible_p (other)); profile_count ret; ret.m_val = m_val >= other.m_val ? m_val - other.m_val : 0; ret.m_quality = MIN (m_quality, other.m_quality); @@ -712,6 +773,7 @@ public: return *this = profile_count::uninitialized (); else { + gcc_checking_assert (compatible_p (other)); m_val = m_val >= other.m_val ? m_val - other.m_val: 0; m_quality = MIN (m_quality, other.m_quality); } @@ -721,48 +783,115 @@ public: /* Return false if profile_count is bogus. 
*/ bool verify () const { - return m_val != uninitialized_count || m_quality == profile_guessed; + return m_val != uninitialized_count || m_quality == profile_guessed_local; } /* Comparsions are three-state and conservative. False is returned if the inequality can not be decided. */ bool operator< (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val < other.m_val; + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return !(other == profile_count::zero ()); + if (other == profile_count::zero ()) + return false; + gcc_checking_assert (compatible_p (other)); + return m_val < other.m_val; } bool operator> (const profile_count &other) const { + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return false; + if (other == profile_count::zero ()) + return !(*this == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); return initialized_p () && other.initialized_p () && m_val > other.m_val; } bool operator< (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val < (uint64_t) other; } bool operator> (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val > (uint64_t) other; } bool operator<= (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val <= other.m_val; + if (!initialized_p () || !other.initialized_p ()) + return false; + if (*this == profile_count::zero ()) + return true; + if (other == profile_count::zero ()) + return (*this == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); + return m_val <= other.m_val; } bool operator>= (const profile_count &other) const { - return initialized_p () && other.initialized_p () && m_val >= other.m_val; + if (!initialized_p () || 
!other.initialized_p ()) + return false; + if (other == profile_count::zero ()) + return true; + if (*this == profile_count::zero ()) + return !(other == profile_count::zero ()); + gcc_checking_assert (compatible_p (other)); + return m_val >= other.m_val; } bool operator<= (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val <= (uint64_t) other; } bool operator>= (const gcov_type other) const { + gcc_checking_assert (ipa_p ()); gcc_checking_assert (other >= 0); return initialized_p () && m_val >= (uint64_t) other; } + /* Return true when value is not zero and can be used for scaling. + This is different from *this > 0 because that requires counter to + be IPA. */ + bool nonzero_p () const + { + return initialized_p () && m_val != 0; + } + + /* Make counter forcibly nonzero. */ + profile_count force_nonzero () const + { + if (!initialized_p ()) + return *this; + profile_count ret = *this; + if (ret.m_val == 0) + ret.m_val = 1; + return ret; + } + + profile_count max (profile_count other) const + { + if (!initialized_p ()) + return other; + if (!other.initialized_p ()) + return *this; + if (*this == profile_count::zero ()) + return other; + if (other == profile_count::zero ()) + return *this; + gcc_checking_assert (compatible_p (other)); + if (m_val < other.m_val || (m_val == other.m_val + && m_quality < other.m_quality)) + return other; + return *this; } /* PROB is a probability in scale 0...REG_BR_PROB_BASE. Scale counter accordingly. 
*/ @@ -814,13 +943,13 @@ public: } profile_count apply_scale (profile_count num, profile_count den) const { - if (m_val == 0) + if (*this == profile_count::zero ()) return *this; - if (num.m_val == 0) + if (num == profile_count::zero ()) return num; if (!initialized_p () || !num.initialized_p () || !den.initialized_p ()) return profile_count::uninitialized (); - gcc_checking_assert (den > 0); + gcc_checking_assert (den.m_val); if (num == den) return *this; @@ -828,7 +957,30 @@ public: uint64_t val; safe_scale_64bit (m_val, num.m_val, den.m_val, &val); ret.m_val = MIN (val, max_count); - ret.m_quality = MIN (m_quality, profile_adjusted); + ret.m_quality = MIN (MIN (MIN (m_quality, profile_adjusted), + num.m_quality), den.m_quality); + if (num.ipa_p () && !ret.ipa_p ()) + ret.m_quality = MIN (num.m_quality, profile_guessed); + return ret; + } + + /* Return THIS with quality dropped to GUESSED_LOCAL. */ + profile_count guessed_local () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_local; + return ret; + } + + /* We know that profile is globally0 but keep local profile if present. */ + profile_count global0 () const + { + profile_count ret = *this; + if (!initialized_p ()) + return *this; + ret.m_quality = profile_guessed_global0; return ret; } @@ -836,10 +988,21 @@ public: profile_count guessed () const { profile_count ret = *this; - ret.m_quality = profile_guessed; + ret.m_quality = MIN (ret.m_quality, profile_guessed); return ret; } + /* Return variant of profile count which is always safe to compare + across functions. */ + profile_count ipa () const + { + if (m_quality > profile_guessed_global0) + return *this; + if (m_quality == profile_guessed_global0) + return profile_count::zero (); + return profile_count::uninitialized (); + } + /* Return THIS with quality dropped to AFDO. */ profile_count afdo () const { @@ -852,21 +1015,26 @@ public: OVERALL. 
*/ profile_probability probability_in (const profile_count overall) const { - if (!m_val) + if (*this == profile_count::zero ()) return profile_probability::never (); if (!initialized_p () || !overall.initialized_p () || !overall.m_val) return profile_probability::uninitialized (); profile_probability ret; - if (overall < m_val) + gcc_checking_assert (compatible_p (overall)); + + if (overall.m_val < m_val) ret.m_val = profile_probability::max_probability; else ret.m_val = RDIV (m_val * profile_probability::max_probability, overall.m_val); - ret.m_quality = MIN (m_quality, overall.m_quality); + ret.m_quality = MAX (MIN (m_quality, overall.m_quality), profile_guessed); return ret; } + int to_frequency (struct function *fun) const; + int to_cgraph_frequency (profile_count entry_bb_count) const; + /* Output THIS to F. */ void dump (FILE *f) const; Index: profile.c =================================================================== --- profile.c (revision 254266) +++ profile.c (working copy) @@ -476,38 +476,6 @@ read_profile_edge_counts (gcov_type *exe return num_edges; } -#define OVERLAP_BASE 10000 - -/* Compare the static estimated profile to the actual profile, and - return the "degree of overlap" measure between them. - - Degree of overlap is a number between 0 and OVERLAP_BASE. It is - the sum of each basic block's minimum relative weights between - two profiles. And overlap of OVERLAP_BASE means two profiles are - identical. 
*/ - -static int -compute_frequency_overlap (void) -{ - gcov_type count_total = 0, freq_total = 0; - int overlap = 0; - basic_block bb; - - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - { - count_total += bb_gcov_count (bb); - freq_total += bb->frequency; - } - - if (count_total == 0 || freq_total == 0) - return 0; - - FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - overlap += MIN (bb_gcov_count (bb) * OVERLAP_BASE / count_total, - bb->frequency * OVERLAP_BASE / freq_total); - - return overlap; -} /* Compute the branch probabilities for the various branches. Annotate them accordingly. @@ -676,14 +644,6 @@ compute_branch_probabilities (unsigned c } } } - if (dump_file) - { - int overlap = compute_frequency_overlap (); - gimple_dump_cfg (dump_file, dump_flags); - fprintf (dump_file, "Static profile overlap: %d.%d%%\n", - overlap / (OVERLAP_BASE / 100), - overlap % (OVERLAP_BASE / 100)); - } total_num_passes += passes; if (dump_file) @@ -829,10 +789,18 @@ compute_branch_probabilities (unsigned c } } - FOR_ALL_BB_FN (bb, cfun) - { + /* If we have real data, use them! */ + if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun)) + || !flag_guess_branch_probability) + FOR_ALL_BB_FN (bb, cfun) bb->count = profile_count::from_gcov_type (bb_gcov_count (bb)); - } + /* If function was not trained, preserve local estimates including statically + determined zero counts. */ + else + FOR_ALL_BB_FN (bb, cfun) + if (bb->count != profile_count::zero ()) + bb->count = bb->count.global0 (); + bb_gcov_counts.release (); delete edge_gcov_counts; edge_gcov_counts = NULL; Index: regs.h =================================================================== --- regs.h (revision 254266) +++ regs.h (working copy) @@ -130,8 +130,10 @@ extern size_t reg_info_p_size; frequency. */ #define REG_FREQ_FROM_BB(bb) (optimize_function_for_size_p (cfun) \ ? REG_FREQ_MAX \ - : ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ - ? 
((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ + : ((bb)->count.to_frequency (cfun) \ + * REG_FREQ_MAX / BB_FREQ_MAX) \ + ? ((bb)->count.to_frequency (cfun) \ + * REG_FREQ_MAX / BB_FREQ_MAX) \ : 1) /* Indexed by N, gives number of insns in which register N dies. Index: sched-ebb.c =================================================================== --- sched-ebb.c (revision 254266) +++ sched-ebb.c (working copy) @@ -231,11 +231,9 @@ rank (rtx_insn *insn1, rtx_insn *insn2) basic_block bb1 = BLOCK_FOR_INSN (insn1); basic_block bb2 = BLOCK_FOR_INSN (insn2); - if (bb1->count > bb2->count - || bb1->frequency > bb2->frequency) + if (bb1->count > bb2->count) return -1; - if (bb1->count < bb2->count - || bb1->frequency < bb2->frequency) + if (bb1->count < bb2->count) return 1; return 0; } Index: shrink-wrap.c =================================================================== --- shrink-wrap.c (revision 254266) +++ shrink-wrap.c (working copy) @@ -561,7 +561,7 @@ handle_simple_exit (edge e) BB_END (old_bb) = end; redirect_edge_succ (e, new_bb); - new_bb->frequency = EDGE_FREQUENCY (e); + new_bb->count = e->count (); e->flags |= EDGE_FALLTHRU; e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -887,7 +887,7 @@ try_shrink_wrapping (edge *entry_edge, r if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) { num += EDGE_FREQUENCY (e); - den += e->src->frequency; + den += e->src->count.to_frequency (cfun); } if (den == 0) @@ -920,8 +920,6 @@ try_shrink_wrapping (edge *entry_edge, r if (dump_file) fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index); - bb->frequency = RDIV (num * bb->frequency, den); - dup->frequency -= bb->frequency; bb->count = bb->count.apply_scale (num, den); dup->count -= bb->count; } @@ -995,8 +993,7 @@ try_shrink_wrapping (edge *entry_edge, r continue; } - new_bb->count += e->src->count.apply_probability (e->probability); - new_bb->frequency += EDGE_FREQUENCY (e); + new_bb->count += e->count (); 
redirect_edge_and_branch_force (e, new_bb); if (dump_file) @@ -1181,7 +1178,7 @@ place_prologue_for_one_component (unsign work: this does not always add up to the block frequency at all, and even if it does, rounding error makes for bad decisions. */ - SW (bb)->own_cost = bb->frequency; + SW (bb)->own_cost = bb->count.to_frequency (cfun); edge e; edge_iterator ei; Index: testsuite/gcc.dg/no-strict-overflow-3.c =================================================================== --- testsuite/gcc.dg/no-strict-overflow-3.c (revision 254266) +++ testsuite/gcc.dg/no-strict-overflow-3.c (working copy) @@ -9,7 +9,7 @@ int foo (int i, int j) { - return i + 100 < j + 1000; + return i + 100 < j + 1234; } -/* { dg-final { scan-tree-dump "1000" "optimized" } } */ +/* { dg-final { scan-tree-dump "1234" "optimized" } } */ Index: testsuite/gcc.dg/strict-overflow-3.c =================================================================== --- testsuite/gcc.dg/strict-overflow-3.c (revision 254266) +++ testsuite/gcc.dg/strict-overflow-3.c (working copy) @@ -9,7 +9,7 @@ int foo (int i, int j) { - return i + 100 < j + 1000; + return i + 100 < j + 1234; } -/* { dg-final { scan-tree-dump-not "1000" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "1234" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c (working copy) @@ -290,7 +290,7 @@ RNG (0, 6, 8, "%s%ls", "1", L"2"); /* Only conditional calls to must_not_eliminate must be made (with any probability): - { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } } - { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! 
lp64 } } } } } + { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } } + { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! lp64 } } } } } No unconditional calls to abort should be made: { dg-final { scan-tree-dump-not ";\n *must_not_eliminate" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/dump-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/dump-2.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/dump-2.c (working copy) @@ -6,4 +6,4 @@ int f(void) return 0; } -/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[100\\\.00%\\\] \\\[count: INV\\\]:" "optimized" } } */ +/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: 10000\\\]:" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-10.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-10.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-10.c (working copy) @@ -26,5 +26,5 @@ int foo (int x, int n) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-11.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-11.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-11.c (working copy) @@ -24,5 +24,4 @@ int foo (float *x) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-12.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-12.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-12.c (working copy) @@ -29,6 +29,5 @@ int foo (int x) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c (working copy) @@ -39,4 +39,4 @@ int main1 () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-20040816-2.c (working copy) @@ -43,5 +43,4 @@ void foo(const int * __restrict__ zr_in, which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-5.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-5.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-5.c (working copy) @@ -27,4 +27,4 @@ dct_unquantize_h263_inter_c (short *bloc which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-8.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-8.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-8.c (working copy) @@ -22,5 +22,4 @@ void test () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-9.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-9.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-9.c (working copy) @@ -26,4 +26,4 @@ int foo (int x, int n) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-cd.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-cd.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-cd.c (working copy) @@ -32,5 +32,4 @@ void foo (int *x1, int *x2, int *x3, int which is folded by vectorizer. 
Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr56541.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr56541.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-pr56541.c (working copy) @@ -29,5 +29,4 @@ void foo() which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr68583.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-pr68583.c (working copy) @@ -26,5 +26,5 @@ void foo (long *a) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-pr69489-1.c (working copy) @@ -20,5 +20,4 @@ void foo (int a[], int b[]) which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* Sum is wrong here, but not enough for error to be reported. */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c (revision 254266) +++ testsuite/gcc.dg/tree-ssa/ifc-pr69489-2.c (working copy) @@ -21,4 +21,4 @@ foo (const char *u, const char *v, long which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. 
*/ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ Index: testsuite/gcc.target/i386/pr61403.c =================================================================== --- testsuite/gcc.target/i386/pr61403.c (revision 254266) +++ testsuite/gcc.target/i386/pr61403.c (working copy) @@ -23,4 +23,4 @@ norm (struct XYZ *in, struct XYZ *out, i } } -/* { dg-final { scan-assembler "blend" } } */ +/* { dg-final { scan-assembler "rsqrtps" } } */ Index: tracer.c =================================================================== --- tracer.c (revision 254266) +++ tracer.c (working copy) @@ -179,7 +179,7 @@ find_best_predecessor (basic_block bb) if (!best || ignore_bb_p (best->src)) return NULL; if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE - < bb->frequency * branch_ratio_cutoff) + < bb->count.to_frequency (cfun) * branch_ratio_cutoff) return NULL; return best; } @@ -194,7 +194,7 @@ find_trace (basic_block bb, basic_block edge e; if (dump_file) - fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); + fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun)); while ((e = find_best_predecessor (bb)) != NULL) { @@ -203,11 +203,11 @@ find_trace (basic_block bb, basic_block || find_best_successor (bb2) != e) break; if (dump_file) - fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); + fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); bb = bb2; } if (dump_file) - fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); + fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun)); trace[i++] = bb; /* Follow the trace in forward direction. 
*/ @@ -218,7 +218,7 @@ find_trace (basic_block bb, basic_block || find_best_predecessor (bb) != e) break; if (dump_file) - fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); + fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); trace[i++] = bb; } if (dump_file) @@ -282,11 +282,11 @@ tail_duplicate (void) { int n = count_insns (bb); if (!ignore_bb_p (bb)) - blocks[bb->index] = heap.insert (-bb->frequency, bb); + blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb); counts [bb->index] = n; ninsns += n; - weighted_insns += n * bb->frequency; + weighted_insns += n * bb->count.to_frequency (cfun); } if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) @@ -314,7 +314,7 @@ tail_duplicate (void) n = find_trace (bb, trace); bb = trace[0]; - traced_insns += bb->frequency * counts [bb->index]; + traced_insns += bb->count.to_frequency (cfun) * counts [bb->index]; if (blocks[bb->index]) { heap.delete_node (blocks[bb->index]); @@ -330,7 +330,7 @@ tail_duplicate (void) heap.delete_node (blocks[bb2->index]); blocks[bb2->index] = NULL; } - traced_insns += bb2->frequency * counts [bb2->index]; + traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index]; if (EDGE_COUNT (bb2->preds) > 1 && can_duplicate_block_p (bb2) /* We have the tendency to duplicate the loop header @@ -345,11 +345,11 @@ tail_duplicate (void) /* Reconsider the original copy of block we've duplicated. Removing the most common predecessor may make it to be head. 
*/ - blocks[bb2->index] = heap.insert (-bb2->frequency, bb2); + blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2); if (dump_file) fprintf (dump_file, "Duplicated %i as %i [%i]\n", - bb2->index, copy->index, copy->frequency); + bb2->index, copy->index, copy->count.to_frequency (cfun)); bb2 = copy; changed = true; Index: trans-mem.c =================================================================== --- trans-mem.c (revision 254266) +++ trans-mem.c (working copy) @@ -2932,7 +2932,6 @@ expand_transaction (struct tm_region *re edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE); redirect_edge_pred (fallthru_edge, join_bb); - join_bb->frequency = test_bb->frequency = transaction_bb->frequency; join_bb->count = test_bb->count = transaction_bb->count; ei->probability = profile_probability::always (); @@ -2940,7 +2939,6 @@ expand_transaction (struct tm_region *re ef->probability = profile_probability::unlikely (); code_bb->count = et->count (); - code_bb->frequency = EDGE_FREQUENCY (et); transaction_bb = join_bb; } @@ -2964,7 +2962,6 @@ expand_transaction (struct tm_region *re gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU); - test_bb->frequency = transaction_bb->frequency; test_bb->count = transaction_bb->count; ei->probability = profile_probability::always (); @@ -3006,7 +3003,6 @@ expand_transaction (struct tm_region *re edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags); e->probability = fallthru_edge->probability; test_bb->count = fallthru_edge->count (); - test_bb->frequency = EDGE_FREQUENCY (e); // Now update the edges to the inst/uninist implementations. // For now assume that the paths are equally likely. 
When using HTM, Index: tree-call-cdce.c =================================================================== --- tree-call-cdce.c (revision 254266) +++ tree-call-cdce.c (working copy) @@ -906,7 +906,6 @@ shrink_wrap_one_built_in_call_with_conds Here we take the second approach because it's slightly simpler and because it's easy to see that it doesn't lose profile counts. */ bi_call_bb->count = profile_count::zero (); - bi_call_bb->frequency = 0; while (!edges.is_empty ()) { edge_pair e = edges.pop (); @@ -919,16 +918,10 @@ shrink_wrap_one_built_in_call_with_conds nocall_edge->probability = profile_probability::always () - call_edge->probability; - unsigned int call_frequency - = call_edge->probability.apply (src_bb->frequency); - bi_call_bb->count += call_edge->count (); - bi_call_bb->frequency += call_frequency; if (nocall_edge->dest != join_tgt_bb) - { - nocall_edge->dest->frequency = src_bb->frequency - call_frequency; - } + nocall_edge->dest->count = src_bb->count - bi_call_bb->count; } if (dom_info_available_p (CDI_DOMINATORS)) Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 254266) +++ tree-cfg.c (working copy) @@ -1071,7 +1071,6 @@ gimple_find_sub_bbs (gimple_seq seq, gim tree_guess_outgoing_edge_probabilities (bb); if (all || profile_status_for_fn (cfun) == PROFILE_READ) bb->count = cnt; - bb->frequency = freq; bb = bb->next_bb; } @@ -2081,7 +2080,6 @@ gimple_merge_blocks (basic_block a, basi if (a->loop_father == b->loop_father) { a->count = a->count.merge (b->count); - a->frequency = MAX (a->frequency, b->frequency); } /* Merge the sequences. 
*/ @@ -2840,7 +2838,6 @@ gimple_split_edge (edge edge_in) after_bb = split_edge_bb_loc (edge_in); new_bb = create_empty_bb (after_bb); - new_bb->frequency = EDGE_FREQUENCY (edge_in); new_bb->count = edge_in->count (); e = redirect_edge_and_branch (edge_in, new_bb); @@ -6306,9 +6303,8 @@ gimple_duplicate_sese_region (edge entry bool free_region_copy = false, copying_header = false; struct loop *loop = entry->dest->loop_father; edge exit_copy; - vec<basic_block> doms; + vec<basic_block> doms = vNULL; edge redirected; - int total_freq = 0, entry_freq = 0; profile_count total_count = profile_count::uninitialized (); profile_count entry_count = profile_count::uninitialized (); @@ -6376,21 +6372,10 @@ gimple_duplicate_sese_region (edge entry if (entry_count > total_count) entry_count = total_count; } - if (!(total_count > 0) || !(entry_count > 0)) - { - total_freq = entry->dest->frequency; - entry_freq = EDGE_FREQUENCY (entry); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. 
*/ - if (total_freq == 0) - total_freq = 1; - else if (entry_freq > total_freq) - entry_freq = total_freq; - } copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, split_edge_bb_loc (entry), update_dominance); - if (total_count > 0 && entry_count > 0) + if (total_count.initialized_p () && entry_count.initialized_p ()) { scale_bbs_frequencies_profile_count (region, n_region, total_count - entry_count, @@ -6398,12 +6383,6 @@ gimple_duplicate_sese_region (edge entry scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count, total_count); } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); - } if (copying_header) { @@ -6492,7 +6471,6 @@ gimple_duplicate_sese_tail (edge entry, struct loop *orig_loop = entry->dest->loop_father; basic_block switch_bb, entry_bb, nentry_bb; vec<basic_block> doms; - int total_freq = 0, exit_freq = 0; profile_count total_count = profile_count::uninitialized (), exit_count = profile_count::uninitialized (); edge exits[2], nexits[2], e; @@ -6537,30 +6515,16 @@ gimple_duplicate_sese_tail (edge entry, inside. */ doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region); - if (exit->src->count > 0) - { - total_count = exit->src->count; - exit_count = exit->count (); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (exit_count > total_count) - exit_count = total_count; - } - else - { - total_freq = exit->src->frequency; - exit_freq = EDGE_FREQUENCY (exit); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (total_freq == 0) - total_freq = 1; - if (exit_freq > total_freq) - exit_freq = total_freq; - } + total_count = exit->src->count; + exit_count = exit->count (); + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. 
*/ + if (exit_count > total_count) + exit_count = total_count; copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop, split_edge_bb_loc (exit), true); - if (total_count.initialized_p ()) + if (total_count.initialized_p () && exit_count.initialized_p ()) { scale_bbs_frequencies_profile_count (region, n_region, total_count - exit_count, @@ -6568,12 +6532,6 @@ gimple_duplicate_sese_tail (edge entry, scale_bbs_frequencies_profile_count (region_copy, n_region, exit_count, total_count); } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq); - } /* Create the switch block, and put the exit condition to it. */ entry_bb = entry->dest; @@ -7614,9 +7572,15 @@ move_sese_region_to_fn (struct function FIXME, this is silly. The CFG ought to become a parameter to these helpers. */ push_cfun (dest_cfun); - make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = entry_bb->count; + make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU); if (exit_bb) - make_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + { + make_single_succ_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = exit_bb->count; + } + else + EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero (); pop_cfun (); /* Back in the original function, the SESE region has disappeared, @@ -8691,7 +8655,7 @@ gimple_account_profile_record (basic_blo else if (profile_status_for_fn (cfun) == PROFILE_GUESSED) record->time[after_pass] += estimate_num_insns (gsi_stmt (i), - &eni_time_weights) * bb->frequency; + &eni_time_weights) * bb->count.to_frequency (cfun); } } @@ -8843,7 +8807,6 @@ insert_cond_bb (basic_block bb, gimple * edge e = make_edge (bb, new_bb, EDGE_TRUE_VALUE); e->probability = prob; new_bb->count = e->count (); - new_bb->frequency = prob.apply (bb->frequency); 
make_single_succ_edge (new_bb, fall->dest, EDGE_FALLTHRU); /* Fix edge for split bb. */ Index: tree-complex.c =================================================================== --- tree-complex.c (revision 254266) +++ tree-complex.c (working copy) @@ -1186,7 +1186,6 @@ expand_complex_div_wide (gimple_stmt_ite bb_join = e->dest; bb_true = create_empty_bb (bb_cond); bb_false = create_empty_bb (bb_true); - bb_true->frequency = bb_false->frequency = bb_cond->frequency / 2; bb_true->count = bb_false->count = bb_cond->count.apply_probability (profile_probability::even ()); Index: tree-eh.c =================================================================== --- tree-eh.c (revision 254266) +++ tree-eh.c (working copy) @@ -3224,6 +3224,7 @@ lower_resx (basic_block bb, gresx *stmt, gimple_stmt_iterator gsi2; new_bb = create_empty_bb (bb); + new_bb->count = bb->count; add_bb_to_loop (new_bb, bb->loop_father); lab = gimple_block_label (new_bb); gsi2 = gsi_start_bb (new_bb); Index: tree-inline.c =================================================================== --- tree-inline.c (revision 254266) +++ tree-inline.c (working copy) @@ -1763,16 +1763,15 @@ remap_gimple_stmt (gimple *stmt, copy_bo later */ static basic_block -copy_bb (copy_body_data *id, basic_block bb, int frequency_scale, +copy_bb (copy_body_data *id, basic_block bb, profile_count num, profile_count den) { gimple_stmt_iterator gsi, copy_gsi, seq_gsi; basic_block copy_basic_block; tree decl; - gcov_type freq; basic_block prev; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()); + bool scale = !num.initialized_p () + || (den.nonzero_p () || num == profile_count::zero ()); /* Search for previous copied basic block. 
*/ prev = bb->prev_bb; @@ -1784,15 +1783,8 @@ copy_bb (copy_body_data *id, basic_block copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux); if (scale) copy_basic_block->count = bb->count.apply_scale (num, den); - - /* We are going to rebuild frequencies from scratch. These values - have just small importance to drive canonicalize_loop_headers. */ - freq = apply_scale ((gcov_type)bb->frequency, frequency_scale); - - /* We recompute frequencies after inlining, so this is quite safe. */ - if (freq > BB_FREQ_MAX) - freq = BB_FREQ_MAX; - copy_basic_block->frequency = freq; + else if (num.initialized_p ()) + copy_basic_block->count = bb->count; copy_gsi = gsi_start_bb (copy_basic_block); @@ -2068,8 +2060,8 @@ copy_bb (copy_body_data *id, basic_block fprintf (dump_file, "Orig bb: %i, orig bb freq %i, new bb freq %i\n", bb->index, - bb->frequency, - copy_basic_block->frequency); + bb->count.to_frequency (cfun), + copy_basic_block->count.to_frequency (cfun)); } } } @@ -2507,11 +2499,8 @@ initialize_cfun (tree new_fndecl, tree c profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); - /* FIXME: When all counts are known to be zero, scaling is also meaningful. 
- */ if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p () - && count.initialized_p () - && ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ()) + && count.ipa ().initialized_p ()) { ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, @@ -2520,10 +2509,13 @@ initialize_cfun (tree new_fndecl, tree c EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count, ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count); } - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency - = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->frequency; - EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = - EXIT_BLOCK_PTR_FOR_FN (src_cfun)->frequency; + else + { + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; + EXIT_BLOCK_PTR_FOR_FN (cfun)->count + = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count; + } if (src_cfun->eh) init_eh_for_function (); @@ -2680,27 +2672,11 @@ redirect_all_calls (copy_body_data * id, } } -/* Convert estimated frequencies into counts for NODE, scaling COUNT - with each bb's frequency. Used when NODE has a 0-weight entry - but we are about to inline it into a non-zero count call bb. - See the comments for handle_missing_profiles() in predict.c for - when this can happen for COMDATs. */ - -void -freqs_to_counts (struct cgraph_node *node, profile_count count) -{ - basic_block bb; - struct function *fn = DECL_STRUCT_FUNCTION (node->decl); - - FOR_ALL_BB_FN(bb, fn) - bb->count = count.apply_scale (bb->frequency, BB_FREQ_MAX); -} - /* Make a copy of the body of FN so that it can be inserted inline in another function. Walks FN via CFG, returns new fndecl. 
*/ static tree -copy_cfg_body (copy_body_data * id, profile_count count, int frequency_scale, +copy_cfg_body (copy_body_data * id, profile_count, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -2712,31 +2688,10 @@ copy_cfg_body (copy_body_data * id, prof tree new_fndecl = NULL; bool need_debug_cleanup = false; int last; - int incoming_frequency = 0; - profile_count incoming_count = profile_count::zero (); - profile_count num = count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; - bool scale = num.initialized_p () - && (den > 0 || num == profile_count::zero ()); + profile_count num = entry_block_map->count; - /* This can happen for COMDAT routines that end up with 0 counts - despite being called (see the comments for handle_missing_profiles() - in predict.c as to why). Apply counts to the blocks in the callee - before inlining, using the guessed edge frequencies, so that we don't - end up with a 0-count inline body which can confuse downstream - optimizations such as function splitting. */ - if (!(ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count > 0) && count > 0) - { - /* Apply the larger of the call bb count and the total incoming - call edge count to the callee. */ - profile_count in_count = profile_count::zero (); - struct cgraph_edge *in_edge; - for (in_edge = id->src_node->callers; in_edge; - in_edge = in_edge->next_caller) - if (in_edge->count.initialized_p ()) - in_count += in_edge->count; - freqs_to_counts (id->src_node, count > in_count ? count : in_count); - } + cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); /* Register specific tree functions. 
*/ gimple_register_cfg_hooks (); @@ -2750,25 +2705,18 @@ copy_cfg_body (copy_body_data * id, prof { edge e; edge_iterator ei; + den = profile_count::zero (); FOR_EACH_EDGE (e, ei, new_entry->preds) if (!e->src->aux) - incoming_frequency += EDGE_FREQUENCY (e); - if (scale) - incoming_count = incoming_count.apply_scale (num, den); - else - incoming_count = profile_count::uninitialized (); - incoming_frequency - = apply_scale ((gcov_type)incoming_frequency, frequency_scale); - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = incoming_count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = incoming_frequency; + den += e->count (); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den; } /* Must have a CFG here at this point. */ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (callee_fndecl))); - cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl); ENTRY_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = entry_block_map; EXIT_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = exit_block_map; @@ -2784,7 +2732,7 @@ copy_cfg_body (copy_body_data * id, prof FOR_EACH_BB_FN (bb, cfun_to_copy) if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index)) { - basic_block new_bb = copy_bb (id, bb, frequency_scale, num, den); + basic_block new_bb = copy_bb (id, bb, num, den); bb->aux = new_bb; new_bb->aux = bb; new_bb->loop_father = entry_block_map->loop_father; @@ -3011,7 +2959,7 @@ copy_tree_body (copy_body_data *id) another function. */ static tree -copy_body (copy_body_data *id, profile_count count, int frequency_scale, +copy_body (copy_body_data *id, profile_count count, basic_block entry_block_map, basic_block exit_block_map, basic_block new_entry) { @@ -3020,7 +2968,7 @@ copy_body (copy_body_data *id, profile_c /* If this body has a CFG, walk CFG and copy. 
*/ gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl))); - body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map, + body = copy_cfg_body (id, count, entry_block_map, exit_block_map, new_entry); copy_debug_stmts (id); @@ -4771,7 +4719,6 @@ expand_call_inline (basic_block bb, gimp a self-referential call; if we're calling ourselves, we need to duplicate our body before altering anything. */ copy_body (id, cg_edge->callee->count, - GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE), bb, return_block, NULL); reset_debug_bindings (id, stmt_gsi); @@ -5146,6 +5093,7 @@ optimize_inline_calls (tree fn) } /* Fold queued statements. */ + counts_to_freqs (); fold_marked_statements (last, id.statements_to_fold); delete id.statements_to_fold; @@ -6090,7 +6038,7 @@ tree_function_versioning (tree old_decl, } /* Copy the Function's body. */ - copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE, + copy_body (&id, old_entry_block->count, ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun), new_entry); @@ -6122,6 +6070,7 @@ tree_function_versioning (tree old_decl, free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); + counts_to_freqs (); fold_marked_statements (0, id.statements_to_fold); delete id.statements_to_fold; delete_unreachable_blocks_update_callgraph (&id); @@ -6141,20 +6090,20 @@ tree_function_versioning (tree old_decl, struct cgraph_edge *e; rebuild_frequencies (); - new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); for (e = new_version_node->callees; e; e = e->next_callee) { basic_block bb = gimple_bb (e->call_stmt); e->frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); - e->count = bb->count; + e->count = bb->count.ipa (); } for (e = new_version_node->indirect_calls; e; e = e->next_callee) { basic_block bb = gimple_bb (e->call_stmt); e->frequency = 
compute_call_stmt_bb_frequency (current_function_decl, bb); - e->count = bb->count; + e->count = bb->count.ipa (); } } Index: tree-ssa-coalesce.c =================================================================== --- tree-ssa-coalesce.c (revision 254266) +++ tree-ssa-coalesce.c (working copy) @@ -164,7 +164,7 @@ coalesce_cost (int frequency, bool optim static inline int coalesce_cost_bb (basic_block bb) { - return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb)); + return coalesce_cost (bb->count.to_frequency (cfun), optimize_bb_for_size_p (bb)); } Index: tree-ssa-ifcombine.c =================================================================== --- tree-ssa-ifcombine.c (revision 254266) +++ tree-ssa-ifcombine.c (working copy) @@ -366,7 +366,6 @@ update_profile_after_ifcombine (basic_bl - inner_taken->probability; outer_to_inner->probability = profile_probability::always (); - inner_cond_bb->frequency = outer_cond_bb->frequency; outer2->probability = profile_probability::never (); } Index: tree-ssa-loop-im.c =================================================================== --- tree-ssa-loop-im.c (revision 254266) +++ tree-ssa-loop-im.c (working copy) @@ -1803,7 +1803,7 @@ execute_sm_if_changed (edge ex, tree mem for (hash_set<basic_block>::iterator it = flag_bbs->begin (); it != flag_bbs->end (); ++it) { - freq_sum += (*it)->frequency; + freq_sum += (*it)->count.to_frequency (cfun); if ((*it)->count.initialized_p ()) count_sum += (*it)->count, ncount ++; if (dominated_by_p (CDI_DOMINATORS, ex->src, *it)) flag_probability = cap; } if (flag_probability.initialized_p ()) ; - else if (ncount == nbbs && count_sum > 0 && preheader->count () >= count_sum) + else if (ncount == nbbs + && preheader->count () >= count_sum && preheader->count ().nonzero_p ()) { flag_probability = count_sum.probability_in (preheader->count ()); if (flag_probability > cap) flag_probability = cap; } - else if (freq_sum > 0 && EDGE_FREQUENCY 
(preheader) >= freq_sum) - { - flag_probability = profile_probability::from_reg_br_prob_base - (GCOV_COMPUTE_SCALE (freq_sum, EDGE_FREQUENCY (preheader))); - if (flag_probability > cap) - flag_probability = cap; - } - else + + if (!flag_probability.initialized_p ()) flag_probability = cap; /* ?? Insert store after previous store if applicable. See note @@ -1861,7 +1856,6 @@ execute_sm_if_changed (edge ex, tree mem old_dest = ex->dest; new_bb = split_edge (ex); then_bb = create_empty_bb (new_bb); - then_bb->frequency = flag_probability.apply (new_bb->frequency); then_bb->count = new_bb->count.apply_probability (flag_probability); if (irr) then_bb->flags = BB_IRREDUCIBLE_LOOP; Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 254266) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -647,7 +647,6 @@ unloop_loops (bitmap loop_closed_ssa_inv add_bb_to_loop (latch_edge->dest, current_loops->tree_root); latch_edge->dest->count = profile_count::zero (); - latch_edge->dest->frequency = 0; set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); gsi = gsi_start_bb (latch_edge->dest); @@ -1090,7 +1089,6 @@ try_peel_loop (struct loop *loop, } } profile_count entry_count = profile_count::zero (); - int entry_freq = 0; edge e; edge_iterator ei; @@ -1099,15 +1097,10 @@ try_peel_loop (struct loop *loop, { if (e->src->count.initialized_p ()) entry_count = entry_count + e->src->count; - entry_freq += e->src->frequency; gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); } profile_probability p = profile_probability::very_unlikely (); - if (loop->header->count > 0) - p = entry_count.probability_in (loop->header->count); - else if (loop->header->frequency) - p = profile_probability::probability_in_gcov_type - (entry_freq, loop->header->frequency); + p = entry_count.probability_in (loop->header->count); scale_loop_profile (loop, p, 0); bitmap_set_bit (peeled_loops, loop->num); 
return true; Index: tree-ssa-loop-ivopts.c =================================================================== --- tree-ssa-loop-ivopts.c (revision 254266) +++ tree-ssa-loop-ivopts.c (working copy) @@ -4457,8 +4457,8 @@ get_address_cost (struct ivopts_data *da static comp_cost get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) { - int loop_freq = data->current_loop->header->frequency; - int bb_freq = gimple_bb (at)->frequency; + int loop_freq = data->current_loop->header->count.to_frequency (cfun); + int bb_freq = gimple_bb (at)->count.to_frequency (cfun); if (loop_freq != 0) { gcc_assert (cost.scratch <= cost.cost); Index: tree-ssa-loop-manip.c =================================================================== --- tree-ssa-loop-manip.c (revision 254266) +++ tree-ssa-loop-manip.c (working copy) @@ -1122,6 +1122,9 @@ niter_for_unrolled_loop (struct loop *lo converts back. */ gcov_type new_est_niter = est_niter / factor; + if (est_niter == -1) + return -1; + /* Without profile feedback, loops for which we do not know a better estimate are assumed to roll 10 times. When we unroll such loop, it appears to roll too little, and it may even seem to be cold. To avoid this, we @@ -1370,14 +1373,7 @@ tree_transform_and_unroll_loop (struct l freq_h = loop->header->count; freq_e = (loop_preheader_edge (loop))->count (); - /* Use frequency only if counts are zero. */ - if (!(freq_h > 0) && !(freq_e > 0)) - { - freq_h = profile_count::from_gcov_type (loop->header->frequency); - freq_e = profile_count::from_gcov_type - (EDGE_FREQUENCY (loop_preheader_edge (loop))); - } - if (freq_h > 0) + if (freq_h.nonzero_p ()) { /* Avoid dropping loop body profile counter to 0 because of zero count in loop's preheader. 
*/ @@ -1392,7 +1388,6 @@ tree_transform_and_unroll_loop (struct l .apply_scale (1, new_est_niter + 1); rest->count += new_exit->count (); - rest->frequency += EDGE_FREQUENCY (new_exit); new_nonexit = single_pred_edge (loop->latch); prob = new_nonexit->probability; Index: tree-ssa-loop-niter.c =================================================================== --- tree-ssa-loop-niter.c (revision 254266) +++ tree-ssa-loop-niter.c (working copy) @@ -3901,7 +3901,7 @@ estimate_numbers_of_iterations (struct l recomputing iteration bounds later in the compilation process will just introduce random roundoff errors. */ if (!loop->any_estimate - && loop->header->count > 0) + && loop->header->count.reliable_p ()) { gcov_type nit = expected_loop_iterations_unbounded (loop); bound = gcov_type_to_wide_int (nit); Index: tree-ssa-loop-unswitch.c =================================================================== --- tree-ssa-loop-unswitch.c (revision 254266) +++ tree-ssa-loop-unswitch.c (working copy) @@ -852,7 +852,7 @@ hoist_guard (struct loop *loop, edge gua /* Determine the probability that we skip the loop. Assume that loop has same average number of iterations regardless outcome of guard. */ new_edge->probability = guard->probability; - profile_count skip_count = guard->src->count > 0 + profile_count skip_count = guard->src->count.nonzero_p () ? guard->count ().apply_scale (pre_header->count, guard->src->count) : guard->count ().apply_probability (new_edge->probability); @@ -875,7 +875,6 @@ hoist_guard (struct loop *loop, edge gua to loop header... */ e->probability = new_edge->probability.invert (); e->dest->count = e->count (); - e->dest->frequency = EDGE_FREQUENCY (e); /* ... now update profile to represent that original guard will be optimized away ... 
*/ Index: tree-ssa-sink.c =================================================================== --- tree-ssa-sink.c (revision 254266) +++ tree-ssa-sink.c (working copy) @@ -226,7 +226,8 @@ select_best_block (basic_block early_bb, /* If BEST_BB is at the same nesting level, then require it to have significantly lower execution frequency to avoid gratutious movement. */ if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb) - && best_bb->frequency < (early_bb->frequency * threshold / 100.0)) + && best_bb->count.to_frequency (cfun) + < (early_bb->count.to_frequency (cfun) * threshold / 100.0)) return best_bb; /* No better block found, so return EARLY_BB, which happens to be the Index: tree-ssa-tail-merge.c =================================================================== --- tree-ssa-tail-merge.c (revision 254266) +++ tree-ssa-tail-merge.c (working copy) @@ -1530,8 +1530,6 @@ static void replace_block_by (basic_block bb1, basic_block bb2) { edge pred_edge; - edge e1, e2; - edge_iterator ei; unsigned int i; gphi *bb2_phi; @@ -1560,9 +1558,13 @@ replace_block_by (basic_block bb1, basic bb2->count += bb1->count; + /* FIXME: Fix merging of probabilities. They need to be redistributed + according to the relative counts of merged BBs. */ +#if 0 /* Merge the outgoing edge counts from bb1 onto bb2. */ profile_count out_sum = profile_count::zero (); int out_freq_sum = 0; + edge e1, e2; /* Recompute the edge probabilities from the new merged edge count. 
Use the sum of the new merged edge counts computed above instead @@ -1580,7 +1582,6 @@ replace_block_by (basic_block bb1, basic out_sum += e1->count (); out_freq_sum += EDGE_FREQUENCY (e1); } - FOR_EACH_EDGE (e1, ei, bb1->succs) { e2 = find_edge (bb2, e1->dest); @@ -1589,9 +1590,9 @@ replace_block_by (basic_block bb1, basic { e2->probability = e2->count ().probability_in (bb2->count); } - else if (bb1->frequency && bb2->frequency) + else if (bb1->count.to_frequency (cfun) && bb2->count.to_frequency (cfun)) e2->probability = e1->probability; - else if (bb2->frequency && !bb1->frequency) + else if (bb2->count.to_frequency (cfun) && !bb1->count.to_frequency (cfun)) ; else if (out_freq_sum) e2->probability = profile_probability::from_reg_br_prob_base @@ -1600,9 +1601,7 @@ replace_block_by (basic_block bb1, basic out_freq_sum)); out_sum += e2->count (); } - bb2->frequency += bb1->frequency; - if (bb2->frequency > BB_FREQ_MAX) - bb2->frequency = BB_FREQ_MAX; +#endif /* Move over any user labels from bb1 after the bb2 labels. */ gimple_stmt_iterator gsi1 = gsi_start_bb (bb1); Index: tree-ssa-threadupdate.c =================================================================== --- tree-ssa-threadupdate.c (revision 254266) +++ tree-ssa-threadupdate.c (working copy) @@ -339,7 +339,6 @@ create_block_for_threading (basic_block e->aux = NULL; /* Zero out the profile, since the block is unreachable for now. 
*/ - rd->dup_blocks[count]->frequency = 0; rd->dup_blocks[count]->count = profile_count::uninitialized (); if (duplicate_blocks) bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index); @@ -590,7 +589,7 @@ any_remaining_duplicated_blocks (vec<jum } -/* Compute the amount of profile count/frequency coming into the jump threading +/* Compute the amount of profile count coming into the jump threading path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to @@ -598,7 +597,7 @@ any_remaining_duplicated_blocks (vec<jum edges that need to be ignored in the analysis. Return true if path contains a joiner, false otherwise. - In the non-joiner case, this is straightforward - all the counts/frequency + In the non-joiner case, this is straightforward - all the counts flowing into the jump threading path should flow through the duplicated block and out of the duplicated path. @@ -851,16 +850,14 @@ compute_path_counts (struct redirection_ /* Update the counts and frequencies for both an original path edge EPATH and its duplicate EDUP. The duplicate source block - will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ, + will get a count of PATH_IN_COUNT, and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */ static void update_profile (edge epath, edge edup, profile_count path_in_count, - profile_count path_out_count, int path_in_freq) + profile_count path_out_count) { - if (!(path_in_count > 0)) - return; - /* First update the duplicated block's count / frequency. */ + /* First update the duplicated block's count. 
*/ if (edup) { basic_block dup_block = edup->src; @@ -894,167 +891,54 @@ update_profile (edge epath, edge edup, p if (esucc != edup) esucc->probability *= scale; } - edup->probability = edup_prob; + if (edup_prob.initialized_p ()) + edup->probability = edup_prob; - /* FIXME once freqs_to_counts is dropped re-enable this check. */ - gcc_assert (!dup_block->count.initialized_p () || 1); - gcc_assert (dup_block->frequency == 0); + gcc_assert (!dup_block->count.initialized_p ()); dup_block->count = path_in_count; - dup_block->frequency = path_in_freq; } + if (path_in_count == profile_count::zero ()) + return; + profile_count final_count = epath->count () - path_out_count; - /* Now update the original block's count and frequency in the + /* Now update the original block's count in the opposite manner - remove the counts/freq that will flow into the duplicated block. Handle underflow due to precision/ rounding issues. */ epath->src->count -= path_in_count; - epath->src->frequency -= path_in_freq; - if (epath->src->frequency < 0) - epath->src->frequency = 0; /* Next update this path edge's original and duplicated counts. We know that the duplicated path will have path_out_count flowing out of it (in the joiner case this is the count along the duplicated path out of the duplicated joiner). This count can then be removed from the original path edge. 
*/ - if (epath->src->count > 0) - { - edge esucc; - edge_iterator ei; - profile_probability epath_prob = final_count.probability_in (epath->src->count); - - if (epath->probability > epath_prob) - { - profile_probability rev_scale - = (profile_probability::always () - epath->probability) - / (profile_probability::always () - epath_prob); - FOR_EACH_EDGE (esucc, ei, epath->src->succs) - if (esucc != epath) - esucc->probability /= rev_scale; - } - else if (epath->probability < epath_prob) - { - profile_probability scale - = (profile_probability::always () - epath_prob) - / (profile_probability::always () - epath->probability); - FOR_EACH_EDGE (esucc, ei, epath->src->succs) - if (esucc != epath) - esucc->probability *= scale; - } - epath->probability = epath_prob; - } -} - -/* Check if the paths through RD all have estimated frequencies but zero - profile counts. This is more accurate than checking the entry block - for a zero profile count, since profile insanities sometimes creep in. */ - -static bool -estimated_freqs_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - edge ein; + edge esucc; edge_iterator ei; - bool non_zero_freq = false; - FOR_EACH_EDGE (ein, ei, e->dest->preds) - { - if (ein->count () > 0) - return false; - non_zero_freq |= ein->src->frequency != 0; - } + profile_probability epath_prob = final_count.probability_in (epath->src->count); - for (unsigned int i = 1; i < path->length (); i++) + if (epath->probability > epath_prob) { - edge epath = (*path)[i]->e; - if (epath->src->count > 0) - return false; - non_zero_freq |= epath->src->frequency != 0; - edge esucc; + profile_probability rev_scale + = (profile_probability::always () - epath->probability) + / (profile_probability::always () - epath_prob); + FOR_EACH_EDGE (esucc, ei, epath->src->succs) + if (esucc != epath) + esucc->probability /= rev_scale; + } + else if (epath->probability < epath_prob) + { + profile_probability 
scale + = (profile_probability::always () - epath_prob) + / (profile_probability::always () - epath->probability); FOR_EACH_EDGE (esucc, ei, epath->src->succs) - { - if (esucc->count () > 0) - return false; - non_zero_freq |= esucc->src->frequency != 0; - } - } - return non_zero_freq; -} - - -/* Invoked for routines that have guessed frequencies and no profile - counts to record the block and edge frequencies for paths through RD - in the profile count fields of those blocks and edges. This is because - ssa_fix_duplicate_block_edges incrementally updates the block and - edge counts as edges are redirected, and it is difficult to do that - for edge frequencies which are computed on the fly from the source - block frequency and probability. When a block frequency is updated - its outgoing edge frequencies are affected and become difficult to - adjust. */ - -static void -freqs_to_counts_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - edge ein; - edge_iterator ei; - - FOR_EACH_EDGE (ein, ei, e->dest->preds) - ein->src->count = profile_count::from_gcov_type - (ein->src->frequency * REG_BR_PROB_BASE); - for (unsigned int i = 1; i < path->length (); i++) - { - edge epath = (*path)[i]->e; - /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding - errors applying the edge probability when the frequencies are very - small. */ - epath->src->count = - profile_count::from_gcov_type - (epath->src->frequency * REG_BR_PROB_BASE); - } -} - - -/* For routines that have guessed frequencies and no profile counts, where we - used freqs_to_counts_path to record block and edge frequencies for paths - through RD, we clear the counts after completing all updates for RD. - The updates in ssa_fix_duplicate_block_edges are based off the count fields, - but the block frequencies and edge probabilities were updated as well, - so we can simply clear the count fields. 
*/ - -static void -clear_counts_path (struct redirection_data *rd) -{ - edge e = rd->incoming_edges->e; - vec<jump_thread_edge *> *path = THREAD_PATH (e); - profile_count val = profile_count::uninitialized (); - if (profile_status_for_fn (cfun) == PROFILE_READ) - val = profile_count::zero (); - - edge ein; - edge_iterator ei; - - FOR_EACH_EDGE (ein, ei, e->dest->preds) - ein->src->count = val; - - /* First clear counts along original path. */ - for (unsigned int i = 1; i < path->length (); i++) - { - edge epath = (*path)[i]->e; - epath->src->count = val; - } - /* Also need to clear the counts along duplicated path. */ - for (unsigned int i = 0; i < 2; i++) - { - basic_block dup = rd->dup_blocks[i]; - if (!dup) - continue; - dup->count = val; + if (esucc != epath) + esucc->probability *= scale; } + if (epath_prob.initialized_p ()) + epath->probability = epath_prob; } /* Wire up the outgoing edges from the duplicate blocks and @@ -1072,20 +956,6 @@ ssa_fix_duplicate_block_edges (struct re profile_count path_out_count = profile_count::zero (); int path_in_freq = 0; - /* This routine updates profile counts, frequencies, and probabilities - incrementally. Since it is difficult to do the incremental updates - using frequencies/probabilities alone, for routines without profile - data we first take a snapshot of the existing block and edge frequencies - by copying them into the empty profile count fields. These counts are - then used to do the incremental updates, and cleared at the end of this - routine. If the function is marked as having a profile, we still check - to see if the paths through RD are using estimated frequencies because - the routine had zero profile counts. */ - bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ - || estimated_freqs_path (rd)); - if (do_freqs_to_counts) - freqs_to_counts_path (rd); - /* First determine how much profile count to move from original path to the duplicate path. 
This is tricky in the presence of a joiner (see comments for compute_path_counts), where some portion @@ -1096,7 +966,6 @@ ssa_fix_duplicate_block_edges (struct re &path_in_count, &path_out_count, &path_in_freq); - int cur_path_freq = path_in_freq; for (unsigned int count = 0, i = 1; i < path->length (); i++) { edge epath = (*path)[i]->e; @@ -1162,19 +1031,14 @@ ssa_fix_duplicate_block_edges (struct re } } - /* Update the counts and frequency of both the original block + /* Update the counts of both the original block and path edge, and the duplicates. The path duplicate's - incoming count and frequency are the totals for all edges + incoming count are the totals for all edges incoming to this jump threading path computed earlier. And we know that the duplicated path will have path_out_count flowing out of it (i.e. along the duplicated path out of the duplicated joiner). */ - update_profile (epath, e2, path_in_count, path_out_count, - path_in_freq); - - /* Record the frequency flowing to the downstream duplicated - path blocks. */ - cur_path_freq = EDGE_FREQUENCY (e2); + update_profile (epath, e2, path_in_count, path_out_count); } else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK) { @@ -1184,7 +1048,7 @@ ssa_fix_duplicate_block_edges (struct re if (count == 1) single_succ_edge (rd->dup_blocks[1])->aux = NULL; - /* Update the counts and frequency of both the original block + /* Update the counts of both the original block and path edge, and the duplicates. Since we are now after any joiner that may have existed on the path, the count flowing along the duplicated threaded path is path_out_count. @@ -1194,7 +1058,7 @@ ssa_fix_duplicate_block_edges (struct re been updated at the end of that handling to the edge frequency along the duplicated joiner path edge. 
*/ update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0), - path_out_count, path_out_count, cur_path_freq); + path_out_count, path_out_count); } else { @@ -1211,8 +1075,7 @@ ssa_fix_duplicate_block_edges (struct re thread path (path_in_freq). If we had a joiner, it would have been updated at the end of that handling to the edge frequency along the duplicated joiner path edge. */ - update_profile (epath, NULL, path_out_count, path_out_count, - cur_path_freq); + update_profile (epath, NULL, path_out_count, path_out_count); } /* Increment the index into the duplicated path when we processed @@ -1223,11 +1086,6 @@ ssa_fix_duplicate_block_edges (struct re count++; } } - - /* Done with all profile and frequency updates, clear counts if they - were copied. */ - if (do_freqs_to_counts) - clear_counts_path (rd); } /* Hash table traversal callback routine to create duplicate blocks. */ @@ -2137,7 +1995,6 @@ duplicate_thread_path (edge entry, edge struct loop *loop = entry->dest->loop_father; edge exit_copy; edge redirected; - int curr_freq; profile_count curr_count; if (!can_copy_bbs_p (region, n_region)) @@ -2170,7 +2027,6 @@ duplicate_thread_path (edge entry, edge the jump-thread path in order. */ curr_count = entry->count (); - curr_freq = EDGE_FREQUENCY (entry); for (i = 0; i < n_region; i++) { @@ -2181,10 +2037,8 @@ duplicate_thread_path (edge entry, edge /* Watch inconsistent profile. */ if (curr_count > region[i]->count) curr_count = region[i]->count; - if (curr_freq > region[i]->frequency) - curr_freq = region[i]->frequency; /* Scale current BB. */ - if (region[i]->count > 0 && curr_count.initialized_p ()) + if (region[i]->count.nonzero_p () && curr_count.initialized_p ()) { /* In the middle of the path we only scale the frequencies. 
In last BB we need to update probabilities of outgoing edges @@ -2195,24 +2049,11 @@ duplicate_thread_path (edge entry, edge region[i]->count); else update_bb_profile_for_threading (region[i], - curr_freq, curr_count, + curr_count, exit); scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count, region_copy[i]->count); } - else if (region[i]->frequency) - { - if (i + 1 != n_region) - scale_bbs_frequencies_int (region + i, 1, - region[i]->frequency - curr_freq, - region[i]->frequency); - else - update_bb_profile_for_threading (region[i], - curr_freq, curr_count, - exit); - scale_bbs_frequencies_int (region_copy + i, 1, curr_freq, - region_copy[i]->frequency); - } if (single_succ_p (bb)) { @@ -2221,7 +2062,6 @@ duplicate_thread_path (edge entry, edge || region_copy[i + 1] == single_succ_edge (bb)->dest); if (i + 1 != n_region) { - curr_freq = EDGE_FREQUENCY (single_succ_edge (bb)); curr_count = single_succ_edge (bb)->count (); } continue; @@ -2252,7 +2092,6 @@ duplicate_thread_path (edge entry, edge } else { - curr_freq = EDGE_FREQUENCY (e); curr_count = e->count (); } } Index: tree-switch-conversion.c =================================================================== --- tree-switch-conversion.c (revision 254266) +++ tree-switch-conversion.c (working copy) @@ -1443,10 +1443,10 @@ gen_inbound_check (gswitch *swtch, struc } /* frequencies of the new BBs */ - bb1->frequency = EDGE_FREQUENCY (e01); - bb2->frequency = EDGE_FREQUENCY (e02); + bb1->count = e01->count (); + bb2->count = e02->count (); if (!info->default_case_nonstandard) - bbf->frequency = EDGE_FREQUENCY (e1f) + EDGE_FREQUENCY (e2f); + bbf->count = e1f->count () + e2f->count (); /* Tidy blocks that have become unreachable. 
*/ prune_bbs (bbd, info->final_bb, Index: tree-tailcall.c =================================================================== --- tree-tailcall.c (revision 254266) +++ tree-tailcall.c (working copy) @@ -805,12 +805,9 @@ adjust_return_value (basic_block bb, tre - /* Subtract COUNT and FREQUENCY from the basic block and it's outgoing edge. */ + /* Subtract COUNT from the basic block and its outgoing edge.  */ static void -decrease_profile (basic_block bb, profile_count count, int frequency) +decrease_profile (basic_block bb, profile_count count) { bb->count = bb->count - count; - bb->frequency -= frequency; - if (bb->frequency < 0) - bb->frequency = 0; if (!single_succ_p (bb)) { gcc_assert (!EDGE_COUNT (bb->succs)); @@ -892,11 +889,10 @@ eliminate_tail_call (struct tailcall *t) /* Number of executions of function has reduced by the tailcall. */ e = single_succ_edge (gsi_bb (t->call_gsi)); - decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count (), EDGE_FREQUENCY (e)); - decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count (), - EDGE_FREQUENCY (e)); + decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count ()); + decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count ()); if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - decrease_profile (e->dest, e->count (), EDGE_FREQUENCY (e)); + decrease_profile (e->dest, e->count ()); /* Replace the call by a jump to the start of function. */ e = redirect_edge_and_branch (single_succ_edge (gsi_bb (t->call_gsi)), Index: tree-vect-loop-manip.c =================================================================== --- tree-vect-loop-manip.c (revision 254266) +++ tree-vect-loop-manip.c (working copy) @@ -1843,7 +1843,6 @@ vect_do_peeling (loop_vec_info loop_vinf /* Simply propagate profile info from guard_bb to guard_to which is a merge point of control flow. */ - guard_to->frequency = guard_bb->frequency; guard_to->count = guard_bb->count; /* Scale probability of epilog loop back. FIXME: We should avoid scaling down and back up. 
Profile may Index: tree-vect-loop.c =================================================================== --- tree-vect-loop.c (revision 254266) +++ tree-vect-loop.c (working copy) @@ -7229,20 +7229,14 @@ scale_profile_for_vect_loop (struct loop gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf); profile_count freq_h = loop->header->count, freq_e = preheader->count (); - /* Use frequency only if counts are zero. */ - if (!(freq_h > 0) && !(freq_e > 0)) - { - freq_h = profile_count::from_gcov_type (loop->header->frequency); - freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader)); - } - if (freq_h > 0) + if (freq_h.nonzero_p ()) { profile_probability p; /* Avoid dropping loop body profile counter to 0 because of zero count in loop's preheader. */ - if (!(freq_e > profile_count::from_gcov_type (1))) - freq_e = profile_count::from_gcov_type (1); + if (!(freq_e == profile_count::zero ())) + freq_e = freq_e.force_nonzero (); p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); scale_loop_frequencies (loop, p); } @@ -7781,7 +7775,7 @@ optimize_mask_stores (struct loop *loop) efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); /* Put STORE_BB to likely part. */ efalse->probability = profile_probability::unlikely (); - store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse); + store_bb->count = efalse->count (); make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); if (dom_info_available_p (CDI_DOMINATORS)) set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 254266) +++ tree-vect-stmts.c (working copy) @@ -3221,7 +3221,7 @@ vectorizable_simd_clone_call (gimple *st vec<tree> vargs = vNULL; size_t i, nargs; tree lhs, rtype, ratype; - vec<constructor_elt, va_gc> *ret_ctor_elts; + vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; /* Is STMT a vectorizable call? 
*/ if (!is_gimple_call (stmt)) Index: ubsan.c =================================================================== --- ubsan.c (revision 254266) +++ ubsan.c (working copy) @@ -804,6 +804,7 @@ ubsan_expand_null_ifn (gimple_stmt_itera this edge is unlikely taken, so set up the probability accordingly. */ e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); /* Connect 'then block' with the 'else block'. This is needed as the ubsan routines we call in the 'then block' are not noreturn. @@ -1085,6 +1086,7 @@ ubsan_expand_ptr_ifn (gimple_stmt_iterat accordingly. */ e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); } else { @@ -1098,12 +1100,14 @@ ubsan_expand_ptr_ifn (gimple_stmt_iterat e = make_edge (cond_neg_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); + then_bb->count = e->count (); cond_pos_bb = create_empty_bb (cond_bb); add_bb_to_loop (cond_pos_bb, cond_bb->loop_father); e = make_edge (cond_bb, cond_pos_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::even (); + cond_pos_bb->count = e->count (); e = make_edge (cond_pos_bb, then_bb, EDGE_TRUE_VALUE); e->probability = profile_probability::very_unlikely (); Index: value-prof.c =================================================================== --- value-prof.c (revision 254266) +++ value-prof.c (working copy) @@ -1299,7 +1299,7 @@ check_ic_target (gcall *call_stmt, struc gcall * gimple_ic (gcall *icall_stmt, struct cgraph_node *direct_call, - profile_probability prob, profile_count count, profile_count all) + profile_probability prob) { gcall *dcall_stmt; gassign *load_stmt; @@ -1354,11 +1354,11 @@ gimple_ic (gcall *icall_stmt, struct cgr /* Edge e_cd connects cond_bb to dcall_bb, etc; note the first letters. 
*/ e_cd = split_block (cond_bb, cond_stmt); dcall_bb = e_cd->dest; - dcall_bb->count = count; + dcall_bb->count = cond_bb->count.apply_probability (prob); e_di = split_block (dcall_bb, dcall_stmt); icall_bb = e_di->dest; - icall_bb->count = all - count; + icall_bb->count = cond_bb->count - dcall_bb->count; /* Do not disturb existing EH edges from the indirect call. */ if (!stmt_ends_bb_p (icall_stmt)) @@ -1376,7 +1376,7 @@ gimple_ic (gcall *icall_stmt, struct cgr if (e_ij != NULL) { join_bb = e_ij->dest; - join_bb->count = all; + join_bb->count = cond_bb->count; } e_cd->flags = (e_cd->flags & ~EDGE_FALLTHRU) | EDGE_TRUE_VALUE; Index: value-prof.h =================================================================== --- value-prof.h (revision 254266) +++ value-prof.h (working copy) @@ -90,8 +90,7 @@ void gimple_move_stmt_histograms (struct void verify_histograms (void); void free_histograms (function *); void stringop_block_profile (gimple *, unsigned int *, HOST_WIDE_INT *); -gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability, - profile_count, profile_count); +gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability); bool check_ic_target (gcall *, struct cgraph_node *);