From patchwork Tue Jun 29 14:17:21 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: Fix profile updating in partial inlining Date: Tue, 29 Jun 2010 04:17:21 -0000 From: Jan Hubicka X-Patchwork-Id: 57270 Message-Id: <20100629141721.GH6233@kam.mff.cuni.cz> To: gcc-patches@gcc.gnu.org, jakub@redhat.com Hi, Jakub noticed funny behaviour on the testcase attached where ipa-split splits correctly the unlikely path leading to abort, but inliner inlines it back first and then proceeds by inlining foo. This is because the BB containing abort has frequency of 0 that leads to problems with division by 0 in inline heruistics that funilly conclude that time spent in the split part is 0 and inlining it saves time of 11 that makes the function very good candidate for inliing. I guess this is quite common behaviour on sanity checking code (that is pretty common target for splitting) This patch fixes way entry block frequency is computed and also makes repropagation or rescaling of frequencies after partial inlining is done. The second is needed to get sane profiles in such a split parts of frequency of 0. Bootstrapped/regtested x86_64-linux, comitted. Honza * gcc.dg/tree-ssa/ipa-split-3.c: New testcase. * predict.c (propagate_freq): Clear EXIT_BLOCK_PTR frequency if it is unreachable. (rebuild_frequencies): New function. * predict.h (rebuild_frequencies): Declare. * tree-inline.c (copy_cfg_body): Compute properly count & frequency of entry block and edge reaching new_entry. (tree_function_versioning): When doing partial cloning, rebuild frequencies when done. * passes.c (execute_function_todo): Use rebild_frequencies. Index: testsuite/gcc.dg/tree-ssa/ipa-split-3.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ipa-split-3.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/ipa-split-3.c (revision 0) @@ -0,0 +1,21 @@ +int baz (void); +static int +foo (int x) +{ + if (__builtin_expect (x <= 0, 0)) + { + __builtin_printf ("foo\n"); + __builtin_printf ("foo\n"); + __builtin_printf ("foo\n"); + __builtin_abort (); + } + return 6; +} + +int a,b,c; + +int +bar (int x) +{ + return foo (a) + foo (b) + foo (c); +} Index: predict.c =================================================================== --- predict.c (revision 161494) +++ predict.c (working copy) @@ -1855,9 +1855,6 @@ propagate_freq (basic_block head, bitmap edge_iterator ei; int count = 0; - /* The outermost "loop" includes the exit block, which we can not - look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR - directly. Do the same for the entry block. */ bb = BASIC_BLOCK (i); FOR_EACH_EDGE (e, ei, bb->preds) @@ -1872,6 +1869,9 @@ propagate_freq (basic_block head, bitmap e->src->index, bb->index); } BLOCK_INFO (bb)->npredecessors = count; + /* When function never returns, we will never process exit block. */ + if (!count && bb == EXIT_BLOCK_PTR) + bb->count = bb->frequency = 0; } memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one)); @@ -2282,3 +2282,27 @@ struct gimple_opt_pass pass_strip_predic TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */ } }; + +/* Rebuild function frequencies. Passes are in general expected to + maintain profile by hand, however in some cases this is not possible: + for example when inlining several functions with loops freuqencies might run + out of scale and thus needs to be recomputed. */ + +void +rebuild_frequencies (void) +{ + if (profile_status == PROFILE_GUESSED) + { + loop_optimizer_init (0); + add_noreturn_fake_exit_edges (); + mark_irreducible_loops (); + connect_infinite_loops_to_exit (); + estimate_bb_frequencies (); + remove_fake_exit_edges (); + loop_optimizer_finalize (); + } + else if (profile_status == PROFILE_READ) + counts_to_freqs (); + else + gcc_unreachable (); +} Index: predict.h =================================================================== --- predict.h (revision 161494) +++ predict.h (working copy) @@ -42,5 +42,6 @@ extern const char *predictor_name (enum extern tree build_predict_expr (enum br_predictor, enum prediction); extern void tree_estimate_probability (void); extern void compute_function_frequency (void); +extern void rebuild_frequencies (void); #endif /* GCC_PREDICT_H */ Index: tree-inline.c =================================================================== --- tree-inline.c (revision 161494) +++ tree-inline.c (working copy) @@ -2159,6 +2159,8 @@ copy_cfg_body (copy_body_data * id, gcov bool need_debug_cleanup = false; gcov_type count_scale; int last; + int incomming_frequency = 0; + gcov_type incomming_count = 0; if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count) count_scale = (REG_BR_PROB_BASE * count @@ -2169,6 +2171,28 @@ copy_cfg_body (copy_body_data * id, gcov /* Register specific tree functions. */ gimple_register_cfg_hooks (); + /* If we are inlining just region of the function, make sure to connect new entry + to ENTRY_BLOCK_PTR. Since new entry can be part of loop, we must compute + frequency and probability of ENTRY_BLOCK_PTR based on the frequencies and + probabilities of edges incomming from nonduplicated region. */ + if (new_entry) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, new_entry->preds) + if (!e->src->aux) + { + incomming_frequency += EDGE_FREQUENCY (e); + incomming_count += e->count; + } + incomming_count = incomming_count * count_scale / REG_BR_PROB_BASE; + incomming_frequency + = incomming_frequency * frequency_scale / REG_BR_PROB_BASE; + ENTRY_BLOCK_PTR->count = incomming_count; + ENTRY_BLOCK_PTR->frequency = incomming_frequency; + } + /* Must have a CFG here at this point. */ gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (callee_fndecl))); @@ -2204,10 +2228,9 @@ copy_cfg_body (copy_body_data * id, gcov if (new_entry) { - edge e; - e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU); + edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU); e->probability = REG_BR_PROB_BASE; - e->count = entry_block_map->count; + e->count = incomming_count; } if (gimple_in_ssa_p (cfun)) @@ -5180,6 +5203,23 @@ tree_function_versioning (tree old_decl, if (id.dst_node->analyzed) cgraph_rebuild_references (); update_ssa (TODO_update_ssa); + + /* After partial cloning we need to rescale frequencies, so they are + within proper range in the cloned function. */ + if (new_entry) + { + struct cgraph_edge *e; + rebuild_frequencies (); + + new_version_node->count = ENTRY_BLOCK_PTR->count; + for (e = new_version_node->callees; e; e = e->next_callee) + { + basic_block bb = gimple_bb (e->call_stmt); + e->frequency = compute_call_stmt_bb_frequency (current_function_decl, bb); + e->count = bb->count; + } + } + free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); Index: passes.c =================================================================== --- passes.c (revision 161494) +++ passes.c (working copy) @@ -1243,22 +1243,7 @@ execute_function_todo (void *data) } if (flags & TODO_rebuild_frequencies) - { - if (profile_status == PROFILE_GUESSED) - { - loop_optimizer_init (0); - add_noreturn_fake_exit_edges (); - mark_irreducible_loops (); - connect_infinite_loops_to_exit (); - estimate_bb_frequencies (); - remove_fake_exit_edges (); - loop_optimizer_finalize (); - } - else if (profile_status == PROFILE_READ) - counts_to_freqs (); - else - gcc_unreachable (); - } + rebuild_frequencies (); #if defined ENABLE_CHECKING if (flags & TODO_verify_ssa