@@ -780,7 +780,9 @@ edge_badness (struct cgraph_edge *edge, bool dump)
The fraction is upside down, becuase on edge counts and time beneits
the bounds are known. Edge growth is essentially unlimited. */
- else if (max_count)
+ else if (max_count
+ && DECL_READ_PROFILE_P(edge->caller->decl)
+ && DECL_READ_PROFILE_P(edge->callee->decl))
{
int relbenefit = relative_time_benefit (callee_info, edge,
time_growth);
badness =
@@ -114,7 +114,8 @@ static inline bool
maybe_hot_frequency_p (int freq)
{
struct cgraph_node *node = cgraph_get_node (current_function_decl);
- if (!profile_info || !flag_branch_probabilities)
+ if (!profile_info || !flag_branch_probabilities
+ || !DECL_READ_PROFILE_P(current_function_decl))
{
if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
return false;
@@ -162,6 +163,7 @@ bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
if (profile_info && flag_branch_probabilities
+ && DECL_READ_PROFILE_P(edge->caller->decl)
&& (edge->count
<= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
return false;
@@ -202,9 +204,11 @@ maybe_hot_edge_p (edge e)
bool
probably_never_executed_bb_p (const_basic_block bb)
{
- if (profile_info && flag_branch_probabilities)
+ if (profile_info && flag_branch_probabilities
+ && DECL_READ_PROFILE_P(current_function_decl))
return ((bb->count + profile_info->runs / 2) / profile_info->runs) ==
0;
- if ((!profile_info || !flag_branch_probabilities)
+ if ((!profile_info || !flag_branch_probabilities
+ || !DECL_READ_PROFILE_P(current_function_decl))
&& (cgraph_get_node (current_function_decl)->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED))
return true;
@@ -2280,7 +2284,8 @@ compute_function_frequency (void)
if (DECL_STATIC_DESTRUCTOR (current_function_decl))
node->only_called_at_exit = true;
- if (!profile_info || !flag_branch_probabilities)
+ if (!profile_info || !flag_branch_probabilities
+ || !DECL_READ_PROFILE_P(current_function_decl))
{
int flags = flags_from_decl_or_type (current_function_decl);
if (lookup_attribute ("cold", DECL_ATTRIBUTES
(current_function_decl))
@@ -489,7 +489,7 @@ compute_branch_probabilities (unsigned cfg_checksum,
unsigned lineno_checksum)
int inconsistent = 0;
/* Very simple sanity checks so we catch bugs in our profiling code. */
- if (!profile_info)
+ if (!profile_info || !exec_counts)
return;
if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
{
@@ -230,6 +230,11 @@ extern const enum tree_code_class tree_code_type[];
#define EXPR_P(NODE) IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (TREE_CODE
(NODE)))
+/* Returns true iff the decl has a valid profile that is read. */
+#define DECL_READ_PROFILE_P(decl) \
+ (DECL_STRUCT_FUNCTION(decl) && \
+ profile_status_for_function(DECL_STRUCT_FUNCTION(decl)) ==
PROFILE_READ)
+
/* Number of argument-words in each kind of tree-node. */
Reviewers: jh_suse.cz, davidxl, bjanakiraman_google.com, Message: Jan, please take a look and provide some feedback. Thanks, Description: Added better handling of outdated profiles. gcc dumps profile information in .gcda files that contain the function id and the corresponding counter information. The function cfg and line number checksums are also stored along with the ids. When the program source changes between when the instrumentation run is done vs. when the -fprofile-use is passed, gcc will have functions that do not have a valid profile. These functions could be hot and may not be inlined because the summary of the profile is still valid (sum_max, etc.). This can lead to a situation where the performance with -fprofile-use is lower than with -fno-profile-use. It is easy to have a situation where the profile is outdated. A single line change in the header file can potentially renumber all functions and none of the ids will match the profile generated from the old source. This patch makes the performance degradation in the outdated case much less severe. It adds checks that the profile has been read for a certain function before using the counter values (which are 0 in the case where the profile is invalid). Without this patch we see up to 15% performance degradation by using outdated profiles vs. not using any profile information for Chrome. This patch decreases that degradation down to (0% to 4%). Please review this at http://codereview.appspot.com/5989046/ Affected files: M gcc/ipa-inline.c M gcc/predict.c M gcc/profile.c M gcc/tree.h From 88d8094b6408b38d05241ecf574a51edabeabb3a Mon Sep 17 00:00:00 2001 From: Ahmad Sharif <asharif@chromium.org> Date: Wed, 4 Apr 2012 19:09:23 -0700 Subject: [PATCH] Added better handling of outdated profiles. gcc dumps profile information in .gcda files that contain the function id and the corresponding counter information. The function cfg and line number checksums are also stored along with the ids. When the program source changes between when the instrumentation run is done vs. when the -fprofile-use is passed, gcc will have functions that do not have a valid profile. These functions could be hot and may not be inlined because the summary of the profile is still valid (sum_max, etc.). This can lead to a situation where the performance with -fprofile-use is lower than with -fno-profile-use. It is easy to have a situation where the profile is outdated. A single line change in the header file can potentially renumber all functions and none of the ids will match the profile generated from the old source. This patch makes the performance degradation in the outdated case much less severe. It adds checks that the profile has been read for a certain function before using the counter values (which are 0 in the case where the profile is invalid). Without this patch we see up to 15% performance degradation by using outdated profiles vs. not using any profile information for Chrome. This patch decreases that degradation down to (0% to 4%). --- gcc/ipa-inline.c | 4 +++- gcc/predict.c | 13 +++++++++---- gcc/profile.c | 2 +- gcc/tree.h | 5 +++++ 4 files changed, 18 insertions(+), 6 deletions(-) extern const unsigned char tree_code_length[];