Patchwork Added better handling of outdated profile data. (issue 5989046)

login
register
mail settings
Submitter asharif@chromium.org
Date April 5, 2012, 6:53 p.m.
Message ID <001636eee18139993a04bcf30e46@google.com>
Download mbox | patch
Permalink /patch/151007/
State New
Headers show

Comments

asharif@chromium.org - April 5, 2012, 6:53 p.m.
Reviewers: jh_suse.cz, davidxl, bjanakiraman_google.com,

Message:
Jan, please take a look and provide some feedback.

Thanks,

Description:
Added better handling of outdated profiles.

gcc dumps profile information in .gcda files that contain the function
id and
the corresponding counter information. The function cfg and line number
checksums are also stored along with the ids. When the program source
changes
between when the instrumentation run is done vs. when the -fprofile-use
is
passed, gcc will have functions that do not have a valid profile. These
functions could be hot and may not be inlined because the summary of the
profile
is still valid (sum_max, etc.). This can lead to a situation where the
performance with -fprofile-use is lower than with -fno-profile-use.

It is easy to have a situation where the profile is outdated. A single
line
change in the header file can potentially renumber all functions and
none of the
ids will match the profile generated from the old source.

This patch makes the performance degradation in the outdated case much
less
severe. It adds checks that the profile has been read for a certain
function
before using the counter values (which are 0 in the case where the
profile is
invalid).

Without this patch we see up to 15% performance degradation by using
outdated
profiles vs. not using any profile information for Chrome. This patch
decreases
that degradation down to (0% to 4%).


Please review this at http://codereview.appspot.com/5989046/

Affected files:
   M gcc/ipa-inline.c
   M gcc/predict.c
   M gcc/profile.c
   M gcc/tree.h


 From 88d8094b6408b38d05241ecf574a51edabeabb3a Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <asharif@chromium.org>
Date: Wed, 4 Apr 2012 19:09:23 -0700
Subject: [PATCH] Added better handling of outdated profiles.

gcc dumps profile information in .gcda files that contain the function id  
and
the corresponding counter information. The function cfg and line number
checksums are also stored along with the ids. When the program source  
changes
between when the instrumentation run is done vs. when the -fprofile-use is
passed, gcc will have functions that do not have a valid profile. These
functions could be hot and may not be inlined because the summary of the  
profile
is still valid (sum_max, etc.). This can lead to a situation where the
performance with -fprofile-use is lower than with -fno-profile-use.

It is easy to have a situation where the profile is outdated. A single line
change in the header file can potentially renumber all functions and none  
of the
ids will match the profile generated from the old source.

This patch makes the performance degradation in the outdated case much less
severe. It adds checks that the profile has been read for a certain function
before using the counter values (which are 0 in the case where the profile  
is
invalid).

Without this patch we see up to 15% performance degradation by using  
outdated
profiles vs. not using any profile information for Chrome. This patch  
decreases
that degradation down to (0% to 4%).
---
  gcc/ipa-inline.c |    4 +++-
  gcc/predict.c    |   13 +++++++++----
  gcc/profile.c    |    2 +-
  gcc/tree.h       |    5 +++++
  4 files changed, 18 insertions(+), 6 deletions(-)


  extern const unsigned char tree_code_length[];

Patch

Index: gcc/ipa-inline.c
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index d7ccf68..fbc6c4a 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -780,7 +780,9 @@  edge_badness (struct cgraph_edge *edge, bool dump)
      The fraction is upside down, becuase on edge counts and time beneits
      the bounds are known. Edge growth is essentially unlimited.  */

-  else if (max_count)
+  else if (max_count
+           && DECL_READ_PROFILE_P(edge->caller->decl)
+           && DECL_READ_PROFILE_P(edge->callee->decl))
      {
        int relbenefit = relative_time_benefit (callee_info, edge,  
time_growth);
        badness =
Index: gcc/predict.c
diff --git a/gcc/predict.c b/gcc/predict.c
index c12b45f..94af954 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -114,7 +114,8 @@  static inline bool
  maybe_hot_frequency_p (int freq)
  {
    struct cgraph_node *node = cgraph_get_node (current_function_decl);
-  if (!profile_info || !flag_branch_probabilities)
+  if (!profile_info || !flag_branch_probabilities
+      || !DECL_READ_PROFILE_P(current_function_decl))
      {
        if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
          return false;
@@ -162,6 +163,7 @@  bool
  cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
  {
    if (profile_info && flag_branch_probabilities
+      && DECL_READ_PROFILE_P(edge->caller->decl)
        && (edge->count
  	  <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
      return false;
@@ -202,9 +204,11 @@  maybe_hot_edge_p (edge e)
  bool
  probably_never_executed_bb_p (const_basic_block bb)
  {
-  if (profile_info && flag_branch_probabilities)
+  if (profile_info && flag_branch_probabilities
+      && DECL_READ_PROFILE_P(current_function_decl))
      return ((bb->count + profile_info->runs / 2) / profile_info->runs) ==  
0;
-  if ((!profile_info || !flag_branch_probabilities)
+  if ((!profile_info || !flag_branch_probabilities
+       || !DECL_READ_PROFILE_P(current_function_decl))
        && (cgraph_get_node (current_function_decl)->frequency
  	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
      return true;
@@ -2280,7 +2284,8 @@  compute_function_frequency (void)
    if (DECL_STATIC_DESTRUCTOR (current_function_decl))
      node->only_called_at_exit = true;

-  if (!profile_info || !flag_branch_probabilities)
+  if (!profile_info || !flag_branch_probabilities
+      || !DECL_READ_PROFILE_P(current_function_decl))
      {
        int flags = flags_from_decl_or_type (current_function_decl);
        if (lookup_attribute ("cold", DECL_ATTRIBUTES  
(current_function_decl))
Index: gcc/profile.c
diff --git a/gcc/profile.c b/gcc/profile.c
index 10ab756..096e901 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -489,7 +489,7 @@  compute_branch_probabilities (unsigned cfg_checksum,  
unsigned lineno_checksum)
    int inconsistent = 0;

    /* Very simple sanity checks so we catch bugs in our profiling code.  */
-  if (!profile_info)
+  if (!profile_info || !exec_counts)
      return;
    if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
      {
Index: gcc/tree.h
diff --git a/gcc/tree.h b/gcc/tree.h
index da6be99..abb8df7 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -230,6 +230,11 @@  extern const enum tree_code_class tree_code_type[];

  #define EXPR_P(NODE) IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (TREE_CODE  
(NODE)))

+/* Returns true iff the decl has a valid profile that is read.  */
+#define DECL_READ_PROFILE_P(decl) \
+    (DECL_STRUCT_FUNCTION(decl) && \
+     profile_status_for_function(DECL_STRUCT_FUNCTION(decl)) ==  
PROFILE_READ)
+
  /* Number of argument-words in each kind of tree-node.  */