Patchwork Avoid ipa-profile dropping functions unlikely when profile is read

login
register
mail settings
Submitter Jan Hubicka
Date Sept. 9, 2013, 7:40 a.m.
Message ID <20130909074049.GA6331@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/273514/
State New
Headers show

Comments

Jan Hubicka - Sept. 9, 2013, 7:40 a.m.
Hi,
ipa-profile does propagation across the CFG in an attempt to prove that a function is
cold.  This is counter-productive when a profile is read and we can easily work
this out from the count itself.  This patch makes it less aggressive in this setting
and also puts ipa-profile in the business of making count-based decisions that were
previously done in predict.c in an inferior way.

This patch fixes problems Martin Liska noticed with inkscape.

Honza

	* ipa-profile.c: Add toplevel comment.
	(ipa_propagate_frequency_1): Be more conservative when profile is read.
	(contains_hot_call_p): New function.
	(ipa_propagate_frequency): Set frequencies based on counts when
	profile is read.
	* predict.c (compute_function_frequency): Use PROFILE_READ guard for
	profile; do not tamper with profile after inlining if it is read.

Patch

Index: ipa-profile.c
===================================================================
--- ipa-profile.c	(revision 202366)
+++ ipa-profile.c	(working copy)
@@ -17,6 +17,33 @@  You should have received a copy of the G
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+/* ipa-profile pass implements the following analysis propagating profile
+   inter-procedurally.
+
+   - Count histogram construction.  This is a histogram analyzing how much
+     time is spent executing statements with a given execution count read
+     from profile feedback. This histogram is complete only with LTO,
+     otherwise it contains information only about the current unit.
+
+     Similar histogram is also estimated by coverage runtime.  This histogram
+     is not dependent on LTO, but it suffers from various defects; first
+     gcov runtime is not weighting individual basic block by estimated execution
+     time and second the merging of multiple runs makes assumption that the
+     histogram distribution did not change.  Consequently histogram constructed
+     here may be more precise.
+
+     The information is used to set hot/cold thresholds.
+   - Next speculative indirect call resolution is performed:  the local
+     profile pass assigns profile-id to each function and provide us with a
+     histogram specifying the most common target.  We look up the callgraph
+     node corresponding to the target and produce a speculative call.
+
+     This call may or may not survive through IPA optimization based on decision
+     of inliner. 
+   - Finally we propagate the following flags: unlikely executed, executed
+     once, executed at startup and executed at exit.  These flags are used to
+     control code size/performance threshold and code placement (by producing
+     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -301,6 +328,18 @@  ipa_propagate_frequency_1 (struct cgraph
 	    d->only_called_at_startup = 0;
           d->only_called_at_exit &= edge->caller->only_called_at_exit;
 	}
+
+      /* When profile feedback is available, do not try to propagate too hard;
+	 counts are already good guide on function frequencies and roundoff
+	 errors can make us to push function into unlikely section even when
+	 it is executed by the train run.  Transfer the function only if all
+	 callers are unlikely executed.  */
+      if (profile_info && flag_branch_probabilities
+	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
+	      || (edge->caller->global.inlined_to
+		  && edge->caller->global.inlined_to->frequency
+		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
+	  d->maybe_unlikely_executed = false;
       if (!edge->frequency)
 	continue;
       switch (edge->caller->frequency)
@@ -332,6 +371,24 @@  ipa_propagate_frequency_1 (struct cgraph
   return edge != NULL;
 }
 
+/* Return true if NODE contains hot calls.  */
+
+bool
+contains_hot_call_p (struct cgraph_node *node)
+{
+  struct cgraph_edge *e;
+  for (e = node->callees; e; e = e->next_callee)
+    if (cgraph_maybe_hot_edge_p (e))
+      return true;
+    else if (!e->inline_failed
+	     && contains_hot_call_p (e->callee))
+      return true;
+  for (e = node->indirect_calls; e; e = e->next_callee)
+    if (cgraph_maybe_hot_edge_p (e))
+      return true;
+  return false;
+}
+
 /* See if the frequency of NODE can be updated based on frequencies of its
    callers.  */
 bool
@@ -343,6 +400,7 @@  ipa_propagate_frequency (struct cgraph_n
   /* We can not propagate anything useful about externally visible functions
      nor about virtuals.  */
   if (!node->local.local
+      || node->symbol.alias
       || (flag_devirtualize && DECL_VIRTUAL_P (node->symbol.decl)))
     return false;
   gcc_assert (node->symbol.analyzed);
@@ -369,6 +427,36 @@  ipa_propagate_frequency (struct cgraph_n
 		  cgraph_node_name (node));
        changed = true;
     }
+
+  /* With profile we can decide on hot/normal based on count.  */
+  if (node->count)
+    {
+      bool hot = false;
+      if (node->count >= get_hot_bb_threshold ())
+	hot = true;
+      if (!hot)
+	hot |= contains_hot_call_p (node);
+      if (hot)
+	{
+	  if (node->frequency != NODE_FREQUENCY_HOT)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "Node %s promoted to hot.\n",
+			 cgraph_node_name (node));
+	      node->frequency = NODE_FREQUENCY_HOT;
+	      return true;
+	    }
+	  return false;
+	}
+      else if (node->frequency == NODE_FREQUENCY_HOT)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "Node %s reduced to normal.\n",
+		     cgraph_node_name (node));
+	  node->frequency = NODE_FREQUENCY_NORMAL;
+	  changed = true;
+	}
+    }
   /* These come either from profile or user hints; never update them.  */
   if (node->frequency == NODE_FREQUENCY_HOT
       || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
Index: predict.c
===================================================================
--- predict.c	(revision 202366)
+++ predict.c	(working copy)
@@ -2871,13 +2871,14 @@  compute_function_frequency (void)
 {
   basic_block bb;
   struct cgraph_node *node = cgraph_get_node (current_function_decl);
+
   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
     node->only_called_at_startup = true;
   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
     node->only_called_at_exit = true;
 
-  if (!profile_info || !flag_branch_probabilities)
+  if (profile_status != PROFILE_READ)
     {
       int flags = flags_from_decl_or_type (current_function_decl);
       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
@@ -2895,7 +2896,13 @@  compute_function_frequency (void)
         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
       return;
     }
-  node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+
+  /* Only first time try to drop function into unlikely executed.
+     After inlining the roundoff errors may confuse us.
+     Ipa-profile pass will drop functions only called from unlikely
+     functions to unlikely and that is most of what we care about.  */
+  if (!cfun->after_inlining)
+    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
   FOR_EACH_BB (bb)
     {
       if (maybe_hot_bb_p (cfun, bb))