diff mbox

Simplify and improve inliner's badness metric

Message ID 20150101170428.GE13958@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka Jan. 1, 2015, 5:04 p.m. UTC
Hi,
this patch removes use of relative_time_benefit.  This was introduced primarily to get
a known range of the value (0...RELATIVE_TIME_BENEFIT_RANGE) to aid fixed point arithmetics.
Because it is gone it seems more natural to use the actual speedup.
This simplifies the calculations and also improves inliner's behaviour on tramp3d and
some other benchmarks.  I plan to commit the patch after a bit of furhter testing.

Bootstrapped/regtested x86_64-linux.

Honza

	* ipa-inline.c (compute_uninlined_call_time, compute_inlined_call_time):
	Simplify.
	(RELATIVE_TIME_BENEFIT_RANGE, relative_time_benefit): Remove.
	(edge_badness): Handle DECL_DISREGARD_INLINE_LIMITS more regularly;
	use estimated speedup instead of relative time benefit for cost
	metrics.
	(inline_small_functions): Skip optimize_size functions.
diff mbox

Patch

Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 219107)
+++ ipa-inline.c	(working copy)
@@ -524,12 +524,17 @@  inline sreal
 compute_uninlined_call_time (struct inline_summary *callee_info,
 			     struct cgraph_edge *edge)
 {
-  sreal uninlined_call_time = (sreal)callee_info->time
-			      * MAX (edge->frequency, 1)
-			      * cgraph_freq_base_rec;
-  int caller_time = inline_summaries->get (edge->caller->global.inlined_to
-				           ? edge->caller->global.inlined_to
-				           : edge->caller)->time;
+  sreal uninlined_call_time = (sreal)callee_info->time;
+  cgraph_node *caller = (edge->caller->global.inlined_to 
+			 ? edge->caller->global.inlined_to
+			 : edge->caller);
+
+  if (caller->count)
+    uninlined_call_time *= ((sreal) MAX (edge->count, 1)) / caller->count;
+  else if (edge->frequency)
+    uninlined_call_time *= cgraph_freq_base_rec * edge->frequency;
+
+  int caller_time = inline_summaries->get (caller)->time;
   return uninlined_call_time + caller_time;
 }
 
@@ -540,13 +545,26 @@  inline sreal
 compute_inlined_call_time (struct cgraph_edge *edge,
 			   int edge_time)
 {
-  int caller_time = inline_summaries->get (edge->caller->global.inlined_to
-				           ? edge->caller->global.inlined_to
-				           : edge->caller)->time;
-  sreal time = (sreal)caller_time
-	       + ((sreal) (edge_time - inline_edge_summary (edge)->call_stmt_time)
-	          * MAX (edge->frequency, 1)
-	          * cgraph_freq_base_rec);
+  cgraph_node *caller = (edge->caller->global.inlined_to 
+			 ? edge->caller->global.inlined_to
+			 : edge->caller);
+  int caller_time = inline_summaries->get (caller)->time;
+  sreal time = edge_time;
+
+  if (caller->count)
+    time *= ((sreal) MAX (edge->count, 1)) / caller->count;
+  else if (edge->frequency)
+    time *= cgraph_freq_base_rec * edge->frequency;
+
+  /* This calculation should match one in ipa-inline-analysis.
+     FIXME: Once ipa-inline-analysis is converted to sreal this can be
+     simplified.  */
+  time -= (sreal) ((gcov_type) edge->frequency
+		   * inline_edge_summary (edge)->call_stmt_time
+	           * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
+  time += caller_time;
+  if (time <= 0)
+    time = ((sreal) 1) >> 8;
   gcc_checking_assert (time >= 0);
   return time;
 }
@@ -557,8 +575,10 @@  compute_inlined_call_time (struct cgraph
 static bool
 big_speedup_p (struct cgraph_edge *e)
 {
-  sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee), e);
+  sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
+					    e);
   sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
+
   if (time - inlined_time
       > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
 	 * percent_rec)
@@ -856,49 +876,6 @@  want_inline_function_to_all_callers_p (s
   return true;
 }
 
-#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
-
-/* Return relative time improvement for inlining EDGE in range
-   as value NUMERATOR/DENOMINATOR.  */
-
-static inline void
-relative_time_benefit (struct inline_summary *callee_info,
-		       struct cgraph_edge *edge,
-		       int edge_time,
-		       sreal *numerator,
-		       sreal *denominator)
-{
-  /* Inlining into extern inline function is not a win.  */
-  if (DECL_EXTERNAL (edge->caller->global.inlined_to
-		     ? edge->caller->global.inlined_to->decl
-		     : edge->caller->decl))
-    {
-      *numerator = (sreal) 1;
-      *denominator = (sreal) 1024;
-      return;
-    }
-
-  sreal uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
-  sreal inlined_call_time = compute_inlined_call_time (edge, edge_time);
-
-  /* Compute relative time benefit, i.e. how much the call becomes faster.
-     ??? perhaps computing how much the caller+calle together become faster
-     would lead to more realistic results.  */
-  if (uninlined_call_time == (sreal) 0)
-    uninlined_call_time = 1;
-
-  /* Avoid zeros, these are not useful later in calculations.  */
-  if (uninlined_call_time == inlined_call_time)
-    *numerator = ((sreal) 1)>>8;
-  else
-    *numerator = uninlined_call_time - inlined_call_time;
-  *denominator = uninlined_call_time;
-#ifdef ENABLE_CHECKING
-  gcc_checking_assert (*numerator >= 0);
-  gcc_checking_assert (*denominator >= 0);
-#endif
-}
-
 /* A cost model driving the inlining heuristics in a way so the edges with
    smallest badness are inlined first.  After each inlining is performed
    the costs of all caller edges of nodes affected are recomputed so the
@@ -913,9 +890,9 @@  edge_badness (struct cgraph_edge *edge,
   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
   struct inline_summary *callee_info = inline_summaries->get (callee);
   inline_hints hints;
-
-  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
-    return sreal::min ();
+  cgraph_node *caller = (edge->caller->global.inlined_to 
+			 ? edge->caller->global.inlined_to
+			 : edge->caller);
 
   growth = estimate_edge_growth (edge);
   edge_time = estimate_edge_time (edge);
@@ -948,59 +925,72 @@  edge_badness (struct cgraph_edge *edge,
 	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
 		 growth);
     }
+   /* Inlining into EXTERNAL functions is not going to change anything unless
+      they are themselves inlined.  */
+   else if (DECL_EXTERNAL (caller->decl))
+    {
+      if (dump)
+	fprintf (dump_file, "      max: function is external\n");
+      return sreal::max ();
+    }
 
   /* When profiling is available, compute badness as:
 
-	        edge_count * relative_time_benefit
+	        caller_count * time_saved
      goodness = -------------------------------------------
 		growth_of_caller
      badness = - goodness 
 
-    The fraction is upside down, because on edge counts and time beneits
-    the bounds are known. Edge growth is essentially unlimited.  */
+     Badness is defined to be negative and lower than one compute bellow
+     for calls w/o (guessed) profile.  */
 
-  else if (max_count)
+  else if (caller->count)
     {
       sreal numerator, denominator;
-      relative_time_benefit (callee_info, edge, edge_time, &numerator,
-			     &denominator);
-
-      if (edge->count)
-        numerator *= edge->count;
-      denominator *= growth;
+      cgraph_node *caller = (edge->caller->global.inlined_to 
+			     ? edge->caller->global.inlined_to
+			     : edge->caller);
+      numerator = (compute_uninlined_call_time (callee_info, edge)
+		   - compute_inlined_call_time (edge, edge_time));
+
+      if (caller->count)
+        numerator *= caller->count;
+      denominator = growth;
 
       badness = - numerator / denominator;
  
       if (dump)
 	{
-	  sreal num,den;
-          relative_time_benefit (callee_info, edge, edge_time, &num, &den);
 	  fprintf (dump_file,
 		   "      %f: profile info. count %"PRId64
-		   " * Relative benefit %f / growth %i\n",
+		   " * (uninlined_time %f - inlined_time %f) / growth %i\n",
 		   badness.to_double (), (int64_t)edge->count,
-		   (num / den * 100).to_double (), growth);
+		   compute_uninlined_call_time (callee_info, edge).to_double (),
+		   compute_inlined_call_time (edge, edge_time).to_double (),
+		   growth);
 	}
     }
 
   /* When function local profile is available. Compute badness as:
      
-                 relative_time_benefit
+                 time_saved
      goodness =  ---------------------------------
 	         growth_of_caller * overall_growth
 
      badness = - goodness
 
-     compensated by the inline hints.
+     Again use negative value to make calls with profile appear hotter
+     then calls without.
   */
-  /* TODO: We ought suport mixing units where some functions are profiled
-     and some not.  */
-  else if (flag_guess_branch_prob)
+  else if (opt_for_fn (caller->decl, flag_guess_branch_prob))
     {
       sreal numerator, denominator;
-      relative_time_benefit (callee_info, edge, edge_time, &numerator,
-			     &denominator);
-      denominator *= growth;
+
+      numerator = (compute_uninlined_call_time (callee_info, edge)
+		   - compute_inlined_call_time (edge, edge_time));
+      if (numerator == 0)
+	numerator = ((sreal) 1 >> 8);
+      denominator = growth;
       if (callee_info->growth > 0)
 	denominator *= callee_info->growth;
 
@@ -1008,14 +998,11 @@  edge_badness (struct cgraph_edge *edge,
 
       if (dump)
 	{
-	  sreal num,den;
-          relative_time_benefit (callee_info, edge, edge_time, &num, &den);
 	  fprintf (dump_file,
 		   "      %f: guessed profile. frequency %f,"
-		   " benefit %f%%, time w/o inlining %f, time w inlining %f"
+		   " time w/o inlining %f, time w inlining %f"
 		   " overall growth %i (current) %i (original)\n",
 		   badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
-		   (num/den).to_double () * 100, 
 		   compute_uninlined_call_time (callee_info, edge).to_double (),
 		   compute_inlined_call_time (edge, edge_time).to_double (),
 		   estimate_growth (callee),
@@ -1056,7 +1043,9 @@  edge_badness (struct cgraph_edge *edge,
     badness = badness.shift (badness > 0 ? 2 : -2);
   else if (hints & (INLINE_HINT_cross_module))
     badness = badness.shift (badness > 0 ? 1 : -1);
-  if ((hints & INLINE_HINT_declared_inline))
+  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
+    badness = badness.shift (badness > 0 ? -4 : 4);
+  else if ((hints & INLINE_HINT_declared_inline))
     badness = badness.shift (badness > 0 ? -3 : 3);
   if (dump)
     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
@@ -1586,6 +1575,7 @@  inline_small_functions (void)
 	    /* Do not account external functions, they will be optimized out
 	       if not inlined.  Also only count the non-cold portion of program.  */
 	    if (!DECL_EXTERNAL (node->decl)
+		&& !opt_for_fn (node->decl, optimize_size)
 		&& node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
 	      initial_size += info->size;
 	    info->growth = estimate_growth (node);