Patchwork [Google] AutoFDO cleanup patch

login
register
mail settings
Submitter Dehao Chen
Date May 11, 2013, 10:53 p.m.
Message ID <CAO2gOZU0CTsnZ6UXSLfh6Rf_HxaJ26Nf=ykvTdKeAFQt=artVw@mail.gmail.com>
Download mbox | patch
Permalink /patch/243153/
State New
Headers show

Comments

Dehao Chen - May 11, 2013, 10:53 p.m.
In AutoFDO, we early-inline callsites that was inlined in profiling
runs regardless of the size limit. With this change, the existing
ipa-inline tunings for AutoFDO is unnecessary: it's fine to just use
the traditional FDO based heuristic. This patch cleans up the original
tunings and make it easier to port to gcc 4_8.

Bootstrapped and passed all regression test. And passed benchmark
performance tests.

Is it ok for google-4_7 branch?

Thanks,
Dehao

http://codereview.appspot.com/9250047
Xinliang David Li - May 12, 2013, 4:30 a.m.
Looks good.

David

On Sat, May 11, 2013 at 3:53 PM, Dehao Chen <dehao@google.com> wrote:
> In AutoFDO, we early-inline callsites that was inlined in profiling
> runs regardless of the size limit. With this change, the existing
> ipa-inline tunings for AutoFDO is unnecessary: it's fine to just use
> the traditional FDO based heuristic. This patch cleans up the original
> tunings and make it easier to port to gcc 4_8.
>
> Bootstrapped and passed all regression test. And passed benchmark
> performance tests.
>
> Is it ok for google-4_7 branch?
>
> Thanks,
> Dehao
>
> http://codereview.appspot.com/9250047

Patch

Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(revision 198796)
+++ gcc/ipa-inline.c	(working copy)
@@ -481,21 +481,13 @@  edge_hot_enough_p (struct cgraph_edge *edge)
 {
   if (cgraph_maybe_hot_edge_p (edge))
     return true;
-  if (flag_auto_profile)
-    {
-      gcov_type callsite_total_count;
-      /* Check if total sample counts in the callee is available.  */
-      if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true)
-	  && maybe_hot_count_p (callsite_total_count))
-	return true;
-      /* We disable hot-caller heuristic if the callee's entry count is
-	 0 because in this case we do not have enough information to
-	 calculate the scaling factor.  */
-      if (edge->callee->count == 0 && edge->callee->max_bb_count > 0)
-	return false;
-      /* In AutoFDO, if the preivous few heuristic fail, we will fall
-	 back to use hot-caller heuristic as is used by FDO.  */
-    }
+
+  /* We disable hot-caller heuristic if the callee's entry count is
+     0 because in this case we do not have enough information to
+     calculate the scaling factor.  */
+  if (flag_auto_profile && edge->callee->count == 0
+      && edge->callee->max_bb_count > 0)
+    return false;
   if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER)
       && maybe_hot_count_p (edge->caller->max_bb_count))
     return true;
@@ -874,32 +866,10 @@  edge_badness (struct cgraph_edge *edge, bool dump)
   else if (max_count)
     {
       int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
-      if (flag_auto_profile && edge->count == 0)
-	{
-	  gcov_type callsite_count;
-	  if (afdo_get_callsite_count (edge, &callsite_count, NULL, false))
-	    edge->count = callsite_count;
-	  if (edge->count > max_count)
-	    max_count = edge->count;
-	}
       badness =
 	((int)
 	 ((double) edge->count * INT_MIN / 2 / max_count / 512) *
 	 relative_time_benefit (callee_info, edge, time_growth)) / growth;
-      if (flag_auto_profile && profile_info->sum_all > 0)
-	{
-	  gcov_type callsite_total_count;
-	  if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true))
-	    {
-	      gcov_type afdo_badness =
-		((int)
-		 ((double) callsite_total_count * INT_MIN / 2 /
-		 profile_info->sum_all / 64) *
-		 relative_time_benefit (callee_info, edge, time_growth)) / growth;
-	      if (afdo_badness < badness)
-		badness = afdo_badness;
-	    }
-	}
 
       /* Be sure that insanity of the profile won't lead to increasing counts
 	 in the scalling and thus to overflow in the computation above.  */
@@ -1568,7 +1538,7 @@  inline_small_functions (void)
 	 of date value on it, we re-insert it now.  */
       current_badness = edge_badness (edge, false);
       gcc_assert (cached_badness == current_badness);
-      gcc_assert (flag_auto_profile || current_badness >= badness);
+      gcc_assert (current_badness >= badness);
       if (current_badness != badness)
 	{
 	  edge->aux = fibheap_insert (heap, current_badness, edge);
Index: gcc/ipa-inline-transform.c
===================================================================
--- gcc/ipa-inline-transform.c	(revision 198796)
+++ gcc/ipa-inline-transform.c	(working copy)
@@ -139,23 +139,6 @@  void
 clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
 		     bool update_original, int *overall_size)
 {
-  bool has_callsite_profile = false;
-  gcov_type callsite_total_count, callsite_max_count; 
-
-  if (flag_auto_profile)
-    {
-      has_callsite_profile =
-	  afdo_get_callsite_count (e, &callsite_total_count,
-				   &callsite_max_count, true);
-      /* If the callsite is inlined in the profile-collection build,
-	 i.e. the cloned callee has its separate profile, we will use
-	 this separate profile to annotate the callee, and the real
-	 callee body will not be affected. Thus here we need to disable
-	 update_original.  */
-      if (has_callsite_profile)
-	update_original = false;
-    }
-
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -196,23 +179,6 @@  clone_inlined_nodes (struct cgraph_edge *e, bool d
 	}
     }
 
-  if (flag_auto_profile && has_callsite_profile)
-    {
-      /* The callee's total count will be non-zero if the callsite
-         was inlined in the profile-collection build, In this case,
-         the original callee may be label unlikely_executed, which
-         may prevent its callees being inlined. Thus we need to reset
-         its frequency to normal.  */
-      if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-	e->callee->frequency = NODE_FREQUENCY_NORMAL;
-      /* we do not have enough information to calculate the node count
-	 and max_bb_count. Thus we set them to the same value to make
-	 other optimizations aware that they are from cloned inline
-	 instances.  */
-      e->callee->count = callsite_total_count;
-      e->callee->max_bb_count = callsite_max_count;
-    }
-
   if (e->caller->global.inlined_to)
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
@@ -417,8 +383,6 @@  inline_call (struct cgraph_edge *e, bool update_or
 
   clone_inlined_nodes (e, true, update_original, overall_size);
 
-  if (flag_auto_profile)
-    afdo_add_copy_scale (e);
 
   gcc_assert (curr->callee->global.inlined_to == to);
 
Index: gcc/auto-profile.c
===================================================================
--- gcc/auto-profile.c	(revision 198796)
+++ gcc/auto-profile.c	(working copy)
@@ -178,9 +178,6 @@  static htab_t function_htab;
 /* Hash table to hold stack information.  */
 static htab_t stack_htab;
 
-/* Hash table to hold inline scale information.  */
-static htab_t stack_scale_htab;
-
 /* Hash table to hold assembler name to bfd name mapping.  */
 static htab_t bfd_name_htab;
 
@@ -815,142 +812,6 @@  afdo_add_bfd_name_mapping (const char *as_name, co
     free (entry);
 }
 
-/* When EDGE is inlined, the callee is cloned recursively. This function
-   updates the copy scale recursively along the callee. STACK stores the
-   call stack info from the original inlined edge to the caller of EDGE.
-
-   E.g. foo calls bar with call count 100;
-	bar calls baz with call count 300;
-	bar has an entry count of 400, baz has an entry count of 1000;
-   Initial callgraph looks like:
-     foo --(100)--> bar(400)
-     bar --(300)--> baz(1000)
-
-   Consider baz is first inlined into bar, we will have a call graph like:
-     foo --(100)--> bar(400)
-     bar --(300)--> baz.clone(300)
-     baz(700)
-   At this point, a copyscale mapping is added:
-     (bar->baz) --> 0.3
-
-   Consider bar is then inlined into foo, we will have a call graph like:
-     foo --(100)--> bar.clone(100)
-     bar.clone --(75)-->baz.clone_2(75)
-     bar --(225)->baz.clone(225)
-     baz(700)
-   At this point, two copyscale mappings are added:
-     (foo->bar) --> 0.25
-     (foo->bar->baz)  --> 0.25 * 0.3
-*/
-
-static void
-afdo_propagate_copy_scale (struct cgraph_edge *edge, struct gcov_stack *stack)
-{
-  struct gcov_stack *new_stack, *entry, **stack_slot;
-  struct cgraph_edge *e;
-
-  if (edge->callee->global.inlined_to == NULL)
-    return;
-  if (stack->count == 0)
-    return;
-
-  new_stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
-  new_stack->func_name =
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->caller->decl));
-  new_stack->callee_name =
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
-  new_stack->size = get_inline_stack_size_by_stmt (edge->call_stmt);
-  new_stack->stack = (struct gcov_callsite_pos *) xmalloc (
-      sizeof (struct gcov_callsite_pos) * (new_stack->size + stack->size));
-  get_inline_stack_by_stmt (edge->call_stmt, edge->caller->decl,
-			    new_stack->stack, false);
-  entry = (struct gcov_stack *) htab_find (stack_scale_htab, new_stack);
-  if (entry == NULL)
-    {
-      free (new_stack->stack);
-      free (new_stack);
-      return;
-    }
-
-  new_stack->func_name = stack->func_name;
-  new_stack->count = entry->count * stack->count / REG_BR_PROB_BASE;
-  memcpy (new_stack->stack + new_stack->size,
-	  stack->stack, stack->size * sizeof (struct gcov_callsite_pos));
-  new_stack->size += stack->size;
-  stack_slot = (struct gcov_stack **)
-      htab_find_slot (stack_scale_htab, new_stack, INSERT);
-  if (!*stack_slot)
-    *stack_slot = new_stack;
-  else
-    (*stack_slot)->count = MAX ((*stack_slot)->count, new_stack->count);
-
-  for (e = edge->callee->callees; e; e = e->next_callee)
-    afdo_propagate_copy_scale (e, new_stack);
-}
-
-/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count)
-   is recorded in a hash map.  */
-
-void
-afdo_add_copy_scale (struct cgraph_edge *edge)
-{
-  struct gcov_stack *stack;
-  struct gcov_stack **stack_slot;
-  int scale;
-  int size = get_inline_stack_size_by_edge (edge);
-  struct cgraph_node *n = edge->callee->clone_of;
-  struct cgraph_edge *e;
-  gcov_type sum_cloned_count;
-
-  if (edge->callee->clone_of)
-    {
-      n = edge->callee->clone_of->clones;
-      sum_cloned_count = edge->callee->clone_of->count;
-    }
-  else
-    {
-      n = edge->callee->clones;
-      sum_cloned_count = edge->callee->count;
-    }
-
-  for (; n; n = n->next_sibling_clone)
-    sum_cloned_count += n->count;
-  if (sum_cloned_count > 0)
-    scale = (double) edge->count * REG_BR_PROB_BASE / sum_cloned_count;
-  else if (edge->caller->count == 0 && edge->caller->max_bb_count == 0)
-    scale = 0;
-  else
-    scale = REG_BR_PROB_BASE;
-  if (scale > REG_BR_PROB_BASE)
-    scale = REG_BR_PROB_BASE;
-
-  if (size == 0)
-    return;
-  stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
-  stack->func_name
-      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
-	edge->caller->global.inlined_to ?
-	    edge->caller->global.inlined_to->decl : edge->caller->decl));
-  stack->callee_name
-      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
-  stack->size = size;
-  stack->stack = (struct gcov_callsite_pos *)
-      xmalloc (sizeof (struct gcov_callsite_pos) * size);
-  stack->count = scale;
-
-  get_inline_stack_by_edge (edge, stack->stack);
-
-  stack_slot = (struct gcov_stack **)
-      htab_find_slot (stack_scale_htab, stack, INSERT);
-  if (!*stack_slot)
-    *stack_slot = stack;
-  else
-    (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count);
-
-  for (e = edge->callee->callees; e; e = e->next_callee)
-    afdo_propagate_copy_scale (e, stack);
-}
-
 /* For a given POS_STACK with SIZE, get the COUNT, MAX_COUNT, NUM_INST,
    HIST_SIZE and HIST for the inline stack. If CALLEE_NAME is non-null,
    the COUNT/MAX_COUNT represents the total/max count in the inline stack.
@@ -964,56 +825,24 @@  get_stack_count (struct gcov_callsite_pos *pos_sta
 		 gcov_type *count, gcov_type *max_count, gcov_type *num_inst,
 		 gcov_unsigned_t *hist_size, struct gcov_hist **hist)
 {
-  int i;
-
-  for (i = 0; i < size; i++)
+  struct gcov_stack stack, *entry;
+  stack.func_name = pos_stack[size - 1].func;
+  stack.callee_name = callee_name;
+  stack.stack = pos_stack;
+  stack.size = size;
+  entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
+  if (entry)
     {
-      struct gcov_stack stack, *entry;
-      stack.func_name = pos_stack[size - i - 1].func;
-      stack.callee_name = callee_name;
-      stack.stack = pos_stack;
-      stack.size = size - i;
-      entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
-      if (entry)
+      *count = entry->count;
+      *num_inst = entry->num_inst;
+      if (max_count)
+	*max_count = entry->max_count;
+      if (hist_size)
 	{
-	  if (i == 0)
-	    {
-	      *count = entry->count;
-	      *num_inst = entry->num_inst;
-	      if (max_count)
-		*max_count = entry->max_count;
-	      if (hist_size)
-		{
-		  *hist_size = entry->hist_size;
-		  *hist = entry->hist;
-		}
-	      return true;
-	    }
-	  else
-	    {
-	      struct gcov_stack scale_stack, *scale_entry;
-	      scale_stack.stack = pos_stack + size - i;
-	      scale_stack.size = i;
-	      scale_stack.func_name = pos_stack[size - 1].func;
-	      scale_stack.callee_name = stack.func_name;
-	      scale_entry = (struct gcov_stack *)
-		  htab_find (stack_scale_htab, &scale_stack);
-	      if (scale_entry)
-		{
-		  *count = entry->count * scale_entry->count
-			   / REG_BR_PROB_BASE;
-		  *num_inst = entry->num_inst;
-		  if (max_count)
-		    *max_count = entry->max_count;
-		  if (hist_size)
-		    {
-		      *hist_size = entry->hist_size;
-		      *hist = entry->hist;
-		    }
-		  return true;
-		}
-	    }
+	  *hist_size = entry->hist_size;
+	  *hist = entry->hist;
 	}
+      return true;
     }
   *count = 0;
   *num_inst = 0;
@@ -1057,14 +886,14 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count
    to COUNT, otherwise save EDGE's count to COUNT.  */
 
-bool
+static bool
 afdo_get_callsite_count (struct cgraph_edge *edge, gcov_type *count,
-			 gcov_type *max_count, bool is_total)
+			 gcov_type *max_count)
 {
   struct gcov_callsite_pos *pos_stack;
   gcov_type num_inst;
-  const char *callee_name = is_total ?
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)) : NULL;
+  const char *callee_name =
+      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
   int size = get_inline_stack_size_by_edge (edge);
 
   if (size == 0)
@@ -1074,10 +903,6 @@  afdo_get_callsite_count (struct cgraph_edge *edge,
 
   get_inline_stack_by_edge (edge, pos_stack);
 
-  if (!is_total)
-    pos_stack[0].discr =
-	get_discriminator_from_locus (gimple_location (edge->call_stmt));
-
   return get_stack_count (pos_stack, callee_name,
 			  size, count, max_count, &num_inst, NULL, NULL);
 }
@@ -1131,11 +956,19 @@  afdo_annotate_cfg (void)
 	max_count = bb->count;
     }
   if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count)
-    ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+    {
+      ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+      ENTRY_BLOCK_PTR->next_bb->flags |= BB_ANNOTATED;
+    }
+  if (ENTRY_BLOCK_PTR->count > EXIT_BLOCK_PTR->prev_bb->count)
+    {
+      EXIT_BLOCK_PTR->prev_bb->count = ENTRY_BLOCK_PTR->count;
+      EXIT_BLOCK_PTR->prev_bb->flags |= BB_ANNOTATED;
+    }
   if (max_count > 0)
     {
-      counts_to_freqs ();
       afdo_calculate_branch_prob ();
+      counts_to_freqs ();
       profile_status = PROFILE_READ;
     }
   if (flag_value_profile_transformations)
@@ -1416,13 +1249,6 @@  init_auto_profile (void)
 				  0,
 				  xcalloc,
 				  free);
-  /* Initialize the stack scale hash table.  */
-  stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
-				  afdo_stack_hash,
-				  afdo_stack_eq,
-				  0,
-				  xcalloc,
-				  free);
   /* Initialize the bfd name mapping table.  */
   bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
 				     afdo_bfd_name_hash,
@@ -1477,7 +1303,6 @@  end_auto_profile (void)
   free (file_names);
   htab_delete (function_htab);
   htab_delete (stack_htab);
-  htab_delete (stack_scale_htab);
   htab_delete (bfd_name_htab);
   htab_delete (module_htab);
   profile_info = NULL;
@@ -1845,7 +1670,7 @@  bool
 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
 {
   gcov_type count, max_count;
-  if (afdo_get_callsite_count (edge, &count, &max_count, true))
+  if (afdo_get_callsite_count (edge, &count, &max_count))
     {
       bool is_hot;
       const struct gcov_ctr_summary *saved_profile_info = profile_info;
Index: gcc/auto-profile.h
===================================================================
--- gcc/auto-profile.h	(revision 198796)
+++ gcc/auto-profile.h	(working copy)
@@ -32,16 +32,9 @@  extern void afdo_set_current_function_count (void)
 /* Add the assembly_name to bfd_name mapping.  */
 extern void afdo_add_bfd_name_mapping (const char *, const char *);
 
-/* Add copy scale for an inlined edge to stack_scale_map.  */
-extern void afdo_add_copy_scale (struct cgraph_edge *);
-
 /* Calculate branch probability in both AutoFDO pass and after inlining.  */
 extern void afdo_calculate_branch_prob (void);
 
-/* Calculate total sample count of an inlined callsite.  */
-extern bool afdo_get_callsite_count (struct cgraph_edge *, gcov_type *,
-				     gcov_type *, bool);
-
 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
 extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);