Patchwork [Google] AutoFDO cleanup patch

login
register
mail settings
Submitter Dehao Chen
Date May 13, 2013, 1:31 a.m.
Message ID <CAO2gOZXZCyONfh0yn1MGEVRTQ-QfjQgEdpZoZUxovr4d1U20_Q@mail.gmail.com>
Download mbox | patch
Permalink /patch/243228/
State New
Headers show

Comments

Dehao Chen - May 13, 2013, 1:31 a.m.
I've added some more cleanup to make AutoFDO isolated as well as
consistent wil 4.8.

Bootstrapped and passed regression test and benchmark performance test.

OK for google branches?

Thanks,
Dehao

http://codereview.appspot.com/9250047

On Sat, May 11, 2013 at 9:30 PM, Xinliang David Li <davidxl@google.com> wrote:
> Looks good.
>
> David
>
> On Sat, May 11, 2013 at 3:53 PM, Dehao Chen <dehao@google.com> wrote:
>> In AutoFDO, we early-inline callsites that was inlined in profiling
>> runs regardless of the size limit. With this change, the existing
>> ipa-inline tunings for AutoFDO is unnecessary: it's fine to just use
>> the traditional FDO based heuristic. This patch cleans up the original
>> tunings and make it easier to port to gcc 4_8.
>>
>> Bootstrapped and passed all regression test. And passed benchmark
>> performance tests.
>>
>> Is it ok for google-4_7 branch?
>>
>> Thanks,
>> Dehao
>>
>> http://codereview.appspot.com/9250047

Patch

Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(revision 198796)
+++ gcc/ipa-inline.c	(working copy)
@@ -481,21 +481,13 @@  edge_hot_enough_p (struct cgraph_edge *edge)
 {
   if (cgraph_maybe_hot_edge_p (edge))
     return true;
-  if (flag_auto_profile)
-    {
-      gcov_type callsite_total_count;
-      /* Check if total sample counts in the callee is available.  */
-      if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true)
-	  && maybe_hot_count_p (callsite_total_count))
-	return true;
-      /* We disable hot-caller heuristic if the callee's entry count is
-	 0 because in this case we do not have enough information to
-	 calculate the scaling factor.  */
-      if (edge->callee->count == 0 && edge->callee->max_bb_count > 0)
-	return false;
-      /* In AutoFDO, if the preivous few heuristic fail, we will fall
-	 back to use hot-caller heuristic as is used by FDO.  */
-    }
+
+  /* We disable hot-caller heuristic if the callee's entry count is
+     0 because in this case we do not have enough information to
+     calculate the scaling factor.  */
+  if (flag_auto_profile && edge->callee->count == 0
+      && edge->callee->max_bb_count > 0)
+    return false;
   if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER)
       && maybe_hot_count_p (edge->caller->max_bb_count))
     return true;
@@ -874,36 +866,14 @@  edge_badness (struct cgraph_edge *edge, bool dump)
   else if (max_count)
     {
       int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
-      if (flag_auto_profile && edge->count == 0)
-	{
-	  gcov_type callsite_count;
-	  if (afdo_get_callsite_count (edge, &callsite_count, NULL, false))
-	    edge->count = callsite_count;
-	  if (edge->count > max_count)
-	    max_count = edge->count;
-	}
       badness =
 	((int)
 	 ((double) edge->count * INT_MIN / 2 / max_count / 512) *
 	 relative_time_benefit (callee_info, edge, time_growth)) / growth;
-      if (flag_auto_profile && profile_info->sum_all > 0)
-	{
-	  gcov_type callsite_total_count;
-	  if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true))
-	    {
-	      gcov_type afdo_badness =
-		((int)
-		 ((double) callsite_total_count * INT_MIN / 2 /
-		 profile_info->sum_all / 64) *
-		 relative_time_benefit (callee_info, edge, time_growth)) / growth;
-	      if (afdo_badness < badness)
-		badness = afdo_badness;
-	    }
-	}
 
       /* Be sure that insanity of the profile won't lead to increasing counts
 	 in the scalling and thus to overflow in the computation above.  */
-      gcc_assert (flag_auto_profile || max_count >= edge->count);
+      gcc_assert (max_count >= edge->count);
       if (dump)
 	{
 	  fprintf (dump_file,
@@ -1534,7 +1504,6 @@  inline_small_functions (void)
       }
 
   gcc_assert (in_lto_p
-	      || flag_auto_profile
 	      || !max_count
 	      || (profile_info && flag_branch_probabilities));
 
@@ -1568,7 +1537,7 @@  inline_small_functions (void)
 	 of date value on it, we re-insert it now.  */
       current_badness = edge_badness (edge, false);
       gcc_assert (cached_badness == current_badness);
-      gcc_assert (flag_auto_profile || current_badness >= badness);
+      gcc_assert (current_badness >= badness);
       if (current_badness != badness)
 	{
 	  edge->aux = fibheap_insert (heap, current_badness, edge);
Index: gcc/cgraph.c
===================================================================
--- gcc/cgraph.c	(revision 198796)
+++ gcc/cgraph.c	(working copy)
@@ -2326,11 +2326,6 @@  cgraph_clone_node (struct cgraph_node *n, tree dec
                               / n->count) * count;
   new_node->is_versioned_clone = n->is_versioned_clone;
   new_node->frequency = n->frequency;
-  /* In AutoFDO, a cloned callee may be hot even when the original
-     function is profiled cold.  */
-  if (flag_auto_profile && count > 0
-      && new_node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-    new_node->frequency = NODE_FREQUENCY_NORMAL;
   new_node->clone = n->clone;
   new_node->clone.tree_map = 0;
   if (n->count)
Index: gcc/auto-profile.c
===================================================================
--- gcc/auto-profile.c	(revision 198796)
+++ gcc/auto-profile.c	(working copy)
@@ -25,6 +25,8 @@  along with GCC; see the file COPYING3.  If not see
 
 #include "config.h"
 #include "system.h"
+#include "coretypes.h"
+#include "tree.h"
 #include "flags.h"	      /* for auto_profile_file.  */
 #include "basic-block.h"      /* for gcov_type.	 */
 #include "diagnostic-core.h"  /* for inform ().  */
@@ -178,9 +180,6 @@  static htab_t function_htab;
 /* Hash table to hold stack information.  */
 static htab_t stack_htab;
 
-/* Hash table to hold inline scale information.  */
-static htab_t stack_scale_htab;
-
 /* Hash table to hold assembler name to bfd name mapping.  */
 static htab_t bfd_name_htab;
 
@@ -525,7 +524,7 @@  read_aux_modules (void)
       if ((aux_entry->lang & GCOV_MODULE_ASM_STMTS)
 	   && flag_ripa_disallow_asm_modules)
 	{
-	  if (flag_opt_info >= OPT_INFO_MIN)
+	  if (flag_opt_info)
 	    inform (0, "Not importing %s: contains "
 		    "assembler statements", aux_entry->name);
 	  continue;
@@ -533,7 +532,7 @@  read_aux_modules (void)
       afdo_add_module (&module_infos[curr_module], aux_entry, false);
       if (incompatible_cl_args (module_infos[0], module_infos[curr_module]))
 	{
-	  if (flag_opt_info >= OPT_INFO_MIN)
+	  if (flag_opt_info)
 	    inform (0, "Not importing %s: command-line"
 		    " arguments not compatible with primary module",
 		    aux_entry->name);
@@ -815,142 +814,6 @@  afdo_add_bfd_name_mapping (const char *as_name, co
     free (entry);
 }
 
-/* When EDGE is inlined, the callee is cloned recursively. This function
-   updates the copy scale recursively along the callee. STACK stores the
-   call stack info from the original inlined edge to the caller of EDGE.
-
-   E.g. foo calls bar with call count 100;
-	bar calls baz with call count 300;
-	bar has an entry count of 400, baz has an entry count of 1000;
-   Initial callgraph looks like:
-     foo --(100)--> bar(400)
-     bar --(300)--> baz(1000)
-
-   Consider baz is first inlined into bar, we will have a call graph like:
-     foo --(100)--> bar(400)
-     bar --(300)--> baz.clone(300)
-     baz(700)
-   At this point, a copyscale mapping is added:
-     (bar->baz) --> 0.3
-
-   Consider bar is then inlined into foo, we will have a call graph like:
-     foo --(100)--> bar.clone(100)
-     bar.clone --(75)-->baz.clone_2(75)
-     bar --(225)->baz.clone(225)
-     baz(700)
-   At this point, two copyscale mappings are added:
-     (foo->bar) --> 0.25
-     (foo->bar->baz)  --> 0.25 * 0.3
-*/
-
-static void
-afdo_propagate_copy_scale (struct cgraph_edge *edge, struct gcov_stack *stack)
-{
-  struct gcov_stack *new_stack, *entry, **stack_slot;
-  struct cgraph_edge *e;
-
-  if (edge->callee->global.inlined_to == NULL)
-    return;
-  if (stack->count == 0)
-    return;
-
-  new_stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
-  new_stack->func_name =
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->caller->decl));
-  new_stack->callee_name =
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
-  new_stack->size = get_inline_stack_size_by_stmt (edge->call_stmt);
-  new_stack->stack = (struct gcov_callsite_pos *) xmalloc (
-      sizeof (struct gcov_callsite_pos) * (new_stack->size + stack->size));
-  get_inline_stack_by_stmt (edge->call_stmt, edge->caller->decl,
-			    new_stack->stack, false);
-  entry = (struct gcov_stack *) htab_find (stack_scale_htab, new_stack);
-  if (entry == NULL)
-    {
-      free (new_stack->stack);
-      free (new_stack);
-      return;
-    }
-
-  new_stack->func_name = stack->func_name;
-  new_stack->count = entry->count * stack->count / REG_BR_PROB_BASE;
-  memcpy (new_stack->stack + new_stack->size,
-	  stack->stack, stack->size * sizeof (struct gcov_callsite_pos));
-  new_stack->size += stack->size;
-  stack_slot = (struct gcov_stack **)
-      htab_find_slot (stack_scale_htab, new_stack, INSERT);
-  if (!*stack_slot)
-    *stack_slot = new_stack;
-  else
-    (*stack_slot)->count = MAX ((*stack_slot)->count, new_stack->count);
-
-  for (e = edge->callee->callees; e; e = e->next_callee)
-    afdo_propagate_copy_scale (e, new_stack);
-}
-
-/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count)
-   is recorded in a hash map.  */
-
-void
-afdo_add_copy_scale (struct cgraph_edge *edge)
-{
-  struct gcov_stack *stack;
-  struct gcov_stack **stack_slot;
-  int scale;
-  int size = get_inline_stack_size_by_edge (edge);
-  struct cgraph_node *n = edge->callee->clone_of;
-  struct cgraph_edge *e;
-  gcov_type sum_cloned_count;
-
-  if (edge->callee->clone_of)
-    {
-      n = edge->callee->clone_of->clones;
-      sum_cloned_count = edge->callee->clone_of->count;
-    }
-  else
-    {
-      n = edge->callee->clones;
-      sum_cloned_count = edge->callee->count;
-    }
-
-  for (; n; n = n->next_sibling_clone)
-    sum_cloned_count += n->count;
-  if (sum_cloned_count > 0)
-    scale = (double) edge->count * REG_BR_PROB_BASE / sum_cloned_count;
-  else if (edge->caller->count == 0 && edge->caller->max_bb_count == 0)
-    scale = 0;
-  else
-    scale = REG_BR_PROB_BASE;
-  if (scale > REG_BR_PROB_BASE)
-    scale = REG_BR_PROB_BASE;
-
-  if (size == 0)
-    return;
-  stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
-  stack->func_name
-      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
-	edge->caller->global.inlined_to ?
-	    edge->caller->global.inlined_to->decl : edge->caller->decl));
-  stack->callee_name
-      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
-  stack->size = size;
-  stack->stack = (struct gcov_callsite_pos *)
-      xmalloc (sizeof (struct gcov_callsite_pos) * size);
-  stack->count = scale;
-
-  get_inline_stack_by_edge (edge, stack->stack);
-
-  stack_slot = (struct gcov_stack **)
-      htab_find_slot (stack_scale_htab, stack, INSERT);
-  if (!*stack_slot)
-    *stack_slot = stack;
-  else
-    (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count);
-
-  for (e = edge->callee->callees; e; e = e->next_callee)
-    afdo_propagate_copy_scale (e, stack);
-}
-
 /* For a given POS_STACK with SIZE, get the COUNT, MAX_COUNT, NUM_INST,
    HIST_SIZE and HIST for the inline stack. If CALLEE_NAME is non-null,
    the COUNT/MAX_COUNT represents the total/max count in the inline stack.
@@ -964,56 +827,24 @@  get_stack_count (struct gcov_callsite_pos *pos_sta
 		 gcov_type *count, gcov_type *max_count, gcov_type *num_inst,
 		 gcov_unsigned_t *hist_size, struct gcov_hist **hist)
 {
-  int i;
-
-  for (i = 0; i < size; i++)
+  struct gcov_stack stack, *entry;
+  stack.func_name = pos_stack[size - 1].func;
+  stack.callee_name = callee_name;
+  stack.stack = pos_stack;
+  stack.size = size;
+  entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
+  if (entry)
     {
-      struct gcov_stack stack, *entry;
-      stack.func_name = pos_stack[size - i - 1].func;
-      stack.callee_name = callee_name;
-      stack.stack = pos_stack;
-      stack.size = size - i;
-      entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
-      if (entry)
+      *count = entry->count;
+      *num_inst = entry->num_inst;
+      if (max_count)
+	*max_count = entry->max_count;
+      if (hist_size)
 	{
-	  if (i == 0)
-	    {
-	      *count = entry->count;
-	      *num_inst = entry->num_inst;
-	      if (max_count)
-		*max_count = entry->max_count;
-	      if (hist_size)
-		{
-		  *hist_size = entry->hist_size;
-		  *hist = entry->hist;
-		}
-	      return true;
-	    }
-	  else
-	    {
-	      struct gcov_stack scale_stack, *scale_entry;
-	      scale_stack.stack = pos_stack + size - i;
-	      scale_stack.size = i;
-	      scale_stack.func_name = pos_stack[size - 1].func;
-	      scale_stack.callee_name = stack.func_name;
-	      scale_entry = (struct gcov_stack *)
-		  htab_find (stack_scale_htab, &scale_stack);
-	      if (scale_entry)
-		{
-		  *count = entry->count * scale_entry->count
-			   / REG_BR_PROB_BASE;
-		  *num_inst = entry->num_inst;
-		  if (max_count)
-		    *max_count = entry->max_count;
-		  if (hist_size)
-		    {
-		      *hist_size = entry->hist_size;
-		      *hist = entry->hist;
-		    }
-		  return true;
-		}
-	    }
+	  *hist_size = entry->hist_size;
+	  *hist = entry->hist;
 	}
+      return true;
     }
   *count = 0;
   *num_inst = 0;
@@ -1057,14 +888,14 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count
    to COUNT, otherwise save EDGE's count to COUNT.  */
 
-bool
-afdo_get_callsite_count (struct cgraph_edge *edge, gcov_type *count,
-			 gcov_type *max_count, bool is_total)
+static bool
+get_callsite_count (struct cgraph_edge *edge, gcov_type *count,
+		    gcov_type *max_count)
 {
   struct gcov_callsite_pos *pos_stack;
   gcov_type num_inst;
-  const char *callee_name = is_total ?
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)) : NULL;
+  const char *callee_name =
+      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
   int size = get_inline_stack_size_by_edge (edge);
 
   if (size == 0)
@@ -1074,19 +905,15 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 
   get_inline_stack_by_edge (edge, pos_stack);
 
-  if (!is_total)
-    pos_stack[0].discr =
-	get_discriminator_from_locus (gimple_location (edge->call_stmt));
-
   return get_stack_count (pos_stack, callee_name,
 			  size, count, max_count, &num_inst, NULL, NULL);
 }
 
 /* For a given BB, return its execution count, and annotate value profile
-   on statements if ANNOTATE_VPT is true.  */
+   on statements.  */
 
-gcov_type
-afdo_get_bb_count (basic_block bb, bool annotate_vpt)
+static gcov_type
+afdo_get_bb_count (basic_block bb)
 {
   gimple_stmt_iterator gsi;
   gcov_type max_count = 0;
@@ -1103,7 +930,7 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 	  if (count > max_count)
 	    max_count = count;
 	  has_annotated = true;
-	  if (annotate_vpt && hist_size > 0)
+	  if (hist_size > 0)
 	    afdo_vpt (stmt, hist, hist_size);
 	}
     }
@@ -1126,16 +953,24 @@  afdo_annotate_cfg (void)
 
   FOR_EACH_BB (bb)
     {
-      bb->count = afdo_get_bb_count (bb, true);
+      bb->count = afdo_get_bb_count (bb);
       if (bb->count > max_count)
 	max_count = bb->count;
     }
   if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count)
-    ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+    {
+      ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+      ENTRY_BLOCK_PTR->next_bb->flags |= BB_ANNOTATED;
+    }
+  if (ENTRY_BLOCK_PTR->count > EXIT_BLOCK_PTR->prev_bb->count)
+    {
+      EXIT_BLOCK_PTR->prev_bb->count = ENTRY_BLOCK_PTR->count;
+      EXIT_BLOCK_PTR->prev_bb->flags |= BB_ANNOTATED;
+    }
   if (max_count > 0)
     {
-      counts_to_freqs ();
       afdo_calculate_branch_prob ();
+      counts_to_freqs ();
       profile_status = PROFILE_READ;
     }
   if (flag_value_profile_transformations)
@@ -1416,13 +1251,6 @@  init_auto_profile (void)
 				  0,
 				  xcalloc,
 				  free);
-  /* Initialize the stack scale hash table.  */
-  stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
-				  afdo_stack_hash,
-				  afdo_stack_eq,
-				  0,
-				  xcalloc,
-				  free);
   /* Initialize the bfd name mapping table.  */
   bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
 				     afdo_bfd_name_hash,
@@ -1477,7 +1305,6 @@  end_auto_profile (void)
   free (file_names);
   htab_delete (function_htab);
   htab_delete (stack_htab);
-  htab_delete (stack_scale_htab);
   htab_delete (bfd_name_htab);
   htab_delete (module_htab);
   profile_info = NULL;
@@ -1845,7 +1672,7 @@  bool
 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
 {
   gcov_type count, max_count;
-  if (afdo_get_callsite_count (edge, &count, &max_count, true))
+  if (get_callsite_count (edge, &count, &max_count))
     {
       bool is_hot;
       const struct gcov_ctr_summary *saved_profile_info = profile_info;
Index: gcc/auto-profile.h
===================================================================
--- gcc/auto-profile.h	(revision 198796)
+++ gcc/auto-profile.h	(working copy)
@@ -32,19 +32,10 @@  extern void afdo_set_current_function_count (void)
 /* Add the assembly_name to bfd_name mapping.  */
 extern void afdo_add_bfd_name_mapping (const char *, const char *);
 
-/* Add copy scale for an inlined edge to stack_scale_map.  */
-extern void afdo_add_copy_scale (struct cgraph_edge *);
-
 /* Calculate branch probability in both AutoFDO pass and after inlining.  */
 extern void afdo_calculate_branch_prob (void);
 
-/* Calculate total sample count of an inlined callsite.  */
-extern bool afdo_get_callsite_count (struct cgraph_edge *, gcov_type *,
-				     gcov_type *, bool);
-
 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
 extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
 
-/* Calculate basic block count.  */
-extern gcov_type afdo_get_bb_count (basic_block, bool);
 #endif /* AUTO_PROFILE_H */
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 198796)
+++ gcc/Makefile.in	(working copy)
@@ -2309,7 +2309,7 @@  tree-inline.o : tree-inline.c $(CONFIG_H) $(SYSTEM
    intl.h $(FUNCTION_H) $(GIMPLE_H) \
    debug.h $(DIAGNOSTIC_H) $(EXCEPT_H) $(TREE_FLOW_H) tree-iterator.h tree-mudflap.h \
    $(IPA_PROP_H) value-prof.h $(TREE_PASS_H) $(TARGET_H) $(INTEGRATE_H) \
-   tree-pretty-print.h l-ipo.h $(AUTO_PROFILE_H)
+   tree-pretty-print.h l-ipo.h
 print-tree.o : print-tree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
    $(GGC_H) langhooks.h tree-iterator.h \
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_PASS_H) gimple-pretty-print.h
@@ -3030,7 +3030,7 @@  ipa-inline-transform.o : ipa-inline-transform.c $(
    $(DIAGNOSTIC_H) $(PARAMS_H) $(TIMEVAR_H) $(TREE_PASS_H) \
    $(HASHTAB_H) $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(IPA_PROP_H) \
    gimple-pretty-print.h ipa-inline.h $(LTO_STREAMER_H) tree-pass.h \
-   l-ipo.h $(AUTO_PROFILE_H)
+   l-ipo.h
 ipa-utils.o : ipa-utils.c $(IPA_UTILS_H) $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) langhooks.h \
    pointer-set.h $(GGC_H) $(GIMPLE_H) $(SPLAY_TREE_H) \
@@ -3054,9 +3054,9 @@  coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
    opts.h $(TREE_FLOW_H) $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h l-ipo.h dwarf2asm.h \
    $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H) $(AUTO_PROFILE_H)
 auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
-   $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \
+   $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
    $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) value-prof.h \
-   $(COVERAGE_H) $(AUTO_PROFILE_H)
+   $(COVERAGE_H) coretypes.h $(TREE_H) $(AUTO_PROFILE_H)
 cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
    $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) output.h $(FUNCTION_H) $(TREE_PASS_H) \
Index: gcc/tree-inline.c
===================================================================
--- gcc/tree-inline.c	(revision 198796)
+++ gcc/tree-inline.c	(working copy)
@@ -51,7 +51,6 @@  along with GCC; see the file COPYING3.  If not see
 #include "integrate.h"
 #include "langhooks.h"
 #include "l-ipo.h"
-#include "auto-profile.h"
 
 #include "rtl.h"	/* FIXME: For asm_str_count.  */
 
@@ -1826,19 +1825,6 @@  copy_bb (copy_body_data *id, basic_block bb, int f
       copy_gsi = gsi_last_bb (copy_basic_block);
     }
 
-  if (flag_auto_profile && profile_info)
-    {
-      /* If the same inline happens in the profile-collection binary, use
-	 that instance's profile count. Otherwise use the scaled count.
-	 Do *not* annotate value histogram on it because no value profile
-	 transformations will happen after ipa-inline.  */
-      gcov_type count = afdo_get_bb_count (copy_basic_block, false);
-      if (copy_basic_block->flags & BB_ANNOTATED)
-	copy_basic_block->count = count;
-      else if (bb->flags & BB_ANNOTATED)
-	copy_basic_block->flags |= BB_ANNOTATED;
-    }
-
   return copy_basic_block;
 }
 
@@ -2269,8 +2255,6 @@  copy_cfg_body (copy_body_data * id, gcov_type coun
   if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
     count_scale = (REG_BR_PROB_BASE * (double)count
 		   / ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count);
-  else if (flag_auto_profile && count == 0)
-    count_scale = 0;
   else
     count_scale = REG_BR_PROB_BASE;
   if (count_scale > REG_BR_PROB_BASE)
Index: gcc/predict.c
===================================================================
--- gcc/predict.c	(revision 198796)
+++ gcc/predict.c	(working copy)
@@ -2875,14 +2875,7 @@  rebuild_frequencies (void)
 	}
     }
   else if (profile_status == PROFILE_READ)
-    {
-      if (flag_auto_profile)
-	{
-	  afdo_calculate_branch_prob ();
-	  compute_function_frequency ();
-	}
-      counts_to_freqs ();
-    }
+    counts_to_freqs ();
   else
     gcc_unreachable ();
   timevar_pop (TV_REBUILD_FREQUENCIES);
Index: gcc/ipa-inline-transform.c
===================================================================
--- gcc/ipa-inline-transform.c	(revision 198796)
+++ gcc/ipa-inline-transform.c	(working copy)
@@ -47,7 +47,6 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-pass.h"
 #include "l-ipo.h"
-#include "auto-profile.h"
 #include "diagnostic-core.h"
 #include "params.h"
 
@@ -139,23 +138,6 @@  void
 clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
 		     bool update_original, int *overall_size)
 {
-  bool has_callsite_profile = false;
-  gcov_type callsite_total_count, callsite_max_count; 
-
-  if (flag_auto_profile)
-    {
-      has_callsite_profile =
-	  afdo_get_callsite_count (e, &callsite_total_count,
-				   &callsite_max_count, true);
-      /* If the callsite is inlined in the profile-collection build,
-	 i.e. the cloned callee has its separate profile, we will use
-	 this separate profile to annotate the callee, and the real
-	 callee body will not be affected. Thus here we need to disable
-	 update_original.  */
-      if (has_callsite_profile)
-	update_original = false;
-    }
-
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -196,23 +178,6 @@  clone_inlined_nodes (struct cgraph_edge *e, bool d
 	}
     }
 
-  if (flag_auto_profile && has_callsite_profile)
-    {
-      /* The callee's total count will be non-zero if the callsite
-         was inlined in the profile-collection build, In this case,
-         the original callee may be label unlikely_executed, which
-         may prevent its callees being inlined. Thus we need to reset
-         its frequency to normal.  */
-      if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-	e->callee->frequency = NODE_FREQUENCY_NORMAL;
-      /* we do not have enough information to calculate the node count
-	 and max_bb_count. Thus we set them to the same value to make
-	 other optimizations aware that they are from cloned inline
-	 instances.  */
-      e->callee->count = callsite_total_count;
-      e->callee->max_bb_count = callsite_max_count;
-    }
-
   if (e->caller->global.inlined_to)
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
@@ -417,8 +382,6 @@  inline_call (struct cgraph_edge *e, bool update_or
 
   clone_inlined_nodes (e, true, update_original, overall_size);
 
-  if (flag_auto_profile)
-    afdo_add_copy_scale (e);
 
   gcc_assert (curr->callee->global.inlined_to == to);