Index: gcc/cgraphbuild.c
===================================================================
--- gcc/cgraphbuild.c	(revision 191813)
+++ gcc/cgraphbuild.c	(working copy)
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "except.h"
 #include "l-ipo.h"
 #include "ipa-inline.h"
+#include "auto-profile.h"
 
 /* Context of record_reference.  */
 struct record_reference_ctx
@@ -497,6 +498,9 @@ build_cgraph_edges (void)
   tree decl;
   unsigned ix;
 
+  if (flag_auto_profile)
+    afdo_set_current_function_count ();
+
   /* Create the callgraph edges and record the nodes referenced by the function.
      body.  */
   FOR_EACH_BB (bb)
@@ -607,8 +611,9 @@ rebuild_cgraph_edges (void)
   cgraph_node_remove_callees (node);
   ipa_remove_all_references (&node->ref_list);
 
-  node->count = ENTRY_BLOCK_PTR->count;
-  node->max_bb_count = 0;
+  if (!flag_auto_profile)
+    node->count = ENTRY_BLOCK_PTR->count;
+  node->max_bb_count = node->count;
 
   FOR_EACH_BB (bb)
     {
Index: gcc/cgraph.c
===================================================================
--- gcc/cgraph.c	(revision 191813)
+++ gcc/cgraph.c	(working copy)
@@ -101,6 +101,7 @@ The callgraph:
 #include "l-ipo.h"
 #include "ipa-inline.h"
 #include "opts.h"
+#include "auto-profile.h"
 
 const char * const ld_plugin_symbol_resolution_names[]=
 {
@@ -2183,10 +2184,17 @@ cgraph_clone_node (struct cgraph_node *n, tree dec
   new_node->count = count;
   new_node->max_bb_count = count;
   if (n->count)
-    new_node->max_bb_count = ((n->max_bb_count + n->count / 2)
-                              / n->count) * count;
+    {
+      new_node->max_bb_count = ((n->max_bb_count + n->count / 2)
+                                / n->count) * count;
+      new_node->total_count = ((n->total_count + n->count / 2)
+                                / n->count) * count;
+    }
   new_node->is_versioned_clone = n->is_versioned_clone;
   new_node->frequency = n->frequency;
+  if (flag_auto_profile && count > 0
+      && new_node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+    new_node->frequency = NODE_FREQUENCY_NORMAL;
   new_node->clone = n->clone;
   new_node->clone.tree_map = 0;
   if (n->count)
@@ -2268,6 +2276,9 @@ clone_function_name (tree decl, const char *suffix
   prefix[len] = '_';
 #endif
   ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++);
+  if (flag_auto_profile)
+    afdo_add_bfd_name_mapping (xstrdup (tmp_name),
+			       xstrdup (lang_hooks.dwarf_name (decl, 0)));
   return get_identifier (tmp_name);
 }
 
Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h	(revision 191813)
+++ gcc/cgraph.h	(working copy)
@@ -198,6 +198,8 @@ struct GTY((chain_next ("%h.next"), chain_prev ("%
 
   /* Expected number of executions: calculated in profile.c.  */
   gcov_type count;
+  /* Total sampled count of the function.  */
+  gcov_type total_count;
   /* Maximum count of any basic block in the function.  */
   gcov_type max_bb_count;
   /* How to scale counts at materialization time; used to merge
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 191813)
+++ gcc/tree-pass.h	(working copy)
@@ -465,6 +465,7 @@ extern struct gimple_opt_pass pass_tree_convert_bu
 extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
 extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
 extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
+extern struct simple_ipa_opt_pass pass_ipa_auto_profile;
 
 extern struct simple_ipa_opt_pass pass_early_local_passes;
 
Index: gcc/cfghooks.c
===================================================================
--- gcc/cfghooks.c	(revision 191813)
+++ gcc/cfghooks.c	(working copy)
@@ -775,6 +775,19 @@ make_forwarder_block (basic_block bb, bool (*redir
         }
     }
 
+  if (flag_auto_profile)
+    {
+      dummy->frequency = 0;
+      dummy->count = 0;
+      for (ei = ei_start (dummy->preds); (e = ei_safe_edge (ei)); ei_next (&ei))
+	{
+	  dummy->frequency += EDGE_FREQUENCY (e);
+	  dummy->count += e->count;
+	}
+      if (dummy->frequency > REG_BR_PROB_BASE)
+	dummy->frequency = REG_BR_PROB_BASE;
+    }
+
   if (dom_info_available_p (CDI_DOMINATORS))
     {
       VEC (basic_block, heap) *doms_to_fix = VEC_alloc (basic_block, heap, 2);
Index: gcc/ipa-inline-transform.c
===================================================================
--- gcc/ipa-inline-transform.c	(revision 191813)
+++ gcc/ipa-inline-transform.c	(working copy)
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-pass.h"
 #include "l-ipo.h"
+#include "auto-profile.h"
 #include "diagnostic-core.h"
 
 int ncalls_inlined;
@@ -165,6 +166,16 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d
 	        *overall_size -= inline_summary (e->callee)->size;
 	      nfunctions_inlined++;
 	    }
+	  if (flag_auto_profile)
+	    {
+	      e->callee->total_count = afdo_get_callsite_count (e);
+	      /* For functions that does not exist in the profile-collection
+		 build, we need to add the scale factor to the copy_scale
+		 table. Because the cloned callee only has as scaled portion
+		 of the counts from the actual callee.  */
+	      if (e->callee->total_count == 0)
+		afdo_add_copy_scale (e);
+	    }
 	  duplicate = false;
 	  e->callee->local.externally_visible = false;
           update_noncloned_frequencies (e->callee, e->frequency);
@@ -172,11 +183,46 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d
       else
 	{
 	  struct cgraph_node *n;
+	  gcov_type count = e->count;
+	  gcov_type callsite_total_count = 0;
+	  if (flag_auto_profile)
+	    {
+	      callsite_total_count = afdo_get_callsite_count (e);
+	      /* If the callsite is inlined in the profile-collection build,
+		 i.e. the cloned callee has its separate profile, we will use
+		 this separate profile to annotate the callee, and the real
+		 callee body will not be affected. Thus here we need to disable
+		 update_original.  */
+	      if (callsite_total_count > 0)
+		update_original = false;
+	    }
 	  n = cgraph_clone_node (e->callee, e->callee->decl,
-				 e->count, e->frequency,
+				 count, e->frequency,
 				 update_original, NULL, true);
+	  if (flag_auto_profile)
+	    {
+	      n->total_count = callsite_total_count;
+	      if (callsite_total_count == 0)
+		afdo_add_copy_scale (e);
+	    }
 	  cgraph_redirect_edge_callee (e, n);
 	}
+      if (flag_auto_profile && e->callee->total_count > 0)
+	{
+	  /* The callee's total count will be non-zero if the callsite
+	     was inlined in the profile-collection build, In this case,
+	     the original callee may be label unlikely_executed, which
+	     may prevent its callees being inlined. Thus we need to reset
+	     its frequency to normal.  */
+	  if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+	    e->callee->frequency = NODE_FREQUENCY_NORMAL;
+	  /* we do not have enough information to calculate the node count
+	     and max_bb_count. Thus we set them to the same value to make
+	     other optimizations aware that they are from cloned inline
+	     instances.  */
+	  e->callee->count = e->callee->total_count;
+	  e->callee->max_bb_count = e->callee->total_count;
+	}
     }
 
   if (e->caller->global.inlined_to)
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c	(revision 191813)
+++ gcc/toplev.c	(working copy)
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-alias.h"
 #include "plugin.h"
 #include "tree-threadsafe-analyze.h"
+#include "auto-profile.h"
 
 #if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO)
 #include "dwarf2out.h"
@@ -700,6 +701,10 @@ compile_file (void)
     }
 #endif
 
+  /* Auto profile finalization. */
+  if (flag_auto_profile)
+    end_auto_profile ();
+
   /* Invoke registered plugin callbacks.  */
   invoke_plugin_callbacks (PLUGIN_FINISH_UNIT, NULL);
 
@@ -1494,6 +1499,9 @@ process_options (void)
     error ("target system does not support the \"%s\" debug format",
 	   debug_type_names[write_symbols]);
 
+  if (flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE)
+    debug_hooks = &auto_profile_debug_hooks;
+
   /* We know which debug output will be used so we can set flag_var_tracking
      and flag_var_tracking_uninit if the user has not specified them.  */
   if (debug_info_level < DINFO_LEVEL_NORMAL
Index: gcc/debug.h
===================================================================
--- gcc/debug.h	(revision 191813)
+++ gcc/debug.h	(working copy)
@@ -171,6 +171,7 @@ extern const struct gcc_debug_hooks sdb_debug_hook
 extern const struct gcc_debug_hooks xcoff_debug_hooks;
 extern const struct gcc_debug_hooks dwarf2_debug_hooks;
 extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
+extern const struct gcc_debug_hooks auto_profile_debug_hooks;
 
 /* Dwarf2 frame information.  */
 
Index: gcc/cgraphunit.c
===================================================================
--- gcc/cgraphunit.c	(revision 191813)
+++ gcc/cgraphunit.c	(working copy)
@@ -143,6 +143,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-utils.h"
 #include "lto-streamer.h"
 #include "l-ipo.h"
+#include "auto-profile.h"
 
 static void cgraph_expand_all_functions (void);
 static void cgraph_mark_functions_to_output (void);
@@ -1346,6 +1347,11 @@ cgraph_finalize_compilation_unit (void)
 
   backend_entered_p = true;
 
+  /* Before compilation, auto profile will process the profile to build the
+     hash tables for later optimizations.  */
+  if (flag_auto_profile)
+    process_auto_profile ();
+
   /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE.  */
   if (flag_lto)
     lto_streamer_hooks_init ();
@@ -2517,6 +2523,7 @@ cgraph_copy_node_for_versioning (struct cgraph_nod
    new_version->rtl = old_version->rtl;
    new_version->reachable = true;
    new_version->count = old_version->count;
+   new_version->total_count = old_version->total_count;
    new_version->max_bb_count = old_version->max_bb_count;
    new_version->is_versioned_clone = true;
 
Index: gcc/regs.h
===================================================================
--- gcc/regs.h	(revision 191813)
+++ gcc/regs.h	(working copy)
@@ -136,6 +136,7 @@ extern size_t reg_info_p_size;
    frequency.  */
 #define REG_FREQ_FROM_BB(bb) (optimize_size				      \
 			      || (flag_branch_probabilities		      \
+				  && !flag_auto_profile			      \
 				  && !ENTRY_BLOCK_PTR->count)		      \
 			      ? REG_FREQ_MAX				      \
 			      : ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h	(revision 191813)
+++ gcc/gcov-io.h	(working copy)
@@ -456,6 +456,9 @@ typedef unsigned HOST_WIDEST_INT gcov_type_unsigne
 #define GCOV_TAG_PMU_BRANCH_MISPREDICT_LENGTH (8)
 #define GCOV_TAG_PMU_TOOL_HEADER ((gcov_unsigned_t)0xa9000000)
 #define GCOV_TAG_MODULE_INFO ((gcov_unsigned_t)0xab000000)
+#define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000)
+#define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000)
+#define GCOV_TAG_AFDO_MODULE_GROUPING ((gcov_unsigned_t)0xae000000)
 #define GCOV_TAG_PMU_STRING_TABLE_ENTRY ((gcov_unsigned_t)0xad000000)
 #define GCOV_TAG_PMU_STRING_TABLE_ENTRY_LENGTH(filename) \
   (gcov_string_length (filename) + 1)
Index: gcc/ira-int.h
===================================================================
--- gcc/ira-int.h	(revision 191813)
+++ gcc/ira-int.h	(working copy)
@@ -44,7 +44,8 @@ along with GCC; see the file COPYING3.  If not see
    executed, frequency is always equivalent.  Otherwise rescale the
    edge frequency.  */
 #define REG_FREQ_FROM_EDGE_FREQ(freq)					   \
-  (optimize_size || (flag_branch_probabilities && !ENTRY_BLOCK_PTR->count) \
+  (optimize_size || (flag_branch_probabilities				   \
+   && !flag_auto_profile && !ENTRY_BLOCK_PTR->count) 			   \
    ? REG_FREQ_MAX : (freq * REG_FREQ_MAX / BB_FREQ_MAX)			   \
    ? (freq * REG_FREQ_MAX / BB_FREQ_MAX) : 1)
 
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(revision 191813)
+++ gcc/ipa-inline.c	(working copy)
@@ -119,10 +119,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "ipa-inline.h"
 #include "ipa-utils.h"
+#include "auto-profile.h"
 
 /* Statistics we collect about inlining algorithm.  */
 static int overall_size;
 static gcov_type max_count;
+static gcov_type max_total_count;
 
 /* Global variable to denote if it is in ipa-inline pass. */
 bool is_in_ipa_inline = false;
@@ -478,6 +480,9 @@ edge_hot_enough_p (struct cgraph_edge *edge)
 {
   if (cgraph_maybe_hot_edge_p (edge))
     return true;
+  if (flag_auto_profile
+      && maybe_hot_count_p (afdo_get_callsite_count (edge)))
+    return true;
   if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER)
       && maybe_hot_count_p (edge->caller->max_bb_count))
     return true;
@@ -860,6 +865,16 @@ edge_badness (struct cgraph_edge *edge, bool dump)
 	((int)
 	 ((double) edge->count * INT_MIN / 2 / max_count / 512) *
 	 relative_time_benefit (callee_info, edge, time_growth)) / growth;
+      if (flag_auto_profile && max_total_count > 0)
+	{
+	  gcov_type afdo_badness =
+	    ((int)
+	     ((double) afdo_get_callsite_count (edge) * INT_MIN / 2 /
+	     max_total_count / 512) *
+	     relative_time_benefit (callee_info, edge, time_growth)) / growth;
+	  if (afdo_badness < badness)
+	    badness = afdo_badness;
+	}
       
       /* Be sure that insanity of the profile won't lead to increasing counts
 	 in the scalling and thus to overflow in the computation above.  */
@@ -1445,6 +1460,7 @@ inline_small_functions (void)
      metrics.  */
 
   max_count = 0;
+  max_total_count = 0;
   initialize_growth_caches ();
 
   FOR_EACH_DEFINED_FUNCTION (node)
@@ -1459,6 +1475,9 @@ inline_small_functions (void)
 	      initial_size += info->size;
 	  }
 
+	if (max_total_count < node->total_count)
+	  max_total_count = node->total_count;
+
 	for (edge = node->callers; edge; edge = edge->next_caller)
 	  if (max_count < edge->count)
 	    max_count = edge->count;
@@ -1493,6 +1512,7 @@ inline_small_functions (void)
       }
 
   gcc_assert (in_lto_p
+	      || flag_auto_profile
 	      || !max_count
 	      || (profile_info && flag_branch_probabilities));
 
@@ -1526,7 +1546,7 @@ inline_small_functions (void)
 	 of date value on it, we re-insert it now.  */
       current_badness = edge_badness (edge, false);
       gcc_assert (cached_badness == current_badness);
-      gcc_assert (current_badness >= badness);
+      gcc_assert (flag_auto_profile || current_badness >= badness);
       if (current_badness != badness)
 	{
 	  edge->aux = fibheap_insert (heap, current_badness, edge);
@@ -1646,7 +1666,7 @@ inline_small_functions (void)
 	     is propagated from caller.  We don't track when this happen and
 	     thus we need to recompute everything all the time.  Once this is
 	     solved, "|| 1" should go away.  */
-	  if (callee->global.inlined_to || 1)
+	  if (callee->global.inlined_to)// || 1)
 	    update_all_callee_keys (heap, callee, updated_nodes);
 	  else
 	    update_callee_keys (heap, edge->callee, updated_nodes);
Index: gcc/dwarf2out.c
===================================================================
--- gcc/dwarf2out.c	(revision 191813)
+++ gcc/dwarf2out.c	(working copy)
@@ -2722,6 +2722,41 @@ const struct gcc_debug_hooks dwarf2_debug_hooks =
   1,                            /* start_end_main_source_file */
   TYPE_SYMTAB_IS_DIE            /* tree_type_symtab_field */
 };
+
+const struct gcc_debug_hooks auto_profile_debug_hooks =
+{
+  debug_nothing_charstar,
+  debug_nothing_charstar,
+  debug_nothing_void,
+  debug_nothing_int_charstar,
+  debug_nothing_int_charstar,
+  debug_nothing_int_charstar,
+  debug_nothing_int,
+  debug_nothing_int_int,                 /* begin_block */
+  debug_nothing_int_int,                 /* end_block */
+  dwarf2out_ignore_block,                 /* ignore_block */
+  debug_nothing_int_charstar_int_bool,   /* source_line */
+  debug_nothing_int_charstar,            /* begin_prologue */
+  debug_nothing_int_charstar,            /* end_prologue */
+  debug_nothing_int_charstar,            /* begin_epilogue */
+  debug_nothing_int_charstar,            /* end_epilogue */
+  debug_nothing_tree,                    /* begin_function */
+  debug_nothing_int,                     /* end_function */
+  debug_nothing_tree,                    /* function_decl */
+  debug_nothing_tree,                    /* global_decl */
+  debug_nothing_tree_int,                /* type_decl */
+  debug_nothing_tree_tree_tree_bool,     /* imported_module_or_decl */
+  debug_nothing_tree,                    /* deferred_inline_function */
+  debug_nothing_tree,                    /* outlining_inline_function */
+  debug_nothing_rtx,                     /* label */
+  debug_nothing_int,                     /* handle_pch */
+  debug_nothing_rtx,                     /* var_location */
+  debug_nothing_void,                    /* switch_text_section */
+  debug_nothing_tree_tree,               /* set_name */
+  0,                                     /* start_end_main_source_file */
+  TYPE_SYMTAB_IS_ADDRESS                 /* tree_type_symtab_field */
+};
+
 
 /* NOTE: In the comments in this file, many references are made to
    "Debugging Information Entries".  This term is abbreviated as `DIE'
Index: gcc/opts.c
===================================================================
--- gcc/opts.c	(revision 191813)
+++ gcc/opts.c	(working copy)
@@ -1653,6 +1653,33 @@ common_handle_option (struct gcc_options *opts,
 	opts->x_flag_gcse_after_reload = value;
       break;
 
+    case OPT_fauto_profile_:
+      auto_profile_file = xstrdup (arg);
+      opts->x_flag_auto_profile = true;
+      value = true;
+    /* No break here - do -fauto-profile processing. */
+    case OPT_fauto_profile:
+      if (!opts_set->x_flag_branch_probabilities)
+	opts->x_flag_branch_probabilities = value;
+      if (!opts_set->x_flag_unroll_loops)
+	opts->x_flag_unroll_loops = value;
+      if (!opts_set->x_flag_peel_loops)
+	opts->x_flag_peel_loops = value;
+      if (!opts_set->x_flag_inline_functions)
+	opts->x_flag_inline_functions = value;
+      if (!opts_set->x_flag_ipa_cp)
+	opts->x_flag_ipa_cp = value;
+      if (!opts_set->x_flag_ipa_cp_clone
+	  && value && opts->x_flag_ipa_cp)
+	opts->x_flag_ipa_cp_clone = value;
+      if (!opts_set->x_flag_predictive_commoning)
+	opts->x_flag_predictive_commoning = value;
+      if (!opts_set->x_flag_unswitch_loops)
+	opts->x_flag_unswitch_loops = value;
+      if (!opts_set->x_flag_gcse_after_reload)
+	opts->x_flag_gcse_after_reload = value;
+      break;
+
     case OPT_fprofile_generate_:
       opts->x_profile_data_prefix = xstrdup (arg);
       value = true;
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def	(revision 191813)
+++ gcc/timevar.def	(working copy)
@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_WHOPR_LTRANS          , "whopr ltra
 DEFTIMEVAR (TV_WHOPR_WPA_LTRANS_EXEC , "whopr wpa->ltrans")
 DEFTIMEVAR (TV_IPA_REFERENCE         , "ipa reference")
 DEFTIMEVAR (TV_IPA_PROFILE           , "ipa profile")
+DEFTIMEVAR (TV_IPA_AUTOFDO           , "auto profile")
 DEFTIMEVAR (TV_IPA_PURE_CONST        , "ipa pure const")
 DEFTIMEVAR (TV_IPA_PTA               , "ipa points-to")
 DEFTIMEVAR (TV_IPA_SRA               , "ipa SRA")
Index: gcc/predict.c
===================================================================
--- gcc/predict.c	(revision 191813)
+++ gcc/predict.c	(working copy)
@@ -60,6 +60,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "cfgloop.h"
 #include "pointer-set.h"
+#include "auto-profile.h"
 
 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
 		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
@@ -2749,7 +2750,8 @@ compute_function_frequency (void)
   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
     node->only_called_at_exit = true;
 
-  if (!profile_info || !flag_branch_probabilities)
+  if (!profile_info || !flag_branch_probabilities
+      || (flag_auto_profile && profile_status == PROFILE_GUESSED))
     {
       int flags = flags_from_decl_or_type (current_function_decl);
       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
@@ -2851,16 +2853,30 @@ rebuild_frequencies (void)
   timevar_push (TV_REBUILD_FREQUENCIES);
   if (profile_status == PROFILE_GUESSED)
     {
-      loop_optimizer_init (0);
-      add_noreturn_fake_exit_edges ();
-      mark_irreducible_loops ();
-      connect_infinite_loops_to_exit ();
-      estimate_bb_frequencies ();
-      remove_fake_exit_edges ();
-      loop_optimizer_finalize ();
+      if (flag_auto_profile && counts_to_freqs ())
+	{
+	  afdo_calculate_branch_prob ();
+	  counts_to_freqs();
+	  profile_status = PROFILE_READ;
+	  compute_function_frequency ();
+	}
+      else
+	{
+	  loop_optimizer_init (0);
+	  add_noreturn_fake_exit_edges ();
+	  mark_irreducible_loops ();
+	  connect_infinite_loops_to_exit ();
+	  estimate_bb_frequencies ();
+	  remove_fake_exit_edges ();
+	  loop_optimizer_finalize ();
+	}
     }
   else if (profile_status == PROFILE_READ)
-    counts_to_freqs ();
+    {
+      if (flag_auto_profile)
+	afdo_calculate_branch_prob ();
+      counts_to_freqs ();
+    }
   else
     gcc_unreachable ();
   timevar_pop (TV_REBUILD_FREQUENCIES);
Index: gcc/auto-profile.c
===================================================================
--- gcc/auto-profile.c	(revision 0)
+++ gcc/auto-profile.c	(revision 0)
@@ -0,0 +1,1580 @@
+/* Calculate branch probabilities, and basic block execution counts.
+   Copyright (C) 2011. Free Software Foundation, Inc.
+   Contributed by Dehao Chen (dehao@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* Read and annotate call graph profile from the auto profile data
+   file.  */
+
+#include <string.h>
+
+#include "config.h"
+#include "system.h"
+#include "flags.h"	      /* for auto_profile_file.  */
+#include "basic-block.h"      /* for gcov_type.	 */
+#include "diagnostic-core.h"  /* for inform().  */
+#include "gcov-io.h"	      /* for gcov_read_unsigned().  */
+#include "input.h"	      /* for expanded_location.	 */
+#include "profile.h"	      /* for profile_info.  */
+#include "langhooks.h"	      /* for langhooks.	 */
+#include "opts.h"	      /* for in_fnames.	 */
+#include "tree-pass.h"	      /* for ipa pass.  */
+#include "cfgloop.h"	      /* for loop_optimizer_init.  */
+#include "gimple.h"
+#include "cgraph.h"
+#include "tree-flow.h"
+#include "auto-profile.h"
+
+/* The following routines implements AutoFDO optimization.
+
+   This optimization uses sampling profiles to annotate basic block counts
+   and uses heuristics to estimate branch probabilities.
+
+   There are three phases in AutoFDO:
+
+   Phase 1: Read profile from the profile data file.
+     The following info is read from the profile datafile:
+	* Function names and file names.
+	* Source level profile, which is a mapping from inline stack to
+	  its sample counts. 
+	* Module profile: Module to aux-modules mapping
+     Phase 1 just reads in data without processing it. It is invoked
+     before tree parsing because LIPO needs module profile before tree
+     parsing. (read_aux_modules)
+
+   Phase 2: Process profile to build internal data structure (hashmap).
+     This is done after tree parsing, because the processing requires the map
+     from function name to its debug name (bfd_name). The following hashmaps
+     is used to store profile.
+	* function_htab: map from function_name to its entry_bb count
+	* stack_htab: map from inline stack to its sample count
+	* bfd_name_htab: map from function name to its debug name (bfd_name)
+	* module_htab: map from module name to its aux-module names
+
+   Phase 3: Annotate control flow graph.
+     AutoFDO invokes a separate pass over the control flow graph to:
+	* Annotate basic block count
+	* Estimate branch probability
+
+   After the above 3 phases, all profile is readily annotated on the GCC IR.
+   AutoFDO tries to reuse all FDO infrastructure as much as possible to make
+   use of the profile. E.g. it uses existing mechanism to calculate the basic
+   block/edge frequency, as well as the cgraph node/edge count.
+
+   However, AutoFDO still differs from FDO in the following aspects:
+
+   * Profile is not accurate, because AutoFDO uses sampling to collect
+     profile, and uses debug info to represent the profile. As a result,
+     some hot basic blocks may have zero sample count. Because of this,
+     some optimization needs to be adjusted (e.g. loop peeling/unrolling).
+   * Each cloned context has its own profile, but these contexts may
+     not even exist when doing annotation. This provides more context-
+     sensitive profiles, but at the same time, adds complexity to the
+     implementation. Because of this, additional profile annotation is
+     needed for each function after the inline pass, and count scaling
+     is tricky in the second annotation.
+*/
+
+#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
+#define SP_HTAB_INIT_SIZE 2000
+
+/* GCOV data structures to represent profile stored in the .afdo file.  */
+
+struct gcov_callsite_pos
+{
+  const char *file;
+  const char *func;
+  gcov_unsigned_t line;
+  gcov_unsigned_t discr;
+};
+
+struct gcov_callsite
+{
+  struct gcov_callsite_pos pos;
+  const char *func;
+  gcov_type count;
+};
+
+struct gcov_stack
+{
+  const char *func_name;
+  const char *callee_name;
+  struct gcov_callsite_pos *stack;
+  gcov_unsigned_t size;
+  gcov_type num_inst;
+  gcov_type count;
+};
+
+struct gcov_function
+{
+  const char *name;
+  const char *file;
+  gcov_type total_count;
+  gcov_type entry_count;
+  gcov_type max_count;
+  /* Number of call stacks in the function.  */
+  gcov_unsigned_t stack_num;
+  /* All the call stacks in the function.  */
+  struct gcov_stack *stacks;
+};
+
+struct afdo_bfd_name
+{
+  const char *assembler_name;
+  /* bfd_name is the name that debugger used for function name matching.
+     Different assembler names could map to the same bfd_name.  */
+  const char *bfd_name;
+};
+
+struct afdo_module
+{
+  const char *name;
+  int ident;
+  unsigned exported;
+  unsigned has_asm;
+  unsigned num_aux_modules;
+  unsigned num_quote_paths;
+  unsigned num_bracket_paths;
+  unsigned num_cpp_defines;
+  unsigned num_cpp_includes;
+  unsigned num_cl_args;
+  const char **strings;
+};
+
+/* Store the file name strings read from the profile data file.	 */
+static const char **file_names;
+
+/* gcov_ctr_summary structure to store the profile_info.  */
+static struct gcov_ctr_summary *afdo_profile_info;
+
+/* Hash table to hold function information.  */
+static htab_t function_htab;
+
+/* Hash table to hold stack information.  */
+static htab_t stack_htab;
+
+/* Hash table to hold inline scale information.  */
+static htab_t stack_scale_htab;
+
+/* Hash table to hold assembler name to bfd name mapping.  */
+static htab_t bfd_name_htab;
+
+/* Hash table to hold module informaition.  */
+static htab_t module_htab;
+
+/* Store the module hash table contents.  */
+static struct afdo_module *modules;
+
+/* File static variables, which is used to pass information between
+   init_auto_profile and process_auto_profile.  */
+static gcov_unsigned_t function_num;
+static gcov_unsigned_t total_module_num;
+static struct gcov_function *gcov_functions;
+
+/* Check if PATH_NAME is absolute path, if yes, strip the directory part
+   of the PATH_NAME, return the file name.  */
+
+static const char *
+afdo_get_filename (const char *path_name)
+{
+  const char* last;
+  return path_name;
+  if (path_name == NULL)
+    return NULL;
+  last = strrchr(path_name, '/');
+  return ((last == 0) ? path_name : last + 1);
+}
+
+/* Given an assembler function NAME, return its original name. strip the
+   suffix at the end of the function name, added by optimizations such as
+   constant propagation etc.  */
+
+static gcov_unsigned_t
+afdo_get_original_name_size (const char *name)
+{
+  const char *ret;
+  if (!name)
+    return 0;
+  ret = strchr (name, '.');
+  if (!ret)
+    return strlen(name);
+  else
+    return ret - name;
+}
+
+/* Given an asssembler function NAME, return its corresponding bfd name.
+   If the mapping cannot be found, it means that the assembler function
+   name is not used/emitted in the current module(s).  */
+
+static const char *
+afdo_get_bfd_name (const char *name)
+{
+  struct afdo_bfd_name bfd, *bfd_entry;
+  gcov_unsigned_t size = afdo_get_original_name_size (name);
+  /* If the function name is cloned, we want to find its original name.  */
+  char *buf = (char *) alloca (size + 1);
+  strncpy (buf, name, size);
+  buf[size] = 0;
+  bfd.assembler_name = buf;
+  bfd_entry = (struct afdo_bfd_name *) htab_find (bfd_name_htab, &bfd);
+  if (!bfd_entry)
+    return name;
+  return bfd_entry->bfd_name;
+}
+
+/* Traverse the cgraph, add each function's name to to bfd_name mapping.  */
+
+static void
+afdo_read_bfd_names (void)
+{
+  struct cgraph_node *node;
+
+  for (node = cgraph_nodes; node; node = node->next)
+    {
+      const char *bfd_name;
+      if (lang_hooks.dwarf_name (node->decl, 0) == NULL)
+	continue;
+      bfd_name = xstrdup (lang_hooks.dwarf_name (node->decl, 0));
+      afdo_add_bfd_name_mapping
+	(IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)), bfd_name);
+    }
+}
+
+/* Hash function for struct afdo_stack.  */
+
+static hashval_t
+afdo_stack_hash (const void *stack)
+{
+  gcov_unsigned_t i;
+  /* An arbitrary initial value borrowed from hashtab.c.  */
+  hashval_t h = 0x9e3779b9;
+  const struct gcov_stack *s = (const struct gcov_stack *) stack;
+  if (s->callee_name)
+    h = iterative_hash (afdo_get_bfd_name (s->callee_name),
+			strlen (afdo_get_bfd_name (s->callee_name)), h);
+  if (s->func_name)
+    h = iterative_hash (s->func_name,
+			afdo_get_original_name_size (s->func_name), h);
+  for (i = 0; i < s->size; i++) {
+    const struct gcov_callsite_pos *p = s->stack + i;
+    const char *file = afdo_get_filename (p->file);
+    h = iterative_hash (file, strlen (file), h);
+    h = iterative_hash (&p->line, sizeof (p->line), h);
+    if (i == 0)
+      h = iterative_hash (&p->discr, sizeof (p->discr), h);
+  }
+  return h;
+}
+
+/* Check if two afdo_stack P and Q are identical.  */
+
+static int
+afdo_stack_eq (const void *p, const void *q)
+{
+  const struct gcov_stack *s1 = (const struct gcov_stack *) p;
+  const struct gcov_stack *s2 = (const struct gcov_stack *) q;
+
+  gcov_unsigned_t i;
+  if (s1->func_name == NULL || s2->func_name == NULL)
+    return 0;
+
+  if (s1->callee_name == NULL)
+    {
+      if (s2->callee_name != NULL)
+	return 0;
+    }
+  else if (s2->callee_name != NULL
+	   && strcmp (afdo_get_bfd_name (s1->callee_name),
+		      afdo_get_bfd_name (s2->callee_name)))
+    return 0;
+
+  i = afdo_get_original_name_size (s1->func_name);
+  if (i != afdo_get_original_name_size (s2->func_name))
+    return 0;
+
+  if (strncmp (s1->func_name, s2->func_name, i))
+    return 0;
+
+  if (s1->size != s2->size)
+    return 0;
+  for (i = 0; i < s1->size; i++)
+    {
+      const struct gcov_callsite_pos *p1 = s1->stack + i;
+      const struct gcov_callsite_pos *p2 = s2->stack + i;
+      if (strcmp (afdo_get_filename(p1->file), afdo_get_filename(p2->file))
+	  || p1->line != p2->line || (i== 0 && p1->discr != p2->discr))
+	return 0;
+    }
+  return 1;
+}
+
+/* Hash function for struct afdo_function.  */
+
+static hashval_t
+afdo_function_hash (const void *func)
+{
+  /* An arbitrary initial value borrowed from hashtab.c.  */
+  hashval_t h = 0x9e3779b9;
+  const struct gcov_function *f = (const struct gcov_function *) func;
+
+  if (f->name)
+    h = iterative_hash (f->name, afdo_get_original_name_size (f->name), h);
+  return h;
+}
+
+/* Check if two afdo_function P and Q are identical.  */
+
+static int
+afdo_function_eq (const void *p, const void *q)
+{
+  const struct gcov_function *f1 = (const struct gcov_function *) p;
+  const struct gcov_function *f2 = (const struct gcov_function *) q;
+  gcov_unsigned_t i;
+
+  if (f1->name == NULL || f2->name == NULL)
+    return 0;
+
+  i = afdo_get_original_name_size (f1->name);
+  if (i != afdo_get_original_name_size (f2->name))
+    return 0;
+
+  return !strncmp (f1->name, f2->name, i);
+}
+
+/* Hash function for struct afdo_bfd_name.  */
+
+static hashval_t
+afdo_bfd_name_hash (const void *func)
+{
+  hashval_t h = 0x9e3779b9;
+  const struct afdo_bfd_name *f = (const struct afdo_bfd_name *) func;
+
+  if (f->assembler_name)
+    h = iterative_hash (f->assembler_name, strlen (f->assembler_name), h);
+  return h;
+}
+
+/* Check if two struct afdo_bfd_name P and Q are identical.  */
+
+static int
+afdo_bfd_name_eq (const void *p, const void *q)
+{
+  const struct afdo_bfd_name *b1 = (const struct afdo_bfd_name *) p;
+  const struct afdo_bfd_name *b2 = (const struct afdo_bfd_name *) q;
+
+  if (b1->assembler_name == NULL || b2->assembler_name == NULL)
+    return 0;
+
+  return !strcmp (b1->assembler_name, b2->assembler_name);
+}
+
+/* Free the hash table entry P.	 */
+
+static void
+afdo_bfd_name_del (void *p)
+{
+  free (p);
+}
+
+/* Hash Function for struct afdo_module.  */
+
+static hashval_t
+afdo_module_hash (const void *module)
+{
+  hashval_t h = 0x9e3779b9;
+  const struct afdo_module *m = (const struct afdo_module *)module;
+
+  if (m->name)
+    h = iterative_hash (m->name, strlen (m->name), h);
+
+  return h;
+}
+
+/* Check if two struct afdo_module P and Q are identical.	 */
+
+static int
+afdo_module_eq (const void *p, const void *q)
+{
+  const struct afdo_module *m1 = (const struct afdo_module *)p;
+  const struct afdo_module *m2 = (const struct afdo_module *)q;
+
+  if (m1->name == NULL || m2->name == NULL)
+    return 0;
+
+  return !strcmp (m1->name, m2->name);
+}
+
+/* Return the total number of emitted string for MODULE.  */
+
+static unsigned long long
+afdo_module_num_strings (const struct afdo_module *module)
+{
+  return module->num_quote_paths +
+    module->num_bracket_paths +
+    module->num_cpp_defines +
+    module->num_cpp_includes +
+    module->num_cl_args;
+}
+
+/* Add a module (specified in MODULE) into gcov_module_info format in
+   MODULE_INFO, which is used by LIPO to import auxiliary modules.
+   Set the is_primary flag if IS_PRIMARY is set.  */
+
+static void
+afdo_add_module (struct gcov_module_info **module_info,
+		 const struct afdo_module *module,
+		 gcov_unsigned_t is_primary)
+{
+  unsigned i;
+  size_t info_sz;
+
+  info_sz = sizeof (struct gcov_module_info) +
+    sizeof (void *) * afdo_module_num_strings (module);
+  *module_info = XCNEWVAR (struct gcov_module_info, info_sz);
+  (*module_info)->ident = module->ident;
+  (*module_info)->is_primary = is_primary;
+  (*module_info)->is_exported = is_primary ? module->exported : 1;
+  (*module_info)->source_filename = (char *) module->name;
+  (*module_info)->num_quote_paths = module->num_quote_paths;
+  (*module_info)->num_bracket_paths = module->num_bracket_paths;
+  (*module_info)->num_cpp_defines = module->num_cpp_defines;
+  (*module_info)->num_cpp_includes = module->num_cpp_includes;
+  (*module_info)->num_cl_args = module->num_cl_args;
+  for (i = 0; i < afdo_module_num_strings (module); i++)
+    (*module_info)->string_array[i] = (char *)
+	module->strings[module->num_aux_modules + i];
+}
+
+/* Read in the auxiliary modules for the current primary module.  */
+
+static void
+read_aux_modules (void)
+{
+  unsigned i, curr_module = 1;
+  struct afdo_module module, *entry;
+
+  module.name = in_fnames[0];
+  entry = (struct afdo_module *) htab_find (module_htab, &module);
+  if (!entry)
+    {
+      inform (0, "primary module %s cannot be found.", in_fnames[0]);
+      return;
+    }
+  module_infos = XCNEWVEC (struct gcov_module_info *,
+			   entry->num_aux_modules + 1);
+  afdo_add_module (module_infos, entry, true);
+  primary_module_id = entry->ident;
+  for (i = 0; i < entry->num_aux_modules; i++)
+    {
+      struct afdo_module *aux_entry;
+      module.name = entry->strings[i];
+      if (!strcmp (module.name, in_fnames[0]))
+	continue;
+      aux_entry = (struct afdo_module *) htab_find (module_htab, &module);
+      if (!aux_entry)
+	{
+	  inform (0, "aux module %s cannot be found.", module.name);
+	  continue;
+	}
+      afdo_add_module (&module_infos[curr_module++], aux_entry, false);
+      add_input_filename (module.name);
+    }
+}
+
+/* Return the size of the inline stack of the STMT.  */
+
+static int
+get_inline_stack_size_by_stmt (gimple stmt)
+{
+  tree block;
+  int size = 1;
+
+  if (!stmt)
+    return 0;
+  block = gimple_block (stmt);
+  if (!block || TREE_CODE (block) != BLOCK || !gimple_location (stmt))
+    return 0;
+
+  for ( block = BLOCK_SUPERCONTEXT (block);
+	block && (TREE_CODE (block) == BLOCK);
+	block = BLOCK_SUPERCONTEXT (block)) {
+    /* Traverse the nesting blocks. If the block contains the source
+       location info, save the source location info to the inline stack.  */
+    if (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION)
+      continue;
+    size ++;
+  }
+  return size;
+}
+
+/* Return the size of the inline stack of the EDGE. All inlined callsites
+   along he inline chain are recorded.  */
+
+static int
+get_inline_stack_size_by_edge (struct cgraph_edge *edge)
+{
+  struct cgraph_edge *e;
+  int size = 0;
+  for (e= edge; e; e = e->caller->callers)
+    {
+      gimple stmt = e->call_stmt;
+      if (!stmt)
+	break;
+      size += get_inline_stack_size_by_stmt (stmt);
+      if (!e->caller->global.inlined_to)
+	break;
+    }
+  return size;
+}
+
+/* Return the function name of a given lexical BLOCK.  */
+
+static const char *
+get_function_name_from_block (tree block)
+{
+  tree decl;
+  for (decl = BLOCK_ABSTRACT_ORIGIN (block);
+       decl && (TREE_CODE (decl) == BLOCK);
+       decl = BLOCK_ABSTRACT_ORIGIN (decl))
+    if (TREE_CODE (decl) == FUNCTION_DECL)
+      break;
+  return decl ? IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) : NULL;
+}
+
+/* Store the inline stack of STMT to POS_STACK, return the size of the
+   stack. Set the discriminator of the inline stack if DISCR is TRUE.  */
+
+static int
+get_inline_stack_by_stmt (gimple stmt, tree decl,
+			  struct gcov_callsite_pos *pos_stack, bool discr)
+{
+  tree block;
+  int idx = 0;
+  source_location loc;
+
+  if (!stmt)
+    return 0;
+  block = gimple_block (stmt);
+  if (!block || TREE_CODE (block) != BLOCK || !gimple_location (stmt))
+    return 0;
+
+  loc = gimple_location (stmt);
+  pos_stack[idx].file = expand_location(loc).file;
+  pos_stack[idx].line = expand_location(loc).line;
+  if (discr)
+    pos_stack[idx].discr = get_discriminator_from_locus (loc);
+  else
+    pos_stack[idx].discr = 0;
+  idx++;
+  for (block = BLOCK_SUPERCONTEXT (block);
+       block && (TREE_CODE (block) == BLOCK);
+       block = BLOCK_SUPERCONTEXT (block))
+    {
+      if (! BLOCK_SOURCE_LOCATION (block) > 0)
+	continue;
+      loc = BLOCK_SOURCE_LOCATION (block);
+      pos_stack[idx].file = expand_location (loc).file;
+      pos_stack[idx].line = expand_location (loc).line;
+      pos_stack[idx - 1].func = get_function_name_from_block (block);
+      pos_stack[idx++].discr = 0;
+    }
+  if (decl)
+    pos_stack[idx - 1].func = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  return idx;
+}
+
+/* Store the inline stack of EDGE to POS_STACK, return the size of the
+   stack. All inlined callsites along the inline stack are recorded.  */
+
+static int
+get_inline_stack_by_edge (struct cgraph_edge *edge,
+			  struct gcov_callsite_pos *pos_stack)
+{
+  struct cgraph_edge *e;
+  int size = 0;
+
+  for (e = edge; e; e = e->caller->callers)
+    {
+      gimple stmt = e->call_stmt;
+      if (!stmt)
+	break;
+      size += get_inline_stack_by_stmt (stmt, e->caller->decl,
+					pos_stack + size, false);
+      if (!e->caller->global.inlined_to)
+	break;      
+    }
+  return size;
+}
+
+/* Read sample count info of the function with DECL, and save them
+   to ENTRY_COUNT and TOTAL_COUNT respectively.  */
+
+static void
+afdo_get_function_count (tree decl,
+			 gcov_type *entry_count,
+			 gcov_type *total_count)
+{
+  struct gcov_function func;
+  const struct gcov_function *func_entry;
+
+  *entry_count = 0;
+  *total_count = 0;
+  func.name =
+    IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  func.file = DECL_SOURCE_FILE (decl);
+  func_entry = (const struct gcov_function *)
+    htab_find (function_htab, &func);
+  if (func_entry)
+    {
+      (*total_count) += func_entry->total_count;
+      (*entry_count) += func_entry->entry_count;
+      return;
+    }
+  func.name = afdo_get_bfd_name (func.name);
+  func_entry = (const struct gcov_function *)
+    htab_find (function_htab, &func);
+  if (func_entry)
+    {
+      (*total_count) += func_entry->total_count;
+      (*entry_count) += func_entry->entry_count;
+    }
+}
+
+/* Set the node count of the current function, and update the entry_bb
+   count.  */
+
+void
+afdo_set_current_function_count (void)
+{
+  gcov_type entry_count;
+  gcov_type total_count;
+  struct cgraph_node *node = cgraph_get_create_node (current_function_decl);
+
+  afdo_get_function_count (current_function_decl, &entry_count, &total_count);
+  node->total_count += total_count;
+  node->count += entry_count;
+  ENTRY_BLOCK_PTR->count = node->count;
+}
+
+/* Add the AS_NAME->BFD_NAME to the assembler_name to bfd_name mapping.  */
+
+void
+afdo_add_bfd_name_mapping (const char *as_name, const char *bfd_name)
+{
+  struct afdo_bfd_name **slot;
+  struct afdo_bfd_name *entry = (struct afdo_bfd_name *)
+    xmalloc (sizeof (struct afdo_bfd_name));
+
+  entry->assembler_name = as_name;
+  entry->bfd_name = bfd_name;
+  slot = (struct afdo_bfd_name **)
+    htab_find_slot (bfd_name_htab, entry, INSERT);
+  if (!*slot)
+    *slot = entry;
+  else
+    free (entry);
+}
+
+/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count)
+   is recorded in a hash map.  */
+
+void
+afdo_add_copy_scale (struct cgraph_edge *edge)
+{
+  struct gcov_stack *stack;
+  struct gcov_stack **stack_slot;
+  int scale = edge->caller->count ?
+      (double) REG_BR_PROB_BASE * edge->count / edge->caller->count
+      : REG_BR_PROB_BASE;
+  int size = get_inline_stack_size_by_edge (edge);
+
+  if (size == 0)
+    return;
+  stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
+  stack->func_name
+      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
+	edge->caller->global.inlined_to ?
+	    edge->caller->global.inlined_to->decl : edge->caller->decl));
+  stack->callee_name
+      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
+  stack->size = size;
+  stack->stack = (struct gcov_callsite_pos *)
+      xmalloc (sizeof (struct gcov_callsite_pos) * size);
+  stack->count = scale;
+
+  get_inline_stack_by_edge (edge, stack->stack);
+
+  stack_slot = (struct gcov_stack **)
+      htab_find_slot (stack_scale_htab, stack, INSERT);
+  if (!*stack_slot)
+    *stack_slot = stack;
+  else
+    (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count);
+}
+
+/* For a given POS_STACK with SIZE, get the COUNT/NUM_INST info for the
+   inline stack. If CALLEE_NAME is non-null, the COUNT represents the
+   total count in the inline stack. Otherwise, the COUNT represents the
+   count of an ordinary statement. Return FALSE if profile is not found
+   for the given POS_STACK.  */
+
+static bool
+get_stack_count (struct gcov_callsite_pos *pos_stack,
+		 const char *callee_name,
+		 int size,
+		 gcov_type *count, gcov_type *num_inst)
+{
+  int i;
+
+  for (i = 0; i < size; i++)
+    {
+      struct gcov_stack stack, *entry;
+      stack.func_name = pos_stack[size - i - 1].func;
+      stack.callee_name = callee_name;
+      stack.stack = pos_stack;
+      stack.size = size - i;
+      entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
+      if (entry)
+	{
+	  if (i == 0)
+	    {
+	      *count = entry->count;
+	      *num_inst = entry->num_inst;
+	      return true;
+	    }
+	  else
+	    {
+	      struct gcov_stack scale_stack, *scale_entry;
+	      scale_stack.stack = pos_stack + size - i;
+	      scale_stack.size = i;
+	      scale_stack.func_name = pos_stack[size - 1].func;
+	      scale_stack.callee_name = stack.func_name;
+	      scale_entry = (struct gcov_stack *)
+		  htab_find (stack_scale_htab, &scale_stack);
+	      if (scale_entry)
+		{
+		  *count = entry->count * scale_entry->count
+			   / REG_BR_PROB_BASE;
+		  *num_inst = entry->num_inst;
+		  return true;
+		}
+	    }
+	}
+    }
+  *count = 0;
+  *num_inst = 0;
+  return false;
+}
+
+/* For a given STMT, get the COUNT and NUM_INST from its profile.
+   Return FALSE if profile is not found for STMT.  */
+
+static bool
+get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst)
+{
+  struct gcov_callsite_pos *pos_stack;
+  int size;
+
+  if (!stmt)
+    return false;
+  size = get_inline_stack_size_by_stmt (stmt);
+  if (size == 0)
+    return false;
+
+  pos_stack = (struct gcov_callsite_pos *)
+      alloca (sizeof (struct gcov_callsite_pos) * size);
+
+  get_inline_stack_by_stmt (stmt, current_function_decl, pos_stack, true);
+
+  return get_stack_count (pos_stack, NULL, size, count, num_inst);
+}
+
+/* For a given EDGE, return the total count of the inlined callsite.  */
+
+gcov_type
+afdo_get_callsite_count (struct cgraph_edge *edge)
+{
+  struct gcov_callsite_pos *pos_stack;
+  gcov_type count, num_inst;
+  const char *callee_name =
+      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
+  int size = get_inline_stack_size_by_edge (edge);
+
+  if (size == 0)
+    return 0;
+  pos_stack = (struct gcov_callsite_pos *)
+      alloca (sizeof (struct gcov_callsite_pos) * size);
+
+  get_inline_stack_by_edge (edge, pos_stack);
+
+  if (get_stack_count (pos_stack, callee_name, size, &count, &num_inst))
+    return count;
+  else
+    return 0;
+}
+
+/* For a given BB, return its execution count.  */
+
+gcov_type
+afdo_get_bb_count (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  gcov_type max_count = 0;
+  bool has_annotated = false;
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gcov_type count, num_inst;
+      gimple stmt = gsi_stmt (gsi);
+      if (get_stmt_count (stmt, &count, &num_inst))
+	{
+	  if (count > max_count)
+	    max_count = count;
+	  has_annotated = true;
+	}
+    }
+  if (has_annotated)
+    {
+      bb->flags |= BB_ANNOTATED;
+      return max_count;
+    }
+  else
+    return 0;
+}
+
+/* Annotate auto profile to the control flow graph.  */
+
+static void
+afdo_annotate_cfg (void)
+{
+  basic_block bb;
+  gcov_type max_count = 0;
+
+  FOR_EACH_BB (bb)
+    {
+      bb->count = afdo_get_bb_count (bb);
+      if (bb->count > max_count)
+	max_count = bb->count;
+    }
+  if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count)
+    ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+  if (max_count > 0)
+    {
+      counts_to_freqs ();
+      afdo_calculate_branch_prob ();
+      profile_status = PROFILE_READ;
+    }
+}
+
+/* Read profile from profile data file. Write to the module hashmap.  */
+
+static void
+read_profile (void)
+{
+  gcov_unsigned_t i, j, k, file_name_num;
+
+  if (gcov_open (auto_profile_file, 1) == -1)
+    {
+      inform (0, "Cannot open profile file %s.", auto_profile_file);
+      flag_auto_profile = 0;
+      return;
+    }
+
+  if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
+    {
+      inform (0, "Magic number does not mathch.");
+      flag_auto_profile = 0;
+      return;
+    }
+
+  if (gcov_read_unsigned () != GCOV_VERSION)
+    {
+/*      inform (0, "Vesion number does not mathch.");
+      flag_auto_profile = 0;
+      return;*/;
+    }
+
+  /* Skip the empty integer.  */
+  gcov_read_unsigned ();
+  gcc_assert (gcov_read_unsigned () == GCOV_TAG_AFDO_FILE_NAMES);
+
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+
+  /* Read in the file name table.  */
+  file_name_num = gcov_read_unsigned ();
+  file_names = (const char **)
+    xmalloc (sizeof (const char *) * file_name_num);
+  for (i = 0; i < file_name_num; i++)
+    file_names[i] = xstrdup (gcov_read_string ());
+
+  if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
+    {
+      inform (0, "Not expected TAG.");
+      return;
+    }
+
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+
+  /* Read in the function/callsite profile, and store it in local
+     data structure.  */
+  function_num = gcov_read_unsigned ();
+  gcov_functions = (struct gcov_function *)
+    xmalloc (function_num * sizeof (struct gcov_function));
+  for (i = 0; i < function_num; i++)
+    {
+      gcov_functions[i].name = xstrdup (gcov_read_string ());
+      gcov_functions[i].file = file_names[gcov_read_unsigned ()];
+      gcov_functions[i].total_count = gcov_read_counter ();
+      gcov_functions[i].entry_count = gcov_read_counter ();
+      gcov_functions[i].max_count = 0;
+      gcov_functions[i].stack_num = gcov_read_unsigned ();
+      gcov_functions[i].stacks = (struct gcov_stack *)
+	xmalloc (gcov_functions[i].stack_num * sizeof (struct gcov_stack));
+      for (j = 0; j < gcov_functions[i].stack_num; j++)
+	{
+	  gcov_functions[i].stacks[j].func_name = gcov_functions[i].name;
+	  gcov_functions[i].stacks[j].callee_name = NULL;
+	  gcov_functions[i].stacks[j].size = gcov_read_unsigned ();
+	  gcov_functions[i].stacks[j].stack = (struct gcov_callsite_pos *)
+	    xmalloc (gcov_functions[i].stacks[j].size
+		     * sizeof (struct gcov_callsite_pos));
+	  for (k = 0; k < gcov_functions[i].stacks[j].size; k++)
+	    {
+	      gcov_functions[i].stacks[j].stack[k].func =
+		file_names[gcov_read_unsigned ()];
+	      gcov_functions[i].stacks[j].stack[k].file =
+		file_names[gcov_read_unsigned ()];
+	      gcov_functions[i].stacks[j].stack[k].line =
+		gcov_read_unsigned ();
+	      gcov_functions[i].stacks[j].stack[k].discr =
+		gcov_read_unsigned ();
+	    }
+	  gcov_functions[i].stacks[j].count = gcov_read_counter ();
+	  gcov_functions[i].stacks[j].num_inst = gcov_read_counter ();
+	}
+    }
+
+  /* Read in the module info.  */
+  if (gcov_read_unsigned () != GCOV_TAG_AFDO_MODULE_GROUPING)
+    {
+      inform (0, "Not expected TAG.");
+      return;
+    }
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+
+  /* Read in the file name table.  */
+  total_module_num = gcov_read_unsigned ();
+  modules = (struct afdo_module *)
+    xmalloc (total_module_num * sizeof (struct afdo_module));
+  for (i = 0; i < total_module_num; i++)
+    {
+      unsigned num_strings;
+      struct afdo_module **slot;
+      modules[i].name = xstrdup (gcov_read_string());
+      modules[i].ident = i + 1;
+      /* exported flag.	 */
+      modules[i].exported = gcov_read_unsigned();
+      /* has_asm flag.  */
+      modules[i].has_asm = gcov_read_unsigned();
+      /* aux_module and 5 options.  */
+      modules[i].num_aux_modules = gcov_read_unsigned();
+      modules[i].num_quote_paths = gcov_read_unsigned();
+      modules[i].num_bracket_paths = gcov_read_unsigned();
+      modules[i].num_cpp_defines = gcov_read_unsigned();
+      modules[i].num_cpp_includes = gcov_read_unsigned();
+      modules[i].num_cl_args = gcov_read_unsigned();
+      num_strings = modules[i].num_aux_modules
+	+ modules[i].num_quote_paths
+	+ modules[i].num_bracket_paths
+	+ modules[i].num_cpp_defines
+	+ modules[i].num_cpp_includes
+	+ modules[i].num_cl_args;
+      modules[i].strings = (const char **)
+	xmalloc (num_strings * sizeof (char *));
+      for (j = 0; j < num_strings; j++)
+	modules[i].strings[j] = xstrdup (gcov_read_string());
+      slot = (struct afdo_module **)
+	htab_find_slot (module_htab, &modules[i], INSERT);
+      if (!*slot)
+	*slot = &modules[i];
+      else
+	gcc_unreachable ();
+    }
+}
+
+/* Process the profile data and build the function/callsite/callstack
+   hash maps.  */
+
+void
+process_auto_profile (void)
+{
+  unsigned i;
+
+  afdo_read_bfd_names ();
+  for (i = 0; i < function_num; i++)
+    {
+      struct gcov_function **func_slot = (struct gcov_function **)
+	  htab_find_slot (function_htab, gcov_functions + i, INSERT);
+      if (!*func_slot)
+	*func_slot = gcov_functions + i;
+    }
+
+  for (i = 0; i < function_num; i++)
+    {
+      unsigned j;
+      struct gcov_function *func = gcov_functions + i;
+      for (j = 0; j < func->stack_num; j++)
+	{
+	  unsigned k;
+	  unsigned stack_size = func->stacks[j].size;
+	  gcov_type count = func->stacks[j].count;
+	  struct gcov_stack **stack_slot = (struct gcov_stack **)
+		  htab_find_slot (stack_htab, func->stacks + j, INSERT);
+	  if (func->stacks[j].num_inst && count > afdo_profile_info->sum_max)
+	    afdo_profile_info->sum_max = count / func->stacks[j].num_inst;
+	  if (*stack_slot)
+	    {
+	      (*stack_slot)->count += count;
+	      if ((*stack_slot)->num_inst < func->stacks[j].num_inst)
+		(*stack_slot)->num_inst = func->stacks[j].num_inst;
+	    }
+	  else
+	    *stack_slot = func->stacks + j;
+	  for (k = 1; k < stack_size; k++)
+	    {
+	      struct gcov_stack *new_stack = (struct gcov_stack *)
+		  xmalloc (sizeof (struct gcov_stack));
+	      new_stack->func_name = func->stacks[j].func_name;
+	      new_stack->callee_name =
+		  func->stacks[j].stack[stack_size - k - 1].func;
+	      new_stack->stack = func->stacks[j].stack + stack_size - k;
+	      new_stack->size = k;
+	      new_stack->num_inst = 0;
+	      new_stack->count = 0;
+	      stack_slot = (struct gcov_stack **)
+		  htab_find_slot (stack_htab, new_stack, INSERT);
+	      if (*stack_slot)
+		{
+		  (*stack_slot)->count += count;
+		  free (new_stack);
+		}
+	      else
+		*stack_slot = new_stack;
+	    }
+	}
+    }
+}
+
+/* Create the hash tables, and read the profile from the profile data
+   file.  */
+
+void
+init_auto_profile (void)
+{
+  if (auto_profile_file == NULL)
+    auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
+
+  /* Initialize the function hash table.  */
+  function_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+				     afdo_function_hash,
+				     afdo_function_eq,
+				     0,
+				     xcalloc,
+				     free);
+  /* Initialize the stack hash table.  */
+  stack_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+				  afdo_stack_hash,
+				  afdo_stack_eq,
+				  0,
+				  xcalloc,
+				  free);
+  /* Initialize the stack scale hash table.  */
+  stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+				  afdo_stack_hash,
+				  afdo_stack_eq,
+				  0,
+				  xcalloc,
+				  free);
+  /* Initialize the bfd name mapping table.  */
+  bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+				     afdo_bfd_name_hash,
+				     afdo_bfd_name_eq,
+				     afdo_bfd_name_del,
+				     xcalloc,
+				     free);
+  /* Initialize the module hash table.  */
+  module_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+				   afdo_module_hash,
+				   afdo_module_eq,
+				   0,
+				   xcalloc,
+				   free);
+
+  afdo_profile_info = (struct gcov_ctr_summary *)
+    xcalloc (1, sizeof (struct gcov_ctr_summary));
+  afdo_profile_info->runs = 1;
+  afdo_profile_info->sum_max = 0;
+
+  /* Read the profile from the profile file.  */
+  read_profile ();
+
+  if (flag_dyn_ipa)
+    read_aux_modules();
+}
+
+/* Free the resources.  */
+
+void
+end_auto_profile (void)
+{
+  unsigned i, j;
+
+  for (i = 0; i < function_num; i++)
+    {
+      for (j = 0; j < gcov_functions[i].stack_num; ++j)
+        free (gcov_functions[i].stacks[j].stack);
+      free (gcov_functions[i].stacks);
+    }
+  free (gcov_functions);
+
+  for (i = 0; i < total_module_num; i++)
+    free (modules[i].strings);
+  free (modules);
+  free (afdo_profile_info);
+  free (file_names);
+  htab_delete (function_htab);
+  htab_delete (stack_htab);
+  htab_delete (stack_scale_htab);
+  htab_delete (bfd_name_htab);
+  htab_delete (module_htab);
+  profile_info = NULL;
+}
+
+/* BB1 and BB2 are in an equivalent class iff:
+   1. BB1 dominates BB2.
+   2. BB2 post-dominates BB1.
+   3. BB1 and BB2 are in the same loop nest.
+   This function finds the equivalent class for each basic block, and
+   stores a pointer to the first BB in its equivalent class. Meanwhile,
+   set bb counts for the same equivalent class to be idenical.  */
+
+static void
+afdo_find_equiv_class (void)
+{
+  basic_block bb;
+
+  FOR_ALL_BB (bb)
+    bb->aux = NULL;
+
+  FOR_ALL_BB (bb)
+    {
+      VEC (basic_block, heap) *dom_bbs;
+      basic_block bb1;
+      int i;
+
+      if (bb->aux != NULL)
+	continue;
+      bb->aux = bb;
+      dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
+      FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb1)
+	if (bb1->aux == NULL
+	    && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
+	    && bb1->loop_father == bb->loop_father)
+	  {
+	    bb1->aux = bb;
+	    if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0)
+	      {
+		bb->count = MAX (bb->count, bb1->count);
+		bb->flags |= BB_ANNOTATED;
+	      }
+	  }
+      dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
+      FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb1)
+	if (bb1->aux == NULL
+	    && dominated_by_p (CDI_DOMINATORS, bb, bb1)
+	    && bb1->loop_father == bb->loop_father)
+	  {
+	    bb1->aux = bb;
+	    if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0)
+	      {
+		bb->count = MAX (bb->count, bb1->count);
+		bb->flags |= BB_ANNOTATED;
+	      }
+	  }
+    }
+}
+
+/* If a baisk block only has one in/out edge, then the bb count and he
+   edge count should be the same.
+   IS_SUCC is true if the out edge of the basic block is examined.
+   Return TRUE if any basic block/edge count is changed.  */
+
+static bool
+afdo_propagate_single_edge (bool is_succ)
+{
+  basic_block bb;
+  bool changed = false;
+
+  FOR_EACH_BB (bb)
+    if (is_succ ? single_succ_p (bb) : single_pred_p (bb))
+      {
+	edge e = is_succ ? single_succ_edge (bb) : single_pred_edge (bb);
+	if (((e->flags & EDGE_ANNOTATED) == 0)
+	    && ((bb->flags & BB_ANNOTATED) != 0))
+	  {
+	    e->count = bb->count;
+	    e->flags |= EDGE_ANNOTATED;
+	    changed = true;
+	  }
+	else if (((e->flags & EDGE_ANNOTATED) != 0)
+	    && ((bb->flags & BB_ANNOTATED) == 0))
+	  {
+	    bb->count = e->count;
+	    bb->flags |= BB_ANNOTATED;
+	    changed = true;
+	  }
+	else if (bb->count != e->count)
+	  {
+	    e->count = bb->count = MAX (bb->count, e->count);
+	    changed = true;
+	  }
+      }
+  return changed;
+}
+
+/* If a basic block's count is known, and only one of its in/out edges' count
+   is unknown, its count can be calculated.
+   Meanwhile, if all of the in/out edges' counts are known, then the basic
+   block's unknown count can also be calculated.
+   IS_SUCC is true if out edges of a basic blocks are examined.
+   Return TRUE if any basic block/edge count is changed.  */
+
+static bool
+afdo_propagate_multi_edge (bool is_succ)
+{
+  basic_block bb;
+  bool changed = false;
+
+  FOR_EACH_BB (bb)
+    {
+      edge e, unknown_edge = NULL;
+      edge_iterator ei;
+      int num_unknown_edge = 0;
+      gcov_type total_known_count = 0;
+
+      if (is_succ)
+	{
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    if ((e->flags & EDGE_ANNOTATED) == 0)
+	      num_unknown_edge ++, unknown_edge = e;
+	    else
+	      total_known_count += e->count;
+	}
+      else
+	{
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    if ((e->flags & EDGE_ANNOTATED) == 0)
+	      num_unknown_edge ++, unknown_edge = e;
+	    else
+	      total_known_count += e->count;
+	}
+
+      if (num_unknown_edge == 0)
+	{
+	  if (total_known_count > bb->count)
+	    {
+	      bb->count = total_known_count;
+	      changed = true;
+	    }
+	  if ((bb->flags & BB_ANNOTATED) == 0)
+	    {
+	      bb->flags |= BB_ANNOTATED;
+	      changed = true;
+	    }
+	}
+      else if (num_unknown_edge == 1
+	       && (bb->flags & BB_ANNOTATED) != 0)
+	{
+	  if (bb->count >= total_known_count)
+	    {
+	      unknown_edge->count = bb->count - total_known_count;
+	      unknown_edge->flags |= EDGE_ANNOTATED;
+	      changed = true;
+	    }
+	}
+    }
+  return changed;
+}
+
+/* Special propagation for circuit expressions. Because GCC translates
+   control flow into data flow for circuit expressions. E.g.
+   BB1:
+   if (a && b)
+     BB2
+   else
+     BB3
+
+   will be translated into:
+
+   BB1:
+     if (a)
+       goto BB.t1
+     else
+       goto BB.t3
+   BB.t1:
+     if (b)
+       goto BB.t2
+     else
+       goto BB.t3
+   BB.t2:
+     goto BB.t3
+   BB.t3:
+     tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
+     if (tmp)
+       goto BB2
+     else
+       goto BB3
+
+   In this case, we need to propagate through PHI to determine the edge
+   count of BB1->BB.t1, BB.t1->BB.t2.  */
+
+static void
+afdo_propagate_circuit (void)
+{
+  basic_block bb;
+  FOR_ALL_BB (bb)
+    {
+      gimple phi_stmt;
+      tree cmp_rhs, cmp_lhs;
+      gimple cmp_stmt = last_stmt (bb);
+      edge e;
+      edge_iterator ei;
+
+      if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
+	continue;
+      cmp_rhs = gimple_cond_rhs (cmp_stmt);
+      cmp_lhs = gimple_cond_lhs (cmp_stmt);
+      if (!TREE_CONSTANT (cmp_rhs)
+	  || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
+	continue;
+      if (TREE_CODE (cmp_lhs) != SSA_NAME)
+	continue;
+      if ((bb->flags & BB_ANNOTATED) == 0)
+	continue;
+      phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
+      while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN
+	     && gimple_assign_single_p (phi_stmt)
+	     && TREE_CODE(gimple_assign_rhs1 (phi_stmt)) == SSA_NAME)
+	phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt));
+      if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
+	continue;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  unsigned i, total = 0;
+	  edge only_one;
+	  bool check_value_one = (((integer_onep (cmp_rhs))
+		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
+		    ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
+	  if ((e->flags & EDGE_ANNOTATED) == 0)
+	    continue;
+	  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
+	    {
+	      tree val = gimple_phi_arg_def (phi_stmt, i);
+	      edge ep = gimple_phi_arg_edge (phi_stmt, i);
+
+	      if (!TREE_CONSTANT (val) || !(integer_zerop (val)
+		  || integer_onep (val)))
+		continue;
+	      if (check_value_one ^ integer_onep (val))
+		continue;
+	      total++;
+	      only_one = ep;
+	      if (e->probability == 0 && (e->flags & EDGE_ANNOTATED) == 0)
+		{
+		  ep->probability = 0;
+		  ep->count = 0;
+		  ep->flags |= EDGE_ANNOTATED;
+		}
+	    }
+	  if (total == 1 && (only_one->flags & EDGE_ANNOTATED) == 0)
+	    {
+	      only_one->probability = e->probability;
+	      only_one->count = e->count;
+	      only_one->flags |= EDGE_ANNOTATED;
+	    }
+	}
+    }
+}
+
+/* Propagate the basic block count and edge count on the control flow
+   graph. We do the propagation iteratively until stablize.  */
+
+static void
+afdo_propagate (void)
+{
+  basic_block bb;
+  bool changed = true;
+
+  FOR_ALL_BB (bb)
+    {
+      bb->count = ((basic_block) bb->aux)->count;
+      if ((((basic_block) bb->aux)->flags & BB_ANNOTATED) != 0)
+	bb->flags |= BB_ANNOTATED;
+    }
+
+  while (changed)
+    {
+      changed = false;
+
+      if (afdo_propagate_single_edge (true))
+	changed = true;
+      if (afdo_propagate_single_edge (false))
+	changed = true;
+      if (afdo_propagate_multi_edge (true))
+	changed = true;
+      if (afdo_propagate_multi_edge (false))
+	changed = true;
+      afdo_propagate_circuit ();
+    }
+}
+
+/* Propagate counts on control flow graph and calculate branch
+   probabilities.  */
+
+void
+afdo_calculate_branch_prob (void)
+{
+  basic_block bb;
+  bool has_sample = false;
+
+  FOR_EACH_BB (bb)
+    if (bb->count > 0)
+      has_sample = true;
+
+  if (!has_sample)
+    return;
+
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  calculate_dominance_info (CDI_DOMINATORS);
+  loop_optimizer_init (0);
+
+  afdo_find_equiv_class ();
+  afdo_propagate ();
+
+  FOR_EACH_BB (bb)
+    {
+      edge e;
+      edge_iterator ei;
+      int num_unknown_succ = 0;
+      gcov_type total_count = 0;
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  if ((e->flags & EDGE_ANNOTATED) == 0)
+	    num_unknown_succ ++;
+	  else
+	    total_count += e->count;
+	}
+      if (num_unknown_succ == 0 && total_count > 0)
+	{
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    e->probability =
+		(double) e->count * REG_BR_PROB_BASE / total_count;
+	}
+    }
+  FOR_ALL_BB (bb)
+    {
+      edge e;
+      edge_iterator ei;
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	e->count =
+		(double) bb->count * e->probability / REG_BR_PROB_BASE;
+      bb->aux = NULL;
+    }
+
+  loop_optimizer_finalize ();
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+}
+
+/* Use AutoFDO profile to annoate the control flow graph.  */
+
+static unsigned int
+auto_profile (void)
+{
+  struct cgraph_node *node;
+
+  if (cgraph_state == CGRAPH_STATE_FINISHED)
+    return 0;
+
+  init_node_map();
+  profile_info = afdo_profile_info;
+
+  for (node = cgraph_nodes; node; node = node->next)
+    {
+      if (!node->analyzed
+          || !gimple_has_body_p (node->decl)
+          || !(!node->clone_of || node->decl != node->clone_of->decl))
+        continue;
+
+      /* Don't profile functions produced for builtin stuff.  */
+      if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION
+          || DECL_STRUCT_FUNCTION (node->decl)->after_tree_profile)
+        continue;
+
+      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+      current_function_decl = node->decl;
+
+      afdo_annotate_cfg ();
+      compute_function_frequency ();
+
+      current_function_decl = NULL;
+      pop_cfun ();
+    }
+
+  return TODO_rebuild_cgraph_edges;
+}
+
+static bool
+gate_auto_profile_ipa (void)
+{
+  return flag_auto_profile;
+}
+
+struct simple_ipa_opt_pass pass_ipa_auto_profile =
+{
+ {
+  SIMPLE_IPA_PASS,
+  "afdo",                              /* name */
+  gate_auto_profile_ipa,               /* gate */
+  auto_profile,                        /* execute */
+  NULL,                                /* sub */
+  NULL,                                /* next */
+  0,                                   /* static_pass_number */
+  TV_IPA_AUTOFDO,                      /* tv_id */
+  0,                                   /* properties_required */
+  0,                                   /* properties_provided */
+  0,                                   /* properties_destroyed */
+  0,                                   /* todo_flags_start */
+  TODO_dump_func                       /* todo_flags_finish */
+ }
+};
Index: gcc/auto-profile.h
===================================================================
--- gcc/auto-profile.h	(revision 0)
+++ gcc/auto-profile.h	(revision 0)
@@ -0,0 +1,46 @@
+/* coverage.h - Defines data exported from coverage.c
+   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2008
+   Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef AUTO_PROFILE_H
+#define AUTO_PROFILE_H
+
+/* Read, process, finalize AutoFDO data structures.  */
+extern void init_auto_profile (void);
+extern void end_auto_profile (void);
+extern void process_auto_profile (void);
+
+/* Annotate function's count and total count.  */
+extern void afdo_set_current_function_count (void);
+
+/* Add the assembly_name to bfd_name mapping.  */
+extern void afdo_add_bfd_name_mapping (const char *, const char *);
+
+/* Add copy scale for an inlined edge to stack_scale_map.  */
+extern void afdo_add_copy_scale (struct cgraph_edge *);
+
+/* Calculate branch probability in both AutoFDO pass and after inlining.  */
+extern void afdo_calculate_branch_prob (void);
+
+/* Calculate total sample count of an inlined callsite.  */
+extern gcov_type afdo_get_callsite_count (struct cgraph_edge *);
+
+/* Calculate basic block count.  */
+extern gcov_type afdo_get_bb_count (basic_block);
+#endif /* AUTO_PROFILE_H */
Index: gcc/profile.c
===================================================================
--- gcc/profile.c	(revision 191813)
+++ gcc/profile.c	(working copy)
@@ -662,6 +662,7 @@ compute_branch_probabilities (unsigned cfg_checksu
   /* Very simple sanity checks so we catch bugs in our profiling code.  */
   if (!profile_info)
     return;
+
   if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
     {
       error ("corrupted profile info: run_max * runs < sum_max");
@@ -1447,7 +1448,7 @@ branch_prob (void)
   if (flag_profile_values)
     gimple_find_values_to_profile (&values);
 
-  if (flag_branch_probabilities)
+  if (flag_branch_probabilities && !flag_auto_profile)
     {
       compute_branch_probabilities (cfg_checksum, lineno_checksum);
       if (flag_profile_values)
Index: gcc/loop-unroll.c
===================================================================
--- gcc/loop-unroll.c	(revision 191813)
+++ gcc/loop-unroll.c	(working copy)
@@ -1154,6 +1154,10 @@ decide_unroll_runtime_iterations (struct loop *loo
       return;
     }
 
+  if (flag_auto_profile
+      && expected_loop_iterations (loop) > loop->header->count)
+    return;
+
   /* Success; now force nunroll to be power of 2, as we are unable to
      cope with overflows in computation of number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
Index: gcc/coverage.c
===================================================================
--- gcc/coverage.c	(revision 191813)
+++ gcc/coverage.c	(working copy)
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "filenames.h"
 #include "dwarf2asm.h"
 #include "target.h"
+#include "auto-profile.h"
 
 #include "gcov-io.h"
 #include "gcov-io.c"
@@ -274,7 +275,6 @@ static struct opt_desc force_matching_cg_opts[] =
   {
     { "-fexceptions", "-fno-exceptions", true },
     { "-fsized-delete", "-fno-sized-delete", false },
-    { "-frtti", "-fno-rtti", true },
     { NULL, NULL, false }
   };
 
@@ -2296,6 +2296,8 @@ coverage_init (const char *filename, const char* s
       init_pmu_profiling ();
       tree_init_instrumentation_sampling ();
     }
+  if (flag_auto_profile)
+    init_auto_profile ();
 }
 
 /* Return True if any type of profiling is enabled which requires linking
Index: gcc/tree-ssa-live.c
===================================================================
--- gcc/tree-ssa-live.c	(revision 191813)
+++ gcc/tree-ssa-live.c	(working copy)
@@ -546,7 +546,7 @@ remove_unused_scope_block_p (tree scope)
    else if (!nsubblocks)
      ;
    /* For terse debug info we can eliminate info on unused variables.  */
-   else if (!generate_debug_line_table
+   else if (!flag_auto_profile && !generate_debug_line_table
 	    && (debug_info_level == DINFO_LEVEL_NONE
 		|| debug_info_level == DINFO_LEVEL_TERSE))
      {
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 191813)
+++ gcc/common.opt	(working copy)
@@ -913,6 +913,16 @@ fauto-inc-dec
 Common Report Var(flag_auto_inc_dec) Init(1)
 Generate auto-inc/dec instructions
 
+fauto-profile
+Common Report Var(flag_auto_profile) Optimization
+Use sample profile information for call graph node weights. The default
+profile file is fbdata.afdo in 'pwd'.
+
+fauto-profile=
+Common Joined RejectNegative Var(auto_profile_file)
+Use sample profile information for call graph node weights. The profile
+file is specified in the argument.
+
 ; -fcheck-bounds causes gcc to generate array bounds checks.
 ; For C, C++ and ObjC: defaults off.
 ; For Java: defaults to on.
Index: gcc/tree-inline.c
===================================================================
--- gcc/tree-inline.c	(revision 191813)
+++ gcc/tree-inline.c	(working copy)
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "integrate.h"
 #include "langhooks.h"
 #include "l-ipo.h"
+#include "auto-profile.h"
 
 #include "rtl.h"	/* FIXME: For asm_str_count.  */
 
@@ -1824,6 +1825,15 @@ copy_bb (copy_body_data *id, basic_block bb, int f
       copy_gsi = gsi_last_bb (copy_basic_block);
     }
 
+  if (flag_auto_profile && profile_info)
+    {
+      gcov_type count = afdo_get_bb_count (copy_basic_block);
+      if (copy_basic_block->flags & BB_ANNOTATED)
+	copy_basic_block->count = count;
+      else if (bb->flags & BB_ANNOTATED)
+	copy_basic_block->flags |= BB_ANNOTATED;
+    }
+
   return copy_basic_block;
 }
 
@@ -1918,9 +1928,10 @@ copy_edges_for_bb (basic_block bb, gcov_type count
 	edge new_edge;
 
 	flags = old_edge->flags;
+	flags &= (~EDGE_ANNOTATED);
 
 	/* Return edges do get a FALLTHRU flag when the get inlined.  */
-	if (old_edge->dest->index == EXIT_BLOCK && !old_edge->flags
+	if (old_edge->dest->index == EXIT_BLOCK && !flags
 	    && old_edge->dest->aux != EXIT_BLOCK_PTR)
 	  flags |= EDGE_FALLTHRU;
 	new_edge = make_edge (new_bb, (basic_block) old_edge->dest->aux, flags);
@@ -2253,6 +2264,8 @@ copy_cfg_body (copy_body_data * id, gcov_type coun
   if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
     count_scale = (REG_BR_PROB_BASE * (double)count
 		   / ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count);
+  else if (flag_auto_profile && count == 0)
+    count_scale = 0;
   else
     count_scale = REG_BR_PROB_BASE;
 
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c	(revision 191813)
+++ gcc/tree-profile.c	(working copy)
@@ -1642,7 +1642,7 @@ direct_call_profiling (void)
 static bool
 gate_tree_profile_ipa (void)
 {
-  return (!in_lto_p
+  return (!in_lto_p && !flag_auto_profile
 	  && (flag_branch_probabilities || flag_test_coverage
 	      || profile_arc_flag || flag_profile_reusedist
               || flag_optimize_locality));
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 191813)
+++ gcc/Makefile.in	(working copy)
@@ -874,6 +874,7 @@ GIMPLE_H = gimple.h gimple.def gsstruct.def pointe
 TRANS_MEM_H = trans-mem.h
 GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h
 COVERAGE_H = coverage.h $(GCOV_IO_H)
+AUTO_PROFILE_H = auto-profile.h
 DEMANGLE_H = $(srcdir)/../include/demangle.h
 RECOG_H = recog.h
 ALIAS_H = alias.h coretypes.h
@@ -1161,6 +1162,7 @@ OBJS = \
 	alias.o \
 	alloc-pool.o \
 	auto-inc-dec.o \
+	auto-profile.o \
 	bb-reorder.o \
 	bitmap.o \
 	bt-load.o \
@@ -3034,6 +3036,10 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
    $(HASHTAB_H) tree-iterator.h $(CGRAPH_H) $(TREE_PASS_H) gcov-io.c $(TM_P_H) \
    opts.h $(TREE_FLOW_H) $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h l-ipo.h dwarf2asm.h \
    $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H)
+auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
+   $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \
+   $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) \
+   $(AUTO_PROFILE_H)
 cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
    $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) output.h $(FUNCTION_H) $(TREE_PASS_H) \
Index: gcc/basic-block.h
===================================================================
--- gcc/basic-block.h	(revision 191813)
+++ gcc/basic-block.h	(working copy)
@@ -89,8 +89,9 @@ DEF_VEC_ALLOC_P(edge,heap);
 					   and cold sections, when we
 					   do partitioning.  */
 #define EDGE_PRESERVE		0x4000	/* Never merge blocks via this edge. */
-#define EDGE_ALL_FLAGS		0x7fff
-
+#define EDGE_ANNOTATED		0x8000  /* Edge has been annotated by AutoFDO
+					   profile.  */
+#define EDGE_ALL_FLAGS          0xffff
 #define EDGE_COMPLEX \
   (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_EH | EDGE_PRESERVE)
 
@@ -268,7 +269,11 @@ enum bb_flags
   /* Set on blocks that are in a transaction.  This is calculated on
      demand, and is available after calling
      compute_transaction_bits().  */
-  BB_IN_TRANSACTION = 1 << 13
+  BB_IN_TRANSACTION = 1 << 13,
+
+  /* Set on blocks that has been annotated by AutoFDO profile. This is
+     calculated while propagating profiles on control flow graph.  */
+  BB_ANNOTATED = 1 << 14
 };
 
 /* Dummy flag for convenience in the hot/cold partitioning code.  */
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 191813)
+++ gcc/tree-cfg.c	(working copy)
@@ -1806,6 +1806,14 @@ gimple_merge_blocks (basic_block a, basic_block b)
 	}
     }
 
+  /* When merging two BBs, if their counts are different, the larger
+     count is selected as the new bb count.  */
+  if (flag_auto_profile && a->count != b->count)
+    {
+      a->count = MAX (a->count, b->count);
+      a->frequency = MAX (a->frequency, b->frequency);
+    } 
+
   /* Merge the sequences.  */
   last = gsi_last_bb (a);
   gsi_insert_seq_after (&last, bb_seq (b), GSI_NEW_STMT);
Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(revision 191813)
+++ gcc/passes.c	(working copy)
@@ -1251,6 +1251,7 @@ init_optimization_passes (void)
       NEXT_PASS (pass_rebuild_cgraph_edges);
       NEXT_PASS (pass_inline_parameters);
     }
+  NEXT_PASS (pass_ipa_auto_profile);
   NEXT_PASS (pass_ipa_tree_profile);
     {
       struct opt_pass **p = &pass_ipa_tree_profile.pass.sub;
