Patchwork [GOOGLE] Add AutoFDO based indirect call value profiling to GCC

login
register
mail settings
Submitter Dehao Chen
Date April 17, 2013, 3:41 a.m.
Message ID <CAO2gOZW3fUDsUQimdB3558=uC7CWgQmPpZhyQQUdJOSTG-HkOw@mail.gmail.com>
Download mbox | patch
Permalink /patch/237138/
State New
Headers show

Comments

Dehao Chen - April 17, 2013, 3:41 a.m.
This patch implements indirect call promotion for AutoFDO.

Bootstrapped and passed gcc regression tests.

Is it okay for gcc-4_7 branch?

Thanks,
Dehao

http://codereview.appspot.com/8814043

  enum hist_type);
Xinliang David Li - April 17, 2013, 5:21 p.m.
https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c
File gcc/auto-profile.c (right):

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode161
gcc/auto-profile.c:161: };
Why not sharing the same hist_type enum in value-prof.h?

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode171
gcc/auto-profile.c:171: int count;
int --> gcov_type?

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode519
gcc/auto-profile.c:519: static void
New line above.

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode550
gcc/auto-profile.c:550: if (actual_count < 2)
Are the targets sorted?

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode570
gcc/auto-profile.c:570: static void afdo_vpt (gimple stmt, struct
gcov_hist *v, int hist_size)
New line.

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode914
gcc/auto-profile.c:914: for the given POS_STACK.  */
Document new parameters.

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode1193
gcc/auto-profile.c:1193: gcov_functions[i].stacks[j].hist_size =
gcov_read_unsigned ();
Why is the data not optional?

https://codereview.appspot.com/8814043/diff/1/gcc/auto-profile.c#newcode1203
gcc/auto-profile.c:1203: file_names[gcov_read_counter()];
missing white space (many other instances)

https://codereview.appspot.com/8814043/diff/1/gcc/tree-inline.c
File gcc/tree-inline.c (right):

https://codereview.appspot.com/8814043/diff/1/gcc/tree-inline.c#newcode1833
gcc/tree-inline.c:1833: gcov_type count = afdo_get_bb_count
(copy_basic_block, false);
comment about the 'false' parameter.

https://codereview.appspot.com/8814043/diff/1/gcc/value-prof.c
File gcc/value-prof.c (right):

https://codereview.appspot.com/8814043/diff/1/gcc/value-prof.c#newcode1223
gcc/value-prof.c:1223: if (!strcmp(fname, name))
You can use assembler_name_hash to find the cgraph_node

https://codereview.appspot.com/8814043/diff/1/gcc/value-prof.c#newcode1626
gcc/value-prof.c:1626: direct_call1 = find_func_by_global_id (val1);
May be add a parameter to find_func_by_global_id (val, is_auto_fdo)?

https://codereview.appspot.com/8814043/

On Tue, Apr 16, 2013 at 8:41 PM, Dehao Chen <dehao@google.com> wrote:
> This patch implements indirect call promotion for AutoFDO.
>
> Bootstrapped and passed gcc regression tests.
>
> Is it okay for gcc-4_7 branch?
>
> Thanks,
> Dehao
>
> http://codereview.appspot.com/8814043
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index d17b250..c217846 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -3039,7 +3039,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H)
> $(SYSTEM_H) coretypes.h \
>     $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H) $(AUTO_PROFILE_H)
>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \
> -   $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) \
> +   $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
> value-prof.h \
>     $(AUTO_PROFILE_H)
>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
> diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
> index cf17370..38f4209 100644
> --- a/gcc/auto-profile.c
> +++ b/gcc/auto-profile.c
> @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple.h"
>  #include "cgraph.h"
>  #include "tree-flow.h"
> +#include "value-prof.h"
>  #include "auto-profile.h"
>
>  /* The following routines implements AutoFDO optimization.
> @@ -109,6 +110,8 @@ struct gcov_stack
>    const char *callee_name;
>    struct gcov_callsite_pos *stack;
>    gcov_unsigned_t size;
> +  struct gcov_hist *hist;
> +  gcov_unsigned_t hist_size;
>    gcov_type num_inst;
>    gcov_type count;
>    gcov_type max_count;
> @@ -150,6 +153,24 @@ struct afdo_module
>    char **strings;
>  };
>
> +enum afdo_histogram_type
> +{
> +  CALL_HIST = 1,
> +  STRINGOP_HIST,
> +  DIVMOD_HIST
> +};
> +
> +struct gcov_hist
> +{
> +  enum afdo_histogram_type type;
> +  union
> +    {
> +      const char *func_name;
> +      unsigned long long value;
> +    } value;
> +  int count;
> +};
> +
>  /* Store the file name strings read from the profile data file. */
>  static const char **file_names;
>
> @@ -493,6 +514,64 @@ read_aux_modules (void)
>      }
>  }
>
> +/* From AutoFDO profiles, find values inside STMT for that we want to measure
> +   histograms for indirect-call optimization.  */
> +static void
> +afdo_indirect_call (gimple stmt, struct gcov_hist *values, int hist_size)
> +{
> +  tree callee;
> +  int i, total = 0;
> +  int actual_count = 0;
> +  histogram_value hist;
> +
> +  if (gimple_code (stmt) != GIMPLE_CALL
> +      || gimple_call_fndecl (stmt) != NULL_TREE)
> +    return;
> +
> +  callee = gimple_call_fn (stmt);
> +
> +  for (i = 0; i < hist_size; i++)
> +    if (values[i].type == CALL_HIST)
> +      break;
> +
> +  if (i == hist_size)
> +    return;
> +
> +  hist = gimple_alloc_histogram_value (cfun, HIST_TYPE_INDIR_CALL_TOPN,
> +       stmt, callee);
> +  hist->n_counters = (GCOV_ICALL_TOPN_VAL << 2) + 1;
> +  hist->hvalue.counters =  XNEWVEC (gcov_type, hist->n_counters);
> +  gimple_add_histogram_value (cfun, stmt, hist);
> +
> +  for (i = 0; i < hist_size; i++)
> +    if (values[i].type == CALL_HIST)
> +      {
> + total += values[i].count;
> + if (actual_count < 2)
> +  {
> +    hist->hvalue.counters[actual_count * 2 + 1] =
> + (unsigned long long) values[i].value.func_name;
> +    hist->hvalue.counters[actual_count * 2 + 2] = values[i].count;
> +    actual_count ++;
> +  }
> +      }
> +
> +  hist->hvalue.counters[0] = total;
> +
> +  if (actual_count == 1)
> +    {
> +      hist->hvalue.counters[3] = 0;
> +      hist->hvalue.counters[4] = 0;
> +    }
> +}
> +
> +/* From AutoFDO profiles, find values inside STMT for that we want to measure
> +   histograms and adds them to list VALUES.  */
> +static void afdo_vpt (gimple stmt, struct gcov_hist *v, int hist_size)
> +{
> +  afdo_indirect_call (stmt, v, hist_size);
> +}
> +
>  /* Return the size of the inline stack of the STMT.  */
>
>  static int
> @@ -838,7 +917,8 @@ static bool
>  get_stack_count (struct gcov_callsite_pos *pos_stack,
>   const char *callee_name,
>   int size,
> - gcov_type *count, gcov_type *max_count, gcov_type *num_inst)
> + gcov_type *count, gcov_type *max_count, gcov_type *num_inst,
> + gcov_unsigned_t *hist_size, struct gcov_hist **hist)
>  {
>    int i;
>
> @@ -858,6 +938,11 @@ get_stack_count (struct gcov_callsite_pos *pos_stack,
>        *num_inst = entry->num_inst;
>        if (max_count)
>   *max_count = entry->max_count;
> +      if (hist_size)
> + {
> +  *hist_size = entry->hist_size;
> +  *hist = entry->hist;
> + }
>        return true;
>      }
>    else
> @@ -876,6 +961,11 @@ get_stack_count (struct gcov_callsite_pos *pos_stack,
>    *num_inst = entry->num_inst;
>    if (max_count)
>      *max_count = entry->max_count;
> +  if (hist_size)
> +    {
> +      *hist_size = entry->hist_size;
> +      *hist = entry->hist;
> +    }
>    return true;
>   }
>      }
> @@ -885,6 +975,11 @@ get_stack_count (struct gcov_callsite_pos *pos_stack,
>    *num_inst = 0;
>    if (max_count)
>      *max_count = 0;
> +  if (hist_size)
> +    {
> +      *hist_size = 0;
> +      *hist = 0;
> +    }
>    return false;
>  }
>
> @@ -892,7 +987,8 @@ get_stack_count (struct gcov_callsite_pos *pos_stack,
>     Return FALSE if profile is not found for STMT.  */
>
>  static bool
> -get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst)
> +get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst,
> + gcov_unsigned_t *hist_size, struct gcov_hist **hist)
>  {
>    struct gcov_callsite_pos *pos_stack;
>    int size;
> @@ -910,7 +1006,8 @@ get_stmt_count (gimple stmt, gcov_type *count,
> gcov_type *num_inst)
>
>    get_inline_stack_by_stmt (stmt, current_function_decl, pos_stack, true);
>
> -  return get_stack_count (pos_stack, NULL, size, count, NULL, num_inst);
> +  return get_stack_count (pos_stack, NULL, size, count, NULL, num_inst,
> +  hist_size, hist);
>  }
>
>  /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count
> @@ -938,13 +1035,13 @@ afdo_get_callsite_count (struct cgraph_edge
> *edge, gcov_type *count,
>   get_discriminator_from_locus(gimple_location(edge->call_stmt));
>
>    return get_stack_count (pos_stack, callee_name,
> -  size, count, max_count, &num_inst);
> +  size, count, max_count, &num_inst, NULL, NULL);
>  }
>
>  /* For a given BB, return its execution count.  */
>
>  gcov_type
> -afdo_get_bb_count (basic_block bb)
> +afdo_get_bb_count (basic_block bb, bool annotate_vpt)
>  {
>    gimple_stmt_iterator gsi;
>    gcov_type max_count = 0;
> @@ -953,12 +1050,16 @@ afdo_get_bb_count (basic_block bb)
>    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>      {
>        gcov_type count, num_inst;
> +      gcov_unsigned_t hist_size;
> +      struct gcov_hist *hist;
>        gimple stmt = gsi_stmt (gsi);
> -      if (get_stmt_count (stmt, &count, &num_inst))
> +      if (get_stmt_count (stmt, &count, &num_inst, &hist_size, &hist))
>   {
>    if (count > max_count)
>      max_count = count;
>    has_annotated = true;
> +  if (annotate_vpt && hist_size > 0)
> +    afdo_vpt (stmt, hist, hist_size);
>   }
>      }
>    if (has_annotated)
> @@ -980,7 +1081,7 @@ afdo_annotate_cfg (void)
>
>    FOR_EACH_BB (bb)
>      {
> -      bb->count = afdo_get_bb_count (bb);
> +      bb->count = afdo_get_bb_count (bb, true);
>        if (bb->count > max_count)
>   max_count = bb->count;
>      }
> @@ -992,6 +1093,8 @@ afdo_annotate_cfg (void)
>        afdo_calculate_branch_prob ();
>        profile_status = PROFILE_READ;
>      }
> +  if (flag_value_profile_transformations)
> +    gimple_value_profile_transformations ();
>  }
>
>  extern gcov_working_set_t *gcov_working_sets;
> @@ -1055,7 +1158,7 @@ read_profile (void)
>      xmalloc (function_num * sizeof (struct gcov_function));
>    for (i = 0; i < function_num; i++)
>      {
> -      gcov_functions[i].name = xstrdup (gcov_read_string ());
> +      gcov_functions[i].name = file_names[gcov_read_unsigned ()];
>        gcov_functions[i].file = file_names[gcov_read_unsigned ()];
>        gcov_functions[i].total_count = gcov_read_counter ();
>        gcov_functions[i].entry_count = gcov_read_counter ();
> @@ -1087,6 +1190,22 @@ read_profile (void)
>      }
>    gcov_functions[i].stacks[j].count = gcov_read_counter ();
>    gcov_functions[i].stacks[j].num_inst = gcov_read_counter ();
> +  gcov_functions[i].stacks[j].hist_size = gcov_read_unsigned ();
> +  gcov_functions[i].stacks[j].hist = (struct gcov_hist *)
> +    xmalloc (gcov_functions[i].stacks[j].hist_size
> +     * sizeof (struct gcov_hist));
> +  for (k = 0; k < gcov_functions[i].stacks[j].hist_size; k++)
> +    {
> +      gcov_functions[i].stacks[j].hist[k].type =
> +  (enum afdo_histogram_type) gcov_read_unsigned();
> +      if (gcov_functions[i].stacks[j].hist[k].type == CALL_HIST)
> + gcov_functions[i].stacks[j].hist[k].value.func_name =
> +    file_names[gcov_read_counter()];
> +      else
> + gcov_functions[i].stacks[j].hist[k].value.value =
> +    gcov_read_counter();
> +      gcov_functions[i].stacks[j].hist[k].count = gcov_read_counter();
> +    }
>   }
>      }
>
> @@ -1698,6 +1817,7 @@ auto_profile (void)
>
>        afdo_annotate_cfg ();
>        compute_function_frequency ();
> +      update_ssa (TODO_update_ssa);
>
>        current_function_decl = NULL;
>        pop_cfun ();
> diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
> index 8e0a670..f7175ff 100644
> --- a/gcc/auto-profile.h
> +++ b/gcc/auto-profile.h
> @@ -43,5 +43,5 @@ extern bool afdo_get_callsite_count (struct
> cgraph_edge *, gcov_type *,
>       gcov_type *, bool);
>
>  /* Calculate basic block count.  */
> -extern gcov_type afdo_get_bb_count (basic_block);
> +extern gcov_type afdo_get_bb_count (basic_block, bool);
>  #endif /* AUTO_PROFILE_H */
> diff --git a/gcc/opts.c b/gcc/opts.c
> index 4657d6a..dab2564 100644
> --- a/gcc/opts.c
> +++ b/gcc/opts.c
> @@ -1665,6 +1665,8 @@ common_handle_option (struct gcc_options *opts,
>   opts->x_flag_unroll_loops = value;
>        if (!opts_set->x_flag_peel_loops)
>   opts->x_flag_peel_loops = value;
> +      if (!opts_set->x_flag_value_profile_transformations)
> + opts->x_flag_value_profile_transformations = value;
>        if (!opts_set->x_flag_inline_functions)
>   opts->x_flag_inline_functions = value;
>        if (!opts_set->x_flag_ipa_cp)
> diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
> index f09f201..62180fc 100644
> --- a/gcc/tree-inline.c
> +++ b/gcc/tree-inline.c
> @@ -1830,7 +1830,7 @@ copy_bb (copy_body_data *id, basic_block bb, int
> frequency_scale,
>      {
>        /* If the same inline happens in the profile-collection binary, use
>   that instance's profile count. Otherwise use the scaled count.  */
> -      gcov_type count = afdo_get_bb_count (copy_basic_block);
> +      gcov_type count = afdo_get_bb_count (copy_basic_block, false);
>        if (copy_basic_block->flags & BB_ANNOTATED)
>   copy_basic_block->count = count;
>        else if (bb->flags & BB_ANNOTATED)
> diff --git a/gcc/value-prof.c b/gcc/value-prof.c
> index 81465bc..c20d13a 100644
> --- a/gcc/value-prof.c
> +++ b/gcc/value-prof.c
> @@ -94,7 +94,7 @@ static bool gimple_ic_transform (gimple);
>
>  /* Allocate histogram value.  */
>
> -static histogram_value
> +histogram_value
>  gimple_alloc_histogram_value (struct function *fun ATTRIBUTE_UNUSED,
>        enum hist_type type, gimple stmt, tree value)
>  {
> @@ -1205,6 +1205,33 @@ find_func_by_funcdef_no (int func_id)
>    return VEC_index (cgraph_node_ptr, cgraph_node_map, func_id);
>  }
>
> +/* Return cgraph node for function with name. We need this because
> +   AutoFDO doesn't record the function id for each function
> +   in the profile.
> +   TODO: need to transform this lookup method to hash table.  */
> +
> +static struct cgraph_node *
> +find_func_by_name (char *name)
> +{
> +  struct cgraph_node *n;
> +  struct cgraph_node *ret = NULL;
> +  int match = 0;
> +
> +  for (n = cgraph_nodes; n; n = n->next)
> +    {
> +      const char *fname = IDENTIFIER_POINTER (decl_assembler_name (n->decl));
> +      if (!strcmp(fname, name))
> + {
> +  match ++;
> +  ret = n;
> + }
> +    }
> +  if (match == 1)
> +    return ret;
> +  else
> +    return NULL;
> +}
> +
>  /* Initialize map of gids (gid -> cgraph node) */
>
>  static htab_t gid_map = NULL;
> @@ -1562,7 +1589,8 @@ gimple_ic_transform_mult_targ (gimple stmt,
> histogram_value histogram)
>    count1 = histogram->hvalue.counters [2];
>    val2 = histogram->hvalue.counters [3];
>    count2 = histogram->hvalue.counters [4];
> -  bb_all = gimple_bb (stmt)->count;
> +  bb_all = flag_auto_profile ? histogram->hvalue.counters[0]
> +     : gimple_bb (stmt)->count;
>    all = bb_all;
>
>    gimple_remove_histogram_value (cfun, stmt, histogram);
> @@ -1592,18 +1620,26 @@ gimple_ic_transform_mult_targ (gimple stmt,
> histogram_value histogram)
>    else
>      prob1 = prob2 = 0;
>
> -  direct_call1 = find_func_by_global_id (val1);
> +  if (flag_auto_profile)
> +    direct_call1 = find_func_by_name ((char *) val1);
> +  else
> +    direct_call1 = find_func_by_global_id (val1);
>
>    if (val2 && (100 * count2 >= all * perc_threshold)
>        && count2 > count_threshold)
> -    direct_call2 = find_func_by_global_id (val2);
> +    {
> +      if (flag_auto_profile)
> + direct_call2 = find_func_by_name ((char *) val2);
> +      else
> + direct_call2 = find_func_by_global_id (val2);
> +    }
>
>    locus = (stmt != NULL) ? gimple_location (stmt)
>        : DECL_SOURCE_LOCATION (current_function_decl);
>    if (direct_call1 == NULL
>        || !check_ic_target (stmt, direct_call1))
>      {
> -      if (flag_opt_info >= OPT_INFO_MAX)
> +      if (flag_opt_info >= OPT_INFO_MAX && !flag_auto_profile)
>          {
>            if (!direct_call1)
>              inform (locus, "Can not find indirect call target decl "
> @@ -1645,9 +1681,12 @@ gimple_ic_transform_mult_targ (gimple stmt,
> histogram_value histogram)
>        print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
>        fprintf (dump_file, "=> ");
>        print_generic_expr (dump_file, direct_call1->decl, TDF_SLIM);
> -      fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
> -               EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1),
> -               EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1));
> +      if (flag_auto_profile)
> + fprintf (dump_file, " (%s)\n", (char *) val1);
> +      else
> + fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
> +                 EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1),
> +                 EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1));
>        fprintf (dump_file, "Transformation on insn:\n");
>        print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>        fprintf (dump_file, "==>\n");
> @@ -1683,9 +1722,12 @@ gimple_ic_transform_mult_targ (gimple stmt,
> histogram_value histogram)
>            print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
>            fprintf (dump_file, "=> ");
>            print_generic_expr (dump_file, direct_call2->decl, TDF_SLIM);
> -          fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
> -                   EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val2),
> -                   EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val2));
> +  if (flag_auto_profile)
> +    fprintf (dump_file, " (%s)\n", (char *) val2);
> +  else
> +    fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
> +                     EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val2),
> +                     EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val2));
>            fprintf (dump_file, "Transformation on insn\n");
>            print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>            fprintf (dump_file, "=>\n");
> diff --git a/gcc/value-prof.h b/gcc/value-prof.h
> index 9425e25..2d84fe9 100644
> --- a/gcc/value-prof.h
> +++ b/gcc/value-prof.h
> @@ -77,6 +77,8 @@ typedef VEC(histogram_value,heap) *histogram_values;
>  extern void gimple_find_values_to_profile (histogram_values *);
>  extern bool gimple_value_profile_transformations (void);
>
> +histogram_value gimple_alloc_histogram_value (struct function *, enum
> hist_type,
> +      gimple stmt, tree);
>  histogram_value gimple_histogram_value (struct function *, gimple);
>  histogram_value gimple_histogram_value_of_type (struct function *, gimple,
>   enum hist_type);

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d17b250..c217846 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3039,7 +3039,7 @@  coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H)
$(SYSTEM_H) coretypes.h \
    $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H) $(AUTO_PROFILE_H)
 auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
    $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \
-   $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) \
+   $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
value-prof.h \
    $(AUTO_PROFILE_H)
 cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index cf17370..38f4209 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -38,6 +38,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple.h"
 #include "cgraph.h"
 #include "tree-flow.h"
+#include "value-prof.h"
 #include "auto-profile.h"

 /* The following routines implements AutoFDO optimization.
@@ -109,6 +110,8 @@  struct gcov_stack
   const char *callee_name;
   struct gcov_callsite_pos *stack;
   gcov_unsigned_t size;
+  struct gcov_hist *hist;
+  gcov_unsigned_t hist_size;
   gcov_type num_inst;
   gcov_type count;
   gcov_type max_count;
@@ -150,6 +153,24 @@  struct afdo_module
   char **strings;
 };

+enum afdo_histogram_type
+{
+  CALL_HIST = 1,
+  STRINGOP_HIST,
+  DIVMOD_HIST
+};
+
+struct gcov_hist
+{
+  enum afdo_histogram_type type;
+  union
+    {
+      const char *func_name;
+      unsigned long long value;
+    } value;
+  int count;
+};
+
 /* Store the file name strings read from the profile data file. */
 static const char **file_names;

@@ -493,6 +514,64 @@  read_aux_modules (void)
     }
 }

+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+   histograms for indirect-call optimization.  */
+static void
+afdo_indirect_call (gimple stmt, struct gcov_hist *values, int hist_size)
+{
+  tree callee;
+  int i, total = 0;
+  int actual_count = 0;
+  histogram_value hist;
+
+  if (gimple_code (stmt) != GIMPLE_CALL
+      || gimple_call_fndecl (stmt) != NULL_TREE)
+    return;
+
+  callee = gimple_call_fn (stmt);
+
+  for (i = 0; i < hist_size; i++)
+    if (values[i].type == CALL_HIST)
+      break;
+
+  if (i == hist_size)
+    return;
+
+  hist = gimple_alloc_histogram_value (cfun, HIST_TYPE_INDIR_CALL_TOPN,
+       stmt, callee);
+  hist->n_counters = (GCOV_ICALL_TOPN_VAL << 2) + 1;
+  hist->hvalue.counters =  XNEWVEC (gcov_type, hist->n_counters);
+  gimple_add_histogram_value (cfun, stmt, hist);
+
+  for (i = 0; i < hist_size; i++)
+    if (values[i].type == CALL_HIST)
+      {
+ total += values[i].count;
+ if (actual_count < 2)
+  {
+    hist->hvalue.counters[actual_count * 2 + 1] =
+ (unsigned long long) values[i].value.func_name;
+    hist->hvalue.counters[actual_count * 2 + 2] = values[i].count;
+    actual_count ++;
+  }
+      }
+
+  hist->hvalue.counters[0] = total;
+
+  if (actual_count == 1)
+    {
+      hist->hvalue.counters[3] = 0;
+      hist->hvalue.counters[4] = 0;
+    }
+}
+
+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+   histograms and adds them to list VALUES.  */
+static void afdo_vpt (gimple stmt, struct gcov_hist *v, int hist_size)
+{
+  afdo_indirect_call (stmt, v, hist_size);
+}
+
 /* Return the size of the inline stack of the STMT.  */

 static int
@@ -838,7 +917,8 @@  static bool
 get_stack_count (struct gcov_callsite_pos *pos_stack,
  const char *callee_name,
  int size,
- gcov_type *count, gcov_type *max_count, gcov_type *num_inst)
+ gcov_type *count, gcov_type *max_count, gcov_type *num_inst,
+ gcov_unsigned_t *hist_size, struct gcov_hist **hist)
 {
   int i;

@@ -858,6 +938,11 @@  get_stack_count (struct gcov_callsite_pos *pos_stack,
       *num_inst = entry->num_inst;
       if (max_count)
  *max_count = entry->max_count;
+      if (hist_size)
+ {
+  *hist_size = entry->hist_size;
+  *hist = entry->hist;
+ }
       return true;
     }
   else
@@ -876,6 +961,11 @@  get_stack_count (struct gcov_callsite_pos *pos_stack,
   *num_inst = entry->num_inst;
   if (max_count)
     *max_count = entry->max_count;
+  if (hist_size)
+    {
+      *hist_size = entry->hist_size;
+      *hist = entry->hist;
+    }
   return true;
  }
     }
@@ -885,6 +975,11 @@  get_stack_count (struct gcov_callsite_pos *pos_stack,
   *num_inst = 0;
   if (max_count)
     *max_count = 0;
+  if (hist_size)
+    {
+      *hist_size = 0;
+      *hist = 0;
+    }
   return false;
 }

@@ -892,7 +987,8 @@  get_stack_count (struct gcov_callsite_pos *pos_stack,
    Return FALSE if profile is not found for STMT.  */

 static bool
-get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst)
+get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst,
+ gcov_unsigned_t *hist_size, struct gcov_hist **hist)
 {
   struct gcov_callsite_pos *pos_stack;
   int size;
@@ -910,7 +1006,8 @@  get_stmt_count (gimple stmt, gcov_type *count,
gcov_type *num_inst)

   get_inline_stack_by_stmt (stmt, current_function_decl, pos_stack, true);

-  return get_stack_count (pos_stack, NULL, size, count, NULL, num_inst);
+  return get_stack_count (pos_stack, NULL, size, count, NULL, num_inst,
+  hist_size, hist);
 }

 /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count
@@ -938,13 +1035,13 @@  afdo_get_callsite_count (struct cgraph_edge
*edge, gcov_type *count,
  get_discriminator_from_locus(gimple_location(edge->call_stmt));

   return get_stack_count (pos_stack, callee_name,
-  size, count, max_count, &num_inst);
+  size, count, max_count, &num_inst, NULL, NULL);
 }

 /* For a given BB, return its execution count.  */

 gcov_type
-afdo_get_bb_count (basic_block bb)
+afdo_get_bb_count (basic_block bb, bool annotate_vpt)
 {
   gimple_stmt_iterator gsi;
   gcov_type max_count = 0;
@@ -953,12 +1050,16 @@  afdo_get_bb_count (basic_block bb)
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
       gcov_type count, num_inst;
+      gcov_unsigned_t hist_size;
+      struct gcov_hist *hist;
       gimple stmt = gsi_stmt (gsi);
-      if (get_stmt_count (stmt, &count, &num_inst))
+      if (get_stmt_count (stmt, &count, &num_inst, &hist_size, &hist))
  {
   if (count > max_count)
     max_count = count;
   has_annotated = true;
+  if (annotate_vpt && hist_size > 0)
+    afdo_vpt (stmt, hist, hist_size);
  }
     }
   if (has_annotated)
@@ -980,7 +1081,7 @@  afdo_annotate_cfg (void)

   FOR_EACH_BB (bb)
     {
-      bb->count = afdo_get_bb_count (bb);
+      bb->count = afdo_get_bb_count (bb, true);
       if (bb->count > max_count)
  max_count = bb->count;
     }
@@ -992,6 +1093,8 @@  afdo_annotate_cfg (void)
       afdo_calculate_branch_prob ();
       profile_status = PROFILE_READ;
     }
+  if (flag_value_profile_transformations)
+    gimple_value_profile_transformations ();
 }

 extern gcov_working_set_t *gcov_working_sets;
@@ -1055,7 +1158,7 @@  read_profile (void)
     xmalloc (function_num * sizeof (struct gcov_function));
   for (i = 0; i < function_num; i++)
     {
-      gcov_functions[i].name = xstrdup (gcov_read_string ());
+      gcov_functions[i].name = file_names[gcov_read_unsigned ()];
       gcov_functions[i].file = file_names[gcov_read_unsigned ()];
       gcov_functions[i].total_count = gcov_read_counter ();
       gcov_functions[i].entry_count = gcov_read_counter ();
@@ -1087,6 +1190,22 @@  read_profile (void)
     }
   gcov_functions[i].stacks[j].count = gcov_read_counter ();
   gcov_functions[i].stacks[j].num_inst = gcov_read_counter ();
+  gcov_functions[i].stacks[j].hist_size = gcov_read_unsigned ();
+  gcov_functions[i].stacks[j].hist = (struct gcov_hist *)
+    xmalloc (gcov_functions[i].stacks[j].hist_size
+     * sizeof (struct gcov_hist));
+  for (k = 0; k < gcov_functions[i].stacks[j].hist_size; k++)
+    {
+      gcov_functions[i].stacks[j].hist[k].type =
+  (enum afdo_histogram_type) gcov_read_unsigned();
+      if (gcov_functions[i].stacks[j].hist[k].type == CALL_HIST)
+ gcov_functions[i].stacks[j].hist[k].value.func_name =
+    file_names[gcov_read_counter()];
+      else
+ gcov_functions[i].stacks[j].hist[k].value.value =
+    gcov_read_counter();
+      gcov_functions[i].stacks[j].hist[k].count = gcov_read_counter();
+    }
  }
     }

@@ -1698,6 +1817,7 @@  auto_profile (void)

       afdo_annotate_cfg ();
       compute_function_frequency ();
+      update_ssa (TODO_update_ssa);

       current_function_decl = NULL;
       pop_cfun ();
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
index 8e0a670..f7175ff 100644
--- a/gcc/auto-profile.h
+++ b/gcc/auto-profile.h
@@ -43,5 +43,5 @@  extern bool afdo_get_callsite_count (struct
cgraph_edge *, gcov_type *,
      gcov_type *, bool);

 /* Calculate basic block count.  */
-extern gcov_type afdo_get_bb_count (basic_block);
+extern gcov_type afdo_get_bb_count (basic_block, bool);
 #endif /* AUTO_PROFILE_H */
diff --git a/gcc/opts.c b/gcc/opts.c
index 4657d6a..dab2564 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1665,6 +1665,8 @@  common_handle_option (struct gcc_options *opts,
  opts->x_flag_unroll_loops = value;
       if (!opts_set->x_flag_peel_loops)
  opts->x_flag_peel_loops = value;
+      if (!opts_set->x_flag_value_profile_transformations)
+ opts->x_flag_value_profile_transformations = value;
       if (!opts_set->x_flag_inline_functions)
  opts->x_flag_inline_functions = value;
       if (!opts_set->x_flag_ipa_cp)
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index f09f201..62180fc 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -1830,7 +1830,7 @@  copy_bb (copy_body_data *id, basic_block bb, int
frequency_scale,
     {
       /* If the same inline happens in the profile-collection binary, use
  that instance's profile count. Otherwise use the scaled count.  */
-      gcov_type count = afdo_get_bb_count (copy_basic_block);
+      gcov_type count = afdo_get_bb_count (copy_basic_block, false);
       if (copy_basic_block->flags & BB_ANNOTATED)
  copy_basic_block->count = count;
       else if (bb->flags & BB_ANNOTATED)
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 81465bc..c20d13a 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -94,7 +94,7 @@  static bool gimple_ic_transform (gimple);

 /* Allocate histogram value.  */

-static histogram_value
+histogram_value
 gimple_alloc_histogram_value (struct function *fun ATTRIBUTE_UNUSED,
       enum hist_type type, gimple stmt, tree value)
 {
@@ -1205,6 +1205,33 @@  find_func_by_funcdef_no (int func_id)
   return VEC_index (cgraph_node_ptr, cgraph_node_map, func_id);
 }

+/* Return cgraph node for function with name. We need this because
+   AutoFDO doesn't record the function id for each function
+   in the profile.
+   TODO: need to transform this lookup method to hash table.  */
+
+static struct cgraph_node *
+find_func_by_name (char *name)
+{
+  struct cgraph_node *n;
+  struct cgraph_node *ret = NULL;
+  int match = 0;
+
+  for (n = cgraph_nodes; n; n = n->next)
+    {
+      const char *fname = IDENTIFIER_POINTER (decl_assembler_name (n->decl));
+      if (!strcmp(fname, name))
+ {
+  match ++;
+  ret = n;
+ }
+    }
+  if (match == 1)
+    return ret;
+  else
+    return NULL;
+}
+
 /* Initialize map of gids (gid -> cgraph node) */

 static htab_t gid_map = NULL;
@@ -1562,7 +1589,8 @@  gimple_ic_transform_mult_targ (gimple stmt,
histogram_value histogram)
   count1 = histogram->hvalue.counters [2];
   val2 = histogram->hvalue.counters [3];
   count2 = histogram->hvalue.counters [4];
-  bb_all = gimple_bb (stmt)->count;
+  bb_all = flag_auto_profile ? histogram->hvalue.counters[0]
+     : gimple_bb (stmt)->count;
   all = bb_all;

   gimple_remove_histogram_value (cfun, stmt, histogram);
@@ -1592,18 +1620,26 @@  gimple_ic_transform_mult_targ (gimple stmt,
histogram_value histogram)
   else
     prob1 = prob2 = 0;

-  direct_call1 = find_func_by_global_id (val1);
+  if (flag_auto_profile)
+    direct_call1 = find_func_by_name ((char *) val1);
+  else
+    direct_call1 = find_func_by_global_id (val1);

   if (val2 && (100 * count2 >= all * perc_threshold)
       && count2 > count_threshold)
-    direct_call2 = find_func_by_global_id (val2);
+    {
+      if (flag_auto_profile)
+ direct_call2 = find_func_by_name ((char *) val2);
+      else
+ direct_call2 = find_func_by_global_id (val2);
+    }

   locus = (stmt != NULL) ? gimple_location (stmt)
       : DECL_SOURCE_LOCATION (current_function_decl);
   if (direct_call1 == NULL
       || !check_ic_target (stmt, direct_call1))
     {
-      if (flag_opt_info >= OPT_INFO_MAX)
+      if (flag_opt_info >= OPT_INFO_MAX && !flag_auto_profile)
         {
           if (!direct_call1)
             inform (locus, "Can not find indirect call target decl "
@@ -1645,9 +1681,12 @@  gimple_ic_transform_mult_targ (gimple stmt,
histogram_value histogram)
       print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
       fprintf (dump_file, "=> ");
       print_generic_expr (dump_file, direct_call1->decl, TDF_SLIM);
-      fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
-               EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1),
-               EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1));
+      if (flag_auto_profile)
+ fprintf (dump_file, " (%s)\n", (char *) val1);
+      else
+ fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
+                 EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1),
+                 EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1));
       fprintf (dump_file, "Transformation on insn:\n");
       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
       fprintf (dump_file, "==>\n");
@@ -1683,9 +1722,12 @@  gimple_ic_transform_mult_targ (gimple stmt,
histogram_value histogram)
           print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
           fprintf (dump_file, "=> ");
           print_generic_expr (dump_file, direct_call2->decl, TDF_SLIM);
-          fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
-                   EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val2),
-                   EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val2));
+  if (flag_auto_profile)
+    fprintf (dump_file, " (%s)\n", (char *) val2);
+  else
+    fprintf (dump_file, " (module_id:%d, func_id:%d)\n",
+                     EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val2),
+                     EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val2));
           fprintf (dump_file, "Transformation on insn\n");
           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
           fprintf (dump_file, "=>\n");
diff --git a/gcc/value-prof.h b/gcc/value-prof.h
index 9425e25..2d84fe9 100644
--- a/gcc/value-prof.h
+++ b/gcc/value-prof.h
@@ -77,6 +77,8 @@  typedef VEC(histogram_value,heap) *histogram_values;
 extern void gimple_find_values_to_profile (histogram_values *);
 extern bool gimple_value_profile_transformations (void);

+histogram_value gimple_alloc_histogram_value (struct function *, enum
hist_type,
+      gimple stmt, tree);
 histogram_value gimple_histogram_value (struct function *, gimple);
 histogram_value gimple_histogram_value_of_type (struct function *, gimple,