Patchwork [google-4_6] indirect_call promotion for streaming LIPO (issue6306054)

login
register
mail settings
Submitter Rong Xu
Date June 7, 2012, 8:29 p.m.
Message ID <20120607202930.9E03BC1602@rong.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/163671/
State New
Headers show

Comments

Rong Xu - June 7, 2012, 8:29 p.m.
Hi, 

This patch is for google-4_6 branch only.

It implements the multi-target indirect-call promotion as a regular IPA
passes. I took a shortcut by opening and modifying the IR in IPA execute stage.
The early version pushes the candidates to obstack and makes cgraph_node clones
, and performs the transformation in clone materialization stage. But it got
some issues. I may revisit that method later.

Tested with spec2k spec2006 and google internal benchmarks.

2012-06-06  Rong Xu  <xur@google.com>

	* lto-symtab.c (lto_cgraph_replace_node): indirect-call promotion
        support in streaming LIPO.
	(Entry_BB_profile_count): Ditto.
	(COMDAT_prevailing_node_p): Ditto.
	(lto_symtab_resolve_symbols): Ditto.
	* cgraph.h (struct cgraph_node): Add field gid for icall promotion
        support in streaming LIPO.
        (icall_promotion_info_t): New type.
        (icall_promotion_info_htab): New decls.
        (init_icall_promotion_info_htab): Ditto.
        (icall_promotion_info_hash): Ditto.
        (icall_promotion_update_gid): Ditto.
        (icall_promotion_insert_gid): Ditto.
        (ipa_icall_promotion_transform): Ditto.
	* value-prof.c (htab_gid_hash): icall promotion support in
        streaming LIPO.
	(htab_gid_del): Ditto.
	(icall_promotion_update_gid): Ditto.
	(icall_promotion_insert_gid): Ditto.
	(init_gid_map): Ditto.
	(gimple_ic): Ditto.
	(icall_promotion_info_hash): Ditto.
	(icall_promotion_info_eq): Ditto.
	(free_icall_promotion_info_t): Ditto.
	(init_icall_promotion_info_htab): Ditto.
	(fini_icall_promotion_info_htab): Ditto.
	(gimple_ic_transform_mult_targ2): Ditto.
	(gimple_ic_transform_mult_targ): Ditto.
	(gimple_ic_transform_mult_targ1): Ditto.
	(gimple_find_values_to_profile): Ditto.
	(ipa_icall_promotion): Ditto.
	(gate_ipa_icall_promotion): Ditto.
	(pass_ipa_icall_promotion): Ditto.
	* tree-pass.h: (pass_ipa_icall_promotion) New decl.
	* lto-cgraph.c (lto_output_edge): icall promotion support in
        streaming LIPO.
	(lto_output_node): Ditto.
	(input_node): Ditto.
	(input_edge): Ditto.
	* lto-streamer-in.c (input_ssa_names): Fix incorrect types in streaming.
	(input_types_compatible_p): New function.
	(fixup_icall_promotion_info): New function.
	(fixup_call_stmt_edges_1): New function.
	* ipa-split.c (execute_split_functions): Sync the behavior for FE and
        streaming LIPO.
	* lto/lto.c (lto_main): icall promotion support in streaming LIPO.
        * passes.c (init_optimization_passes): New pass.


--
This patch is available for review at http://codereview.appspot.com/6306054

Patch

Index: lto-symtab.c
===================================================================
--- lto-symtab.c	(revision 188292)
+++ lto-symtab.c	(working copy)
@@ -148,8 +148,8 @@  lto_symtab_register_decl (tree decl,
 		  || TREE_CODE (decl) == FUNCTION_DECL)
 	      && DECL_ASSEMBLER_NAME_SET_P (decl));
 
-  /* In LIPO mode, we may externalize a decl while keeping it's 
-     initializer (for const propagation). 
+  /* In LIPO mode, we may externalize a decl while keeping it's
+     initializer (for const propagation).
      Disable this check in streaming LIPO.  */
   if (TREE_CODE (decl) == VAR_DECL
       && DECL_INITIAL (decl)
@@ -206,7 +206,6 @@  lto_symtab_get_resolution (tree decl)
   return e->resolution;
 }
 
-
 /* Replace the cgraph node NODE with PREVAILING_NODE in the cgraph, merging
    all edges and removing the old node.  */
 
@@ -290,6 +289,10 @@  lto_cgraph_replace_node (struct cgraph_node *node,
       node->same_body = NULL;
     }
 
+  /* Update the gid map in streaming LIPO.  */
+  if (flag_ripa_stream)
+    icall_promotion_update_gid (node, prevailing_node);
+
   /* Finally remove the replaced node.  */
   if (node->same_body_alias)
     cgraph_remove_same_body_alias (node);
@@ -505,19 +508,128 @@  lto_symtab_resolve_can_prevail_p (lto_symtab_entry
    has profile information.  */
 
 static gcov_type
-Entry_BB_profile_count (tree decl)
+Entry_BB_profile_count (struct cgraph_node *node)
 {
   gcov_type *ctrs = NULL;
-  unsigned n;
-  struct function* f = DECL_STRUCT_FUNCTION (decl);
+  unsigned int n;
+  tree decl = node->decl;
+  struct function* f;
 
+  if (node->count)
+    return node->count;
+
+  f = DECL_STRUCT_FUNCTION (decl);
+  if (!f)
+    return 0;
+
   ctrs = get_coverage_counts_no_warn (f, GCOV_COUNTER_ARCS, &n);
-  if (ctrs) 
+  if (ctrs)
     return ctrs[0];
 
   return 0;
 }
 
+/* In this function, we handle the prevailing node for
+   COMDAT groups. We pick the group that has the largest
+   profile count as the prevailing one. Note all decls
+   in this group will marked as prevailing.  */
+
+static void
+COMDAT_prevailing_node_p (lto_symtab_entry_t e)
+{
+  gcov_type group_profile_cnt_sum=0;
+  gcov_type group_profile_cnt_max=0;
+  unsigned int group_size = 0;
+  lto_symtab_entry_t e1, prevailing_e = 0;
+  unsigned n_group = 0;
+
+  for (e1 = e; e1; e1= e1->next)
+    {
+      struct cgraph_node *node = e1->node;
+      struct cgraph_node *n = node;
+      gcov_type group_profile_cnt = 0;
+      unsigned int group_size1 = 0;
+ 
+      if (!n)
+        continue;
+      if (!lto_symtab_resolve_can_prevail_p (e1))
+        continue;
+
+      if (prevailing_e == 0)
+        prevailing_e = e1;
+
+      ++n_group;
+      do
+        {    
+          group_profile_cnt += Entry_BB_profile_count (n);
+          n = n->same_comdat_group;
+          ++group_size1;
+          if (!n) 
+            break;
+        }    
+      while (n != node);
+
+      if (!group_size)
+        group_size = group_size1;
+      else
+        gcc_assert (group_size == group_size1);
+
+      group_profile_cnt_sum += group_profile_cnt;
+      if (group_profile_cnt > group_profile_cnt_max)
+        {
+          group_profile_cnt_max = group_profile_cnt;
+          prevailing_e = e1;
+        }
+    }
+
+    if (flag_opt_info >= OPT_INFO_MAX)
+      inform (UNKNOWN_LOCATION,
+          "%s:n_group=%u, group_size=%u, max_count=%lld, sum_count=%lld\n",
+          IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (e->decl)),
+          n_group, group_size, (long long int) group_profile_cnt_max,
+          (long long int) group_profile_cnt_sum);
+
+  /* (1) Set the prevailing nodes are the functions with largest COMDAT
+     profile counts.
+     (2) Set the resolution for other COMDAT groups resolved, so that
+     they will won't process again later.  */
+  for (e1 = e; e1; e1= e1->next)
+    {
+      struct cgraph_node *node = e1->node;
+      struct cgraph_node *n = node;
+      lto_symtab_entry_t e2;
+      enum ld_plugin_symbol_resolution resolution;
+
+      if (!n)
+        continue;
+      if (!lto_symtab_resolve_can_prevail_p (e1))
+        continue;
+      if (prevailing_e != e1)
+          resolution = LDPR_PREEMPTED_IR;
+      else
+          resolution = LDPR_PREVAILING_DEF_IRONLY;
+      
+      e1->resolution = resolution;
+      e1->guessed = true;
+
+      do
+        {    
+          n = n->same_comdat_group;
+          if (!n) 
+            break;
+          e2 = lto_symtab_get ((*targetm.asm_out.mangle_assembler_name)
+			       (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl))));
+         for (; e2; e2 = e2->next)
+           {
+             if (e2->node == n)
+               e2->resolution = resolution;
+               e2->guessed = true;
+           }
+        }    
+      while (n != node);
+    }
+}
+
 /* Resolve the symbol with the candidates in the chain *SLOT and store
    their resolutions.  */
 
@@ -526,7 +638,6 @@  lto_symtab_resolve_symbols (void **slot)
 {
   lto_symtab_entry_t e;
   lto_symtab_entry_t prevailing = NULL;
-  gcov_type p_cnt;
 
   /* Always set e->node so that edges are updated to reflect decl merging. */
   for (e = (lto_symtab_entry_t) *slot; e; e = e->next)
@@ -573,37 +684,28 @@  lto_symtab_resolve_symbols (void **slot)
   if (prevailing)
     goto found;
 
-  /* Do a second round choosing one from the replaceable prevailing decls.  */
-  p_cnt = 0;
+  /* Do a second round choosing one from the replaceable prevailing decls.
+     For COMDAT functions choose the one with largest profile count and 
+     make all the functions in this group prevailing.  */
+  e = (lto_symtab_entry_t) *slot;
+  if (flag_ripa_stream &&
+      TREE_CODE (e->decl) == FUNCTION_DECL &&
+      e->node && DECL_COMDAT (e->decl))
+    {
+      COMDAT_prevailing_node_p (e);
+      return;
+    }
+
   for (e = (lto_symtab_entry_t) *slot; e; e = e->next)
     {
       if (e->resolution != LDPR_PREEMPTED_IR)
-	continue;
+        continue;
 
-      /* Choose the first function that can prevail as prevailing.  */
-      /* for comdat functions, we choose the one with larger profile count.  */
-      if (TREE_CODE (e->decl) == FUNCTION_DECL)
-	{
-          if (DECL_COMDAT (e->decl))
-            {
-              gcov_type cnt = Entry_BB_profile_count (e->decl);
-              if (!prevailing || cnt > p_cnt)
-                {
-                  p_cnt = cnt;
-                  prevailing = e;
-                }
-              continue;
-            }
-          
-	    prevailing = e;
-            break;
-	}
-
       /* From variables that can prevail choose the largest one.  */
-      if (!prevailing
-	  || tree_int_cst_lt (DECL_SIZE (prevailing->decl),
-			      DECL_SIZE (e->decl)))
-	prevailing = e;
+      if (!prevailing ||
+          tree_int_cst_lt (DECL_SIZE (prevailing->decl),
+        		      DECL_SIZE (e->decl)))
+        prevailing = e;
     }
 
   if (!prevailing)
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 188292)
+++ cgraph.h	(working copy)
@@ -240,6 +240,8 @@  struct GTY((chain_next ("%h.next"), chain_prev ("%
   /* How to scale counts at materialization time; used to merge
      LTO units with different number of profile runs.  */
   int count_materialization_scale;
+  /* gid for the node, only used in streaming LIPO.  */
+  unsigned HOST_WIDEST_INT gid;
   /* Unique id of the node.  */
   int uid;
   /* Ordering of all cgraph nodes.  */
@@ -380,6 +382,32 @@  typedef enum {
   CIF_N_REASONS
 } cgraph_inline_failed_t;
 
+/* We store 5 records for each indirect call in promtion_info.
+    0: hottest target guid,
+    1: counter value for [0],
+    2: second hottest target guid,
+    3: counter value for [2],
+    4: bb counter value for this indirect call.  */
+#define ICALL_PROMOTION_INFO_SIZE 5
+typedef struct
+{
+  tree caller_fn_decl;
+  gimple call_stmt;
+  bool is_lto_uid;
+  gcov_type promotion_info[ICALL_PROMOTION_INFO_SIZE];
+} icall_promotion_info_t;
+
+/* hash_table that stores icall promotion information.  */
+extern htab_t icall_promotion_info_htab;
+extern void init_icall_promotion_info_htab (void);
+extern hashval_t icall_promotion_info_hash (const void *p);
+
+extern void icall_promotion_update_gid
+              (struct cgraph_node*, struct cgraph_node*);
+extern void icall_promotion_insert_gid
+              (struct cgraph_node*, unsigned HOST_WIDEST_INT);
+extern void ipa_icall_promotion_transform (void);
+
 /* Structure containing additional information about an indirect call.  */
 
 struct GTY(()) cgraph_indirect_call_info
Index: value-prof.c
===================================================================
--- value-prof.c	(revision 188292)
+++ value-prof.c	(working copy)
@@ -80,7 +80,6 @@  along with GCC; see the file COPYING3.  If not see
    kept as a linked list of struct histogram_value_t's, which contain the
    same information as above.  */
 
-
 static tree gimple_divmod_fixed_value (gimple, tree, int, gcov_type, gcov_type);
 static tree gimple_mod_pow2 (gimple, int, gcov_type, gcov_type);
 static tree gimple_mod_subtract (gimple, int, int, int, gcov_type, gcov_type,
@@ -534,7 +533,7 @@  check_ic_counter (gimple stmt, gcov_type *count1,
         }
       *count2 = all;
     }
-  
+
   if (*count2 > *count1)
     {
       if (flag_opt_info >= OPT_INFO_MAX)
@@ -1226,7 +1225,7 @@  typedef struct func_gid_entry
 
 /* Hash function for function global unique ids.  */
 
-static hashval_t 
+static hashval_t
 htab_gid_hash (const void * ent)
 {
   const func_gid_entry_t *const entry = (const func_gid_entry_t *) ent;
@@ -1250,6 +1249,47 @@  htab_gid_del (void *ent)
   free (entry);
 }
 
+/* When we replace cgraph_node with prevailing_node, update the
+   gid_map also.  */
+
+void
+icall_promotion_update_gid (struct cgraph_node *node,
+                            struct cgraph_node *prevailing_node)
+{
+  func_gid_entry_t ent;
+  func_gid_entry_t **slot;
+
+  ent.node = node;
+  ent.gid = node->gid;
+  slot = (func_gid_entry_t **) htab_find_slot (gid_map, &ent, NO_INSERT);
+  if (!slot || !*slot)
+    return;
+  gcc_assert ((*slot)->gid == ent.gid && (*slot)->node == node);
+
+  (*slot)->node = prevailing_node;
+}
+
+/* Insert the gid to hashtab to build the gid_map.  */
+
+void
+icall_promotion_insert_gid (struct cgraph_node *n,
+                            unsigned HOST_WIDEST_INT gid)
+{
+  func_gid_entry_t ent, *entp;
+  func_gid_entry_t **slot;
+
+  ent.node = n;
+  ent.gid = gid;
+  slot = (func_gid_entry_t **) htab_find_slot (gid_map, &ent, INSERT);
+  gcc_assert (!*slot || ((*slot)->gid == ent.gid && (*slot)->node == n));
+  if (!*slot)
+    {
+      *slot = entp = XCNEW (func_gid_entry_t);
+      entp->node = n;
+      entp->gid = ent.gid;
+    }
+}
+
 /* Initialize the global unique id map for functions.  */
 
 static void
@@ -1262,6 +1302,12 @@  init_gid_map (void)
   gid_map
       = htab_create (10, htab_gid_hash, htab_gid_eq, htab_gid_del);
 
+  /* For streaming LIPO, We cannot build the gid_map here like in FE
+     LIPO because the body may not be available.
+     So we build the gid_map in the streaming-in stage.  */
+  if (flag_ripa_stream)
+    return;
+
   for (n = cgraph_nodes; n; n = n->next)
     {
       func_gid_entry_t ent, *entp;
@@ -1474,6 +1520,16 @@  gimple_ic (gimple icall_stmt, struct cgraph_node *
 	}
     }
 
+  /* Update the call graph.  */
+  if (flag_ripa_stream)
+    {
+      cgraph_create_edge (cgraph_get_node (current_function_decl),
+                          direct_call, dcall_stmt,
+                          dcall_bb->count, dcall_bb->frequency,
+                          dcall_bb->loop_depth);
+      direct_call->needed = 1;
+    }
+
   return dcall_stmt;
 }
 
@@ -1538,41 +1594,98 @@  gimple_ic_transform_single_targ (gimple stmt, hist
   return true;
 }
 
+/* hash_table that stores ic promotion information.  */
+
+htab_t icall_promotion_info_htab = NULL;
+
+/* hash function for loop_lsm_limit_map_htab.  */
+
+hashval_t
+icall_promotion_info_hash (const void *p)
+{
+  const icall_promotion_info_t *const x = (const icall_promotion_info_t *) p;
+  return htab_hash_pointer (x->call_stmt);
+}
+
+/* hash equal function for icall_promotion_info_htab.  */
+
+static int
+icall_promotion_info_eq (const void *p1, const void *p2)
+{
+  const icall_promotion_info_t *const ptr1 = (const icall_promotion_info_t *) p1;
+  const icall_promotion_info_t *const ptr2 = (const icall_promotion_info_t *) p2;
+
+  return (htab_eq_pointer (ptr1->caller_fn_decl, ptr2->caller_fn_decl) &&
+          htab_eq_pointer (ptr1->call_stmt, ptr2->call_stmt));
+}
+
+/* free one entry in icall_promotion_info hashtab.  */
+static int
+free_icall_promotion_info_t (void **slot, void *data ATTRIBUTE_UNUSED)
+{
+  free (*(icall_promotion_info_t**) slot);
+  return 1;
+}
+
+/* Initalization function for indirect call promotion information.  */
+
+void
+init_icall_promotion_info_htab (void)
+{
+  if (icall_promotion_info_htab)
+    return;
+
+  icall_promotion_info_htab = htab_create (59, icall_promotion_info_hash,
+                                   icall_promotion_info_eq, NULL);
+}
+
+/* Cleanup function: destroy loop structure and free space.  */
+
+static void
+fini_icall_promotion_info_htab (void)
+{
+  if (icall_promotion_info_htab)
+    {
+      htab_traverse (icall_promotion_info_htab,
+          free_icall_promotion_info_t, NULL);
+      htab_delete (icall_promotion_info_htab);
+    }
+}
+
 /* Convert indirect function call STMT into guarded direct function
    calls. Multiple indirect call targets are supported. HISTOGRAM
    is the target distribution for the callsite.  */
 
-static bool
-gimple_ic_transform_mult_targ (gimple stmt, histogram_value histogram)
+static int
+gimple_ic_transform_mult_targ2 (gimple stmt,
+          gcov_type icall_counters[ICALL_PROMOTION_INFO_SIZE])
 {
-  gcov_type val1, val2, count1, count2, all, bb_all;
+  gcov_type all;
   gcov_type prob1, prob2;
   gimple modify1, modify2;
   struct cgraph_node *direct_call1 = 0, *direct_call2 = 0;
   int perc_threshold, count_threshold, always_inline;
   location_t locus;
+  int performed = 0;
+  gcov_type val1 = icall_counters[0];
+  gcov_type count1 = icall_counters[1];
+  gcov_type val2 = icall_counters[2];
+  gcov_type count2 = icall_counters[3];
+  gcov_type bb_all = icall_counters[4];
 
-  val1 = histogram->hvalue.counters [1];
-  count1 = histogram->hvalue.counters [2];
-  val2 = histogram->hvalue.counters [3];
-  count2 = histogram->hvalue.counters [4];
-  bb_all = gimple_bb (stmt)->count;
   all = bb_all;
-
-  gimple_remove_histogram_value (cfun, stmt, histogram);
-
   if (count1 == 0)
-    return false;
+    return 0;
 
   perc_threshold = PARAM_VALUE (PARAM_ICALL_PROMOTE_PERCENT_THRESHOLD);
   count_threshold = PARAM_VALUE (PARAM_ICALL_PROMOTE_COUNT_THRESHOLD);
   always_inline = PARAM_VALUE (PARAM_ALWAYS_INLINE_ICALL_TARGET);
 
   if (100 * count1 < all * perc_threshold || count1 < count_threshold)
-    return false;
+    return 0;
 
   if (check_ic_counter (stmt, &count1, &count2, all))
-    return false;
+    return 0;
 
   if (all > 0)
     {
@@ -1588,10 +1701,43 @@  gimple_ic_transform_single_targ (gimple stmt, hist
 
   direct_call1 = find_func_by_global_id (val1);
 
-  if (val2 && (100 * count2 >= all * perc_threshold)
-      && count2 > count_threshold)
-    direct_call2 = find_func_by_global_id (val2);
+  if (!flag_ripa_stream)
+    {
+      if (val2 && (100 * count2 >= all * perc_threshold)
+          && count2 > count_threshold)
+        direct_call2 = find_func_by_global_id (val2);
+    }
+  else /* for streaming lipo.  */
+    {
+      if (val2 && ((count2 * 2) >= (all - count1))
+          && count2 > count_threshold)
+        direct_call2 = find_func_by_global_id (val2);
+    }
 
+  if (direct_call1 && !direct_call1->decl)
+    direct_call1 = 0;
+  if (direct_call2 && !direct_call2->decl)
+    direct_call2 = 0;
+
+  if (!in_lto_p && flag_ripa_stream)
+    {
+      void **slot;
+      icall_promotion_info_t *ret;
+
+      init_icall_promotion_info_htab();
+      ret = (icall_promotion_info_t *) XNEW (icall_promotion_info_t);
+      ret->caller_fn_decl = current_function_decl;
+      ret->call_stmt = stmt;
+      ret->promotion_info[0] = val1;
+      ret->promotion_info[1] = count1;
+      ret->promotion_info[2] = val2;
+      ret->promotion_info[3] = count2;
+      ret->promotion_info[4] = bb_all;
+      slot = htab_find_slot (icall_promotion_info_htab, ret, INSERT);
+      (*slot) = (void **)ret;
+      return 0;
+    }
+
   locus = (stmt != NULL) ? gimple_location (stmt)
       : DECL_SOURCE_LOCATION (current_function_decl);
   if (direct_call1 == NULL
@@ -1611,7 +1757,7 @@  gimple_ic_transform_single_targ (gimple stmt, hist
                     EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1),
                     EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1), (unsigned) count1);
         }
-      return false;
+      return 0;
     }
 
   /* Don't indirect-call promote if the target is in auxiliary module and
@@ -1622,11 +1768,13 @@  gimple_ic_transform_single_targ (gimple stmt, hist
       && ! TREE_PUBLIC (direct_call1->decl))
     return false;
 
+  ++performed;
+
   modify1 = gimple_ic (stmt, direct_call1, prob1, count1, all);
   if (flag_opt_info >= OPT_INFO_MIN)
     inform (locus, "Promote indirect call to target (call count:%u) %s",
-	    (unsigned) count1,
-	    lang_hooks.decl_printable_name (direct_call1->decl, 3));
+            (unsigned) count1,
+            lang_hooks.decl_printable_name (direct_call1->decl, 3));
 
   if (always_inline && count1 >= always_inline)
     {
@@ -1648,7 +1796,7 @@  gimple_ic_transform_single_targ (gimple stmt, hist
       fprintf (dump_file, "==>\n");
       print_gimple_stmt (dump_file, modify1, 0, TDF_SLIM);
       fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
-	       " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count1, all);
+               " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count1, all);
     }
 
   if (direct_call2 && check_ic_target (stmt, direct_call2)
@@ -1659,6 +1807,7 @@  gimple_ic_transform_single_targ (gimple stmt, hist
 	    && DECL_ARTIFICIAL (direct_call2->decl)
 	    && ! TREE_PUBLIC (direct_call2->decl)))
     {
+ ++performed;
       modify2 = gimple_ic (stmt, direct_call2,
                            prob2, count2, all - count1);
 
@@ -1692,9 +1841,56 @@  gimple_ic_transform_single_targ (gimple stmt, hist
         }
     }
 
-  return true;
+  return performed;
 }
 
+/* This is the entry point for icall-promotion in FE LIPO.  */
+
+static bool
+gimple_ic_transform_mult_targ (gimple stmt, histogram_value histogram)
+{
+  gcov_type icall_counters[ICALL_PROMOTION_INFO_SIZE];
+
+  icall_counters[0] = histogram->hvalue.counters [1];
+  icall_counters[1] = histogram->hvalue.counters [2];
+  icall_counters[2] = histogram->hvalue.counters [3];
+  icall_counters[3] = histogram->hvalue.counters [4];
+  icall_counters[4] = gimple_bb (stmt)->count;
+
+  gimple_remove_histogram_value (cfun, stmt, histogram);
+
+  return (gimple_ic_transform_mult_targ2 (stmt, icall_counters) != 0);
+}
+
+/* This is the entry point for icall-promotion in streaming LIPO.  */
+
+static int
+gimple_ic_transform_mult_targ1 (struct cgraph_edge *call_edge)
+{
+  int num_transformed;
+  void **slot;
+  icall_promotion_info_t *val, *ret;
+  gimple stmt = call_edge->call_stmt;
+
+  if (!icall_promotion_info_htab)
+    return 0;
+
+  val = (icall_promotion_info_t *) XNEW (icall_promotion_info_t);
+  val->caller_fn_decl = call_edge->caller->decl;
+  val->call_stmt = stmt;
+  slot = htab_find_slot (icall_promotion_info_htab, val, NO_INSERT);
+  if (!slot)
+    return 0;
+
+  ret = *(icall_promotion_info_t**)slot;
+  gcc_assert (ret->is_lto_uid == false);
+
+  num_transformed = gimple_ic_transform_mult_targ2 (stmt, ret->promotion_info);
+
+  return num_transformed;
+}
+
+
 /* Perform indirect call (STMT) to guarded direct function call
    transformation using value profile data.  */
 
@@ -2205,3 +2401,72 @@  gimple_find_values_to_profile (histogram_values *v
         }
     }
 }
+
+/* Perform icall_promotion as a regular IPA pass.  */
+
+static unsigned int
+ipa_icall_promotion (void)
+{
+  struct cgraph_node *node;
+  struct cgraph_edge *e;
+
+  for (node = cgraph_nodes; node; node = node->next)
+    {
+      if (!(node->indirect_calls))
+        continue;
+
+      {
+        tree save = current_function_decl;
+        tree decl = node->decl;
+
+        current_function_decl = decl;
+        push_cfun (DECL_STRUCT_FUNCTION (decl));
+
+        for (e = node->indirect_calls; e; e = e->next_callee)
+          gimple_ic_transform_mult_targ1 (e);
+
+        pop_cfun ();
+        current_function_decl = save;
+      }
+    }
+
+  fini_icall_promotion_info_htab ();
+
+  return 0;
+}
+
+/* Perform the pass in streaming LIPO for profile-use, for now. */
+
+static bool
+gate_ipa_icall_promotion (void)
+{
+  return (optimize && flag_ripa_stream && flag_profile_use);
+}
+
+struct ipa_opt_pass_d pass_ipa_icall_promotion =
+{
+ {
+  IPA_PASS,
+  "icall_promotion",                   /* name */
+  gate_ipa_icall_promotion,             /* gate */
+  ipa_icall_promotion,                  /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_CGRAPHOPT,                         /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  0                                     /* todo_flags_finish */
+ },
+ NULL,                                  /* generate_summary */
+ NULL,                                  /* write_summary */
+ NULL,                                  /* read_summary */
+ NULL,                                  /* write_optimization_summary */
+ NULL,                                  /* read_optimization_summary */
+ NULL,                                  /* stmt_fixup */
+ 0,                                     /* TODOs */
+ 0,                                     /* function_transform */
+ NULL                                   /* variable_transform */
+};
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 188292)
+++ tree-pass.h	(working copy)
@@ -476,6 +476,7 @@  extern struct ipa_opt_pass_d pass_ipa_lto_finish_o
 extern struct ipa_opt_pass_d pass_ipa_profile;
 extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;
 extern struct simple_ipa_opt_pass pass_ipa_multiversion_dispatch;
+extern struct ipa_opt_pass_d pass_ipa_icall_promotion;
 
 extern struct gimple_opt_pass pass_all_optimizations;
 extern struct gimple_opt_pass pass_cleanup_cfg_post_optimizing;
Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 188292)
+++ lto-cgraph.c	(working copy)
@@ -260,6 +260,7 @@  lto_output_edge (struct lto_simple_output_block *o
   unsigned int uid;
   intptr_t ref;
   struct bitpack_d bp;
+  icall_promotion_info_t *icall_promotion_info = 0;
 
   if (edge->indirect_unknown_callee)
     lto_output_uleb128_stream (ob->main_stream, LTO_cgraph_indirect_edge);
@@ -304,8 +305,31 @@  lto_output_edge (struct lto_simple_output_block *o
 			     | ECF_SIBCALL
 			     | ECF_LEAF
 			     | ECF_NOVOPS)));
+
+      /* Check if we have icall_promotion_info.  */
+      if (icall_promotion_info_htab)
+        {
+          void **slot;
+          icall_promotion_info_t t;
+          t.caller_fn_decl = edge->caller->decl;
+          t.call_stmt = edge->call_stmt;
+          slot = htab_find_slot (icall_promotion_info_htab, &t, NO_INSERT);
+          if (slot)
+            icall_promotion_info = *(icall_promotion_info_t**) slot;
+        }
+
+      bp_pack_value (&bp, icall_promotion_info != 0 , 1);
     }
   lto_output_bitpack (&bp);
+
+  if (icall_promotion_info)
+    {
+      int i;
+
+      for (i = 0; i < ICALL_PROMOTION_INFO_SIZE; i++)
+        lto_output_sleb128_stream (ob->main_stream,
+                                   icall_promotion_info->promotion_info[i]);
+    }
 }
 
 /* Return if LIST contain references from other partitions.  */
@@ -460,6 +484,16 @@  lto_output_node (struct lto_simple_output_block *o
   lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
   lto_output_sleb128_stream (ob->main_stream, node->count);
   lto_output_sleb128_stream (ob->main_stream, node->max_bb_count);
+  /* Output the gid of this cgraph node.  */
+  if (flag_ripa_stream)
+    {
+      unsigned HOST_WIDEST_INT gid = 0;
+      struct function *f = DECL_STRUCT_FUNCTION (node->decl);
+
+      if (f && !DECL_ABSTRACT (node->decl))
+        gid = FUNC_DECL_GLOBAL_ID (f);
+      lto_output_sleb128_stream (ob->main_stream, gid);
+    }
   lto_output_sleb128_stream (ob->main_stream, node->count_materialization_scale);
 
   if (tag == LTO_cgraph_analyzed_node)
@@ -1049,6 +1083,15 @@  input_node (struct lto_file_decl_data *file_data,
 
   node->count = lto_input_sleb128 (ib);
   node->max_bb_count = lto_input_sleb128 (ib);
+  /* Input the gid of this cgraph node and build gid_map.  */
+  if (flag_ripa_stream)
+    {
+      unsigned HOST_WIDEST_INT gid = lto_input_sleb128 (ib);
+
+      if (gid)
+        icall_promotion_insert_gid (node, gid);
+      node->gid = gid;
+    }
   node->count_materialization_scale = lto_input_sleb128 (ib);
 
   if (tag == LTO_cgraph_analyzed_node)
@@ -1267,6 +1310,24 @@  input_edge (struct lto_input_block *ib, VEC(cgraph
       if (bp_unpack_value (&bp, 1))
 	ecf_flags |= ECF_RETURNS_TWICE;
       edge->indirect_info->ecf_flags = ecf_flags;
+
+      /* Input icall promotion info.  */
+      if (bp_unpack_value (&bp, 1))
+        {
+          void **slot;
+          icall_promotion_info_t *ret;
+          int i;
+
+          init_icall_promotion_info_htab();
+          ret = (icall_promotion_info_t *) XNEW (icall_promotion_info_t);
+          ret->caller_fn_decl = caller->decl;
+          ret->call_stmt = (gimple) (unsigned long) stmt_id;
+          ret->is_lto_uid = true;
+          for (i = 0; i < ICALL_PROMOTION_INFO_SIZE; i++)
+            ret->promotion_info[i] = (gcov_type) lto_input_sleb128 (ib);
+          slot = htab_find_slot (icall_promotion_info_htab, ret, INSERT);
+          (*slot) = (void**) ret;
+        }
     }
 }
 
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 188292)
+++ lto-streamer-in.c	(working copy)
@@ -875,6 +875,30 @@  input_ssa_names (struct lto_input_block *ib, struc
     }
 }
 
+/* Check the point-to type for pointer types to see if two
+   types are compatible.  */
+
+static bool
+input_types_compatible_p (tree type1, tree type2)
+{
+  tree t1, t2;
+  int l1 = 0, l2 = 0;
+
+  t1 = type1;
+  while (POINTER_TYPE_P (t1))
+    {
+      t1 = TREE_TYPE (t1);
+      ++l1;
+    }
+  t2 = type2;
+  while (POINTER_TYPE_P (t2))
+    {
+      t2 = TREE_TYPE (t2);
+      ++l2;
+    }
+  return ((l1 == l2) && types_compatible_p (t1, t2));
+}
+
 /* Read a statement with tag TAG in function FN from block IB using
    descriptors in DATA_IN.  */
 
@@ -969,8 +993,8 @@  input_gimple_stmt (struct lto_input_block *ib, str
 			  == DECL_NONADDRESSABLE_P (field)
 			  && gimple_compare_field_offset (tem, field))
 			{
-			  if (types_compatible_p (TREE_TYPE (tem),
-						  TREE_TYPE (field)))
+			  if (input_types_compatible_p (TREE_TYPE (tem),
+						        TREE_TYPE (field)))
 			    break;
 			  else
 			    closest_match = tem;
@@ -1125,6 +1149,41 @@  input_bb (struct lto_input_block *ib, enum LTO_tag
     }
 }
 
+/* Fix up the icall_promotion_info.  */
+
+static void
+fixup_icall_promotion_info (struct cgraph_edge *edge, gimple *stmts)
+{
+  void **slot;
+  icall_promotion_info_t *val, *ret;
+
+  if (icall_promotion_info_htab == 0)
+    return;
+
+  val = (icall_promotion_info_t *) XNEW (icall_promotion_info_t);
+  val->caller_fn_decl = edge->caller->decl;
+  val->call_stmt = (gimple) (long long)edge->lto_stmt_uid;
+  slot = htab_find_slot (icall_promotion_info_htab, val, NO_INSERT);
+  if (!slot)
+    return;
+
+  ret = *(icall_promotion_info_t**)slot;
+  gcc_assert (ret->is_lto_uid);
+  htab_remove_elt_with_hash (icall_promotion_info_htab, val,
+                             icall_promotion_info_hash (val));
+
+  ret->is_lto_uid = false;
+  ret->call_stmt = stmts[edge->lto_stmt_uid];
+  gcc_assert (ret->call_stmt);
+  slot = htab_find_slot (icall_promotion_info_htab, ret, INSERT);
+  (*slot) = (void **) ret;
+
+  slot = htab_find_slot (icall_promotion_info_htab, val, NO_INSERT);
+  gcc_assert (slot == 0);
+
+  XDELETE(val);
+}
+
 /* Go through all NODE edges and fixup call_stmt pointers
    so they point to STMTS.  */
 
@@ -1135,7 +1194,10 @@  fixup_call_stmt_edges_1 (struct cgraph_node *node,
   for (cedge = node->callees; cedge; cedge = cedge->next_callee)
     cedge->call_stmt = stmts[cedge->lto_stmt_uid];
   for (cedge = node->indirect_calls; cedge; cedge = cedge->next_callee)
-    cedge->call_stmt = stmts[cedge->lto_stmt_uid];
+    {
+      cedge->call_stmt = stmts[cedge->lto_stmt_uid];
+      fixup_icall_promotion_info (cedge, stmts);
+    }
 }
 
 /* Fixup call_stmt pointers in NODE and all clones.  */
Index: ipa-split.c
===================================================================
--- ipa-split.c	(revision 188292)
+++ ipa-split.c	(working copy)
@@ -1311,7 +1311,7 @@  execute_split_functions (void)
      then inlining would still benefit.  */
   if ((!node->callers || !node->callers->next_caller)
       && !node->address_taken
-      && (!flag_lto || !node->local.externally_visible))
+      && (!flag_lto || flag_ripa_stream || !node->local.externally_visible))
     {
       if (dump_file)
 	fprintf (dump_file, "Not splitting: not called directly "
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 188292)
+++ lto/lto.c	(working copy)
@@ -2465,6 +2465,9 @@  lto_main (void)
 
   lto_init_reader ();
 
+  if (flag_ripa_stream)
+    cgraph_init_gid_map ();
+
   /* Read all the symbols and call graph from all the files in the
      command line.  */
   read_cgraph_and_symbols (num_in_fnames, in_fnames);
Index: passes.c
===================================================================
--- passes.c	(revision 188292)
+++ passes.c	(working copy)
@@ -1260,6 +1260,7 @@  init_optimization_passes (void)
   p = &all_regular_ipa_passes;
   NEXT_PASS (pass_ipa_whole_program_visibility);
   NEXT_PASS (pass_ipa_profile);
+  NEXT_PASS (pass_ipa_icall_promotion);
   NEXT_PASS (pass_ipa_cp);
   NEXT_PASS (pass_ipa_cdtor_merge);
   NEXT_PASS (pass_ipa_inline);