Patchwork Implement cross-module indirect call value profiling

login
register
mail settings
Submitter Jan Hubicka
Date Aug. 9, 2013, 4:55 p.m.
Message ID <20130809165537.GB6579@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/266105/
State New
Headers show

Comments

Jan Hubicka - Aug. 9, 2013, 4:55 p.m.
Hi,
this patch makes indirect call profiling work across modules.  Unlike LIPO, I
am not adding module IDs, since I do not know how to make them stable across
multiple uses of the same .o files.  Instead I simply assign a unique ID to each
possibly indirectly called function in the program.  This is done by combining its
assembler name, file and line, and gcov filename into a single hash.  For GCC this
gives no collisions.

The rest of the updates are quite obvious.  Currently we have module-local
__gcov_indirect_call_callee and __gcov_indirect_call_counters to track the
calls.  I made them global and defined them in libgcov.
__gcov_indirect_call_profiler used to take these two as parameters, and I
replaced it with __gcov_indirect_call_profiler_v2, which has those two
hard-coded to simplify the call sequence.

The only purpose of this patch is to measure cross-module calls and get sane
histograms attached to indirect calls.  In the third patch of the series I will
actually make the LTO ipa-profile pass use them.

Bootstrapped/regtested on x86_64-linux; I will commit it shortly.

Honza

Patch

Index: libgcc/libgcov.c
===================================================================
--- libgcc/libgcov.c	(revision 201539)
+++ libgcc/libgcov.c	(working copy)
@@ -1121,6 +1121,20 @@  __gcov_one_value_profiler (gcov_type *co
 
 #ifdef L_gcov_indirect_call_profiler
 
+/* These two variables are used to actually track caller and callee.  Keep
+   them in TLS memory so races are not common (they are written to often).
+   The variables are set directly by GCC instrumented code, so declaration
+   here must match one in tree-profile.c  */
+
+#ifdef HAVE_CC_TLS
+__thread 
+#endif
+void * __gcov_indirect_call_callee;
+#ifdef HAVE_CC_TLS
+__thread 
+#endif
+gcov_type * __gcov_indirect_call_counters;
+
 /* By default, the C++ compiler will use function addresses in the
    vtable entries.  Setting TARGET_VTABLE_USES_DESCRIPTORS to nonzero
    tells the compiler to use function descriptors instead.  The value
@@ -1140,19 +1154,43 @@  __gcov_one_value_profiler (gcov_type *co
 
 /* Tries to determine the most common value among its inputs. */
 void
-__gcov_indirect_call_profiler (gcov_type* counter, gcov_type value,
-			       void* cur_func, void* callee_func)
+__gcov_indirect_call_profiler_v2 (gcov_type value, void* cur_func)
 {
   /* If the C++ virtual tables contain function descriptors then one
      function may have multiple descriptors and we need to dereference
      the descriptors to see if they point to the same function.  */
-  if (cur_func == callee_func
-      || (VTABLE_USES_DESCRIPTORS && callee_func
-	  && *(void **) cur_func == *(void **) callee_func))
-    __gcov_one_value_profiler_body (counter, value);
+  if (cur_func == __gcov_indirect_call_callee
+      || (VTABLE_USES_DESCRIPTORS && __gcov_indirect_call_callee
+	  && *(void **) cur_func == *(void **) __gcov_indirect_call_callee))
+    __gcov_one_value_profiler_body (__gcov_indirect_call_counters, value);
 }
 #endif
 
 
 #ifdef L_gcov_average_profiler
 /* Increase corresponding COUNTER by VALUE.  FIXME: Perhaps we want
Index: gcc/value-prof.c
===================================================================
--- gcc/value-prof.c	(revision 201632)
+++ gcc/value-prof.c	(working copy)
@@ -1173,24 +1173,56 @@  gimple_mod_subtract_transform (gimple_st
   return true;
 }
 
-static vec<cgraph_node_ptr> cgraph_node_map
-    = vNULL;
+static pointer_map_t *cgraph_node_map;
 
 /* Initialize map from FUNCDEF_NO to CGRAPH_NODE.  */
 
 void
-init_node_map (void)
+init_node_map (bool local)
 {
   struct cgraph_node *n;
+  cgraph_node_map = pointer_map_create ();
 
-  if (get_last_funcdef_no ())
-    cgraph_node_map.safe_grow_cleared (get_last_funcdef_no ());
-
-  FOR_EACH_FUNCTION (n)
-    {
-      if (DECL_STRUCT_FUNCTION (n->symbol.decl))
-        cgraph_node_map[DECL_STRUCT_FUNCTION (n->symbol.decl)->funcdef_no] = n;
-    }
+  FOR_EACH_DEFINED_FUNCTION (n)
+    if (cgraph_function_with_gimple_body_p (n)
+	&& !cgraph_only_called_directly_p (n))
+      {
+	void **val;
+	if (local)
+	  {
+	    n->profile_id = coverage_compute_profile_id (n);
+	    while ((val = pointer_map_contains (cgraph_node_map, (void *)(size_t)n->profile_id)) || !n->profile_id)
+	      {
+		if (dump_file)
+		  fprintf (dump_file, "Local profile-id %i conflict with nodes %s/%i %s/%i\n",
+			   n->profile_id,
+			   cgraph_node_name (n),
+			   n->symbol.order,
+			   symtab_node_name (*(symtab_node*)val),
+			   (*(symtab_node *)val)->symbol.order);
+		n->profile_id = (n->profile_id + 1) & 0x7fffffff;
+	      }
+	  }
+	else if (!n->profile_id)
+	  {
+	    if (dump_file)
+	      fprintf (dump_file, "Node %s/%i has no profile-id (profile feedback missing?)\n",
+		       cgraph_node_name (n),
+		       n->symbol.order);
+	    continue;
+	  }
+	else if ((val = pointer_map_contains (cgraph_node_map, (void *)(size_t)n->profile_id)))
+	  {
+	    if (dump_file)
+	      fprintf (dump_file, "Node %s/%i has IP profile-id %i conflict. Giving up.\n",
+		       cgraph_node_name (n),
+		       n->symbol.order,
+		       n->profile_id);
+	    *val = NULL;
+	    continue;
+	  }
+	*pointer_map_insert (cgraph_node_map, (void *)(size_t)n->profile_id) = (void *)n;
+      }
 }
 
 /* Delete the CGRAPH_NODE_MAP.  */
@@ -1198,27 +1230,20 @@  init_node_map (void)
 void
 del_node_map (void)
 {
-   cgraph_node_map.release ();
+  pointer_map_destroy (cgraph_node_map);
 }
 
 /* Return cgraph node for function with pid */
 
-static inline struct cgraph_node*
-find_func_by_funcdef_no (int func_id)
+struct cgraph_node*
+find_func_by_profile_id (int func_id)
 {
-  int max_id = get_last_funcdef_no ();
-  if (func_id >= max_id || cgraph_node_map[func_id] == NULL)
-    {
-      if (flag_profile_correction)
-        inform (DECL_SOURCE_LOCATION (current_function_decl),
-                "Inconsistent profile: indirect call target (%d) does not exist", func_id);
-      else
-        error ("Inconsistent profile: indirect call target (%d) does not exist", func_id);
-
-      return NULL;
-    }
-
-  return cgraph_node_map[func_id];
+  void **val = pointer_map_contains (cgraph_node_map,
+				     (void *)(size_t)func_id);
+  if (val)
+    return (struct cgraph_node *)*val;
+  else
+    return NULL;
 }
 
 /* Perform sanity check on the indirect call target. Due to race conditions,
@@ -1415,10 +1440,12 @@  gimple_ic_transform (gimple_stmt_iterato
   val = histogram->hvalue.counters [0];
   count = histogram->hvalue.counters [1];
   all = histogram->hvalue.counters [2];
-  gimple_remove_histogram_value (cfun, stmt, histogram);
 
   if (4 * count <= 3 * all)
-    return false;
+    {
+      gimple_remove_histogram_value (cfun, stmt, histogram);
+      return false;
+    }
 
   bb_all = gimple_bb (stmt)->count;
   /* The order of CHECK_COUNTER calls is important -
@@ -1426,16 +1453,31 @@  gimple_ic_transform (gimple_stmt_iterato
      and we want to make count <= all <= bb_all. */
   if ( check_counter (stmt, "ic", &all, &bb_all, bb_all)
       || check_counter (stmt, "ic", &count, &all, all))
-    return false;
+    {
+      gimple_remove_histogram_value (cfun, stmt, histogram);
+      return false;
+    }
 
   if (all > 0)
     prob = GCOV_COMPUTE_SCALE (count, all);
   else
     prob = 0;
-  direct_call = find_func_by_funcdef_no ((int)val);
+  direct_call = find_func_by_profile_id ((int)val);
 
   if (direct_call == NULL)
-    return false;
+    {
+      if (val)
+	{
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, "Indirect call -> direct call from other module");
+	      print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
+	      fprintf (dump_file, "=> %i (will resolve only with LTO)\n", (int)val);
+	    }
+	}
+      return false;
+    }
+  gimple_remove_histogram_value (cfun, stmt, histogram);
 
   if (!check_ic_target (stmt, direct_call))
     return false;
Index: gcc/value-prof.h
===================================================================
--- gcc/value-prof.h	(revision 201632)
+++ gcc/value-prof.h	(working copy)
@@ -103,6 +103,10 @@  extern void gimple_gen_average_profiler
 extern void gimple_gen_ior_profiler (histogram_value, unsigned, unsigned);
 extern void stream_out_histogram_value (struct output_block *, histogram_value);
 extern void stream_in_histogram_value (struct lto_input_block *, gimple);
+extern struct cgraph_node* find_func_by_profile_id (int func_id);
+extern gimple gimple_ic (gimple, struct cgraph_node *,
+			 int, gcov_type, gcov_type);
+
 
 /* In profile.c.  */
 extern void init_branch_prob (void);
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h	(revision 201632)
+++ gcc/gcov-io.h	(working copy)
@@ -515,7 +515,7 @@  extern void __gcov_merge_ior (gcov_type
 extern void __gcov_interval_profiler (gcov_type *, gcov_type, int, unsigned);
 extern void __gcov_pow2_profiler (gcov_type *, gcov_type);
 extern void __gcov_one_value_profiler (gcov_type *, gcov_type);
-extern void __gcov_indirect_call_profiler (gcov_type *, gcov_type, void *, void *);
+extern void __gcov_indirect_call_profiler_v2 (gcov_type, void *);
 extern void __gcov_average_profiler (gcov_type *, gcov_type);
 extern void __gcov_ior_profiler (gcov_type *, gcov_type);
 
Index: gcc/profile.h
===================================================================
--- gcc/profile.h	(revision 201632)
+++ gcc/profile.h	(working copy)
@@ -43,7 +43,7 @@  extern void mcf_smooth_cfg (void);
 
 extern gcov_type sum_edge_counts (vec<edge, va_gc> *edges);
 
-extern void init_node_map (void);
+extern void init_node_map (bool);
 extern void del_node_map (void);
 
 extern void get_working_sets (void);
Index: gcc/coverage.c
===================================================================
--- gcc/coverage.c	(revision 201632)
+++ gcc/coverage.c	(working copy)
@@ -539,6 +539,28 @@  coverage_compute_lineno_checksum (void)
   return chksum;
 }
 
+/* Compute profile ID.  This is better to be unique in whole program.  */
+
+unsigned
+coverage_compute_profile_id (struct cgraph_node *n)
+{
+  expanded_location xloc
+    = expand_location (DECL_SOURCE_LOCATION (n->symbol.decl));
+  unsigned chksum = xloc.line;
+
+  chksum = coverage_checksum_string (chksum, xloc.file);
+  chksum = coverage_checksum_string
+    (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->symbol.decl)));
+  if (first_global_object_name)
+    chksum = coverage_checksum_string
+      (chksum, first_global_object_name);
+  chksum = coverage_checksum_string
+    (chksum, aux_base_name);
+
+  /* Non-negative integers are hopefully small enough to fit in all targets.  */
+  return chksum & 0x7fffffff;
+}
+
 /* Compute cfg checksum for the current function.
    The checksum is calculated carefully so that
    source code changes that doesn't affect the control flow graph
Index: gcc/coverage.h
===================================================================
--- gcc/coverage.h	(revision 201632)
+++ gcc/coverage.h	(working copy)
@@ -35,6 +35,9 @@  extern void coverage_end_function (unsig
 /* Compute the control flow checksum for the current function.  */
 extern unsigned coverage_compute_cfg_checksum (void);
 
+/* Compute the profile id of function N.  */
+extern unsigned coverage_compute_profile_id (struct cgraph_node *n);
+
 /* Compute the line number checksum for the current function.  */
 extern unsigned coverage_compute_lineno_checksum (void);
 
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c	(revision 201632)
+++ gcc/tree-profile.c	(working copy)
@@ -57,8 +57,8 @@  static GTY(()) tree ptr_void;
 /* Do initialization work for the edge profiler.  */
 
 /* Add code:
-   static gcov*	__gcov_indirect_call_counters; // pointer to actual counter
-   static void*	__gcov_indirect_call_callee; // actual callee address
+   __thread gcov*	__gcov_indirect_call_counters; // pointer to actual counter
+  __thread  void*	__gcov_indirect_call_callee; // actual callee address
 */
 static void
 init_ic_make_global_vars (void)
@@ -72,7 +72,8 @@  init_ic_make_global_vars (void)
 		  get_identifier ("__gcov_indirect_call_callee"),
 		  ptr_void);
   TREE_STATIC (ic_void_ptr_var) = 1;
-  TREE_PUBLIC (ic_void_ptr_var) = 0;
+  TREE_PUBLIC (ic_void_ptr_var) = 1;
+  DECL_EXTERNAL (ic_void_ptr_var) = 1;
   DECL_ARTIFICIAL (ic_void_ptr_var) = 1;
   DECL_INITIAL (ic_void_ptr_var) = NULL;
   if (targetm.have_tls)
@@ -87,7 +88,8 @@  init_ic_make_global_vars (void)
 		  get_identifier ("__gcov_indirect_call_counters"),
 		  gcov_type_ptr);
   TREE_STATIC (ic_gcov_type_ptr_var) = 1;
-  TREE_PUBLIC (ic_gcov_type_ptr_var) = 0;
+  TREE_PUBLIC (ic_gcov_type_ptr_var) = 1;
+  DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1;
   DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1;
   DECL_INITIAL (ic_gcov_type_ptr_var) = NULL;
   if (targetm.have_tls)
@@ -155,14 +157,14 @@  gimple_init_edge_profiler (void)
 
       init_ic_make_global_vars ();
 
-      /* void (*) (gcov_type *, gcov_type, void *, void *)  */
+      /* void (*) (gcov_type, void *)  */
       ic_profiler_fn_type
 	       = build_function_type_list (void_type_node,
-					  gcov_type_ptr, gcov_type_node,
+					  gcov_type_node,
 					  ptr_void,
-					  ptr_void, NULL_TREE);
+					  NULL_TREE);
       tree_indirect_call_profiler_fn
-	      = build_fn_decl ("__gcov_indirect_call_profiler",
+	      = build_fn_decl ("__gcov_indirect_call_profiler_v2",
 				     ic_profiler_fn_type);
       TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
       DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
@@ -352,7 +354,7 @@  gimple_gen_ic_func_profiler (void)
   struct cgraph_node * c_node = cgraph_get_node (current_function_decl);
   gimple_stmt_iterator gsi;
   gimple stmt1, stmt2;
-  tree tree_uid, cur_func, counter_ptr, ptr_var, void0;
+  tree tree_uid, cur_func, void0;
 
   if (cgraph_only_called_directly_p (c_node))
     return;
@@ -361,27 +363,19 @@  gimple_gen_ic_func_profiler (void)
 
   /* Insert code:
 
-    stmt1: __gcov_indirect_call_profiler (__gcov_indirect_call_counters,
-					  current_function_funcdef_no,
-					  &current_function_decl,
-					  __gcov_indirect_call_callee);
+    stmt1: __gcov_indirect_call_profiler (profile_id,
+					  &current_function_decl)
    */
-  gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
+  gsi = gsi_after_labels (split_edge (single_succ_edge (ENTRY_BLOCK_PTR)));
 
   cur_func = force_gimple_operand_gsi (&gsi,
 				       build_addr (current_function_decl,
 						   current_function_decl),
 				       true, NULL_TREE,
 				       true, GSI_SAME_STMT);
-  counter_ptr = force_gimple_operand_gsi (&gsi, ic_gcov_type_ptr_var,
-					  true, NULL_TREE, true,
-					  GSI_SAME_STMT);
-  ptr_var = force_gimple_operand_gsi (&gsi, ic_void_ptr_var,
-				      true, NULL_TREE, true,
-				      GSI_SAME_STMT);
-  tree_uid = build_int_cst (gcov_type_node, current_function_funcdef_no);
-  stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 4,
-			     counter_ptr, tree_uid, cur_func, ptr_var);
+  tree_uid = build_int_cst (gcov_type_node, cgraph_get_node (current_function_decl)->profile_id);
+  stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
+			     tree_uid, cur_func);
   gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
 
   /* Set __gcov_indirect_call_callee to 0,
@@ -461,7 +455,7 @@  tree_profiling (void)
      cgraphunit.c:ipa_passes().  */
   gcc_assert (cgraph_state == CGRAPH_STATE_IPA_SSA);
 
-  init_node_map();
+  init_node_map (true);
 
   FOR_EACH_DEFINED_FUNCTION (node)
     {