diff mbox

Time profiler - phase 1

Message ID CAObPJ3PKTGRyZpv=V6YxfVKDfRBat1imKdps1EHmfyJys_6iVQ@mail.gmail.com
State New
Headers show

Commit Message

Martin Liška Oct. 29, 2013, 9:46 a.m. UTC
Hello,
   I've been cooperating with Jan on new profile-based function
reordering stuff. This first patch introduces a new GCOV counter that
instruments each function call and stores the time of the first run of a
function.

Bootstrapped/regtested on x86_64-linux and i686-linux.

Thanks,
Martin

Comments

Andi Kleen Oct. 31, 2013, 1:56 p.m. UTC | #1
Martin Liška <marxin.liska@gmail.com> writes:

> Hello,
>    I've cooperating with Jan on a new profile-based function
> reordering stuff. This first patch introduces a new GCOV counter that
> instruments each function call and stores the time of first run of a
> function.

I'm curious, do you have any numbers on how much that slows down a
typical instrumented run?

Seems like heavyweight instrumentation.

-Andi
Jan Hubicka Nov. 4, 2013, 10:46 a.m. UTC | #2
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index fca665b..3b62bcc 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,31 @@
> +2013-10-29  Martin Liska  <marxin.liska@gmail.com>
> +						Jan Hubicka  <jh@suse.cz>
> +
> +	* cgraph.c (dump_cgraph_node): Profile dump added.
> +	* cgraph.h (struct cgraph_node): New time profile variable added.
> +	* cgraphclones.c (cgraph_clone_node): Time profile is cloned.
> +	* gcov-io.h (gcov_type): New profiler type introduced.
> +	* ipa-profile.c (lto_output_node): Streaming for time profile added.
> +	(input_node): Time profiler is read from LTO stream.
> +	* predict.c (maybe_hot_count_p): Hot prediction changed.
> +	* profile.c (instrument_values): New case for time profiler added.
> +	(compute_value_histograms): Read of time profile.
> +	* tree-pretty-print.c (dump_function_header): Time profiler is dumped.
> +	* tree-profile.c (init_ic_make_global_vars): Time profiler function added.
> +	(gimple_init_edge_profiler): TP function instrumentation.
> +	(gimple_gen_time_profiler): New.
> +	* value-prof.c (gimple_add_histogram_value): Support for time profiler
> +	added.
> +	(dump_histogram_value): TP type added to dumps.
> +	(visit_hist): More sensitive check that takes TP into account.
> +	(gimple_find_values_to_profile): TP instrumentation.
> +	* value-prof.h (hist_type): New histogram type added.
> +	(struct histogram_value_t): Pointer to struct function added.
> +	* libgcc/Makefile.in: New GCOV merge function for TP added.
> +	* libgcov.c: function_counter variable introduced.
> +	(_gcov_merge_time_profile): New.
> +	(_gcov_time_profiler): New.	
> +
>  2013-10-29  David Malcolm  <dmalcolm@redhat.com>
>  
>  	* doc/gty.texi ("Inheritance and GTY"): Make it clear that
> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> index 52d9ab0..c95a54e 100644
> --- a/gcc/cgraph.c
> +++ b/gcc/cgraph.c
> @@ -1890,6 +1890,7 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node)
>    if (node->profile_id)
>      fprintf (f, "  Profile id: %i\n",
>  	     node->profile_id);
> +  fprintf (f, "  First run: %i\n", node->tp_first_run);
>    fprintf (f, "  Function flags:");
>    if (node->count)
>      fprintf (f, " executed "HOST_WIDEST_INT_PRINT_DEC"x",
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 7706419..479d49f 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -247,7 +247,6 @@ struct GTY(()) cgraph_clone_info
>    bitmap combined_args_to_skip;
>  };
>  
> -
>  /* The cgraph data structure.
>     Each function decl has assigned cgraph_node listing callees and callers.  */
>  
> @@ -324,6 +323,8 @@ struct GTY(()) cgraph_node {
>    unsigned tm_clone : 1;
>    /* True if this decl is a dispatcher for function versions.  */
>    unsigned dispatcher_function : 1;
> +  /* Time profiler: first run of function.  */
> +  int tp_first_run;

Move this up after profile_id.
> --- a/gcc/gcov-io.c
> +++ b/gcc/gcov-io.c
> @@ -68,7 +68,7 @@ gcov_open (const char *name, int mode)
>  #if IN_LIBGCOV
>    const int mode = 0;
>  #endif
> -#if GCOV_LOCKED
> +#if GCOV_LOCKED  
>    struct flock s_flock;
>    int fd;
>  
Accidental change?
> @@ -651,6 +658,9 @@ lto_symtab_prevailing_decl (tree decl)
>    if (TREE_CODE (decl) == FUNCTION_DECL && DECL_BUILT_IN (decl))
>      return decl;
>  
> +  if (!DECL_ASSEMBLER_NAME_SET_P (decl))
> +    return decl;
> +
>    /* Ensure DECL_ASSEMBLER_NAME will not set assembler name.  */
>    gcc_assert (DECL_ASSEMBLER_NAME_SET_P (decl));
>  
Remove this change - it is an unrelated hack from my old tree.
> diff --git a/gcc/predict.c b/gcc/predict.c
> index cc9a053..4b655d3 100644
> --- a/gcc/predict.c
> +++ b/gcc/predict.c
> @@ -170,7 +170,7 @@ maybe_hot_count_p (struct function *fun, gcov_type count)
>    if (fun && profile_status_for_function (fun) != PROFILE_READ)
>      return true;
>    /* Code executed at most once is not hot.  */
> -  if (profile_info->runs >= count)
> +  if (count <= 1)
>      return false;
>    return (count >= get_hot_bb_threshold ());
>  }
And also this change.
> @@ -895,9 +907,19 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
>        hist->hvalue.counters =  XNEWVEC (gcov_type, hist->n_counters);
>        for (j = 0; j < hist->n_counters; j++)
>          if (aact_count)
> -	  hist->hvalue.counters[j] = aact_count[j];
> -	else
> -	  hist->hvalue.counters[j] = 0;
> +          hist->hvalue.counters[j] = aact_count[j];
> +        else
> +          hist->hvalue.counters[j] = 0;
> +
> +      if (hist->type == HIST_TYPE_TIME_PROFILE)
> +        {
> +          node = cgraph_get_node (hist->fun->decl);
> +      
> +          node->tp_first_run = hist->hvalue.counters[0];
> +
> +          if (dump_file)
> +            fprintf (dump_file, "Read tp_first_run: %d\n", node->tp_first_run);
> +        }
Probably add a comment explaining why you need to annotate the counter here.
> diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
> index b2c5411..0fe262c 100644
> --- a/gcc/tree-pretty-print.c
> +++ b/gcc/tree-pretty-print.c
> @@ -3390,7 +3390,9 @@ dump_function_header (FILE *dump_file, tree fdecl, int flags)
>      fprintf (dump_file, ", decl_uid=%d", DECL_UID (fdecl));
>    if (node)
>      {
> -      fprintf (dump_file, ", symbol_order=%d)%s\n\n", node->symbol.order,
> +      fprintf (dump_file, ", tp_first_run=%d, symbol_order=%d)%s\n\n",
> +               node->tp_first_run,
> +               node->symbol.order,
>                 node->frequency == NODE_FREQUENCY_HOT
>                 ? " (hot)"
>                 : node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED

I would skip this change for now - we do not want to increase verbosity too much. Perhaps this can
go with -details or so.
>  static void
>  init_ic_make_global_vars (void)
>  {
> -  tree  gcov_type_ptr;
> +  tree gcov_type_ptr;
>  
> -  ptr_void = build_pointer_type (void_type_node);
> +  ptr_void = build_pointer_type (void_type_node); 
Be careful about accidental whitespace changes.
>  
>    /* Workaround for binutils bug 14342.  Once it is fixed, remove lto path.  */
>    if (flag_lto)
> @@ -102,7 +104,7 @@ init_ic_make_global_vars (void)
>  
>    varpool_finalize_decl (ic_void_ptr_var);
>  
> -  gcov_type_ptr = build_pointer_type (get_gcov_type ());
> +  gcov_type_ptr = build_pointer_type (get_gcov_type ());  
>    /* Workaround for binutils bug 14342.  Once it is fixed, remove lto path.  */
>    if (flag_lto)
>      {
Here too.
> @@ -455,6 +471,20 @@ gimple_gen_ic_func_profiler (void)
>    gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
>  }
>  
> +
> +void
> +gimple_gen_time_profiler (unsigned tag, unsigned base,
> +                          gimple_stmt_iterator &gsi)

Add block comment before function.
> @@ -545,7 +575,7 @@ tree_profiling (void)
>  
>        if (! flag_branch_probabilities
>  	  && flag_profile_values)
> -	gimple_gen_ic_func_profiler ();
> +          gimple_gen_ic_func_profiler ();
>  
>        if (flag_branch_probabilities
>  	  && flag_profile_values
Maybe also accidental?
> @@ -692,13 +702,16 @@ gcov_exit (void)
>  
>  	  if (gi_ptr->merge[t_ix])
>  	    {
> -	      if (!cs_prg->runs++)
> +	      int first_run = !cs_prg->runs;
> +
> +		    cs_prg->runs++;
> +	      if (first_run)
>  	        cs_prg->num = cs_tprg->num;
>  	      cs_prg->sum_all += cs_tprg->sum_all;
>  	      if (cs_prg->run_max < cs_tprg->run_max)
>  		cs_prg->run_max = cs_tprg->run_max;
>  	      cs_prg->sum_max += cs_tprg->run_max;
> -              if (cs_prg->runs == 1)
> +              if (first_run)
>                  memcpy (cs_prg->histogram, cs_tprg->histogram,
>                          sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
>                else

This change is probably part of the patch to get fork working right and is unrelated to this patch, right?
I would skip it along with the whitespace changes so that Rons' patch to break up libgcov has fewer rejects.
> @@ -795,17 +808,23 @@ gcov_exit (void)
>  	  gcov_write_unsigned (gfi_ptr->cfg_checksum);
>  
>  	  ci_ptr = gfi_ptr->ctrs;
> +
>  	  for (t_ix = 0; t_ix < GCOV_COUNTERS; t_ix++)
>  	    {
> -	      if (!gi_ptr->merge[t_ix])
> +	      gcov_merge_fn merge = gi_ptr->merge[t_ix];
> +
> +	      if (!merge)
>  		continue;
>  
>  	      n_counts = ci_ptr->num;
>  	      gcov_write_tag_length (GCOV_TAG_FOR_COUNTER (t_ix),
>  				     GCOV_TAG_COUNTER_LENGTH (n_counts));
> +
>  	      gcov_type *c_ptr = ci_ptr->values;
> +        gcov_type value;

Whitespace looks wrong.
>  	      while (n_counts--)
> -		gcov_write_counter (*c_ptr++);
> +          gcov_write_counter (*c_ptr++);
> +
>  	      ci_ptr++;
>  	    }
>  	  if (buffered)
> @@ -824,6 +843,8 @@ gcov_exit (void)
>  		   "profiling:%s:Error writing\n",
>  		   gi_filename);
>      }
> +
> +  gcov_clear ();
>  }
>  
>  /* Reset all counters to zero.  */

I believe this is also unrelated (part of fork/vfork changes)
> @@ -851,6 +872,7 @@ gcov_clear (void)
>  		continue;
>  	      
>  	      memset (ci_ptr->values, 0, sizeof (gcov_type) * ci_ptr->num);
> +
>  	      ci_ptr++;
>  	    }
>  	}
> @@ -912,7 +934,6 @@ __gcov_flush (void)
>    __gthread_mutex_lock (&__gcov_flush_mx);
>  
>    gcov_exit ();
> -  gcov_clear ();
>  
>    __gthread_mutex_unlock (&__gcov_flush_mx);
>  }

Also unrelated.
> @@ -974,6 +995,24 @@ __gcov_merge_ior (gcov_type *counters, unsigned n_counters)
>  }
>  #endif
>  
> +#ifdef L_gcov_merge_time_profile
> +void
> +__gcov_merge_time_profile (gcov_type *counters, unsigned n_counters)
> +{
> +  unsigned int i;
> +  gcov_type value;
> +
> +  for (i = 0; i < n_counters; i++)
> +    {
> +      value = gcov_read_counter ();
> +
> +      if (value && (!counters[i] || value < counters[i]))
> +        counters[i] = value;
> +    }
> +}
> +#endif /* L_gcov_merge_time_profile */
Add comments for this function explaining how merging goes and also for
L_gcov_time_profiler.
> +
> +
>  #ifdef L_gcov_merge_single
>  /* The profile merging function for choosing the most common value.
>     It is given an array COUNTERS of N_COUNTERS old counters and it
> @@ -1171,6 +1210,7 @@ __thread
>  #endif
>  gcov_type * __gcov_indirect_call_counters;
>  
> +
>  /* By default, the C++ compiler will use function addresses in the
>     vtable entries.  Setting TARGET_VTABLE_USES_DESCRIPTORS to nonzero
>     tells the compiler to use function descriptors instead.  The value
> @@ -1202,6 +1242,16 @@ __gcov_indirect_call_profiler_v2 (gcov_type value, void* cur_func)
>  }
>  #endif
>  
> +#ifdef L_gcov_time_profiler
> +
> +void
> +__gcov_time_profiler (gcov_type* counters)
> +{
> +  if (!counters[0])
> +    counters[0] = ++function_counter;
> +}
> +#endif
> +
>  #ifdef L_gcov_average_profiler
>  /* Increase corresponding COUNTER by VALUE.  FIXME: Perhaps we want
>     to saturate up.  */
> @@ -1233,10 +1283,12 @@ __gcov_fork (void)
>  {
>    pid_t pid;
>    extern __gthread_mutex_t __gcov_flush_mx;
> +
>    __gcov_flush ();
>    pid = fork ();
>    if (pid == 0)
>      __GTHREAD_MUTEX_INIT_FUNCTION (&__gcov_flush_mx);
> +
>    return pid;
>  }
>  #endif


Please post an updated patch. Basically it looks fine with these changes.
Try to also prepare a testcase (in testsuite/gcc.dg/tree-prof you can
see existing examples of how to profile and grep dump files for first-run counters).

Honza
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fca665b..3b62bcc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@ 
+2013-10-29  Martin Liska  <marxin.liska@gmail.com>
+						Jan Hubicka  <jh@suse.cz>
+
+	* cgraph.c (dump_cgraph_node): Profile dump added.
+	* cgraph.h (struct cgraph_node): New time profile variable added.
+	* cgraphclones.c (cgraph_clone_node): Time profile is cloned.
+	* gcov-io.h (gcov_type): New profiler type introduced.
+	* ipa-profile.c (lto_output_node): Streaming for time profile added.
+	(input_node): Time profiler is read from LTO stream.
+	* predict.c (maybe_hot_count_p): Hot prediction changed.
+	* profile.c (instrument_values): New case for time profiler added.
+	(compute_value_histograms): Read of time profile.
+	* tree-pretty-print.c (dump_function_header): Time profiler is dumped.
+	* tree-profile.c (init_ic_make_global_vars): Time profiler function added.
+	(gimple_init_edge_profiler): TP function instrumentation.
+	(gimple_gen_time_profiler): New.
+	* value-prof.c (gimple_add_histogram_value): Support for time profiler
+	added.
+	(dump_histogram_value): TP type added to dumps.
+	(visit_hist): More sensitive check that takes TP into account.
+	(gimple_find_values_to_profile): TP instrumentation.
+	* value-prof.h (hist_type): New histogram type added.
+	(struct histogram_value_t): Pointer to struct function added.
+	* libgcc/Makefile.in: New GCOV merge function for TP added.
+	* libgcov.c: function_counter variable introduced.
+	(_gcov_merge_time_profile): New.
+	(_gcov_time_profiler): New.	
+
 2013-10-29  David Malcolm  <dmalcolm@redhat.com>
 
 	* doc/gty.texi ("Inheritance and GTY"): Make it clear that
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 52d9ab0..c95a54e 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1890,6 +1890,7 @@  dump_cgraph_node (FILE *f, struct cgraph_node *node)
   if (node->profile_id)
     fprintf (f, "  Profile id: %i\n",
 	     node->profile_id);
+  fprintf (f, "  First run: %i\n", node->tp_first_run);
   fprintf (f, "  Function flags:");
   if (node->count)
     fprintf (f, " executed "HOST_WIDEST_INT_PRINT_DEC"x",
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 7706419..479d49f 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -247,7 +247,6 @@  struct GTY(()) cgraph_clone_info
   bitmap combined_args_to_skip;
 };
 
-
 /* The cgraph data structure.
    Each function decl has assigned cgraph_node listing callees and callers.  */
 
@@ -324,6 +323,8 @@  struct GTY(()) cgraph_node {
   unsigned tm_clone : 1;
   /* True if this decl is a dispatcher for function versions.  */
   unsigned dispatcher_function : 1;
+  /* Time profiler: first run of function.  */
+  int tp_first_run;
 };
 
 
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index 800dd2c..a05fd77 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -207,6 +207,7 @@  cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq,
   new_node->frequency = n->frequency;
   new_node->clone = n->clone;
   new_node->clone.tree_map = NULL;
+  new_node->tp_first_run = n->tp_first_run;
   if (n->count)
     {
       if (new_node->count > n->count)
diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c
index 5a21c1f..aea272b 100644
--- a/gcc/gcov-io.c
+++ b/gcc/gcov-io.c
@@ -68,7 +68,7 @@  gcov_open (const char *name, int mode)
 #if IN_LIBGCOV
   const int mode = 0;
 #endif
-#if GCOV_LOCKED
+#if GCOV_LOCKED  
   struct flock s_flock;
   int fd;
 
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index ea8d9a7..4650836 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -342,9 +342,10 @@  typedef unsigned HOST_WIDEST_INT gcov_type_unsigned;
 				      counter.  */
 #define GCOV_COUNTER_IOR	7  /* IOR of the all values passed to
 				      counter.  */
-#define GCOV_LAST_VALUE_COUNTER 7  /* The last of counters used for value
+#define GCOV_TIME_PROFILER  8 /* Time profile collecting first run of a function */
+#define GCOV_LAST_VALUE_COUNTER 8  /* The last of counters used for value
 				      profiling.  */
-#define GCOV_COUNTERS		8
+#define GCOV_COUNTERS		9
 
 /* Number of counters used for value profiling.  */
 #define GCOV_N_VALUE_COUNTERS \
@@ -352,7 +353,7 @@  typedef unsigned HOST_WIDEST_INT gcov_type_unsigned;
 
   /* A list of human readable names of the counters */
 #define GCOV_COUNTER_NAMES	{"arcs", "interval", "pow2", "single", \
-      				 "delta", "indirect_call", "average", "ior"}
+      				 "delta", "indirect_call", "average", "ior", "time_profiler"}
 
   /* Names of merge functions for counters.  */
 #define GCOV_MERGE_FUNCTIONS	{"__gcov_merge_add",	\
@@ -362,7 +363,8 @@  typedef unsigned HOST_WIDEST_INT gcov_type_unsigned;
 				 "__gcov_merge_delta",  \
 				 "__gcov_merge_single", \
 				 "__gcov_merge_add",	\
-				 "__gcov_merge_ior"}
+				 "__gcov_merge_ior",  \
+         "__gcov_merge_time_profile" }
 
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)				\
@@ -511,6 +513,8 @@  extern void __gcov_merge_delta (gcov_type *, unsigned) ATTRIBUTE_HIDDEN;
 /* The merge function that just ors the counters together.  */
 extern void __gcov_merge_ior (gcov_type *, unsigned) ATTRIBUTE_HIDDEN;
 
+extern void __gcov_merge_time_profile (gcov_type *, unsigned) ATTRIBUTE_HIDDEN;
+
 /* The profiler functions.  */
 extern void __gcov_interval_profiler (gcov_type *, gcov_type, int, unsigned);
 extern void __gcov_pow2_profiler (gcov_type *, gcov_type);
@@ -518,6 +522,7 @@  extern void __gcov_one_value_profiler (gcov_type *, gcov_type);
 extern void __gcov_indirect_call_profiler_v2 (gcov_type, void *);
 extern void __gcov_average_profiler (gcov_type *, gcov_type);
 extern void __gcov_ior_profiler (gcov_type *, gcov_type);
+extern void __gcov_time_profiler (gcov_type *);
 
 #ifndef inhibit_libc
 /* The wrappers around some library functions..  */
diff --git a/gcc/ipa-profile.c b/gcc/ipa-profile.c
index d19d6df..b48af54 100644
--- a/gcc/ipa-profile.c
+++ b/gcc/ipa-profile.c
@@ -465,6 +465,7 @@  ipa_propagate_frequency (struct cgraph_node *node)
   if (d.maybe_unlikely_executed)
     {
       node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+      node->tp_first_run = 0;
       if (dump_file)
 	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
 		 cgraph_node_name (node));
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 3d32fe4..bef3bfb 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -481,6 +481,8 @@  lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
     ref = LCC_NOT_FOUND;
   streamer_write_hwi_stream (ob->main_stream, ref);
 
+  streamer_write_hwi_stream (ob->main_stream, node->tp_first_run); 
+
   bp = bitpack_create (ob->main_stream);
   bp_pack_value (&bp, node->local.local, 1);
   bp_pack_value (&bp, node->symbol.externally_visible, 1);
@@ -1076,7 +1078,10 @@  input_node (struct lto_file_decl_data *file_data,
     internal_error ("bytecode stream: found multiple instances of cgraph "
 		    "node with uid %d", node->uid);
 
+  node->tp_first_run = streamer_read_uhwi (ib);
+
   bp = streamer_read_bitpack (ib);
+
   input_overwrite_node (file_data, node, tag, &bp);
 
   /* Store a reference for now, and fix up later to be a pointer.  */
diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c
index b1b7731..4f264c7 100644
--- a/gcc/lto/lto-symtab.c
+++ b/gcc/lto/lto-symtab.c
@@ -29,6 +29,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "plugin-api.h"
 #include "lto-streamer.h"
 #include "ipa-utils.h"
+#include "ipa-inline.h"
 
 /* Replace the cgraph node NODE with PREVAILING_NODE in the cgraph, merging
    all edges and removing the old node.  */
@@ -84,6 +85,12 @@  lto_cgraph_replace_node (struct cgraph_node *node,
   if (node->symbol.decl != prevailing_node->symbol.decl)
     cgraph_release_function_body (node);
 
+  /* Time profile merging */
+  if (node->tp_first_run)
+    prevailing_node->tp_first_run = prevailing_node->tp_first_run ?
+      MIN (prevailing_node->tp_first_run, node->tp_first_run) :
+      node->tp_first_run;
+
   /* Finally remove the replaced node.  */
   cgraph_remove_node (node);
 }
@@ -651,6 +658,9 @@  lto_symtab_prevailing_decl (tree decl)
   if (TREE_CODE (decl) == FUNCTION_DECL && DECL_BUILT_IN (decl))
     return decl;
 
+  if (!DECL_ASSEMBLER_NAME_SET_P (decl))
+    return decl;
+
   /* Ensure DECL_ASSEMBLER_NAME will not set assembler name.  */
   gcc_assert (DECL_ASSEMBLER_NAME_SET_P (decl));
 
diff --git a/gcc/predict.c b/gcc/predict.c
index cc9a053..4b655d3 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -170,7 +170,7 @@  maybe_hot_count_p (struct function *fun, gcov_type count)
   if (fun && profile_status_for_function (fun) != PROFILE_READ)
     return true;
   /* Code executed at most once is not hot.  */
-  if (profile_info->runs >= count)
+  if (count <= 1)
     return false;
   return (count >= get_hot_bb_threshold ());
 }
diff --git a/gcc/profile.c b/gcc/profile.c
index 7118ac8..98b02d5 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -65,6 +65,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "cfgloop.h"
 #include "dumpfile.h"
+#include "cgraph.h"
 
 #include "profile.h"
 
@@ -188,6 +189,15 @@  instrument_values (histogram_values values)
 	  gimple_gen_ior_profiler (hist, t, 0);
 	  break;
 
+  case HIST_TYPE_TIME_PROFILE:
+    {
+      basic_block bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR));
+      gimple_stmt_iterator gsi = gsi_start_bb (bb);
+
+  	  gimple_gen_time_profiler (t, 0, gsi);
+      break;
+    }
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -850,6 +860,7 @@  compute_value_histograms (histogram_values values, unsigned cfg_checksum,
   gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS];
   gcov_type *act_count[GCOV_N_VALUE_COUNTERS];
   gcov_type *aact_count;
+  struct cgraph_node *node;
 
   for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
     n_histogram_counters[t] = 0;
@@ -888,6 +899,7 @@  compute_value_histograms (histogram_values values, unsigned cfg_checksum,
       t = (int) hist->type;
 
       aact_count = act_count[t];
+
       if (act_count[t])
         act_count[t] += hist->n_counters;
 
@@ -895,9 +907,19 @@  compute_value_histograms (histogram_values values, unsigned cfg_checksum,
       hist->hvalue.counters =  XNEWVEC (gcov_type, hist->n_counters);
       for (j = 0; j < hist->n_counters; j++)
         if (aact_count)
-	  hist->hvalue.counters[j] = aact_count[j];
-	else
-	  hist->hvalue.counters[j] = 0;
+          hist->hvalue.counters[j] = aact_count[j];
+        else
+          hist->hvalue.counters[j] = 0;
+
+      if (hist->type == HIST_TYPE_TIME_PROFILE)
+        {
+          node = cgraph_get_node (hist->fun->decl);
+      
+          node->tp_first_run = hist->hvalue.counters[0];
+
+          if (dump_file)
+            fprintf (dump_file, "Read tp_first_run: %d\n", node->tp_first_run);
+        }
     }
 
   for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index b2c5411..0fe262c 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -3390,7 +3390,9 @@  dump_function_header (FILE *dump_file, tree fdecl, int flags)
     fprintf (dump_file, ", decl_uid=%d", DECL_UID (fdecl));
   if (node)
     {
-      fprintf (dump_file, ", symbol_order=%d)%s\n\n", node->symbol.order,
+      fprintf (dump_file, ", tp_first_run=%d, symbol_order=%d)%s\n\n",
+               node->tp_first_run,
+               node->symbol.order,
                node->frequency == NODE_FREQUENCY_HOT
                ? " (hot)"
                : node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index 8a30397..894cd29 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -51,9 +51,10 @@  static GTY(()) tree tree_interval_profiler_fn;
 static GTY(()) tree tree_pow2_profiler_fn;
 static GTY(()) tree tree_one_value_profiler_fn;
 static GTY(()) tree tree_indirect_call_profiler_fn;
+static GTY(()) tree tree_time_profiler_fn;
 static GTY(()) tree tree_average_profiler_fn;
 static GTY(()) tree tree_ior_profiler_fn;
-
+
 
 static GTY(()) tree ic_void_ptr_var;
 static GTY(()) tree ic_gcov_type_ptr_var;
@@ -63,14 +64,15 @@  static GTY(()) tree ptr_void;
 
 /* Add code:
    __thread gcov*	__gcov_indirect_call_counters; // pointer to actual counter
-   __thread  void*	__gcov_indirect_call_callee; // actual callee address
+   __thread void*	__gcov_indirect_call_callee; // actual callee address
+   __thread int __gcov_function_counter; // time profiler function counter
 */
 static void
 init_ic_make_global_vars (void)
 {
-  tree  gcov_type_ptr;
+  tree gcov_type_ptr;
 
-  ptr_void = build_pointer_type (void_type_node);
+  ptr_void = build_pointer_type (void_type_node); 
 
   /* Workaround for binutils bug 14342.  Once it is fixed, remove lto path.  */
   if (flag_lto)
@@ -102,7 +104,7 @@  init_ic_make_global_vars (void)
 
   varpool_finalize_decl (ic_void_ptr_var);
 
-  gcov_type_ptr = build_pointer_type (get_gcov_type ());
+  gcov_type_ptr = build_pointer_type (get_gcov_type ());  
   /* Workaround for binutils bug 14342.  Once it is fixed, remove lto path.  */
   if (flag_lto)
     {
@@ -145,6 +147,7 @@  gimple_init_edge_profiler (void)
   tree gcov_type_ptr;
   tree ic_profiler_fn_type;
   tree average_profiler_fn_type;
+  tree time_profiler_fn_type;
 
   if (!gcov_type_node)
     {
@@ -222,6 +225,18 @@  gimple_init_edge_profiler (void)
 	= tree_cons (get_identifier ("leaf"), NULL,
 		     DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
 
+      /* void (*) (gcov_type *)  */
+      time_profiler_fn_type
+	       = build_function_type_list (void_type_node,
+					  gcov_type_ptr, NULL_TREE);
+      tree_time_profiler_fn
+	      = build_fn_decl ("__gcov_time_profiler",
+				     time_profiler_fn_type);
+      TREE_NOTHROW (tree_time_profiler_fn) = 1;
+      DECL_ATTRIBUTES (tree_time_profiler_fn)
+	= tree_cons (get_identifier ("leaf"), NULL,
+		     DECL_ATTRIBUTES (tree_time_profiler_fn));
+
       /* void (*) (gcov_type *, gcov_type)  */
       average_profiler_fn_type
 	      = build_function_type_list (void_type_node,
@@ -247,6 +262,7 @@  gimple_init_edge_profiler (void)
       DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
       DECL_ASSEMBLER_NAME (tree_one_value_profiler_fn);
       DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
+      DECL_ASSEMBLER_NAME (tree_time_profiler_fn);
       DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
       DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
     }
@@ -455,6 +471,20 @@  gimple_gen_ic_func_profiler (void)
   gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
 }
 
+
+void
+gimple_gen_time_profiler (unsigned tag, unsigned base,
+                          gimple_stmt_iterator &gsi)
+{ 
+  tree ref_ptr = tree_coverage_counter_addr (tag, base);
+  gimple call;
+
+  ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
+				      true, NULL_TREE, true, GSI_SAME_STMT);
+  call = gimple_build_call (tree_time_profiler_fn, 1, ref_ptr);
+  gsi_insert_before (&gsi, call, GSI_NEW_STMT);
+}
+
 /* Output instructions as GIMPLE trees for code to find the most common value
    of a difference between two evaluations of an expression.
    VALUE is the expression whose value is profiled.  TAG is the tag of the
@@ -545,7 +575,7 @@  tree_profiling (void)
 
       if (! flag_branch_probabilities
 	  && flag_profile_values)
-	gimple_gen_ic_func_profiler ();
+          gimple_gen_ic_func_profiler ();
 
       if (flag_branch_probabilities
 	  && flag_profile_values
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index fd993c4..bbf842b 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -196,6 +196,7 @@  gimple_add_histogram_value (struct function *fun, gimple stmt,
 {
   hist->hvalue.next = gimple_histogram_value (fun, stmt);
   set_histogram_value (fun, stmt, hist);
+  hist->fun = fun;
 }
 
 
@@ -301,7 +302,6 @@  dump_histogram_value (FILE *dump_file, histogram_value hist)
 	}
       fprintf (dump_file, ".\n");
       break;
-
     case HIST_TYPE_IOR:
       fprintf (dump_file, "IOR value ");
       if (hist->hvalue.counters)
@@ -311,7 +311,6 @@  dump_histogram_value (FILE *dump_file, histogram_value hist)
 	}
       fprintf (dump_file, ".\n");
       break;
-
     case HIST_TYPE_CONST_DELTA:
       fprintf (dump_file, "Constant delta ");
       if (hist->hvalue.counters)
@@ -338,6 +337,15 @@  dump_histogram_value (FILE *dump_file, histogram_value hist)
 	}
       fprintf (dump_file, ".\n");
       break;
+    case HIST_TYPE_TIME_PROFILE:
+      fprintf (dump_file, "Time profile ");
+      if (hist->hvalue.counters)
+      {
+        fprintf (dump_file, "time:"HOST_WIDEST_INT_PRINT_DEC,
+                 (HOST_WIDEST_INT) hist->hvalue.counters[0]);
+      }
+      fprintf (dump_file, ".\n");
+      break;
     case HIST_TYPE_MAX:
       gcc_unreachable ();
    }
@@ -411,6 +419,7 @@  stream_in_histogram_value (struct lto_input_block *ib, gimple stmt)
 	  break;
 
 	case HIST_TYPE_IOR:
+  case HIST_TYPE_TIME_PROFILE:
 	  ncounters = 1;
 	  break;
 	case HIST_TYPE_MAX:
@@ -496,7 +505,9 @@  visit_hist (void **slot, void *data)
 {
   struct pointer_set_t *visited = (struct pointer_set_t *) data;
   histogram_value hist = *(histogram_value *) slot;
-  if (!pointer_set_contains (visited, hist))
+
+  if (!pointer_set_contains (visited, hist)
+      && hist->type != HIST_TYPE_TIME_PROFILE)
     {
       error ("dead histogram");
       dump_histogram_value (stderr, hist);
@@ -1919,12 +1930,14 @@  gimple_find_values_to_profile (histogram_values *values)
   gimple_stmt_iterator gsi;
   unsigned i;
   histogram_value hist = NULL;
-
   values->create (0);
+
   FOR_EACH_BB (bb)
     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
       gimple_values_to_profile (gsi_stmt (gsi), values);
 
+  values->safe_push (gimple_alloc_histogram_value (cfun, HIST_TYPE_TIME_PROFILE, 0, 0));
+
   FOR_EACH_VEC_ELT (*values, i, hist)
     {
       switch (hist->type)
@@ -1949,6 +1962,10 @@  gimple_find_values_to_profile (histogram_values *values)
  	  hist->n_counters = 3;
 	  break;
 
+  case HIST_TYPE_TIME_PROFILE:
+    hist->n_counters = 1;
+    break;
+
 	case HIST_TYPE_AVERAGE:
 	  hist->n_counters = 2;
 	  break;
diff --git a/gcc/value-prof.h b/gcc/value-prof.h
index 57f249d..ef77af4 100644
--- a/gcc/value-prof.h
+++ b/gcc/value-prof.h
@@ -34,6 +34,7 @@  enum hist_type
 			    called in indirect call */
   HIST_TYPE_AVERAGE,	/* Compute average value (sum of all values).  */
   HIST_TYPE_IOR,	/* Used to compute expected alignment.  */
+  HIST_TYPE_TIME_PROFILE, /* Used for time profile */
   HIST_TYPE_MAX
 };
 
@@ -54,6 +55,7 @@  struct histogram_value_t
     } hvalue;
   enum hist_type type;			/* Type of information to measure.  */
   unsigned n_counters;			/* Number of required counters.  */
+  struct function *fun;
   union
     {
       struct
@@ -97,6 +99,8 @@  extern void gimple_gen_pow2_profiler (histogram_value, unsigned, unsigned);
 extern void gimple_gen_one_value_profiler (histogram_value, unsigned, unsigned);
 extern void gimple_gen_ic_profiler (histogram_value, unsigned, unsigned);
 extern void gimple_gen_ic_func_profiler (void);
+extern void gimple_gen_time_profiler (unsigned, unsigned,
+                                      gimple_stmt_iterator &);
 extern void gimple_gen_const_delta_profiler (histogram_value,
 					     unsigned, unsigned);
 extern void gimple_gen_average_profiler (histogram_value, unsigned, unsigned);
diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 354fb72..0d91cfc 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -858,7 +858,7 @@  LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \
     _gcov_execv _gcov_execvp _gcov_execve _gcov_reset _gcov_dump \
     _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \
     _gcov_indirect_call_profiler _gcov_average_profiler _gcov_ior_profiler \
-    _gcov_merge_ior _gcov_indirect_call_profiler_v2
+    _gcov_merge_ior _gcov_time_profiler _gcov_indirect_call_profiler_v2 _gcov_merge_time_profile
 
 libgcov-objects = $(patsubst %,%$(objext),$(LIBGCOV))
 
diff --git a/libgcc/libgcov.c b/libgcc/libgcov.c
index 3c39331..7bf193d 100644
--- a/libgcc/libgcov.c
+++ b/libgcc/libgcov.c
@@ -80,6 +80,8 @@  void __gcov_merge_delta (gcov_type *counters  __attribute__ ((unused)),
 #include <sys/stat.h>
 #endif
 
+extern gcov_type function_counter ATTRIBUTE_HIDDEN;
+
 extern void gcov_clear (void) ATTRIBUTE_HIDDEN;
 extern void gcov_exit (void) ATTRIBUTE_HIDDEN;
 extern int gcov_dump_complete ATTRIBUTE_HIDDEN;
@@ -350,6 +352,10 @@  gcov_compute_histogram (struct gcov_summary *sum)
     }
 }
 
+/* Counter for first visit of each function.  */
+/* __gcov_time_profiler pre-increments it to stamp a still-zero counter.  */
+gcov_type function_counter;
+
 /* Dump the coverage counts. We merge with existing counts when
    possible, to avoid growing the .da files ad infinitum. We use this
    program's checksum to make sure we only accumulate whole program
@@ -594,7 +600,7 @@  gcov_exit (void)
 
 	    next_summary:;
 	    }
-	  
+
 	  /* Merge execution counts for each function.  */
 	  for (f_ix = 0; (unsigned)f_ix != gi_ptr->n_functions;
 	       f_ix++, tag = gcov_read_unsigned ())
@@ -624,6 +630,7 @@  gcov_exit (void)
 					    gi_ptr, fn_tail, f_ix);
 		  if (!fn_tail)
 		    goto read_mismatch;
+
 		  continue;
 		}
 
@@ -640,6 +647,7 @@  gcov_exit (void)
 		goto read_mismatch;
 	      
 	      ci_ptr = gfi_ptr->ctrs;
+
 	      for (t_ix = 0; t_ix < GCOV_COUNTERS; t_ix++)
 		{
 		  gcov_merge_fn merge = gi_ptr->merge[t_ix];
@@ -652,9 +660,11 @@  gcov_exit (void)
 		  if (tag != GCOV_TAG_FOR_COUNTER (t_ix)
 		      || length != GCOV_TAG_COUNTER_LENGTH (ci_ptr->num))
 		    goto read_mismatch;
+
 		  (*merge) (ci_ptr->values, ci_ptr->num);
 		  ci_ptr++;
 		}
+
 	      if ((error = gcov_is_error ()))
 		goto read_error;
 	    }
@@ -692,13 +702,16 @@  gcov_exit (void)
 
 	  if (gi_ptr->merge[t_ix])
 	    {
-	      if (!cs_prg->runs++)
+	      int first_run = !cs_prg->runs;
+
+		    cs_prg->runs++;
+	      if (first_run)
 	        cs_prg->num = cs_tprg->num;
 	      cs_prg->sum_all += cs_tprg->sum_all;
 	      if (cs_prg->run_max < cs_tprg->run_max)
 		cs_prg->run_max = cs_tprg->run_max;
 	      cs_prg->sum_max += cs_tprg->run_max;
-              if (cs_prg->runs == 1)
+              if (first_run)
                 memcpy (cs_prg->histogram, cs_tprg->histogram,
                         sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
               else
@@ -795,17 +808,23 @@  gcov_exit (void)
 	  gcov_write_unsigned (gfi_ptr->cfg_checksum);
 
 	  ci_ptr = gfi_ptr->ctrs;
+
 	  for (t_ix = 0; t_ix < GCOV_COUNTERS; t_ix++)
 	    {
-	      if (!gi_ptr->merge[t_ix])
+	      gcov_merge_fn merge = gi_ptr->merge[t_ix];
+
+	      if (!merge)
 		continue;
 
 	      n_counts = ci_ptr->num;
 	      gcov_write_tag_length (GCOV_TAG_FOR_COUNTER (t_ix),
 				     GCOV_TAG_COUNTER_LENGTH (n_counts));
+
 	      gcov_type *c_ptr = ci_ptr->values;
+        gcov_type value;
 	      while (n_counts--)
-		gcov_write_counter (*c_ptr++);
+          gcov_write_counter (*c_ptr++);
+
 	      ci_ptr++;
 	    }
 	  if (buffered)
@@ -824,6 +843,8 @@  gcov_exit (void)
 		   "profiling:%s:Error writing\n",
 		   gi_filename);
     }
+
+  gcov_clear ();
 }
 
 /* Reset all counters to zero.  */
@@ -851,6 +872,7 @@  gcov_clear (void)
 		continue;
 	      
 	      memset (ci_ptr->values, 0, sizeof (gcov_type) * ci_ptr->num);
+
 	      ci_ptr++;
 	    }
 	}
@@ -912,7 +934,6 @@  __gcov_flush (void)
   __gthread_mutex_lock (&__gcov_flush_mx);
 
   gcov_exit ();
-  gcov_clear ();
 
   __gthread_mutex_unlock (&__gcov_flush_mx);
 }
@@ -974,6 +995,24 @@  __gcov_merge_ior (gcov_type *counters, unsigned n_counters)
 }
 #endif
 
+#ifdef L_gcov_merge_time_profile
+void
+__gcov_merge_time_profile (gcov_type *counters, unsigned n_counters)
+{
+  unsigned int i;
+  gcov_type value;
+  /* Fold one gcov_read_counter () value into each of the N_COUNTERS slots.  */
+  for (i = 0; i < n_counters; i++)
+    {
+      value = gcov_read_counter ();
+      /* Keep the smaller nonzero value; zero counts as "no value".  */
+      if (value && (!counters[i] || value < counters[i]))
+        counters[i] = value;
+    }
+}
+#endif /* L_gcov_merge_time_profile */
+
+
 #ifdef L_gcov_merge_single
 /* The profile merging function for choosing the most common value.
    It is given an array COUNTERS of N_COUNTERS old counters and it
@@ -1171,6 +1210,7 @@  __thread
 #endif
 gcov_type * __gcov_indirect_call_counters;
 
+
 /* By default, the C++ compiler will use function addresses in the
    vtable entries.  Setting TARGET_VTABLE_USES_DESCRIPTORS to nonzero
    tells the compiler to use function descriptors instead.  The value
@@ -1202,6 +1242,16 @@  __gcov_indirect_call_profiler_v2 (gcov_type value, void* cur_func)
 }
 #endif
 
+#ifdef L_gcov_time_profiler
+/* If COUNTERS[0] is still zero, set it to the incremented function_counter.  */
+void
+__gcov_time_profiler (gcov_type* counters)
+{
+  if (!counters[0])
+    counters[0] = ++function_counter;
+}
+#endif
+
 #ifdef L_gcov_average_profiler
 /* Increase corresponding COUNTER by VALUE.  FIXME: Perhaps we want
    to saturate up.  */
@@ -1233,10 +1283,12 @@  __gcov_fork (void)
 {
   pid_t pid;
   extern __gthread_mutex_t __gcov_flush_mx;
+
   __gcov_flush ();
   pid = fork ();
   if (pid == 0)
     __GTHREAD_MUTEX_INIT_FUNCTION (&__gcov_flush_mx);
+
   return pid;
 }
 #endif