Patchwork Add counter histogram to fdo summary (issue6465057)

login
register
mail settings
Submitter Teresa Johnson
Date Aug. 28, 2012, 2:45 p.m.
Message ID <20120828144553.E26B961470@tjsboxrox.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/180508/
State New
Headers show

Comments

Teresa Johnson - Aug. 28, 2012, 2:45 p.m.
Revision to earlier patch to augment the gcov program summary with working set
information. We now emit the non-zero arc counter histogram entries into the
summary, instead of computing the working set information and emitting that
into the summary.  The working set information is computed by the compiler from
the histogram when it is read back in. A new method for merging histogram
summaries based on earlier discussions with Honza is utilized when gcov
summaries are merged.

See earlier discussions and description at:
  http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01062.html
  http://gcc.gnu.org/ml/gcc-patches/2012-07/msg01412.html

Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?

Thanks,
Teresa

2012-08-28  Teresa Johnson  <tejohnson@google.com>

	* libgcc/libgcov.c (gcov_histogram_insert): New function.
	(gcov_compute_histogram): Ditto.
	(gcov_exit): Invoke compute_histogram, and perform merging of
        histograms during summary merging.
	* gcc/gcov-io.c (gcov_write_summary): Include length of non-zero
        histogram entries in GCOV_TAG_SUMMARY_LENGTH, and write out those
        entries.
	(gcov_read_summary): Read histogram entries.
	(gcov_histo_index): New function.
	(gcov_histogram_merge): Ditto.
	* gcc/gcov-io.h (gcov_type_unsigned): New type.
        (struct gcov_bucket_type): Ditto.
	(struct gcov_ctr_summary): Include histogram.
        (GCOV_TAG_SUMMARY_LENGTH): Update to include histogram entries.
        (GCOV_HISTOGRAM_SIZE): New macro.
        (GCOV_TYPE_SIZE): Make macro available for all build targets.
	* gcc/profile.c (NUM_GCOV_WORKING_SETS): New macro.
	(gcov_working_sets): New global variable.
	(compute_working_sets): New function.
	(find_working_set): Ditto.
	(get_exec_counts): Invoke compute_working_sets.
	* gcc/coverage.c (read_counts_file): Merge histograms, and
        fix bug with accessing summary info for non-summable counters.
	* gcc/basic-block.h (gcov_type_unsigned): New type.
        (struct gcov_working_set_info): Ditto.
	* gcc/gcov-dump.c (tag_summary): Dump out histogram.


--
This patch is available for review at http://codereview.appspot.com/6465057
Jan Hubicka - Aug. 29, 2012, 1:12 p.m.
> Index: libgcc/libgcov.c
> ===================================================================
> --- libgcc/libgcov.c	(revision 190736)
> +++ libgcc/libgcov.c	(working copy)
> @@ -276,6 +276,78 @@ gcov_version (struct gcov_info *ptr, gcov_unsigned
>    return 1;
>  }
>  
> +/* Insert counter VALUE into HISTOGRAM.  */
> +
> +static void
> +gcov_histogram_insert(gcov_bucket_type *histogram, gcov_type value)
> +{
> +  unsigned i;
> +
> +  i = gcov_histo_index(value);
> +  gcc_assert (i < GCOV_HISTOGRAM_SIZE);
Does checking_assert work in libgcov? I do not think internal consistency check
should go to --enable-checking=release libgcov. We want to maintain it as
lightweight as possible. (I see there are two existing gcc_asserts, since they
report file format corruption, I think they should give better diagnostic).

Inliner will do good job here, but perhaps explicit inline fits.
> +      for (f_ix = 0; f_ix != gi_ptr->n_functions; f_ix++)
> +        {
> +          gfi_ptr = gi_ptr->functions[f_ix];
> +
> +          if (!gfi_ptr || gfi_ptr->key != gi_ptr)
> +            continue;
> +
> +          ci_ptr = &gfi_ptr->ctrs[ctr_info_ix];
> +          for (ix = 0; ix < ci_ptr->num; ix++)
> +            gcov_histogram_insert(cs_ptr->histogram, ci_ptr->values[ix]);
Space before (.
> +        }
> +    }
> +}
> +
>  /* Dump the coverage counts. We merge with existing counts when
>     possible, to avoid growing the .da files ad infinitum. We use this
>     program's checksum to make sure we only accumulate whole program
> @@ -347,6 +419,7 @@ gcov_exit (void)
>  	    }
>  	}
>      }
> +  gcov_compute_histogram (&this_prg);
> @@ -598,11 +669,18 @@ gcov_exit (void)
>  	  if (gi_ptr->merge[t_ix])
>  	    {
>  	      if (!cs_prg->runs++)
> -		cs_prg->num = cs_tprg->num;
> +                cs_prg->num = cs_tprg->num;
> +              else if (cs_prg->num != cs_tprg->num)
> +                goto read_mismatch;

Doesn't think check that all the programs that contain this unit are the same?
I.e. will this survive profiledbootstrap where we interleave cc1 and cc1plus?
> +  /* Count number of non-zero histogram entries. The histogram is only
> +     currently computed for arc counters.  */
> +  csum = &summary->ctrs[GCOV_COUNTER_ARCS];
> +  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
> +    {
> +      if (csum->histogram[h_ix].num_counters > 0)
> +        h_cnt++;
> +    }
> +  gcov_write_tag_length (tag, GCOV_TAG_SUMMARY_LENGTH(h_cnt));
>    gcov_write_unsigned (summary->checksum);
>    for (csum = summary->ctrs, ix = GCOV_COUNTERS_SUMMABLE; ix--; csum++)
>      {
> @@ -380,6 +388,21 @@ gcov_write_summary (gcov_unsigned_t tag, const str
>        gcov_write_counter (csum->sum_all);
>        gcov_write_counter (csum->run_max);
>        gcov_write_counter (csum->sum_max);
> +      if (ix != GCOV_COUNTER_ARCS)
> +        {
> +          gcov_write_unsigned (0);
> +          continue;
> +        }
> +      gcov_write_unsigned (h_cnt);
> +      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
> +        {
> +          if (!csum->histogram[h_ix].num_counters)
> +            continue;
> +          gcov_write_unsigned (h_ix);

It is kind of waste to write whole unsigned for each histogram index.
What about writting bitmap of non-zero entries followed by each entry?
> +/* Merge SRC_HISTO into TGT_HISTO.  */

Perhaps comment about overall concept of the merging routine would suit here.
> -#else /*!IN_GCOV */
> -#define GCOV_TYPE_SIZE (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32)

Why do you need t omove this out of !libgcov? I do not thing this is correct for all configurations.
i.e. gcov_type may be 16bit.

Patch is OK if it passed profiledbootstrap modulo the comments above.
Thanks!
Honza
Teresa Johnson - Aug. 29, 2012, 9:41 p.m.
On Wed, Aug 29, 2012 at 6:12 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> Index: libgcc/libgcov.c
>> ===================================================================
>> --- libgcc/libgcov.c  (revision 190736)
>> +++ libgcc/libgcov.c  (working copy)
>> @@ -276,6 +276,78 @@ gcov_version (struct gcov_info *ptr, gcov_unsigned
>>    return 1;
>>  }
>>
>> +/* Insert counter VALUE into HISTOGRAM.  */
>> +
>> +static void
>> +gcov_histogram_insert(gcov_bucket_type *histogram, gcov_type value)
>> +{
>> +  unsigned i;
>> +
>> +  i = gcov_histo_index(value);
>> +  gcc_assert (i < GCOV_HISTOGRAM_SIZE);
> Does checking_assert work in libgcov? I do not think internal consistency check
> should go to --enable-checking=release libgcov. We want to maintain it as
> lightweight as possible. (I see there are two existing gcc_asserts, since they
> report file format corruption, I think they should give better diagnostic).

gcc_checking_assert isn't available, since tsystem.h not system.h is
included. I could probably just remove the assert (to be safe,
silently return if i is out of bounds?).

>
> Inliner will do good job here, but perhaps explicit inline fits.
>> +      for (f_ix = 0; f_ix != gi_ptr->n_functions; f_ix++)
>> +        {
>> +          gfi_ptr = gi_ptr->functions[f_ix];
>> +
>> +          if (!gfi_ptr || gfi_ptr->key != gi_ptr)
>> +            continue;
>> +
>> +          ci_ptr = &gfi_ptr->ctrs[ctr_info_ix];
>> +          for (ix = 0; ix < ci_ptr->num; ix++)
>> +            gcov_histogram_insert(cs_ptr->histogram, ci_ptr->values[ix]);
> Space before (.

Ok.

>> +        }
>> +    }
>> +}
>> +
>>  /* Dump the coverage counts. We merge with existing counts when
>>     possible, to avoid growing the .da files ad infinitum. We use this
>>     program's checksum to make sure we only accumulate whole program
>> @@ -347,6 +419,7 @@ gcov_exit (void)
>>           }
>>       }
>>      }
>> +  gcov_compute_histogram (&this_prg);
>> @@ -598,11 +669,18 @@ gcov_exit (void)
>>         if (gi_ptr->merge[t_ix])
>>           {
>>             if (!cs_prg->runs++)
>> -             cs_prg->num = cs_tprg->num;
>> +                cs_prg->num = cs_tprg->num;
>> +              else if (cs_prg->num != cs_tprg->num)
>> +                goto read_mismatch;
>
> Doesn't think check that all the programs that contain this unit are the same?
> I.e. will this survive profiledbootstrap where we interleave cc1 and cc1plus?

Ok, removing that check and I am switching the histogram merging code
to handle the case where there are different numbers of counters. It
will end up with the same number of counters as in the summary we are
merging into since that is the num we keep above when runs > 0 to
start with.

>> +  /* Count number of non-zero histogram entries. The histogram is only
>> +     currently computed for arc counters.  */
>> +  csum = &summary->ctrs[GCOV_COUNTER_ARCS];
>> +  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
>> +    {
>> +      if (csum->histogram[h_ix].num_counters > 0)
>> +        h_cnt++;
>> +    }
>> +  gcov_write_tag_length (tag, GCOV_TAG_SUMMARY_LENGTH(h_cnt));
>>    gcov_write_unsigned (summary->checksum);
>>    for (csum = summary->ctrs, ix = GCOV_COUNTERS_SUMMABLE; ix--; csum++)
>>      {
>> @@ -380,6 +388,21 @@ gcov_write_summary (gcov_unsigned_t tag, const str
>>        gcov_write_counter (csum->sum_all);
>>        gcov_write_counter (csum->run_max);
>>        gcov_write_counter (csum->sum_max);
>> +      if (ix != GCOV_COUNTER_ARCS)
>> +        {
>> +          gcov_write_unsigned (0);
>> +          continue;
>> +        }
>> +      gcov_write_unsigned (h_cnt);
>> +      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
>> +        {
>> +          if (!csum->histogram[h_ix].num_counters)
>> +            continue;
>> +          gcov_write_unsigned (h_ix);
>
> It is kind of waste to write whole unsigned for each histogram index.
> What about writting bitmap of non-zero entries followed by each entry?

Sure, I will do that instead.

>> +/* Merge SRC_HISTO into TGT_HISTO.  */
>
> Perhaps comment about overall concept of the merging routine would suit here.

Ok.

>> -#else /*!IN_GCOV */
>> -#define GCOV_TYPE_SIZE (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32)
>
> Why do you need t omove this out of !libgcov? I do not thing this is correct for all configurations.
> i.e. gcov_type may be 16bit.

From my understanding of the mode attribute meanings, which I thought
are defined in terms of the number of smallest addressable units, the
code in gcov-io.h that sets up the gcov_type typedef will always end
up with a gcov_type that is 32 or 64 bits? I.e. when BITS_PER_UNIT is
8 it will use either SI or DI which will end up either 32 or 64, and
when BITS_PER_UNIT is 16 it would use either HI or SI which would
again be either 32 or 64. Is that wrong and we can end up with a 16
bit gcov_type?

The GCOV_TYPE_SIZE was being defined everywhere except when IN_GOV (so
it was being defined IN_LIBGCOV), but I wanted it defined
unconditionally because I am using it to determine the required number
of histogram entries.

In any case, I think it will be more straightforward to define the
number of histogram entries based on the max possible gcov_type size
which is 64 (so 256 entries). This will make implementing the bit mask
simpler, since it will always require the same number of gcov_unsigned
ints (8).

>
> Patch is OK if it passed profiledbootstrap modulo the comments above.

Ok, thanks. Working on the fixes above.

Teresa

> Thanks!
> Honza
Jan Hubicka - Aug. 30, 2012, 3:32 p.m.
> On Wed, Aug 29, 2012 at 6:12 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
> >> Index: libgcc/libgcov.c
> >> ===================================================================
> >> --- libgcc/libgcov.c  (revision 190736)
> >> +++ libgcc/libgcov.c  (working copy)
> >> @@ -276,6 +276,78 @@ gcov_version (struct gcov_info *ptr, gcov_unsigned
> >>    return 1;
> >>  }
> >>
> >> +/* Insert counter VALUE into HISTOGRAM.  */
> >> +
> >> +static void
> >> +gcov_histogram_insert(gcov_bucket_type *histogram, gcov_type value)
> >> +{
> >> +  unsigned i;
> >> +
> >> +  i = gcov_histo_index(value);
> >> +  gcc_assert (i < GCOV_HISTOGRAM_SIZE);
> > Does checking_assert work in libgcov? I do not think internal consistency check
> > should go to --enable-checking=release libgcov. We want to maintain it as
> > lightweight as possible. (I see there are two existing gcc_asserts, since they
> > report file format corruption, I think they should give better diagnostic).
> 
> gcc_checking_assert isn't available, since tsystem.h not system.h is
> included. I could probably just remove the assert (to be safe,
> silently return if i is out of bounds?).

I think just removing the assert is fine here: only way it can trigger is when
GCOV_HISTOGRAM_SIZE is wrong and it ought not to be.
> 
> >From my understanding of the mode attribute meanings, which I thought
> are defined in terms of the number of smallest addressable units, the
> code in gcov-io.h that sets up the gcov_type typedef will always end
> up with a gcov_type that is 32 or 64 bits? I.e. when BITS_PER_UNIT is
> 8 it will use either SI or DI which will end up either 32 or 64, and
> when BITS_PER_UNIT is 16 it would use either HI or SI which would
> again be either 32 or 64. Is that wrong and we can end up with a 16
> bit gcov_type?

I see, the code simplified a bit since we dropped support for some of more exotic
targets.  The type should be either 32bit or 64. 
> 
> The GCOV_TYPE_SIZE was being defined everywhere except when IN_GOV (so
> it was being defined IN_LIBGCOV), but I wanted it defined
> unconditionally because I am using it to determine the required number
> of histogram entries.
> 
> In any case, I think it will be more straightforward to define the
> number of histogram entries based on the max possible gcov_type size
> which is 64 (so 256 entries). This will make implementing the bit mask
> simpler, since it will always require the same number of gcov_unsigned
> ints (8).
Sounds good to me. 64bit should be enough for everyone. Coverage is kind of useless
for smaller types and for really restricted targets we more likely will want to disable
histogram generation rather than making it a bit smaller.
> 
> >
> > Patch is OK if it passed profiledbootstrap modulo the comments above.
> 
> Ok, thanks. Working on the fixes above.

OK, thanks!
Honza
> 
> Teresa
> 
> > Thanks!
> > Honza
> 
> 
> 
> -- 
> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413

Patch

Index: libgcc/libgcov.c
===================================================================
--- libgcc/libgcov.c	(revision 190736)
+++ libgcc/libgcov.c	(working copy)
@@ -276,6 +276,78 @@  gcov_version (struct gcov_info *ptr, gcov_unsigned
   return 1;
 }
 
+/* Insert counter VALUE into HISTOGRAM.  */
+
+static void
+gcov_histogram_insert(gcov_bucket_type *histogram, gcov_type value)
+{
+  unsigned i;
+
+  i = gcov_histo_index(value);
+  gcc_assert (i < GCOV_HISTOGRAM_SIZE);
+
+  histogram[i].num_counters++;
+  histogram[i].cum_value += value;
+  if (value < histogram[i].min_value)
+    histogram[i].min_value = value;
+}
+
+/* Computes a histogram of the arc counters to place in the summary SUM.  */
+
+static void
+gcov_compute_histogram (struct gcov_summary *sum)
+{
+  struct gcov_info *gi_ptr;
+  const struct gcov_fn_info *gfi_ptr;
+  const struct gcov_ctr_info *ci_ptr;
+  struct gcov_ctr_summary *cs_ptr;
+  unsigned t_ix, f_ix, ctr_info_ix, ix;
+  int h_ix;
+
+  /* This currently only applies to arc counters.  */
+  t_ix = GCOV_COUNTER_ARCS;
+
+  /* First check if there are any counts recorded for this counter.  */
+  cs_ptr = &(sum->ctrs[t_ix]);
+  if (!cs_ptr->num)
+    return;
+
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+    {
+      cs_ptr->histogram[h_ix].num_counters = 0;
+      cs_ptr->histogram[h_ix].min_value = cs_ptr->run_max;
+      cs_ptr->histogram[h_ix].cum_value = 0;
+    }
+
+  /* Walk through all the per-object structures and record each of
+     the count values in histogram.  */
+  for (gi_ptr = gcov_list; gi_ptr; gi_ptr = gi_ptr->next)
+    {
+      if (!gi_ptr->merge[t_ix])
+        continue;
+
+      /* Find the appropriate index into the gcov_ctr_info array
+         for the counter we are currently working on based on the
+         existence of the merge function pointer for this object.  */
+      for (ix = 0, ctr_info_ix = 0; ix < t_ix; ix++)
+        {
+          if (gi_ptr->merge[ix])
+            ctr_info_ix++;
+        }
+      for (f_ix = 0; f_ix != gi_ptr->n_functions; f_ix++)
+        {
+          gfi_ptr = gi_ptr->functions[f_ix];
+
+          if (!gfi_ptr || gfi_ptr->key != gi_ptr)
+            continue;
+
+          ci_ptr = &gfi_ptr->ctrs[ctr_info_ix];
+          for (ix = 0; ix < ci_ptr->num; ix++)
+            gcov_histogram_insert(cs_ptr->histogram, ci_ptr->values[ix]);
+        }
+    }
+}
+
 /* Dump the coverage counts. We merge with existing counts when
    possible, to avoid growing the .da files ad infinitum. We use this
    program's checksum to make sure we only accumulate whole program
@@ -347,6 +419,7 @@  gcov_exit (void)
 	    }
 	}
     }
+  gcov_compute_histogram (&this_prg);
 
   {
     /* Check if the level of dirs to strip off specified. */
@@ -482,8 +555,6 @@  gcov_exit (void)
 
 	      f_ix--;
 	      length = gcov_read_unsigned ();
-	      if (length != GCOV_TAG_SUMMARY_LENGTH)
-		goto read_mismatch;
 	      gcov_read_summary (&tmp);
 	      if ((error = gcov_is_error ()))
 		goto read_error;
@@ -598,11 +669,18 @@  gcov_exit (void)
 	  if (gi_ptr->merge[t_ix])
 	    {
 	      if (!cs_prg->runs++)
-		cs_prg->num = cs_tprg->num;
+                cs_prg->num = cs_tprg->num;
+              else if (cs_prg->num != cs_tprg->num)
+                goto read_mismatch;
 	      cs_prg->sum_all += cs_tprg->sum_all;
 	      if (cs_prg->run_max < cs_tprg->run_max)
 		cs_prg->run_max = cs_tprg->run_max;
 	      cs_prg->sum_max += cs_tprg->run_max;
+              if (cs_prg->runs == 1)
+                memcpy (cs_prg->histogram, cs_tprg->histogram,
+                        sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+              else
+                gcov_histogram_merge (cs_prg->histogram, cs_tprg->histogram);
 	    }
 	  else if (cs_prg->runs)
 	    goto read_mismatch;
Index: gcc/gcov-io.c
===================================================================
--- gcc/gcov-io.c	(revision 190736)
+++ gcc/gcov-io.c	(working copy)
@@ -368,10 +368,18 @@  gcov_write_tag_length (gcov_unsigned_t tag, gcov_u
 GCOV_LINKAGE void
 gcov_write_summary (gcov_unsigned_t tag, const struct gcov_summary *summary)
 {
-  unsigned ix;
+  unsigned ix, h_ix, h_cnt = 0;
   const struct gcov_ctr_summary *csum;
 
-  gcov_write_tag_length (tag, GCOV_TAG_SUMMARY_LENGTH);
+  /* Count number of non-zero histogram entries. The histogram is only
+     currently computed for arc counters.  */
+  csum = &summary->ctrs[GCOV_COUNTER_ARCS];
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+    {
+      if (csum->histogram[h_ix].num_counters > 0)
+        h_cnt++;
+    }
+  gcov_write_tag_length (tag, GCOV_TAG_SUMMARY_LENGTH(h_cnt));
   gcov_write_unsigned (summary->checksum);
   for (csum = summary->ctrs, ix = GCOV_COUNTERS_SUMMABLE; ix--; csum++)
     {
@@ -380,6 +388,21 @@  gcov_write_summary (gcov_unsigned_t tag, const str
       gcov_write_counter (csum->sum_all);
       gcov_write_counter (csum->run_max);
       gcov_write_counter (csum->sum_max);
+      if (ix != GCOV_COUNTER_ARCS)
+        {
+          gcov_write_unsigned (0);
+          continue;
+        }
+      gcov_write_unsigned (h_cnt);
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (!csum->histogram[h_ix].num_counters)
+            continue;
+          gcov_write_unsigned (h_ix);
+          gcov_write_unsigned (csum->histogram[h_ix].num_counters);
+          gcov_write_counter (csum->histogram[h_ix].min_value);
+          gcov_write_counter (csum->histogram[h_ix].cum_value);
+        }
     }
 }
 #endif /* IN_LIBGCOV */
@@ -488,7 +511,7 @@  gcov_read_string (void)
 GCOV_LINKAGE void
 gcov_read_summary (struct gcov_summary *summary)
 {
-  unsigned ix;
+  unsigned ix, h_ix, h_cnt;
   struct gcov_ctr_summary *csum;
 
   summary->checksum = gcov_read_unsigned ();
@@ -499,6 +522,16 @@  gcov_read_summary (struct gcov_summary *summary)
       csum->sum_all = gcov_read_counter ();
       csum->run_max = gcov_read_counter ();
       csum->sum_max = gcov_read_counter ();
+      memset (csum->histogram, 0,
+              sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+      h_cnt = gcov_read_unsigned ();
+      while (h_cnt--)
+        {
+          h_ix = gcov_read_unsigned ();
+          csum->histogram[h_ix].num_counters = gcov_read_unsigned ();
+          csum->histogram[h_ix].min_value = gcov_read_counter ();
+          csum->histogram[h_ix].cum_value = gcov_read_counter ();
+        }
     }
 }
 
@@ -550,3 +583,142 @@  gcov_time (void)
     return status.st_mtime;
 }
 #endif /* IN_GCOV */
+
+#if IN_LIBGCOV || !IN_GCOV
+/* Determine the index into histogram for VALUE. */
+
+static unsigned
+gcov_histo_index(gcov_type value)
+{
+  gcov_type_unsigned v = (gcov_type_unsigned)value;
+  unsigned r = 0;
+  unsigned prev2bits = 0;
+
+  /* Find index into log2 scale histogram, where each of the log2
+     sized buckets is divided into 4 linear sub-buckets for better
+     focus in the higher buckets.  */
+
+  /* Find the place of the most-significant bit set.  */
+  if (v > 0)
+    r = GCOV_TYPE_SIZE - 1 - __builtin_clzll (v);
+
+  /* If at most the 2 least significant bits are set (value is
+     0 - 3) then that value is our index into the lowest set of
+     four buckets.  */
+  if (r < 2)
+    return (unsigned)value;
+
+  gcc_assert (r < 64);
+
+  /* Find the two next most significant bits to determine which
+     of the four linear sub-buckets to select.  */
+  prev2bits = (v >> (r - 2)) & 0x3;
+  /* Finally, compose the final bucket index from the log2 index and
+     the next 2 bits. The minimum r value at this point is 2 since we
+     returned above if r was 2 or more, so the minimum bucket at this
+     point is 4.  */
+  return (r - 1) * 4 + prev2bits;
+}
+
+/* Merge SRC_HISTO into TGT_HISTO.  */
+
+static void gcov_histogram_merge(gcov_bucket_type *tgt_histo,
+                                 gcov_bucket_type *src_histo)
+{
+  unsigned src_i, tgt_i, tmp_i;
+  unsigned src_num, tgt_num, merge_num;
+  gcov_type src_cum, tgt_cum, merge_src_cum, merge_tgt_cum, merge_cum;
+  gcov_type merge_min;
+  gcov_bucket_type tmp_histo[GCOV_HISTOGRAM_SIZE];
+
+  memset(tmp_histo, 0, sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+
+  /* Assume that the counters are in the same relative order in both
+     histograms. Walk the histograms from smallest to largest entry,
+     matching up and combining counters in order.  */
+  src_num = 0;
+  src_cum = 0;
+  src_i = 0;
+  for (tgt_i = 0; tgt_i < GCOV_HISTOGRAM_SIZE; tgt_i++)
+    {
+      tgt_num = tgt_histo[tgt_i].num_counters;
+      tgt_cum = tgt_histo[tgt_i].cum_value;
+      /* Keep going until all of the target histogram's counters at this
+         position have been matched and merged with counters from the
+         source histogram.  */
+      while (tgt_num > 0)
+        {
+          /* If this is either the first time through this loop or we just
+             exhausted the previous non-zero source histogram entry, look
+             for the next non-zero source histogram entry.  */
+          if (!src_num)
+            {
+              /* Locate the first non-zero entry.  */
+              while (src_i < GCOV_HISTOGRAM_SIZE &&
+                     !src_histo[src_i].num_counters)
+                src_i++;
+              /* Source and target total number of counts should be
+                 the same or caller should not have attempted merge.
+                 Since we have target counters left to merge, there
+                 ought to still be some left in the source.  */
+              gcc_assert (src_i < GCOV_HISTOGRAM_SIZE);
+              src_num = src_histo[src_i].num_counters;
+              src_cum = src_histo[src_i].cum_value;
+            }
+
+          /* The number of counters to merge on this pass is the minimum
+             of the remaining counters from the current target and source
+             histogram entries.  */
+          merge_num = tgt_num;
+          if (src_num < merge_num)
+            merge_num = src_num;
+
+          /* The merged min_value is the min of the min_values from target
+             and source.  */
+          merge_min = tgt_histo[tgt_i].min_value;
+          if (merge_min > src_histo[tgt_i].min_value)
+            merge_min = src_histo[src_i].min_value;
+
+          /* Compute the portion of source and target entries' cum_value
+             that will be apportioned to the counters being merged.
+             The total remaining cum_value from each entry is divided
+             equally among the counters from that histogram entry if we
+             are not merging all of them.  */
+          merge_src_cum = src_cum;
+          if (merge_num < src_num)
+            merge_src_cum = merge_num * src_cum / src_num;
+          merge_tgt_cum = tgt_cum;
+          if (merge_num < tgt_num)
+            merge_tgt_cum = merge_num * tgt_cum / tgt_num;
+          /* The merged cum_value is the sum of the source and target
+             components.  */
+          merge_cum = merge_src_cum + merge_tgt_cum;
+
+          /* Update the remaining number of counters and cum_value left
+             to be merged from this source and target entry.  */
+          src_cum -= merge_src_cum;
+          tgt_cum -= merge_tgt_cum;
+          src_num -= merge_num;
+          tgt_num -= merge_num;
+
+          /* The merged counters get placed in the new merged histogram
+             at the entry for the merged min_value.  */
+          tmp_i = gcov_histo_index(merge_min);
+          gcc_assert (tmp_i < GCOV_HISTOGRAM_SIZE);
+          tmp_histo[tmp_i].num_counters += merge_num;
+          tmp_histo[tmp_i].cum_value += merge_cum;
+          if (!tmp_histo[tmp_i].min_value ||
+              merge_min < tmp_histo[tmp_i].min_value)
+            tmp_histo[tmp_i].min_value = merge_min;
+
+          /* Ensure the search for the next non-zero src_histo entry starts
+             at the next histogram bucket.  */
+          if (!src_num)
+            src_i++;
+        }
+    }
+
+  /* Finally, copy the merged histogram into tgt_histo.  */
+  memcpy(tgt_histo, tmp_histo, sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+}
+#endif /* IN_LIBGCOV || !IN_GCOV */
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h	(revision 190736)
+++ gcc/gcov-io.h	(working copy)
@@ -139,7 +139,9 @@  see the files COPYING3 and COPYING.RUNTIME respect
 	counts: header int64:count*
 	summary: int32:checksum {count-summary}GCOV_COUNTERS_SUMMABLE
 	count-summary:	int32:num int32:runs int64:sum
-			int64:max int64:sum_max
+			int64:max int64:sum_max histogram
+        histogram: int32:count histogram-buckets*
+        histogram-buckets: int32:index int32:num int64:min int64:sum
 
    The ANNOUNCE_FUNCTION record is the same as that in the note file,
    but without the source location.  The COUNTS gives the
@@ -171,8 +173,10 @@  typedef unsigned gcov_unsigned_t __attribute__ ((m
 typedef unsigned gcov_position_t __attribute__ ((mode (SI)));
 #if LONG_LONG_TYPE_SIZE > 32
 typedef signed gcov_type __attribute__ ((mode (DI)));
+typedef unsigned gcov_type_unsigned __attribute__ ((mode (DI)));
 #else
 typedef signed gcov_type __attribute__ ((mode (SI)));
+typedef unsigned gcov_type_unsigned __attribute__ ((mode (SI)));
 #endif
 #else
 #if BITS_PER_UNIT == 16
@@ -180,16 +184,20 @@  typedef unsigned gcov_unsigned_t __attribute__ ((m
 typedef unsigned gcov_position_t __attribute__ ((mode (HI)));
 #if LONG_LONG_TYPE_SIZE > 32
 typedef signed gcov_type __attribute__ ((mode (SI)));
+typedef unsigned gcov_type_unsigned __attribute__ ((mode (SI)));
 #else
 typedef signed gcov_type __attribute__ ((mode (HI)));
+typedef unsigned gcov_type_unsigned __attribute__ ((mode (HI)));
 #endif
 #else
 typedef unsigned gcov_unsigned_t __attribute__ ((mode (QI)));
 typedef unsigned gcov_position_t __attribute__ ((mode (QI)));
 #if LONG_LONG_TYPE_SIZE > 32
 typedef signed gcov_type __attribute__ ((mode (HI)));
+typedef unsigned gcov_type_unsigned __attribute__ ((mode (HI)));
 #else
 typedef signed gcov_type __attribute__ ((mode (QI)));
+typedef unsigned gcov_type_unsigned __attribute__ ((mode (QI)));
 #endif
 #endif
 #endif
@@ -210,11 +218,10 @@  typedef unsigned gcov_position_t;
 #if IN_GCOV
 #define GCOV_LINKAGE static
 typedef HOST_WIDEST_INT gcov_type;
+typedef unsigned HOST_WIDEST_INT gcov_type_unsigned;
 #if IN_GCOV > 0
 #include <sys/types.h>
 #endif
-#else /*!IN_GCOV */
-#define GCOV_TYPE_SIZE (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32)
 #endif
 
 #if defined (HOST_HAS_F_SETLKW)
@@ -309,9 +316,10 @@  typedef HOST_WIDEST_INT gcov_type;
 #define GCOV_TAG_COUNTER_NUM(LENGTH) ((LENGTH) / 2)
 #define GCOV_TAG_OBJECT_SUMMARY  ((gcov_unsigned_t)0xa1000000) /* Obsolete */
 #define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa3000000)
-#define GCOV_TAG_SUMMARY_LENGTH  \
-	(1 + GCOV_COUNTERS_SUMMABLE * (2 + 3 * 2))
+#define GCOV_TAG_SUMMARY_LENGTH(NUM)  \
+        (1 + GCOV_COUNTERS_SUMMABLE * (3 + 3 * 2) + (NUM) * 6)
 
+
 /* Counters that are collected.  */
 #define GCOV_COUNTER_ARCS 	0  /* Arc transitions.  */
 #define GCOV_COUNTERS_SUMMABLE	1  /* Counters which can be
@@ -387,8 +395,28 @@  typedef HOST_WIDEST_INT gcov_type;
 #define GCOV_ARC_FAKE		(1 << 1)
 #define GCOV_ARC_FALLTHROUGH	(1 << 2)
 
+#define GCOV_TYPE_SIZE (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32)
+
 /* Structured records.  */
 
+/* Structure used for each bucket of the log2 histogram of counter values.  */
+typedef struct
+{
+  /* Number of counters whose profile count falls within the bucket.  */
+  gcov_unsigned_t num_counters;
+  /* Smallest profile count included in this bucket.  */
+  gcov_type min_value;
+  /* Cumulative value of the profile counts in this bucket.  */
+  gcov_type cum_value;
+} gcov_bucket_type;
+
+/* For a log2 scale histogram with each range split into 4
+   linear sub-ranges, there will be at most GCOV_TYPE_SIZE - 1 log2
+   ranges since the lowest 2 log2 values share the lowest 4 linear
+   sub-range (values 0 - 3).  */
+
+#define GCOV_HISTOGRAM_SIZE (GCOV_TYPE_SIZE - 1) * 4
+
 /* Cumulative counter data.  */
 struct gcov_ctr_summary
 {
@@ -397,6 +425,8 @@  struct gcov_ctr_summary
   gcov_type sum_all;		/* sum of all counters accumulated.  */
   gcov_type run_max;		/* maximum value on a single run.  */
   gcov_type sum_max;    	/* sum of individual run max values.  */
+  gcov_bucket_type histogram[GCOV_HISTOGRAM_SIZE]; /* histogram of
+                                                      counter values.  */
 };
 
 /* Object & program summary record.  */
Index: gcc/profile.c
===================================================================
--- gcc/profile.c	(revision 190736)
+++ gcc/profile.c	(working copy)
@@ -84,6 +84,15 @@  struct bb_info {
 
 const struct gcov_ctr_summary *profile_info;
 
+/* Number of data points in the working set summary array. Using 128
+   provides information for at least every 1% increment of the total
+   profile size. The last entry is hardwired to 99.9% of the total.  */
+#define NUM_GCOV_WORKING_SETS 128
+
+/* Counter working set information computed from the current counter
+   summary. Not initialized unless profile_info summary is non-NULL.  */
+static gcov_working_set_t gcov_working_sets[NUM_GCOV_WORKING_SETS];
+
 /* Collect statistics on the performance of this pass for the entire source
    file.  */
 
@@ -192,6 +201,151 @@  instrument_values (histogram_values values)
 }
 
 
+/* Compute the working set information from the counter histogram in
+   the profile summary. This is an array of information corresponding to a
+   range of percentages of the total execution count (sum_all), and includes
+   the number of counters required to cover that working set percentage and
+   the minimum counter value in that working set.  */
+
+static void
+compute_working_sets (void)
+{
+  gcov_type working_set_cum_values[NUM_GCOV_WORKING_SETS];
+  gcov_type ws_cum_hotness_incr;
+  gcov_type cum, tmp_cum;
+  const gcov_bucket_type *histo_bucket;
+  unsigned ws_ix, h_ix, c_num, count, pctinc, pct;
+  gcov_working_set_t *ws_info;
+
+  if (!profile_info)
+    return;
+
+  /* Compute the amount of sum_all that the cumulative hotness grows
+     by in each successive working set entry, which depends on the
+     number of working set entries.  */
+  ws_cum_hotness_incr = profile_info->sum_all / NUM_GCOV_WORKING_SETS;
+
+  /* Next fill in an array of the cumulative hotness values corresponding
+     to each working set summary entry we are going to compute below. 
+     Skip 0% statistics, which can be extrapolated from the
+     rest of the summary data.  */
+  cum = ws_cum_hotness_incr;
+  for (ws_ix = 0; ws_ix < NUM_GCOV_WORKING_SETS;
+       ws_ix++, cum += ws_cum_hotness_incr)
+    working_set_cum_values[ws_ix] = cum;
+  /* The last summary entry is reserved for (roughly) 99.9% of the
+     working set. Divide by 1024 so it becomes a shift, which gives
+     almost exactly 99.9%.  */
+  working_set_cum_values[NUM_GCOV_WORKING_SETS-1]
+      = profile_info->sum_all - profile_info->sum_all/1024;
+
+  /* Next, walk through the histogram in decending order of hotness
+     and compute the statistics for the working set summary array.
+     As histogram entries are accumulated, we check to see which
+     working set entries have had their expected cum_value reached
+     and fill them in, walking the working set entries in increasing
+     size of cum_value.  */
+  ws_ix = 0; /* The current entry into the working set array.  */
+  cum = 0; /* The current accumulated counter sum.  */
+  count = 0; /* The current accumulated count of block counters.  */
+  for (h_ix = GCOV_HISTOGRAM_SIZE - 1;
+       h_ix > 0 && ws_ix < NUM_GCOV_WORKING_SETS; h_ix--)
+    {
+      histo_bucket = &profile_info->histogram[h_ix];
+
+      /* If we haven't reached the required cumulative counter value for
+         the current working set percentage, simply accumulate this histogram
+         entry into the running sums and continue to the next histogram
+         entry.  */
+      if (cum + histo_bucket->cum_value < working_set_cum_values[ws_ix])
+        {
+          cum += histo_bucket->cum_value;
+          count += histo_bucket->num_counters;
+          continue;
+        }
+
+      /* If adding the current histogram entry's cumulative counter value
+         causes us to exceed the current working set size, then estimate
+         how many of this histogram entry's counter values are required to
+         reach the working set size, and fill in working set entries
+         as we reach their expected cumulative value.  */
+      for (c_num = 0, tmp_cum = cum;
+           c_num < histo_bucket->num_counters && ws_ix < NUM_GCOV_WORKING_SETS;
+           c_num++)
+        {
+          count++;
+          /* If we haven't reached the last histogram entry counter, add
+             in the minimum value again. This will underestimate the
+             cumulative sum so far, because many of the counter values in this
+             entry may have been larger than the minimum. We could add in the
+             average value every time, but that would require an expensive
+             divide operation.  */
+          if (c_num + 1 < histo_bucket->num_counters)
+            tmp_cum += histo_bucket->min_value;
+          /* If we have reached the last histogram entry counter, then add
+             in the entire cumulative value.  */
+          else
+            tmp_cum = cum + histo_bucket->cum_value;
+
+          /* Next walk through successive working set entries and fill in
+            the statistics for any whose size we have reached by accumulating
+            this histogram counter.  */
+          while (tmp_cum >= working_set_cum_values[ws_ix]
+                 && ws_ix < NUM_GCOV_WORKING_SETS)
+            {
+              gcov_working_sets[ws_ix].num_counters = count;
+              gcov_working_sets[ws_ix].min_counter
+                  = histo_bucket->min_value;
+              ws_ix++;
+            }
+        }
+      /* Finally, update the running cumulative value since we were
+         using a temporary above.  */
+      cum += histo_bucket->cum_value;
+    }
+  gcc_assert (ws_ix == NUM_GCOV_WORKING_SETS);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Counter working sets:\n");
+      /* Multiply the percentage by 100 to avoid float.  */
+      pctinc = 100 * 100 / NUM_GCOV_WORKING_SETS;
+      for (ws_ix = 0, pct = pctinc; ws_ix < NUM_GCOV_WORKING_SETS;
+           ws_ix++, pct += pctinc)
+        {
+          if (ws_ix == NUM_GCOV_WORKING_SETS - 1)
+            pct = 9990;
+          ws_info = &gcov_working_sets[ws_ix];
+          /* Print out the percentage using int arithmatic to avoid float.  */
+          fprintf (dump_file, "\t\t%u.%02u%%: num counts=%u, min counter="
+                   HOST_WIDEST_INT_PRINT_DEC "\n",
+                   pct / 100, pct - (pct / 100 * 100),
+                   ws_info->num_counters,
+                   (HOST_WIDEST_INT)ws_info->min_counter);
+        }
+    }
+}
+
+/* Given a the desired percentage of the full profile (sum_all from the
+   summary), multiplied by 10 to avoid float in PCT_TIMES_10, returns
+   the corresponding working set information. If an exact match for
+   the percentage isn't found, the closest value is used.  */
+
+gcov_working_set_t *
+find_working_set (unsigned pct_times_10)
+{
+  unsigned i;
+  if (!profile_info)
+    return NULL;
+  gcc_assert (pct_times_10 <= 1000);
+  if (pct_times_10 >= 999)
+    return &gcov_working_sets[NUM_GCOV_WORKING_SETS - 1];
+  i = pct_times_10 * NUM_GCOV_WORKING_SETS / 1000;
+  if (!i)
+    return &gcov_working_sets[0];
+  return &gcov_working_sets[i - 1];
+}
+
 /* Computes hybrid profile for all matching entries in da_file.  
    
    CFG_CHECKSUM is the precomputed checksum for the CFG.  */
@@ -219,6 +373,8 @@  get_exec_counts (unsigned cfg_checksum, unsigned l
   if (!counts)
     return NULL;
 
+  compute_working_sets();
+
   if (dump_file && profile_info)
     fprintf(dump_file, "Merged %u profiles with maximal count %u.\n",
 	    profile_info->runs, (unsigned) profile_info->sum_max);
Index: gcc/coverage.c
===================================================================
--- gcc/coverage.c	(revision 190736)
+++ gcc/coverage.c	(working copy)
@@ -248,6 +248,13 @@  read_counts_file (void)
 		summary.ctrs[ix].run_max = sum.ctrs[ix].run_max;
 	      summary.ctrs[ix].sum_max += sum.ctrs[ix].sum_max;
 	    }
+          if (new_summary)
+            memcpy (summary.ctrs[GCOV_COUNTER_ARCS].histogram,
+                    sum.ctrs[GCOV_COUNTER_ARCS].histogram,
+                    sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+          else
+            gcov_histogram_merge (summary.ctrs[GCOV_COUNTER_ARCS].histogram,
+                                  sum.ctrs[GCOV_COUNTER_ARCS].histogram);
 	  new_summary = 0;
 	}
       else if (GCOV_TAG_IS_COUNTER (tag) && fn_ident)
@@ -268,8 +275,9 @@  read_counts_file (void)
 	      entry->ctr = elt.ctr;
 	      entry->lineno_checksum = lineno_checksum;
 	      entry->cfg_checksum = cfg_checksum;
-	      entry->summary = summary.ctrs[elt.ctr];
-	      entry->summary.num = n_counts;
+              if (elt.ctr < GCOV_COUNTERS_SUMMABLE)
+                entry->summary = summary.ctrs[elt.ctr];
+              entry->summary.num = n_counts;
 	      entry->counts = XCNEWVEC (gcov_type, n_counts);
 	    }
 	  else if (entry->lineno_checksum != lineno_checksum
Index: gcc/basic-block.h
===================================================================
--- gcc/basic-block.h	(revision 190736)
+++ gcc/basic-block.h	(working copy)
@@ -31,6 +31,7 @@  along with GCC; see the file COPYING3.  If not see
    flow graph is manipulated by various optimizations.  A signed type
    makes those easy to detect.  */
 typedef HOST_WIDEST_INT gcov_type;
+typedef unsigned HOST_WIDEST_INT gcov_type_unsigned;
 
 /* Control flow edge information.  */
 struct GTY((user)) edge_def {
@@ -91,6 +92,16 @@  enum cfg_edge_flags {
    profile.c.  */
 extern const struct gcov_ctr_summary *profile_info;
 
+/* Working set size statistics for a given percentage of the entire
+   profile (sum_all from the counter summary).  */
+typedef struct gcov_working_set_info
+{
+  /* Number of hot counters included in this working set.  */
+  unsigned num_counters;
+  /* Smallest counter included in this working set.  */
+  gcov_type min_counter;
+} gcov_working_set_t;
+
 /* Declared in cfgloop.h.  */
 struct loop;
 
@@ -897,4 +908,7 @@  extern void rtl_profile_for_bb (basic_block);
 extern void rtl_profile_for_edge (edge);
 extern void default_rtl_profile (void);
 
+/* In profile.c.  */
+extern gcov_working_set_t *find_working_set(unsigned pct_times_10);
+
 #endif /* GCC_BASIC_BLOCK_H */
Index: gcc/gcov-dump.c
===================================================================
--- gcc/gcov-dump.c	(revision 190736)
+++ gcc/gcov-dump.c	(working copy)
@@ -447,7 +447,8 @@  tag_summary (const char *filename ATTRIBUTE_UNUSED
 	     unsigned tag ATTRIBUTE_UNUSED, unsigned length ATTRIBUTE_UNUSED)
 {
   struct gcov_summary summary;
-  unsigned ix;
+  unsigned ix, h_ix;
+  gcov_bucket_type *histo_bucket;
 
   gcov_read_summary (&summary);
   printf (" checksum=0x%08x", summary.checksum);
@@ -465,5 +466,24 @@  tag_summary (const char *filename ATTRIBUTE_UNUSED
 	      (HOST_WIDEST_INT)summary.ctrs[ix].run_max);
       printf (", sum_max=" HOST_WIDEST_INT_PRINT_DEC,
 	      (HOST_WIDEST_INT)summary.ctrs[ix].sum_max);
+      if (ix != GCOV_COUNTER_ARCS)
+        continue;
+      printf ("\n");
+      print_prefix (filename, 0, 0);
+      printf ("\t\tcounter histogram:");
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          histo_bucket = &summary.ctrs[ix].histogram[h_ix];
+          if (!histo_bucket->num_counters)
+            continue;
+          printf ("\n");
+          print_prefix (filename, 0, 0);
+          printf ("\t\t%d: num counts=%u, min counter="
+              HOST_WIDEST_INT_PRINT_DEC ", cum_counter="
+              HOST_WIDEST_INT_PRINT_DEC,
+	      h_ix, histo_bucket->num_counters,
+              (HOST_WIDEST_INT)histo_bucket->min_value,
+              (HOST_WIDEST_INT)histo_bucket->cum_value);
+        }
     }
 }