diff mbox

add statistics counting to postreload, copy-rename, and math-opts

Message ID 20110412141626.GF23480@codesourcery.com
State New
Headers show

Commit Message

Nathan Froyd April 12, 2011, 2:16 p.m. UTC
It's a shame more passes don't make use of the statistics_*
infrastructure.  This patch is a step towards rectifying that and adds
statistics_counter_event calls to passes mentioned in $SUBJECT.
postreload-gcse already tracked the stats for the dump file and so only
needs the statistics_counter_event calls; the other passes needed to be
taught about the statistics also.

Tested on x86_64-unknown-linux-gnu.  OK to commit?

-Nathan

	* postreload-gcse.c (gcse_after_reload_main): Add calls to
	statistics_counter_event.
	* tree-ssa-copyrename.c (stats): Define.
	(rename_ssa_copies): Count coalesced SSA_NAMEs.  Add call to
	statistics_counter_event.
	* tree-ssa-math-opts.c (reciprocal_stats, sincos_stats): Define.
	(bswap_stats, widen_mul_stats): Define.
	(insert_reciprocals): Increment rdivs_inserted.
	(execute_cse_reciprocals): Zeroize reciprocal_stats.  Increment
	rfuncs_inserted.  Add calls to statistics_counter_event.
	(execute_cse_sincos_1): Increment inserted.
	(execute_cse_sincos): Zeroize sincos_stats.  Add call to
	statistics_counter_event.
	(execute_optimize_bswap): Zeroize bswap_stats.  Increment fields
	of bswap_stats.  Add calls to statistics_counter_event.
	(convert_mult_to_widen): Increment widen_mults_inserted.
	(convert_plusminus_to_widen): Increment maccs_inserted.
	(convert_mult_to_fma): Increment fmas_inserted.
	(execute_optimize_widening_mul): Zeroize widen_mul_stats.  Add
	calls to statistics_counter_event.

Comments

Richard Biener April 12, 2011, 2:27 p.m. UTC | #1
On Tue, Apr 12, 2011 at 4:16 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> It's a shame more passes don't make use of the statistics_*
> infrastructure.  This patch is a step towards rectifying that and adds
> statistics_counter_event calls to passes mentioned in $SUBJECT.
> postreload-gcse already tracked the stats for the dump file and so only
> needs the statistics_counter_event calls; the other passes needed to be
> taught about the statistics also.
>
> Tested on x86_64-unknown-linux-gnu.  OK to commit?

Ok if there are no complaints within 24h.  I actually have a local patch
adding many of these which I use whenever fiddling with the pass pipeline ...
(attached).

Richard.

> -Nathan
>
>        * postreload-gcse.c (gcse_after_reload_main): Add calls to
>        statistics_counter_event.
>        * tree-ssa-copyrename.c (stats): Define.
>        (rename_ssa_copies): Count coalesced SSA_NAMEs.  Add call to
>        statistics_counter_event.
>        * tree-ssa-math-opts.c (reciprocal_stats, sincos_stats): Define.
>        (bswap_stats, widen_mul_stats): Define.
>        (insert_reciprocals): Increment rdivs_inserted.
>        (execute_cse_reciprocals): Zeroize reciprocal_stats.  Increment
>        rfuncs_inserted.  Add calls to statistics_counter_event.
>        (execute_cse_sincos_1): Increment inserted.
>        (execute_cse_sincos): Zeroize sincos_stats.  Add call to
>        statistics_counter_event.
>        (execute_optimize_bswap): Zeroize bswap_stats.  Increment fields
>        of bswap_stats.  Add calls to statistics_counter_event.
>        (convert_mult_to_widen): Increment widen_mults_inserted.
>        (convert_plusminus_to_widen): Increment maccs_inserted.
>        (convert_mult_to_fma): Increment fmas_inserted.
>        (execute_optimize_widening_mul): Zeroize widen_mul_stats.  Add
>        calls to statistics_counter_event.
>
> diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c
> index 7eeecf4..8e26419 100644
> --- a/gcc/postreload-gcse.c
> +++ b/gcc/postreload-gcse.c
> @@ -1294,6 +1294,13 @@ gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
>          fprintf (dump_file, "insns deleted:   %d\n", stats.insns_deleted);
>          fprintf (dump_file, "\n\n");
>        }
> +
> +      statistics_counter_event (cfun, "copies inserted",
> +                               stats.copies_inserted);
> +      statistics_counter_event (cfun, "moves inserted",
> +                               stats.moves_inserted);
> +      statistics_counter_event (cfun, "insns deleted",
> +                               stats.insns_deleted);
>     }
>
>   /* We are finished with alias.  */
> diff --git a/gcc/tree-ssa-copyrename.c b/gcc/tree-ssa-copyrename.c
> index dfc0b4e..ae4fb5f 100644
> --- a/gcc/tree-ssa-copyrename.c
> +++ b/gcc/tree-ssa-copyrename.c
> @@ -40,6 +40,12 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-pass.h"
>  #include "langhooks.h"
>
> +static struct
> +{
> +  /* Number of copies coalesced.  */
> +  int coalesced;
> +} stats;
> +
>  /* The following routines implement the SSA copy renaming phase.
>
>    This optimization looks for copies between 2 SSA_NAMES, either through a
> @@ -360,9 +366,12 @@ rename_ssa_copies (void)
>              fprintf (debug, "\n");
>            }
>        }
> +      stats.coalesced++;
>       replace_ssa_name_symbol (var, SSA_NAME_VAR (part_var));
>     }
>
> +  statistics_counter_event (cfun, "copies coalesced",
> +                           stats.coalesced);
>   delete_var_map (map);
>   return updated ? TODO_remove_unused_locals : 0;
>  }
> diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
> index 6e2213c..b9f631e 100644
> --- a/gcc/tree-ssa-math-opts.c
> +++ b/gcc/tree-ssa-math-opts.c
> @@ -138,6 +138,41 @@ struct occurrence {
>   bool bb_has_division;
>  };
>
> +static struct
> +{
> +  /* Number of 1.0/X ops inserted.  */
> +  int rdivs_inserted;
> +
> +  /* Number of 1.0/FUNC ops inserted.  */
> +  int rfuncs_inserted;
> +} reciprocal_stats;
> +
> +static struct
> +{
> +  /* Number of cexpi calls inserted.  */
> +  int inserted;
> +} sincos_stats;
> +
> +static struct
> +{
> +  /* Number of hand-written 32-bit bswaps found.  */
> +  int found_32bit;
> +
> +  /* Number of hand-written 64-bit bswaps found.  */
> +  int found_64bit;
> +} bswap_stats;
> +
> +static struct
> +{
> +  /* Number of widening multiplication ops inserted.  */
> +  int widen_mults_inserted;
> +
> +  /* Number of integer multiply-and-accumulate ops inserted.  */
> +  int maccs_inserted;
> +
> +  /* Number of fp fused multiply-add ops inserted.  */
> +  int fmas_inserted;
> +} widen_mul_stats;
>
>  /* The instance of "struct occurrence" representing the highest
>    interesting block in the dominator tree.  */
> @@ -339,6 +374,8 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
>           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
>         }
>
> +      reciprocal_stats.rdivs_inserted++;
> +
>       occ->recip_def_stmt = new_stmt;
>     }
>
> @@ -466,6 +503,7 @@ execute_cse_reciprocals (void)
>                                sizeof (struct occurrence),
>                                n_basic_blocks / 3 + 1);
>
> +  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
>   calculate_dominance_info (CDI_DOMINATORS);
>   calculate_dominance_info (CDI_POST_DOMINATORS);
>
> @@ -568,6 +606,7 @@ execute_cse_reciprocals (void)
>                  gimple_replace_lhs (stmt1, arg1);
>                  gimple_call_set_fndecl (stmt1, fndecl);
>                  update_stmt (stmt1);
> +                 reciprocal_stats.rfuncs_inserted++;
>
>                  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
>                    {
> @@ -580,6 +619,11 @@ execute_cse_reciprocals (void)
>        }
>     }
>
> +  statistics_counter_event (cfun, "reciprocal divs inserted",
> +                           reciprocal_stats.rdivs_inserted);
> +  statistics_counter_event (cfun, "reciprocal functions inserted",
> +                           reciprocal_stats.rfuncs_inserted);
> +
>   free_dominance_info (CDI_DOMINATORS);
>   free_dominance_info (CDI_POST_DOMINATORS);
>   free_alloc_pool (occ_pool);
> @@ -711,6 +755,7 @@ execute_cse_sincos_1 (tree name)
>       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
>     }
>   update_stmt (stmt);
> +  sincos_stats.inserted++;
>
>   /* And adjust the recorded old call sites.  */
>   for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
> @@ -760,6 +805,7 @@ execute_cse_sincos (void)
>   bool cfg_changed = false;
>
>   calculate_dominance_info (CDI_DOMINATORS);
> +  memset (&sincos_stats, 0, sizeof (sincos_stats));
>
>   FOR_EACH_BB (bb)
>     {
> @@ -793,6 +839,9 @@ execute_cse_sincos (void)
>        }
>     }
>
> +  statistics_counter_event (cfun, "sincos statements inserted",
> +                           sincos_stats.inserted);
> +
>   free_dominance_info (CDI_DOMINATORS);
>   return cfg_changed ? TODO_cleanup_cfg : 0;
>  }
> @@ -1141,6 +1190,8 @@ execute_optimize_bswap (void)
>       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
>     }
>
> +  memset (&bswap_stats, 0, sizeof (bswap_stats));
> +
>   FOR_EACH_BB (bb)
>     {
>       gimple_stmt_iterator gsi;
> @@ -1189,6 +1240,10 @@ execute_optimize_bswap (void)
>            continue;
>
>          changed = true;
> +         if (type_size == 32)
> +           bswap_stats.found_32bit++;
> +         else
> +           bswap_stats.found_64bit++;
>
>          bswap_tmp = bswap_src;
>
> @@ -1237,6 +1292,11 @@ execute_optimize_bswap (void)
>        }
>     }
>
> +  statistics_counter_event (cfun, "32-bit bswap implementations found",
> +                           bswap_stats.found_32bit);
> +  statistics_counter_event (cfun, "64-bit bswap implementations found",
> +                           bswap_stats.found_64bit);
> +
>   return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
>          | TODO_verify_stmts : 0);
>  }
> @@ -1389,6 +1449,7 @@ convert_mult_to_widen (gimple stmt)
>   gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
>   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
>   update_stmt (stmt);
> +  widen_mul_stats.widen_mults_inserted++;
>   return true;
>  }
>
> @@ -1491,6 +1552,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
>                                    fold_convert (type2, mult_rhs2),
>                                    add_rhs);
>   update_stmt (gsi_stmt (*gsi));
> +  widen_mul_stats.maccs_inserted++;
>   return true;
>  }
>
> @@ -1666,6 +1728,7 @@ convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
>                                                mulop1, op2,
>                                                addop);
>       gsi_replace (&gsi, fma_stmt, true);
> +      widen_mul_stats.fmas_inserted++;
>     }
>
>   return true;
> @@ -1681,6 +1744,8 @@ execute_optimize_widening_mul (void)
>   basic_block bb;
>   bool cfg_changed = false;
>
> +  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
> +
>   FOR_EACH_BB (bb)
>     {
>       gimple_stmt_iterator gsi;
> @@ -1752,6 +1817,13 @@ execute_optimize_widening_mul (void)
>        }
>     }
>
> +  statistics_counter_event (cfun, "widening multiplications inserted",
> +                           widen_mul_stats.widen_mults_inserted);
> +  statistics_counter_event (cfun, "widening maccs inserted",
> +                           widen_mul_stats.maccs_inserted);
> +  statistics_counter_event (cfun, "fused multiply-adds inserted",
> +                           widen_mul_stats.fmas_inserted);
> +
>   return cfg_changed ? TODO_cleanup_cfg : 0;
>  }
>
>
Nathan Froyd April 12, 2011, 2:32 p.m. UTC | #2
On Tue, Apr 12, 2011 at 04:27:01PM +0200, Richard Guenther wrote:
> On Tue, Apr 12, 2011 at 4:16 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> > It's a shame more passes don't make use of the statistics_*
> > infrastructure.  This patch is a step towards rectifying that and adds
> > statistics_counter_event calls to passes mentioned in $SUBJECT.
> > postreload-gcse already tracked the stats for the dump file and so only
> > needs the statistics_counter_event calls; the other passes needed to be
> > taught about the statistics also.
> 
> Ok if there are no complaints within 24h.  I actually have a local patch
> adding many of these which I use whenever fiddling with the pass pipeline ...
> (attached).

Thanks.  I may go twiddle that patch to do something similar to mine and
submit that.  Do you use your patch for checking that the same set of
optimizations get performed, then?  I'm interested in using the
statistics for identifying passes that don't buy us much across a wide
variety of codebases.  (Suggestions for suitable ones welcome!)

-Nathan
Richard Biener April 12, 2011, 2:37 p.m. UTC | #3
On Tue, Apr 12, 2011 at 4:32 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> On Tue, Apr 12, 2011 at 04:27:01PM +0200, Richard Guenther wrote:
>> On Tue, Apr 12, 2011 at 4:16 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
>> > It's a shame more passes don't make use of the statistics_*
>> > infrastructure.  This patch is a step towards rectifying that and adds
>> > statistics_counter_event calls to passes mentioned in $SUBJECT.
>> > postreload-gcse already tracked the stats for the dump file and so only
>> > needs the statistics_counter_event calls; the other passes needed to be
>> > taught about the statistics also.
>>
>> Ok if there are no complaints within 24h.  I actually have a local patch
>> adding many of these which I use whenever fiddling with the pass pipeline ...
>> (attached).
>
> Thanks.  I may go twiddle that patch to do something similar to mine and
> submit that.  Do you use your patch for checking that the same set of
> optimizations get performed, then?  I'm interested in using the
> statistics for identifying passes that don't buy us much across a wide
> variety of codebases.  (Suggestions for suitable ones welcome!)

Yes, I used it exactly for that.  And also to verify that passes don't
do anything if replicated (well, for those that shouldn't at least).

Don't expect any low-hanging fruit though ;)  I caught all of it already.

Candidates are obviously SPEC and GCC itself.  I also use tramp3d
of course.  That said, even if a pass does nearly nothing we often
have testcases that need it ...

Richard.

> -Nathan
>
Nathan Froyd April 12, 2011, 2:51 p.m. UTC | #4
On Tue, Apr 12, 2011 at 04:37:42PM +0200, Richard Guenther wrote:
> On Tue, Apr 12, 2011 at 4:32 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> > Thanks.  I may go twiddle that patch to do something similar to mine and
> > submit that.  Do you use your patch for checking that the same set of
> > optimizations get performed, then?  I'm interested in using the
> > statistics for identifying passes that don't buy us much across a wide
> > variety of codebases.  (Suggestions for suitable ones welcome!)
> 
> Yes, I used it exactly for that.  And also to verify that passes don't
> do anything if replicated (well, for those that shouldn't at least).
> 
> Don't expect any low-hanging fruit though ;)  I caught all of it already.
> 
> Candidates are obviously SPEC and GCC itself.  I also use tramp3d
> of course.  That said, even if a pass does nearly nothing we often
> have testcases that need it ...

True, but maybe those testcases should be adjusted--per-pass flags,
rather than blindly assuming -O2 includes them.  And it's not clear to
me that the statistics_counter_event infrastructure really helps
catching do-nothing passes, since it doesn't record stats that increment
by zero...

-Nathan
Richard Biener April 12, 2011, 2:54 p.m. UTC | #5
On Tue, Apr 12, 2011 at 4:51 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> On Tue, Apr 12, 2011 at 04:37:42PM +0200, Richard Guenther wrote:
>> On Tue, Apr 12, 2011 at 4:32 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
>> > Thanks.  I may go twiddle that patch to do something similar to mine and
>> > submit that.  Do you use your patch for checking that the same set of
>> > optimizations get performed, then?  I'm interested in using the
>> > statistics for identifying passes that don't buy us much across a wide
>> > variety of codebases.  (Suggestions for suitable ones welcome!)
>>
>> Yes, I used it exactly for that.  And also to verify that passes don't
>> do anything if replicated (well, for those that shouldn't at least).
>>
>> Don't expect any low-hanging fruit though ;)  I caught all of it already.
>>
>> Candidates are obviously SPEC and GCC itself.  I also use tramp3d
>> of course.  That said, even if a pass does nearly nothing we often
>> have testcases that need it ...
>
> True, but maybe those testcases should be adjusted--per-pass flags,
> rather than blindly assuming -O2 includes them.  And it's not clear to

It's easier to add things to GCC than to argue removing things ...

> me that the statistics_counter_event infrastructure really helps
> catching do-nothing passes, since it doesn't record stats that increment
> by zero...

Well, if the overall count is zero then nothing was done.

Richard.

> -Nathan
>
Steven Bosscher April 12, 2011, 3:01 p.m. UTC | #6
On Tue, Apr 12, 2011 at 4:37 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> Yes, I used it exactly for that.  And also to verify that passes don't
> do anything if replicated (well, for those that shouldn't at least).

What about passes that undo the work of previous passes -- and are then
followed by a pass that re-does the changes? Think CPROP ->
loop-invariant -> CPROP, and I'm sure there are other examples. That
kind of thing makes the statistics gathering a bit suspect...

Ciao!
Steven
Nathan Froyd April 12, 2011, 3:09 p.m. UTC | #7
On Tue, Apr 12, 2011 at 04:54:43PM +0200, Richard Guenther wrote:
> On Tue, Apr 12, 2011 at 4:51 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> > True, but maybe those testcases should be adjusted--per-pass flags,
> > rather than blindly assuming -O2 includes them.  And it's not clear to
> 
> It's easier to add things to GCC than to argue removing things ...

And sometimes not even easy to argue for adding things. :)

> > me that the statistics_counter_event infrastructure really helps
> > catching do-nothing passes, since it doesn't record stats that increment
> > by zero...
> 
> Well, if the overall count is zero then nothing was done.

Granted, but that fact should still be recorded.  The situation we have
today, for something like:

func1: statistic for "statx" was 0
  - nothing is recorded in the statistics table
func2: statistic for "statx" was 0
  - nothing is recorded in the statistics table
func3: statistic for "statx" was 0
  - nothing is recorded in the statistics table
...

and so forth, is that at the end of the day, the dump file won't even
include any information about "statx".  If you had some func7387 where
"statx" was non-zero, you could infer that nothing else happened in the
previous 7386 functions.  For the case where a pass is truly useless on
a TU, it's hard to figure out from the statistics dump alone.  And I'd
argue that it's useful to see explicitly that the pass only helped in 1
out of 7387 functions, rather than trying to infer it from missing data.

-Nathan
Richard Biener April 13, 2011, 9:07 a.m. UTC | #8
On Tue, Apr 12, 2011 at 5:09 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> On Tue, Apr 12, 2011 at 04:54:43PM +0200, Richard Guenther wrote:
>> On Tue, Apr 12, 2011 at 4:51 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
>> > True, but maybe those testcases should be adjusted--per-pass flags,
>> > rather than blindly assuming -O2 includes them.  And it's not clear to
>>
>> It's easier to add things to GCC than to argue removing things ...
>
> And sometimes not even easy to argue for adding things. :)
>
>> > me that the statistics_counter_event infrastructure really helps
>> > catching do-nothing passes, since it doesn't record stats that increment
>> > by zero...
>>
>> Well, if the overall count is zero then nothing was done.
>
> Granted, but that fact should still be recorded.  The situation we have
> today, for something like:
>
> func1: statistic for "statx" was 0
>  - nothing is recorded in the statistics table
> func2: statistic for "statx" was 0
>  - nothing is recorded in the statistics table
> func3: statistic for "statx" was 0
>  - nothing is recorded in the statistics table
> ...
>
> and so forth, is that at the end of the day, the dump file won't even
> include any information about "statx".  If you had some func7387 where
> "statx" was non-zero, you could infer that nothing else happened in the
> previous 7386 functions.  For the case where a pass is truly useless on
> a TU, it's hard to figure out from the statistics dump alone.  And I'd
> argue that it's useful to see explicitly that the pass only helped in 1
> out of 7387 functions, rather than trying to infer it from missing data.

I always use statistics-stats (thus, overall stats, not per function).  The
per function ones omit zero counts during dumping on purpose
(to make the dump smaller).

Richard.

> -Nathan
>
Nathan Froyd April 13, 2011, 6:43 p.m. UTC | #9
On Wed, Apr 13, 2011 at 11:07:15AM +0200, Richard Guenther wrote:
> On Tue, Apr 12, 2011 at 5:09 PM, Nathan Froyd <froydnj@codesourcery.com> wrote:
> > Granted, but that fact should still be recorded.  The situation we have
> > today, for something like:
> >
> > func1: statistic for "statx" was 0
> >  - nothing is recorded in the statistics table
> > func2: statistic for "statx" was 0
> >  - nothing is recorded in the statistics table
> > func3: statistic for "statx" was 0
> >  - nothing is recorded in the statistics table
> > ...
> >
> > and so forth, is that at the end of the day, the dump file won't even
> > include any information about "statx".  If you had some func7387 where
> > "statx" was non-zero, you could infer that nothing else happened in the
> > previous 7386 functions.  For the case where a pass is truly useless on
> > a TU, it's hard to figure out from the statistics dump alone.  And I'd
> > argue that it's useful to see explicitly that the pass only helped in 1
> > out of 7387 functions, rather than trying to infer it from missing data.
> 
> I always use statistics-stats (thus, overall stats, not per function).  The
> per function ones omit zero counts during dumping on purpose
> (to make the dump smaller).

I didn't know about statistics-stats (or didn't realize that's what the
code was trying to do), that's useful.  And it looks like all the
statistics dumping things omit zero counts on purpose, not just the
per-function ones.

But that has no bearing on the point above: zero counts are not even
*recorded* today.  E.g. if you apply the patch upthread, grab a random C
file, compile it with -O2/3 -fdump-statistics/-stats, and examine the
dump file, you might not even know that new statistics counters have
been added.  Taking out the checks to avoid printing zero counts doesn't
help either, because the data simply doesn't get recorded.  This
infrastructure makes it somewhat difficult to figure out, in an
automated way from the dump file alone, whether passes are actually
doing anything.

Enough grousing.  I'm assuming turning on accumulation and dumping of
zero counts always would be frowned upon; would it be acceptable to turn
on accumulation and dumping of zero counts if -details is given?

-Nathan
diff mbox

Patch

diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c
index 7eeecf4..8e26419 100644
--- a/gcc/postreload-gcse.c
+++ b/gcc/postreload-gcse.c
@@ -1294,6 +1294,13 @@  gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
 	  fprintf (dump_file, "insns deleted:   %d\n", stats.insns_deleted);
 	  fprintf (dump_file, "\n\n");
 	}
+
+      statistics_counter_event (cfun, "copies inserted",
+				stats.copies_inserted);
+      statistics_counter_event (cfun, "moves inserted",
+				stats.moves_inserted);
+      statistics_counter_event (cfun, "insns deleted",
+				stats.insns_deleted);
     }
 
   /* We are finished with alias.  */
diff --git a/gcc/tree-ssa-copyrename.c b/gcc/tree-ssa-copyrename.c
index dfc0b4e..ae4fb5f 100644
--- a/gcc/tree-ssa-copyrename.c
+++ b/gcc/tree-ssa-copyrename.c
@@ -40,6 +40,12 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "langhooks.h"
 
+static struct
+{
+  /* Number of copies coalesced.  */
+  int coalesced;
+} stats;
+
 /* The following routines implement the SSA copy renaming phase.
 
    This optimization looks for copies between 2 SSA_NAMES, either through a
@@ -360,9 +366,12 @@  rename_ssa_copies (void)
 	      fprintf (debug, "\n");
 	    }
 	}
+      stats.coalesced++;
       replace_ssa_name_symbol (var, SSA_NAME_VAR (part_var));
     }
 
+  statistics_counter_event (cfun, "copies coalesced",
+			    stats.coalesced);
   delete_var_map (map);
   return updated ? TODO_remove_unused_locals : 0;
 }
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 6e2213c..b9f631e 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -138,6 +138,41 @@  struct occurrence {
   bool bb_has_division;
 };
 
+static struct
+{
+  /* Number of 1.0/X ops inserted.  */
+  int rdivs_inserted;
+
+  /* Number of 1.0/FUNC ops inserted.  */
+  int rfuncs_inserted;
+} reciprocal_stats;
+
+static struct
+{
+  /* Number of cexpi calls inserted.  */
+  int inserted;
+} sincos_stats;
+
+static struct
+{
+  /* Number of hand-written 32-bit bswaps found.  */
+  int found_32bit;
+
+  /* Number of hand-written 64-bit bswaps found.  */
+  int found_64bit;
+} bswap_stats;
+
+static struct
+{
+  /* Number of widening multiplication ops inserted.  */
+  int widen_mults_inserted;
+
+  /* Number of integer multiply-and-accumulate ops inserted.  */
+  int maccs_inserted;
+
+  /* Number of fp fused multiply-add ops inserted.  */
+  int fmas_inserted;
+} widen_mul_stats;
 
 /* The instance of "struct occurrence" representing the highest
    interesting block in the dominator tree.  */
@@ -339,6 +374,8 @@  insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
         }
 
+      reciprocal_stats.rdivs_inserted++;
+
       occ->recip_def_stmt = new_stmt;
     }
 
@@ -466,6 +503,7 @@  execute_cse_reciprocals (void)
 				sizeof (struct occurrence),
 				n_basic_blocks / 3 + 1);
 
+  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
   calculate_dominance_info (CDI_DOMINATORS);
   calculate_dominance_info (CDI_POST_DOMINATORS);
 
@@ -568,6 +606,7 @@  execute_cse_reciprocals (void)
 		  gimple_replace_lhs (stmt1, arg1);
 		  gimple_call_set_fndecl (stmt1, fndecl);
 		  update_stmt (stmt1);
+		  reciprocal_stats.rfuncs_inserted++;
 
 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 		    {
@@ -580,6 +619,11 @@  execute_cse_reciprocals (void)
 	}
     }
 
+  statistics_counter_event (cfun, "reciprocal divs inserted",
+			    reciprocal_stats.rdivs_inserted);
+  statistics_counter_event (cfun, "reciprocal functions inserted",
+			    reciprocal_stats.rfuncs_inserted);
+
   free_dominance_info (CDI_DOMINATORS);
   free_dominance_info (CDI_POST_DOMINATORS);
   free_alloc_pool (occ_pool);
@@ -711,6 +755,7 @@  execute_cse_sincos_1 (tree name)
       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
     }
   update_stmt (stmt);
+  sincos_stats.inserted++;
 
   /* And adjust the recorded old call sites.  */
   for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
@@ -760,6 +805,7 @@  execute_cse_sincos (void)
   bool cfg_changed = false;
 
   calculate_dominance_info (CDI_DOMINATORS);
+  memset (&sincos_stats, 0, sizeof (sincos_stats));
 
   FOR_EACH_BB (bb)
     {
@@ -793,6 +839,9 @@  execute_cse_sincos (void)
 	}
     }
 
+  statistics_counter_event (cfun, "sincos statements inserted",
+			    sincos_stats.inserted);
+
   free_dominance_info (CDI_DOMINATORS);
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }
@@ -1141,6 +1190,8 @@  execute_optimize_bswap (void)
       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
     }
 
+  memset (&bswap_stats, 0, sizeof (bswap_stats));
+
   FOR_EACH_BB (bb)
     {
       gimple_stmt_iterator gsi;
@@ -1189,6 +1240,10 @@  execute_optimize_bswap (void)
 	    continue;
 
 	  changed = true;
+	  if (type_size == 32)
+	    bswap_stats.found_32bit++;
+	  else
+	    bswap_stats.found_64bit++;
 
 	  bswap_tmp = bswap_src;
 
@@ -1237,6 +1292,11 @@  execute_optimize_bswap (void)
 	}
     }
 
+  statistics_counter_event (cfun, "32-bit bswap implementations found",
+			    bswap_stats.found_32bit);
+  statistics_counter_event (cfun, "64-bit bswap implementations found",
+			    bswap_stats.found_64bit);
+
   return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
 	  | TODO_verify_stmts : 0);
 }
@@ -1389,6 +1449,7 @@  convert_mult_to_widen (gimple stmt)
   gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
   update_stmt (stmt);
+  widen_mul_stats.widen_mults_inserted++;
   return true;
 }
 
@@ -1491,6 +1552,7 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 				    fold_convert (type2, mult_rhs2),
 				    add_rhs);
   update_stmt (gsi_stmt (*gsi));
+  widen_mul_stats.maccs_inserted++;
   return true;
 }
 
@@ -1666,6 +1728,7 @@  convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
 						mulop1, op2,
 						addop);
       gsi_replace (&gsi, fma_stmt, true);
+      widen_mul_stats.fmas_inserted++;
     }
 
   return true;
@@ -1681,6 +1744,8 @@  execute_optimize_widening_mul (void)
   basic_block bb;
   bool cfg_changed = false;
 
+  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
+
   FOR_EACH_BB (bb)
     {
       gimple_stmt_iterator gsi;
@@ -1752,6 +1817,13 @@  execute_optimize_widening_mul (void)
 	}
     }
 
+  statistics_counter_event (cfun, "widening multiplications inserted",
+			    widen_mul_stats.widen_mults_inserted);
+  statistics_counter_event (cfun, "widening maccs inserted",
+			    widen_mul_stats.maccs_inserted);
+  statistics_counter_event (cfun, "fused multiply-adds inserted",
+			    widen_mul_stats.fmas_inserted);
+
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }