Patchwork Profile housekeeping 6/n (-fprofile-consistency-report)

login
register
mail settings
Submitter Jan Hubicka
Date Oct. 6, 2012, 2:10 p.m.
Message ID <20121006141004.GA13901@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/189711/
State New
Headers show

Comments

Jan Hubicka - Oct. 6, 2012, 2:10 p.m.
> > Index: passes.c
> > +/* Hold statistic about profile consistency.  */
> ...
> 
> I don't see why this should live in passes.c, can you please put it in
> a more logical place (profile.c, perhaps)?

Hmm, the problem here is that the code is using passmanager's dumping bits
to order the passes and assign them names.  So moving it elsewhere requires
exporting it that is not nice.  passes.c does similar stuff already, so I decided
to keep it there.

Here is patch I comitted that prints more detailed report.  It reports also changes
in overall unit time/size estimates and when pass did nothing it reports it as
uneffective.  For tramp3d it looks this way:

Pass name                        |mismatch in |mismated out|Overall
                                 |freq count  |freq count  |size       time
cfg                  (after TODO)|            |            |  -1.5474%
ssa                              |            |            |  -2.0148%
inline_param         ------------|            |            |
einline                          |            |            |  -0.4991%
einline              (after TODO)|            |            |  -0.0129%
early_optimizations  ------------|            |            |
copyrename           ------------|            |            |
ccp                              |            |            |  -0.2273%
forwprop                         |            |            |  -0.0688%
ealias               ------------|            |            |
esra                             |            |            |  -0.1892%
fre                              |            |            |  -7.9369%
copyprop             (after TODO)|            |            |  -0.0187%
mergephi             ------------|            |            |
cddce                            |            |            |  -0.1655%
eipa_sra                         |            |            |  -0.0237%
tailr                            |            |            |  -0.5305%
switchconv                       |            |            |  +0.0190%
profile_estimate                 |    +1      |            |
local-pure-const     ------------|            |            |
fnsplit                          |   +20      |            |  +0.2333%  +0.8267%
fnsplit              (after TODO)|   -20      |            |  -1.8146%  -1.3300%
release_ssa          ------------|            |            |
inline_param         ------------|            |            |
inline                           |  +229      |            | +14.5138% -10.6020%
inline               (after TODO)|  -225      |            |  -0.2662% -10.2195%
copyrename                       |            |            |  -2.7026%  -2.9448%
cunrolli             ------------|            |            |
ccp                              |            |            |  -0.0174%  -0.0142%
ccp                  (after TODO)|    +8      |            |  -0.3571%  -0.1213%
forwprop                         |            |            |  -0.1442%  -0.5343%
alias                ------------|            |            |
retslot              ------------|            |            |
phiprop              ------------|            |            |
fre                              |            |            |  -0.2801%  -0.2814%
fre                  (after TODO)|            |            |  -0.2897%  -0.0362%
copyprop                         |            |            |  -0.0264%  -0.0251%
mergephi             ------------|            |            |
vrp                              |   +80      |            |  +0.1233%  -1.0271%
vrp                  (after TODO)|    +3      |            |  -0.8618%  -0.2421%
dce                              |            |            |  -0.0089%  -0.0021%
dce                  (after TODO)|    -7      |            |  -0.0089%  -0.0347%
cdce                 ------------|            |            |
cselim                           |            |            |  -0.0177%  +0.0000%
ifcombine                        |            |            |  +0.0044%  +0.0001%
ifcombine            (after TODO)|            |            |  -0.0089%  +0.0000%
phiopt                           |            |            |  -0.1730%  -0.1203%
tailr                ------------|            |            |
ch                               |    +2      |            |  +2.0535%  +0.0000%
ch                   (after TODO)|            |            |            +0.0002%
cplxlower            ------------|            |            |
sra                              |            |            |  +0.0087%
copyrename           ------------|            |            |
dom                              |   +86      |            |  +1.1802%  -0.1366%
dom                  (after TODO)|    +5      |            |  -0.1894%  -0.3042%
phicprop                         |            |            |  -0.0043%
phicprop             (after TODO)|    -6      |            |            +0.2117%
dse                              |            |            |  -0.0086%  -0.0009%
reassoc                          |            |            |  +0.0302%  +0.0116%
dce                              |            |            |  -0.6640%  -0.1408%
dce                  (after TODO)|    -2      |            |
forwprop                         |            |            |  -0.1129%  -0.1826%
phiopt               ------------|            |            |
objsz                ------------|            |            |
strlen               ------------|            |            |
ccp                  (after TODO)|            |            |  -0.0087%
copyprop             ------------|            |            |
sincos               ------------|            |            |
bswap                ------------|            |            |
crited               ------------|            |            |
pre                              |    +1      |            |  +1.2168%  -1.3703%
pre                  (after TODO)|    -2      |            |  -0.0215%  +0.0764%
sink                             |            |            |            -0.0256%
loop                 ------------|            |            |
loopinit             ------------|            |            |
lim                              |            |            |  +0.0086%  -0.1529%
copyprop             (after TODO)|   -11      |            |            +0.1483%
dceloop                          |            |            |  -0.0172%  -0.0028%
unswitch                         |    +7      |            |  +1.0265%  +0.0404%
unswitch             (after TODO)|   +21      |            |  -0.5569%  -0.7164%
sccp                 ------------|            |            |
ldist                ------------|            |            |
copyprop             ------------|            |            |
ivcanon                          |            |            |  +0.0214%  +0.0661%
ifcvt                ------------|            |            |
vect                 ------------|            |            |
dceloop                          |            |            |  -0.0085%  -0.0009%
pcom                             |            |            |            -0.0002%
cunroll                          |    -2      |            |  +0.8891%  -0.0781%
slp                  ------------|            |            |
ivopts                           |            |            |  +0.1822%  +0.0837%
lim                  ------------|            |            |
loopdone             ------------|            |            |
veclower2            ------------|            |            |
reassoc                          |            |            |  -0.0042%  -0.0169%
vrp                              |   +17      |            |  -0.1015%  -0.2523%
vrp                  (after TODO)|    +1      |            |  -0.2540%  -0.3505%
slsr                             |            |            |  +0.1952%  +0.6254%
dom                              |   +16      |            |  -0.0254%  -0.6641%
dom                  (after TODO)|    +5      |            |  -0.0424%  -0.0243%
phicprop                         |            |            |  -0.0170%
phicprop             (after TODO)|    +2      |            |            +0.0141%
cddce                            |            |            |  -0.1229%  -0.0515%
cddce                (after TODO)|    -1      |            |  -0.0085%
dse                  ------------|            |            |
forwprop                         |            |            |  -0.0382%  -0.0254%
phiopt                           |            |            |  -0.0255%  -0.0462%
fab                  ------------|            |            |
widening_mul         ------------|            |            |
tailc                ------------|            |            |
copyrename           ------------|            |            |
uncprop              ------------|            |            |
local-pure-const     ------------|            |            |
nrv                  ------------|            |            |
optimized            ------------|            |            |
expand                           |  +430      |            |----------
vregs                ------------|            |            |
into_cfglayout                   |            |            |  -4.4596%  -3.2103%
jump                             |    +6      |            |  -0.5852%  -0.5775%
subreg1                          |            |            |  -0.0589%
dfinit               ------------|            |            |
cse1                             |            |            |  -0.0966%  -0.0620%
fwprop1                          |            |            |  -2.5152%  -3.0170%
cprop                            |    -8      |            |  -0.4209%  -0.4256%
rtl pre                          |            |            |  +0.8649%  +0.7079%
hoist                ------------|            |            |
cprop                            |    -7      |            |  -0.9099%  -1.0555%
cse_local                        |            |            |  -0.1787%  -0.2454%
ce1                              |    -1      |            |  +0.0913%  +0.3673%
reginfo              ------------|            |            |
loop2                ------------|            |            |
loop2_init           ------------|            |            |
loop2_invariant                  |            |            |  +0.0523%  +0.1014%
loop2_unswitch       ------------|            |            |
loop2_done           ------------|            |            |
cprop                            |            |            |  -0.3951%  -0.2580%
cse2                             |            |            |  -0.3698%  -0.3355%
dse1                             |            |            |  -0.0025%  -0.0011%
fwprop2                          |            |            |  -0.0012%  -0.0013%
init-regs            ------------|            |            |
ud_dce                           |            |            |  -0.0784%  -0.0126%
combine                          |            |            |  -3.1886%  -3.4658%
ce2                              |            |            |            +0.0001%
regmove                          |            |            |  +0.0025%  +0.0000%
outof_cfglayout                  |            |            |  +3.9667%  +1.8602%
split1                           |            |            |  +0.0828%  +0.0878%
subreg2              ------------|            |            |
mode_sw              ------------|            |            |
asmcons              ------------|            |            |
ira                              |            |            |  +3.0210%  +0.5602%
reload                           |    -4      |            |  -2.3169%  -3.8620%
postreload                       |            |            |  -0.3193%  -0.0548%
gcse2                            |            |            |  -0.0170%  -0.0051%
split2                           |            |            |  +0.2646%  +0.3206%
ree                              |            |            |  +0.0012%  +0.0175%
pro_and_epilogue                 |    +4      |            |  +3.8163% +18.4507%
dse2                 ------------|            |            |
csa                  ------------|            |            |
jump2                            |            |            |  -4.4781%  -0.0612%
peephole2                        |            |            |  +0.0073%  -0.0539%
ce3                              |            |            |  -0.0049%  +0.0092%
cprop_hardreg        ------------|            |            |
rtl_dce                          |            |            |  -0.0903%  -0.0736%
bbro                             |    -7      |            |  +0.4776%  -1.0224%
split4               ------------|            |            |
sched2               ------------|            |            |
stack                ------------|            |            |
alignments           ------------|            |            |
compgotos            ------------|            |            |

Honza

	* doc/invoke.texi (-fprofile-report): Document.
	* common.opt (-fprofile-report): New option.
	* toplev.c (finalize): Call dump_profile_report.
	* toplev.h (profile_report): Declare.
	* passes.c (profile_record): New static var.
	(check_profile_consistency): New function.
	(dump_profile_record): New function.
	(execute_one_ipa_transform_pass): Call check_profile_consistency.
	(execute_one_pass): Likewise.
Steven Bosscher - Oct. 6, 2012, 2:39 p.m.
On Sat, Oct 6, 2012 at 4:10 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> > Index: passes.c
>> > +/* Hold statistic about profile consistency.  */
>> ...
>>
>> I don't see why this should live in passes.c, can you please put it in
>> a more logical place (profile.c, perhaps)?
>
> Hmm, the problem here is that the code is using passmanager's dumping bits
> to order the passes and assign them names.  So moving it elsewhere requires
> exporting it that is not nice.  passes.c does similar stuff already, so I decided
> to keep it there.

I think this is not the right decision. We can also throw _all_ code
on one pile because there are inter-dependencies. Or they can be fixed
with a proper interface.

> If there are no complains I will commit the patch tomorrow.

+1 complaint.
You're putting profile stuff and even RTL stuff in the pass manager.
That is Just Wrong.

Ciao!
Steven
Graham Stott - Oct. 6, 2012, 3:15 p.m.
Jan.

This patch also breaks bootstrap due compilation errors reported for 
pases.c and toplev.c

Graham
Jan Hubicka - Oct. 6, 2012, 3:34 p.m.
> Jan.
> 
> This patch also breaks bootstrap due compilation errors reported for
> pases.c and toplev.c

Sorry for that.  I swapped files and used old version of the patch. It should
be fixed now.

Honza
> 
> Graham
Steven Bosscher - Oct. 6, 2012, 3:42 p.m.
On Sat, Oct 6, 2012 at 4:39 PM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:

>> If there are no complains I will commit the patch tomorrow.
>
> +1 complaint.
> You're putting profile stuff and even RTL stuff in the pass manager.
> That is Just Wrong.

You already committed the patch. Your tomorrow started early? ;-)

Ciao!
Steven
Jan Hubicka - Oct. 6, 2012, 3:44 p.m.
> On Sat, Oct 6, 2012 at 4:10 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> >> > Index: passes.c
> >> > +/* Hold statistic about profile consistency.  */
> >> ...
> >>
> >> I don't see why this should live in passes.c, can you please put it in
> >> a more logical place (profile.c, perhaps)?
> >
> > Hmm, the problem here is that the code is using passmanager's dumping bits
> > to order the passes and assign them names.  So moving it elsewhere requires
> > exporting it that is not nice.  passes.c does similar stuff already, so I decided
> > to keep it there.
> 
> I think this is not the right decision. We can also throw _all_ code
> on one pile because there are inter-dependencies. Or they can be fixed
> with a proper interface.
> 
> > If there are no complains I will commit the patch tomorrow.
> 
> +1 complaint.
> You're putting profile stuff and even RTL stuff in the pass manager.
> That is Just Wrong.

Hmm, the code really is about collecting statistics of individual passes.
I do not think it is that different from rest of TODO handling logic, but
I do not care much. It is not supposed to be something that will grow over
the time and become essential part of the compiler.

The code uses the CFG/Gimple/RTL interfaces that are both used by many parts of
the compiler (including the passmanager) + passmanager's passes_by_id and
passes_by_id_size that are not exported.

profile.c is not good place because that is only about reading profile
feedback.  cfg.c or predict.c seems better because that is where most of
cfg/profile API lives.  I will probably move it to cfg.c, because it should be
in sync with the BB dumping code that should report the same inconsistencies as
the statistics loop.

Would you preffer exporting profile_record structure and putting the actual
walk of CFG into cfg.c or exporting the passmanager's passes_by_id used by the
dumping? (actually passes_by_id misses "static" in the declaratoin, so it is
matter of putting it to the header)  I will implement one of those.

Honza
> 
> Ciao!
> Steven
Jan Hubicka - Oct. 6, 2012, 3:45 p.m.
> On Sat, Oct 6, 2012 at 4:39 PM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> 
> >> If there are no complains I will commit the patch tomorrow.
> >
> > +1 complaint.
> > You're putting profile stuff and even RTL stuff in the pass manager.
> > That is Just Wrong.
> 
> You already committed the patch. Your tomorrow started early? ;-)

The tomorrow you cite is from Thursday :)

Honza
Jan Hubicka - Oct. 6, 2012, 3:56 p.m.
Hi,
does this look better? Moving to cfg.c would importing tree-pass.h and rtl.h
that is not cool either. predict.c does all of these.
Obviously can also go to a separate file, if preferred.

If it looks fine to you, I will commit it after testing.

Honza

Index: tree-pass.h
===================================================================
*** tree-pass.h	(revision 192158)
--- tree-pass.h	(working copy)
*************** extern void do_per_function_toporder (vo
*** 547,551 ****
--- 547,553 ----
  extern void disable_pass (const char *);
  extern void enable_pass (const char *);
  extern void dump_passes (void);
+ extern struct opt_pass **passes_by_id;
+ extern int passes_by_id_size;
  
  #endif /* GCC_TREE_PASS_H */
Index: predict.c
===================================================================
*** predict.c	(revision 192158)
--- predict.c	(working copy)
*************** rebuild_frequencies (void)
*** 2799,2801 ****
--- 2799,3005 ----
      gcc_unreachable ();
    timevar_pop (TV_REBUILD_FREQUENCIES);
  }
+ 
+ /* Make statistic about profile consistency.  */
+ 
+ struct profile_record
+ {
+   int num_mismatched_freq_in[2];
+   int num_mismatched_freq_out[2];
+   int num_mismatched_count_in[2];
+   int num_mismatched_count_out[2];
+   bool run;
+   gcov_type time[2];
+   int size[2];
+ };
+ 
+ static struct profile_record *profile_record;
+ 
+ static void
+ check_profile_consistency (int index, int subpass, bool run)
+ {
+   basic_block bb;
+   edge_iterator ei;
+   edge e;
+   int sum;
+   gcov_type lsum;
+ 
+   if (index == -1)
+     return;
+   if (!profile_record)
+     profile_record = XCNEWVEC (struct profile_record,
+ 			       passes_by_id_size);
+   gcc_assert (index < passes_by_id_size && index >= 0);
+   gcc_assert (subpass < 2);
+   profile_record[index].run |= run;
+ 
+   FOR_ALL_BB (bb)
+    {
+       if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun)
+ 	  && profile_status != PROFILE_ABSENT)
+ 	{
+ 	  sum = 0;
+ 	  FOR_EACH_EDGE (e, ei, bb->succs)
+ 	    sum += e->probability;
+ 	  if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100)
+ 	    profile_record[index].num_mismatched_freq_out[subpass]++;
+ 	  lsum = 0;
+ 	  FOR_EACH_EDGE (e, ei, bb->succs)
+ 	    lsum += e->count;
+ 	  if (EDGE_COUNT (bb->succs)
+ 	      && (lsum - bb->count > 100 || lsum - bb->count < -100))
+ 	    profile_record[index].num_mismatched_count_out[subpass]++;
+ 	}
+       if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+ 	  && profile_status != PROFILE_ABSENT)
+ 	{
+ 	  sum = 0;
+ 	  FOR_EACH_EDGE (e, ei, bb->preds)
+ 	    sum += EDGE_FREQUENCY (e);
+ 	  if (abs (sum - bb->frequency) > 100
+ 	      || (MAX (sum, bb->frequency) > 10
+ 		  && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10))
+ 	    profile_record[index].num_mismatched_freq_in[subpass]++;
+ 	  lsum = 0;
+ 	  FOR_EACH_EDGE (e, ei, bb->preds)
+ 	    lsum += e->count;
+ 	  if (lsum - bb->count > 100 || lsum - bb->count < -100)
+ 	    profile_record[index].num_mismatched_count_in[subpass]++;
+ 	}
+       if (bb == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+ 	  || bb == EXIT_BLOCK_PTR_FOR_FUNCTION (cfun))
+ 	continue;
+       if ((cfun && (cfun->curr_properties & PROP_trees)))
+ 	{
+ 	  gimple_stmt_iterator i;
+ 
+ 	  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+ 	    {
+ 	      profile_record[index].size[subpass]
+ 		 += estimate_num_insns (gsi_stmt (i), &eni_size_weights);
+ 	      if (profile_status == PROFILE_READ)
+ 		profile_record[index].time[subpass]
+ 		   += estimate_num_insns (gsi_stmt (i),
+ 					  &eni_time_weights) * bb->count;
+ 	      else if (profile_status == PROFILE_GUESSED)
+ 		profile_record[index].time[subpass]
+ 		   += estimate_num_insns (gsi_stmt (i),
+ 					  &eni_time_weights) * bb->frequency;
+ 	    }
+ 	}
+       else if (cfun && (cfun->curr_properties & PROP_rtl))
+ 	{
+ 	  rtx insn;
+ 	  for (insn = NEXT_INSN (BB_HEAD (bb)); insn && insn != NEXT_INSN (BB_END (bb));
+ 	       insn = NEXT_INSN (insn))
+ 	    if (INSN_P (insn))
+ 	      {
+ 		profile_record[index].size[subpass]
+ 		   += insn_rtx_cost (PATTERN (insn), false);
+ 		if (profile_status == PROFILE_READ)
+ 		  profile_record[index].time[subpass]
+ 		     += insn_rtx_cost (PATTERN (insn), true) * bb->count;
+ 		else if (profile_status == PROFILE_GUESSED)
+ 		  profile_record[index].time[subpass]
+ 		     += insn_rtx_cost (PATTERN (insn), true) * bb->frequency;
+ 	      }
+ 	}
+    }
+ }
+ 
+ /* Output profile consistency.  */
+ 
+ void
+ dump_profile_report (void)
+ {
+   int i, j;
+   int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
+   gcov_type last_time = 0, last_size = 0;
+   double rel_time_change, rel_size_change;
+   int last_reported = 0;
+ 
+   if (!profile_record)
+     return;
+   fprintf (stderr, "\nProfile consistency report:\n\n");
+   fprintf (stderr, "Pass name                        |mismatch in |mismated out|Overall\n");
+   fprintf (stderr, "                                 |freq count  |freq count  |size      time\n");
+ 	   
+   for (i = 0; i < passes_by_id_size; i++)
+     for (j = 0 ; j < 2; j++)
+       if (profile_record[i].run)
+ 	{
+ 	  if (last_time)
+ 	    rel_time_change = (profile_record[i].time[j]
+ 			       - (double)last_time) * 100 / (double)last_time;
+ 	  else
+ 	    rel_time_change = 0;
+ 	  if (last_size)
+ 	    rel_size_change = (profile_record[i].size[j]
+ 			       - (double)last_size) * 100 / (double)last_size;
+ 	  else
+ 	    rel_size_change = 0;
+ 
+ 	  if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in
+ 	      || profile_record[i].num_mismatched_freq_out[j] != last_freq_out
+ 	      || profile_record[i].num_mismatched_count_in[j] != last_count_in
+ 	      || profile_record[i].num_mismatched_count_out[j] != last_count_out
+ 	      || rel_time_change || rel_size_change)
+ 	    {
+ 	      last_reported = i;
+               fprintf (stderr, "%-20s %s",
+ 		       passes_by_id [i]->name,
+ 		       j ? "(after TODO)" : "            ");
+ 	      if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in)
+ 		fprintf (stderr, "| %+5i",
+ 		         profile_record[i].num_mismatched_freq_in[j]
+ 			  - last_freq_in);
+ 	      else
+ 		fprintf (stderr, "|      ");
+ 	      if (profile_record[i].num_mismatched_count_in[j] != last_count_in)
+ 		fprintf (stderr, " %+5i",
+ 		         profile_record[i].num_mismatched_count_in[j]
+ 			  - last_count_in);
+ 	      else
+ 		fprintf (stderr, "      ");
+ 	      if (profile_record[i].num_mismatched_freq_out[j] != last_freq_out)
+ 		fprintf (stderr, "| %+5i",
+ 		         profile_record[i].num_mismatched_freq_out[j]
+ 			  - last_freq_out);
+ 	      else
+ 		fprintf (stderr, "|      ");
+ 	      if (profile_record[i].num_mismatched_count_out[j] != last_count_out)
+ 		fprintf (stderr, " %+5i",
+ 		         profile_record[i].num_mismatched_count_out[j]
+ 			  - last_count_out);
+ 	      else
+ 		fprintf (stderr, "      ");
+ 
+ 	      /* Size/time units change across gimple and RTL.  */
+ 	      if (i == pass_expand.pass.static_pass_number)
+ 		fprintf (stderr, "|----------");
+ 	      else
+ 		{
+ 		  if (rel_size_change)
+ 		    fprintf (stderr, "| %+8.4f%%", rel_size_change);
+ 		  else
+ 		    fprintf (stderr, "|          ");
+ 		  if (rel_time_change)
+ 		    fprintf (stderr, " %+8.4f%%", rel_time_change);
+ 		}
+ 	      fprintf (stderr, "\n");
+ 	      last_freq_in = profile_record[i].num_mismatched_freq_in[j];
+ 	      last_freq_out = profile_record[i].num_mismatched_freq_out[j];
+ 	      last_count_in = profile_record[i].num_mismatched_count_in[j];
+ 	      last_count_out = profile_record[i].num_mismatched_count_out[j];
+ 	    }
+ 	  else if (j && last_reported != i)
+ 	    {
+ 	      last_reported = i;
+               fprintf (stderr, "%-20s ------------|            |            |\n",
+ 		       passes_by_id [i]->name);
+ 	    }
+ 	  last_time = profile_record[i].time[j];
+ 	  last_size = profile_record[i].size[j];
+ 	}
+ }
+ 
Index: basic-block.h
===================================================================
*** basic-block.h	(revision 192158)
--- basic-block.h	(working copy)
*************** extern void alloc_aux_for_edge (edge, in
*** 741,746 ****
--- 741,747 ----
  extern void alloc_aux_for_edges (int);
  extern void clear_aux_for_edges (void);
  extern void free_aux_for_edges (void);
+ extern void check_profile_consistency (int, int, bool);
  
  /* In cfganal.c  */
  extern void find_unreachable_blocks (void);
Index: passes.c
===================================================================
*** passes.c	(revision 192164)
--- passes.c	(working copy)
*************** execute_function_dump (void *data ATTRIB
*** 1778,1986 ****
      }
  }
  
- /* Make statistic about profile consistency.  */
- 
- struct profile_record
- {
-   int num_mismatched_freq_in[2];
-   int num_mismatched_freq_out[2];
-   int num_mismatched_count_in[2];
-   int num_mismatched_count_out[2];
-   bool run;
-   gcov_type time[2];
-   int size[2];
- };
- 
- static struct profile_record *profile_record;
- 
- static void
- check_profile_consistency (int index, int subpass, bool run)
- {
-   basic_block bb;
-   edge_iterator ei;
-   edge e;
-   int sum;
-   gcov_type lsum;
- 
-   if (index == -1)
-     return;
-   if (!profile_record)
-     profile_record = XCNEWVEC (struct profile_record,
- 			       passes_by_id_size);
-   gcc_assert (index < passes_by_id_size && index >= 0);
-   gcc_assert (subpass < 2);
-   profile_record[index].run |= run;
- 
-   FOR_ALL_BB (bb)
-    {
-       if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun)
- 	  && profile_status != PROFILE_ABSENT)
- 	{
- 	  sum = 0;
- 	  FOR_EACH_EDGE (e, ei, bb->succs)
- 	    sum += e->probability;
- 	  if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100)
- 	    profile_record[index].num_mismatched_freq_out[subpass]++;
- 	  lsum = 0;
- 	  FOR_EACH_EDGE (e, ei, bb->succs)
- 	    lsum += e->count;
- 	  if (EDGE_COUNT (bb->succs)
- 	      && (lsum - bb->count > 100 || lsum - bb->count < -100))
- 	    profile_record[index].num_mismatched_count_out[subpass]++;
- 	}
-       if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
- 	  && profile_status != PROFILE_ABSENT)
- 	{
- 	  sum = 0;
- 	  FOR_EACH_EDGE (e, ei, bb->preds)
- 	    sum += EDGE_FREQUENCY (e);
- 	  if (abs (sum - bb->frequency) > 100
- 	      || (MAX (sum, bb->frequency) > 10
- 		  && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10))
- 	    profile_record[index].num_mismatched_freq_in[subpass]++;
- 	  lsum = 0;
- 	  FOR_EACH_EDGE (e, ei, bb->preds)
- 	    lsum += e->count;
- 	  if (lsum - bb->count > 100 || lsum - bb->count < -100)
- 	    profile_record[index].num_mismatched_count_in[subpass]++;
- 	}
-       if (bb == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
- 	  || bb == EXIT_BLOCK_PTR_FOR_FUNCTION (cfun))
- 	continue;
-       if ((cfun && (cfun->curr_properties & PROP_trees)))
- 	{
- 	  gimple_stmt_iterator i;
- 
- 	  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
- 	    {
- 	      profile_record[index].size[subpass]
- 		 += estimate_num_insns (gsi_stmt (i), &eni_size_weights);
- 	      if (profile_status == PROFILE_READ)
- 		profile_record[index].time[subpass]
- 		   += estimate_num_insns (gsi_stmt (i),
- 					  &eni_time_weights) * bb->count;
- 	      else if (profile_status == PROFILE_GUESSED)
- 		profile_record[index].time[subpass]
- 		   += estimate_num_insns (gsi_stmt (i),
- 					  &eni_time_weights) * bb->frequency;
- 	    }
- 	}
-       else if (cfun && (cfun->curr_properties & PROP_rtl))
- 	{
- 	  rtx insn;
- 	  for (insn = NEXT_INSN (BB_HEAD (bb)); insn && insn != NEXT_INSN (BB_END (bb));
- 	       insn = NEXT_INSN (insn))
- 	    if (INSN_P (insn))
- 	      {
- 		profile_record[index].size[subpass]
- 		   += insn_rtx_cost (PATTERN (insn), false);
- 		if (profile_status == PROFILE_READ)
- 		  profile_record[index].time[subpass]
- 		     += insn_rtx_cost (PATTERN (insn), true) * bb->count;
- 		else if (profile_status == PROFILE_GUESSED)
- 		  profile_record[index].time[subpass]
- 		     += insn_rtx_cost (PATTERN (insn), true) * bb->frequency;
- 	      }
- 	}
-    }
- }
- 
- /* Output profile consistency.  */
- 
- void
- dump_profile_report (void)
- {
-   int i, j;
-   int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
-   gcov_type last_time = 0, last_size = 0;
-   double rel_time_change, rel_size_change;
-   int last_reported = 0;
- 
-   if (!profile_record)
-     return;
-   fprintf (stderr, "\nProfile consistency report:\n\n");
-   fprintf (stderr, "Pass name                        |mismatch in |mismated out|Overall\n");
-   fprintf (stderr, "                                 |freq count  |freq count  |size      time\n");
- 	   
-   for (i = 0; i < passes_by_id_size; i++)
-     for (j = 0 ; j < 2; j++)
-       if (profile_record[i].run)
- 	{
- 	  if (last_time)
- 	    rel_time_change = (profile_record[i].time[j]
- 			       - (double)last_time) * 100 / (double)last_time;
- 	  else
- 	    rel_time_change = 0;
- 	  if (last_size)
- 	    rel_size_change = (profile_record[i].size[j]
- 			       - (double)last_size) * 100 / (double)last_size;
- 	  else
- 	    rel_size_change = 0;
- 
- 	  if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in
- 	      || profile_record[i].num_mismatched_freq_out[j] != last_freq_out
- 	      || profile_record[i].num_mismatched_count_in[j] != last_count_in
- 	      || profile_record[i].num_mismatched_count_out[j] != last_count_out
- 	      || rel_time_change || rel_size_change)
- 	    {
- 	      last_reported = i;
-               fprintf (stderr, "%-20s %s",
- 		       passes_by_id [i]->name,
- 		       j ? "(after TODO)" : "            ");
- 	      if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in)
- 		fprintf (stderr, "| %+5i",
- 		         profile_record[i].num_mismatched_freq_in[j]
- 			  - last_freq_in);
- 	      else
- 		fprintf (stderr, "|      ");
- 	      if (profile_record[i].num_mismatched_count_in[j] != last_count_in)
- 		fprintf (stderr, " %+5i",
- 		         profile_record[i].num_mismatched_count_in[j]
- 			  - last_count_in);
- 	      else
- 		fprintf (stderr, "      ");
- 	      if (profile_record[i].num_mismatched_freq_out[j] != last_freq_out)
- 		fprintf (stderr, "| %+5i",
- 		         profile_record[i].num_mismatched_freq_out[j]
- 			  - last_freq_out);
- 	      else
- 		fprintf (stderr, "|      ");
- 	      if (profile_record[i].num_mismatched_count_out[j] != last_count_out)
- 		fprintf (stderr, " %+5i",
- 		         profile_record[i].num_mismatched_count_out[j]
- 			  - last_count_out);
- 	      else
- 		fprintf (stderr, "      ");
- 
- 	      /* Size/time units change across gimple and RTL.  */
- 	      if (i == pass_expand.pass.static_pass_number)
- 		fprintf (stderr, "|----------");
- 	      else
- 		{
- 		  if (rel_size_change)
- 		    fprintf (stderr, "| %+8.4f%%", rel_size_change);
- 		  else
- 		    fprintf (stderr, "|          ");
- 		  if (rel_time_change)
- 		    fprintf (stderr, " %+8.4f%%", rel_time_change);
- 		}
- 	      fprintf (stderr, "\n");
- 	      last_freq_in = profile_record[i].num_mismatched_freq_in[j];
- 	      last_freq_out = profile_record[i].num_mismatched_freq_out[j];
- 	      last_count_in = profile_record[i].num_mismatched_count_in[j];
- 	      last_count_out = profile_record[i].num_mismatched_count_out[j];
- 	    }
- 	  else if (j && last_reported != i)
- 	    {
- 	      last_reported = i;
-               fprintf (stderr, "%-20s ------------|            |            |\n",
- 		       passes_by_id [i]->name);
- 	    }
- 	  last_time = profile_record[i].time[j];
- 	  last_size = profile_record[i].size[j];
- 	}
- }
- 
  /* Perform all TODO actions that ought to be done on each function.  */
  
  static void
--- 1778,1783 ----
Steven Bosscher - Oct. 8, 2012, 9:31 p.m.
On Sat, Oct 6, 2012 at 5:56 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> does this look better? Moving to cfg.c would importing tree-pass.h and rtl.h
> that is not cool either. predict.c does all of these.
> Obviously can also go to a separate file, if preferred.

Attached is how I would do it. What do you think about this?

Ciao!
Steven

        * basic-block. (profile_record): New struct, moved from passes.c.
        * cfghooks.h (struct cfg_hooks) <account_profile_record>: New hook.
        (account_profile_record): New prototype.
        * cfghooks.c (account_profile_record): New function.
        * tree-cfg.c (gimple_account_profile_record): New function
        (gimple_cfg_hooks): Add it.
        * cfgrtl.c (rtl_account_profile_record): New function
        (rtl_cfg_hooks, cfg_layout_rtl_cfg_hooks): Add it.
        * passes.c (check_profile_consistency): Simplify.  Move IR-dependent
        code around using cfghooks machinery.
Jan Hubicka - Oct. 9, 2012, 8:44 a.m.
> On Sat, Oct 6, 2012 at 5:56 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> > Hi,
> > does this look better? Moving to cfg.c would importing tree-pass.h and rtl.h
> > that is not cool either. predict.c does all of these.
> > Obviously can also go to a separate file, if preferred.
> 
> Attached is how I would do it. What do you think about this?
> 
> Ciao!
> Steven
> 
>         * basic-block. (profile_record): New struct, moved from passes.c.
>         * cfghooks.h (struct cfg_hooks) <account_profile_record>: New hook.
>         (account_profile_record): New prototype.
>         * cfghooks.c (account_profile_record): New function.
>         * tree-cfg.c (gimple_account_profile_record): New function
>         (gimple_cfg_hooks): Add it.
>         * cfgrtl.c (rtl_account_profile_record): New function
>         (rtl_cfg_hooks, cfg_layout_rtl_cfg_hooks): Add it.
>         * passes.c (check_profile_consistency): Simplify.  Move IR-dependent
>         code around using cfghooks machinery.

OK, I did not wanted to go as far as adding a hook, but why not.  Patch approved.
Thanks for looking into it!
Honza

Patch

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 192116)
+++ doc/invoke.texi	(working copy)
@@ -388,7 +388,7 @@  Objective-C and Objective-C++ Dialects}.
 -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
 -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
 -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
--fprefetch-loop-arrays @gol
+-fprefetch-loop-arrays -fprofile-report @gol
 -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
 -fprofile-generate=@var{path} @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
@@ -5153,6 +5153,11 @@  allocation for the WPA phase only.
 Makes the compiler print some statistics about permanent memory
 allocation before or after interprocedural optimization.
 
+@item -fprofile-report
+@opindex fprofile-report
+Makes the compiler print some statistics about consistency of the
+(estimated) profile and effect of individual passes.
+
 @item -fstack-usage
 @opindex fstack-usage
 Makes the compiler output stack usage information for the program, on a
Index: common.opt
===================================================================
--- common.opt	(revision 192116)
+++ common.opt	(working copy)
@@ -1654,6 +1654,10 @@  fprofile-values
 Common Report Var(flag_profile_values)
 Insert code to profile values of expressions
 
+fprofile-report
+Common Report Var(profile_report)
+Report on consistency of profile
+
 frandom-seed
 Common Var(common_deferred_options) Defer
 
Index: toplev.c
===================================================================
--- toplev.c	(revision 192116)
+++ toplev.c	(working copy)
@@ -1815,6 +1815,9 @@  finalize (bool no_backend)
   if (mem_report)
     dump_memory_report (true);
 
+  if (dump_profile_report)
+    dump_profile_report ();
+
   /* Language-specific end of compilation actions.  */
   lang_hooks.finish ();
 }
Index: toplev.h
===================================================================
--- toplev.h	(revision 192116)
+++ toplev.h	(working copy)
@@ -49,6 +49,7 @@  extern void emit_debug_global_declaratio
 extern void write_global_declarations (void);
 
 extern void dump_memory_report (bool);
+extern void dump_profile_report (void);
 
 extern void target_reinit (void);
 
Index: passes.c
===================================================================
--- passes.c	(revision 192116)
+++ passes.c	(working copy)
@@ -1778,6 +1780,209 @@  execute_function_dump (void *data ATTRIB
     }
 }
 
+/* Make statistic about profile consistency.  */
+
+struct profile_record
+{
+  int num_mismatched_freq_in[2];
+  int num_mismatched_freq_out[2];
+  int num_mismatched_count_in[2];
+  int num_mismatched_count_out[2];
+  bool run;
+  gcov_type time[2];
+  int size[2];
+};
+
+static struct profile_record *profile_record;
+
+static void
+check_profile_consistency (int index, int subpass, bool run)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  int sum;
+  gcov_type lsum;
+
+  if (index == -1)
+    return;
+  if (!profile_record)
+    profile_record = XCNEWVEC (struct profile_record,
+			       passes_by_id_size);
+  gcc_assert (index < passes_by_id_size && index >= 0);
+  gcc_assert (subpass < 2);
+  profile_record[index].run |= run;
+
+  FOR_ALL_BB (bb)
+   {
+      if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun)
+	  && profile_status != PROFILE_ABSENT)
+	{
+	  sum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    sum += e->probability;
+	  if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100)
+	    profile_record[index].num_mismatched_freq_out[subpass]++;
+	  lsum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    lsum += e->count;
+	  if (EDGE_COUNT (bb->succs)
+	      && (lsum - bb->count > 100 || lsum - bb->count < -100))
+	    profile_record[index].num_mismatched_count_out[subpass]++;
+	}
+      if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+	  && profile_status != PROFILE_ABSENT)
+	{
+	  sum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    sum += EDGE_FREQUENCY (e);
+	  if (abs (sum - bb->frequency) > 100
+	      || (MAX (sum, bb->frequency) > 10
+		  && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10))
+	    profile_record[index].num_mismatched_freq_in[subpass]++;
+	  lsum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    lsum += e->count;
+	  if (lsum - bb->count > 100 || lsum - bb->count < -100)
+	    profile_record[index].num_mismatched_count_in[subpass]++;
+	}
+      if (bb == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+	  || bb == EXIT_BLOCK_PTR_FOR_FUNCTION (cfun))
+	continue;
+      if ((cfun && (cfun->curr_properties & PROP_trees)))
+	{
+	  gimple_stmt_iterator i;
+
+	  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+	    {
+	      profile_record[index].size[subpass]
+		 += estimate_num_insns (gsi_stmt (i), &eni_size_weights);
+	      if (profile_status == PROFILE_READ)
+		profile_record[index].time[subpass]
+		   += estimate_num_insns (gsi_stmt (i),
+					  &eni_time_weights) * bb->count;
+	      else if (profile_status == PROFILE_GUESSED)
+		profile_record[index].time[subpass]
+		   += estimate_num_insns (gsi_stmt (i),
+					  &eni_time_weights) * bb->frequency;
+	    }
+	}
+      else if (cfun && (cfun->curr_properties & PROP_rtl))
+	{
+	  rtx insn;
+	  for (insn = NEXT_INSN (BB_HEAD (bb)); insn && insn != NEXT_INSN (BB_END (bb));
+	       insn = NEXT_INSN (insn))
+	    if (INSN_P (insn))
+	      {
+		profile_record[index].size[subpass]
+		   += insn_rtx_cost (PATTERN (insn), false);
+		if (profile_status == PROFILE_READ)
+		  profile_record[index].time[subpass]
+		     += insn_rtx_cost (PATTERN (insn), true) * bb->count;
+		else if (profile_status == PROFILE_GUESSED)
+		  profile_record[index].time[subpass]
+		     += insn_rtx_cost (PATTERN (insn), true) * bb->frequency;
+	      }
+	}
+   }
+}
+
+/* Output profile consistency.  */
+
+void
+dump_profile_report (void)
+{
+  int i, j;
+  int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
+  gcov_type last_time, last_size;
+  double rel_time_change, rel_size_change;
+  int last_reported;
+
+  if (!profile_record)
+    return;
+  fprintf (stderr, "\nProfile consistency report:\n\n");
+  fprintf (stderr, "Pass name                        |mismatch in |mismated out|Overall\n");
+  fprintf (stderr, "                                 |freq count  |freq count  |size   time\n");
+	   
+  for (i = 0; i < passes_by_id_size; i++)
+    for (j = 0 ; j < 2; j++)
+      if (profile_record[i].run)
+	{
+	  if (last_time)
+	    rel_time_change = (profile_record[i].time[j]
+			       - (double)last_time) * 100 / (double)last_time;
+	  else
+	    rel_time_change = 0;
+	  if (last_size)
+	    rel_size_change = (profile_record[i].size[j]
+			       - (double)last_size) * 100 / (double)last_size;
+	  else
+	    rel_size_change = 0;
+
+	  if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in
+	      || profile_record[i].num_mismatched_freq_out[j] != last_freq_out
+	      || profile_record[i].num_mismatched_count_in[j] != last_count_in
+	      || profile_record[i].num_mismatched_count_out[j] != last_count_out
+	      || rel_time_change || rel_size_change)
+	    {
+	      last_reported = i;
+              fprintf (stderr, "%-20s %s",
+		       passes_by_id [i]->name,
+		       j ? "(after TODO)" : "            ");
+	      if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in)
+		fprintf (stderr, "| %+5i",
+		         profile_record[i].num_mismatched_freq_in[j]
+			  - last_freq_in);
+	      else
+		fprintf (stderr, "|      ");
+	      if (profile_record[i].num_mismatched_count_in[j] != last_count_in)
+		fprintf (stderr, " %+5i",
+		         profile_record[i].num_mismatched_count_in[j]
+			  - last_count_in);
+	      else
+		fprintf (stderr, "      ");
+	      if (profile_record[i].num_mismatched_freq_out[j] != last_freq_out)
+		fprintf (stderr, "| %+5i",
+		         profile_record[i].num_mismatched_freq_out[j]
+			  - last_freq_out);
+	      else
+		fprintf (stderr, "|      ");
+	      if (profile_record[i].num_mismatched_count_out[j] != last_count_out)
+		fprintf (stderr, " %+5i",
+		         profile_record[i].num_mismatched_count_out[j]
+			  - last_count_out);
+	      else
+		fprintf (stderr, "      ");
+
+	      /* Size/time units change across gimple and RTL.  */
+	      if (i == pass_expand.pass.static_pass_number)
+		fprintf (stderr, "|----------");
+	      else
+		{
+		  if (rel_size_change)
+		    fprintf (stderr, "| %+8.4f%%", rel_size_change);
+		  else
+		    fprintf (stderr, "|          ");
+		  if (rel_time_change)
+		    fprintf (stderr, " %+8.4f%%", rel_time_change);
+		}
+	      fprintf (stderr, "\n");
+	      last_freq_in = profile_record[i].num_mismatched_freq_in[j];
+	      last_freq_out = profile_record[i].num_mismatched_freq_out[j];
+	      last_count_in = profile_record[i].num_mismatched_count_in[j];
+	      last_count_out = profile_record[i].num_mismatched_count_out[j];
+	    }
+	  else if (j && last_reported != i)
+	    {
+	      last_reported = i;
+              fprintf (stderr, "%-20s ------------|            |            |\n",
+		       passes_by_id [i]->name);
+	    }
+	  last_time = profile_record[i].time[j];
+	  last_size = profile_record[i].size[j];
+	}
+}
+
 /* Perform all TODO actions that ought to be done on each function.  */
 
 static void
@@ -2042,9 +2247,14 @@  execute_one_ipa_transform_pass (struct c
   if (pass->tv_id != TV_NONE)
     timevar_pop (pass->tv_id);
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 0, true);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after);
   verify_interpass_invariants ();
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 1, true);
 
   do_per_function (execute_function_dump, NULL);
   pass_fini_dump_file (pass);
@@ -2144,6 +2354,13 @@  execute_one_pass (struct opt_pass *pass)
 
   if (!gate_status)
     {
+      /* Run so passes selectively disabling themselves on a given function
+	 are not miscounted.  */
+      if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+	{
+          check_profile_consistency (pass->static_pass_number, 0, false);
+          check_profile_consistency (pass->static_pass_number, 1, false);
+	}
       current_pass = NULL;
       return false;
     }
@@ -2210,8 +2427,14 @@  execute_one_pass (struct opt_pass *pass)
       clean_graph_dump_file (dump_file_name);
     }
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 0, true);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after | pass->todo_flags_finish);
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 1, true);
+
   verify_interpass_invariants ();
   do_per_function (execute_function_dump, NULL);
   if (pass->type == IPA_PASS)