Patchwork Profile housekeeping 6/n (-fprofile-consistency-report)

login
register
mail settings
Submitter Jan Hubicka
Date Oct. 4, 2012, 2:01 p.m.
Message ID <20121004140127.GA15812@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/189159/
State New
Headers show

Comments

Jan Hubicka - Oct. 4, 2012, 2:01 p.m.
Hi,
this patch implements -fprofile-consistency-report that is useful to get an
statistic about what pass are major offenders in keeping profile up-to-date.

For example the following is output for combine.c
 Pass: fnsplit              (after pass) mismatched in:  +16 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: fnsplit              (after TODO) mismatched in:  -16 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: inline               (after pass) mismatched in: +197 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: inline               (after TODO) mismatched in: -209 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ccp                  (after TODO) mismatched in:   +8 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after pass) mismatched in: +191 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after TODO) mismatched in:  +25 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dce                  (after TODO) mismatched in:  -19 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cdce                 (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cselim               (after pass) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ifcombine            (after TODO) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: phiopt               (after pass) mismatched in:   -2 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ch                   (after pass) mismatched in:   +2 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ch                   (after TODO) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after pass) mismatched in:  +89 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after TODO) mismatched in:   -3 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: phicprop             (after TODO) mismatched in:   -6 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dce                  (after TODO) mismatched in:   -2 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: copyprop             (after TODO) mismatched in:  -17 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: unswitch             (after pass) mismatched in:   +7 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: unswitch             (after TODO) mismatched in:  +19 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cunroll              (after pass) mismatched in:  +10 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after pass) mismatched in:  +18 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after TODO) mismatched in:   -8 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after pass) mismatched in:  +14 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after TODO) mismatched in:   +4 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: phicprop             (after TODO) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cddce                (after TODO) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: expand               (after pass) mismatched in: +435 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: jump                 (after pass) mismatched in:   +6 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cse1                 (after pass) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cprop                (after pass) mismatched in:   -8 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: rtl pre              (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cse_local            (after pass) mismatched in:   -7 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ce1                  (after pass) mismatched in:   +5 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: loop2_init           (after pass) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: loop2_done           (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: reload               (after pass) mismatched in:   -4 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: gcse2                (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: split2               (after pass) mismatched in:   +1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: pro_and_epilogue     (after pass) mismatched in:   +4 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dse2                 (after pass) mismatched in:   -2 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: jump2                (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ce3                  (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: bbro                 (after pass) mismatched in:   -6 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: compgotos            (after pass) mismatched in:   -1 (freqs)   +0 (counts); michmatched out:   +0 (freqs)   +0 (counts)

As a quick explanation, inliner is recomputing frequencies as part of TODO, so
the divergence.  VRP and DOM breaks a lot of probabilities by jump threading
where the original estimate was inconsistent (it would make sense to do some of
jump threading prior profile estimate).
Expansion obviously needs a lot of TLC, not only in switch expansion that
is currently all wrong and quite commonly excercised in combine.c

Regtested/bootstrapped x86_64-linux.  If there are no complains I will commit
the patch tomorrow.

Honza
	* doc/invoke.texi (-fprofile-consistency-report): Document.
	* common.opt (fprofile-consistency-report): New.
	* toplev.h (dump_profile_consistency_report): Declare.
	* toplev.c (finalize): Call dump_profile_consistency_report.
	* passes.c (profile_record): New global var.
	(check_profile_consistency): New function.
	(dump_profile_consistency_report): New function.
	(execute_one_ipa_transform_pass): Call check_profile_consistency.
	(execute_one_pass): Likewise.
Steven Bosscher - Oct. 4, 2012, 2:09 p.m.
On Thu, Oct 4, 2012 at 4:01 PM, Jan Hubicka wrote:
>         * doc/invoke.texi (-fprofile-consistency-report): Document.
>         * common.opt (fprofile-consistency-report): New.
>         * toplev.h (dump_profile_consistency_report): Declare.
>         * toplev.c (finalize): Call dump_profile_consistency_report.
>         * passes.c (profile_record): New global var.
>         (check_profile_consistency): New function.
>         (dump_profile_consistency_report): New function.
>         (execute_one_ipa_transform_pass): Call check_profile_consistency.
>         (execute_one_pass): Likewise.


Nice. And long overdue! :-)


> +fprofile-consistency-report
> +Common Report Var(profile_report)
> +Report on consistency of profile

Maybe make this a -d flag instead of -f?


> Index: passes.c
> +/* Hold statistic about profile consistency.  */
...

I don't see why this should live in passes.c, can you please put it in
a more logical place (profile.c, perhaps)?

Ciao!
Steven
Jan Hubicka - Oct. 4, 2012, 2:40 p.m.
> On Thu, Oct 4, 2012 at 4:01 PM, Jan Hubicka wrote:
> >         * doc/invoke.texi (-fprofile-consistency-report): Document.
> >         * common.opt (fprofile-consistency-report): New.
> >         * toplev.h (dump_profile_consistency_report): Declare.
> >         * toplev.c (finalize): Call dump_profile_consistency_report.
> >         * passes.c (profile_record): New global var.
> >         (check_profile_consistency): New function.
> >         (dump_profile_consistency_report): New function.
> >         (execute_one_ipa_transform_pass): Call check_profile_consistency.
> >         (execute_one_pass): Likewise.
> 
> 
> Nice. And long overdue! :-)
> 
> 
> > +fprofile-consistency-report
> > +Common Report Var(profile_report)
> > +Report on consistency of profile
> 
> Maybe make this a -d flag instead of -f?

time-report and mem-report are also -f, so I guess we shall move all of them or none.
> 
> 
> > Index: passes.c
> > +/* Hold statistic about profile consistency.  */
> ...
> 
> I don't see why this should live in passes.c, can you please put it in
> a more logical place (profile.c, perhaps)?

Hmm, I guess predict.c then.
I had it there but then reminded Richard's effort to pull out functions that
are only called from elsehwere and not using anything from given unit ;)

Honza
> 
> Ciao!
> Steven

Patch

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 191852)
+++ doc/invoke.texi	(working copy)
@@ -386,7 +386,7 @@  Objective-C and Objective-C++ Dialects}.
 -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
 -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
 -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
--fprefetch-loop-arrays @gol
+-fprefetch-loop-arrays -fprofile-consistency-report @gol
 -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
 -fprofile-generate=@var{path} @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
@@ -5149,6 +5149,11 @@  allocation for the WPA phase only.
 Makes the compiler print some statistics about permanent memory
 allocation before or after interprocedural optimization.
 
+@item -fprofile-consistency-report
+@opindex fprofile-consistency-report
+Makes the compiler print some statistics about consistency of the
+(estimated) profile.
+
 @item -fstack-usage
 @opindex fstack-usage
 Makes the compiler output stack usage information for the program, on a
Index: common.opt
===================================================================
--- common.opt	(revision 191852)
+++ common.opt	(working copy)
@@ -1649,6 +1649,10 @@  fprofile-values
 Common Report Var(flag_profile_values)
 Insert code to profile values of expressions
 
+fprofile-consistency-report
+Common Report Var(profile_report)
+Report on consistency of profile
+
 frandom-seed
 Common Var(common_deferred_options) Defer
 
Index: toplev.c
===================================================================
--- toplev.c	(revision 191852)
+++ toplev.c	(working copy)
@@ -1815,6 +1815,9 @@  finalize (bool no_backend)
   if (mem_report)
     dump_memory_report (true);
 
+  if (profile_report)
+    dump_profile_consistency_report ();
+
   /* Language-specific end of compilation actions.  */
   lang_hooks.finish ();
 }
Index: passes.c
===================================================================
--- passes.c	(revision 191852)
+++ passes.c	(working copy)
@@ -1782,6 +1784,108 @@  execute_function_dump (void *data ATTRIB
     }
 }
 
+/* Hold statistic about profile consistency.  */
+
+struct profile_record
+{
+  int num_mismatched_freq_in[2];
+  int num_mismatched_freq_out[2];
+  int num_mismatched_count_in[2];
+  int num_mismatched_count_out[2];
+  bool tested;
+};
+
+static struct profile_record *profile_record;
+
+/* Account profile inconsistencies for pass INDEX. If SUBPASS is non-zero, the
+   accounting happens after TODO.  */
+
+static void
+check_profile_consistency (int index, int subpass)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  int sum;
+  gcov_type lsum;
+
+  if (index == -1)
+    return;
+  if (!profile_record)
+    profile_record = XCNEWVEC (struct profile_record,
+			       passes_by_id_size);
+  gcc_assert (index < passes_by_id_size && index >= 0);
+  gcc_assert (subpass < 2);
+  profile_record[index].tested = true;
+
+  FOR_ALL_BB (bb)
+   {
+      if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun))
+	{
+	  sum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    sum += e->probability;
+	  if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100)
+	    profile_record[index].num_mismatched_freq_out[subpass]++;
+	  lsum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    lsum += e->count;
+	  if (EDGE_COUNT (bb->succs)
+	      && (lsum - bb->count > 100 || lsum - bb->count < -100))
+	    profile_record[index].num_mismatched_count_out[subpass]++;
+	}
+	if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun))
+	  {
+	    sum = 0;
+	    FOR_EACH_EDGE (e, ei, bb->preds)
+	      sum += EDGE_FREQUENCY (e);
+	    if (abs (sum - bb->frequency) > 100
+		|| (MAX (sum, bb->frequency) > 10
+		    && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10))
+	      profile_record[index].num_mismatched_freq_in[subpass]++;
+	    lsum = 0;
+	    FOR_EACH_EDGE (e, ei, bb->preds)
+	      lsum += e->count;
+	    if (lsum - bb->count > 100 || lsum - bb->count < -100)
+	      profile_record[index].num_mismatched_count_in[subpass]++;
+	  }
+   }
+}
+
+/* Output profile consistency.  */
+
+void
+dump_profile_consistency_report (void)
+{
+  int i, j;
+  int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
+
+  if (!profile_record)
+    return;
+  fprintf (stderr, "\nProfile consistency report:\n");
+  for (i = 0; i < passes_by_id_size; i++)
+    for (j = 0 ; j < 2; j++)
+      if ((profile_record[i].num_mismatched_freq_in[j] != last_freq_in
+	   || profile_record[i].num_mismatched_freq_out[j] != last_freq_out
+	   || profile_record[i].num_mismatched_count_in[j] != last_count_in
+	   || profile_record[i].num_mismatched_count_out[j] != last_count_out)
+	  && profile_record[i].tested)
+      {
+	fprintf (stderr," Pass: %-20s %s mismatched in: %+4i (freqs) %+4i (counts); "
+		 "michmatched out: %+4i (freqs) %+4i (counts)\n",
+		 passes_by_id [i]->name,
+		 j ? "(after TODO)" : "(after pass)",
+		 profile_record[i].num_mismatched_freq_in[j] - last_freq_in,
+		 profile_record[i].num_mismatched_count_in[j] - last_count_in,
+		 profile_record[i].num_mismatched_freq_out[j] - last_freq_out,
+		 profile_record[i].num_mismatched_count_out[j] - last_count_out);
+	last_freq_in = profile_record[i].num_mismatched_freq_in[j];
+	last_freq_out = profile_record[i].num_mismatched_freq_out[j];
+	last_count_in = profile_record[i].num_mismatched_count_in[j];
+	last_count_out = profile_record[i].num_mismatched_count_out[j];
+      }
+}
+
 /* Perform all TODO actions that ought to be done on each function.  */
 
 static void
@@ -2050,9 +2150,16 @@  execute_one_ipa_transform_pass (struct c
   if (pass->tv_id != TV_NONE)
     timevar_pop (pass->tv_id);
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 0);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after);
   verify_interpass_invariants ();
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 1);
 
   do_per_function (execute_function_dump, NULL);
   pass_fini_dump_file (pass);
@@ -2218,8 +2325,16 @@  execute_one_pass (struct opt_pass *pass)
       clean_graph_dump_file (dump_file_name);
     }
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 0);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after | pass->todo_flags_finish);
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 1);
+
   verify_interpass_invariants ();
   do_per_function (execute_function_dump, NULL);
   if (pass->type == IPA_PASS)
Index: toplev.h
===================================================================
--- toplev.h	(revision 191852)
+++ toplev.h	(working copy)
@@ -49,6 +49,7 @@  extern void emit_debug_global_declaratio
 extern void write_global_declarations (void);
 
 extern void dump_memory_report (bool);
+extern void dump_profile_consistency_report (void);
 
 extern void target_reinit (void);