Patchwork Cross-module indirect call transformation

login
register
mail settings
Submitter Jan Hubicka
Date Aug. 9, 2013, 10:40 p.m.
Message ID <20130809224022.GA12350@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/266177/
State New
Headers show

Comments

Jan Hubicka - Aug. 9, 2013, 10:40 p.m.
Hi,
this makes the whole indirect call machinery fly.  The histograms for
cross-module indirect calls, now collected by value-prof, are still turned at
compile time into common targets stored in cgraph edges (in
ipa_profile_generate_summary).

Common targets can be used not only for speculation: I expect that the code
placement pass can actually look up the target even when the call stays
unspeculative.

At LTO WPA time the common targets are turned into speculative edges by
ipa_profile.

Finally, the cloning machinery turns the speculative edges into conditional
direct calls.

The most painful part of this patch was undoubtedly the creation of a testcase.
We have no way to grep LTO dump files, so I had to verify that the
transformation happens (or does not happen when not welcome) by using
__builtin_constant_p.

Second, lto.exp has no support for FDO compilation, and tree-prof has only
partly working support for multiple-source testcases.  While there is a way
to add an additional source, there is no way I can think of to prevent the
other source from being compiled, so I just turned it into an empty
testcase.

Bootstrapped/regtested x86_64-linux

	* cgraph.c (cgraph_resolve_speculation): Cut frequency to
	CGRAPH_FREQ_MAX.
	(dump_cgraph_node): Dump profile-id.
	* cgraph.h (cgraph_indirect_call_info): Add common_target_id
	and common_target_probability.
	* lto-cgraph.c (lto_output_edge): Stream common targets.
	(lto_output_node): Stream profile ids.
	(input_node): Stream profile ids.
	(input_edge): Stream common targets.
	* lto-streamer-in.c (fixup_call_stmt_edges_1): Fix formatting.
	* ipa.c: Include value-prof.h
	(ipa_profile_generate_summary): Turn indirect call statement histograms
	into common targets.
	(ipa_profile): Turn common targets into speculative edges.
Index: cgraph.c
===================================================================
*** cgraph.c	(revision 201633)
--- cgraph.c	(working copy)
*************** cgraph_resolve_speculation (struct cgrap
*** 1176,1181 ****
--- 1176,1183 ----
      }
    edge->count += e2->count;
    edge->frequency += e2->frequency;
+   if (edge->frequency > CGRAPH_FREQ_MAX)
+     edge->frequency = CGRAPH_FREQ_MAX;
    edge->speculative = false;
    e2->speculative = false;
    if (e2->indirect_unknown_callee || e2->inline_failed)
*************** dump_cgraph_node (FILE *f, struct cgraph
*** 1801,1806 ****
--- 1803,1811 ----
      fprintf (f, "  Availability: %s\n",
  	     cgraph_availability_names [cgraph_function_body_availability (node)]);
  
+   if (node->profile_id)
+     fprintf (f, "  Profile id: %i\n",
+ 	     node->profile_id);
    fprintf (f, "  Function flags:");
    if (node->count)
      fprintf (f, " executed "HOST_WIDEST_INT_PRINT_DEC"x",
Index: cgraph.h
===================================================================
*** cgraph.h	(revision 201634)
--- cgraph.h	(working copy)
*************** struct GTY(()) cgraph_indirect_call_info
*** 435,440 ****
--- 435,444 ----
    int param_index;
    /* ECF flags determined from the caller.  */
    int ecf_flags;
+   /* Profile_id of common target obtained from profile.  */
+   int common_target_id;
+   /* Probability that call will land in function with COMMON_TARGET_ID.  */
+   int common_target_probability;
  
    /* Set when the call is a virtual call with the parameter being the
       associated object pointer rather than a simple direct call.  */
Index: lto-cgraph.c
===================================================================
*** lto-cgraph.c	(revision 201633)
--- lto-cgraph.c	(working copy)
*************** lto_output_edge (struct lto_simple_outpu
*** 299,304 ****
--- 299,312 ----
  			     | ECF_NOVOPS)));
      }
    streamer_write_bitpack (&bp);
+   if (edge->indirect_unknown_callee)
+     {
+       streamer_write_hwi_stream (ob->main_stream,
+ 			         edge->indirect_info->common_target_id);
+       if (edge->indirect_info->common_target_id)
+ 	streamer_write_hwi_stream
+ 	   (ob->main_stream, edge->indirect_info->common_target_probability);
+     }
  }
  
  /* Return if LIST contain references from other partitions.  */
*************** lto_output_node (struct lto_simple_outpu
*** 519,524 ****
--- 527,533 ----
        streamer_write_uhwi_stream (ob->main_stream, node->thunk.fixed_offset);
        streamer_write_uhwi_stream (ob->main_stream, node->thunk.virtual_value);
      }
+   streamer_write_hwi_stream (ob->main_stream, node->profile_id);
  }
  
  /* Output the varpool NODE to OB. 
*************** input_node (struct lto_file_decl_data *f
*** 1057,1062 ****
--- 1066,1072 ----
      }
    if (node->symbol.alias && !node->symbol.analyzed && node->symbol.weakref)
      node->symbol.alias_target = get_alias_symbol (node->symbol.decl);
+   node->profile_id = streamer_read_hwi (ib);
    return node;
  }
  
*************** input_edge (struct lto_input_block *ib,
*** 1205,1210 ****
--- 1215,1223 ----
        if (bp_unpack_value (&bp, 1))
  	ecf_flags |= ECF_RETURNS_TWICE;
        edge->indirect_info->ecf_flags = ecf_flags;
+       edge->indirect_info->common_target_id = streamer_read_hwi (ib);
+       if (edge->indirect_info->common_target_id)
+         edge->indirect_info->common_target_probability = streamer_read_hwi (ib);
      }
  }
  
Index: lto-streamer-in.c
===================================================================
*** lto-streamer-in.c	(revision 201633)
--- lto-streamer-in.c	(working copy)
*************** fixup_call_stmt_edges_1 (struct cgraph_n
*** 765,782 ****
    for (cedge = node->callees; cedge; cedge = cedge->next_callee)
      {
        if (gimple_stmt_max_uid (fn) < cedge->lto_stmt_uid)
!       fatal_error ("Cgraph edge statement index out of range");
        cedge->call_stmt = stmts[cedge->lto_stmt_uid - 1];
        if (!cedge->call_stmt)
!       fatal_error ("Cgraph edge statement index not found");
      }
    for (cedge = node->indirect_calls; cedge; cedge = cedge->next_callee)
      {
        if (gimple_stmt_max_uid (fn) < cedge->lto_stmt_uid)
!       fatal_error ("Cgraph edge statement index out of range");
        cedge->call_stmt = stmts[cedge->lto_stmt_uid - 1];
        if (!cedge->call_stmt)
!       fatal_error ("Cgraph edge statement index not found");
      }
    for (i = 0;
         ipa_ref_list_reference_iterate (&node->symbol.ref_list, i, ref);
--- 765,782 ----
    for (cedge = node->callees; cedge; cedge = cedge->next_callee)
      {
        if (gimple_stmt_max_uid (fn) < cedge->lto_stmt_uid)
!         fatal_error ("Cgraph edge statement index out of range");
        cedge->call_stmt = stmts[cedge->lto_stmt_uid - 1];
        if (!cedge->call_stmt)
!         fatal_error ("Cgraph edge statement index not found");
      }
    for (cedge = node->indirect_calls; cedge; cedge = cedge->next_callee)
      {
        if (gimple_stmt_max_uid (fn) < cedge->lto_stmt_uid)
!         fatal_error ("Cgraph edge statement index out of range");
        cedge->call_stmt = stmts[cedge->lto_stmt_uid - 1];
        if (!cedge->call_stmt)
!         fatal_error ("Cgraph edge statement index not found");
      }
    for (i = 0;
         ipa_ref_list_reference_iterate (&node->symbol.ref_list, i, ref);
Index: ipa.c
===================================================================
*** ipa.c	(revision 201633)
--- ipa.c	(working copy)
*************** along with GCC; see the file COPYING3.
*** 38,43 ****
--- 38,44 ----
  #include "params.h"
  #include "lto-streamer.h"
  #include "data-streamer.h"
+ #include "value-prof.h"
  
  /* Return true when NODE can not be local. Worker for cgraph_local_node_p.  */
  
*************** ipa_profile_generate_summary (void)
*** 1291,1298 ****
  	int size = 0;
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  	  {
! 	    time += estimate_num_insns (gsi_stmt (gsi), &eni_time_weights);
! 	    size += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
  	  }
  	account_time_size (hashtable, histogram, bb->count, time, size);
        }
--- 1292,1331 ----
  	int size = 0;
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  	  {
! 	    gimple stmt = gsi_stmt (gsi);
! 	    if (gimple_code (stmt) == GIMPLE_CALL
! 		&& !gimple_call_fndecl (stmt))
! 	      {
! 		histogram_value h;
! 		h = gimple_histogram_value_of_type
! 		      (DECL_STRUCT_FUNCTION (node->symbol.decl),
! 		       stmt, HIST_TYPE_INDIR_CALL);
! 		/* No need to do sanity check: gimple_ic_transform already
! 		   takes away bad histograms.  */
! 		if (h)
! 		  {
! 		    /* counter 0 is target, counter 1 is number of execution we called target,
! 		       counter 2 is total number of executions.  */
! 		    if (h->hvalue.counters[2])
! 		      {
! 			struct cgraph_edge * e = cgraph_edge (node, stmt);
! 			e->indirect_info->common_target_id
! 			  = h->hvalue.counters [0];
! 			e->indirect_info->common_target_probability
! 			  = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
! 			if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
! 			  {
! 			    if (dump_file)
! 			      fprintf (dump_file, "Probability capped to 1\n");
! 			    e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
! 			  }
! 		      }
! 		    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->symbol.decl),
! 						    stmt, h);
! 		  }
! 	      }
! 	    time += estimate_num_insns (stmt, &eni_time_weights);
! 	    size += estimate_num_insns (stmt, &eni_size_weights);
  	  }
  	account_time_size (hashtable, histogram, bb->count, time, size);
        }
*************** ipa_profile (void)
*** 1375,1380 ****
--- 1408,1460 ----
    int i;
    gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  
+   /* Produce speculative calls: we saved common target from profiling into
+      e->common_target_id.  Now, at link time, we can look up corresponding
+      function node and produce speculative call.  */
+   if (in_lto_p)
+     {
+       struct cgraph_edge *e;
+       struct cgraph_node *n,*n2;
+ 
+       init_node_map (false);
+       FOR_EACH_DEFINED_FUNCTION (n)
+ 	{
+ 	  bool update = false;
+ 
+ 	  for (e = n->indirect_calls; e; e = e->next_callee)
+ 	    if (e->indirect_info->common_target_id)
+ 	      {
+ 		n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
+ 		if (n2)
+ 		  {
+ 		    if (dump_file)
+ 		      {
+ 			fprintf (dump_file, "Indirect call -> direct call from"
+ 				 " other module %s/%i => %s/%i, prob %3.2f\n",
+ 				 xstrdup (cgraph_node_name (n)), n->symbol.order,
+ 				 xstrdup (cgraph_node_name (n2)), n2->symbol.order,
+ 				 e->indirect_info->common_target_probability
+ 				 / (float)REG_BR_PROB_BASE);
+ 		      }
+ 		    cgraph_turn_edge_to_speculative
+ 		      (e, n2,
+ 		       apply_scale (e->count,
+ 				    e->indirect_info->common_target_probability),
+ 		       apply_scale (e->frequency,
+ 				    e->indirect_info->common_target_probability));
+ 		    update = true;
+ 		  }
+ 		else
+ 		  if (dump_file)
+ 		    fprintf (dump_file, "Function with profile-id %i not found.\n",
+ 			     e->indirect_info->common_target_id);
+ 	       }
+ 	     if (update)
+ 	       inline_update_overall_summary (n);
+ 	   }
+ 	del_node_map ();
+     }
+ 
    if (dump_file)
      dump_histogram (dump_file, histogram);
    for (i = 0; i < (int)histogram.length (); i++)

Patch

Index: testsuite/gcc.dg/tree-prof/crossmodule-indircall-1.c
===================================================================
--- testsuite/gcc.dg/tree-prof/crossmodule-indircall-1.c	(revision 0)
+++ testsuite/gcc.dg/tree-prof/crossmodule-indircall-1.c	(revision 0)
@@ -0,0 +1,19 @@ 
+/* { dg-require-effective-target lto } */
+/* { dg-additional-sources "crossmodule-indircall-1a.c" } */
+/* { dg-options "-O3 -flto -DDOJOB=1" } */
+
+int a;
+extern void (*p[2])(int n);
+void abort (void);
+main()
+{ int i;
+
+  /* This call shall be converted.  */
+  for (i = 0;i<1000;i++)
+    p[0](1);
+  /* This call shall not be converted.  */
+  for (i = 0;i<1000;i++)
+    p[i%2](2);
+  if (a != 1000)
+    abort ();
+}
Index: testsuite/gcc.dg/tree-prof/crossmodule-indircall-1a.c
===================================================================
--- testsuite/gcc.dg/tree-prof/crossmodule-indircall-1a.c	(revision 0)
+++ testsuite/gcc.dg/tree-prof/crossmodule-indircall-1a.c	(revision 0)
@@ -0,0 +1,40 @@ 
+/* It seems there is no way to keep the other source of a multiple-source
+   testcase from being compiled independently.  Just avoid the
+   error.  */
+#ifdef DOJOB
+extern int a;
+void abort (void);
+
+#ifdef _PROFILE_USE
+__attribute__ ((externally_visible))
+int constval=1,constval2=2;
+#else
+__attribute__ ((externally_visible))
+int constval=3,constval2=2;
+#endif
+
+
+void
+add(int i)
+{
+  /* Verify that inlining happens for first case.  */
+  if (i==constval && !__builtin_constant_p (i))
+    abort ();
+  /* Second case has no dominating target; it should not inline.  */
+  if (i==constval2 && __builtin_constant_p (i))
+    abort ();
+  a += i;
+}
+void
+sub(int i)
+{
+  a -= i;
+}
+__attribute__ ((externally_visible))
+void (*p[2])(int)={add, sub};
+#else
+main()
+{
+  return 0;
+}
+#endif