Patchwork Move function body streaming to passmanager/cgraph control

login
register
mail settings
Submitter Jan Hubicka
Date Aug. 6, 2013, 1:59 p.m.
Message ID <20130806135931.GB15638@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/265057/
State New
Headers show

Comments

Jan Hubicka - Aug. 6, 2013, 1:59 p.m.
Hi,
this patch breaks out lto.c's code to read a given function body into
cgraph_get_body.  Instead of reading all bodies at once, we now read only
bodies that are needed on demand.  This is how I planned to get whole program
compilation working back in 2004, so code is still mostly ready for it. :)
Since we throw away bodies we no longer need during the compilation, this
reduces overall memory use of ltrans stages (and -flto-partition=none).
I tested it on Firefox. -flto-partition=none still gets over 16GB of memory use
but it gets quite a lot of work done before crashing, while w/o patch we crash
right away.  I suppose I need to check what parts of function bodies are getting
stale during compilation (for sure line locators, but probably more).
Slimmer ltrans stages help to reduce the overall footprint in parallel compilation.

Another motivation of this patch is to make IPA pass development easier.
Martin Liska's code unification pass can now read function bodies into the WPA
stage to compare bodies when the hash claims they are equivalent.  This is a
lot easier than getting them compared only later at ltrans.  Depending on the
performance implications of this we can either stay with this or go with not
reading them.

Reading of bodies is mostly done by the passmanager just before it executes a
local pass on the function.  Callgraph materialization and inlining need the
bodies of other functions and therefore they need their own cgraph_get_body
calls.  Once dwarf2out is less broken with -g I think it should also get bodies
on its own to produce abstract origin representations (not included in this
patch since push_cfun ICEs; I have a followup for this.  To see some benefits
we however need to stop clearing the origins).

Ipa-pta is the only late IPA pass and as such it also needs changes.  These
changes I think should go away next.  I do not see how ipa-pta can reliably work
in the presence of clones.  I have a separate patch that adds a fixup_cfg pass
prior to ipa-pta that makes us apply all IPA transforms (of course mostly
killing the memory benefits of this patch).  Currently I think it should be easy
to reproduce a bug where ipa-cp injects some aggregate constants taking the
address of an object that ipa-pta doesn't see.

Bootstrapped/regtested x86_64-linux, will commit it after a bit more testing.

Honza

	* cgraph.c (cgraph_get_body): New function based on lto.c
	implementation.
	* cgraph.h (cgraph_get_body): Declare.
	* cgraphclones.c (cgraph_create_virtual_clone): Commonize WPA and LTO paths.
	* cgraphunit.c (expand_function): Get body prior expanding.
	* ipa.c (function_and_variable_visibility): Use gimple_has_body_p test.
	* lto-cgraph.c (lto_output_node): Do not stream bodies we don't really need.
	* passes.c (do_per_function_toporder): Get body.
	* tree-inline.c (expand_call_inline): Get body prior inlining it.
	* tree-ssa-structalias.c (ipa_pta_execute): Get body; skip clones.

	* lto.c (lto_materialize_function): Do not read body anymore.

Patch

Index: cgraph.c
===================================================================
--- cgraph.c	(revision 201498)
+++ cgraph.c	(working copy)
@@ -2707,4 +2707,44 @@  cgraph_function_node (struct cgraph_node
   return node;
 }
 
+/* When doing LTO, read NODE's body from disk if it is not already present.  */
+
+bool
+cgraph_get_body (struct cgraph_node *node)
+{
+  struct lto_file_decl_data *file_data;
+  const char *data, *name;
+  size_t len;
+  tree decl = node->symbol.decl;
+
+  if (DECL_RESULT (decl))
+    return false;
+
+  gcc_assert (in_lto_p);
+
+  file_data = node->symbol.lto_file_data;
+  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+
+  /* We may have renamed the declaration, e.g., a static function.  */
+  name = lto_get_decl_name_mapping (file_data, name);
+
+  data = lto_get_section_data (file_data, LTO_section_function_body,
+			       name, &len);
+  if (!data)
+    {
+	dump_cgraph_node (stderr, node);
+    fatal_error ("%s: section %s is missing",
+		 file_data->file_name,
+		 name);
+    }
+
+  gcc_assert (DECL_STRUCT_FUNCTION (decl) == NULL);
+
+  lto_input_function_body (file_data, decl, data);
+  lto_stats.num_function_bodies++;
+  lto_free_section_data (file_data, LTO_section_function_body, name,
+			 data, len);
+  return true;
+}
+
 #include "gt-cgraph.h"
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 201498)
+++ cgraph.h	(working copy)
@@ -701,6 +701,7 @@  gimple cgraph_redirect_edge_call_stmt_to
 bool cgraph_propagate_frequency (struct cgraph_node *node);
 struct cgraph_node * cgraph_function_node (struct cgraph_node *,
 					   enum availability *avail = NULL);
+bool cgraph_get_body (struct cgraph_node *node);
 
 /* In cgraphunit.c  */
 struct asm_node *add_asm_node (tree);
Index: cgraphclones.c
===================================================================
--- cgraphclones.c	(revision 201498)
+++ cgraphclones.c	(working copy)
@@ -295,7 +295,7 @@  cgraph_create_virtual_clone (struct cgra
   size_t i;
   struct ipa_replace_map *map;
 
-  if (!flag_wpa)
+  if (!in_lto_p)
     gcc_checking_assert  (tree_versionable_function_p (old_decl));
 
   gcc_assert (old_node->local.can_change_signature || !args_to_skip);
@@ -829,6 +829,8 @@  cgraph_materialize_all_clones (void)
 	  if (node->clone_of && node->symbol.decl != node->clone_of->symbol.decl
 	      && !gimple_has_body_p (node->symbol.decl))
 	    {
+	      if (!node->clone_of->clone_of)
+		cgraph_get_body (node->clone_of);
 	      if (gimple_has_body_p (node->clone_of->symbol.decl))
 	        {
 		  if (cgraph_dump_file)
Index: cgraphunit.c
===================================================================
--- cgraphunit.c	(revision 201498)
+++ cgraphunit.c	(working copy)
@@ -1578,6 +1578,7 @@  expand_function (struct cgraph_node *nod
   announce_function (decl);
   node->process = 0;
   gcc_assert (node->lowered);
+  cgraph_get_body (node);
 
   /* Generate RTL for the body of DECL.  */
 
Index: ipa.c
===================================================================
--- ipa.c	(revision 201498)
+++ ipa.c	(working copy)
@@ -915,7 +915,7 @@  function_and_variable_visibility (bool w
 		  struct cgraph_edge *e = node->callers;
 
 		  cgraph_redirect_edge_callee (e, alias);
-		  if (!flag_wpa)
+		  if (gimple_has_body_p (e->caller->symbol.decl))
 		    {
 		      push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
 		      cgraph_redirect_edge_call_stmt_to_callee (e);
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 201498)
+++ lto/lto.c	(working copy)
@@ -192,48 +192,19 @@  static void
 lto_materialize_function (struct cgraph_node *node)
 {
   tree decl;
-  struct lto_file_decl_data *file_data;
-  const char *data, *name;
-  size_t len;
 
   decl = node->symbol.decl;
   /* Read in functions with body (analyzed nodes)
      and also functions that are needed to produce virtual clones.  */
   if ((cgraph_function_with_gimple_body_p (node) && node->symbol.analyzed)
+      || node->used_as_abstract_origin
       || has_analyzed_clone_p (node))
     {
       /* Clones don't need to be read.  */
       if (node->clone_of)
 	return;
-
-      /* Load the function body only if not operating in WPA mode.  In
-	 WPA mode, the body of the function is not needed.  */
-      if (!flag_wpa)
-	{
-	  file_data = node->symbol.lto_file_data;
-	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
-
-	  /* We may have renamed the declaration, e.g., a static function.  */
-	  name = lto_get_decl_name_mapping (file_data, name);
-
-	  data = lto_get_section_data (file_data, LTO_section_function_body,
-				       name, &len);
-	  if (!data)
-	    fatal_error ("%s: section %s is missing",
-			 file_data->file_name,
-			 name);
-
-	  gcc_assert (DECL_STRUCT_FUNCTION (decl) == NULL);
-
-	  announce_function (decl);
-	  lto_input_function_body (file_data, decl, data);
-	  if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl)
-	    first_personality_decl = DECL_FUNCTION_PERSONALITY (decl);
-	  lto_stats.num_function_bodies++;
-	  lto_free_section_data (file_data, LTO_section_function_body, name,
-				 data, len);
-	  ggc_collect ();
-	}
+      if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl)
+	first_personality_decl = DECL_FUNCTION_PERSONALITY (decl);
     }
 
   /* Let the middle end know about the function.  */
Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 201498)
+++ lto-cgraph.c	(working copy)
@@ -376,7 +376,7 @@  lto_output_node (struct lto_simple_outpu
   bool boundary_p;
   intptr_t ref;
   bool in_other_partition = false;
-  struct cgraph_node *clone_of;
+  struct cgraph_node *clone_of, *ultimate_clone_of;
   struct ipa_opt_pass_d *pass;
   int i;
   bool alias_p;
@@ -423,7 +423,16 @@  lto_output_node (struct lto_simple_outpu
     else
       clone_of = clone_of->clone_of;
 
-  if (LTO_symtab_analyzed_node)
+  /* See if body of the master function is output.  If not, we are seeing only
+     an declaration and we do not need to pass down clone tree. */
+  ultimate_clone_of = clone_of;
+  while (ultimate_clone_of && ultimate_clone_of->clone_of)
+    ultimate_clone_of = ultimate_clone_of->clone_of;
+
+  if (clone_of && !lto_symtab_encoder_encode_body_p (encoder, ultimate_clone_of))
+    clone_of = NULL;
+
+  if (tag == LTO_symtab_analyzed_node)
     gcc_assert (clone_of || !node->clone_of);
   if (!clone_of)
     streamer_write_hwi_stream (ob->main_stream, LCC_NOT_FOUND);
Index: passes.c
===================================================================
--- passes.c	(revision 201498)
+++ passes.c	(working copy)
@@ -1409,6 +1409,7 @@  do_per_function_toporder (void (*callbac
 	  node->process = 0;
 	  if (cgraph_function_with_gimple_body_p (node))
 	    {
+	      cgraph_get_body (node);
 	      push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
 	      callback (data);
 	      free_dominance_info (CDI_DOMINATORS);
Index: tree-inline.c
===================================================================
--- tree-inline.c	(revision 201498)
+++ tree-inline.c	(working copy)
@@ -3939,6 +3939,7 @@  expand_call_inline (basic_block bb, gimp
       goto egress;
     }
   fn = cg_edge->callee->symbol.decl;
+  cgraph_get_body (cg_edge->callee);
 
 #ifdef ENABLE_CHECKING
   if (cg_edge->callee->symbol.decl != id->dst_node->symbol.decl)
Index: tree-ssa-structalias.c
===================================================================
--- tree-ssa-structalias.c	(revision 201498)
+++ tree-ssa-structalias.c	(working copy)
@@ -7054,8 +7054,9 @@  ipa_pta_execute (void)
       /* Nodes without a body are not interesting.  Especially do not
          visit clones at this point for now - we get duplicate decls
 	 there for inline clones at least.  */
-      if (!cgraph_function_with_gimple_body_p (node))
+      if (!cgraph_function_with_gimple_body_p (node) || node->clone_of)
 	continue;
+      cgraph_get_body (node);
 
       gcc_assert (!node->clone_of);
 
@@ -7088,7 +7089,7 @@  ipa_pta_execute (void)
       basic_block bb;
 
       /* Nodes without a body are not interesting.  */
-      if (!cgraph_function_with_gimple_body_p (node))
+      if (!cgraph_function_with_gimple_body_p (node) || node->clone_of)
 	continue;
 
       if (dump_file)
@@ -7197,7 +7198,7 @@  ipa_pta_execute (void)
       struct cgraph_edge *e;
 
       /* Nodes without a body are not interesting.  */
-      if (!cgraph_function_with_gimple_body_p (node))
+      if (!cgraph_function_with_gimple_body_p (node) || node->clone_of)
 	continue;
 
       fn = DECL_STRUCT_FUNCTION (node->symbol.decl);