Patchwork [google] static function promotion improvement patch for LIPO (issue4517117)

login
register
mail settings
Submitter Xinliang David Li
Date May 30, 2011, 5:09 p.m.
Message ID <20110530170954.60EBF2087E@syzygy.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/97939/
State New
Headers show

Comments

Xinliang David Li - May 30, 2011, 5:09 p.m.
The following patch will be committed to google/main. It improves 
performance of internal benchmarks significantly.

This is a patch that improves static function promotion in LIPO mode.
1) Do not promote non address taken static functions -- this greately
reduce the number of promotions and allows more DFE after inlining. This
also makes inline size estimation more consistent across profile-gen
and profile-use
2) Delay static promotion just before tree-profiling after early inlining
is done. This is to make sure consistent size estimation
3) For emition of static init functions from aux modules. Those functions
will be eliminated later (they are not called from global dtor/ctor) -- 
there existence is important to make sure address taken (etc) attributes
for called dtor/ctors are consistent between profile-gen and use.

2011-05-30  David Li  <davidxl@google.com>

	* cgraphunit.c (cgraph_optimize):  Remove call to static
	promotion funciton.
	* cp/decl2.c (cp_process_pending_declarations):  Do not
	remove body of __static_init functions for aux modules.
	* ipa-inline.c (leaf_node_p):  Filter indirect callsite
	to make sure profile gen and profile use consistency
	(cgraph_decide_inlining_incrementally):  Remove LIPO
	specific inline rule used to workaround size estimation
	problem for static functions.
	* tree-profile.c (tree_profiling):  Do static promotion here.
	* l-ipo.c (cgraph_is_aux_decl_external):  Handle non-promoted
	static function.
	(create_unique_name): New function.
	(promote_static_var_func): Do not promote non addr taken statics.


--
This patch is available for review at http://codereview.appspot.com/4517117

Patch

Index: cgraphunit.c
===================================================================
--- cgraphunit.c	(revision 174088)
+++ cgraphunit.c	(working copy)
@@ -2030,9 +2030,6 @@  cgraph_optimize (void)
     {
       cgraph_init_gid_map ();
       cgraph_add_fake_indirect_call_edges ();
-     /* Perform static promotion before IPA passes to avoid
-        needed static functions being deleted.  */
-      cgraph_process_module_scope_statics ();
     }
 
   /* Don't run the IPA passes if there was any error or sorry messages.  */
Index: cp/decl2.c
===================================================================
--- cp/decl2.c	(revision 174088)
+++ cp/decl2.c	(working copy)
@@ -3901,10 +3901,11 @@  cp_process_pending_declarations (locatio
          to be created for auxiliary modules -- they are created to keep
          funcdef_no consistent between profile use and profile gen.  */
       for (i = 0; VEC_iterate (tree, ssdf_decls, i, fndecl); ++i)
-         {
-           TREE_STATIC (fndecl) = 0;
-           DECL_INITIAL (fndecl) = 0;
-         }
+        /* Such ssdf_decls are not called from GLOBAL ctor/dtor, mark
+	   them reachable to avoid being eliminated too early before
+	   gimplication.  */
+        cgraph_mark_reachable_node (cgraph_node (fndecl));
+
       ssdf_decls = NULL;
       return;
     }
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 174088)
+++ ipa-inline.c	(working copy)
@@ -1624,8 +1624,10 @@  static bool
 leaf_node_p (struct cgraph_node *n)
 {
   struct cgraph_edge *e;
+  /* The following is buggy -- indirect call is not considered.  */
   for (e = n->callees; e; e = e->next_callee)
-    if (!is_inexpensive_builtin (e->callee->decl))
+    if (e->call_stmt /* Only exisit in profile use pass in LIPO */
+	&& !is_inexpensive_builtin (e->callee->decl))
       return false;
   return true;
 }
@@ -1640,8 +1642,6 @@  cgraph_decide_inlining_incrementally (st
   struct cgraph_edge *e;
   bool inlined = false;
   cgraph_inline_failed_t failed_reason;
-  bool after_tree_profile =
-    (DECL_STRUCT_FUNCTION (node->decl))->after_tree_profile;
 
 #ifdef ENABLE_CHECKING
   verify_cgraph_node (node);
@@ -1718,20 +1718,7 @@  cgraph_decide_inlining_incrementally (st
 	      || !e->inline_failed
 	      || e->callee->local.disregard_inline_limits)
 	    continue;
-	  /* Don't do cross-module inlining before profile-use, so that we have
-	     a consistent CFG between profile-gen and profile-use passes.  */
-	  if (!after_tree_profile
-	      && L_IPO_COMP_MODE
-	      && !cgraph_is_inline_body_available_in_module (
-		  e->callee->decl, cgraph_get_module_id (e->caller->decl)))
-	    {
-	      e->inline_failed = CIF_NO_INTERMODULE_INLINE;
-	      if (dump_file)
-		fprintf (dump_file, "Not inlining considering inlining %s: %s\n",
-			 cgraph_node_name (e->callee),
-			 "Inter-module inlining disabled");
-	      continue;
-	    }
+
 	  if (dump_file)
 	    fprintf (dump_file, "Considering inline candidate %s.\n",
 		     cgraph_node_name (e->callee));
@@ -1751,8 +1738,7 @@  cgraph_decide_inlining_incrementally (st
 	    }
 
 	  if (cgraph_maybe_hot_edge_p (e) && leaf_node_p (e->callee)
-	      && optimize_function_for_speed_p (cfun)
-	      && (after_tree_profile || !flag_dyn_ipa))
+	      && optimize_function_for_speed_p (cfun))
 	    allowed_growth = PARAM_VALUE (PARAM_EARLY_INLINING_INSNS);
 
 	  /* When the function body would grow and inlining the function
@@ -1763,12 +1749,7 @@  cgraph_decide_inlining_incrementally (st
 		   && !DECL_DECLARED_INLINE_P (e->callee->decl)))
 	      && (cgraph_estimate_size_after_inlining (e->caller, e->callee)
 		  > e->caller->global.size + allowed_growth)
-	      && (cgraph_estimate_growth (e->callee) > allowed_growth
-		  /* With lightweight IPO, due to static function promtion,
-		     it is hard to enable this heuristic and maintain consistent
-		     pre-profiling inline decisions between profiile generate
-		     and profile use passes.  */
-		  || (!after_tree_profile && flag_dyn_ipa)))
+	      && (cgraph_estimate_growth (e->callee) > allowed_growth))
 	    {
 	      if (dump_file)
 		fprintf (dump_file,
Index: tree-profile.c
===================================================================
--- tree-profile.c	(revision 174088)
+++ tree-profile.c	(working copy)
@@ -1347,6 +1347,7 @@  tree_profiling (void)
      function body from being deleted) won't be needed.  */
 
   cgraph_pre_profiling_inlining_done = true;
+  cgraph_process_module_scope_statics ();
   /* Now perform link to allow cross module inlining.  */
   cgraph_do_link ();
   varpool_do_link ();
Index: l-ipo.c
===================================================================
--- l-ipo.c	(revision 174088)
+++ l-ipo.c	(working copy)
@@ -1117,6 +1117,9 @@  cgraph_is_aux_decl_external (struct cgra
   if (DECL_COMDAT (decl) || DECL_WEAK (decl))
     return false;
 
+  if (!TREE_PUBLIC (decl))
+    return false;
+
   /* The others from aux modules are external. */
   return true;
 }
@@ -1675,33 +1678,17 @@  get_name_seq_num (const char *name)
   return (*slot)->seq;
 }
 
-/* Promote DECL to be global. MODULE_ID is the id of the module where
-   DECL is defined. IS_EXTERN is a flag indicating if externalization
-   is needed.  */
+/* Returns a unique assembler name for DECL.  */
 
-static void
-promote_static_var_func (unsigned module_id, tree decl, bool is_extern)
+static tree
+create_unique_name (tree decl, unsigned module_id)
 {
   tree id, assemb_id;
   char *assembler_name;
   const char *name;
   struct  function *context = NULL;
-  tree alias;
   int seq = 0;
 
-  /* No need to promote symbol alias.  */
-  alias = lookup_attribute ("alias", DECL_ATTRIBUTES (decl));
-  if (alias)
-    return;
-
-  /* Function decls in C++ may contain characters not taken by assembler.
-     Similarly, function scope static variable has UID as the assembler name
-     suffix which is not consistent across modules.  */
-
-  if (DECL_ASSEMBLER_NAME_SET_P (decl)
-      && TREE_CODE (decl) == FUNCTION_DECL)
-    cgraph_remove_assembler_hash_node (cgraph_node (decl));
-
   if (TREE_CODE (decl) == FUNCTION_DECL)
     {
       if (!DECL_CONTEXT (decl)
@@ -1748,6 +1735,34 @@  promote_static_var_func (unsigned module
     sprintf (assembler_name, "%s_%d", assembler_name, seq);
 
   assemb_id = get_identifier (assembler_name);
+
+  return assemb_id;
+}
+
+/* Promote DECL to be global. MODULE_ID is the id of the module where
+   DECL is defined. IS_EXTERN is a flag indicating if externalization
+   is needed.  */
+
+static void
+promote_static_var_func (unsigned module_id, tree decl, bool is_extern)
+{
+  tree assemb_id;
+  tree alias;
+
+  /* No need to promote symbol alias.  */
+  alias = lookup_attribute ("alias", DECL_ATTRIBUTES (decl));
+  if (alias)
+    return;
+
+  /* Function decls in C++ may contain characters not taken by assembler.
+     Similarly, function scope static variable has UID as the assembler name
+     suffix which is not consistent across modules.  */
+
+  if (DECL_ASSEMBLER_NAME_SET_P (decl)
+      && TREE_CODE (decl) == FUNCTION_DECL)
+    cgraph_remove_assembler_hash_node (cgraph_node (decl));
+
+  assemb_id = create_unique_name (decl, module_id);
   SET_DECL_ASSEMBLER_NAME (decl, assemb_id);
   TREE_PUBLIC (decl) = 1;
   DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
@@ -1756,11 +1771,14 @@  promote_static_var_func (unsigned module
   if (TREE_CODE (decl) == FUNCTION_DECL)
     {
       struct cgraph_node *node = cgraph_node (decl);
+
+      node->resolution = LDPR_UNKNOWN;
       cgraph_add_assembler_hash_node (node);
     }
   else
     {
       struct varpool_node *node = varpool_node (decl);
+      node->resolution = LDPR_UNKNOWN;
       varpool_link_node (node);
     }
 
@@ -1843,13 +1861,24 @@  process_module_scope_static_func (struct
       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (decl)) != NULL)
     return;
 
-  if (cgraph_is_auxiliary (cnode->decl))
+  /* Can be local -- the promotion pass need to be done after
+     callgraph build when address taken bit is set.  */
+  if (!cnode->address_taken)
+    {
+      tree assemb_id = create_unique_name (decl, cgraph_get_module_id (decl));
+
+      if (DECL_ASSEMBLER_NAME_SET_P (decl))
+        cgraph_remove_assembler_hash_node (cnode);
+      SET_DECL_ASSEMBLER_NAME (decl, assemb_id);
+      return;
+    }
+
+  if (cgraph_is_auxiliary (decl))
     {
-      gcc_assert (cgraph_get_module_id (cnode->decl)
-                  != primary_module_id);
+      gcc_assert (cgraph_get_module_id (decl) != primary_module_id);
       /* Promote static function to global.  */
-      if (cgraph_get_module_id (cnode->decl))
-        promote_static_var_func (cgraph_get_module_id (cnode->decl), decl, 1);
+      if (cgraph_get_module_id (decl))
+        promote_static_var_func (cgraph_get_module_id (decl), decl, 1);
     }
   else
     {
@@ -1857,7 +1886,7 @@  process_module_scope_static_func (struct
           /* skip static_init routines.  */
           && !DECL_ARTIFICIAL (decl))
         {
-          promote_static_var_func (cgraph_get_module_id (cnode->decl), decl, 0);
+          promote_static_var_func (cgraph_get_module_id (decl), decl, 0);
           cgraph_mark_if_needed (decl);
         }
     }