Patchwork Take advantage of hidden symbols during LTO

login
register
mail settings
Submitter Jan Hubicka
Date July 7, 2010, 8:37 p.m.
Message ID <20100707203740.GB13327@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/58166/
State New
Headers show

Comments

Jan Hubicka - July 7, 2010, 8:37 p.m.
Hi,
this patch makes LTO to realize that hidden symbols can not be bound externally
after link time.  This has similar effect on libraries with proper interface
decorations as -fwhole-program has on programs.

I tested this by building Mozilla and it works modulo one mozilla bug where
function is called from asm statement and lacks __attribute__ ((used))
decoration. It does not bring huge size savings on mozilla per se, at least in
-Os, but it is just start.  We obviously need to retune the inliner and
implement stuff like function reordering.
The differences should be greater in -O2, where -fwhole-program has traditionally
quite noticeable effect.

I now get following sizes of libxul:

    4.3                 24065944
mainline                24972848
mainline+LTO+this patch 23431136
relinked with -O2       27658240

All mozillas starts and seems useful after replacing libmozswlite.so by version
built by 4.3. Otherwise we get ICE at startup. Memory footprints for starting seems
roughly same on all mainline builds. 4.3 for some reason stopped starting for me
(it used to start, now it exits without saying any error message).

So in particular I don't get very large code size rgression switching from 4.3
to mainline.  Things would be more interesting at -O2, where code duplication
can be a lot better estimated.  I am not sure myself what happens when .o files
are built with -Os and linked with -O2 as I think we end up with function
profiles being set to cold - obviously we should allow mixing -Os and -O2 code
together and we have infrastructure for that, just it is probably going in random
ways at the moment.

There is one issue. Without gold plugin the code assumes that all objects
binding hidden symbols are LTO objects and when you want to mix LTO and non-LTO
objects, one is required to mark symbol as externally_visible.  This is same
as for -fwhole-program, so I would like to keep it this way and tell users
to either switch to plugin (without plugin we are limited anyway) or
annotate by hand.

The patch uncovered two latent problems.  First is that for variables we did
not clear comdat flags that ended up causing linker to throw away the sections.
Also I think we should clear NAMED_SECTION for comdats to conserve size.  This
is the cgraph_make_decl_local change.

Other problem is with aliases.  Mozilla uses cairo that has for every function
two symbols, one internal and one external alias.  With the patch we turned the
internal symbol into static that causes us to mangle its name and as a result
the alias got lost.

Whole alias code is mess at a moment - I will need to carefully redesign it,
so I opted to simply disable localizing symbols with alias.  This has bit of
unwanted effect of that libraries that actually care about aliases will be
punished, but it is not that bad.

Bootstrapped/regtested x86_64-linux, will commit it tomorrow if there are
no complains.

Honza

	* ipa.c: Include pointer-set.h
	(cgraph_externally_visible_p): New attribute ALIASED;
	when in LTO, hidden symbols are local unless they are aliased.
	(function_and_variable_visibility): Compute aliased nodes;
	handle LTO and hidden symbol on functions and vars.
	* cgraph.c (cgraph_make_decl_local): Clear NAMED_SECTION
	for COMDAT symbols; handle COMDAT_GROUPS also at vars.

Patch

Index: ipa.c
===================================================================
--- ipa.c	(revision 161905)
+++ ipa.c	(working copy)
@@ -28,6 +28,7 @@  along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ggc.h"
 #include "flags.h"
+#include "pointer-set.h"
 
 /* Fill array order with all nodes with output flag set in the reverse
    topological order.  */
@@ -567,23 +568,28 @@  ipa_discover_readonly_nonaddressable_var
 /* Return true when function NODE should be considered externally visible.  */
 
 static bool
-cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program)
+cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program, bool aliased)
 {
   if (!node->local.finalized)
     return false;
   if (!DECL_COMDAT (node->decl)
       && (!TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl)))
     return false;
-  if (!whole_program)
-    return true;
-  if (node->local.used_from_object_file)
+
+  /* Do not even try to be smart about aliased nodes.  Until we properly
+     represent everything by same body alias, these are just evil.  */
+  if (aliased)
     return true;
-  if (DECL_PRESERVE_P (node->decl))
+
+  /* When doing link time optimizations, hidden symbols become local.  */
+  if (in_lto_p && DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN)
+    ;
+  else if (!whole_program)
     return true;
   /* COMDAT functions must be shared only if they have address taken,
      otherwise we can produce our own private implementation with
      -fwhole-program.  */
-  if (DECL_COMDAT (node->decl))
+  else if (DECL_COMDAT (node->decl))
     {
       if (node->address_taken || !node->analyzed)
 	return true;
@@ -601,6 +607,10 @@  cgraph_externally_visible_p (struct cgra
 	      return true;
 	}
     }
+  if (node->local.used_from_object_file)
+    return true;
+  if (DECL_PRESERVE_P (node->decl))
+    return true;
   if (MAIN_NAME_P (DECL_NAME (node->decl)))
     return true;
   if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (node->decl)))
@@ -639,6 +649,38 @@  function_and_variable_visibility (bool w
 {
   struct cgraph_node *node;
   struct varpool_node *vnode;
+  struct pointer_set_t *aliased_nodes = pointer_set_create ();
+  struct pointer_set_t *aliased_vnodes = pointer_set_create ();
+  unsigned i;
+  alias_pair *p;
+
+  /* Discover aliased nodes.  */
+  for (i = 0; VEC_iterate (alias_pair, alias_pairs, i, p); i++)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Alias %s->%s",
+		IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (p->decl)),
+		IDENTIFIER_POINTER (p->target));
+		
+      if ((node = cgraph_node_for_asm (p->target)) != NULL)
+        {
+	  gcc_assert (node->needed);
+	  pointer_set_insert (aliased_nodes, node);
+	  if (dump_file)
+	    fprintf (dump_file, "  node %s/%i",
+		     cgraph_node_name (node), node->uid);
+        }
+      else if ((vnode = varpool_node_for_asm (p->target)) != NULL)
+        {
+	  gcc_assert (vnode->needed);
+	  pointer_set_insert (aliased_vnodes, vnode);
+	  if (dump_file)
+	    fprintf (dump_file, "  varpool node %s",
+		     varpool_node_name (vnode));
+        }
+      if (dump_file)
+       fprintf (dump_file, "\n");
+    }
 
   for (node = cgraph_nodes; node; node = node->next)
     {
@@ -667,7 +709,9 @@  function_and_variable_visibility (bool w
 	}
       gcc_assert ((!DECL_WEAK (node->decl) && !DECL_COMDAT (node->decl))
       	          || TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl));
-      if (cgraph_externally_visible_p (node, whole_program))
+      if (cgraph_externally_visible_p (node, whole_program,
+				       pointer_set_contains (aliased_nodes,
+							     node)))
         {
 	  gcc_assert (!node->global.inlined_to);
 	  node->local.externally_visible = true;
@@ -678,7 +722,7 @@  function_and_variable_visibility (bool w
 	  && !DECL_EXTERNAL (node->decl))
 	{
           struct cgraph_node *alias;
-	  gcc_assert (whole_program || !TREE_PUBLIC (node->decl));
+	  gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl));
 	  cgraph_make_decl_local (node->decl);
 	  for (alias = node->same_body; alias; alias = alias->next)
 	    cgraph_make_decl_local (alias->decl);
@@ -725,13 +769,19 @@  function_and_variable_visibility (bool w
         continue;
       if (vnode->needed
 	  && (DECL_COMDAT (vnode->decl) || TREE_PUBLIC (vnode->decl))
-	  && (!whole_program
-	      /* We can privatize comdat readonly variables whose address is not taken,
-	         but doing so is not going to bring us optimization oppurtunities until
-	         we start reordering datastructures.  */
-	      || DECL_COMDAT (vnode->decl)
-	      || DECL_WEAK (vnode->decl)
+	  && (((!whole_program
+	        /* We can privatize comdat readonly variables whose address is
+		   not taken, but doing so is not going to bring us
+		   optimization oppurtunities until we start reordering
+		   datastructures.  */
+		|| DECL_COMDAT (vnode->decl)
+		|| DECL_WEAK (vnode->decl))
+	       /* When doing linktime optimizations, all hidden symbols will
+		  become local.  */
+	       && (!in_lto_p
+		   || DECL_VISIBILITY (vnode->decl) != VISIBILITY_HIDDEN))
               || vnode->used_from_object_file
+	      || pointer_set_contains (aliased_vnodes, vnode)
 	      || lookup_attribute ("externally_visible",
 				   DECL_ATTRIBUTES (vnode->decl))))
 	vnode->externally_visible = true;
@@ -739,11 +789,13 @@  function_and_variable_visibility (bool w
         vnode->externally_visible = false;
       if (!vnode->externally_visible)
 	{
-	  gcc_assert (whole_program || !TREE_PUBLIC (vnode->decl));
+	  gcc_assert (in_lto_p || whole_program || !TREE_PUBLIC (vnode->decl));
 	  cgraph_make_decl_local (vnode->decl);
 	}
      gcc_assert (TREE_STATIC (vnode->decl));
     }
+  pointer_set_destroy (aliased_nodes);
+  pointer_set_destroy (aliased_vnodes);
 
   if (dump_file)
     {
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 161905)
+++ cgraph.c	(working copy)
@@ -2455,15 +2455,16 @@  cgraph_make_decl_local (tree decl)
 
   if (TREE_CODE (decl) == VAR_DECL)
     DECL_COMMON (decl) = 0;
-  else if (TREE_CODE (decl) == FUNCTION_DECL)
+  else gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
+
+  if (DECL_COMDAT (decl))
     {
+      DECL_SECTION_NAME (decl) = 0;
       DECL_COMDAT (decl) = 0;
-      DECL_COMDAT_GROUP (decl) = 0;
-      DECL_WEAK (decl) = 0;
-      DECL_EXTERNAL (decl) = 0;
     }
-  else
-    gcc_unreachable ();
+  DECL_COMDAT_GROUP (decl) = 0;
+  DECL_WEAK (decl) = 0;
+  DECL_EXTERNAL (decl) = 0;
   TREE_PUBLIC (decl) = 0;
   if (!DECL_RTL_SET_P (decl))
     return;