Patchwork Tweak partitioning for less streaming

login
register
mail settings
Submitter Jan Hubicka
Date Aug. 28, 2013, 6:53 a.m.
Message ID <20130828065331.GA8260@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/270374/
State New
Headers show

Comments

Jan Hubicka - Aug. 28, 2013, 6:53 a.m.
Hi,
this is something we noticed during experiments with Martin Liska.  The streaming
works a lot better if partitioning is based on original source order instead
of reverse postorder and both orders seems to work similarly badly code quality
wise.

This reduces streaming needed by firefox to 1/2.

Bootstrapped/regtested ppc64-linux, comitted.

	* lto-partition.c (lto_balanced_map): Always base order on 
	source file order.

Patch

Index: lto-partition.c
===================================================================
--- lto-partition.c	(revision 202000)
+++ lto-partition.c	(working copy)
@@ -449,11 +449,9 @@  lto_balanced_map (void)
 {
   int n_nodes = 0;
   int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
-  struct cgraph_node **postorder =
-    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
   struct cgraph_node **order = XNEWVEC (struct cgraph_node *, cgraph_max_uid);
   struct varpool_node **varpool_order = NULL;
-  int i, postorder_len;
+  int i;
   struct cgraph_node *node;
   int total_size = 0, best_total_size = 0;
   int partition_size;
@@ -468,24 +466,20 @@  lto_balanced_map (void)
 
   FOR_EACH_VARIABLE (vnode)
     gcc_assert (!vnode->symbol.aux);
-  /* Until we have better ordering facility, use toplogical order.
-     Include only nodes we will partition and compute estimate of program
-     size.  Note that since nodes that are not partitioned might be put into
-     multiple partitions, this is just an estimate of real size.  This is why
-     we keep partition_size updated after every partition is finalized.  */
-  postorder_len = ipa_reverse_postorder (postorder);
     
-  for (i = 0; i < postorder_len; i++)
-    {
-      node = postorder[i];
-      if (get_symbol_class ((symtab_node) node) == SYMBOL_PARTITION)
-	{
-	  order[n_nodes++] = node;
-          total_size += inline_summary (node)->size;
-	}
-    }
-  free (postorder);
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (get_symbol_class ((symtab_node) node) == SYMBOL_PARTITION)
+      {
+	order[n_nodes++] = node;
+	total_size += inline_summary (node)->size;
+      }
 
+  /* Streaming works best when the source units do not cross partition
+     boundaries much.  This is because importing function from a source
+     unit tends to import a lot of global trees defined there.  We should
+     get better about minimizing the function bounday, but until that
+     things works smoother if we order in source order.  */
+  qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp);
   if (!flag_toplevel_reorder)
     {
       qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp);