diff mbox series

Fix lto partition ordering

Message ID 20180429180240.GB99871@kam.mff.cuni.cz
State New
Headers show
Series Fix lto partition ordering | expand

Commit Message

Jan Hubicka April 29, 2018, 6:02 p.m. UTC
Hi,
this patch fixes the problem with more or less random code layout with
LTO.  Currently we split unit into partitions, sort them by size (so parallel
compile works well) and output in this order. Problem is that the order of
final binary is then more or less random - dependent on partitioning
decisions.

This patch extens ltrans output file to contain priorities and makes lto-wrapper
to sort the partitions by priorities only when producing the inner makefile but
preserving original order in all other cases.

Boostrapped/regtested x86_64-linux and gcc8 branch with LTO (currently I can not
bootstrap trunk with lto enabled).  I plan to commit it tomorrow to mainline if there
are no complains and later to gcc 8 branch.

Honza

	* lto-wrapper.c (ltrans_priorities): New static var.
	(cmp_priority): New.
	(run_gcc): Read priorities and if doing parallel build order
	the Makefile by them.

	* lto.c (cmp_partitions_size): Remove.
	(lto_wpa_write_files): Also output priorities; do not sort partitions.
	(cmp_partition_order): Move to ...
	* lto-partition.c (cmp_partition_order): ...
	(lto_1_to_1_map): Sort partitions.
diff mbox series

Patch

Index: lto/lto-partition.c
===================================================================
--- lto/lto-partition.c	(revision 259747)
+++ lto/lto-partition.c	(working copy)
@@ -41,6 +41,24 @@ 
 static void add_symbol_to_partition (ltrans_partition part, symtab_node *node);
 
 
+/* Helper for qsort; compare partitions and return one with smaller order.  */
+
+static int
+cmp_partitions_order (const void *a, const void *b)
+{
+  const struct ltrans_partition_def *pa
+     = *(struct ltrans_partition_def *const *)a;
+  const struct ltrans_partition_def *pb
+     = *(struct ltrans_partition_def *const *)b;
+  int ordera = -1, orderb = -1;
+
+  if (lto_symtab_encoder_size (pa->encoder))
+    ordera = lto_symtab_encoder_deref (pa->encoder, 0)->order;
+  if (lto_symtab_encoder_size (pb->encoder))
+    orderb = lto_symtab_encoder_deref (pb->encoder, 0)->order;
+  return orderb - ordera;
+}
+
 /* Create new partition with name NAME.  */
 
 static ltrans_partition
@@ -332,6 +350,9 @@ 
   if (!npartitions)
     new_partition ("empty");
 
+  /* Order partitions by order of symbols because they are linked into binary
+     that way.  */
+  ltrans_partitions.qsort (cmp_partitions_order);
 }
 
 /* Maximal partitioning.  Put every new symbol into new partition if possible.  */
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 259747)
+++ lto/lto.c	(working copy)
@@ -2327,38 +2327,6 @@ 
 
 static lto_file *current_lto_file;
 
-/* Helper for qsort; compare partitions and return one with smaller size.
-   We sort from greatest to smallest so parallel build doesn't stale on the
-   longest compilation being executed too late.  */
-
-static int
-cmp_partitions_size (const void *a, const void *b)
-{
-  const struct ltrans_partition_def *pa
-     = *(struct ltrans_partition_def *const *)a;
-  const struct ltrans_partition_def *pb
-     = *(struct ltrans_partition_def *const *)b;
-  return pb->insns - pa->insns;
-}
-
-/* Helper for qsort; compare partitions and return one with smaller order.  */
-
-static int
-cmp_partitions_order (const void *a, const void *b)
-{
-  const struct ltrans_partition_def *pa
-     = *(struct ltrans_partition_def *const *)a;
-  const struct ltrans_partition_def *pb
-     = *(struct ltrans_partition_def *const *)b;
-  int ordera = -1, orderb = -1;
-
-  if (lto_symtab_encoder_size (pa->encoder))
-    ordera = lto_symtab_encoder_deref (pa->encoder, 0)->order;
-  if (lto_symtab_encoder_size (pb->encoder))
-    orderb = lto_symtab_encoder_deref (pb->encoder, 0)->order;
-  return orderb - ordera;
-}
-
 /* Actually stream out ENCODER into TEMP_FILENAME.  */
 
 static void
@@ -2468,7 +2436,8 @@ 
   ltrans_partition part;
   FILE *ltrans_output_list_stream;
   char *temp_filename;
-  vec <char *>temp_filenames = vNULL;
+  auto_vec <char *>temp_filenames;
+  auto_vec <int>temp_priority;
   size_t blen;
 
   /* Open the LTRANS output list.  */
@@ -2496,15 +2465,6 @@ 
 
   n_sets = ltrans_partitions.length ();
 
-  /* Sort partitions by size so small ones are compiled last.
-     FIXME: Even when not reordering we may want to output one list for parallel make
-     and other for final link command.  */
-
-  if (!flag_profile_reorder_functions || !flag_profile_use)
-    ltrans_partitions.qsort (flag_toplevel_reorder
-			   ? cmp_partitions_size
-			   : cmp_partitions_order);
-
   for (i = 0; i < n_sets; i++)
     {
       ltrans_partition part = ltrans_partitions[i];
@@ -2556,6 +2516,7 @@ 
 
       part->encoder = NULL;
 
+      temp_priority.safe_push (part->insns);
       temp_filenames.safe_push (xstrdup (temp_filename));
     }
   ltrans_output_list_stream = fopen (ltrans_output_list, "w");
@@ -2565,13 +2526,13 @@ 
   for (i = 0; i < n_sets; i++)
     {
       unsigned int len = strlen (temp_filenames[i]);
-      if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
+      if (fprintf (ltrans_output_list_stream, "%i\n", temp_priority[i]) < 0
+	  || fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
 	  || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
 	fatal_error (input_location, "writing to LTRANS output list %s: %m",
 		     ltrans_output_list);
      free (temp_filenames[i]);
     }
-  temp_filenames.release();
 
   lto_stats.num_output_files += n_sets;
 
Index: lto-wrapper.c
===================================================================
--- lto-wrapper.c	(revision 259747)
+++ lto-wrapper.c	(working copy)
@@ -65,6 +65,7 @@ 
 static char *ltrans_output_file;
 static char *flto_out;
 static unsigned int nr;
+static int *ltrans_priorities;
 static char **input_names;
 static char **output_names;
 static char **offload_names;
@@ -1018,8 +1019,15 @@ 
   return outfile;
 }
 
+/* Helper for qsort: compare priorities for parallel compilation.  */
 
+int
+cmp_priority (const void *a, const void *b)
+{
+  return *((const int *)b)-*((const int *)a);
+}
 
+
 /* Execute gcc. ARGC is the number of arguments. ARGV contains the arguments. */
 
 static void
@@ -1477,6 +1485,7 @@ 
       FILE *stream = fopen (ltrans_output_file, "r");
       FILE *mstream = NULL;
       struct obstack env_obstack;
+      int priority;
 
       if (!stream)
 	fatal_error (input_location, "fopen: %s: %m", ltrans_output_file);
@@ -1492,6 +1501,14 @@ 
 	  size_t len;
 
 	  buf = input_name;
+          if (fscanf (stream, "%i\n", &priority) != 1)
+	    {
+	      if (!feof (stream))
+	        fatal_error (input_location,
+		             "Corrupted ltrans output file %s",
+			     ltrans_output_file);
+	      break;
+	    }
 cont:
 	  if (!fgets (buf, piece, stream))
 	    break;
@@ -1508,8 +1525,12 @@ 
 	    output_name = &input_name[1];
 
 	  nr++;
+	  ltrans_priorities
+	     = (int *)xrealloc (ltrans_priorities, nr * sizeof (int) * 2);
 	  input_names = (char **)xrealloc (input_names, nr * sizeof (char *));
 	  output_names = (char **)xrealloc (output_names, nr * sizeof (char *));
+	  ltrans_priorities[(nr-1)*2] = priority;
+	  ltrans_priorities[(nr-1)*2+1] = nr-1;
 	  input_names[nr-1] = input_name;
 	  output_names[nr-1] = output_name;
 	}
@@ -1521,6 +1542,7 @@ 
 	{
 	  makefile = make_temp_file (".mk");
 	  mstream = fopen (makefile, "w");
+	  qsort (ltrans_priorities, nr, sizeof (int) * 2, cmp_priority);
 	}
 
       /* Execute the LTRANS stage for each input file (or prepare a
@@ -1586,7 +1608,10 @@ 
 
 	  fprintf (mstream, "all:");
 	  for (i = 0; i < nr; ++i)
-	    fprintf (mstream, " \\\n\t%s", output_names[i]);
+	    {
+	      int j = ltrans_priorities[i*2 + 1];
+	      fprintf (mstream, " \\\n\t%s", output_names[j]);
+	    }
 	  fprintf (mstream, "\n");
 	  fclose (mstream);
 	  if (!jobserver)
@@ -1630,6 +1655,7 @@ 
 	  free (input_names[i]);
 	}
       nr = 0;
+      free (ltrans_priorities);
       free (output_names);
       free (input_names);
       free (list_option_full);