diff mbox

[RFC] Getting LTO incremental linking work

Message ID 20151125230758.GF20593@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka Nov. 25, 2015, 11:07 p.m. UTC
Hi,
this is the first part of patch that adds -flinker-output flags and gets symbol
visibility right.  It makes the testcase in the PR to pass, but I do not know
how to turn it into a testsuite ready version.
I remember there was other PRs related to incremental linking and symbol visibility,
I will try to find them and see if any of those can be turned into a testcase.

I think this may even be backportable to GCC 5 if it does not cause any new
issues.  Also the collect2 path can probably indeed be updated, it has info
about what type of binary it produces.  I will try to look into it incrementally.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

	PR lto/67548
	* lto-plugin.c (linker_output, linker_output_set): New statics.
	(all_symbols_read_handler): Add -flinker-output option.
	(onload): Record linker_output info.

	* ipa-visibility.c (cgraph_externally_visible_p,
	varpool_node::externally_visible_p): When doing incremental linking,
	hidden symbols may be still used later.
	(update_visibility_by_resolution_info): Do not drop weak during
	incremental link.
	(function_and_variable_visibility): Fix formating.
	* flag-types.h (lto_linker_output): Declare.
	* common.opt 9flag_incremental_link): New flag.

	* lto-lang.c (lto_post_options): Process flag_lto_linker_output.
	* lang.opt (lto_linker_output): New enum.
	(flinker_output): New flag.

Comments

Tom de Vries Nov. 28, 2015, 9:35 a.m. UTC | #1
On 26/11/15 00:07, Jan Hubicka wrote:
> (flinker_output): New flag.

Hi,

this seems to have cause a regression when using a compiler configured 
for offloading (giving ~1000 fails in libgomp testing).

For test-case libgomp.c/examples-4/array_sections-3.c, we enter run_gcc 
in lto-wrapper with args:
...
Breakpoint 1, run_gcc (argc=4, argv=0x7fffffffde68) at 
src/gcc-gomp-4_0-branch/gcc/lto-wrapper.c:897
897       char *list_option_full = NULL;
(gdb) p argv[0]
$8 = 0x7fffffffe104 "lto-wrapper"
(gdb) p argv[1]
$9 = 0x7fffffffe1af "-fresolution=array_sections-3.res"
(gdb) p argv[2]
$10 = 0x7fffffffe1d1 "-flinker-output=exec"
(gdb) p argv[3]
$11 = 0x7fffffffe1e6 "array_sections-3.o"
...

And here (cc-ing author of this bit) we decide that -flinker-output=exec 
is a file:
...
/* If object files contain offload sections, but do not contain LTO
    sections,
    then there is no need to perform a link-time recompilation, i.e.
    lto-wrapper is used only for a compilation of offload images. */
if (have_offload && !have_lto)
   {
     for (i = 1; i < argc; ++i)
       if (strncmp (argv[i], "-fresolution=",
		   sizeof ("-fresolution=") - 1))
	{
	  char *out_file;
	  /* Can be ".o" or ".so". */
	  char *ext = strrchr (argv[i], '.');
	  if (ext == NULL)
	    out_file = make_temp_file ("");
	  else
	    out_file = make_temp_file (ext);
	  /* The linker will delete the files we give it, so make
	     copies. */
	  copy_file (out_file, argv[i]);
	  printf ("%s\n", out_file);
	}
     goto finish;
   }
...

And try to copy it:
...
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff783d7e0 in feof () from /lib/libc.so.6
(gdb) bt
#0  0x00007ffff783d7e0 in feof () from /lib/libc.so.6
#1  0x0000000000406ff5 in copy_file (dest=0x71cdd0 "/tmp/ccL6HCCe", 
src=0x7fffffffe1d1 "-flinker-output=exec")
     at lto-wrapper.c:769
#2  0x00000000004080b7 in run_gcc (argc=4, argv=0x7fffffffde68) at 
gcc/lto-wrapper.c:1109
#3  0x0000000000409873 in main (argc=4, argv=0x7fffffffde68) at 
gcc/lto-wrapper.c:1396
...

Thanks,
- Tom
diff mbox

Patch

Index: lto-plugin/lto-plugin.c
===================================================================
--- lto-plugin/lto-plugin.c	(revision 230897)
+++ lto-plugin/lto-plugin.c	(working copy)
@@ -167,6 +167,8 @@  static unsigned int num_pass_through_ite
 static char debug;
 static char nop;
 static char *resolution_file = NULL;
+static enum ld_plugin_output_file_type linker_output;
+static int linker_output_set;
 
 /* The version of gold being used, or -1 if not gold.  The number is
    MAJOR * 100 + MINOR.  */
@@ -624,8 +626,9 @@  all_symbols_read_handler (void)
 {
   unsigned i;
   unsigned num_lto_args
-    = num_claimed_files + num_offload_files + lto_wrapper_num_args + 1;
+    = num_claimed_files + num_offload_files + lto_wrapper_num_args + 2;
   char **lto_argv;
+  const char *linker_output_str;
   const char **lto_arg_ptr;
   if (num_claimed_files + num_offload_files == 0)
     return LDPS_OK;
@@ -648,6 +651,26 @@  all_symbols_read_handler (void)
   for (i = 0; i < lto_wrapper_num_args; i++)
     *lto_arg_ptr++ = lto_wrapper_argv[i];
 
+  assert (linker_output_set);
+  switch (linker_output)
+    {
+    case LDPO_REL:
+      linker_output_str = "-flinker-output=rel";
+      break;
+    case LDPO_DYN:
+      linker_output_str = "-flinker-output=dyn";
+      break;
+    case LDPO_PIE:
+      linker_output_str = "-flinker-output=pie";
+      break;
+    case LDPO_EXEC:
+      linker_output_str = "-flinker-output=exec";
+      break;
+    default:
+      message (LDPL_FATAL, "unsupported linker output %i", linker_output);
+      break;
+    }
+  *lto_arg_ptr++ = xstrdup (linker_output_str);
   for (i = 0; i < num_claimed_files; i++)
     {
       struct plugin_file_info *info = &claimed_files[i];
@@ -1100,6 +1123,10 @@  onload (struct ld_plugin_tv *tv)
 	case LDPT_GOLD_VERSION:
 	  gold_version = p->tv_u.tv_val;
 	  break;
+	case LDPT_LINKER_OUTPUT:
+	  linker_output = (enum ld_plugin_output_file_type) p->tv_u.tv_val;
+	  linker_output_set = 1;
+	  break;
 	default:
 	  break;
 	}
Index: gcc/lto/lto-lang.c
===================================================================
--- gcc/lto/lto-lang.c	(revision 230902)
+++ gcc/lto/lto-lang.c	(working copy)
@@ -819,6 +819,35 @@  lto_post_options (const char **pfilename
   if (flag_wpa)
     flag_generate_lto = 1;
 
+  /* Initialize the codegen flags according to the output type.  */
+  switch (flag_lto_linker_output)
+    {
+    case LTO_LINKER_OUTPUT_REL: /* .o: incremental link producing LTO IL  */
+      flag_whole_program = 0;
+      flag_incremental_link = 1;
+      break;
+
+    case LTO_LINKER_OUTPUT_DYN: /* .so: PID library */
+      /* On some targets, like i386 it makes sense to build PIC library wihout
+	 -fpic for performance reasons.  So no need to adjust flags.  */
+      break;
+
+    case LTO_LINKER_OUTPUT_PIE: /* PIE binary */
+      /* If -fPIC or -fPIE was used at compile time, be sure that
+         flag_pie is 2.  */
+      flag_pie = MAX (flag_pie, flag_pic);
+      flag_pic = 0;
+      break;
+
+    case LTO_LINKER_OUTPUT_EXEC: /* Normal executable */
+      flag_pic = 0;
+      flag_pie = 0;
+      break;
+
+    case LTO_LINKER_OUTPUT_UNKNOWN:
+      break;
+    }
+
   /* Excess precision other than "fast" requires front-end
      support.  */
   flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
Index: gcc/lto/lang.opt
===================================================================
--- gcc/lto/lang.opt	(revision 230902)
+++ gcc/lto/lang.opt	(working copy)
@@ -24,6 +24,29 @@ 
 Language
 LTO
 
+Enum
+Name(lto_linker_output) Type(enum lto_linker_output) UnknownError(unknown linker output %qs)
+
+EnumValue
+Enum(lto_linker_output) String(unknown) Value(LTO_LINKER_OUTPUT_UNKNOWN)
+
+EnumValue
+Enum(lto_linker_output) String(rel) Value(LTO_LINKER_OUTPUT_REL)
+
+EnumValue
+Enum(lto_linker_output) String(dyn) Value(LTO_LINKER_OUTPUT_DYN)
+
+EnumValue
+Enum(lto_linker_output) String(pie) Value(LTO_LINKER_OUTPUT_PIE)
+
+EnumValue
+Enum(lto_linker_output) String(exec) Value(LTO_LINKER_OUTPUT_EXEC)
+
+flinker-output=
+LTO Report Driver Joined RejectNegative Enum(lto_linker_output) Var(flag_lto_linker_output) Init(LTO_LINKER_OUTPUT_UNKNOWN)
+Set linker output type (used internally during LTO optimization)
+
+
 fltrans
 LTO Report Var(flag_ltrans)
 Run the link-time optimizer in local transformation (LTRANS) mode.
Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h	(revision 230902)
+++ gcc/flag-types.h	(working copy)
@@ -265,6 +265,14 @@  enum lto_partition_model {
   LTO_PARTITION_MAX = 4
 };
 
+/* flag_lto_linker_output initialization values.  */
+enum lto_linker_output {
+  LTO_LINKER_OUTPUT_UNKNOWN,
+  LTO_LINKER_OUTPUT_REL,
+  LTO_LINKER_OUTPUT_DYN,
+  LTO_LINKER_OUTPUT_PIE,
+  LTO_LINKER_OUTPUT_EXEC
+};
 
 /* gfortran -finit-real= values.  */
 
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 230902)
+++ gcc/common.opt	(working copy)
@@ -46,6 +46,12 @@  int optimize_fast
 Variable
 bool in_lto_p = false
 
+; This variable is set to non-0 only by LTO front-end.  1 indicates that
+; the output produced will be used for incrmeental linking (thus weak symbols
+; can still be bound).
+Variable
+int flag_incremental_link = 0
+
 ; 0 means straightforward implementation of complex divide acceptable.
 ; 1 means wide ranges of inputs must work for complex divide.
 ; 2 means C99-like requirements for complex multiply and divide.
Index: ipa-visibility.c
===================================================================
--- ipa-visibility.c	(revision 230902)
+++ ipa-visibility.c	(working copy)
@@ -217,13 +217,13 @@  cgraph_externally_visible_p (struct cgra
      This improves code quality and we know we will duplicate them at most twice
      (in the case that we are not using plugin and link with object file
       implementing same COMDAT)  */
-  if ((in_lto_p || whole_program)
+  if (((in_lto_p || whole_program) && !flag_incremental_link)
       && DECL_COMDAT (node->decl)
       && comdat_can_be_unshared_p (node))
     return false;
 
   /* When doing link time optimizations, hidden symbols become local.  */
-  if (in_lto_p
+  if ((in_lto_p && !flag_incremental_link)
       && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
 	  || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL)
       /* Be sure that node is defined in IR file, not in other object
@@ -293,13 +293,13 @@  varpool_node::externally_visible_p (void
      so this does not enable more optimization, but referring static var
      is faster for dynamic linking.  Also this match logic hidding vtables
      from LTO symbol tables.  */
-  if ((in_lto_p || flag_whole_program)
+  if (((in_lto_p || flag_whole_program) && !flag_incremental_link)
       && DECL_COMDAT (decl)
       && comdat_can_be_unshared_p (this))
     return false;
 
   /* When doing link time optimizations, hidden symbols become local.  */
-  if (in_lto_p
+  if (in_lto_p && !flag_incremental_link
       && (DECL_VISIBILITY (decl) == VISIBILITY_HIDDEN
 	  || DECL_VISIBILITY (decl) == VISIBILITY_INTERNAL)
       /* Be sure that node is defined in IR file, not in other object
@@ -405,17 +405,36 @@  update_visibility_by_resolution_info (sy
     for (symtab_node *next = node->same_comdat_group;
 	 next != node; next = next->same_comdat_group)
       {
-	next->set_comdat_group (NULL);
-	DECL_WEAK (next->decl) = false;
+	/* During incremental linking we need to keep symbol weak for future
+	   linking.  We can still drop definition if we know non-LTO world
+	   prevails.  */
+	if (!flag_incremental_link)
+	  {
+	    DECL_WEAK (next->decl) = false;
+	    next->set_comdat_group (NULL);
+	  }
 	if (next->externally_visible
 	    && !define)
-	  DECL_EXTERNAL (next->decl) = true;
+	  {
+	    DECL_EXTERNAL (next->decl) = true;
+	    next->set_comdat_group (NULL);
+	  }
       }
-  node->set_comdat_group (NULL);
-  DECL_WEAK (node->decl) = false;
+
+  /* During incremental linking we need to keep symbol weak for future
+     linking.  We can still drop definition if we know non-LTO world prevails.  */
+  if (!flag_incremental_link)
+    {
+      DECL_WEAK (node->decl) = false;
+      node->set_comdat_group (NULL);
+      node->dissolve_same_comdat_group_list ();
+    }
   if (!define)
-    DECL_EXTERNAL (node->decl) = true;
-  node->dissolve_same_comdat_group_list ();
+    {
+      DECL_EXTERNAL (node->decl) = true;
+      node->set_comdat_group (NULL);
+      node->dissolve_same_comdat_group_list ();
+    }
 }
 
 /* Decide on visibility of all symbols.  */
@@ -639,8 +658,9 @@  function_and_variable_visibility (bool w
 	{
 	  gcc_assert (in_lto_p || whole_program || !TREE_PUBLIC (vnode->decl));
 	  vnode->unique_name = ((vnode->resolution == LDPR_PREVAILING_DEF_IRONLY
-				       || vnode->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP)
-				       && TREE_PUBLIC (vnode->decl));
+			         || vnode->resolution
+				      == LDPR_PREVAILING_DEF_IRONLY_EXP)
+			        && TREE_PUBLIC (vnode->decl));
 	  if (vnode->same_comdat_group && TREE_PUBLIC (vnode->decl))
 	    {
 	      symtab_node *next = vnode;