diff mbox

nvptx offloading patches [3/n], RFD

Message ID 20150217153918.GX1746@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Feb. 17, 2015, 3:39 p.m. UTC
On Tue, Feb 17, 2015 at 04:32:06PM +0300, Ilya Verbin wrote:
> > If we don't try to write .gnu.offload_lto_* again, I think following patch
> > with additionally not calling lto_write_mode_table for !lto_stream_offload_p
> > and not calling lto_input_mode_table for !ACCEL_COMPILER - instead build
> > a single shared identity table - might actually work.
> > 
> > Thoughts on this?
> 
> Probably the ACCEL_COMPILER in WPA mode (flag_wpa) can read .gnu.offload_lto_*
> sections and produce temporary partitions with .gnu.lto_* sections.  And the
> ACCEL_COMPILER in LTRANS mode (flag_ltrans) will read .gnu.lto_* sections?

FYI, I have tested my mode_table patch with the intelmic emul offloading and
saw no regressions.

Then I went over and wanted to try nvptx offloading, but running into
various issues.

I had two patches from Bernd (already approved, why they haven't been
installed?) applied, had to tweak the first one so that it applies,
then my mode_table patch.
I've built nvptx-tools and configured:
../configure --target=nvptx-none --enable-as-accelerator-for=x86_64-pc-linux-gnu --with-build-time-tools=/usr/src/gcc/objnvptxinst/usr/local/nvptx-none/bin --disable-sjlj-exceptions --enable-newlib-io-long-long
make -j16
This failed miserably, because of missing mkoffload.o dependencies, patch attached
(ok for trunk?; it does what intelmic mkoffload.o does; I've tried to add
| $(generated_files) dependency instead, but that somehow didn't work).

The second attempt with that fixed died because for some reason
nvptx-none-as wants to verify by default using ptxas.  Can that be made
configurable?  E.g. for building nvptx offloading in distros, I believe
due to the proprietary cuda stuff it will be better if everything can be
built without the proprietary stuff and only used when actually running it.
E.g. could the verification be done by default only if ptxas is found in
$PATH and not if it isn't found?

Third attempt failed with:
../../../libgcc/config/nvptx/realloc.c:24:20: fatal error: stdlib.h: No such file or directory
compilation terminated.
../../../libgcc/static-object.mk:17: recipe for target 'realloc.o' failed
make[2]: *** [realloc.o] Error 1
make[2]: *** Waiting for unfinished jobs....
make[2]: Leaving directory '/usr/src/gcc/objnvptx/nvptx-none/libgcc'
I have nvptx-newlib symlinked into the gcc tree as newlib, so I expected it
would be built in-tree, is that not the case (at least wiki/Offloading
mentions that).  Or is it just that libgcc can't really have dependencies on
newlib headers as newlib is built after libgcc?

	Jakub
* cgraph.h (clone_function_name_1): Declare.
    	* cgraphclones.c (clone_function_name_1): New function.
    	(clone_function_name): Use it.
    	* lto-partition.c: Include "stringpool.h".
    	(must_not_rename, maybe_rewrite_identifier,
    	validize_symbol_for_target): New static functions.
    	(privatize_symbol_name): Use must_not_rename.
    	(promote_symbol): Call validize_symbol_for_target.
    	(lto_promote_cross_file_statics): Likewise.
    	(lto_promote_statics_nonwpa): Likewise.
* tree-streamer-in.c (unpack_ts_decl_common_value_fields,
	unpack_ts_type_common_value_fields): If ACCEL_COMPILER,
	restrict alignments to absolute_biggest_alignment.
	* config/i386/i386.c (TARGET_ABSOLUTE_BIGGEST_ALIGNMENT):
	Define.
	* doc/tm.texi.in (TARGET_ABSOLUTE_BIGGEST_ALIGNMENT): Add.
	* doc/tm.texi: Regenerate.
	* target.def (absolute_biggest_alignment): New DEFHOOKPOD.

Index: gcc/tree-streamer-in.c
===================================================================
--- gcc/tree-streamer-in.c.orig
+++ gcc/tree-streamer-in.c
@@ -217,7 +217,10 @@ unpack_ts_decl_common_value_fields (stru
   DECL_EXTERNAL (expr) = (unsigned) bp_unpack_value (bp, 1);
   DECL_GIMPLE_REG_P (expr) = (unsigned) bp_unpack_value (bp, 1);
   DECL_ALIGN (expr) = (unsigned) bp_unpack_var_len_unsigned (bp);
-
+#ifdef ACCEL_COMPILER
+  if (DECL_ALIGN (expr) > targetm.absolute_biggest_alignment)
+    DECL_ALIGN (expr) = targetm.absolute_biggest_alignment;
+#endif
   if (TREE_CODE (expr) == LABEL_DECL)
     {
       EH_LANDING_PAD_NR (expr) = (int) bp_unpack_var_len_unsigned (bp);
@@ -359,6 +362,10 @@ unpack_ts_type_common_value_fields (stru
   TYPE_READONLY (expr) = (unsigned) bp_unpack_value (bp, 1);
   TYPE_PRECISION (expr) = bp_unpack_var_len_unsigned (bp);
   TYPE_ALIGN (expr) = bp_unpack_var_len_unsigned (bp);
+#ifdef ACCEL_COMPILER
+  if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment)
+    TYPE_ALIGN (expr) = targetm.absolute_biggest_alignment;
+#endif
   TYPE_ALIAS_SET (expr) = bp_unpack_var_len_int (bp);
 }
 
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -47623,6 +47623,9 @@ ix86_atomic_assign_expand_fenv (tree *ho
 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
 
+#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
+#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -784,7 +784,10 @@ extern const char *host_detect_local_cpu
    rounder than this.
 
    Pentium+ prefers DFmode values to be aligned to 64 bit boundary
-   and Pentium Pro XFmode values at 128 bit boundaries.  */
+   and Pentium Pro XFmode values at 128 bit boundaries.
+
+   When increasing the maximum, also update
+   TARGET_ABSOLUTE_BIGGEST_ALIGNMENT.  */
 
 #define BIGGEST_ALIGNMENT \
   (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128))
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi.orig
+++ gcc/doc/tm.texi
@@ -1003,6 +1003,12 @@ bits.  Note that this is not the biggest
 just the biggest alignment that, when violated, may cause a fault.
 @end defmac
 
+@deftypevr {Target Hook} HOST_WIDE_INT TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
+If defined, this target hook specifies the absolute biggest alignment
+that a type or variable can have on this machine, otherwise,
+@code{BIGGEST_ALIGNMENT} is used.
+@end deftypevr
+
 @defmac MALLOC_ABI_ALIGNMENT
 Alignment, in bits, a C conformant malloc implementation has to
 provide.  If not defined, the default value is @code{BITS_PER_WORD}.
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in.orig
+++ gcc/doc/tm.texi.in
@@ -957,6 +957,8 @@ bits.  Note that this is not the biggest
 just the biggest alignment that, when violated, may cause a fault.
 @end defmac
 
+@hook TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
+
 @defmac MALLOC_ABI_ALIGNMENT
 Alignment, in bits, a C conformant malloc implementation has to
 provide.  If not defined, the default value is @code{BITS_PER_WORD}.
Index: gcc/target.def
===================================================================
--- gcc/target.def.orig
+++ gcc/target.def
@@ -1760,6 +1760,13 @@ HOOK_VECTOR_END (vectorize)
 #undef HOOK_PREFIX
 #define HOOK_PREFIX "TARGET_"
 
+DEFHOOKPOD
+(absolute_biggest_alignment,
+ "If defined, this target hook specifies the absolute biggest alignment\n\
+that a type or variable can have on this machine, otherwise,\n\
+@code{BIGGEST_ALIGNMENT} is used.",
+ HOST_WIDE_INT, BIGGEST_ALIGNMENT)
+
 /* Allow target specific overriding of option settings after options have
   been changed by an attribute or pragma or when it is reset at the
   end of the code affected by an attribute or pragma.  */
2015-02-17  Jakub Jelinek  <jakub@redhat.com>

	* config/nvptx/t-nvptx (mkoffload.o): Compile after insn-modes.h
	is generated.

--- gcc/config/nvptx/t-nvptx.jj	2015-01-28 21:24:56.000000000 +0100
+++ gcc/config/nvptx/t-nvptx	2015-02-17 16:08:39.026676002 +0100
@@ -1,6 +1,6 @@
 CFLAGS-mkoffload.o += $(DRIVER_DEFINES) \
 	-DGCC_INSTALL_NAME=\"$(GCC_INSTALL_NAME)\"
-mkoffload.o: $(srcdir)/config/nvptx/mkoffload.c
+mkoffload.o: $(srcdir)/config/nvptx/mkoffload.c | insn-modes.h
 	$(COMPILE) $<
 	$(POSTCOMPILE)
diff mbox

Patch

--- gcc/cgraph.h.jj	2015-02-16 11:19:03.474984223 +0100
+++ gcc/cgraph.h	2015-02-17 13:54:00.413964133 +0100
@@ -2206,6 +2206,7 @@  basic_block init_lowered_empty_function
 
 /* In cgraphclones.c  */
 
+tree clone_function_name_1 (const char *, const char *);
 tree clone_function_name (tree decl, const char *);
 
 void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *,
--- gcc/cgraphclones.c.jj	2015-02-17 10:07:53.208582797 +0100
+++ gcc/cgraphclones.c	2015-02-17 13:54:00.413964133 +0100
@@ -533,19 +533,19 @@  cgraph_node::create_clone (tree decl, gc
   return new_node;
 }
 
-/* Return a new assembler name for a clone of DECL with SUFFIX.  */
-
 static GTY(()) unsigned int clone_fn_id_num;
 
+/* Return a new assembler name for a clone with SUFFIX of a decl named
+   NAME.  */
+
 tree
-clone_function_name (tree decl, const char *suffix)
+clone_function_name_1 (const char *name, const char *suffix)
 {
-  tree name = DECL_ASSEMBLER_NAME (decl);
-  size_t len = IDENTIFIER_LENGTH (name);
+  size_t len = strlen (name);
   char *tmp_name, *prefix;
 
   prefix = XALLOCAVEC (char, len + strlen (suffix) + 2);
-  memcpy (prefix, IDENTIFIER_POINTER (name), len);
+  memcpy (prefix, name, len);
   strcpy (prefix + len + 1, suffix);
 #ifndef NO_DOT_IN_LABEL
   prefix[len] = '.';
@@ -558,6 +558,16 @@  clone_function_name (tree decl, const ch
   return get_identifier (tmp_name);
 }
 
+/* Return a new assembler name for a clone of DECL with SUFFIX.  */
+
+tree
+clone_function_name (tree decl, const char *suffix)
+{
+  tree name = DECL_ASSEMBLER_NAME (decl);
+  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
+}
+
+
 /* Create callgraph node clone with new declaration.  The actual body will
    be copied later at compilation stage.
 
--- gcc/lto/lto-partition.c.jj	2015-01-15 14:05:08.706092596 +0100
+++ gcc/lto/lto-partition.c	2015-02-17 14:01:13.182718693 +0100
@@ -57,6 +57,7 @@  along with GCC; see the file COPYING3.
 #include "ipa-inline.h"
 #include "ipa-utils.h"
 #include "lto-partition.h"
+#include "stringpool.h"
 
 vec<ltrans_partition> ltrans_partitions;
 
@@ -783,29 +784,11 @@  lto_balanced_map (int n_lto_partitions)
   free (order);
 }
 
-/* Mangle NODE symbol name into a local name.  
-   This is necessary to do
-   1) if two or more static vars of same assembler name
-      are merged into single ltrans unit.
-   2) if prevoiusly static var was promoted hidden to avoid possible conflict
-      with symbols defined out of the LTO world.
-*/
-
+/* Return true if we must not change the name of the NODE.  The name as
+   extracted from the corresponding decl should be passed in NAME.  */
 static bool
-privatize_symbol_name (symtab_node *node)
+must_not_rename (symtab_node *node, const char *name)
 {
-  tree decl = node->decl;
-  cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
-  const char *name;
-
-  /* If we want to privatize instrumentation clone
-     then we need to change original function name
-     which is used via transparent alias chain.  */
-  if (cnode && cnode->instrumentation_clone)
-    decl = cnode->orig_decl;
-
-  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
-
   /* Our renaming machinery do not handle more than one change of assembler name.
      We should not need more than one anyway.  */
   if (node->lto_file_data
@@ -813,9 +796,9 @@  privatize_symbol_name (symtab_node *node
     {
       if (symtab->dump_file)
 	fprintf (symtab->dump_file,
-		"Not privatizing symbol name: %s. It privatized already.\n",
-		name);
-      return false;
+		 "Not privatizing symbol name: %s. It privatized already.\n",
+		 name);
+      return true;
     }
   /* Avoid mangling of already mangled clones. 
      ???  should have a flag whether a symbol has a 'private' name already,
@@ -825,18 +808,108 @@  privatize_symbol_name (symtab_node *node
     {
       if (symtab->dump_file)
 	fprintf (symtab->dump_file,
-		"Not privatizing symbol name: %s. Has unique name.\n",
-		name);
-      return false;
+		 "Not privatizing symbol name: %s. Has unique name.\n",
+		 name);
+      return true;
+    }
+  return false;
+}
+
+/* If we are an offload compiler, we may have to rewrite symbols to be
+   valid on this target.  Return either PTR or a modified version of it.  */
+
+static const char *
+maybe_rewrite_identifier (const char *ptr)
+{
+#if defined ACCEL_COMPILER && (defined NO_DOT_IN_LABEL || defined NO_DOLLAR_IN_LABEL)
+#ifndef NO_DOT_IN_LABEL
+  char valid = '.';
+  const char reject[] = "$";
+#elif !defined NO_DOLLAR_IN_LABEL
+  char valid = '$';
+  const char reject[] = ".";
+#else
+  char valid = '_';
+  const char reject[] = ".$";
+#endif
+
+  char *copy = NULL;
+  const char *match = ptr;
+  for (;;)
+    {
+      size_t off = strcspn (match, reject);
+      if (match[off] == '\0')
+	break;
+      if (copy == NULL)
+	{
+	  copy = xstrdup (ptr);
+	  match = copy;
+	}
+      copy[off] = valid;
+    }
+  return match;
+#else
+  return ptr;
+#endif
+}
+
+/* Ensure that the symbol in NODE is valid for the target, and if not,
+   rewrite it.  */
+
+static void
+validize_symbol_for_target (symtab_node *node)
+{
+  tree decl = node->decl;
+  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+
+  if (must_not_rename (node, name))
+    return;
+
+  const char *name2 = maybe_rewrite_identifier (name);
+  if (name2 != name)
+    {
+      symtab->change_decl_assembler_name (decl, get_identifier (name2));
+      if (node->lto_file_data)
+	lto_record_renamed_decl (node->lto_file_data, name,
+				 IDENTIFIER_POINTER
+				 (DECL_ASSEMBLER_NAME (decl)));
     }
+}
+
+/* Mangle NODE symbol name into a local name.  
+   This is necessary to do
+   1) if two or more static vars of same assembler name
+      are merged into single ltrans unit.
+   2) if previously static var was promoted hidden to avoid possible conflict
+      with symbols defined out of the LTO world.  */
+
+static bool
+privatize_symbol_name (symtab_node *node)
+{
+  tree decl = node->decl;
+  cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
+
+  /* If we want to privatize instrumentation clone
+     then we need to change original function name
+     which is used via transparent alias chain.  */
+  if (cnode && cnode->instrumentation_clone)
+    decl = cnode->orig_decl;
+
+  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+
+  if (must_not_rename (node, name))
+    return false;
+
+  name = maybe_rewrite_identifier (name);
   symtab->change_decl_assembler_name (decl,
-				      clone_function_name (decl, "lto_priv"));
+				      clone_function_name_1 (name,
+							     "lto_priv"));
   if (node->lto_file_data)
     lto_record_renamed_decl (node->lto_file_data, name,
 			     IDENTIFIER_POINTER
 			     (DECL_ASSEMBLER_NAME (decl)));
   /* We could change name which is a target of transparent alias
-     chain of instrumented function name.  Fix alias chain if so  .*/
+     chain of instrumented function name.  Fix alias chain if so.  */
   if (cnode)
     {
       tree iname = NULL_TREE;
@@ -868,7 +941,10 @@  promote_symbol (symtab_node *node)
   if (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
       && DECL_VISIBILITY_SPECIFIED (node->decl)
       && TREE_PUBLIC (node->decl))
-    return;
+    {
+      validize_symbol_for_target (node);
+      return;
+    }
 
   gcc_checking_assert (!TREE_PUBLIC (node->decl)
 		       && !DECL_EXTERNAL (node->decl));
@@ -1007,7 +1083,10 @@  lto_promote_cross_file_statics (void)
 	      /* ... or if we do not partition it. This mean that it will
 		 appear in every partition refernecing it.  */
 	      || node->get_partitioning_class () != SYMBOL_PARTITION)
-	    continue;
+	    {
+	      validize_symbol_for_target (node);
+	      continue;
+	    }
 
           promote_symbol (node);
         }
@@ -1022,5 +1101,8 @@  lto_promote_statics_nonwpa (void)
 {
   symtab_node *node;
   FOR_EACH_SYMBOL (node)
-    rename_statics (NULL, node);
+    {
+      rename_statics (NULL, node);
+      validize_symbol_for_target (node);
+    }
 }