diff mbox

[gomp4] Add tables generation

Message ID 20140813161914.GA26226@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Verbin Aug. 13, 2014, 4:19 p.m. UTC
Hi,

Here is the updated patch.  offload_funcs/vars are now declared in omp-low.h,
and the new functions have comments.  It also fixes the corruption of
offload_funcs/vars by the garbage collector.  OK for gomp-4_0-branch?

  -- Ilya

---
 gcc/Makefile.in        |    1 +
 gcc/gengtype.c         |    2 +-
 gcc/lto-cgraph.c       |  110 ++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/lto-section-in.c   |    3 +-
 gcc/lto-streamer-out.c |    2 +
 gcc/lto-streamer.h     |    3 +
 gcc/lto/lto.c          |    2 +
 gcc/omp-low.c          |   68 ++++++------------------------
 gcc/omp-low.h          |    3 +
 9 files changed, 137 insertions(+), 57 deletions(-)
diff mbox

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index bfa5f32..372f586 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2290,6 +2290,7 @@  GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \
   $(srcdir)/tree-parloops.c \
   $(srcdir)/omp-low.c \
+  $(srcdir)/omp-low.h \
   $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
   $(srcdir)/cgraphclones.c \
   $(srcdir)/tree-phinodes.c \
diff --git a/gcc/gengtype.c b/gcc/gengtype.c
index ffe3f94..5bcbbe2 100644
--- a/gcc/gengtype.c
+++ b/gcc/gengtype.c
@@ -1800,7 +1800,7 @@  open_base_files (void)
       "tree-ssa.h", "reload.h", "cpp-id-data.h", "tree-chrec.h",
       "except.h", "output.h",  "cfgloop.h",
       "target.h", "ipa-prop.h", "lto-streamer.h", "target-globals.h",
-      "ipa-inline.h", "dwarf2out.h", NULL
+      "ipa-inline.h", "dwarf2out.h", "omp-low.h", NULL
     };
     const char *const *ifp;
     outf_p gtype_desc_c;
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index bc05400..64ad599 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -52,6 +52,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "context.h"
 #include "pass_manager.h"
 #include "ipa-utils.h"
+#include "omp-low.h"
 
 /* True when asm nodes has been output.  */
 bool asm_nodes_output = false;
@@ -1044,6 +1045,66 @@  read_string (struct lto_input_block *ib)
   return str;
 }
 
+/* Stream out the offload function/variable tables that let libgomp look up
+   offload target code.  Each entry is a tag (LTO_symtab_unavail_node for a
+   function, LTO_symtab_variable for a variable) plus a decl index; a zero
+   ends the table.  OFFLOAD_FUNCS is filled in expand_omp_target; OFFLOAD_VARS
+   is filled here, or both by input_offload_tables during the WPA stage.  */
+
+void
+output_offload_tables (void)
+{
+  /* Collect all omp-target global variables to offload_vars, if they have not
+     been gathered earlier by input_offload_tables on the WPA stage.  */
+  if (!flag_wpa && vec_safe_is_empty (offload_vars))
+    {
+      struct varpool_node *vnode;
+      FOR_EACH_DEFINED_VARIABLE (vnode)
+	{
+	  if (!lookup_attribute ("omp declare target",
+				 DECL_ATTRIBUTES (vnode->decl))
+	      || TREE_CODE (vnode->decl) != VAR_DECL
+	      || DECL_SIZE (vnode->decl) == 0)
+	    continue;
+	  vec_safe_push (offload_vars, vnode->decl);
+	}
+    }
+
+  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+    return;  /* Both tables empty: nothing to stream.  */
+
+  struct lto_simple_output_block *ob
+    = lto_create_simple_output_block (LTO_section_offload_table);
+
+  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+    {
+      streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+			   LTO_symtab_last_tag, LTO_symtab_unavail_node);
+      lto_output_fn_decl_index (ob->decl_state, ob->main_stream,
+				(*offload_funcs)[i]);
+    }
+
+  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+    {
+      streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+			   LTO_symtab_last_tag, LTO_symtab_variable);
+      lto_output_var_decl_index (ob->decl_state, ob->main_stream,
+				 (*offload_vars)[i]);
+    }
+
+  streamer_write_uhwi_stream (ob->main_stream, 0);  /* Zero ends the table.  */
+  lto_destroy_simple_output_block (ob);
+
+  /* In WHOPR mode during the WPA stage the joint offload tables need to be
+     streamed to one partition only.  That's why we free offload_funcs and
+     offload_vars after the first call of output_offload_tables.  */
+  if (flag_wpa)
+    {
+      vec_free (offload_funcs);
+      vec_free (offload_vars);
+    }
+}
+
 /* Overwrite the information in NODE based on FILE_DATA, TAG, FLAGS,
    STACK_SIZE, SELF_TIME and SELF_SIZE.  This is called either to initialize
    NODE or to replace the values in it, for instance because the first
@@ -1739,6 +1800,55 @@  input_symtab (void)
     }
 }
 
+/* Read each input file's offload table (the functions and variables libgomp
+   looks up for offloading) and append it to OFFLOAD_FUNCS/OFFLOAD_VARS.  */
+
+void
+input_offload_tables (void)
+{
+  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
+  struct lto_file_decl_data *file_data;
+  unsigned int j = 0;
+
+  while ((file_data = file_data_vec[j++]))  /* NULL ends the vector.  */
+    {
+      const char *data;
+      size_t len;
+      struct lto_input_block *ib
+	= lto_create_simple_input_block (file_data, LTO_section_offload_table,
+					 &data, &len);
+      if (!ib)
+	continue;  /* Presumably no offload table in this file.  */
+
+      enum LTO_symtab_tags tag
+	= streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
+      while (tag)  /* A zero tag ends the table.  */
+	{
+	  if (tag == LTO_symtab_unavail_node)
+	    {
+	      int decl_index = streamer_read_uhwi (ib);
+	      tree fn_decl
+		= lto_file_decl_data_get_fn_decl (file_data, decl_index);
+	      vec_safe_push (offload_funcs, fn_decl);
+	    }
+	  else if (tag == LTO_symtab_variable)
+	    {
+	      int decl_index = streamer_read_uhwi (ib);
+	      tree var_decl
+		= lto_file_decl_data_get_var_decl (file_data, decl_index);
+	      vec_safe_push (offload_vars, var_decl);
+	    }
+	  else
+	    fatal_error ("invalid offload table in %s", file_data->file_name);
+
+	  tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
+	}
+
+      lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
+				      ib, data, len);
+    }
+}
+
 /* True when we need optimization summary for NODE.  */
 
 static int
diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c
index d887763..b705c75 100644
--- a/gcc/lto-section-in.c
+++ b/gcc/lto-section-in.c
@@ -60,7 +60,8 @@  const char *lto_section_name[LTO_N_SECTION_TYPES] =
   "opts",
   "cgraphopt",
   "inline",
-  "ipcp_trans"
+  "ipcp_trans",
+  "offload_table"
 };
 
 
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index 3064562..ff8572d 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -2108,6 +2108,8 @@  lto_output (void)
      statements using the statement UIDs.  */
   output_symtab ();
 
+  output_offload_tables ();
+
 #ifdef ENABLE_CHECKING
   lto_bitmap_free (output);
 #endif
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index eedec95..3607634 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -248,6 +248,7 @@  enum lto_section_type
   LTO_section_cgraph_opt_sum,
   LTO_section_inline_summary,
   LTO_section_ipcp_transform,
+  LTO_section_offload_table,
   LTO_N_SECTION_TYPES		/* Must be last.  */
 };
 
@@ -884,6 +885,8 @@  bool lto_symtab_encoder_encode_initializer_p (lto_symtab_encoder_t,
 					      varpool_node *);
 void output_symtab (void);
 void input_symtab (void);
+void output_offload_tables (void);
+void input_offload_tables (void);
 bool referenced_from_other_partition_p (struct ipa_ref_list *,
 				        lto_symtab_encoder_t);
 bool reachable_from_other_partition_p (struct cgraph_node *,
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 28c896d..a0b606c 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3015,6 +3015,8 @@  read_cgraph_and_symbols (unsigned nfiles, const char **fnames)
   /* Read the symtab.  */
   input_symtab ();
 
+  input_offload_tables ();
+
   /* Store resolutions into the symbol table.  */
 
   FOR_EACH_SYMBOL (snode)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index ce97a0e..6bea2c3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -240,6 +240,9 @@  omp_get_id (tree node)
 /* Holds a decl for __OPENMP_TARGET__.  */
 static GTY(()) tree offload_symbol_decl;
 
+/* Holds offload tables with decls.  */
+vec<tree, va_gc> *offload_funcs, *offload_vars;
+
 /* Get the __OPENMP_TARGET__ symbol.  */
 static tree
 get_offload_symbol_decl (void)
@@ -8906,6 +8909,9 @@  expand_omp_target (struct omp_region *region)
       DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
       cgraph_add_new_function (child_fn, true);
 
+      /* Add the new function to the offload table.  */
+      vec_safe_push (offload_funcs, child_fn);
+
       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
 	 fixed in a following pass.  */
       push_cfun (child_cfun);
@@ -13730,71 +13736,23 @@  add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
 void
 omp_finish_file (void)
 {
-  struct cgraph_node *node;
-  struct varpool_node *vnode;
   const char *funcs_section_name = OFFLOAD_FUNC_TABLE_SECTION_NAME;
   const char *vars_section_name = OFFLOAD_VAR_TABLE_SECTION_NAME;
-  vec<tree, va_gc> *v_funcs, *v_vars;
-
-  vec_alloc (v_vars, 0);
-  vec_alloc (v_funcs, 0);
-
-  /* Collect all omp-target functions.  */
-  FOR_EACH_DEFINED_FUNCTION (node)
-    {
-      /* TODO: This check could fail on functions, created by omp
-	 parallel/task pragmas.  It's better to name outlined for offloading
-	 functions in some different way and to check here the function name.
-	 It could be something like "*_omp_tgtfn" in contrast with "*_omp_fn"
-	 for functions from omp parallel/task pragmas.  */
-      if (!lookup_attribute ("omp declare target",
-			     DECL_ATTRIBUTES (node->decl))
-	  || !DECL_ARTIFICIAL (node->decl))
-	continue;
-      vec_safe_push (v_funcs, node->decl);
-    }
-  /* Collect all omp-target global variables.  */
-  FOR_EACH_DEFINED_VARIABLE (vnode)
-    {
-      if (!lookup_attribute ("omp declare target",
-			     DECL_ATTRIBUTES (vnode->decl))
-	  || TREE_CODE (vnode->decl) != VAR_DECL
-	  || DECL_SIZE (vnode->decl) == 0)
-	continue;
 
-      vec_safe_push (v_vars, vnode->decl);
-    }
-  unsigned num_vars = vec_safe_length (v_vars);
-  unsigned num_funcs = vec_safe_length (v_funcs);
+  unsigned num_funcs = vec_safe_length (offload_funcs);
+  unsigned num_vars = vec_safe_length (offload_vars);
 
-  if (num_vars == 0 && num_funcs == 0)
+  if (num_funcs == 0 && num_vars == 0)
     return;
 
-#ifdef ACCEL_COMPILER
-  /* Decls are placed in reversed order in fat-objects, so we need to
-     revert them back if we compile target.  */
-  for (unsigned i = 0; i < num_funcs / 2; i++)
-    {
-      tree it = (*v_funcs)[i];
-      (*v_funcs)[i] = (*v_funcs)[num_funcs - i - 1];
-      (*v_funcs)[num_funcs - i - 1] = it;
-    }
-  for (unsigned i = 0; i < num_vars / 2; i++)
-    {
-      tree it = (*v_vars)[i];
-      (*v_vars)[i] = (*v_vars)[num_vars - i - 1];
-      (*v_vars)[num_vars - i - 1] = it;
-    }
-#endif
-
   if (targetm_common.have_named_sections)
     {
       vec<constructor_elt, va_gc> *v_f, *v_v;
       vec_alloc (v_f, num_funcs);
       vec_alloc (v_v, num_vars * 2);
 
-      add_decls_addresses_to_decl_constructor (v_funcs, v_f);
-      add_decls_addresses_to_decl_constructor (v_vars, v_v);
+      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
+      add_decls_addresses_to_decl_constructor (offload_vars, v_v);
 
       tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
 						    num_vars * 2);
@@ -13825,12 +13783,12 @@  omp_finish_file (void)
     {
       for (unsigned i = 0; i < num_funcs; i++)
 	{
-	  tree it = (*v_funcs)[i];
+	  tree it = (*offload_funcs)[i];
 	  targetm.record_offload_symbol (it);
 	}  
       for (unsigned i = 0; i < num_vars; i++)
 	{
-	  tree it = (*v_vars)[i];
+	  tree it = (*offload_vars)[i];
 	  targetm.record_offload_symbol (it);
 	}  
     }
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index f904eda..ac587d0 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -29,4 +29,7 @@  extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
 extern void omp_finish_file (void);
 
+extern GTY(()) vec<tree, va_gc> *offload_funcs;
+extern GTY(()) vec<tree, va_gc> *offload_vars;
+
 #endif /* GCC_OMP_LOW_H */