@@ -2290,6 +2290,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
$(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \
$(srcdir)/tree-parloops.c \
$(srcdir)/omp-low.c \
+ $(srcdir)/omp-low.h \
$(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
$(srcdir)/cgraphclones.c \
$(srcdir)/tree-phinodes.c \
@@ -1800,7 +1800,7 @@ open_base_files (void)
"tree-ssa.h", "reload.h", "cpp-id-data.h", "tree-chrec.h",
"except.h", "output.h", "cfgloop.h",
"target.h", "ipa-prop.h", "lto-streamer.h", "target-globals.h",
- "ipa-inline.h", "dwarf2out.h", NULL
+ "ipa-inline.h", "dwarf2out.h", "omp-low.h", NULL
};
const char *const *ifp;
outf_p gtype_desc_c;
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "pass_manager.h"
#include "ipa-utils.h"
+#include "omp-low.h"
/* True when asm nodes has been output. */
bool asm_nodes_output = false;
@@ -1044,6 +1045,66 @@ read_string (struct lto_input_block *ib)
return str;
}
+/* Stream the offload function/variable tables, which allow libgomp to look
+ up offload target code, into section LTO_section_offload_table. OFFLOAD_FUNCS
+ is filled in expand_omp_target; OFFLOAD_VARS is filled here just before
+ streaming. In WHOPR (partitioned) mode during the WPA stage both are filled
+ by input_offload_tables. Nothing is written when both tables are empty. */
+
+void
+output_offload_tables (void)
+{
+ /* Collect all defined omp-target variables into offload_vars, unless this is
+ the WPA stage or input_offload_tables has already populated the table. */
+ if (!flag_wpa && vec_safe_is_empty (offload_vars))
+ {
+ struct varpool_node *vnode;
+ FOR_EACH_DEFINED_VARIABLE (vnode)
+ {
+ if (!lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (vnode->decl))
+ || TREE_CODE (vnode->decl) != VAR_DECL
+ || DECL_SIZE (vnode->decl) == 0)
+ continue;
+ vec_safe_push (offload_vars, vnode->decl);
+ }
+ }
+
+ if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+ return;
+
+ struct lto_simple_output_block *ob
+ = lto_create_simple_output_block (LTO_section_offload_table);
+
+ for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+ { /* Function entry: LTO_symtab_unavail_node tag, then the decl index. */
+ streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+ LTO_symtab_last_tag, LTO_symtab_unavail_node);
+ lto_output_fn_decl_index (ob->decl_state, ob->main_stream,
+ (*offload_funcs)[i]);
+ }
+
+ for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+ { /* Variable entry: LTO_symtab_variable tag, then the decl index. */
+ streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+ LTO_symtab_last_tag, LTO_symtab_variable);
+ lto_output_var_decl_index (ob->decl_state, ob->main_stream,
+ (*offload_vars)[i]);
+ }
+
+ streamer_write_uhwi_stream (ob->main_stream, 0); /* Zero ends the table. */
+ lto_destroy_simple_output_block (ob);
+
+ /* In WHOPR mode during the WPA stage the joint offload tables need to be
+ streamed to one partition only. That is why offload_funcs and offload_vars
+ are freed here: later calls then take the early return above. */
+ if (flag_wpa)
+ {
+ vec_free (offload_funcs);
+ vec_free (offload_vars);
+ }
+}
+
/* Overwrite the information in NODE based on FILE_DATA, TAG, FLAGS,
STACK_SIZE, SELF_TIME and SELF_SIZE. This is called either to initialize
NODE or to replace the values in it, for instance because the first
@@ -1739,6 +1800,55 @@ input_symtab (void)
}
}
+/* Read the offload tables (used by libgomp to look up offload target code) of
+ every input file and append their decls to OFFLOAD_FUNCS and OFFLOAD_VARS. */
+
+void
+input_offload_tables (void)
+{
+ struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
+ struct lto_file_decl_data *file_data;
+ unsigned int j = 0;
+
+ while ((file_data = file_data_vec[j++])) /* Stop at the first null entry. */
+ {
+ const char *data;
+ size_t len;
+ struct lto_input_block *ib
+ = lto_create_simple_input_block (file_data, LTO_section_offload_table,
+ &data, &len);
+ if (!ib)
+ continue; /* Presumably FILE_DATA has no offload table section. */
+
+ enum LTO_symtab_tags tag
+ = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
+ while (tag) /* A zero tag ends the table. */
+ {
+ if (tag == LTO_symtab_unavail_node) /* Function entry. */
+ {
+ int decl_index = streamer_read_uhwi (ib);
+ tree fn_decl
+ = lto_file_decl_data_get_fn_decl (file_data, decl_index);
+ vec_safe_push (offload_funcs, fn_decl);
+ }
+ else if (tag == LTO_symtab_variable) /* Variable entry. */
+ {
+ int decl_index = streamer_read_uhwi (ib);
+ tree var_decl
+ = lto_file_decl_data_get_var_decl (file_data, decl_index);
+ vec_safe_push (offload_vars, var_decl);
+ }
+ else
+ fatal_error ("invalid offload table in %s", file_data->file_name);
+
+ tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
+ }
+
+ lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
+ ib, data, len);
+ }
+}
+
/* True when we need optimization summary for NODE. */
static int
@@ -60,7 +60,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
"opts",
"cgraphopt",
"inline",
- "ipcp_trans"
+ "ipcp_trans",
+ "offload_table"
};
@@ -2108,6 +2108,8 @@ lto_output (void)
statements using the statement UIDs. */
output_symtab ();
+ output_offload_tables ();
+
#ifdef ENABLE_CHECKING
lto_bitmap_free (output);
#endif
@@ -248,6 +248,7 @@ enum lto_section_type
LTO_section_cgraph_opt_sum,
LTO_section_inline_summary,
LTO_section_ipcp_transform,
+ LTO_section_offload_table,
LTO_N_SECTION_TYPES /* Must be last. */
};
@@ -884,6 +885,8 @@ bool lto_symtab_encoder_encode_initializer_p (lto_symtab_encoder_t,
varpool_node *);
void output_symtab (void);
void input_symtab (void);
+void output_offload_tables (void);
+void input_offload_tables (void);
bool referenced_from_other_partition_p (struct ipa_ref_list *,
lto_symtab_encoder_t);
bool reachable_from_other_partition_p (struct cgraph_node *,
@@ -3015,6 +3015,8 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames)
/* Read the symtab. */
input_symtab ();
+ input_offload_tables ();
+
/* Store resolutions into the symbol table. */
FOR_EACH_SYMBOL (snode)
@@ -240,6 +240,9 @@ omp_get_id (tree node)
/* Holds a decl for __OPENMP_TARGET__. */
static GTY(()) tree offload_symbol_decl;
+/* Decls of the functions and variables that make up the offload tables. */
+vec<tree, va_gc> *offload_funcs, *offload_vars;
+
/* Get the __OPENMP_TARGET__ symbol. */
static tree
get_offload_symbol_decl (void)
@@ -8906,6 +8909,9 @@ expand_omp_target (struct omp_region *region)
DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
cgraph_add_new_function (child_fn, true);
+ /* Add the new function to the offload table. */
+ vec_safe_push (offload_funcs, child_fn);
+
/* Fix the callgraph edges for child_cfun. Those for cfun will be
fixed in a following pass. */
push_cfun (child_cfun);
@@ -13730,71 +13736,23 @@ add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
void
omp_finish_file (void)
{
- struct cgraph_node *node;
- struct varpool_node *vnode;
const char *funcs_section_name = OFFLOAD_FUNC_TABLE_SECTION_NAME;
const char *vars_section_name = OFFLOAD_VAR_TABLE_SECTION_NAME;
- vec<tree, va_gc> *v_funcs, *v_vars;
-
- vec_alloc (v_vars, 0);
- vec_alloc (v_funcs, 0);
-
- /* Collect all omp-target functions. */
- FOR_EACH_DEFINED_FUNCTION (node)
- {
- /* TODO: This check could fail on functions, created by omp
- parallel/task pragmas. It's better to name outlined for offloading
- functions in some different way and to check here the function name.
- It could be something like "*_omp_tgtfn" in contrast with "*_omp_fn"
- for functions from omp parallel/task pragmas. */
- if (!lookup_attribute ("omp declare target",
- DECL_ATTRIBUTES (node->decl))
- || !DECL_ARTIFICIAL (node->decl))
- continue;
- vec_safe_push (v_funcs, node->decl);
- }
- /* Collect all omp-target global variables. */
- FOR_EACH_DEFINED_VARIABLE (vnode)
- {
- if (!lookup_attribute ("omp declare target",
- DECL_ATTRIBUTES (vnode->decl))
- || TREE_CODE (vnode->decl) != VAR_DECL
- || DECL_SIZE (vnode->decl) == 0)
- continue;
- vec_safe_push (v_vars, vnode->decl);
- }
- unsigned num_vars = vec_safe_length (v_vars);
- unsigned num_funcs = vec_safe_length (v_funcs);
+ unsigned num_funcs = vec_safe_length (offload_funcs);
+ unsigned num_vars = vec_safe_length (offload_vars);
- if (num_vars == 0 && num_funcs == 0)
+ if (num_funcs == 0 && num_vars == 0)
return;
-#ifdef ACCEL_COMPILER
- /* Decls are placed in reversed order in fat-objects, so we need to
- revert them back if we compile target. */
- for (unsigned i = 0; i < num_funcs / 2; i++)
- {
- tree it = (*v_funcs)[i];
- (*v_funcs)[i] = (*v_funcs)[num_funcs - i - 1];
- (*v_funcs)[num_funcs - i - 1] = it;
- }
- for (unsigned i = 0; i < num_vars / 2; i++)
- {
- tree it = (*v_vars)[i];
- (*v_vars)[i] = (*v_vars)[num_vars - i - 1];
- (*v_vars)[num_vars - i - 1] = it;
- }
-#endif
-
if (targetm_common.have_named_sections)
{
vec<constructor_elt, va_gc> *v_f, *v_v;
vec_alloc (v_f, num_funcs);
vec_alloc (v_v, num_vars * 2);
- add_decls_addresses_to_decl_constructor (v_funcs, v_f);
- add_decls_addresses_to_decl_constructor (v_vars, v_v);
+ add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
+ add_decls_addresses_to_decl_constructor (offload_vars, v_v);
tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
num_vars * 2);
@@ -13825,12 +13783,12 @@ omp_finish_file (void)
{
for (unsigned i = 0; i < num_funcs; i++)
{
- tree it = (*v_funcs)[i];
+ tree it = (*offload_funcs)[i];
targetm.record_offload_symbol (it);
}
for (unsigned i = 0; i < num_vars; i++)
{
- tree it = (*v_vars)[i];
+ tree it = (*offload_vars)[i];
targetm.record_offload_symbol (it);
}
}
@@ -29,4 +29,7 @@ extern tree omp_reduction_init (tree, tree);
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
extern void omp_finish_file (void);
+extern GTY(()) vec<tree, va_gc> *offload_funcs; /* Offloaded function decls. */
+extern GTY(()) vec<tree, va_gc> *offload_vars; /* Offloaded variable decls. */
+
#endif /* GCC_OMP_LOW_H */