From 8cbddf693f93328f117dc48588deee924d2df6cd Mon Sep 17 00:00:00 2001
From: mliska <mliska@suse.cz>
Date: Tue, 1 Sep 2015 14:10:24 +0200
Subject: [PATCH 1/4] HSA: create HSA clones.
gcc/c-family/ChangeLog:
2015-09-03 Martin Liska <mliska@suse.cz>
* c-common.c (c_common_attribute_table): Remove hsa and hsakernel
attributes.
(handle_hsa_attribute): Do not handle hsakernel attribute.
gcc/lto/ChangeLog:
2015-09-03 Martin Liska <mliska@suse.cz>
* lto-partition.c (add_symbol_to_partition_1): For an HSA clone, also
append all dependencies to an LTO partition.
libgomp/ChangeLog:
2015-09-03 Martin Liska <mliska@suse.cz>
* plugin/plugin-hsa.c (GOMP_OFFLOAD_load_image): Enable having a module
without kernels (can contain HSA functions).
gcc/ChangeLog:
2015-09-03 Martin Liska <mliska@suse.cz>
* Makefile.in: Add new source file and remove hsa-gen.c from list
of GT files.
* cgraph.h: Remove hsa_imp_of property of cgraph_node.
* hsa-brig.c (brig_init): Append LTRANS name to a BRIG module name.
(emit_function_directives): Add new argument.
(emit_function_declaration): Use it.
(emit_call_insn): Record the call in function_call_linkage so that the
offset of the called function is filled in before a BRIG module is done.
(hsa_brig_emit_function): Emit declarations before a function
is defined/declared.
(hsa_output_kernel_mapping): An HSA brig module can have zero kernels.
(hsa_output_brig): Process function call linkage, filling in the correct
code references.
* hsa-dump.c: Add new include files due to function_summary.
* hsa-gen.c (hsa_get_gpu_function): New function.
(hsa_get_host_function): New function.
(gen_hsa_insns_for_direct_call): Small refactoring.
(gen_hsa_insns_for_known_library_call): Likewise.
(hsa_generate_function_declaration): Sanitize function name.
(generate_hsa): Remove unused return value.
(init_hsa_functions): Remove.
(insert_store_range_dim): Likewise.
(wrap_hsa_kernel_call): Likewise.
(wrap_all_hsa_calls): Likewise.
(pass_gen_hsail::execute): Emit code only for cgraph_nodes that
are marked as GPU implementations in hsa_summaries.
* hsa-regalloc.c: Include additional header files.
* hsa.c (hsa_get_declaration_name): Use asm_name as name of function.
(hsa_register_kernel): New function.
* hsa.h (enum hsa_function_kind): New enum.
(struct hsa_function_summary): New.
(hsa_summary_t::link_functions): Likewise.
* ipa-hsa.c: New file.
* lto-section-in.c: Add new section name.
* lto-streamer.h (enum lto_section_type): Likewise.
* omp-low.c (expand_parallel_call): Fill up HSA function summary.
(expand_target_kernel_body): Likewise.
* passes.c (execute_one_pass): Terminate pass queue if
TODO_stop_pass_execution is returned.
(execute_pass_list_1): Likewise.
(execute_ipa_pass_list): Likewise.
* passes.def: Add new IPA pass.
* timevar.def: Likewise.
* tree-pass.h: Likewise.
---
gcc/Makefile.in | 2 +-
gcc/c-family/c-common.c | 9 --
gcc/cgraph.h | 4 -
gcc/hsa-brig.c | 89 +++++++++---
gcc/hsa-dump.c | 40 +++++-
gcc/hsa-gen.c | 267 +++++++----------------------------
gcc/hsa-regalloc.c | 27 +++-
gcc/hsa.c | 28 ++++
gcc/hsa.h | 62 +++++++++
gcc/ipa-hsa.c | 330 ++++++++++++++++++++++++++++++++++++++++++++
gcc/lto-section-in.c | 3 +-
gcc/lto-streamer.h | 1 +
gcc/lto/lto-partition.c | 48 +++++++
gcc/omp-low.c | 16 ++-
gcc/passes.c | 18 ++-
gcc/passes.def | 1 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 2 +
libgomp/plugin/plugin-hsa.c | 2 -
19 files changed, 681 insertions(+), 269 deletions(-)
create mode 100644 gcc/ipa-hsa.c
@@ -1314,6 +1314,7 @@ OBJS = \
ipa-icf.o \
ipa-icf-gimple.o \
ipa-reference.o \
+ ipa-hsa.o \
ipa-ref.o \
ipa-utils.o \
ipa.o \
@@ -2371,7 +2372,6 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
$(srcdir)/ipa-devirt.c \
$(srcdir)/internal-fn.h \
$(srcdir)/hsa.c \
- $(srcdir)/hsa-gen.c \
@all_gtfiles@
# Compute the list of GT header files from the corresponding C sources,
@@ -667,10 +667,6 @@ const struct attribute_spec c_common_attribute_table[] =
handle_noinline_attribute, false },
{ "noclone", 0, 0, true, false, false,
handle_noclone_attribute, false },
- { "hsa", 0, 0, true, false, false,
- handle_hsa_attribute, false },
- { "hsakernel", 0, 0, true, false, false,
- handle_hsa_attribute, false },
{ "hsafunc", 0, 0, true, false, false,
handle_hsa_attribute, false },
{ "no_icf", 0, 0, true, false, false,
@@ -7369,11 +7365,6 @@ handle_hsa_attribute (tree *node, tree name,
TREE_USED (*node) = 1;
DECL_UNINLINABLE (*node) = 1;
- if (strcmp ("hsakernel", IDENTIFIER_POINTER (name)) == 0
- && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (TREE_TYPE (*node))))
- == void_type_node))
- warning (OPT_Wattributes, "%qE attribute on a function with fixed number "
- "of argument makes no sense", name);
return NULL_TREE;
}
@@ -524,10 +524,6 @@ public:
/* Section name. Again can be private, if allowed. */
section_hash_entry *x_section;
- /* TODO: Consider moving this to a summary.
- The node this HSA node corresponds to. */
- symtab_node *hsa_imp_of;
-
protected:
/* Dump base fields of symtab nodes to F. Not to be used directly. */
void dump_base (FILE *);
@@ -37,7 +37,6 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-alias.h"
-#include "machmode.h"
#include "output.h"
#include "gimple-expr.h"
#include "dominance.h"
@@ -51,10 +50,26 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "hash-map.h"
-#include "ipa-ref.h"
#include "lto-streamer.h"
#include "cgraph.h"
#include "real.h"
+#include "gimple-iterator.h"
+#include "bitmap.h"
+#include "dumpfile.h"
+#include "alloc-pool.h"
+#include "tree-ssa-operands.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "tree-ssanames.h"
+#include "rtl.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "ssa-iterators.h"
+#include "ipa-ref.h"
+#include "gimplify-me.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
#include "hsa.h"
#define BRIG_ELF_SECTION_NAME ".brig"
@@ -116,6 +131,9 @@ static bool brig_initialized = false;
/* Mapping between emitted HSA functions and their offset in code segment. */
static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
+/* Set of emitted function declarations. */
+static hash_set <tree> *emitted_declarations;
+
struct function_linkage_pair
{
function_linkage_pair (tree decl, unsigned int off):
@@ -128,6 +146,9 @@ struct function_linkage_pair
unsigned int offset;
};
+/* Vector of function calls where we need to resolve function offsets. */
+static auto_vec <function_linkage_pair> function_call_linkage;
+
/* Add a new chunk, allocate data for it and initialize it. */
void
@@ -404,6 +425,21 @@ brig_init (void)
char* extension = strchr (modname, '.');
if (extension)
*extension = '\0';
+
+ /* In LTO mode, each LTRANS unit has to emit a different module name. */
+ if (flag_ltrans)
+ {
+ part = strrchr (asm_file_name, '/');
+ if (!part)
+ part = asm_file_name;
+ else
+ part++;
+ char *modname2;
+ asprintf (&modname2, "%s_%s", modname, part);
+ free (modname);
+ modname = modname2;
+ }
+
hsa_sanitize_name (modname);
moddir.name = brig_emit_string (modname);
free (modname);
@@ -570,7 +606,7 @@ emit_directive_variable (struct hsa_symbol *symbol)
definition F. */
static BrigDirectiveExecutable *
-emit_function_directives (hsa_function_representation *f)
+emit_function_directives (hsa_function_representation *f, bool is_declaration)
{
struct BrigDirectiveExecutable fndir;
unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
@@ -621,7 +657,10 @@ emit_function_directives (hsa_function_representation *f)
fndir.modifier.allBits |= BRIG_EXECUTABLE_DEFINITION;
memset (&fndir.reserved, 0, sizeof (fndir.reserved));
- function_offsets->put (f->decl, brig_code.total_size);
+ /* Once an offset for the function is recorded, a later declaration must
+ not overwrite it; only a definition may replace an existing entry. */
+ if (!function_offsets->get (f->decl) || !is_declaration)
+ function_offsets->put (f->decl, brig_code.total_size);
brig_code.add (&fndir, sizeof (fndir));
/* XXX terrible hack: we need to set instCount after we emit all
@@ -1048,7 +1087,7 @@ emit_function_declaration (tree decl)
{
hsa_function_representation *f = hsa_generate_function_declaration (decl);
- emit_function_directives (f);
+ emit_function_directives (f, true);
emit_queued_operands ();
delete f;
@@ -1423,11 +1462,9 @@ emit_call_insn (hsa_insn_basic *insn)
operand_offsets[0] = htole32 (enqueue_op (call->result_code_list));
/* Operand 1: func */
- BrigCodeOffset32_t *func_offset = function_offsets->get
- (call->called_function);
- gcc_assert (func_offset != NULL);
- call->func.directive_offset = *func_offset;
unsigned int offset = enqueue_op (&call->func);
+ function_call_linkage.safe_push
+ (function_linkage_pair (call->called_function, offset));
operand_offsets[1] = htole32 (offset);
/* Operand 2: in-args. */
@@ -1746,18 +1783,22 @@ hsa_brig_emit_function (void)
if (!function_offsets)
function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
+ if (!emitted_declarations)
+ emitted_declarations = new hash_set<tree> ();
+
for (unsigned i = 0; i < hsa_cfun->called_functions.length (); i++)
{
tree called = hsa_cfun->called_functions[i];
- if (function_offsets->get (called) == NULL)
+ /* Emit a declaration unless one has already been emitted. */
+ if (!emitted_declarations->contains (called))
{
emit_function_declaration (called);
- gcc_assert (function_offsets->get (called) != NULL);
+ emitted_declarations->add (called);
}
}
- ptr_to_fndir = emit_function_directives (hsa_cfun);
+ ptr_to_fndir = emit_function_directives (hsa_cfun, false);
for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->first_insn;
insn;
insn = insn->next)
@@ -1790,10 +1831,6 @@ hsa_output_kernel_mapping (tree brig_decl)
{
unsigned map_count = hsa_get_number_decl_kernel_mappings ();
- /* If the current TU does not contain a kernel, no mapping is produced. */
- if (map_count == 0)
- return;
-
tree int_num_of_kernels;
int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
tree kernel_num_index_type = build_index_type (int_num_of_kernels);
@@ -1804,8 +1841,9 @@ hsa_output_kernel_mapping (tree brig_decl)
for (unsigned i = 0; i < map_count; ++i)
{
tree decl = hsa_get_decl_kernel_mapping_decl (i);
- CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE,
- build_fold_addr_expr (decl));
+ CONSTRUCTOR_APPEND_ELT
+ (host_functions_vec, NULL_TREE,
+ build_fold_addr_expr (hsa_get_host_function (decl)));
}
tree host_functions_ctor = build_constructor (host_functions_array_type,
host_functions_vec);
@@ -2106,6 +2144,18 @@ hsa_output_brig (void)
if (!brig_initialized)
return;
+ for (unsigned i = 0; i < function_call_linkage.length (); i++)
+ {
+ function_linkage_pair p = function_call_linkage[i];
+ /* Patch the call's code reference with the callee's recorded offset. */
+ BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
+ gcc_assert (func_offset != NULL && *func_offset != 0);
+ BrigOperandCodeRef *code_ref = (BrigOperandCodeRef *)
+ (brig_operand.get_ptr_by_offset (p.offset));
+ gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
+ code_ref->ref = htole32 (*func_offset);
+ }
+
saved_section = in_section;
switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
@@ -2178,4 +2228,7 @@ hsa_output_brig (void)
hsa_free_decl_kernel_mapping ();
brig_release_data ();
hsa_deinit_compilation_unit_data ();
+
+ delete emitted_declarations;
+ delete function_offsets;
}
@@ -22,27 +22,55 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "is-a.h"
-#include "vec.h"
-#include "hash-set.h"
#include "defaults.h"
#include "hard-reg-set.h"
-#include "dominance.h"
-#include "cfg.h"
-#include "input.h"
-#include "function.h"
+#include "hash-set.h"
+#include "vec.h"
#include "symtab.h"
+#include "vec.h"
+#include "input.h"
#include "alias.h"
#include "double-int.h"
#include "inchash.h"
#include "tree.h"
+#include "tree-pass.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "function.h"
#include "predict.h"
#include "basic-block.h"
#include "fold-const.h"
#include "gimple.h"
+#include "gimple-iterator.h"
+#include "machmode.h"
+#include "output.h"
+#include "function.h"
+#include "bitmap.h"
+#include "dumpfile.h"
#include "gimple-pretty-print.h"
+#include "diagnostic-core.h"
+#include "alloc-pool.h"
+#include "tree-ssa-operands.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "rtl.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "ssa-iterators.h"
+#include "ipa-ref.h"
+#include "lto-streamer.h"
+#include "cgraph.h"
+#include "stor-layout.h"
+#include "gimplify-me.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
#include "hsa.h"
/* Return textual name of TYPE. */
@@ -70,7 +70,7 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "gimplify-me.h"
#include "print-tree.h"
-#include "cfghooks.h"
+#include "symbol-summary.h"
#include "hsa.h"
#include "cfghooks.h"
@@ -662,6 +662,32 @@ get_symbol_for_decl (tree decl)
return sym;
}
+/* For a given host function declaration DECL, return the declaration
+ of its GPU (HSA) implementation. */
+
+static tree
+hsa_get_gpu_function (tree decl)
+{
+ hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl));
+ gcc_assert (s->kind != HSA_NONE);
+ gcc_assert (!s->gpu_implementation_p);
+
+ return s->binded_function->decl;
+}
+
+/* For a given HSA function declaration, return a host
+ function declaration. */
+
+tree
+hsa_get_host_function (tree decl)
+{
+ hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl));
+ gcc_assert (s->kind != HSA_NONE);
+ gcc_assert (s->gpu_implementation_p);
+
+ return s->binded_function->decl;
+}
+
/* Create a spill symbol of type TYPE. */
hsa_symbol *
@@ -2664,7 +2690,8 @@ static void
gen_hsa_insns_for_direct_call (gimple stmt, hsa_bb *hbb,
vec <hsa_op_reg_p> *ssa_map)
{
- hsa_insn_call *call_insn = new hsa_insn_call (gimple_call_fndecl (stmt));
+ tree decl = gimple_call_fndecl (stmt);
+ hsa_insn_call *call_insn = new hsa_insn_call (decl);
hsa_cfun->called_functions.safe_push (call_insn->called_function);
/* Argument block start. */
@@ -2702,7 +2729,7 @@ gen_hsa_insns_for_direct_call (gimple stmt, hsa_bb *hbb,
call_insn->args_code_list = new hsa_op_code_list (args);
hbb->append_insn (call_insn);
- tree result_type = TREE_TYPE (TREE_TYPE (gimple_call_fndecl (stmt)));
+ tree result_type = TREE_TYPE (TREE_TYPE (decl));
tree result = gimple_call_lhs (stmt);
hsa_insn_mem *result_insn = NULL;
@@ -2796,8 +2823,7 @@ static bool
gen_hsa_insns_for_known_library_call (gimple stmt, hsa_bb *hbb,
vec <hsa_op_reg_p> *ssa_map)
{
- tree decl = gimple_call_fndecl (stmt);
- const char *name = hsa_get_declaration_name (decl);
+ const char *name = hsa_get_declaration_name (gimple_call_fndecl (stmt));
if (strcmp (name, "omp_is_initial_device") == 0)
{
@@ -3474,7 +3500,8 @@ specialop:
called = TREE_OPERAND (called, 0);
gcc_checking_assert (TREE_CODE (called) == FUNCTION_DECL);
- const char *name = hsa_get_declaration_name (called);
+ const char *name = hsa_get_declaration_name
+ (hsa_get_gpu_function (called));
hsa_add_kernel_dependency (hsa_cfun->decl,
hsa_brig_function_name (name));
gen_hsa_insns_for_kernel_call (hbb, as_a <gcall *> (stmt));
@@ -3833,6 +3860,7 @@ hsa_generate_function_declaration (tree decl)
fun->declaration_p = true;
fun->decl = decl;
fun->name = xstrdup (hsa_get_declaration_name (decl));
+ hsa_sanitize_name (fun->name);
gen_function_decl_parameters (fun, decl);
@@ -3844,19 +3872,19 @@ hsa_generate_function_declaration (tree decl)
considered an HSA kernel callable from the host, otherwise it will be
compiled as an HSA function callable from other HSA code. */
-static unsigned int
+static void
generate_hsa (bool kernel)
{
if (DECL_STATIC_CHAIN (cfun->decl))
{
sorry ("HSA does not support nested functions");
- return 0;
+ return;
}
else if (!TYPE_ARG_TYPES (TREE_TYPE (cfun->decl)))
{
sorry ("HSA does not support functions with variadic arguments "
"(or unknown return type)");
- return 0;
+ return;
}
vec <hsa_op_reg_p> ssa_map = vNULL;
@@ -3879,13 +3907,7 @@ generate_hsa (bool kernel)
if (hsa_cfun->kern_p)
{
- cgraph_node *node = cgraph_node::get_create (current_function_decl);
- tree host_decl;
- if (node->hsa_imp_of)
- host_decl = node->hsa_imp_of->decl;
- else
- host_decl = current_function_decl;
- hsa_add_kern_decl_mapping (host_decl, hsa_cfun->name,
+ hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->name,
hsa_cfun->maximum_omp_data_size);
}
@@ -3903,197 +3925,6 @@ generate_hsa (bool kernel)
fail:
hsa_deinit_data_for_cfun ();
- return 0;
-}
-
-static GTY(()) tree hsa_launch_fn;
-static GTY(()) tree hsa_dim_array_type;
-static GTY(()) tree hsa_lattrs_dimnum_decl;
-static GTY(()) tree hsa_lattrs_grid_decl;
-static GTY(()) tree hsa_lattrs_group_decl;
-static GTY(()) tree hsa_lattrs_nargs_decl;
-static GTY(()) tree hsa_launch_attributes_type;
-
-static void
-init_hsa_functions (void)
-{
- if (hsa_launch_fn)
- return;
-
- tree dim_arr_index_type;
- dim_arr_index_type = build_index_type (build_int_cst (integer_type_node, 2));
- hsa_dim_array_type = build_array_type (uint32_type_node, dim_arr_index_type);
-
- hsa_launch_attributes_type = make_node (RECORD_TYPE);
- hsa_lattrs_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("ndim"),
- uint32_type_node);
- DECL_CHAIN (hsa_lattrs_dimnum_decl) = NULL_TREE;
-
- hsa_lattrs_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("global_size"),
- hsa_dim_array_type);
- DECL_CHAIN (hsa_lattrs_grid_decl) = hsa_lattrs_dimnum_decl;
- hsa_lattrs_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("group_size"),
- hsa_dim_array_type);
- DECL_CHAIN (hsa_lattrs_group_decl) = hsa_lattrs_grid_decl;
- hsa_lattrs_nargs_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("nargs"),
- uint32_type_node);
- DECL_CHAIN (hsa_lattrs_nargs_decl) = hsa_lattrs_group_decl;
- finish_builtin_struct (hsa_launch_attributes_type, "__hsa_launch_attributes",
- hsa_lattrs_nargs_decl, NULL_TREE);
- tree launch_fn_type;
- launch_fn_type
- = build_function_type_list (void_type_node, ptr_type_node,
- build_pointer_type (hsa_launch_attributes_type),
- build_pointer_type (uint64_type_node),
- NULL_TREE);
-
- hsa_launch_fn = build_fn_decl ("__hsa_launch_kernel", launch_fn_type);
-}
-
-/* Insert before the current statement in GSI a store of VALUE to INDEX of
- array (of type hsa_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be of
- type uint32_type_node. */
-
-static void
-insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
- tree fld_decl, int index, tree value)
-{
- tree ref = build4 (ARRAY_REF, uint32_type_node,
- build3 (COMPONENT_REF, hsa_dim_array_type,
- range_var, fld_decl, NULL_TREE),
- build_int_cst (integer_type_node, index),
- NULL_TREE, NULL_TREE);
- gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
-}
-
-/* Generate call to invoke kernel implementing function FNDECL. */
-
-static void
-wrap_hsa_kernel_call (gimple_stmt_iterator *gsi, tree fndecl)
-{
- init_hsa_functions ();
-
- bool real_kern_p = lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl));
- tree grid_size_1, group_size_1;
- tree u32_one = build_int_cst (uint32_type_node, 1);
- gimple call_stmt = gsi_stmt (*gsi);
- unsigned discard_arguents, num_args = gimple_call_num_args (call_stmt);
- if (real_kern_p)
- {
- discard_arguents = 2;
- if (num_args < 2)
- {
- error ("Calls to functions with hsakernel attribute must "
- "have at least two arguments.");
- grid_size_1 = group_size_1 = u32_one;
- }
- else
- {
- grid_size_1 = fold_convert (uint32_type_node,
- gimple_call_arg (call_stmt, num_args - 2));
- grid_size_1 = force_gimple_operand_gsi (gsi, grid_size_1, true,
- NULL_TREE, true,
- GSI_SAME_STMT);
- group_size_1 = fold_convert (uint32_type_node,
- gimple_call_arg (call_stmt,
- num_args - 1));
- group_size_1 = force_gimple_operand_gsi (gsi, group_size_1, true,
- NULL_TREE, true,
- GSI_SAME_STMT);
- }
- }
- else
- {
- discard_arguents = 0;
- grid_size_1 = build_int_cst (uint32_type_node, 64);
- group_size_1 = build_int_cst (uint32_type_node, 64);
- }
-
- tree lattrs = create_tmp_var (hsa_launch_attributes_type,
- "__hsa_launch_attrs");
- tree dimref = build3 (COMPONENT_REF, uint32_type_node,
- lattrs, hsa_lattrs_dimnum_decl, NULL_TREE);
- gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT);
- insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 0,
- grid_size_1);
- insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 1,
- u32_one);
- insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 2,
- u32_one);
- insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 0,
- group_size_1);
- insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 1,
- u32_one);
- insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 2,
- u32_one);
- tree nargsref = build3 (COMPONENT_REF, uint32_type_node,
- lattrs, hsa_lattrs_nargs_decl, NULL_TREE);
- tree nargsval = build_int_cst (uint32_type_node, num_args - discard_arguents);
- gsi_insert_before (gsi, gimple_build_assign (nargsref, nargsval),
- GSI_SAME_STMT);
- lattrs = build_fold_addr_expr (lattrs);
-
- tree args;
- args = create_tmp_var (build_array_type_nelts (uint64_type_node,
- num_args - discard_arguents),
- NULL);
-
- gcc_assert (num_args >= discard_arguents);
- for (unsigned i = 0; i < (num_args - discard_arguents); i++)
- {
- tree arg = gimple_call_arg (call_stmt, i);
- gimple g;
-
- tree r = build4 (ARRAY_REF, uint64_type_node, args,
- size_int (i), NULL_TREE, NULL_TREE);
-
- arg = force_gimple_operand_gsi (gsi, fold_convert (uint64_type_node, arg),
- true, NULL_TREE, true, GSI_SAME_STMT);
- g = gimple_build_assign (r, arg);
- gsi_insert_before (gsi, g, GSI_SAME_STMT);
- }
-
- args = build_fold_addr_expr (args);
-
- /* XXX doesn't handle calls with lhs, doesn't remove EH
- edges. */
- gimple launch = gimple_build_call (hsa_launch_fn, 3,
- build_fold_addr_expr (fndecl),
- lattrs, args);
- gsi_insert_before (gsi, launch, GSI_SAME_STMT);
- unlink_stmt_vdef (call_stmt);
- gsi_remove (gsi, true);
-}
-
-/* Replace calls of functions which have been turned into HSA kernels into
- their invocation via HSA run-time. */
-
-static unsigned int
-wrap_all_hsa_calls (void)
-{
- bool changed = false;
- basic_block bb;
- FOR_ALL_BB_FN (bb, cfun)
- {
- gimple_stmt_iterator gsi;
- tree fndecl;
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
- if (is_gimple_call (gsi_stmt (gsi))
- && (fndecl = gimple_call_fndecl (gsi_stmt (gsi)))
- && (lookup_attribute ("hsa", DECL_ATTRIBUTES (fndecl))
- || lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl))))
- {
- wrap_hsa_kernel_call (&gsi, fndecl);
- changed = true;
- }
- else
- gsi_next (&gsi);
- }
- return changed ? TODO_cleanup_cfg | TODO_update_ssa : 0;
}
namespace {
@@ -4135,15 +3966,17 @@ pass_gen_hsail::gate (function *)
unsigned int
pass_gen_hsail::execute (function *)
{
- if (cgraph_node::get_create (current_function_decl)->hsa_imp_of
- || lookup_attribute ("hsa", DECL_ATTRIBUTES (current_function_decl))
- || lookup_attribute ("hsakernel",
- DECL_ATTRIBUTES (current_function_decl)))
- return generate_hsa (true);
- else if (hsa_callable_function_p (current_function_decl))
- return generate_hsa (false);
- else
- return wrap_all_hsa_calls ();
+ hsa_function_summary *s = hsa_summaries->get
+ (cgraph_node::get_create (current_function_decl));
+
+ if (s->gpu_implementation_p)
+ {
+ generate_hsa (s->kind == HSA_KERNEL);
+ TREE_ASM_WRITTEN (current_function_decl) = 1;
+ return TODO_stop_pass_execution;
+ }
+
+ return 0;
}
} // anon namespace
@@ -4155,5 +3988,3 @@ make_pass_gen_hsail (gcc::context *ctxt)
{
return new pass_gen_hsail (ctxt);
}
-
-#include "gt-hsa-gen.h"
@@ -27,27 +27,50 @@ along with GCC; see the file COPYING3. If not see
#include "hash-set.h"
#include "vec.h"
#include "symtab.h"
+#include "vec.h"
#include "input.h"
#include "alias.h"
#include "double-int.h"
#include "inchash.h"
#include "tree.h"
+#include "tree-pass.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "dominance.h"
#include "cfg.h"
-#include "cfghooks.h"
+#include "cfganal.h"
#include "function.h"
#include "predict.h"
#include "basic-block.h"
#include "fold-const.h"
#include "gimple.h"
+#include "gimple-iterator.h"
+#include "machmode.h"
+#include "output.h"
+#include "function.h"
#include "bitmap.h"
#include "dumpfile.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
-#include "cfganal.h"
+#include "alloc-pool.h"
+#include "tree-ssa-operands.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "rtl.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "ssa-iterators.h"
+#include "ipa-ref.h"
+#include "lto-streamer.h"
+#include "cgraph.h"
+#include "stor-layout.h"
+#include "gimplify-me.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
#include "hsa.h"
@@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "gimplify-me.h"
#include "print-tree.h"
+#include "symbol-summary.h"
#include "hsa.h"
/* Structure containing intermediate HSA representation of the generated
@@ -100,6 +101,9 @@ hash_map <tree, vec <char *> *> *hsa_decl_kernel_dependencies;
/* Hash function to lookup a symbol for a decl. */
hash_table <hsa_free_symbol_hasher> *hsa_global_variable_symbols;
+/* HSA summaries. */
+hsa_summary_t *hsa_summaries = NULL;
+
/* True if compilation unit-wide data are already allocated and initialized. */
static bool compilation_unit_data_initialized;
@@ -464,10 +468,34 @@ hsa_get_declaration_name (tree decl)
free (b);
return ggc_str;
}
+ else if (TREE_CODE (decl) == FUNCTION_DECL)
+ return cgraph_node::get_create (decl)->asm_name ();
else
return IDENTIFIER_POINTER (DECL_NAME (decl));
return NULL;
}
+/* Add a HOST function to HSA summaries. */
+
+void
+hsa_register_kernel (cgraph_node *host)
+{
+ if (hsa_summaries == NULL)
+ hsa_summaries = new hsa_summary_t (symtab);
+ hsa_function_summary *s = hsa_summaries->get (host);
+ s->kind = HSA_KERNEL;
+}
+
+/* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
+ a HOST function. */
+
+void
+hsa_register_kernel (cgraph_node *gpu, cgraph_node *host)
+{
+ if (hsa_summaries == NULL)
+ hsa_summaries = new hsa_summary_t (symtab);
+ hsa_summaries->link_functions (gpu, host, HSA_KERNEL);
+}
+
#include "gt-hsa.h"
@@ -889,10 +889,69 @@ public:
unsigned maximum_omp_data_size;
};
+enum hsa_function_kind
+{
+ HSA_NONE,
+ HSA_KERNEL,
+ HSA_FUNCTION
+};
+
+struct hsa_function_summary
+{
+ /* Default constructor. */
+ hsa_function_summary ();
+
+ /* Kind of GPU/host function. */
+ hsa_function_kind kind;
+
+ /* Pointer to a cgraph node which is an HSA implementation of the function.
+ In case the function is an HSA function, the binded function points
+ to the host function. */
+ cgraph_node *binded_function;
+
+ /* Identifies if the function is an HSA function or a host function. */
+ bool gpu_implementation_p;
+};
+
+inline
+hsa_function_summary::hsa_function_summary (): kind (HSA_NONE),
+ binded_function (NULL), gpu_implementation_p (false)
+{
+}
+
+/* Function summary for HSA functions. */
+class hsa_summary_t: public function_summary <hsa_function_summary *>
+{
+public:
+ hsa_summary_t (symbol_table *table):
+ function_summary<hsa_function_summary *> (table) { }
+
+ void link_functions (cgraph_node *gpu, cgraph_node *host,
+ hsa_function_kind kind);
+};
+
+inline void
+hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
+ hsa_function_kind kind)
+{
+ hsa_function_summary *gpu_summary = get (gpu);
+ hsa_function_summary *host_summary = get (host);
+
+ gpu_summary->kind = kind;
+ host_summary->kind = kind;
+
+ gpu_summary->gpu_implementation_p = true;
+ host_summary->gpu_implementation_p = false;
+
+ gpu_summary->binded_function = host;
+ host_summary->binded_function = gpu;
+}
+
/* in hsa.c */
extern struct hsa_function_representation *hsa_cfun;
extern hash_table <hsa_free_symbol_hasher> *hsa_global_variable_symbols;
extern hash_map <tree, vec <char *> *> *hsa_decl_kernel_dependencies;
+extern hsa_summary_t *hsa_summaries;
extern unsigned hsa_kernel_calls_counter;
bool hsa_callable_function_p (tree fndecl);
void hsa_init_compilation_unit_data (void);
@@ -915,6 +974,8 @@ void hsa_add_kernel_dependency (tree caller, char *called_function);
void hsa_sanitize_name (char *p);
char *hsa_brig_function_name (const char *p);
const char *hsa_get_declaration_name (tree decl);
+void hsa_register_kernel (cgraph_node *host);
+void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host);
/* In hsa-gen.c. */
void hsa_build_append_simple_mov (hsa_op_reg *, hsa_op_base *, hsa_bb *);
@@ -924,6 +985,7 @@ hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
hsa_bb *hsa_init_new_bb (basic_block);
hsa_function_representation *hsa_generate_function_declaration (tree decl);
+tree hsa_get_host_function (tree decl);
/* In hsa-regalloc.c. */
void hsa_regalloc (void);
new file mode 100644
@@ -0,0 +1,330 @@
+/* Interprocedural pass creating HSA clones of functions.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Martin Liska <mliska@suse.cz>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Interprocedural HSA pass is responsible for creation of HSA clones.
+ For all these HSA clones, we emit HSAIL instructions and pass processing
+ is terminated. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "is-a.h"
+#include "defaults.h"
+#include "hard-reg-set.h"
+#include "hash-set.h"
+#include "vec.h"
+#include "symtab.h"
+#include "vec.h"
+#include "input.h"
+#include "alias.h"
+#include "double-int.h"
+#include "inchash.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "function.h"
+#include "predict.h"
+#include "basic-block.h"
+#include "fold-const.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "machmode.h"
+#include "output.h"
+#include "function.h"
+#include "bitmap.h"
+#include "dumpfile.h"
+#include "gimple-pretty-print.h"
+#include "tree-streamer.h"
+#include "diagnostic-core.h"
+#include "alloc-pool.h"
+#include "tree-ssa-operands.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "rtl.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "ssa-iterators.h"
+#include "ipa-ref.h"
+#include "lto-streamer.h"
+#include "cgraph.h"
+#include "stor-layout.h"
+#include "gimplify-me.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
+#include "hsa.h"
+
+namespace {
+
+static unsigned int
+process_hsa_functions (void)
+{
+ struct cgraph_node *node;
+ /* Two walks over defined functions: create clones, then redirect calls. */
+ if (hsa_summaries == NULL)
+ hsa_summaries = new hsa_summary_t (symtab);
+ /* First walk: create and link a clone for each unlinked HSA candidate. */
+ FOR_EACH_DEFINED_FUNCTION (node)
+ {
+ hsa_function_summary *s = hsa_summaries->get (node);
+
+ /* A linked function is skipped. */
+ if (s->binded_function != NULL)
+ continue;
+ /* Already has an HSA kind: the clone is force-output and keeps it. */
+ if (s->kind != HSA_NONE)
+ {
+ cgraph_node *clone = node->create_virtual_clone
+ (vec <cgraph_edge *> (), NULL, NULL, "hsa");
+
+ clone->force_output = true;
+ hsa_summaries->link_functions (clone, node, s->kind);
+
+ if (dump_file)
+ fprintf (dump_file, "HSA creates a new clone: %s, type: %s\n",
+ clone->name (),
+ s->kind == HSA_KERNEL ? "kernel" : "function");
+ }
+ else if (hsa_callable_function_p (node->decl))
+ {
+ cgraph_node *clone = node->create_virtual_clone
+ (vec <cgraph_edge *> (), NULL, NULL, "hsa");
+ /* Unmarked but accepted by hsa_callable_function_p: HSA_FUNCTION. */
+ hsa_summaries->link_functions (clone, node, HSA_FUNCTION);
+
+ if (dump_file)
+ fprintf (dump_file, "HSA creates a new function clone: %s\n",
+ clone->name ());
+ }
+ }
+
+ /* Second walk: calls from GPU clones to host HSA functions go to GPU. */
+ FOR_EACH_DEFINED_FUNCTION (node)
+ {
+ cgraph_edge *e = node->callees;
+
+ while (e)
+ {
+ hsa_function_summary *src = hsa_summaries->get (node);
+ if (src->kind != HSA_NONE && src->gpu_implementation_p)
+ {
+ hsa_function_summary *dst = hsa_summaries->get (e->callee);
+ if (dst->kind != HSA_NONE && !dst->gpu_implementation_p)
+ {
+ e->redirect_callee (dst->binded_function);
+ if (dump_file)
+ fprintf (dump_file,
+ "Redirecting edge to HSA function: %s->%s\n",
+ xstrdup_for_dump (e->caller->name ()),
+ xstrdup_for_dump (e->callee->name ()));
+ }
+ }
+
+ e = e->next_callee;
+ }
+ }
+
+ return 0;
+}
+
+static void
+ipa_hsa_write_summary (void)
+{
+ struct bitpack_d bp;
+ struct cgraph_node *node;
+ struct output_block *ob;
+ unsigned int count = 0;
+ lto_symtab_encoder_iterator lsei;
+ lto_symtab_encoder_t encoder;
+ /* Stream out HSA summaries; without a summary table nothing is written. */
+ if (!hsa_summaries)
+ return;
+
+ ob = create_output_block (LTO_section_ipa_hsa);
+ encoder = ob->decl_state->symtab_node_encoder;
+ ob->symbol = NULL;
+ for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
+ lsei_next_function_in_partition (&lsei))
+ {
+ node = lsei_cgraph_node (lsei);
+ hsa_function_summary *s = hsa_summaries->get (node);
+ /* Only functions with an HSA kind are streamed; count them first. */
+ if (s->kind != HSA_NONE)
+ count++;
+ }
+ /* The record count precedes the records themselves. */
+ streamer_write_uhwi (ob, count);
+
+ /* Second pass: write one record per function that has an HSA kind. */
+ for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
+ lsei_next_function_in_partition (&lsei))
+ {
+ node = lsei_cgraph_node (lsei);
+ hsa_function_summary *s = hsa_summaries->get (node);
+
+ if (s->kind != HSA_NONE)
+ {
+ encoder = ob->decl_state->symtab_node_encoder;
+ int node_ref = lto_symtab_encoder_encode (encoder, node);
+ streamer_write_uhwi (ob, node_ref);
+ /* After the node reference: 2-bit kind, GPU flag, has-binding flag. */
+ bp = bitpack_create (ob->main_stream);
+ bp_pack_value (&bp, s->kind, 2);
+ bp_pack_value (&bp, s->gpu_implementation_p, 1);
+ bp_pack_value (&bp, s->binded_function != NULL, 1);
+ streamer_write_bitpack (&bp);
+ if (s->binded_function)
+ stream_write_tree (ob, s->binded_function->decl, true);
+ }
+ }
+ /* Append a zero byte, then hand the block to produce_asm and free it. */
+ streamer_write_char_stream (ob->main_stream, 0);
+ produce_asm (ob, NULL);
+ destroy_output_block (ob);
+}
+
+static void
+ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data,
+ size_t len)
+{
+ const struct lto_function_header *header =
+ (const struct lto_function_header *) data;
+ const int cfg_offset = sizeof (struct lto_function_header);
+ const int main_offset = cfg_offset + header->cfg_size;
+ const int string_offset = main_offset + header->main_size;
+ struct data_in *data_in;
+ unsigned int i;
+ unsigned int count;
+ /* Read back from DATA the records that ipa_hsa_write_summary stored. */
+ lto_input_block ib_main ((const char *) data + main_offset,
+ header->main_size, file_data->mode_table);
+
+ data_in =
+ lto_data_in_create (file_data, (const char *) data + string_offset,
+ header->string_size, vNULL);
+ count = streamer_read_uhwi (&ib_main);
+ /* COUNT records follow the leading count. */
+ for (i = 0; i < count; i++)
+ {
+ unsigned int index;
+ struct cgraph_node *node;
+ lto_symtab_encoder_t encoder;
+
+ index = streamer_read_uhwi (&ib_main);
+ encoder = file_data->symtab_node_encoder;
+ node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
+ index));
+ gcc_assert (node->definition);
+ hsa_function_summary *s = hsa_summaries->get (node);
+ /* Same order as written: 2-bit kind, GPU flag, has-binding flag. */
+ struct bitpack_d bp = streamer_read_bitpack (&ib_main);
+ s->kind = (hsa_function_kind) bp_unpack_value (&bp, 2);
+ s->gpu_implementation_p = bp_unpack_value (&bp, 1);
+ bool has_tree = bp_unpack_value (&bp, 1);
+ /* The bound function was streamed as a decl; map it back to a node. */
+ if (has_tree)
+ {
+ tree decl = stream_read_tree (&ib_main, data_in);
+ s->binded_function = cgraph_node::get_create (decl);
+ }
+ }
+ lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data,
+ len);
+ lto_data_in_delete (data_in);
+}
+
+static void
+ipa_hsa_read_summary (void)
+{
+ struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
+ struct lto_file_decl_data *file_data;
+ unsigned int j = 0;
+ /* Make sure the summary table exists before any section is read. */
+ if (hsa_summaries == NULL)
+ hsa_summaries = new hsa_summary_t (symtab);
+ /* FILE_DATA_VEC is walked until its NULL terminator. */
+ while ((file_data = file_data_vec[j++]))
+ {
+ size_t len;
+ const char *data = lto_get_section_data (file_data, LTO_section_ipa_hsa,
+ NULL, &len);
+ /* Files without an LTO_section_ipa_hsa section are skipped. */
+ if (data)
+ ipa_hsa_read_section (file_data, data, len);
+ }
+}
+
+const pass_data pass_data_ipa_hsa =
+{
+ IPA_PASS, /* type */
+ "hsa", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_IPA_HSA, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_symtab, /* todo_flags_finish */
+};
+
+class pass_ipa_hsa : public ipa_opt_pass_d
+{
+public:
+ pass_ipa_hsa (gcc::context *ctxt)
+ : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt,
+ NULL, /* generate_summary */
+ ipa_hsa_write_summary, /* write_summary */
+ ipa_hsa_read_summary, /* read_summary */
+ ipa_hsa_write_summary, /* write_optimization_summary */
+ ipa_hsa_read_summary, /* read_optimization_summary */
+ NULL, /* stmt_fixup */
+ 0, /* function_transform_todo_flags_start */
+ NULL, /* function_transform */
+ NULL) /* variable_transform */
+ {}
+ /* Only the summary read/write hooks are set; the other hooks are NULL. */
+ /* opt_pass methods: */
+ virtual bool gate (function *);
+
+ virtual unsigned int execute (function *) { return process_hsa_functions (); }
+
+}; // class pass_ipa_hsa
+
+bool
+pass_ipa_hsa::gate (function *)
+{
+ return hsa_gen_requested_p () || in_lto_p;
+}
+
+} // anon namespace
+
+ipa_opt_pass_d *
+make_pass_ipa_hsa (gcc::context *ctxt)
+{
+ return new pass_ipa_hsa (ctxt);
+}
@@ -68,7 +68,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
"ipcp_trans",
"icf",
"offload_table",
- "mode_table"
+ "mode_table",
+ "hsa"
};
@@ -244,6 +244,7 @@ enum lto_section_type
LTO_section_ipa_icf,
LTO_section_offload_table,
LTO_section_mode_table,
+ LTO_section_ipa_hsa,
LTO_N_SECTION_TYPES /* Must be last. */
};
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-utils.h"
#include "lto-partition.h"
#include "stringpool.h"
+#include "hsa.h"
vec<ltrans_partition> ltrans_partitions;
@@ -180,6 +181,53 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
Therefore put it into the same partition. */
if (cnode->instrumented_version)
add_symbol_to_partition_1 (part, cnode->instrumented_version);
+
+ /* Add the HSA counterpart associated with the symbol. */
+ if (hsa_summaries != NULL)
+ {
+ hsa_function_summary *s = hsa_summaries->get (cnode);
+ if (s->kind != HSA_NONE)
+ {
+ /* Add binded function. */
+ bool added = add_symbol_to_partition_1 (part, s->binded_function);
+ gcc_assert (added);
+ if (symtab->dump_file)
+ fprintf (symtab->dump_file,
+ "adding an HSA function (host/gpu) to the "
+ "partition: %s\n",
+ s->binded_function->name ());
+
+ ipa_ref *ref;
+
+ /* Add all referring nodes that have an HSA kind. */
+ for (unsigned i = 0; node->iterate_referring (i, ref); i++)
+ {
+ cgraph_node *r = dyn_cast <cgraph_node *> (ref->referring);
+ if (r && hsa_summaries->get (r)->kind != HSA_NONE)
+ {
+ add_symbol_to_partition_1 (part, r);
+ if (symtab->dump_file)
+ fprintf (symtab->dump_file,
+ "adding an HSA referring node: %s\n",
+ r->name ());
+ }
+ }
+
+ /* Add all referred nodes that have an HSA kind. */
+ for (unsigned i = 0; node->iterate_reference (i, ref); i++)
+ {
+ cgraph_node *r = dyn_cast <cgraph_node *> (ref->referred);
+ if (r && hsa_summaries->get (r)->kind != HSA_NONE)
+ {
+ add_symbol_to_partition_1 (part, r);
+ if (symtab->dump_file)
+ fprintf (symtab->dump_file,
+ "adding an HSA referred symbol: %s\n",
+ r->name ());
+ }
+ }
+ }
+ }
}
add_references_to_partition (part, node);
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
+#include "symbol-summary.h"
#include "hsa.h"
@@ -5236,7 +5237,7 @@ gimple_build_cond_empty (tree cond)
target region that has not been turned into a simple GPGPU kernel. */
static bool
-region_part_of_unkernelized_tartget_p (struct omp_region *region)
+region_part_of_unkernelized_target_p (struct omp_region *region)
{
if (lookup_attribute ("omp declare target",
DECL_ATTRIBUTES (current_function_decl)))
@@ -5429,10 +5430,11 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
false, GSI_CONTINUE_LINKING);
if (hsa_gen_requested_p ()
- && region_part_of_unkernelized_tartget_p (region))
+ && region_part_of_unkernelized_target_p (region))
{
cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
- child_cnode->hsa_imp_of = child_cnode;
+ hsa_register_kernel (child_cnode);
+
/* FIXME: Flatten should be set on HSA-only clones created by an IPA
pass. */
DECL_ATTRIBUTES (child_fndecl)
@@ -10010,7 +10012,8 @@ expand_target_kernel_body (struct omp_region *target)
{
gcc_assert (!tgt_stmt->kernel_iter);
cgraph_node *n = cgraph_node::get (orig_child_fndecl);
- n->hsa_imp_of = n;
+
+ hsa_register_kernel (n);
/* FIXME: Flatten should be set on HSA-only clones created by an IPA
pass. */
DECL_ATTRIBUTES (orig_child_fndecl)
@@ -10075,7 +10078,10 @@ expand_target_kernel_body (struct omp_region *target)
cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
kcn->mark_force_output ();
- kcn->hsa_imp_of = cgraph_node::get (orig_child_fndecl);
+ cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
+
+ hsa_register_kernel (kcn, orig_child);
+
/* FIXME: Flatten should be set on HSA-only clones created by an IPA
pass. */
DECL_ATTRIBUTES (kern_fndecl)
@@ -2257,7 +2257,7 @@ override_gate_status (opt_pass *pass, tree func, bool gate_status)
/* Execute PASS. */
bool
-execute_one_pass (opt_pass *pass)
+execute_one_pass (opt_pass *pass, bool *exit)
{
unsigned int todo_after = 0;
@@ -2362,18 +2362,28 @@ execute_one_pass (opt_pass *pass)
if (!((todo_after | pass->todo_flags_finish) & TODO_do_not_ggc_collect))
ggc_collect ();
+ /* If todo_after contains TODO_stop_pass_execution, set *exit to true. */
+ if (todo_after & TODO_stop_pass_execution)
+ *exit = true;
+
return true;
}
static void
execute_pass_list_1 (opt_pass *pass)
{
+ bool stop_pass_execution = false;
+
do
{
gcc_assert (pass->type == GIMPLE_PASS
|| pass->type == RTL_PASS);
- if (execute_one_pass (pass) && pass->sub)
+ if (execute_one_pass (pass, &stop_pass_execution) && pass->sub)
execute_pass_list_1 (pass->sub);
+
+ if (stop_pass_execution)
+ return;
+
pass = pass->next;
}
while (pass);
@@ -2714,12 +2724,14 @@ ipa_read_optimization_summaries (void)
void
execute_ipa_pass_list (opt_pass *pass)
{
+ bool stop_pass_execution;
+
do
{
gcc_assert (!current_function_decl);
gcc_assert (!cfun);
gcc_assert (pass->type == SIMPLE_IPA_PASS || pass->type == IPA_PASS);
- if (execute_one_pass (pass) && pass->sub)
+ if (execute_one_pass (pass, &stop_pass_execution) && pass->sub)
{
if (pass->sub->type == GIMPLE_PASS)
{
@@ -127,6 +127,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_ipa_inline);
NEXT_PASS (pass_ipa_pure_const);
NEXT_PASS (pass_ipa_reference);
+ NEXT_PASS (pass_ipa_hsa);
/* This pass needs to be scheduled after any IP code duplication. */
NEXT_PASS (pass_ipa_single_use);
/* Comdat privatization come last, as direct references to comdat local
@@ -94,6 +94,7 @@ DEFTIMEVAR (TV_WHOPR_WPA_IO , "whopr wpa I/O")
DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning")
DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans")
DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference")
+DEFTIMEVAR (TV_IPA_HSA , "ipa HSA")
DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile")
DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile")
DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const")
@@ -295,6 +295,7 @@ protected:
/* Rebuild the callgraph edges. */
#define TODO_rebuild_cgraph_edges (1 << 22)
+#define TODO_stop_pass_execution (1 << 23)
/* Internally used in execute_function_todo(). */
#define TODO_update_ssa_any \
@@ -480,6 +481,7 @@ extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
+extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
@@ -473,8 +473,6 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version __attribute__ ((unused)),
if (agent->prog_finalized)
destroy_hsa_program (agent);
- if (kernel_count == 0)
- GOMP_PLUGIN_fatal ("No kernels encountered in a brig module description");
if (debug)
fprintf (stderr, "Encountered %d kernels in an image\n", kernel_count);
pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
--
2.4.6