From 2897bc5c5485430f1102688a437785fdf2a80add Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Fri, 25 Sep 2015 17:01:00 +0200
Subject: [PATCH] HSA: distribute hsa_num_threads among kernel dispatches.
libgomp/ChangeLog:
2015-09-25 Martin Liska <mliska@suse.cz>
* hsa-traits.h: Add omp_num_threads to hsa_kernel_dispatch
structure.
* plugin/plugin-hsa.c (print_kernel_dispatch): Print the
struct field.
(create_kernel_dispatch_recursive): Set default value
to omp_num_threads
(GOMP_OFFLOAD_run): Add shadow_reg to all kernel dispatches.
gcc/ChangeLog:
2015-09-25 Martin Liska <mliska@suse.cz>
* hsa-gen.c (struct hsa_kernel_dispatch): New field.
(gen_hsa_insns_for_kernel_call): Distribute hsa_num_threads
for a kernel dispatch.
(init_omp_in_prologue): Emit loading of shadow argument.
(gen_body_from_gimple): Remove usage of init_omp_in_prologue.
(generate_hsa): Move it to this function.
---
gcc/hsa-gen.c | 42 +++++++++++++++++++++++++++++++++++-------
libgomp/hsa-traits.h | 2 ++
libgomp/plugin/plugin-hsa.c | 16 ++++++++--------
3 files changed, 45 insertions(+), 15 deletions(-)
@@ -101,6 +101,8 @@ struct hsa_kernel_dispatch
uint32_t group_segment_size;
/* Number of children kernel dispatches. */
uint64_t kernel_dispatch_count;
+ /* Number of threads. */
+ uint32_t omp_num_threads;
/* Debug purpose argument. */
uint64_t debug;
/* Kernel dispatch structures created for children kernel dispatches. */
@@ -3523,6 +3525,16 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call)
addr);
hbb->append_insn (mem);
+ /* Write to shadow_reg->omp_num_threads = hsa_num_threads. */
+ hbb->append_insn (new hsa_insn_comment
+ ("set shadow_reg->omp_num_threads = hsa_num_threads"));
+
+ addr = new hsa_op_address (shadow_reg, offsetof (hsa_kernel_dispatch,
+ omp_num_threads));
+ hbb->append_insn
+ (new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads_reg->type,
+ hsa_num_threads_reg, addr));
+
/* Write to packet->workgroup_size_x. */
hbb->append_insn (new hsa_insn_comment
("set packet->workgroup_size_x = hsa_num_threads"));
@@ -4507,12 +4519,27 @@ hsa_init_new_bb (basic_block bb)
/* Initialize OMP in an HSA basic block PROLOGUE. */
static void
-init_omp_in_prologue (hsa_bb *prologue)
+init_omp_in_prologue (void)
{
- BrigType16_t t = hsa_num_threads->type;
- prologue->append_insn
- (new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (64, t),
- new hsa_op_address (hsa_num_threads)));
+ if (!hsa_cfun->kern_p)
+ return;
+
+ hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+ /* Load a default value from shadow argument. */
+ hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
+ hsa_op_address *addr = new hsa_op_address
+ (shadow_reg_ptr, offsetof (hsa_kernel_dispatch, omp_num_threads));
+
+ hsa_op_reg *threads = new hsa_op_reg (BRIG_TYPE_U32);
+ hsa_insn_basic *basic = new hsa_insn_mem
+ (BRIG_OPCODE_LD, threads->type, threads, addr);
+ prologue->append_insn (basic);
+
+ /* Save it to private variable hsa_num_threads. */
+ basic = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->type, threads,
+ new hsa_op_address (hsa_num_threads));
+ prologue->append_insn (basic);
}
/* Go over gimple representation and generate our internal HSA one. SSA_MAP
@@ -4554,8 +4581,6 @@ gen_body_from_gimple (vec <hsa_op_reg_p> *ssa_map)
}
}
- init_omp_in_prologue (hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
-
FOR_EACH_BB_FN (bb, cfun)
{
gimple_stmt_iterator gsi;
@@ -5012,6 +5037,9 @@ generate_hsa (bool kernel)
gen_function_def_parameters (hsa_cfun, &ssa_map);
if (seen_error ())
goto fail;
+
+ init_omp_in_prologue ();
+
gen_body_from_gimple (&ssa_map);
if (seen_error ())
goto fail;
@@ -43,6 +43,8 @@ struct hsa_kernel_dispatch
uint32_t group_segment_size;
/* Number of children kernel dispatches. */
uint64_t kernel_dispatch_count;
+ /* Number of threads. */
+ uint32_t omp_num_threads;
/* Debug purpose argument. */
uint64_t debug;
/* Kernel dispatch structures created for children kernel dispatches. */
@@ -743,6 +743,9 @@ print_kernel_dispatch (struct hsa_kernel_dispatch *dispatch, unsigned indent)
indent_stream (stderr, indent);
fprintf (stderr, "children dispatches: %lu\n",
dispatch->kernel_dispatch_count);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "omp_num_threads: %u\n",
+ dispatch->omp_num_threads);
fprintf (stderr, "\n");
for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
@@ -761,6 +764,7 @@ create_kernel_dispatch_recursive (struct kernel_info *kernel,
struct hsa_kernel_dispatch *shadow = create_kernel_dispatch (kernel,
omp_data_size);
+ shadow->omp_num_threads = 64;
shadow->debug = 0;
for (unsigned i = 0; i < kernel->dependencies_count; i++)
@@ -926,15 +930,11 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, const void* kern_launch)
hsa_signal_store_relaxed (s, 1);
memcpy (shadow->kernarg_address, &vars, sizeof (vars));
- /* Append shadow pointer to kernel arguments. */
- if (kernel->dependencies_count > 0)
- {
- memcpy (shadow->kernarg_address + sizeof (vars), &shadow,
- sizeof (struct hsa_kernel_runtime *));
+ memcpy (shadow->kernarg_address + sizeof (vars), &shadow,
+ sizeof (struct hsa_kernel_runtime *));
- if (debug)
- fprintf (stderr, "Copying kernel runtime pointer to kernarg_address\n");
- }
+ if (debug)
+ fprintf (stderr, "Copying kernel runtime pointer to kernarg_address\n");
uint16_t header;
header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
--
2.5.1