diff mbox

[HSA] introduce hsa_num_threads

Message ID 560BC0DA.50201@suse.cz
State New
Headers show

Commit Message

Martin Liška Sept. 30, 2015, 11 a.m. UTC
On 09/25/2015 04:22 PM, Martin Liška wrote:
> Hello.
> 
> In the following patch HSA is capable of handling various OMP builtins
> that are utilized to set or get the number of threads.
> 
> Martin
> 

Hello.

This patch is a small follow-up which preserves hsa_num_threads among
kernel dispatches.

Martin
diff mbox

Patch

From 2897bc5c5485430f1102688a437785fdf2a80add Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Fri, 25 Sep 2015 17:01:00 +0200
Subject: [PATCH] HSA: distribute hsa_num_threads among kernel dispatches.

libgomp/ChangeLog:

2015-09-25  Martin Liska  <mliska@suse.cz>

        * hsa-traits.h: Add omp_num_threads to hsa_kernel_dispatch
        structure.
        * plugin/plugin-hsa.c (print_kernel_dispatch): Print the
        struct field.
        (create_kernel_dispatch_recursive): Set default value
        to omp_num_threads
        (GOMP_OFFLOAD_run): Add shadow_reg to all kernel dispatches.

gcc/ChangeLog:

2015-09-25  Martin Liska  <mliska@suse.cz>

	* hsa-gen.c (struct hsa_kernel_dispatch): New field.
	(gen_hsa_insns_for_kernel_call): Distribute hsa_num_threads
	for a kernel dispatch.
	(init_omp_in_prologue): Emit loading of shadow argument.
	(gen_body_from_gimple): Remove usage of init_omp_in_prologue.
	(generate_hsa): Move it to this function.
---
 gcc/hsa-gen.c               | 42 +++++++++++++++++++++++++++++++++++-------
 libgomp/hsa-traits.h        |  2 ++
 libgomp/plugin/plugin-hsa.c | 16 ++++++++--------
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 6f45bfe..185b9cc 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -101,6 +101,8 @@  struct hsa_kernel_dispatch
   uint32_t group_segment_size;
   /* Number of children kernel dispatches.  */
   uint64_t kernel_dispatch_count;
+  /* Number of threads.  */
+  uint32_t omp_num_threads;
   /* Debug purpose argument.  */
   uint64_t debug;
   /* Kernel dispatch structures created for children kernel dispatches.  */
@@ -3523,6 +3525,16 @@  gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call)
 			  addr);
   hbb->append_insn (mem);
 
+  /* Write to shadow_reg->omp_num_threads = hsa_num_threads.  */
+  hbb->append_insn (new hsa_insn_comment
+		    ("set shadow_reg->omp_num_threads = hsa_num_threads"));
+
+  addr = new hsa_op_address (shadow_reg, offsetof (hsa_kernel_dispatch,
+						   omp_num_threads));
+  hbb->append_insn
+    (new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads_reg->type,
+		       hsa_num_threads_reg, addr));
+
   /* Write to packet->workgroup_size_x.  */
   hbb->append_insn (new hsa_insn_comment
 		    ("set packet->workgroup_size_x = hsa_num_threads"));
@@ -4507,12 +4519,27 @@  hsa_init_new_bb (basic_block bb)
 /* Initialize OMP in an HSA basic block PROLOGUE.  */
 
 static void
-init_omp_in_prologue (hsa_bb *prologue)
+init_omp_in_prologue (void)
 {
-  BrigType16_t t = hsa_num_threads->type;
-  prologue->append_insn
-    (new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (64, t),
-		       new hsa_op_address (hsa_num_threads)));
+  if (!hsa_cfun->kern_p)
+    return;
+
+  hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+  /* Load a default value from shadow argument.  */
+  hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
+  hsa_op_address *addr = new hsa_op_address
+    (shadow_reg_ptr, offsetof (hsa_kernel_dispatch, omp_num_threads));
+
+  hsa_op_reg *threads = new hsa_op_reg (BRIG_TYPE_U32);
+  hsa_insn_basic *basic = new hsa_insn_mem
+    (BRIG_OPCODE_LD, threads->type, threads, addr);
+  prologue->append_insn (basic);
+
+  /* Save it to private variable hsa_num_threads.  */
+  basic = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->type, threads,
+			    new hsa_op_address (hsa_num_threads));
+  prologue->append_insn (basic);
 }
 
 /* Go over gimple representation and generate our internal HSA one.  SSA_MAP
@@ -4554,8 +4581,6 @@  gen_body_from_gimple (vec <hsa_op_reg_p> *ssa_map)
 	}
     }
 
-  init_omp_in_prologue (hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
-
   FOR_EACH_BB_FN (bb, cfun)
     {
       gimple_stmt_iterator gsi;
@@ -5012,6 +5037,9 @@  generate_hsa (bool kernel)
   gen_function_def_parameters (hsa_cfun, &ssa_map);
   if (seen_error ())
     goto fail;
+
+  init_omp_in_prologue ();
+
   gen_body_from_gimple (&ssa_map);
   if (seen_error ())
     goto fail;
diff --git a/libgomp/hsa-traits.h b/libgomp/hsa-traits.h
index 3b20008..6fb7e48 100644
--- a/libgomp/hsa-traits.h
+++ b/libgomp/hsa-traits.h
@@ -43,6 +43,8 @@  struct hsa_kernel_dispatch
   uint32_t group_segment_size;
   /* Number of children kernel dispatches.  */
   uint64_t kernel_dispatch_count;
+  /* Number of threads.  */
+  uint32_t omp_num_threads;
   /* Debug purpose argument.  */
   uint64_t debug;
   /* Kernel dispatch structures created for children kernel dispatches.  */
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index f9be015..76a3b45 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -743,6 +743,9 @@  print_kernel_dispatch (struct hsa_kernel_dispatch *dispatch, unsigned indent)
   indent_stream (stderr, indent);
   fprintf (stderr, "children dispatches: %lu\n",
 	   dispatch->kernel_dispatch_count);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "omp_num_threads: %u\n",
+	   dispatch->omp_num_threads);
   fprintf (stderr, "\n");
 
   for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
@@ -761,6 +764,7 @@  create_kernel_dispatch_recursive (struct kernel_info *kernel,
 
   struct hsa_kernel_dispatch *shadow = create_kernel_dispatch (kernel,
 							       omp_data_size);
+  shadow->omp_num_threads = 64;
   shadow->debug = 0;
 
   for (unsigned i = 0; i < kernel->dependencies_count; i++)
@@ -926,15 +930,11 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, const void* kern_launch)
   hsa_signal_store_relaxed (s, 1);
   memcpy (shadow->kernarg_address, &vars, sizeof (vars));
 
-  /* Append shadow pointer to kernel arguments.  */
-  if (kernel->dependencies_count > 0)
-    {
-      memcpy (shadow->kernarg_address + sizeof (vars), &shadow,
-	      sizeof (struct hsa_kernel_runtime *));
+  memcpy (shadow->kernarg_address + sizeof (vars), &shadow,
+	  sizeof (struct hsa_kernel_runtime *));
 
-      if (debug)
-	fprintf (stderr, "Copying kernel runtime pointer to kernarg_address\n");
-    }
+  if (debug)
+    fprintf (stderr, "Copying kernel runtime pointer to kernarg_address\n");
 
   uint16_t header;
   header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
-- 
2.5.1