diff mbox

[hsa] Create a special omp statement for gpu kernels

Message ID 20150828145330.GW32341@virgil.suse.cz
State New
Headers show

Commit Message

Martin Jambor Aug. 28, 2015, 2:53 p.m. UTC
Hi,

The patch below, which I have committed to the branch, adds a special
gimple statement code in which GPU statements can survive between
lowering and expansion.  It also makes sure that statements which
pertain to the kernel loop, but which lowering puts in front of the
loop, are picked up by expansion and put into a separate function.

Thanks,

Martin


2015-08-28  Martin Jambor  <mjambor@suse.cz>

	* omp-low.c (expand_omp_for_kernel): Do not insert return
	statement.
	(expand_target_kernel_body): Handle kernels
	encapsulated in GIMPLE_OMP_GPUKERNEL statements.
	(lower_omp_target): Lower kernel code into a new
	GIMPLE_OMP_GPUKERNEL statement.
	* gimple.def (GIMPLE_OMP_GPUKERNEL): New code.
	* gimple.c (gimple_build_omp_gpukernel): New function.
	(gimple_copy): Handle GIMPLE_OMP_GPUKERNEL case.
	* gimple-low.c (lower_stmt): Likewise.
	* gimple-pretty-print.c (dump_gimple_omp_block): Likewise.
	(pp_gimple_stmt_1): Likewise.
	* gimple.h (gimple_build_omp_gpukernel): Declare.
	(gimple_has_substatements): Handle GIMPLE_OMP_GPUKERNEL case.
	(CASE_GIMPLE_OMP): Likewise.
diff mbox

Patch

Index: gcc/gimple.def
===================================================================
--- gcc/gimple.def	(revision 227279)
+++ gcc/gimple.def	(working copy)
@@ -375,6 +375,10 @@  DEFGSCODE(GIMPLE_OMP_TARGET, "gimple_omp
    CLAUSES is an OMP_CLAUSE chain holding the associated clauses.  */
 DEFGSCODE(GIMPLE_OMP_TEAMS, "gimple_omp_teams", GSS_OMP_SINGLE_LAYOUT)
 
+/* GIMPLE_OMP_GPUKERNEL <BODY> represents a parallel loop lowered for execution
+   on a GPU.  It is an artificial statement created by omp lowering.  */
+DEFGSCODE(GIMPLE_OMP_GPUKERNEL, "gimple_omp_gpukernel", GSS_OMP)
+
 /* GIMPLE_PREDICT <PREDICT, OUTCOME> specifies a hint for branch prediction.
 
    PREDICT is one of the predictors from predict.def.
Index: gcc/gimple.c
===================================================================
--- gcc/gimple.c	(revision 227279)
+++ gcc/gimple.c	(working copy)
@@ -959,6 +959,19 @@  gimple_build_omp_master (gimple_seq body
   return p;
 }
 
+/* Build and return a new GIMPLE_OMP_GPUKERNEL statement.
+
+   BODY is the kernel's statement sequence; if NULL, no body is set.  */
+
+gimple
+gimple_build_omp_gpukernel (gimple_seq body)
+{
+  gimple p = gimple_alloc (GIMPLE_OMP_GPUKERNEL, 0);
+  if (body)
+    gimple_omp_set_body (p, body);
+
+  return p;
+}
 
 /* Build a GIMPLE_OMP_TASKGROUP statement.
 
@@ -1798,6 +1811,7 @@  gimple_copy (gimple stmt)
 	case GIMPLE_OMP_MASTER:
 	case GIMPLE_OMP_TASKGROUP:
 	case GIMPLE_OMP_ORDERED:
+	case GIMPLE_OMP_GPUKERNEL:
 	copy_omp_body:
 	  new_seq = gimple_seq_copy (gimple_omp_body (stmt));
 	  gimple_omp_set_body (copy, new_seq);
Index: gcc/gimple.h
===================================================================
--- gcc/gimple.h	(revision 227279)
+++ gcc/gimple.h	(working copy)
@@ -1435,6 +1435,7 @@  gomp_task *gimple_build_omp_task (gimple
 				       tree, tree);
 gimple gimple_build_omp_section (gimple_seq);
 gimple gimple_build_omp_master (gimple_seq);
+gimple gimple_build_omp_gpukernel (gimple_seq);
 gimple gimple_build_omp_taskgroup (gimple_seq);
 gomp_continue *gimple_build_omp_continue (tree, tree);
 gimple gimple_build_omp_ordered (gimple_seq);
@@ -1691,6 +1692,7 @@  gimple_has_substatements (gimple g)
     case GIMPLE_OMP_TARGET:
     case GIMPLE_OMP_TEAMS:
     case GIMPLE_OMP_CRITICAL:
+    case GIMPLE_OMP_GPUKERNEL:
     case GIMPLE_WITH_CLEANUP_EXPR:
     case GIMPLE_TRANSACTION:
       return true;
@@ -5879,7 +5881,8 @@  gimple_return_set_retbnd (gimple gs, tre
     case GIMPLE_OMP_RETURN:			\
     case GIMPLE_OMP_ATOMIC_LOAD:		\
     case GIMPLE_OMP_ATOMIC_STORE:		\
-    case GIMPLE_OMP_CONTINUE
+    case GIMPLE_OMP_CONTINUE:			\
+    case GIMPLE_OMP_GPUKERNEL
 
 static inline bool
 is_gimple_omp (const_gimple stmt)
Index: gcc/gimple-pretty-print.c
===================================================================
--- gcc/gimple-pretty-print.c	(revision 227279)
+++ gcc/gimple-pretty-print.c	(working copy)
@@ -1486,6 +1486,9 @@  dump_gimple_omp_block (pretty_printer *b
 	case GIMPLE_OMP_SECTION:
 	  pp_string (buffer, "#pragma omp section");
 	  break;
+	case GIMPLE_OMP_GPUKERNEL:
+	  pp_string (buffer, "#pragma omp gpukernel");
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -2240,6 +2243,7 @@  pp_gimple_stmt_1 (pretty_printer *buffer
     case GIMPLE_OMP_TASKGROUP:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_SECTION:
+    case GIMPLE_OMP_GPUKERNEL:
       dump_gimple_omp_block (buffer, gs, spc, flags);
       break;
 
Index: gcc/gimple-low.c
===================================================================
--- gcc/gimple-low.c	(revision 227279)
+++ gcc/gimple-low.c	(working copy)
@@ -366,6 +366,7 @@  lower_stmt (gimple_stmt_iterator *gsi, s
     case GIMPLE_OMP_TASK:
     case GIMPLE_OMP_TARGET:
     case GIMPLE_OMP_TEAMS:
+    case GIMPLE_OMP_GPUKERNEL:
       data->cannot_fallthru = false;
       lower_omp_directive (gsi, data);
       data->cannot_fallthru = false;
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 227279)
+++ gcc/omp-low.c	(working copy)
@@ -9905,8 +9905,6 @@  expand_omp_for_kernel (struct omp_region
   gsi = gsi_last_bb (kfor->exit);
   gcc_assert (!gsi_end_p (gsi)
 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
-  gimple ret_stmt = gimple_build_return (NULL);
-  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
   gsi_remove (&gsi, true);
 
   /* Fixup the much simpler CFG.  */
@@ -9957,14 +9955,13 @@  expand_target_kernel_body (struct omp_re
   struct omp_region **pp;
 
   for (pp = &target->inner; *pp; pp = &(*pp)->next)
-    if ((*pp)->type == GIMPLE_OMP_FOR
-	&& (gimple_omp_for_kind (last_stmt ((*pp)->entry))
-	    == GF_OMP_FOR_KIND_KERNEL_BODY))
+    if ((*pp)->type == GIMPLE_OMP_GPUKERNEL)
       break;
 
+  struct omp_region *gpukernel = *pp;
+
   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
-  struct omp_region *kfor = *pp;
-  if (!kfor)
+  if (!gpukernel)
     {
       gcc_assert (!tgt_stmt->kernel_iter);
       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
@@ -9978,9 +9975,18 @@  expand_target_kernel_body (struct omp_re
     }
 
   gcc_assert (tgt_stmt->kernel_iter);
+  *pp = gpukernel->next;
+
+  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
+    if ((*pp)->type == GIMPLE_OMP_FOR
+	&& (gimple_omp_for_kind (last_stmt ((*pp)->entry))
+	    == GF_OMP_FOR_KIND_KERNEL_BODY))
+      break;
+
+  struct omp_region *kfor = *pp;
+  gcc_assert (kfor);
   if (kfor->inner)
     expand_omp (kfor->inner);
-  *pp = kfor->next;
 
   tree kern_fndecl = copy_node (orig_child_fndecl);
   DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
@@ -10007,8 +10013,20 @@  expand_target_kernel_body (struct omp_re
 
   expand_omp_for_kernel (kfor);
 
-  move_sese_region_to_fn (kern_cfun, single_succ (kfor->entry),
-			  kfor->exit, block);
+  /* Remove the GIMPLE_OMP_GPUKERNEL statement at the region entry.  */
+  gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
+  gsi_remove (&gsi, true);
+  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
+     return.  */
+  gsi = gsi_last_bb (gpukernel->exit);
+  gcc_assert (!gsi_end_p (gsi)
+	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
+  gimple ret_stmt = gimple_build_return (NULL);
+  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
+  gsi_remove (&gsi, true);
+
+  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
+			  gpukernel->exit, block);
 
   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
   kcn->mark_force_output ();
@@ -10034,7 +10052,6 @@  expand_target_kernel_body (struct omp_re
   basic_block bb;
   FOR_EACH_BB_FN (bb, kern_cfun)
     {
-      gimple_stmt_iterator gsi;
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 	{
 	  gimple stmt = gsi_stmt (gsi);
@@ -12117,10 +12134,12 @@  lower_omp_target (gimple_stmt_iterator *
       lower_omp (&tgt_body, ctx);
       if (ctx->kernel_inner_loop)
 	{
-	  /* FIXME: Try to invent an encapsulating block which would survive
-	     until omp expansion.  */
 	  gimple_seq_add_stmt (&kernel_seq, ctx->kernel_inner_loop);
 	  lower_omp (&kernel_seq, ctx);
+	  gimple_seq_add_stmt (&kernel_seq, gimple_build_omp_return (false));
+	  gimple gpukernel = gimple_build_omp_gpukernel (kernel_seq);
+	  kernel_seq = NULL;
+	  gimple_seq_add_stmt (&kernel_seq, gpukernel);
 	}
       target_nesting_level--;
     }
@@ -13047,6 +13066,7 @@  make_gimple_omp_edges (basic_block bb, s
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
     case GIMPLE_OMP_SECTION:
+    case GIMPLE_OMP_GPUKERNEL:
       cur_region = new_omp_region (bb, code, cur_region);
       fallthru = true;
       break;
Index: libgomp/plugin/plugin-hsa.c
===================================================================
--- libgomp/plugin/plugin-hsa.c	(revision 227279)
+++ libgomp/plugin/plugin-hsa.c	(working copy)
@@ -855,9 +855,6 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, v
 {
   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
   struct agent_info *agent = kernel->agent;
-  if (pthread_rwlock_rdlock (&agent->modules_rwlock))
-    GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
-
   struct kernel_launch_attributes def;
   const struct kernel_launch_attributes *kla;
   if (!parse_launch_attributes (kern_launch, &def, &kla))
@@ -867,6 +864,8 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, v
 		 "zero\n");
       return;
     }
+  if (pthread_rwlock_rdlock (&agent->modules_rwlock))
+    GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
 
   create_and_finalize_hsa_program (agent);
   init_kernel (kernel) ;