diff mbox

[hsa-branch] Switch dynamic parallelism off by default

Message ID E1bUxHf-0005FO-Og@eggs.gnu.org
State New
Headers show

Commit Message

Martin Jambor July 25, 2016, 4:39 p.m. UTC
Hi,

the dynamic parallelism (i.e. the ability to launch one HSA kernel from
another and, in particular, to wait for its completion) path is unreliable and
problematic in many ways with no fix in sight.  I do not want to remove
the code just yet, it is likely to prove useful at least as a reference,
but am going to switch it off by default even on the HSA branch (it is
not even present on trunk).  It can be switched on by means of a new
parameter, which is technically a switch, but I want to
emphasize that the interface is volatile.

Thanks,

Martin

2016-07-25  Martin Jambor  <mjambor@suse.cz>

	* params.def (PARAM_HSA_EXPAND_GOMP_PARALLEL): New.
	* hsa-gen.c (gen_hsa_insns_for_call): Only expand gomp_parallel if
	the above parameter is set to one.
	* invoke.texi (hsa-expand-omp-parallel): New.
---
 gcc/doc/invoke.texi |  6 ++++++
 gcc/hsa-gen.c       | 31 ++++++++++++++++++-------------
 gcc/params.def      |  6 ++++++
 3 files changed, 30 insertions(+), 13 deletions(-)
diff mbox

Patch

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index bddac9c..1ba10e4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9769,6 +9769,12 @@  Enable creation of gridified GPU kernels out of loops within target
 OpenMP constructs.  This conversion is enabled by default when
 offloading to HSA, to disable it, use @option{--param omp-gpu-gridify=0}
 
+@item hsa-expand-omp-parallel
+Enable compiling non-gridified OpenMP parallel constructs into HSAIL as
+invocations of child kernels in their own grid.  This behavior is
+disabled by default because in many scenarios it does not work
+properly.  To enable it, use @option{--param hsa-expand-omp-parallel=1}.
+
 @item hsa-gen-debug-stores
 Enable emission of special debug stores within HSA kernels which are
 then read and reported by libgomp plugin.  Generation of these stores
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 6accbd7..a944df4 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -5870,20 +5870,25 @@  gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 					 BRIG_WIDTH_ALL));
       break;
     case BUILT_IN_GOMP_PARALLEL:
-      {
-	gcc_checking_assert (gimple_call_num_args (stmt) == 4);
-	tree called = gimple_call_arg (stmt, 0);
-	gcc_checking_assert (TREE_CODE (called) == ADDR_EXPR);
-	called = TREE_OPERAND (called, 0);
-	gcc_checking_assert (TREE_CODE (called) == FUNCTION_DECL);
-
-	const char *name
-	  = hsa_brig_function_name (hsa_get_declaration_name (called));
-	hsa_add_kernel_dependency (hsa_cfun->m_decl, name);
-	gen_hsa_insns_for_kernel_call (hbb, as_a <gcall *> (stmt));
+      if (PARAM_VALUE (PARAM_HSA_EXPAND_GOMP_PARALLEL) == 1)
+	{
+	  gcc_checking_assert (gimple_call_num_args (stmt) == 4);
+	  tree called = gimple_call_arg (stmt, 0);
+	  gcc_checking_assert (TREE_CODE (called) == ADDR_EXPR);
+	  called = TREE_OPERAND (called, 0);
+	  gcc_checking_assert (TREE_CODE (called) == FUNCTION_DECL);
+
+	  const char *name
+	    = hsa_brig_function_name (hsa_get_declaration_name (called));
+	  hsa_add_kernel_dependency (hsa_cfun->m_decl, name);
+	  gen_hsa_insns_for_kernel_call (hbb, as_a <gcall *> (stmt));
+	}
+      else
+	HSA_SORRY_AT (gimple_location (stmt), "expansion of ungridified "
+		      "omp parallel is epxerimental, enable with "
+		      "--param hsa-expand-omp-parallel");
+      break;
 
-	break;
-      }
     case BUILT_IN_OMP_GET_THREAD_NUM:
       {
 	query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb);
diff --git a/gcc/params.def b/gcc/params.def
index 129da8f..632d5ef 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1242,6 +1242,12 @@  DEFPARAM (PARAM_OMP_GPU_GRIDIFY,
 	  "constructs",
 	  1, 0, 1)
 
+DEFPARAM (PARAM_HSA_EXPAND_GOMP_PARALLEL,
+	  "hsa-expand-omp-parallel",
+	  "Expand ungridified OpenMP parallel via dynamic parallelism "
+	  "constructs",
+	  0, 0, 1)
+
 DEFPARAM (PARAM_HSA_GEN_DEBUG_STORES,
 	  "hsa-gen-debug-stores",
 	  "Level of hsa debug stores verbosity",