diff mbox

[hsa] Stricter target_follows_kernelizable_pattern

Message ID 20150902184644.GI2685@virgil.suse.cz
State New
Headers show

Commit Message

Martin Jambor Sept. 2, 2015, 6:46 p.m. UTC
Hi,

the patch below makes target_follows_kernelizable_pattern stricter by
adding checks for clauses whose presence must preclude kernelization.
Committed to the branch.

Thanks,

Martin


2015-09-02  Martin Jambor  <mjambor@suse.cz>

	* omp-low.c (target_follows_kernelizable_pattern): A num_threads
	clause on the parallel construct and a non-automatic loop schedule
	now preclude kernelization.
---
 gcc/ChangeLog.hsa |  6 ++++++
 gcc/omp-low.c     | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 36 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 6c2bbe7..d6c521f 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2832,9 +2832,23 @@  target_follows_kernelizable_pattern (gomp_target *target, tree *group_size_p,
   gomp_parallel *par;
   if (!stmt || !(par = dyn_cast <gomp_parallel *> (stmt)))
     return NULL;
+
+  tree clauses = gimple_omp_parallel_clauses (par);
+  tree num_threads_clause = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
+  if (num_threads_clause)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, tloc,
+			 "Will not turn target construct into a "
+			 "simple GPGPU kernel because there is a num_threads "
+			 "clause of the parallel construct that "
+			 "is likely to require looping \n");
+      return NULL;
+    }
+
   stmt = single_stmt_in_seq_skip_bind (gimple_omp_body (par), tloc, "parallel");
-  /* FIXME: We are currently ignoring parallel clauses and potentially also
-     sharing clauses of teams and distribute, if there are any. We need to
+  /* FIXME: We are currently ignoring parallel sharing clauses and potentially
+     also sharing clauses of teams and distribute, if there are any. We need to
      check they can be skipped.  */
   gomp_for *gfor;
   if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
@@ -2859,6 +2873,20 @@  target_follows_kernelizable_pattern (gomp_target *target, tree *group_size_p,
       return NULL;
     }
 
+  clauses = gimple_omp_for_clauses (gfor);
+  tree for_sched_clause = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
+
+  if (for_sched_clause
+      && OMP_CLAUSE_SCHEDULE_KIND (for_sched_clause) != OMP_CLAUSE_SCHEDULE_AUTO)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, tloc,
+			 "Will not turn target construct into a simple GPGPU "
+			 "kernel because the inner loop has non-automatic "
+			 "scheduling clause\n");
+      return NULL;
+    }
+
   if (teams)
     gather_inner_locals (gimple_omp_body (teams), kri);
   if (dist)