[hsa] Identify simple omp loops

Message ID 20140925234428.GC20259@virgil.suse
State New

Commit Message

Martin Jambor Sept. 25, 2014, 11:44 p.m. UTC
Hi,

this patch is preparatory work for the next one in the series.  It
identifies simple, single omp parallel for loops and marks them as
such.  A loop is eligible only if it is the sole construct in its
parallel region and no other omp construct is nested in it, and
further limitations apply, such as a static (or auto) schedule and no
collapse or ordered clause; for now the analysis really looks only
for the simplest possible loops.  For more explanation, please have a
look at the next patch; I split the two up just to keep the second
one smaller.
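
To illustrate, the following is roughly the simplest shape the
analysis accepts, i.e. a parallel construct whose only nested
construct is a single non-collapsed loop with a static schedule (the
identifiers in this sketch are of course made up):

  #pragma omp parallel
  #pragma omp for schedule(static)
  for (i = 0; i < n; i++)
    out[i] = in[i] * in[i];

Atomic loads and stores in the loop body are tolerated, but any other
construct nested in the loop makes the analysis give up.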

On its own, bootstrapping only showed that there are no new warnings;
I have tested this together with some later patches in the series on
a number of OMP testcases.  Committed to the HSA branch.
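
For reference, loops like the following two (again just made-up
sketches) are rejected by the new checks, each with a note in the
dump file saying which test failed:

  /* Rejected because of the collapse clause.  */
  #pragma omp parallel
  #pragma omp for collapse(2)
  for (i = 0; i < n; i++)
    for (j = 0; j < m; j++)
      a[i][j] = 0;

  /* Rejected because the schedule is neither static nor auto.  */
  #pragma omp parallel
  #pragma omp for schedule(dynamic)
  for (i = 0; i < n; i++)
    b[i] = foo (b[i]);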

Thanks,

Martin


2014-09-26  Martin Jambor  <mjambor@suse.cz>

	* omp-low.c (struct omp_region): New flag kernelize.
	(analyze_kernelizability): New function.
	(expand_omp): Call it.  New parameter within_parallel, update all
	callers.
---
 gcc/omp-low.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 138 insertions(+), 6 deletions(-)

Patch

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f97fe2c..ea8a2aa 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -121,6 +121,9 @@  struct omp_region
 
   /* True if this is a combined parallel+workshare region.  */
   bool is_combined_parallel;
+
+  /* True if this region is, or is part of, a kernelized parallel block.  */
+  bool kernelize;
 };
 
 /* Context structure.  Used to store information about each parallel
@@ -8507,15 +8510,133 @@  expand_omp_target (struct omp_region *region)
     }
 }
 
+/* Analyze a PARALLEL region and decide whether it should be turned into an HSA
+   kernel, i.e. whether it should contain just the body of the loop, with work
+   sharing decided by kernel attributes and the HSA run time.  */
+
+static void
+analyze_kernelizability (struct omp_region *parallel)
+{
+  gcc_checking_assert (parallel->type == GIMPLE_OMP_PARALLEL);
+  struct omp_region *inner = parallel->inner;
+  if (!inner)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it contains no nested constructs\n");
+      return;
+    }
+  if (inner->next)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it contains multiple OMP constructs\n");
+      return;
+    }
+  if (inner->type != GIMPLE_OMP_FOR)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it contains a non-looping construct\n");
+      return;
+    }
+  /* Nested atomic loads and stores are acceptable; skip over them.  */
+  struct omp_region *in2 = inner->inner;
+  while (in2 && (in2->type == GIMPLE_OMP_ATOMIC_LOAD
+		 || in2->type == GIMPLE_OMP_ATOMIC_STORE))
+    in2 = in2->next;
+  if (in2)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because the inner construct has nested constructs\n");
+      return;
+    }
+  if (!inner->cont)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it is not clear where its loop ends\n");
+      return;
+    }
+  gcc_assert (inner->exit);
+  gimple for_stmt = last_stmt (inner->entry);
+  if (gimple_omp_for_combined_p (for_stmt))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it satisfies gimple_omp_for_combined_p\n");
+      return;
+    }
+  struct omp_for_data fd;
+  extract_omp_for_data (for_stmt, &fd, NULL);
+  if (fd.collapse > 1)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it uses the collapse clause\n");
+      return;
+    }
+  if (fd.sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
+      && fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because we cannot handle the selected scheduling\n");
+      return;
+    }
+  if (fd.have_ordered)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it has an ordered clause\n");
+      return;
+    }
+  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE,
+			 gimple_location (last_stmt (parallel->entry)),
+			 "Will not turn parallel construct into kernel "
+			 "because it contains a distribute construct\n");
+      return;
+    }
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
+		     gimple_location (last_stmt (parallel->entry)),
+		     "Parallel construct will be turned into an HSA kernel\n");
+  parallel->kernelize = true;
+  inner->kernelize = true;
+}
 
 /* Expand the parallel region tree rooted at REGION.  Expansion
    proceeds in depth-first order.  Innermost regions are expanded
    first.  This way, parallel regions that require a new function to
    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
-   internal dependencies in their body.  */
+   internal dependencies in their body.  WITHIN_PARALLEL must be true if
+   REGION is located within a parallel construct.  */
 
 static void
-expand_omp (struct omp_region *region)
+expand_omp (struct omp_region *region, bool within_parallel)
 {
   while (region)
     {
@@ -8525,14 +8644,25 @@  expand_omp (struct omp_region *region)
       /* First, determine whether this is a combined parallel+workshare
        	 region.  */
       if (region->type == GIMPLE_OMP_PARALLEL)
-	determine_parallel_type (region);
+	{
+	  determine_parallel_type (region);
+	  if (!within_parallel)
+	    analyze_kernelizability (region);
+	  else if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE,
+			     gimple_location (last_stmt (region->entry)),
+			     "Will not turn parallel construct into kernel "
+			     "because it is located within another parallel "
+			     "construct\n");
+	}
 
       if (region->type == GIMPLE_OMP_FOR
 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
 	inner_stmt = last_stmt (region->inner->entry);
 
       if (region->inner)
-	expand_omp (region->inner);
+	expand_omp (region->inner,
+		    within_parallel || (region->type == GIMPLE_OMP_PARALLEL));
 
       saved_location = input_location;
       if (gimple_has_location (last_stmt (region->entry)))
@@ -8688,7 +8818,7 @@  omp_expand_local (basic_block head)
     }
 
   remove_exit_barriers (root_omp_region);
-  expand_omp (root_omp_region);
+  expand_omp (root_omp_region, false);
 
   free_omp_regions ();
 }
@@ -8723,7 +8853,7 @@  execute_expand_omp (void)
 
   remove_exit_barriers (root_omp_region);
 
-  expand_omp (root_omp_region);
+  expand_omp (root_omp_region, false);
 
   cleanup_tree_cfg ();
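
A closing note on the expand_omp change: given a nested parallel such
as the following made-up sketch, the inner parallel construct is
skipped because within_parallel is true for it, and the outer one is
rejected as well because its only nested construct is not a loop, so
neither of the two is turned into a kernel:

  #pragma omp parallel
  {
    #pragma omp parallel
    #pragma omp for schedule(static)
    for (i = 0; i < n; i++)
      a[i] = i;
  }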