diff mbox

[gomp-4_0-branch] openacc parallel reduction part 1

Message ID 87y4s7i662.fsf@kepler.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge Oct. 23, 2014, 7:41 a.m. UTC
Hi!

On Mon, 28 Jul 2014 10:36:03 -0700, Cesar Philippidis <cesar_philippidis@mentor.com> wrote:
> On 07/28/2014 10:02 AM, Thomas Schwinge wrote:
> > On Sun, 6 Jul 2014 16:10:56 -0700, Cesar Philippidis <cesar_philippidis@mentor.com> wrote:
> >> This patch is the first step to enabling parallel reductions in openacc.
> > 
> > I think I have found one issue in this code -- but please verify that my
> > understanding of reductions is correct.  Namely:
> > 
> >> --- a/gcc/omp-low.c
> >> +++ b/gcc/omp-low.c
> >> +/* Helper function to finalize local data for the reduction arrays. The
> >> +   reduction array needs to be reduced to the original reduction variable.
> >> +   FIXME: This function assumes that there are vector_length threads in
> >> +   total.  Also, it assumes that there are at least vector_length iterations
> >> +   in the for loop.  */
> >> +
> >> +static void
> >> +finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
> >> +			 omp_context *ctx)
> >> +{
> >> +  gcc_assert (is_gimple_omp_oacc_specifically (ctx->stmt));
> >> +
> >> +  tree c, var, array, loop_header, loop_body, loop_exit;
> >> +  gimple stmt;
> >> +
> >> +  /* Create for loop.
> >> +
> >> +     let var = the original reduction variable
> >> +     let array = reduction variable array
> >> +
> >> +     var = array[0]
> >> +     for (i = 1; i < nthreads; i++)
> >> +       var op= array[i]
> >> + */
> > 
> > This should also consider the reduction variable's original value.  Test
> > case (which does the expected thing if modified for OpenMP):
> > 
> >     #include <stdlib.h>
> >     
> >     int
> >     main(void)
> >     {
> >     #define I 5
> >     #define N 11
> >     #define A 8
> >     
> >       int a = A;
> >       int s = I;
> >     
> >     #pragma acc parallel vector_length(N)
> >       {
> >         int i;
> >     #pragma acc loop reduction(+:s)
> >         for (i = 0; i < N; ++i)
> >           s += a;
> >       }
> >     
> >       if (s != I + N * A)
> >         abort ();
> >     
> >       return 0;
> >     }
> > 
> > OK to check in the following?
> 
> Reductions can be specified with both the parallel and loop constructs.
> According to section 2.5.11 in the opacc spec, a reduction in a parallel
> construct should behave as you described:
> 
> 	At the end of the region, the values for each gang are combined
> 	using the reduction operator, and the result combined with the
> 	value of the original variable and stored in the original
> 	variable.
> 
> However,in section 2.7.11, a reduction in a loop construct behaves as
> follows:
> 
> 	At the end of the loop, the values for each thread are combined
> 	using the specified reduction operator, and the result stored
> 	in the original variable at the end of the parallel or kernels 	
> 	region.
> 
> The parallel reduction behavior does make more sense though. I'll ask
> the openacc gurus if there's a typo in section 2.7.11. It does refer to
> parallel reduction.

I proceeded by checking in the following patch to gomp-4_0-branch,
r216574:

commit 75e2a58b8ef7d20be2239ff029493986542ee7e3
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Thu Oct 23 07:26:40 2014 +0000

    OpenACC reductions: Don't skip the reduction variable's original value.
    
    	gcc/
    	* omp-low.c (finalize_reduction_data): Don't skip the reduction
    	variable's original value.
    	libgomp/
    	* testsuite/libgomp.oacc-c/reduction-initial-1.c: New file.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@216574 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  5 +++
 gcc/omp-low.c                                      | 40 ++--------------------
 libgomp/ChangeLog.gomp                             |  4 +++
 .../testsuite/libgomp.oacc-c/reduction-initial-1.c | 32 +++++++++++++++++
 4 files changed, 44 insertions(+), 37 deletions(-)



Grüße,
 Thomas
diff mbox

Patch

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 6d107d2..28e7252 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,8 @@ 
+2014-10-23  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* omp-low.c (finalize_reduction_data): Don't skip the reduction
+	variable's original value.
+
 2014-10-20  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* gimplify.c (gimplify_scan_omp_clauses): Remove switch stmt which
diff --git gcc/omp-low.c gcc/omp-low.c
index b8022c2..b21235f 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -9869,8 +9869,7 @@  finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
      let var = the original reduction variable
      let array = reduction variable array
 
-     var = array[0]
-     for (i = 1; i < nthreads; i++)
+     for (i = 0; i < nthreads; i++)
        var op= array[i]
  */
 
@@ -9878,42 +9877,9 @@  finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
   loop_body = create_artificial_label (UNKNOWN_LOCATION);
   loop_exit = create_artificial_label (UNKNOWN_LOCATION);
 
-  /* Initialize the reduction variables to be value of the first array
-     element.  */
-  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-    {
-      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
-	continue;
-
-      tree_code reduction_code = OMP_CLAUSE_REDUCTION_CODE (c);
-
-      /* reduction(-:var) sums up the partial results, so it acts
-	 identically to reduction(+:var).  */
-      if (reduction_code == MINUS_EXPR)
-        reduction_code = PLUS_EXPR;
-
-      /* Set up reduction variable, var.  Becuase it's not gimple register,
-         it needs to be treated as a reference.  */
-      var = OMP_CLAUSE_DECL (c);
-      type = get_base_type (var);
-      tree ptr = lookup_reduction (omp_get_id (OMP_CLAUSE_DECL (c)), ctx);
-
-      /* Extract array[0] into mem.  */
-      tree mem = create_tmp_var (type, NULL);
-      gimplify_assign (mem, build_simple_mem_ref (ptr), stmt_seqp);
-
-      /* Find the original reduction variable.  */
-      tree x = build_outer_var_ref (var, ctx);
-      if (is_reference (var))
-	var = build_simple_mem_ref (var);
-
-      x = lang_hooks.decls.omp_clause_assign_op (c, var, mem);
-      gimplify_and_add (unshare_expr(x), stmt_seqp);
-    }
-
-  /* Create an index variable and set it to one.  */
+  /* Create and initialize an index variable.  */
   tree ix = create_tmp_var (sizetype, NULL);
-  gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_one_node),
+  gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_zero_node),
 		   stmt_seqp);
 
   /* Insert the loop header label here.  */
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 065dfb6..5363068 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,7 @@ 
+2014-10-23  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* testsuite/libgomp.oacc-c/reduction-initial-1.c: New file.
+
 2014-10-20  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* (GOACC_update): Declare.
diff --git libgomp/testsuite/libgomp.oacc-c/reduction-initial-1.c libgomp/testsuite/libgomp.oacc-c/reduction-initial-1.c
new file mode 100644
index 0000000..e763cf2
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/reduction-initial-1.c
@@ -0,0 +1,32 @@ 
+/* { dg-do run } */
+/* TODO:
+   { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+int
+main(void)
+{
+#define I 5
+/* TODO */
+#ifdef ACC_DEVICE_TYPE_host_nonshm
+# define N 1
+#else
+# define N 11
+#endif
+#define A 8
+
+  int a = A;
+  int s = I;
+
+#pragma acc parallel vector_length(N)
+  {
+    int i;
+#pragma acc loop reduction(+:s)
+    for (i = 0; i < N; ++i)
+      s += a;
+  }
+
+  if (s != I + N * A)
+    __builtin_abort();
+
+  return 0;
+}