diff mbox

[gomp-4_0-branch] openacc parallel reduction part 1

Message ID 87wqaxh0x7.fsf@kepler.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge July 28, 2014, 5:02 p.m. UTC
Hi Cesar!

On Sun, 6 Jul 2014 16:10:56 -0700, Cesar Philippidis <cesar_philippidis@mentor.com> wrote:
> This patch is the first step to enabling parallel reductions in openacc.

I think I have found one issue in this code -- but please verify that my
understanding of reductions is correct.  Namely:

> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> +/* Helper function to finalize local data for the reduction arrays. The
> +   reduction array needs to be reduced to the original reduction variable.
> +   FIXME: This function assumes that there are vector_length threads in
> +   total.  Also, it assumes that there are at least vector_length iterations
> +   in the for loop.  */
> +
> +static void
> +finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
> +			 omp_context *ctx)
> +{
> +  gcc_assert (is_gimple_omp_oacc_specifically (ctx->stmt));
> +
> +  tree c, var, array, loop_header, loop_body, loop_exit;
> +  gimple stmt;
> +
> +  /* Create for loop.
> +
> +     let var = the original reduction variable
> +     let array = reduction variable array
> +
> +     var = array[0]
> +     for (i = 1; i < nthreads; i++)
> +       var op= array[i]
> + */

This should also consider the reduction variable's original value.  Test
case (which does the expected thing if modified for OpenMP):

    #include <stdlib.h>
    
    int
    main(void)
    {
    #define I 5
    #define N 11
    #define A 8
    
      int a = A;
      int s = I;
    
    #pragma acc parallel vector_length(N)
      {
        int i;
    #pragma acc loop reduction(+:s)
        for (i = 0; i < N; ++i)
          s += a;
      }
    
      if (s != I + N * A)
        abort ();
    
      return 0;
    }

OK to check in the following?



Grüße,
 Thomas

Comments

Cesar Philippidis July 28, 2014, 5:36 p.m. UTC | #1
On 07/28/2014 10:02 AM, Thomas Schwinge wrote:
> Hi Cesar!
> 
> On Sun, 6 Jul 2014 16:10:56 -0700, Cesar Philippidis <cesar_philippidis@mentor.com> wrote:
>> This patch is the first step to enabling parallel reductions in openacc.
> 
> I think I have found one issue in this code -- but please verify that my
> understanding of reductions is correct.  Namely:
> 
>> --- a/gcc/omp-low.c
>> +++ b/gcc/omp-low.c
>> +/* Helper function to finalize local data for the reduction arrays. The
>> +   reduction array needs to be reduced to the original reduction variable.
>> +   FIXME: This function assumes that there are vector_length threads in
>> +   total.  Also, it assumes that there are at least vector_length iterations
>> +   in the for loop.  */
>> +
>> +static void
>> +finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
>> +			 omp_context *ctx)
>> +{
>> +  gcc_assert (is_gimple_omp_oacc_specifically (ctx->stmt));
>> +
>> +  tree c, var, array, loop_header, loop_body, loop_exit;
>> +  gimple stmt;
>> +
>> +  /* Create for loop.
>> +
>> +     let var = the original reduction variable
>> +     let array = reduction variable array
>> +
>> +     var = array[0]
>> +     for (i = 1; i < nthreads; i++)
>> +       var op= array[i]
>> + */
> 
> This should also consider the reduction variable's original value.  Test
> case (which does the expected thing if modified for OpenMP):
> 
>     #include <stdlib.h>
>     
>     int
>     main(void)
>     {
>     #define I 5
>     #define N 11
>     #define A 8
>     
>       int a = A;
>       int s = I;
>     
>     #pragma acc parallel vector_length(N)
>       {
>         int i;
>     #pragma acc loop reduction(+:s)
>         for (i = 0; i < N; ++i)
>           s += a;
>       }
>     
>       if (s != I + N * A)
>         abort ();
>     
>       return 0;
>     }
> 
> OK to check in the following?

Reductions can be specified with both the parallel and loop constructs.
According to section 2.5.11 in the opacc spec, a reduction in a parallel
construct should behave as you described:

	At the end of the region, the values for each gang are combined
	using the reduction operator, and the result combined with the
	value of the original variable and stored in the original
	variable.

However,in section 2.7.11, a reduction in a loop construct behaves as
follows:

	At the end of the loop, the values for each thread are combined
	using the specified reduction operator, and the result stored
	in the original variable at the end of the parallel or kernels 	
	region.

The parallel reduction behavior does make more sense though. I'll ask
the openacc gurus if there's a typo in section 2.7.11. It does refer to
parallel reduction.

Thanks,
Cesar
diff mbox

Patch

--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -9547,8 +9547,7 @@  finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
      let var = the original reduction variable
      let array = reduction variable array
 
-     var = array[0]
-     for (i = 1; i < nthreads; i++)
+     for (i = 0; i < nthreads; i++)
        var op= array[i]
  */
 
@@ -9556,42 +9555,9 @@  finalize_reduction_data (tree clauses, tree nthreads, gimple_seq *stmt_seqp,
   loop_body = create_artificial_label (UNKNOWN_LOCATION);
   loop_exit = create_artificial_label (UNKNOWN_LOCATION);
 
-  /* Initialize the reduction variables to be value of the first array
-     element.  */
-  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-    {
-      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
-	continue;
-
-      tree_code reduction_code = OMP_CLAUSE_REDUCTION_CODE (c);
-
-      /* reduction(-:var) sums up the partial results, so it acts
-	 identically to reduction(+:var).  */
-      if (reduction_code == MINUS_EXPR)
-        reduction_code = PLUS_EXPR;
-
-      /* Set up reduction variable, var.  Becuase it's not gimple register,
-         it needs to be treated as a reference.  */
-      var = OMP_CLAUSE_DECL (c);
-      type = get_base_type (var);
-      tree ptr = lookup_reduction (omp_get_id (OMP_CLAUSE_DECL (c)), ctx);
-
-      /* Extract array[0] into mem.  */
-      tree mem = create_tmp_var (type, NULL);
-      gimplify_assign (mem, build_simple_mem_ref (ptr), stmt_seqp);
-
-      /* Find the original reduction variable.  */
-      tree x = build_outer_var_ref (var, ctx);
-      if (is_reference (var))
-	var = build_simple_mem_ref (var);
-
-      x = lang_hooks.decls.omp_clause_assign_op (c, var, mem);
-      gimplify_and_add (unshare_expr(x), stmt_seqp);
-    }
-
-  /* Create an index variable and set it to one.  */
+  /* Create and initialize an index variable.  */
   tree ix = create_tmp_var (sizetype, NULL);
-  gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_one_node),
+  gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_zero_node),
 		   stmt_seqp);
 
   /* Insert the loop header label here.  */