diff mbox series

[PR90742] OpenACC/OpenMP target offloading: Fortran 'allocatable' scalars in 'firstprivate' clauses

Message ID 87r285h4ue.fsf@euler.schwinge.homeip.net
State New
Headers show
Series [PR90742] OpenACC/OpenMP target offloading: Fortran 'allocatable' scalars in 'firstprivate' clauses | expand

Commit Message

Thomas Schwinge June 7, 2019, 2:01 p.m. UTC
Hi!

As I had mentioned in the PR...

On Tue, 7 Aug 2018 14:55:07 -0700, Cesar Philippidis <cesar@codesourcery.com> wrote:
> This patch

... would be one component for fixing <https://gcc.gnu.org/PR90742>
"OpenACC/OpenMP target offloading: Fortran 'allocatable' scalars in
'firstprivate' clauses".

(Also, as mentioned there, such changes have been submitted already, a
few times, muddled into other changes.  So, thanks, that this also got
submitted separately, to address just this one issue.)

> updates the way that lower_omp_target uses firstprivate
> pointers in OpenACC offloaded regions. On host side, when preparing
> firstprivate data mapping for pointer type objects, not to be confused
> with GOMP_MAP_FIRSTPRIVATE_POINTER, the compiler passes passes the
> address of the value being pointed to and not the address of the pointer
> itself to the runtime. Correspondingly, on the device side, the compiler
> generates to code to dereference the remapped pointer once to copy the
> data to a local buffer.
> 
> While this behavior looks like it would break things, it will not affect
> C or C++ data mappings, because those languages transfer pointers via
> GOMP_MAP_FIRSTPRIVATE_POINTER.

Not with current GCC sources, as I should eventually find out, which are
still missing another patch or two, or three, or more.

> In addition, this will not cause
> problems with array types, because the default remapping rules for
> OpenACC is to transfer them in via copy. Besides it really doesn't
> make sense to allow arrays to be transferred in via firstprivate
> because that would use up a lot of memory on the accelerator.

(Huh, but the latter ought to be supported nevertheless, as far as I
understand?  Anyway, that'll be for later.)

> Is this OK for trunk? I bootstrapped and regtested it for x86_64 with
> nvptx offloading.

The patch, as proposed, does introduce regressions.

> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -7643,15 +7643,21 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
>  	      {
>  		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
> -		if (omp_is_reference (new_var)
> -		    && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE)
> +		if (omp_is_reference (new_var))
>  		  {
>  		    /* Create a local object to hold the instance
>  		       value.  */
> -		    tree type = TREE_TYPE (TREE_TYPE (new_var));
> +		    tree type = TREE_TYPE (new_var);
> +		    /* Pointer types are mapped onto the device via a
> +		       single level of indirection.  */
> +		    if (TREE_CODE (type) != POINTER_TYPE)
> +		      type = TREE_TYPE (type);
>  		    const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var));
>  		    tree inst = create_tmp_var (type, id);
> -		    gimplify_assign (inst, fold_indirect_ref (x), &fplist);
> +		    if (TREE_CODE (TREE_TYPE (new_var)) == POINTER_TYPE)
> +		      gimplify_assign (inst, fold_indirect_ref (x), &fplist);
> +		    else
> +		      gimplify_assign (inst, fold_indirect_ref (x), &fplist);
>  		    x = build_fold_addr_expr (inst);
>  		  }
>  		gimplify_assign (new_var, x, &fplist);

(It seems strange to have the same code in both branches of the 'if'
statement?)

> @@ -7879,7 +7885,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  		else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
>  		  {
>  		    gcc_assert (is_gimple_omp_oacc (ctx->stmt));
> -		    if (!omp_is_reference (var))
> +		    /* Handle Fortran allocatable scalars.  */
> +		    if (!omp_is_reference (var)
> +			&& TREE_CODE (TREE_TYPE (var)) != POINTER_TYPE)
>  		      {
>  			if (is_gimple_reg (var)
>  			    && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
|  			  TREE_NO_WARNING (var) = 1;
|  			var = build_fold_addr_expr (var);
|  		      }
|  		    else
|  		      talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
|  		    gimplify_assign (x, var, &ilist);
|  		  }

That's what's causing regressions, for example for 'firstprivate' clauses
even in non-offloading situation ('if(0)' clause, for example):

    Program received signal SIGSEGV, Segmentation fault.
    0x0000000000402f8a in main._omp_fn.1 () at source-gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c:59
    59                      b[ii] = a[ii] + 1;
    (gdb) list 10, 10
    10          float *a, *b, *d_a, *d_b, exp, exp2;
    (gdb) list 16, 17
    16          a = (float *) malloc (N * sizeof (float));
    17          b = (float *) malloc (N * sizeof (float));
    (gdb) list 52, 63 
    52      #pragma acc parallel if(0)
    53          {
    54              int ii;
    55
    56              for (ii = 0; ii < N; ii++)
    57              {
    58                  if (acc_on_device (acc_device_host))
    59                      b[ii] = a[ii] + 1;
    60                  else
    61                      b[ii] = a[ii];
    62              }
    63          }

So we got here implicit 'firstprivate(a, b)' (which in this scenario
means no-op, given that the host pointer values are just passed through).
(On x86_64) these used to have eight bytes alignment, now they have four
bytes.  But worse, the code on the "sending" side is changed as follows
('omplower' dump):

    -      b.57 = b;
    -      .omp_data_arr.54.b = &b.57;
    +      .omp_data_arr.54.b = b;
    -      a.58 = a;
    -      .omp_data_arr.54.a = &a.58;
    +      .omp_data_arr.54.a = a;
           #pragma omp target oacc_parallel if(0) firstprivate(b) firstprivate(a) [child fn: main._omp_fn.1 (.omp_data_arr.54, .omp_data_sizes.55, .omp_data_kinds.56)]

..., but the "receiving" side stays the same, so we got a mismatch.

If something like that, then the 'POINTER_TYPE' conditional should
probably be inside the '!omp_is_reference' conditional, just guarding the
'build_fold_addr_expr'?


Anyway, I had a look at this now, and seem to have gotten it work.

I will admit, though, that I'm somewhat lost especially with all the
'omp_is_reference' usage ("should privatize what this DECL points to
rather than the DECL itself").  Using that on 'OMP_CLAUSE_DECL ([...])'
(the common case) makes sense given that's in context of the originating
source language, but what exactly does it mean when 'omp_is_reference' is
used on 'new_var = lookup_decl (var, ctx)', or on 'var =
lookup_decl_in_outer_ctx (ovar, ctx)', where the things looked up by
these (that is, stored in 'ctx->cb.decl_map') are "arbitrary"/"synthetic"
items?  (Jakub?)  Or is it actually improper to use 'omp_is_reference' on
these, but it just happens to do the expected things in the (several)
existing cases?

Anyway, for an 'integer, allocatable :: a' that is used 'firstprivate'
inside an OpenACC offloading region, we now get the following 'omplower'
changes:

    [...]
       integer(kind=4) * a;
    [...]
           a = __builtin_malloc (4);
    [...]
    -        a.16 = a;
    -        .omp_data_arr.13.a = &a.16;
    +        .omp_data_arr.13.a = a;
             #pragma omp target oacc_parallel map(tofrom:b [len: 400]) firstprivate(a) [child fn: MAIN__._omp_fn.0 (.omp_data_arr.13, .omp_data_sizes.14, .omp_data_kinds.15)]
               {
                 .omp_data_i = (const struct .omp_data_t.10 & restrict) &.omp_data_arr.13;
    -            D.3981 = .omp_data_i->a;
    -            a = *D.3981;
    +            a = .omp_data_i->a;
    [...]

..., and that seems to work fine.  (..., and no testsuite regressions.)

(I have not yet looked into the related OpenMP changes required.)

Jakub, is the following about right, do you have any comments?  If
approving this patch, please respond with "Reviewed-by: NAME <EMAIL>" so
that your effort will be recorded in the commit log, see
<https://gcc.gnu.org/wiki/Reviewed-by>.

The code changes seem very ad-hoc, but that's the common impression that
I got from looking at/working though a lot of all that OMP code...  :-(



Grüße
 Thomas

Comments

Thomas Schwinge Oct. 7, 2019, 9:28 a.m. UTC | #1
Hi!

Jakub, ping -- and/or: Kwok, Tobias, as you recently worked through that
code for related issues (Fortran optional arguments), do you happen to
have any comments?

On 2019-06-07T16:01:29+0200, I wrote:
> As I had mentioned in the PR...
>
> On Tue, 7 Aug 2018 14:55:07 -0700, Cesar Philippidis <cesar@codesourcery.com> wrote:
>> This patch
>
> ... would be one component for fixing <https://gcc.gnu.org/PR90742>
> "OpenACC/OpenMP target offloading: Fortran 'allocatable' scalars in
> 'firstprivate' clauses".
>
> (Also, as mentioned there, such changes have been submitted already, a
> few times, muddled into other changes.  So, thanks, that this also got
> submitted separately, to address just this one issue.)
>
>> updates the way that lower_omp_target uses firstprivate
>> pointers in OpenACC offloaded regions. On host side, when preparing
>> firstprivate data mapping for pointer type objects, not to be confused
>> with GOMP_MAP_FIRSTPRIVATE_POINTER, the compiler passes passes the
>> address of the value being pointed to and not the address of the pointer
>> itself to the runtime. Correspondingly, on the device side, the compiler
>> generates to code to dereference the remapped pointer once to copy the
>> data to a local buffer.
>> 
>> While this behavior looks like it would break things, it will not affect
>> C or C++ data mappings, because those languages transfer pointers via
>> GOMP_MAP_FIRSTPRIVATE_POINTER.
>
> Not with current GCC sources, as I should eventually find out, which are
> still missing another patch or two, or three, or more.
>
>> In addition, this will not cause
>> problems with array types, because the default remapping rules for
>> OpenACC is to transfer them in via copy. Besides it really doesn't
>> make sense to allow arrays to be transferred in via firstprivate
>> because that would use up a lot of memory on the accelerator.
>
> (Huh, but the latter ought to be supported nevertheless, as far as I
> understand?  Anyway, that'll be for later.)
>
>> Is this OK for trunk? I bootstrapped and regtested it for x86_64 with
>> nvptx offloading.
>
> The patch, as proposed, does introduce regressions.
>
>> --- a/gcc/omp-low.c
>> +++ b/gcc/omp-low.c
>> @@ -7643,15 +7643,21 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>>  	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
>>  	      {
>>  		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
>> -		if (omp_is_reference (new_var)
>> -		    && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE)
>> +		if (omp_is_reference (new_var))
>>  		  {
>>  		    /* Create a local object to hold the instance
>>  		       value.  */
>> -		    tree type = TREE_TYPE (TREE_TYPE (new_var));
>> +		    tree type = TREE_TYPE (new_var);
>> +		    /* Pointer types are mapped onto the device via a
>> +		       single level of indirection.  */
>> +		    if (TREE_CODE (type) != POINTER_TYPE)
>> +		      type = TREE_TYPE (type);
>>  		    const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var));
>>  		    tree inst = create_tmp_var (type, id);
>> -		    gimplify_assign (inst, fold_indirect_ref (x), &fplist);
>> +		    if (TREE_CODE (TREE_TYPE (new_var)) == POINTER_TYPE)
>> +		      gimplify_assign (inst, fold_indirect_ref (x), &fplist);
>> +		    else
>> +		      gimplify_assign (inst, fold_indirect_ref (x), &fplist);
>>  		    x = build_fold_addr_expr (inst);
>>  		  }
>>  		gimplify_assign (new_var, x, &fplist);
>
> (It seems strange to have the same code in both branches of the 'if'
> statement?)
>
>> @@ -7879,7 +7885,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>>  		else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
>>  		  {
>>  		    gcc_assert (is_gimple_omp_oacc (ctx->stmt));
>> -		    if (!omp_is_reference (var))
>> +		    /* Handle Fortran allocatable scalars.  */
>> +		    if (!omp_is_reference (var)
>> +			&& TREE_CODE (TREE_TYPE (var)) != POINTER_TYPE)
>>  		      {
>>  			if (is_gimple_reg (var)
>>  			    && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
> |  			  TREE_NO_WARNING (var) = 1;
> |  			var = build_fold_addr_expr (var);
> |  		      }
> |  		    else
> |  		      talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
> |  		    gimplify_assign (x, var, &ilist);
> |  		  }
>
> That's what's causing regressions, for example for 'firstprivate' clauses
> even in non-offloading situation ('if(0)' clause, for example):
>
>     Program received signal SIGSEGV, Segmentation fault.
>     0x0000000000402f8a in main._omp_fn.1 () at source-gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c:59
>     59                      b[ii] = a[ii] + 1;
>     (gdb) list 10, 10
>     10          float *a, *b, *d_a, *d_b, exp, exp2;
>     (gdb) list 16, 17
>     16          a = (float *) malloc (N * sizeof (float));
>     17          b = (float *) malloc (N * sizeof (float));
>     (gdb) list 52, 63 
>     52      #pragma acc parallel if(0)
>     53          {
>     54              int ii;
>     55
>     56              for (ii = 0; ii < N; ii++)
>     57              {
>     58                  if (acc_on_device (acc_device_host))
>     59                      b[ii] = a[ii] + 1;
>     60                  else
>     61                      b[ii] = a[ii];
>     62              }
>     63          }
>
> So we got here implicit 'firstprivate(a, b)' (which in this scenario
> means no-op, given that the host pointer values are just passed through).
> (On x86_64) these used to have eight bytes alignment, now they have four
> bytes.  But worse, the code on the "sending" side is changed as follows
> ('omplower' dump):
>
>     -      b.57 = b;
>     -      .omp_data_arr.54.b = &b.57;
>     +      .omp_data_arr.54.b = b;
>     -      a.58 = a;
>     -      .omp_data_arr.54.a = &a.58;
>     +      .omp_data_arr.54.a = a;
>            #pragma omp target oacc_parallel if(0) firstprivate(b) firstprivate(a) [child fn: main._omp_fn.1 (.omp_data_arr.54, .omp_data_sizes.55, .omp_data_kinds.56)]
>
> ..., but the "receiving" side stays the same, so we got a mismatch.
>
> If something like that, then the 'POINTER_TYPE' conditional should
> probably be inside the '!omp_is_reference' conditional, just guarding the
> 'build_fold_addr_expr'?
>
>
> Anyway, I had a look at this now, and seem to have gotten it work.
>
> I will admit, though, that I'm somewhat lost especially with all the
> 'omp_is_reference' usage ("should privatize what this DECL points to
> rather than the DECL itself").  Using that on 'OMP_CLAUSE_DECL ([...])'
> (the common case) makes sense given that's in context of the originating
> source language, but what exactly does it mean when 'omp_is_reference' is
> used on 'new_var = lookup_decl (var, ctx)', or on 'var =
> lookup_decl_in_outer_ctx (ovar, ctx)', where the things looked up by
> these (that is, stored in 'ctx->cb.decl_map') are "arbitrary"/"synthetic"
> items?  (Jakub?)  Or is it actually improper to use 'omp_is_reference' on
> these, but it just happens to do the expected things in the (several)
> existing cases?
>
> Anyway, for an 'integer, allocatable :: a' that is used 'firstprivate'
> inside an OpenACC offloading region, we now get the following 'omplower'
> changes:
>
>     [...]
>        integer(kind=4) * a;
>     [...]
>            a = __builtin_malloc (4);
>     [...]
>     -        a.16 = a;
>     -        .omp_data_arr.13.a = &a.16;
>     +        .omp_data_arr.13.a = a;
>              #pragma omp target oacc_parallel map(tofrom:b [len: 400]) firstprivate(a) [child fn: MAIN__._omp_fn.0 (.omp_data_arr.13, .omp_data_sizes.14, .omp_data_kinds.15)]
>                {
>                  .omp_data_i = (const struct .omp_data_t.10 & restrict) &.omp_data_arr.13;
>     -            D.3981 = .omp_data_i->a;
>     -            a = *D.3981;
>     +            a = .omp_data_i->a;
>     [...]
>
> ..., and that seems to work fine.  (..., and no testsuite regressions.)
>
> (I have not yet looked into the related OpenMP changes required.)
>
> Jakub, is the following about right, do you have any comments?  If
> approving this patch, please respond with "Reviewed-by: NAME <EMAIL>" so
> that your effort will be recorded in the commit log, see
> <https://gcc.gnu.org/wiki/Reviewed-by>.
>
> The code changes seem very ad-hoc, but that's the common impression that
> I got from looking at/working though a lot of all that OMP code...  :-(
>
> --- gcc/omp-low.c
> +++ gcc/omp-low.c
> @@ -9685,7 +9685,13 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  	      {
>  		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
>  		if (omp_is_reference (new_var)
> -		    && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE)
> +		    && TREE_CODE (TREE_TYPE (new_var)) == POINTER_TYPE)
> +		  {
> +		    /* Special handling for Fortran 'allocatable' scalars:
> +		       avoid indirection.  */
> +		    x = build_receiver_ref (var, false, ctx);
> +		  }
> +		else if (omp_is_reference (new_var))
>  		  {
>  		    /* Create a local object to hold the instance
>  		       value.  */
> @@ -9920,7 +9926,13 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  		else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
>  		  {
>  		    gcc_assert (is_gimple_omp_oacc (ctx->stmt));
> -		    if (!omp_is_reference (var))
> +		    if (omp_is_reference (lookup_decl (ovar, ctx))
> +			&& TREE_CODE (TREE_TYPE (ovar)) == POINTER_TYPE)
> +		      {
> +			/* Special handling for Fortran 'allocatable' scalars:
> +			   avoid indirection.  */
> +		      }
> +		    else if (!omp_is_reference (var))
>  		      {
>  			if (is_gimple_reg (var)
>  			    && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))


Grüße
 Thomas
diff mbox series

Patch

--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -9685,7 +9685,13 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	      {
 		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
 		if (omp_is_reference (new_var)
-		    && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE)
+		    && TREE_CODE (TREE_TYPE (new_var)) == POINTER_TYPE)
+		  {
+		    /* Special handling for Fortran 'allocatable' scalars:
+		       avoid indirection.  */
+		    x = build_receiver_ref (var, false, ctx);
+		  }
+		else if (omp_is_reference (new_var))
 		  {
 		    /* Create a local object to hold the instance
 		       value.  */
@@ -9920,7 +9926,13 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
 		  {
 		    gcc_assert (is_gimple_omp_oacc (ctx->stmt));
-		    if (!omp_is_reference (var))
+		    if (omp_is_reference (lookup_decl (ovar, ctx))
+			&& TREE_CODE (TREE_TYPE (ovar)) == POINTER_TYPE)
+		      {
+			/* Special handling for Fortran 'allocatable' scalars:
+			   avoid indirection.  */
+		      }
+		    else if (!omp_is_reference (var))
 		      {
 			if (is_gimple_reg (var)
 			    && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))