diff mbox series

[OpenACC,2.7,v2] Adjust acc_map_data/acc_unmap_data interaction with reference counters

Message ID c6029808-3e32-4686-98ea-f5767af03b5d@pllab.cs.nthu.edu.tw
State New
Headers show
Series [OpenACC,2.7,v2] Adjust acc_map_data/acc_unmap_data interaction with reference counters | expand

Commit Message

Chung-Lin Tang March 4, 2024, 4:18 p.m. UTC
Hi Thomas,

On 2023/10/31 11:06 PM, Thomas Schwinge wrote:
> A few comments, should be easy to work in:

>> @@ -460,7 +461,7 @@ acc_unmap_data (void *h)
>>       the different 'REFCOUNT_INFINITY' cases, or simply separate
>>       'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
>>       etc.)?  */
>> -  else if (n->refcount != REFCOUNT_INFINITY)
>> +  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
>>      {
>>        gomp_mutex_unlock (&acc_dev->lock);
>>        gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
> 
> Thus remove the TODO comment before this 'else if' block?  :-)

Removed TODO block, comments added below around new assert.

> We should add a comment here that we're unmapping without consideration
> of 'n->dynamic_refcount' (that is, 'acc_unmap_data' has implicit
> 'finalize' semantics -- at least per my reading of the specification; do
> you agree?), that is:
> 
>     acc_map_data([var]); // 'dynamic_refcount = 1'
>     acc_copyin([var]); // 'dynamic_refcount++'
>     acc_unmap_data([var]); // does un-map, despite 'dynamic_refcount == 2'?
>     assert (!acc_is_present([var]));
> 
> Do we have such a test case?  If not, please add one.

I've added a new testsuite/libgomp.oacc-c-c++-common/lib-96.c testcase for this.

> To complement 'goacc_exit_datum_1' (see below), we should add here:
> 
>     assert (n->dynamic_refcount >= 1);

Added.

> 
> The subsequenct code:
> 
>     if (tgt->refcount == REFCOUNT_INFINITY)
>       {
>         gomp_mutex_unlock (&acc_dev->lock);
>         gomp_fatal ("cannot unmap target block");
>       }
> 
> ... is now unreachable, I think, and may thus be removed -- and any
> inconsistency is caught by the subsequent:
> 
>     /* Above, we've verified that the mapping must have been set up by
>        'acc_map_data'.  */
>     assert (tgt->refcount == 1);

Removed the 'if (tgt->refcount == REFCOUNT_INFINITY)' block.

>> @@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>>
>>    if (finalize)
>>      {
>> -      if (n->refcount != REFCOUNT_INFINITY)
>> +      if (n->refcount != REFCOUNT_INFINITY
>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>       n->refcount -= n->dynamic_refcount;
>> -      n->dynamic_refcount = 0;
>> +
>> +      if (n->refcount == REFCOUNT_ACC_MAP_DATA)
>> +     /* Mappings created by acc_map_data are returned to initial
>> +        dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
>> +     n->dynamic_refcount = 1;
>> +      else
>> +     n->dynamic_refcount = 0;
>>      }
>>    else if (n->dynamic_refcount)
>>      {
>> -      if (n->refcount != REFCOUNT_INFINITY)
>> +      if (n->refcount != REFCOUNT_INFINITY
>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>       n->refcount--;
>> -      n->dynamic_refcount--;
>> +
>> +      /* When mapping is created by acc_map_data, dynamic_refcount must be
>> +      maintained at >= 1.  */
>> +      if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
>> +     n->dynamic_refcount--;
>>      }
> 
> I'd find those changes more concise to understand if done the following
> way: restore both 'if (finalize)' and 'else if (n->dynamic_refcount)'
> branches to their original form (other than excluding 'n->refcount'
> modification for 'REFCOUNT_ACC_MAP_DATA', as you have), and instead then
> afterwards (that is, here), do:
> 
>     /* Mappings created by 'acc_map_data' can only be deleted by 'acc_unmap_data'.  */
>     if (n->refcount == REFCOUNT_ACC_MAP_DATA
>         && n->dynamic_refcount == 0)
>       n->dynamic_refcount = 1;
> 
> That does have the same semantics, please verify?

This does not have the same semantics, because if the original finalize/n->dynamic_refcount
cases are left unmodified, they will treat REFCOUNT_ACC_MAP_DATA like a normal refcount and
decrement n->refcount, and handling n->refcount == REFCOUNT_ACC_MAP_DATA later won't work either.

I have however, adjusted the nesting of cases to split the 'n->refcount == REFCOUNT_ACC_MAP_DATA'
case away. This should be easier to read.

>> @@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>>
>>    uintptr_t *refcount_ptr = &k->refcount;
>>
>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>> +    refcount_ptr = &k->dynamic_refcount;
>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>      refcount_ptr = &k->structelem_refcount;
>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>      refcount_ptr = k->structelem_refcount_ptr;
>> @@ -527,7 +529,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>
>>    uintptr_t *refcount_ptr = &k->refcount;
>>
>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>> +    refcount_ptr = &k->dynamic_refcount;
>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>      refcount_ptr = &k->structelem_refcount;
>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>      refcount_ptr = k->structelem_refcount_ptr;
>> @@ -560,6 +564,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>    else if (*refcount_ptr > 0)
>>      *refcount_ptr -= 1;
>>
>> +  /* Force back to 1 if this is an acc_map_data mapping.  */
>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
>> +    *refcount_ptr = 1;
>> +
>>   end:
>>    if (*refcount_ptr == 0)
>>      {
> 
> It's not clear to me why you need this handling -- instead of just
> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
> early 'return'?
> 
> Per my understanding, this code is for OpenACC only exercised for
> structured data regions, and it seems strange (unnecessary?) to adjust
> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
> missing anything?

No, that is not true. It goes through almost everything through gomp_map_vars_existing/_internal.
This is what happens when you acc_create/acc_copyin on a mapping created by acc_map_data.

> Overall, your changes regress the
> commit 3e888f94624294d2b9b34ebfee0916768e5d9c3f
> "Add OpenACC 'acc_map_data' variant to 'libgomp.oacc-c-c++-common/deep-copy-8.c'"
> that I just pushed.  I think you just need to handle
> 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' in
> 'libgomp/oacc-mem.c:goacc_enter_data_internal', 'if (n && struct_p)'?
> Please verify.

Fixed by adding another '&& n->refcount != REFCOUNT_ACC_MAP_DATA' check in goacc_enter_data_internal.

> But please also to the "Minimal OpenACC variant corresponding to PR96668"
> code in 'libgomp/oacc-mem.c:goacc_enter_data_internal' add a safeguard
> that we're not running into 'REFCOUNT_ACC_MAP_DATA' there.  I think
> that's currently not (reasonably easily) possible, given that
> 'acc_map_data' isn't available in OpenACC/Fortran, but it'll be available
> later, and then I'd rather have an 'assert' trigger there, instead of
> random behavior.  (I'm not asking you to write a mixed OpenACC/Fortran
> plus C test case for that scenario -- if feasible at all.)

I am not really sure what you want me to do here, but REFCOUNT_ACC_MAP_DATA mappings
are all created through a single GOMP_MAP_ALLOC kind. The complex stuff of MAP_STRUCT, MAP_TO_PSET, etc.
should all be not related here (I presume even if Fortran eventually gets acc_map_data, it would be the
compiler side which should take care of passing the raw data-pointer/array-size to the acc_map_data routine)

I have re-tested this on x86_64-linux + nvptx. Please see if this is okay for committing to mainline.

Thanks,
Chung-Lin

2024-03-04  Chung-Lin Tang  <cltang@baylibre.com>

libgomp/ChangeLog:

	* libgomp.h (REFCOUNT_ACC_MAP_DATA): Define as (REFCOUNT_SPECIAL | 2).
	* oacc-mem.c (acc_map_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
	initialize dynamic_refcount as 1.
	(acc_unmap_data): Adjust to use REFCOUNT_ACC_MAP_DATA, remove TODO
	comments. Add assert of 'n->dynamic_refcount >= 1' and comments.
	(goacc_map_var_existing): Add REFCOUNT_ACC_MAP_DATA case.
	(goacc_exit_datum_1): Add REFCOUNT_ACC_MAP_DATA case, respect
	REFCOUNT_ACC_MAP_DATA when decrementing/finalizing. Force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	(goacc_enter_data_internal): Add REFCOUNT_ACC_MAP_DATA case.
	* target.c (gomp_increment_refcount): Add REFCOUNT_ACC_MAP_DATA case.
	(gomp_decrement_refcount): Add REFCOUNT_ACC_MAP_DATA case, force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	* testsuite/libgomp.oacc-c-c++-common/lib-96.c: New testcase.
	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: Adjust
	testcase error output scan test.

Comments

Thomas Schwinge March 15, 2024, 11:24 a.m. UTC | #1
Hi Chung-Lin!

I realized: please add "PR libgomp/92840" to the Git commit log, as your
changes are directly a continuation of my earlier changes.


On 2024-03-05T01:18:28+0900, Chung-Lin Tang <cltang@pllab.cs.nthu.edu.tw> wrote:
> On 2023/10/31 11:06 PM, Thomas Schwinge wrote:
>>> @@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>>>
>>>    if (finalize)
>>>      {
>>> -      if (n->refcount != REFCOUNT_INFINITY)
>>> +      if (n->refcount != REFCOUNT_INFINITY
>>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>>       n->refcount -= n->dynamic_refcount;
>>> -      n->dynamic_refcount = 0;
>>> +
>>> +      if (n->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +     /* Mappings created by acc_map_data are returned to initial
>>> +        dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
>>> +     n->dynamic_refcount = 1;
>>> +      else
>>> +     n->dynamic_refcount = 0;
>>>      }
>>>    else if (n->dynamic_refcount)
>>>      {
>>> -      if (n->refcount != REFCOUNT_INFINITY)
>>> +      if (n->refcount != REFCOUNT_INFINITY
>>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>>       n->refcount--;
>>> -      n->dynamic_refcount--;
>>> +
>>> +      /* When mapping is created by acc_map_data, dynamic_refcount must be
>>> +      maintained at >= 1.  */
>>> +      if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
>>> +     n->dynamic_refcount--;
>>>      }
>> 
>> I'd find those changes more concise to understand if done the following
>> way: restore both 'if (finalize)' and 'else if (n->dynamic_refcount)'
>> branches to their original form (other than excluding 'n->refcount'
>> modification for 'REFCOUNT_ACC_MAP_DATA', as you have), and instead then
>> afterwards (that is, here), do:
>> 
>>     /* Mappings created by 'acc_map_data' can only be deleted by 'acc_unmap_data'.  */
>>     if (n->refcount == REFCOUNT_ACC_MAP_DATA
>>         && n->dynamic_refcount == 0)
>>       n->dynamic_refcount = 1;
>> 
>> That does have the same semantics, please verify?
>
> This does not have the same semantics, because if the original finalize/n->dynamic_refcount
> cases are left unmodified, they will treat REFCOUNT_ACC_MAP_DATA like a normal refcount and
> decrement n->refcount, and handling n->refcount == REFCOUNT_ACC_MAP_DATA later won't work either.

That's why I said: "restore [...] excluding 'n->refcount' modification
for 'REFCOUNT_ACC_MAP_DATA', as you have [...]".  Sorry if that was
unclear.

> I have however, adjusted the nesting of cases to split the 'n->refcount == REFCOUNT_ACC_MAP_DATA'
> case away. This should be easier to read.

Thanks, that easier to follow indeed.  I had meant (on top of your v2):

    --- a/libgomp/oacc-mem.c
    +++ b/libgomp/oacc-mem.c
    @@ -686,35 +686,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
           gomp_fatal ("Dynamic reference counting assert fail\n");
         }
     
    -  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
    +  if (finalize)
         {
    -      if (finalize)
    -	{
    -	  /* Mappings created by acc_map_data are returned to initial
    -	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
    -	  n->dynamic_refcount = 1;
    -	}
    -      else if (n->dynamic_refcount)
    -	{
    -	  /* When mapping is created by acc_map_data, dynamic_refcount must be
    -	     maintained at >= 1.  */
    -	  if (n->dynamic_refcount > 1)
    -	    n->dynamic_refcount--;
    -	}
    -    }
    -  else if (finalize)
    -    {
    -      if (n->refcount != REFCOUNT_INFINITY)
    +      if (n->refcount != REFCOUNT_INFINITY
    +	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
     	n->refcount -= n->dynamic_refcount;
           n->dynamic_refcount = 0;
         }
       else if (n->dynamic_refcount)
         {
    -      if (n->refcount != REFCOUNT_INFINITY)
    +      if (n->refcount != REFCOUNT_INFINITY
    +	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
     	n->refcount--;
           n->dynamic_refcount--;
         }
     
    +  /* Mappings created by 'acc_map_data' may only be deleted by
    +     'acc_unmap_data'.  */
    +  if (n->refcount == REFCOUNT_ACC_MAP_DATA
    +      && n->dynamic_refcount == 0)
    +    n->dynamic_refcount = 1;
    +
       if (n->refcount == 0)
         {
           bool copyout = (kind == GOMP_MAP_FROM

..., which really should have the same semantics?  No strong opinion on
which of the two variants you now chose.


>>> @@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>>>
>>>    uintptr_t *refcount_ptr = &k->refcount;
>>>
>>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +    refcount_ptr = &k->dynamic_refcount;
>>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>>      refcount_ptr = &k->structelem_refcount;
>>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>>      refcount_ptr = k->structelem_refcount_ptr;
>>> @@ -527,7 +529,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>>
>>>    uintptr_t *refcount_ptr = &k->refcount;
>>>
>>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +    refcount_ptr = &k->dynamic_refcount;
>>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>>      refcount_ptr = &k->structelem_refcount;
>>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>>      refcount_ptr = k->structelem_refcount_ptr;
>>> @@ -560,6 +564,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>>    else if (*refcount_ptr > 0)
>>>      *refcount_ptr -= 1;
>>>
>>> +  /* Force back to 1 if this is an acc_map_data mapping.  */
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
>>> +    *refcount_ptr = 1;
>>> +
>>>   end:
>>>    if (*refcount_ptr == 0)
>>>      {
>> 
>> It's not clear to me why you need this handling -- instead of just
>> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
>> early 'return'?
>> 
>> Per my understanding, this code is for OpenACC only exercised for
>> structured data regions, and it seems strange (unnecessary?) to adjust
>> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
>> missing anything?
>
> No, that is not true. It goes through almost everything through gomp_map_vars_existing/_internal.
> This is what happens when you acc_create/acc_copyin on a mapping created by acc_map_data.

But I don't understand what you foresee breaking with the following (on
top of your v2):

    --- a/libgomp/target.c
    +++ b/libgomp/target.c
    @@ -476,14 +476,14 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
     static inline void
     gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
     {
    -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    +  if (k == NULL
    +      || k->refcount == REFCOUNT_INFINITY
    +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
         return;
     
       uintptr_t *refcount_ptr = &k->refcount;
     
    -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
    -    refcount_ptr = &k->dynamic_refcount;
    -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
         refcount_ptr = &k->structelem_refcount;
       else if (REFCOUNT_STRUCTELEM_P (k->refcount))
         refcount_ptr = k->structelem_refcount_ptr;
    @@ -522,7 +522,9 @@ static inline void
     gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
     			 bool *do_copy, bool *do_remove)
     {
    -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    +  if (k == NULL
    +      || k->refcount == REFCOUNT_INFINITY
    +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
         {
           *do_copy = *do_remove = false;
           return;
    @@ -530,9 +532,7 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
     
       uintptr_t *refcount_ptr = &k->refcount;
     
    -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
    -    refcount_ptr = &k->dynamic_refcount;
    -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
         refcount_ptr = &k->structelem_refcount;
       else if (REFCOUNT_STRUCTELEM_P (k->refcount))
         refcount_ptr = k->structelem_refcount_ptr;
    @@ -565,10 +565,6 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
       else if (*refcount_ptr > 0)
         *refcount_ptr -= 1;
     
    -  /* Force back to 1 if this is an acc_map_data mapping.  */
    -  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
    -    *refcount_ptr = 1;
    -
      end:
       if (*refcount_ptr == 0)
         {

Can you please show a test case?


>> But please also to the "Minimal OpenACC variant corresponding to PR96668"
>> code in 'libgomp/oacc-mem.c:goacc_enter_data_internal' add a safeguard
>> that we're not running into 'REFCOUNT_ACC_MAP_DATA' there.  I think
>> that's currently not (reasonably easily) possible, given that
>> 'acc_map_data' isn't available in OpenACC/Fortran, but it'll be available
>> later

('acc_map_data' now is available in OpenACC/Fortran as of Tobias' recent
commit r14-9196-g8b3f1edf9b38cb8a88c0a101a675d092bf6135d2
"OpenACC: Add Fortran routines acc_{alloc,free,hostptr,deviceptr,memcpy_{to,from}_device*}".
See 'libgomp.oacc-fortran/acc_host_device_ptr.f90' for a simple test
case.)

>> and then I'd rather have an 'assert' trigger there, instead of
>> random behavior.  (I'm not asking you to write a mixed OpenACC/Fortran
>> plus C test case for that scenario -- if feasible at all.)
>
> I am not really sure what you want me to do here, but REFCOUNT_ACC_MAP_DATA mappings
> are all created through a single GOMP_MAP_ALLOC kind. The complex stuff of MAP_STRUCT, MAP_TO_PSET, etc.
> should all be not related here (I presume even if Fortran eventually gets acc_map_data, it would be the
> compiler side which should take care of passing the raw data-pointer/array-size to the acc_map_data routine)

I see we already have:

    if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET
        && tgt->list_count == 0)
      {
        /* 'declare target'.  */
        assert (n->refcount == REFCOUNT_INFINITY);

I think I wanted to you to add:

    --- a/libgomp/oacc-mem.c
    +++ b/libgomp/oacc-mem.c
    @@ -1217,7 +1209,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
     	      processed = true;
     	    }
     	  else
    -	    assert (n->refcount != REFCOUNT_INFINITY);
    +	    assert (n->refcount != REFCOUNT_INFINITY
    +		    && n->refcount != REFCOUNT_ACC_MAP_DATA);
     
     	  for (size_t j = 0; j < tgt->list_count; j++)
     	    if (tgt->list[j].key == n)


Please check these items, and then we're good to go.


Grüße
 Thomas


> libgomp/ChangeLog:
>
> 	* libgomp.h (REFCOUNT_ACC_MAP_DATA): Define as (REFCOUNT_SPECIAL | 2).
> 	* oacc-mem.c (acc_map_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
> 	initialize dynamic_refcount as 1.
> 	(acc_unmap_data): Adjust to use REFCOUNT_ACC_MAP_DATA, remove TODO
> 	comments. Add assert of 'n->dynamic_refcount >= 1' and comments.
> 	(goacc_map_var_existing): Add REFCOUNT_ACC_MAP_DATA case.
> 	(goacc_exit_datum_1): Add REFCOUNT_ACC_MAP_DATA case, respect
> 	REFCOUNT_ACC_MAP_DATA when decrementing/finalizing. Force lowest
> 	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
> 	(goacc_enter_data_internal): Add REFCOUNT_ACC_MAP_DATA case.
> 	* target.c (gomp_increment_refcount): Add REFCOUNT_ACC_MAP_DATA case.
> 	(gomp_decrement_refcount): Add REFCOUNT_ACC_MAP_DATA case, force lowest
> 	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
> 	* testsuite/libgomp.oacc-c-c++-common/lib-96.c: New testcase.
> 	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: Adjust
> 	testcase error output scan test.
>
>
> diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
> index f98cccd8b66..089393846d1 100644
> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -1163,6 +1163,8 @@ struct target_mem_desc;
>  /* Special value for refcount - tgt_offset contains target address of the
>     artificial pointer to "omp declare target link" object.  */
>  #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
> +/* Special value for refcount - created through acc_map_data.  */
> +#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
>  
>  /* Special value for refcount - structure element sibling list items.
>     All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
> diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
> index ba48a1ccbb7..ffd56e397f6 100644
> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c
> @@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
>        assert (n->refcount == 1);
>        assert (n->dynamic_refcount == 0);
>        /* Special reference counting behavior.  */
> -      n->refcount = REFCOUNT_INFINITY;
> +      n->refcount = REFCOUNT_ACC_MAP_DATA;
> +      n->dynamic_refcount = 1;
>  
>        if (profiling_p)
>  	{
> @@ -455,12 +456,7 @@ acc_unmap_data (void *h)
>        gomp_fatal ("[%p,%d] surrounds %p",
>  		  (void *) n->host_start, (int) host_size, (void *) h);
>      }
> -  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
> -     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
> -     the different 'REFCOUNT_INFINITY' cases, or simply separate
> -     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
> -     etc.)?  */
> -  else if (n->refcount != REFCOUNT_INFINITY)
> +  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
> @@ -468,13 +464,12 @@ acc_unmap_data (void *h)
>  		  (void *) h, (int) host_size);
>      }
>  
> -  struct target_mem_desc *tgt = n->tgt;
> +  /* This should hold for all mappings created by acc_map_data. We are however
> +     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
> +     not matter.  */
> +  assert (n->dynamic_refcount >= 1);
>  
> -  if (tgt->refcount == REFCOUNT_INFINITY)
> -    {
> -      gomp_mutex_unlock (&acc_dev->lock);
> -      gomp_fatal ("cannot unmap target block");
> -    }
> +  struct target_mem_desc *tgt = n->tgt;
>  
>    /* Above, we've verified that the mapping must have been set up by
>       'acc_map_data'.  */
> @@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
>      }
>  
>    assert (n->refcount != REFCOUNT_LINK);
> -  if (n->refcount != REFCOUNT_INFINITY)
> +  if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>      n->refcount++;
>    n->dynamic_refcount++;
>  
> @@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>  
>    assert (n->refcount != REFCOUNT_LINK);
>    if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA
>        && n->refcount < n->dynamic_refcount)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("Dynamic reference counting assert fail\n");
>      }
>  
> -  if (finalize)
> +  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
> +    {
> +      if (finalize)
> +	{
> +	  /* Mappings created by acc_map_data are returned to initial
> +	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
> +	  n->dynamic_refcount = 1;
> +	}
> +      else if (n->dynamic_refcount)
> +	{
> +	  /* When mapping is created by acc_map_data, dynamic_refcount must be
> +	     maintained at >= 1.  */
> +	  if (n->dynamic_refcount > 1)
> +	    n->dynamic_refcount--;
> +	}
> +    }
> +  else if (finalize)
>      {
>        if (n->refcount != REFCOUNT_INFINITY)
>  	n->refcount -= n->dynamic_refcount;
> @@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>  	    }
>  	  /* This is a special case because we must increment the refcount by
>  	     the number of mapped struct elements, rather than by one.  */
> -	  if (n->refcount != REFCOUNT_INFINITY)
> +	  if (n->refcount != REFCOUNT_INFINITY
> +	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>  	    n->refcount += groupnum - 1;
>  	  n->dynamic_refcount += groupnum - 1;
>  	}
> diff --git a/libgomp/target.c b/libgomp/target.c
> index 1367e9cce6c..1851feba271 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -481,7 +481,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>  
>    uintptr_t *refcount_ptr = &k->refcount;
>  
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -528,7 +530,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>  
>    uintptr_t *refcount_ptr = &k->refcount;
>  
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -561,6 +565,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>    else if (*refcount_ptr > 0)
>      *refcount_ptr -= 1;
>  
> +  /* Force back to 1 if this is an acc_map_data mapping.  */
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
> +    *refcount_ptr = 1;
> +
>   end:
>    if (*refcount_ptr == 0)
>      {
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
> new file mode 100644
> index 00000000000..7bc55b94f33
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
> +
> +/* Test if acc_unmap_data has implicit finalize semantics.  */
> +
> +#include <stdlib.h>
> +#include <openacc.h>
> +
> +int
> +main (int argc, char **argv)
> +{
> +  const int N = 256;
> +  unsigned char *h;
> +  void *d;
> +
> +  h = (unsigned char *) malloc (N);
> +
> +  d = acc_malloc (N);
> +
> +  acc_map_data (h, d, N);
> +
> +  acc_copyin (h, N);
> +  acc_copyin (h, N);
> +  acc_copyin (h, N);
> +
> +  acc_unmap_data (h);
> +
> +  if (acc_is_present (h, N))
> +    abort ();
> +
> +  acc_free (d);
> +
> +  free (h);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> index 872f0c1de5c..9ed9fa7e413 100644
> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> @@ -10,7 +10,7 @@ main (int argc, char *argv[])
>  {
>    acc_init (acc_device_default);
>    acc_unmap_data ((void *) foo);
> -/* { dg-output "libgomp: cannot unmap target block" } */
> +/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
>    return 0;
>  }
>
diff mbox series

Patch

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index f98cccd8b66..089393846d1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1163,6 +1163,8 @@  struct target_mem_desc;
 /* Special value for refcount - tgt_offset contains target address of the
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
+/* Special value for refcount - created through acc_map_data.  */
+#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
 
 /* Special value for refcount - structure element sibling list items.
    All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index ba48a1ccbb7..ffd56e397f6 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -411,7 +411,8 @@  acc_map_data (void *h, void *d, size_t s)
       assert (n->refcount == 1);
       assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
-      n->refcount = REFCOUNT_INFINITY;
+      n->refcount = REFCOUNT_ACC_MAP_DATA;
+      n->dynamic_refcount = 1;
 
       if (profiling_p)
 	{
@@ -455,12 +456,7 @@  acc_unmap_data (void *h)
       gomp_fatal ("[%p,%d] surrounds %p",
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
-  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
-     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
-     the different 'REFCOUNT_INFINITY' cases, or simply separate
-     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
-     etc.)?  */
-  else if (n->refcount != REFCOUNT_INFINITY)
+  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
@@ -468,13 +464,12 @@  acc_unmap_data (void *h)
 		  (void *) h, (int) host_size);
     }
 
-  struct target_mem_desc *tgt = n->tgt;
+  /* This should hold for all mappings created by acc_map_data. We are however
+     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
+     not matter.  */
+  assert (n->dynamic_refcount >= 1);
 
-  if (tgt->refcount == REFCOUNT_INFINITY)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("cannot unmap target block");
-    }
+  struct target_mem_desc *tgt = n->tgt;
 
   /* Above, we've verified that the mapping must have been set up by
      'acc_map_data'.  */
@@ -519,7 +514,8 @@  goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
     }
 
   assert (n->refcount != REFCOUNT_LINK);
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA)
     n->refcount++;
   n->dynamic_refcount++;
 
@@ -683,13 +679,30 @@  goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
 
   assert (n->refcount != REFCOUNT_LINK);
   if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA
       && n->refcount < n->dynamic_refcount)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("Dynamic reference counting assert fail\n");
     }
 
-  if (finalize)
+  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+    {
+      if (finalize)
+	{
+	  /* Mappings created by acc_map_data are returned to initial
+	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
+	  n->dynamic_refcount = 1;
+	}
+      else if (n->dynamic_refcount)
+	{
+	  /* When mapping is created by acc_map_data, dynamic_refcount must be
+	     maintained at >= 1.  */
+	  if (n->dynamic_refcount > 1)
+	    n->dynamic_refcount--;
+	}
+    }
+  else if (finalize)
     {
       if (n->refcount != REFCOUNT_INFINITY)
 	n->refcount -= n->dynamic_refcount;
@@ -1131,7 +1144,8 @@  goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	    }
 	  /* This is a special case because we must increment the refcount by
 	     the number of mapped struct elements, rather than by one.  */
-	  if (n->refcount != REFCOUNT_INFINITY)
+	  if (n->refcount != REFCOUNT_INFINITY
+	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
 	    n->refcount += groupnum - 1;
 	  n->dynamic_refcount += groupnum - 1;
 	}
diff --git a/libgomp/target.c b/libgomp/target.c
index 1367e9cce6c..1851feba271 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -481,7 +481,9 @@  gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -528,7 +530,9 @@  gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -561,6 +565,10 @@  gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
   else if (*refcount_ptr > 0)
     *refcount_ptr -= 1;
 
+  /* Force back to 1 if this is an acc_map_data mapping.  */
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
+    *refcount_ptr = 1;
+
  end:
   if (*refcount_ptr == 0)
     {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
new file mode 100644
index 00000000000..7bc55b94f33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
@@ -0,0 +1,36 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+/* Test if acc_unmap_data has implicit finalize semantics.  */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+
+  acc_unmap_data (h);
+
+  if (acc_is_present (h, N))
+    abort ();
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
index 872f0c1de5c..9ed9fa7e413 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
@@ -10,7 +10,7 @@  main (int argc, char *argv[])
 {
   acc_init (acc_device_default);
   acc_unmap_data ((void *) foo);
-/* { dg-output "libgomp: cannot unmap target block" } */
+/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
   return 0;
 }