diff mbox series

[2/7,OpenACC] Adjust dynamic reference count semantics

Message ID b23ea71697f77d8214411a3e1348e9dee496e5a6.1590182783.git.julian@codesourcery.com
State New
Headers show
Series Dynamic reference counts for mapped data | expand

Commit Message

Julian Brown May 22, 2020, 10:16 p.m. UTC
This patch adjusts the semantics of dynamic reference counts, as described
in the parent email. There are also two new test cases derived from
Thomas's test in the email:

https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546166.html

that work now.

OK?

Julian

ChangeLog

	libgomp/
	* libgomp.h (struct splay_tree_key_s): Change virtual_refcount to
	dynamic_refcount.
	(struct gomp_device_descr): Remove GOMP_MAP_VARS_OPENACC_ENTER_DATA.
	* oacc-mem.c (acc_map_data): Substitute virtual_refcount for
	dynamic_refcount.
	(acc_unmap_data): Replace open-coded refcount handling with call to
	gomp_remove_var.
	(goacc_enter_datum): Adjust for dynamic_refcount semantics.  Use tgt
	returned from gomp_map_vars_async.  Update assertions.
	(goacc_exit_datum): Re-add some error checking.  Adjust for
	dynamic_refcount semantics.  Fix is_tgt_unmapped test for struct
	mappings.
	(goacc_enter_data_internal): Implement "present" case of dynamic
	memory-map handling here.  Update "non-present" case for
	dynamic_refcount semantics.
	(goacc_exit_data_internal): Update for dynamic_refcount semantics.
	Re-introduce error checking for tgt unmapping when appropriate.
	* target.c (gomp_map_vars_internal): Remove
	GOMP_MAP_VARS_OPENACC_ENTER_DATA handling.  Update for dynamic_refcount
	handling.
	(gomp_unmap_vars_internal): Remove virtual_refcount handling.
	(gomp_load_image_to_device): Substitute dynamic_refcount for
	virtual_refcount.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/refcounting-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/refcounting-2.c: New test.
---
 libgomp/libgomp.h                             |   8 +-
 libgomp/oacc-mem.c                            | 241 ++++++++++++------
 libgomp/target.c                              |  38 +--
 .../libgomp.oacc-c-c++-common/refcounting-1.c |  31 +++
 .../libgomp.oacc-c-c++-common/refcounting-2.c |  31 +++
 5 files changed, 243 insertions(+), 106 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c

Comments

Thomas Schwinge June 3, 2020, 12:36 p.m. UTC | #1
Hi Julian!

On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com> wrote:
> This patch adjusts the semantics of dynamic reference counts, as described
> in the parent email.

Thanks!

A few questions, but no need to send an updated patch.

> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c

> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>  {
>    for (size_t i = 0; i < mapnum; i++)
>      {
> -      int group_last = find_group_last (i, mapnum, sizes, kinds);
> +      splay_tree_key n;
> +      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
> +      bool struct_p = false;
> +      size_t size, groupnum = (group_last - i) + 1;
>
> -      gomp_map_vars_async (acc_dev, aq,
> -                        (group_last - i) + 1,
> -                        &hostaddrs[i], NULL,
> -                        &sizes[i], &kinds[i], true,
> -                        GOMP_MAP_VARS_OPENACC_ENTER_DATA);
> +      switch (kinds[i] & 0xff)
> +     {
> +     case GOMP_MAP_STRUCT:
> +       {
> +         int last = i + sizes[i];

The 'last' calculated here must always equal the 'group_last' calculated
above.  ;-) (... so we might just use 'group_last' instead of 'last' in
the following.)

> +         size = (uintptr_t) hostaddrs[last] + sizes[last]
> +                - (uintptr_t) hostaddrs[i];
> +         struct_p = true;
> +       }
> +       break;
> +
> +     case GOMP_MAP_ATTACH:
> +       size = sizeof (void *);
> +       break;
> +
> +     default:
> +       size = sizes[i];
> +     }
> +
> +      n = lookup_host (acc_dev, hostaddrs[i], size);
> +

> +      if (n && struct_p)
> +     {
> +       if (n->refcount != REFCOUNT_INFINITY)
> +         n->refcount += groupnum - 1;
> +       n->dynamic_refcount += groupnum - 1;
> +       gomp_mutex_unlock (&acc_dev->lock);
> +     }

Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that
just an optimization of the 'n && groupnum > 1' case below?

> +      else if (n && groupnum == 1)
> +     {
> +       void *h = hostaddrs[i];
> +       size_t s = sizes[i];
> +
> +       /* A standalone attach clause.  */
> +       if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
> +         gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
> +                              (uintptr_t) h, s, NULL);
> +       else if (h + s > (void *) n->host_end)
> +         {
> +           gomp_mutex_unlock (&acc_dev->lock);
> +           gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
> +         }
> +
> +       assert (n->refcount != REFCOUNT_LINK);
> +       if (n->refcount != REFCOUNT_INFINITY)
> +         n->refcount++;
> +       n->dynamic_refcount++;
> +
> +       gomp_mutex_unlock (&acc_dev->lock);
> +     }

> +      else if (n && groupnum > 1)
> +     {
> +       assert (n->refcount != REFCOUNT_INFINITY
> +               && n->refcount != REFCOUNT_LINK);
> +
> +       bool processed = false;
> +
> +       struct target_mem_desc *tgt = n->tgt;
> +       for (size_t j = 0; j < tgt->list_count; j++)
> +         if (tgt->list[j].key == n)
> +           {
> +             for (size_t k = 0; k < groupnum; k++)
> +               if (j + k < tgt->list_count && tgt->list[j + k].key)
> +                 {
> +                   tgt->list[j + k].key->refcount++;
> +                   tgt->list[j + k].key->dynamic_refcount++;
> +                 }
> +             processed = true;
> +           }
> +
> +       gomp_mutex_unlock (&acc_dev->lock);
> +       if (!processed)
> +         gomp_fatal ("dynamic refcount incrementing failed for "
> +                     "pointer/pset");
> +     }

Please add some text to explain the nested 'j', 'k' loops and their 'if'
conditionals, and the 'groupnum' usage in the 'k' loop boundary.  Should
the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)'
(..., or is 'groupnum' relevant?), and in the loop body then use 'k'
instead of 'j + k'?  (Maybe I've now confused myself, staring at this for
a while...)

> +      else if (hostaddrs[i])
> +     {
> +       gomp_mutex_unlock (&acc_dev->lock);
> +
> +       struct target_mem_desc *tgt
> +         = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
> +                                &sizes[i], &kinds[i], true,
> +                                GOMP_MAP_VARS_ENTER_DATA);
> +       assert (tgt);
> +       for (size_t j = 0; j < tgt->list_count; j++)
> +         {
> +           n = tgt->list[j].key;
> +           if (n)
> +             n->dynamic_refcount++;
> +         }
> +     }

... else nothing.  This latter "nothing" case (not present, and no
'hostaddrs[i]') is exercised by
'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only).  Is that
alright?

>
>        i = group_last;
>      }


> @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,

(Diff slightly edited.)

>           if (n->refcount == 0)
> -           gomp_remove_var_async (acc_dev, n, aq);

> +           {
> +             if (aq)
> +               {
> +                 /* TODO The way the following code is currently
> +                    implemented, we need the 'is_tgt_unmapped' return
> +                    value from 'gomp_remove_var', so can't use
> +                    'gomp_remove_var_async' here -- see the
> +                    'gomp_unref_tgt' comment in
> +                    <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
> +                    PR92881 -- so have to synchronize here.  */
> +                 if (!acc_dev->openacc.async.synchronize_func (aq))
> +                   {
> +                     gomp_mutex_unlock (&acc_dev->lock);
> +                     gomp_fatal ("synchronize failed");
> +                   }
> +               }

As far as I understand, it's no longer true that "The way the following
code is [...] implemented, we need the 'is_tgt_unmapped' return value
from 'gomp_remove_var'".  In particular, we now can/should "use
'gomp_remove_var_async' here", and no longer "have to synchronize here"?

Indeed I'm happy to see that the logic below no longer depends on
'is_tgt_unmapped' for its loop exit condition.  Instead of the above,
this now can use the standard pattern:

    if (aq)
      /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
         'gomp_unref_tgt' comment in
         <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
         PR92881.  */
      gomp_remove_var_async (acc_dev, n, aq);
    else
      { [as follows] }

> +             int num_mappings = 0;
> +             /* If the target_mem_desc represents a single data mapping, we
> +                can check that it is freed when this splay tree key's
> +                refcount reaches zero.  Otherwise (e.g. for a struct
> +                mapping with multiple members), fall back to skipping the
> +                test.  */
> +             for (int j = 0; j < n->tgt->list_count; j++)
> +               if (n->tgt->list[j].key)
> +                 num_mappings++;
> +             bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
> +             assert (num_mappings > 1 || is_tgt_unmapped);
> +           }
>         }
>         break;

For reference, the old logic (mandating what was described in the comment
above) was:

    bool is_tgt_unmapped = false;
    for (size_t i = 0; i < t->list_count; i++)
     {
       is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
       if (is_tgt_unmapped)
         break;
     }
    assert (is_tgt_unmapped);


Grüße
 Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
Thomas Schwinge June 3, 2020, 3:19 p.m. UTC | #2
Hi Julian!

On 2020-06-03T14:36:14+0200, I wrote:
> On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com> wrote:
>> This patch adjusts the semantics of dynamic reference counts, as described
>> in the parent email.
>
> Thanks!
>
> A few questions, but no need to send an updated patch.
>
>> --- a/libgomp/oacc-mem.c
>> +++ b/libgomp/oacc-mem.c
>
>> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>>  {
>>    for (size_t i = 0; i < mapnum; i++)
>>      {
>> -      int group_last = find_group_last (i, mapnum, sizes, kinds);
>> +      splay_tree_key n;
>> +      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
>> +      bool struct_p = false;
>> +      size_t size, groupnum = (group_last - i) + 1;
>>
>> -      gomp_map_vars_async (acc_dev, aq,
>> -                       (group_last - i) + 1,
>> -                       &hostaddrs[i], NULL,
>> -                       &sizes[i], &kinds[i], true,
>> -                       GOMP_MAP_VARS_OPENACC_ENTER_DATA);
>> +      switch (kinds[i] & 0xff)
>> +    {
>> +    case GOMP_MAP_STRUCT:
>> +      {
>> +        int last = i + sizes[i];
>
> The 'last' calculated here must always equal the 'group_last' calculated
> above.  ;-) (... so we might just use 'group_last' instead of 'last' in
> the following.)
>
>> +        size = (uintptr_t) hostaddrs[last] + sizes[last]
>> +               - (uintptr_t) hostaddrs[i];
>> +        struct_p = true;
>> +      }
>> +      break;
>> +
>> +    case GOMP_MAP_ATTACH:
>> +      size = sizeof (void *);
>> +      break;
>> +
>> +    default:
>> +      size = sizes[i];
>> +    }
>> +
>> +      n = lookup_host (acc_dev, hostaddrs[i], size);
>> +
>
>> +      if (n && struct_p)
>> +    {
>> +      if (n->refcount != REFCOUNT_INFINITY)
>> +        n->refcount += groupnum - 1;
>> +      n->dynamic_refcount += groupnum - 1;
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +    }
>
> Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that
> just an optimization of the 'n && groupnum > 1' case below?

Eh, OK, I think I see where this is going; the 'n && groupnum > 1' case
below might not necessarily take care of the 'groupnum - 1' refcounts
that we're filing here?

>> +      else if (n && groupnum == 1)
>> +    {
>> +      void *h = hostaddrs[i];
>> +      size_t s = sizes[i];
>> +
>> +      /* A standalone attach clause.  */
>> +      if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
>> +        gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
>> +                             (uintptr_t) h, s, NULL);
>> +      else if (h + s > (void *) n->host_end)
>> +        {
>> +          gomp_mutex_unlock (&acc_dev->lock);
>> +          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
>> +        }
>> +
>> +      assert (n->refcount != REFCOUNT_LINK);
>> +      if (n->refcount != REFCOUNT_INFINITY)
>> +        n->refcount++;
>> +      n->dynamic_refcount++;
>> +
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +    }
>
>> +      else if (n && groupnum > 1)
>> +    {
>> +      assert (n->refcount != REFCOUNT_INFINITY
>> +              && n->refcount != REFCOUNT_LINK);
>> +
>> +      bool processed = false;
>> +
>> +      struct target_mem_desc *tgt = n->tgt;
>> +      for (size_t j = 0; j < tgt->list_count; j++)
>> +        if (tgt->list[j].key == n)
>> +          {
>> +            for (size_t k = 0; k < groupnum; k++)
>> +              if (j + k < tgt->list_count && tgt->list[j + k].key)
>> +                {
>> +                  tgt->list[j + k].key->refcount++;
>> +                  tgt->list[j + k].key->dynamic_refcount++;
>> +                }
>> +            processed = true;
>> +          }
>> +
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +      if (!processed)
>> +        gomp_fatal ("dynamic refcount incrementing failed for "
>> +                    "pointer/pset");
>> +    }
>
> Please add some text to explain the nested 'j', 'k' loops and their 'if'
> conditionals, and the 'groupnum' usage in the 'k' loop boundary.  Should
> the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)'
> (..., or is 'groupnum' relevant?), and in the loop body then use 'k'
> instead of 'j + k'?  (Maybe I've now confused myself, staring at this for
> a while...)

Audacious as I am sometimes, I did put a '__builtin_abort' right after
'tgt->list[j].key == n' -- and it doesn't trigger one single time for the
current libgomp test cases, meaning this is all dead code?  I'm confused.

>> +      else if (hostaddrs[i])
>> +    {
>> +      gomp_mutex_unlock (&acc_dev->lock);
>> +
>> +      struct target_mem_desc *tgt
>> +        = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
>> +                               &sizes[i], &kinds[i], true,
>> +                               GOMP_MAP_VARS_ENTER_DATA);
>> +      assert (tgt);
>> +      for (size_t j = 0; j < tgt->list_count; j++)
>> +        {
>> +          n = tgt->list[j].key;
>> +          if (n)
>> +            n->dynamic_refcount++;
>> +        }
>> +    }
>
> ... else nothing.  This latter "nothing" case (not present, and no
> 'hostaddrs[i]') is exercised by
> 'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only).  Is that
> alright?
>
>>
>>        i = group_last;
>>      }
>
>
>> @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>
> (Diff slightly edited.)
>
>>          if (n->refcount == 0)
>> -          gomp_remove_var_async (acc_dev, n, aq);
>
>> +          {
>> +            if (aq)
>> +              {
>> +                /* TODO The way the following code is currently
>> +                   implemented, we need the 'is_tgt_unmapped' return
>> +                   value from 'gomp_remove_var', so can't use
>> +                   'gomp_remove_var_async' here -- see the
>> +                   'gomp_unref_tgt' comment in
>> +                   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
>> +                   PR92881 -- so have to synchronize here.  */
>> +                if (!acc_dev->openacc.async.synchronize_func (aq))
>> +                  {
>> +                    gomp_mutex_unlock (&acc_dev->lock);
>> +                    gomp_fatal ("synchronize failed");
>> +                  }
>> +              }
>
> As far as I understand, it's no longer true that "The way the following
> code is [...] implemented, we need the 'is_tgt_unmapped' return value
> from 'gomp_remove_var'".  In particular, we now can/should "use
> 'gomp_remove_var_async' here", and no longer "have to synchronize here"?
>
> Indeed I'm happy to see that the logic below no longer depends on
> 'is_tgt_unmapped' for its loop exit condition.  Instead of the above,
> this now can use the standard pattern:
>
>     if (aq)
>       /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
>          'gomp_unref_tgt' comment in
>          <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
>          PR92881.  */
>       gomp_remove_var_async (acc_dev, n, aq);
>     else
>       { [as follows] }
>
>> +            int num_mappings = 0;
>> +            /* If the target_mem_desc represents a single data mapping, we
>> +               can check that it is freed when this splay tree key's
>> +               refcount reaches zero.  Otherwise (e.g. for a struct
>> +               mapping with multiple members), fall back to skipping the
>> +               test.  */
>> +            for (int j = 0; j < n->tgt->list_count; j++)
>> +              if (n->tgt->list[j].key)
>> +                num_mappings++;
>> +            bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
>> +            assert (num_mappings > 1 || is_tgt_unmapped);
>> +          }
>>        }
>>        break;
>
> For reference, the old logic (mandating what was described in the comment
> above) was:
>
>     bool is_tgt_unmapped = false;
>     for (size_t i = 0; i < t->list_count; i++)
>      {
>        is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
>        if (is_tgt_unmapped)
>          break;
>      }
>     assert (is_tgt_unmapped);


Grüße
 Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
Julian Brown June 5, 2020, 10:03 p.m. UTC | #3
On Wed, 3 Jun 2020 17:19:47 +0200
Thomas Schwinge <thomas@codesourcery.com> wrote:

> Hi Julian!
> 
> On 2020-06-03T14:36:14+0200, I wrote:
> > On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com>
> > wrote:  
> >> This patch adjusts the semantics of dynamic reference counts, as
> >> described in the parent email.  
> >
> > Thanks!
> >
> > A few questions, but no need to send an updated patch.
> >  
> >> --- a/libgomp/oacc-mem.c
> >> +++ b/libgomp/oacc-mem.c  
> >  
> >> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct
> >> gomp_device_descr *acc_dev, size_t mapnum, {
> >>    for (size_t i = 0; i < mapnum; i++)
> >>      {
> >> -      int group_last = find_group_last (i, mapnum, sizes, kinds);
> >> +      splay_tree_key n;
> >> +      size_t group_last = find_group_last (i, mapnum, sizes,
> >> kinds);
> >> +      bool struct_p = false;
> >> +      size_t size, groupnum = (group_last - i) + 1;
> >>  
> >> -      gomp_map_vars_async (acc_dev, aq,
> >> -			   (group_last - i) + 1,
> >> -			   &hostaddrs[i], NULL,
> >> -			   &sizes[i], &kinds[i], true,
> >> -			   GOMP_MAP_VARS_OPENACC_ENTER_DATA);
> >> +      switch (kinds[i] & 0xff)
> >> +	{
> >> +	case GOMP_MAP_STRUCT:
> >> +	  {
> >> +	    int last = i + sizes[i];  
> >
> > The 'last' calculated here must always equal the 'group_last'
> > calculated above.  ;-) (... so we might just use 'group_last'
> > instead of 'last' in the following.)
> >  
> >> +	    size = (uintptr_t) hostaddrs[last] + sizes[last]
> >> +		   - (uintptr_t) hostaddrs[i];
> >> +	    struct_p = true;
> >> +	  }
> >> +	  break;
> >> +
> >> +	case GOMP_MAP_ATTACH:
> >> +	  size = sizeof (void *);
> >> +	  break;
> >> +
> >> +	default:
> >> +	  size = sizes[i];
> >> +	}
> >> +
> >> +      n = lookup_host (acc_dev, hostaddrs[i], size);
> >> +  
> >  
> >> +      if (n && struct_p)
> >> +	{
> >> +	  if (n->refcount != REFCOUNT_INFINITY)
> >> +	    n->refcount += groupnum - 1;
> >> +	  n->dynamic_refcount += groupnum - 1;
> >> +	  gomp_mutex_unlock (&acc_dev->lock);
> >> +	}  
> >
> > Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or
> > is that just an optimization of the 'n && groupnum > 1' case below?
> >  
> 
> Eh, OK, I think I see where this is going; the 'n && groupnum > 1'
> case below might not necessarily take care of the 'groupnum - 1'
> refcounts that we're filing here?

Right. GOMP_MAP_STRUCT is a little special in this case.

> >> +      else if (n && groupnum == 1)
> >> +	{
> >> +	  void *h = hostaddrs[i];
> >> +	  size_t s = sizes[i];
> >> +
> >> +	  /* A standalone attach clause.  */
> >> +	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
> >> +	    gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map,
> >> n,
> >> +				 (uintptr_t) h, s, NULL);
> >> +	  else if (h + s > (void *) n->host_end)
> >> +	    {
> >> +	      gomp_mutex_unlock (&acc_dev->lock);
> >> +	      gomp_fatal ("[%p,+%d] not mapped", (void *)h,
> >> (int)s);
> >> +	    }
> >> +
> >> +	  assert (n->refcount != REFCOUNT_LINK);
> >> +	  if (n->refcount != REFCOUNT_INFINITY)
> >> +	    n->refcount++;
> >> +	  n->dynamic_refcount++;
> >> +
> >> +	  gomp_mutex_unlock (&acc_dev->lock);
> >> +	}  
> >  
> >> +      else if (n && groupnum > 1)
> >> +	{
> >> +	  assert (n->refcount != REFCOUNT_INFINITY
> >> +		  && n->refcount != REFCOUNT_LINK);
> >> +
> >> +	  bool processed = false;
> >> +
> >> +	  struct target_mem_desc *tgt = n->tgt;
> >> +	  for (size_t j = 0; j < tgt->list_count; j++)
> >> +	    if (tgt->list[j].key == n)
> >> +	      {
> >> +		for (size_t k = 0; k < groupnum; k++)
> >> +		  if (j + k < tgt->list_count && tgt->list[j +
> >> k].key)
> >> +		    {
> >> +		      tgt->list[j + k].key->refcount++;
> >> +		      tgt->list[j + k].key->dynamic_refcount++;
> >> +		    }
> >> +		processed = true;
> >> +	      }
> >> +
> >> +	  gomp_mutex_unlock (&acc_dev->lock);
> >> +	  if (!processed)
> >> +	    gomp_fatal ("dynamic refcount incrementing failed for
> >> "
> >> +			"pointer/pset");
> >> +	}  
> >
> > Please add some text to explain the nested 'j', 'k' loops and their
> > 'if' conditionals, and the 'groupnum' usage in the 'k' loop
> > boundary.  Should the 'k' loop maybe run 'for (size_t k = j; k <
> > tgt->list_count; ++k)' (..., or is 'groupnum' relevant?), and in
> > the loop body then use 'k' instead of 'j + k'?  (Maybe I've now
> > confused myself, staring at this for a while...)  
> 
> Audacious as I am sometimes, I did put a '__builtin_abort' right after
> 'tgt->list[j].key == n' -- and it doesn't trigger one single time for
> the current libgomp test cases, meaning this is all dead code?  I'm
> confused.

Huh, I didn't expect that! Indeed that stanza appears to be dead code
(at least with mapping clauses generated from current GCC). The reason
is a late bug-fix to the manual deep copy code that strips
GOMP_MAP_TO_PSET and GOMP_MAP_POINTER from OpenACC enter/exit mappings
altogether. (In
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-12/msg01253.html).

That means "grouped" mappings are actually only now used
for GOMP_MAP_STRUCT, so actually even more of the find_group_last
function is probably dead now too, modulo backward compatibility issues.

Rewinding a bit, here is an explanation of the problem that the removal
of those clauses fixes, in case we want to revisit that.

With the attached patch (reverting the fix), the attached test case
fails (e.g. compiled at -O0). The problem is that with a dynamic data
lifetime, it's possible for an array descriptor on the stack to go out
of scope before the array data it is associated with does. This might
well be violating either Fortran rules or OpenACC semantics -- if that's
the case, then we had no problem here. (I did see a similar problem "in
the wild", but hadn't come up with a standalone test case until now.)

The attached test case starts out with a explicit-shape array local. It
passes this to a subroutine "enterdata_wrapper". This subroutine
fabricates an assumed-shape array pointer to its argument (creating an
array descriptor), and passes it to another subroutine "enterdata".

The "enterdata" subroutine then performs an OpenACC "enter data"
operation with the array -- whose data comes from the original
explicit-shape array in the main program, but whose descriptor comes
from the stack frame of the caller (i.e. "enterdata_wrapper"). This
descriptor then goes out of scope before returning to the main program.

The test case tries to fiddle with the stack layout by adding arbitrary
other arrays, and does the same dance again with nested subroutines to
perform an "exit data" operation.  But now the address of the (new)
descriptor is different, and the unmapping operation fails.

In short -- OpenACC "enter data" operations can (could) create hidden
dangling references to array descriptors, in some circumstances.

So, the fix was to strip out GOMP_MAP_TO_PSET (and GOMP_MAP_POINTER,
which I don't think has any meaning on these directives) from OpenACC
"enter data" and "exit data" directives altogether. If an array has a
descriptor when we get to a compute kernel, that descriptor is copied
to the target anyway, *even for present clauses*, so passing the
array descriptor to "enter data" descriptor doesn't appear to be
necessary, even in cases where it stays in scope before unmapping from
the target.

So, questions:

1. Does the attached program violate Fortran semantics in some way?

2. Or OpenACC semantics?

3. Are there unintended side-effects of removing GOMP_MAP_TO_PSET and
   GOMP_MAP_POINTER from OpenACC enter/exit data directives?

4. Should the clauses be stripped from the equivalent OpenMP directives
   too?

(FAOD, I'm not asking for review on the attached patch at this time.)

HTH,

Julian
diff mbox series

Patch

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index ca42e0de640..7b52ce7d5c2 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1016,11 +1016,8 @@  struct splay_tree_key_s {
   uintptr_t tgt_offset;
   /* Reference count.  */
   uintptr_t refcount;
-  /* Reference counts beyond those that represent genuine references in the
-     linked splay tree key/target memory structures, e.g. for multiple OpenACC
-     "present increment" operations (via "acc enter data") referring to the same
-     host-memory block.  */
-  uintptr_t virtual_refcount;
+  /* Dynamic reference count.  */
+  uintptr_t dynamic_refcount;
   struct splay_tree_aux *aux;
 };
 
@@ -1153,7 +1150,6 @@  struct gomp_device_descr
 enum gomp_map_vars_kind
 {
   GOMP_MAP_VARS_OPENACC,
-  GOMP_MAP_VARS_OPENACC_ENTER_DATA,
   GOMP_MAP_VARS_TARGET,
   GOMP_MAP_VARS_DATA,
   GOMP_MAP_VARS_ENTER_DATA
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index c06b7341cbb..fff0d573f59 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -407,7 +407,7 @@  acc_map_data (void *h, void *d, size_t s)
       assert (tgt);
       splay_tree_key n = tgt->list[0].key;
       assert (n->refcount == 1);
-      assert (n->virtual_refcount == 0);
+      assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
       n->refcount = REFCOUNT_INFINITY;
 
@@ -454,7 +454,7 @@  acc_unmap_data (void *h)
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
   /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
-     'acc_map_data'.  Maybe 'virtual_refcount' can be used for disambiguating
+     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
      the different 'REFCOUNT_INFINITY' cases, or simply separate
      'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
      etc.)?  */
@@ -475,14 +475,19 @@  acc_unmap_data (void *h)
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("cannot unmap target block");
     }
-  else if (tgt->refcount > 1)
-    tgt->refcount--;
-  else
+
+  if (tgt->refcount == 1)
     {
-      free (tgt->array);
-      free (tgt);
+      /* This is the last reference.  Nullifying these fields prevents
+	 'gomp_unmap_tgt' via 'gomp_remove_var' from freeing the target
+	 memory.  */
+      tgt->tgt_end = 0;
+      tgt->to_free = NULL;
     }
 
+  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
+  assert (is_tgt_unmapped);
+
   gomp_mutex_unlock (&acc_dev->lock);
 
   if (profiling_p)
@@ -540,10 +545,8 @@  goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
 
       assert (n->refcount != REFCOUNT_LINK);
       if (n->refcount != REFCOUNT_INFINITY)
-	{
-	  n->refcount++;
-	  n->virtual_refcount++;
-	}
+	n->refcount++;
+      n->dynamic_refcount++;
 
       gomp_mutex_unlock (&acc_dev->lock);
     }
@@ -555,16 +558,18 @@  goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
 
       goacc_aq aq = get_goacc_asyncqueue (async);
 
-      gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
-			   true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+      struct target_mem_desc *tgt
+	= gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
+			       kinds, true, GOMP_MAP_VARS_ENTER_DATA);
+      assert (tgt);
+      assert (tgt->list_count == 1);
+      n = tgt->list[0].key;
+      assert (n);
+      assert (n->refcount == 1);
+      assert (n->dynamic_refcount == 0);
+      n->dynamic_refcount++;
 
-      gomp_mutex_lock (&acc_dev->lock);
-      n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
-      assert (n != NULL);
-      assert (n->tgt_offset == 0);
-      assert ((uintptr_t) hostaddrs[0] == n->host_start);
-      d = (void *) n->tgt->tgt_start;
-      gomp_mutex_unlock (&acc_dev->lock);
+      d = (void *) tgt->tgt_start;
     }
 
   if (profiling_p)
@@ -683,23 +688,28 @@  goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
 		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
     }
 
+  assert (n->refcount != REFCOUNT_LINK);
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount < n->dynamic_refcount)
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("Dynamic reference counting assert fail\n");
+    }
+
   bool finalize = (kind == GOMP_MAP_DELETE
 		   || kind == GOMP_MAP_FORCE_FROM);
   if (finalize)
     {
       if (n->refcount != REFCOUNT_INFINITY)
-	n->refcount -= n->virtual_refcount;
-      n->virtual_refcount = 0;
+	n->refcount -= n->dynamic_refcount;
+      n->dynamic_refcount = 0;
     }
-
-  if (n->virtual_refcount > 0)
+  else if (n->dynamic_refcount)
     {
       if (n->refcount != REFCOUNT_INFINITY)
 	n->refcount--;
-      n->virtual_refcount--;
+      n->dynamic_refcount--;
     }
-  else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
-    n->refcount--;
 
   if (n->refcount == 0)
     {
@@ -722,8 +732,16 @@  goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
 	gomp_remove_var_async (acc_dev, n, aq);
       else
 	{
+	  int num_mappings = 0;
+	  /* If the target_mem_desc represents a single data mapping, we can
+	     check that it is freed when this splay tree key's refcount
+	     reaches zero.  Otherwise (e.g. for a struct mapping with multiple
+	     members), fall back to skipping the test.  */
+	  for (int i = 0; i < n->tgt->list_count; i++)
+	    if (n->tgt->list[i].key)
+	      num_mappings++;
 	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
-	  assert (is_tgt_unmapped);
+	  assert (num_mappings > 1 || is_tgt_unmapped);
 	}
     }
 
@@ -1018,13 +1036,102 @@  goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 {
   for (size_t i = 0; i < mapnum; i++)
     {
-      int group_last = find_group_last (i, mapnum, sizes, kinds);
+      splay_tree_key n;
+      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
+      bool struct_p = false;
+      size_t size, groupnum = (group_last - i) + 1;
 
-      gomp_map_vars_async (acc_dev, aq,
-			   (group_last - i) + 1,
-			   &hostaddrs[i], NULL,
-			   &sizes[i], &kinds[i], true,
-			   GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+      switch (kinds[i] & 0xff)
+	{
+	case GOMP_MAP_STRUCT:
+	  {
+	    int last = i + sizes[i];
+	    size = (uintptr_t) hostaddrs[last] + sizes[last]
+		   - (uintptr_t) hostaddrs[i];
+	    struct_p = true;
+	  }
+	  break;
+
+	case GOMP_MAP_ATTACH:
+	  size = sizeof (void *);
+	  break;
+
+	default:
+	  size = sizes[i];
+	}
+
+      n = lookup_host (acc_dev, hostaddrs[i], size);
+
+      if (n && struct_p)
+	{
+	  if (n->refcount != REFCOUNT_INFINITY)
+	    n->refcount += groupnum - 1;
+	  n->dynamic_refcount += groupnum - 1;
+	  gomp_mutex_unlock (&acc_dev->lock);
+	}
+      else if (n && groupnum == 1)
+	{
+	  void *h = hostaddrs[i];
+	  size_t s = sizes[i];
+
+	  /* A standalone attach clause.  */
+	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
+	    gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
+				 (uintptr_t) h, s, NULL);
+	  else if (h + s > (void *) n->host_end)
+	    {
+	      gomp_mutex_unlock (&acc_dev->lock);
+	      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+	    }
+
+	  assert (n->refcount != REFCOUNT_LINK);
+	  if (n->refcount != REFCOUNT_INFINITY)
+	    n->refcount++;
+	  n->dynamic_refcount++;
+
+	  gomp_mutex_unlock (&acc_dev->lock);
+	}
+      else if (n && groupnum > 1)
+	{
+	  assert (n->refcount != REFCOUNT_INFINITY
+		  && n->refcount != REFCOUNT_LINK);
+
+	  bool processed = false;
+
+	  struct target_mem_desc *tgt = n->tgt;
+	  for (size_t j = 0; j < tgt->list_count; j++)
+	    if (tgt->list[j].key == n)
+	      {
+		for (size_t k = 0; k < groupnum; k++)
+		  if (j + k < tgt->list_count && tgt->list[j + k].key)
+		    {
+		      tgt->list[j + k].key->refcount++;
+		      tgt->list[j + k].key->dynamic_refcount++;
+		    }
+		processed = true;
+	      }
+
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  if (!processed)
+	    gomp_fatal ("dynamic refcount incrementing failed for "
+			"pointer/pset");
+	}
+      else if (hostaddrs[i])
+	{
+	  gomp_mutex_unlock (&acc_dev->lock);
+
+	  struct target_mem_desc *tgt
+	    = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
+				   &sizes[i], &kinds[i], true,
+				   GOMP_MAP_VARS_ENTER_DATA);
+	  assert (tgt);
+	  for (size_t j = 0; j < tgt->list_count; j++)
+	    {
+	      n = tgt->list[j].key;
+	      if (n)
+		n->dynamic_refcount++;
+	    }
+	}
 
       i = group_last;
     }
@@ -1115,18 +1222,15 @@  goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	    if (finalize)
 	      {
 		if (n->refcount != REFCOUNT_INFINITY)
-		  n->refcount -= n->virtual_refcount;
-		n->virtual_refcount = 0;
+		  n->refcount -= n->dynamic_refcount;
+		n->dynamic_refcount = 0;
 	      }
-
-	    if (n->virtual_refcount > 0)
+	    else if (n->dynamic_refcount)
 	      {
 		if (n->refcount != REFCOUNT_INFINITY)
 		  n->refcount--;
-		n->virtual_refcount--;
+		n->dynamic_refcount--;
 	      }
-	    else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
-	      n->refcount--;
 
 	    if (copyfrom
 		&& (kind != GOMP_MAP_FROM || n->refcount == 0))
@@ -1137,45 +1241,40 @@  goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 				  cur_node.host_end - cur_node.host_start);
 
 	    if (n->refcount == 0)
-	      gomp_remove_var_async (acc_dev, n, aq);
-	  }
-	  break;
-
-	case GOMP_MAP_STRUCT:
-	  {
-	    int elems = sizes[i];
-	    for (int j = 1; j <= elems; j++)
 	      {
-		struct splay_tree_key_s k;
-		k.host_start = (uintptr_t) hostaddrs[i + j];
-		k.host_end = k.host_start + sizes[i + j];
-		splay_tree_key str;
-		str = splay_tree_lookup (&acc_dev->mem_map, &k);
-		if (str)
+		if (aq)
 		  {
-		    if (finalize)
-		      {
-			if (str->refcount != REFCOUNT_INFINITY)
-			  str->refcount -= str->virtual_refcount;
-			str->virtual_refcount = 0;
-		      }
-		    if (str->virtual_refcount > 0)
+		    /* TODO The way the following code is currently
+		       implemented, we need the 'is_tgt_unmapped' return
+		       value from 'gomp_remove_var', so can't use
+		       'gomp_remove_var_async' here -- see the
+		       'gomp_unref_tgt' comment in
+		       <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
+		       PR92881 -- so have to synchronize here.  */
+		    if (!acc_dev->openacc.async.synchronize_func (aq))
 		      {
-			if (str->refcount != REFCOUNT_INFINITY)
-			  str->refcount--;
-			str->virtual_refcount--;
+			gomp_mutex_unlock (&acc_dev->lock);
+			gomp_fatal ("synchronize failed");
 		      }
-		    else if (str->refcount > 0
-			     && str->refcount != REFCOUNT_INFINITY)
-		      str->refcount--;
-		    if (str->refcount == 0)
-		      gomp_remove_var_async (acc_dev, str, aq);
 		  }
+		int num_mappings = 0;
+		/* If the target_mem_desc represents a single data mapping, we
+		   can check that it is freed when this splay tree key's
+		   refcount reaches zero.  Otherwise (e.g. for a struct
+		   mapping with multiple members), fall back to skipping the
+		   test.  */
+		for (int j = 0; j < n->tgt->list_count; j++)
+		  if (n->tgt->list[j].key)
+		    num_mappings++;
+		bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
+		assert (num_mappings > 1 || is_tgt_unmapped);
 	      }
-	    i += elems;
 	  }
 	  break;
 
+	case GOMP_MAP_STRUCT:
+	  continue;
+
 	default:
 	  gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
 			  kind);
diff --git a/libgomp/target.c b/libgomp/target.c
index 36425477dcb..3f2becdae0e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -666,8 +666,7 @@  gomp_map_vars_internal (struct gomp_device_descr *devicep,
   struct target_mem_desc *tgt
     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
   tgt->list_count = mapnum;
-  tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
-		   || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
+  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
   tgt->device_descr = devicep;
   tgt->prev = NULL;
   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
@@ -1094,7 +1093,7 @@  gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		      tgt->list[i].copy_from = false;
 		      tgt->list[i].always_copy_from = false;
 		      tgt->list[i].do_detach
-			= (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+			= (pragma_kind != GOMP_MAP_VARS_ENTER_DATA);
 		      n->refcount++;
 		    }
 		  else
@@ -1155,7 +1154,7 @@  gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		tgt->list[i].offset = 0;
 		tgt->list[i].length = k->host_end - k->host_start;
 		k->refcount = 1;
-		k->virtual_refcount = 0;
+		k->dynamic_refcount = 0;
 		tgt->refcount++;
 		array->left = NULL;
 		array->right = NULL;
@@ -1294,20 +1293,8 @@  gomp_map_vars_internal (struct gomp_device_descr *devicep,
   /* If the variable from "omp target enter data" map-list was already mapped,
      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
      gomp_exit_data.  */
-  if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
-       || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
-      && tgt->refcount == 0)
-    {
-      /* If we're about to discard a target_mem_desc with no "structural"
-	 references (tgt->refcount == 0), any splay keys linked in the tgt's
-	 list must have their virtual refcount incremented to represent that
-	 "lost" reference in order to implement the semantics of the OpenACC
-	 "present increment" operation properly.  */
-      if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
-	for (i = 0; i < tgt->list_count; i++)
-	  if (tgt->list[i].key)
-	    tgt->list[i].key->virtual_refcount++;
-
+  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
+    {
       free (tgt);
       tgt = NULL;
     }
@@ -1459,14 +1446,7 @@  gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
 	continue;
 
       bool do_unmap = false;
-      if (k->tgt == tgt
-	  && k->virtual_refcount > 0
-	  && k->refcount != REFCOUNT_INFINITY)
-	{
-	  k->virtual_refcount--;
-	  k->refcount--;
-	}
-      else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
 	k->refcount--;
       else if (k->refcount == 1)
 	{
@@ -1631,7 +1611,7 @@  gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
       k->tgt = tgt;
       k->tgt_offset = target_table[i].start;
       k->refcount = REFCOUNT_INFINITY;
-      k->virtual_refcount = 0;
+      k->dynamic_refcount = 0;
       k->aux = NULL;
       array->left = NULL;
       array->right = NULL;
@@ -1665,7 +1645,7 @@  gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
       k->tgt = tgt;
       k->tgt_offset = target_var->start;
       k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
-      k->virtual_refcount = 0;
+      k->dynamic_refcount = 0;
       k->aux = NULL;
       array->left = NULL;
       array->right = NULL;
@@ -2935,7 +2915,7 @@  omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
       k->tgt = tgt;
       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
       k->refcount = REFCOUNT_INFINITY;
-      k->virtual_refcount = 0;
+      k->dynamic_refcount = 0;
       k->aux = NULL;
       array->left = NULL;
       array->right = NULL;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c
new file mode 100644
index 00000000000..4e6d06d48d5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c
@@ -0,0 +1,31 @@ 
+/* Test dynamic unmapping of separate structure members.  */
+
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <assert.h>
+#include <openacc.h>
+
+struct s
+{
+  char a;
+  char b;
+};
+
+int main ()
+{
+  struct s s;
+
+#pragma acc enter data create(s.a, s.b)
+
+  assert (acc_is_present (&s.a, sizeof s.a));
+  assert (acc_is_present (&s.b, sizeof s.b));
+
+#pragma acc exit data delete(s.a)
+#pragma acc exit data delete(s.b)
+
+  assert (!acc_is_present (&s.a, sizeof s.a));
+  assert (!acc_is_present (&s.b, sizeof s.b));
+
+  return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c
new file mode 100644
index 00000000000..5539fd8d57f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c
@@ -0,0 +1,31 @@ 
+/* Test dynamic unmapping of separate structure members.  */
+
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <assert.h>
+#include <openacc.h>
+
+struct s
+{
+  char a;
+  char b;
+};
+
+int main ()
+{
+  struct s s;
+
+#pragma acc enter data create(s.a, s.b)
+
+  assert (acc_is_present (&s.a, sizeof s.a));
+  assert (acc_is_present (&s.b, sizeof s.b));
+
+  acc_delete (&s.a, sizeof s.a);
+  acc_delete (&s.b, sizeof s.b);
+
+  assert (!acc_is_present (&s.a, sizeof s.a));
+  assert (!acc_is_present (&s.b, sizeof s.b));
+
+  return 0;
+}
+