Message ID | b23ea71697f77d8214411a3e1348e9dee496e5a6.1590182783.git.julian@codesourcery.com |
---|---|
State | New |
Headers | show |
Series | Dynamic reference counts for mapped data | expand |
Hi Julian! On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com> wrote: > This patch adjusts the semantics of dynamic reference counts, as described > in the parent email. Thanks! A few questions, but no need to send an updated patch. > --- a/libgomp/oacc-mem.c > +++ b/libgomp/oacc-mem.c > @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, > { > for (size_t i = 0; i < mapnum; i++) > { > - int group_last = find_group_last (i, mapnum, sizes, kinds); > + splay_tree_key n; > + size_t group_last = find_group_last (i, mapnum, sizes, kinds); > + bool struct_p = false; > + size_t size, groupnum = (group_last - i) + 1; > > - gomp_map_vars_async (acc_dev, aq, > - (group_last - i) + 1, > - &hostaddrs[i], NULL, > - &sizes[i], &kinds[i], true, > - GOMP_MAP_VARS_OPENACC_ENTER_DATA); > + switch (kinds[i] & 0xff) > + { > + case GOMP_MAP_STRUCT: > + { > + int last = i + sizes[i]; The 'last' calculated here must always equal the 'group_last' calculated above. ;-) (... so we might just use 'group_last' instead of 'last' in the following.) > + size = (uintptr_t) hostaddrs[last] + sizes[last] > + - (uintptr_t) hostaddrs[i]; > + struct_p = true; > + } > + break; > + > + case GOMP_MAP_ATTACH: > + size = sizeof (void *); > + break; > + > + default: > + size = sizes[i]; > + } > + > + n = lookup_host (acc_dev, hostaddrs[i], size); > + > + if (n && struct_p) > + { > + if (n->refcount != REFCOUNT_INFINITY) > + n->refcount += groupnum - 1; > + n->dynamic_refcount += groupnum - 1; > + gomp_mutex_unlock (&acc_dev->lock); > + } Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that just an optimization of the 'n && groupnum > 1' case below? > + else if (n && groupnum == 1) > + { > + void *h = hostaddrs[i]; > + size_t s = sizes[i]; > + > + /* A standalone attach clause. */ > + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) > + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, > + (uintptr_t) h, s, NULL); > + else if (h + s > (void *) n->host_end) > + { > + gomp_mutex_unlock (&acc_dev->lock); > + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); > + } > + > + assert (n->refcount != REFCOUNT_LINK); > + if (n->refcount != REFCOUNT_INFINITY) > + n->refcount++; > + n->dynamic_refcount++; > + > + gomp_mutex_unlock (&acc_dev->lock); > + } > + else if (n && groupnum > 1) > + { > + assert (n->refcount != REFCOUNT_INFINITY > + && n->refcount != REFCOUNT_LINK); > + > + bool processed = false; > + > + struct target_mem_desc *tgt = n->tgt; > + for (size_t j = 0; j < tgt->list_count; j++) > + if (tgt->list[j].key == n) > + { > + for (size_t k = 0; k < groupnum; k++) > + if (j + k < tgt->list_count && tgt->list[j + k].key) > + { > + tgt->list[j + k].key->refcount++; > + tgt->list[j + k].key->dynamic_refcount++; > + } > + processed = true; > + } > + > + gomp_mutex_unlock (&acc_dev->lock); > + if (!processed) > + gomp_fatal ("dynamic refcount incrementing failed for " > + "pointer/pset"); > + } Please add some text to explain the nested 'j', 'k' loops and their 'if' conditionals, and the 'groupnum' usage in the 'k' loop boundary. Should the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)' (..., or is 'groupnum' relevant?), and in the loop body then use 'k' instead of 'j + k'? (Maybe I've now confused myself, staring at this for a while...) > + else if (hostaddrs[i]) > + { > + gomp_mutex_unlock (&acc_dev->lock); > + > + struct target_mem_desc *tgt > + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, > + &sizes[i], &kinds[i], true, > + GOMP_MAP_VARS_ENTER_DATA); > + assert (tgt); > + for (size_t j = 0; j < tgt->list_count; j++) > + { > + n = tgt->list[j].key; > + if (n) > + n->dynamic_refcount++; > + } > + } ... else nothing. This latter "nothing" case (not present, and no 'hostaddrs[i]') is exercised by 'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only). Is that alright? > > i = group_last; > } > @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, (Diff slightly edited.) > if (n->refcount == 0) > - gomp_remove_var_async (acc_dev, n, aq); > + { > + if (aq) > + { > + /* TODO The way the following code is currently > + implemented, we need the 'is_tgt_unmapped' return > + value from 'gomp_remove_var', so can't use > + 'gomp_remove_var_async' here -- see the > + 'gomp_unref_tgt' comment in > + <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; > + PR92881 -- so have to synchronize here. */ > + if (!acc_dev->openacc.async.synchronize_func (aq)) > + { > + gomp_mutex_unlock (&acc_dev->lock); > + gomp_fatal ("synchronize failed"); > + } > + } As far as I understand, it's no longer true that "The way the following code is [...] implemented, we need the 'is_tgt_unmapped' return value from 'gomp_remove_var'". In particular, we now can/should "use 'gomp_remove_var_async' here", and no longer "have to synchronize here"? Indeed I'm happy to see that the logic below no longer depends on 'is_tgt_unmapped' for its loop exit condition. Instead of the above, this now can use the standard pattern: if (aq) /* TODO We can't do the 'is_tgt_unmapped' checking -- see the 'gomp_unref_tgt' comment in <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; PR92881. */ gomp_remove_var_async (acc_dev, n, aq); else { [as follows] } > + int num_mappings = 0; > + /* If the target_mem_desc represents a single data mapping, we > + can check that it is freed when this splay tree key's > + refcount reaches zero. Otherwise (e.g. for a struct > + mapping with multiple members), fall back to skipping the > + test. */ > + for (int j = 0; j < n->tgt->list_count; j++) > + if (n->tgt->list[j].key) > + num_mappings++; > + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); > + assert (num_mappings > 1 || is_tgt_unmapped); > + } > } > break; For reference, the old logic (mandating what was described in the comment above) was: bool is_tgt_unmapped = false; for (size_t i = 0; i < t->list_count; i++) { is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key); if (is_tgt_unmapped) break; } assert (is_tgt_unmapped); Grüße Thomas ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
Hi Julian! On 2020-06-03T14:36:14+0200, I wrote: > On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com> wrote: >> This patch adjusts the semantics of dynamic reference counts, as described >> in the parent email. > > Thanks! > > A few questions, but no need to send an updated patch. > >> --- a/libgomp/oacc-mem.c >> +++ b/libgomp/oacc-mem.c > >> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, >> { >> for (size_t i = 0; i < mapnum; i++) >> { >> - int group_last = find_group_last (i, mapnum, sizes, kinds); >> + splay_tree_key n; >> + size_t group_last = find_group_last (i, mapnum, sizes, kinds); >> + bool struct_p = false; >> + size_t size, groupnum = (group_last - i) + 1; >> >> - gomp_map_vars_async (acc_dev, aq, >> - (group_last - i) + 1, >> - &hostaddrs[i], NULL, >> - &sizes[i], &kinds[i], true, >> - GOMP_MAP_VARS_OPENACC_ENTER_DATA); >> + switch (kinds[i] & 0xff) >> + { >> + case GOMP_MAP_STRUCT: >> + { >> + int last = i + sizes[i]; > > The 'last' calculated here must always equal the 'group_last' calculated > above. ;-) (... so we might just use 'group_last' instead of 'last' in > the following.) > >> + size = (uintptr_t) hostaddrs[last] + sizes[last] >> + - (uintptr_t) hostaddrs[i]; >> + struct_p = true; >> + } >> + break; >> + >> + case GOMP_MAP_ATTACH: >> + size = sizeof (void *); >> + break; >> + >> + default: >> + size = sizes[i]; >> + } >> + >> + n = lookup_host (acc_dev, hostaddrs[i], size); >> + > >> + if (n && struct_p) >> + { >> + if (n->refcount != REFCOUNT_INFINITY) >> + n->refcount += groupnum - 1; >> + n->dynamic_refcount += groupnum - 1; >> + gomp_mutex_unlock (&acc_dev->lock); >> + } > > Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that > just an optimization of the 'n && groupnum > 1' case below? Eh, OK, I think I see where this is going; the 'n && groupnum > 1' case below might not necessarily take care of the 'groupnum - 1' refcounts that we're filing here? >> + else if (n && groupnum == 1) >> + { >> + void *h = hostaddrs[i]; >> + size_t s = sizes[i]; >> + >> + /* A standalone attach clause. */ >> + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) >> + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, >> + (uintptr_t) h, s, NULL); >> + else if (h + s > (void *) n->host_end) >> + { >> + gomp_mutex_unlock (&acc_dev->lock); >> + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); >> + } >> + >> + assert (n->refcount != REFCOUNT_LINK); >> + if (n->refcount != REFCOUNT_INFINITY) >> + n->refcount++; >> + n->dynamic_refcount++; >> + >> + gomp_mutex_unlock (&acc_dev->lock); >> + } > >> + else if (n && groupnum > 1) >> + { >> + assert (n->refcount != REFCOUNT_INFINITY >> + && n->refcount != REFCOUNT_LINK); >> + >> + bool processed = false; >> + >> + struct target_mem_desc *tgt = n->tgt; >> + for (size_t j = 0; j < tgt->list_count; j++) >> + if (tgt->list[j].key == n) >> + { >> + for (size_t k = 0; k < groupnum; k++) >> + if (j + k < tgt->list_count && tgt->list[j + k].key) >> + { >> + tgt->list[j + k].key->refcount++; >> + tgt->list[j + k].key->dynamic_refcount++; >> + } >> + processed = true; >> + } >> + >> + gomp_mutex_unlock (&acc_dev->lock); >> + if (!processed) >> + gomp_fatal ("dynamic refcount incrementing failed for " >> + "pointer/pset"); >> + } > > Please add some text to explain the nested 'j', 'k' loops and their 'if' > conditionals, and the 'groupnum' usage in the 'k' loop boundary. Should > the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)' > (..., or is 'groupnum' relevant?), and in the loop body then use 'k' > instead of 'j + k'? (Maybe I've now confused myself, staring at this for > a while...) Audacious as I am sometimes, I did put a '__builtin_abort' right after 'tgt->list[j].key == n' -- and it doesn't trigger one single time for the current libgomp test cases, meaning this is all dead code? I'm confused. >> + else if (hostaddrs[i]) >> + { >> + gomp_mutex_unlock (&acc_dev->lock); >> + >> + struct target_mem_desc *tgt >> + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, >> + &sizes[i], &kinds[i], true, >> + GOMP_MAP_VARS_ENTER_DATA); >> + assert (tgt); >> + for (size_t j = 0; j < tgt->list_count; j++) >> + { >> + n = tgt->list[j].key; >> + if (n) >> + n->dynamic_refcount++; >> + } >> + } > > ... else nothing. This latter "nothing" case (not present, and no > 'hostaddrs[i]') is exercised by > 'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only). Is that > alright? > >> >> i = group_last; >> } > > >> @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, > > (Diff slightly edited.) > >> if (n->refcount == 0) >> - gomp_remove_var_async (acc_dev, n, aq); > >> + { >> + if (aq) >> + { >> + /* TODO The way the following code is currently >> + implemented, we need the 'is_tgt_unmapped' return >> + value from 'gomp_remove_var', so can't use >> + 'gomp_remove_var_async' here -- see the >> + 'gomp_unref_tgt' comment in >> + <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; >> + PR92881 -- so have to synchronize here. */ >> + if (!acc_dev->openacc.async.synchronize_func (aq)) >> + { >> + gomp_mutex_unlock (&acc_dev->lock); >> + gomp_fatal ("synchronize failed"); >> + } >> + } > > As far as I understand, it's no longer true that "The way the following > code is [...] implemented, we need the 'is_tgt_unmapped' return value > from 'gomp_remove_var'". In particular, we now can/should "use > 'gomp_remove_var_async' here", and no longer "have to synchronize here"? > > Indeed I'm happy to see that the logic below no longer depends on > 'is_tgt_unmapped' for its loop exit condition. Instead of the above, > this now can use the standard pattern: > > if (aq) > /* TODO We can't do the 'is_tgt_unmapped' checking -- see the > 'gomp_unref_tgt' comment in > <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; > PR92881. */ > gomp_remove_var_async (acc_dev, n, aq); > else > { [as follows] } > >> + int num_mappings = 0; >> + /* If the target_mem_desc represents a single data mapping, we >> + can check that it is freed when this splay tree key's >> + refcount reaches zero. Otherwise (e.g. for a struct >> + mapping with multiple members), fall back to skipping the >> + test. */ >> + for (int j = 0; j < n->tgt->list_count; j++) >> + if (n->tgt->list[j].key) >> + num_mappings++; >> + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); >> + assert (num_mappings > 1 || is_tgt_unmapped); >> + } >> } >> break; > > For reference, the old logic (mandating what was described in the comment > above) was: > > bool is_tgt_unmapped = false; > for (size_t i = 0; i < t->list_count; i++) > { > is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key); > if (is_tgt_unmapped) > break; > } > assert (is_tgt_unmapped); Grüße Thomas ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
On Wed, 3 Jun 2020 17:19:47 +0200 Thomas Schwinge <thomas@codesourcery.com> wrote: > Hi Julian! > > On 2020-06-03T14:36:14+0200, I wrote: > > On 2020-05-22T15:16:05-0700, Julian Brown <julian@codesourcery.com> > > wrote: > >> This patch adjusts the semantics of dynamic reference counts, as > >> described in the parent email. > > > > Thanks! > > > > A few questions, but no need to send an updated patch. > > > >> --- a/libgomp/oacc-mem.c > >> +++ b/libgomp/oacc-mem.c > > > >> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct > >> gomp_device_descr *acc_dev, size_t mapnum, { > >> for (size_t i = 0; i < mapnum; i++) > >> { > >> - int group_last = find_group_last (i, mapnum, sizes, kinds); > >> + splay_tree_key n; > >> + size_t group_last = find_group_last (i, mapnum, sizes, > >> kinds); > >> + bool struct_p = false; > >> + size_t size, groupnum = (group_last - i) + 1; > >> > >> - gomp_map_vars_async (acc_dev, aq, > >> - (group_last - i) + 1, > >> - &hostaddrs[i], NULL, > >> - &sizes[i], &kinds[i], true, > >> - GOMP_MAP_VARS_OPENACC_ENTER_DATA); > >> + switch (kinds[i] & 0xff) > >> + { > >> + case GOMP_MAP_STRUCT: > >> + { > >> + int last = i + sizes[i]; > > > > The 'last' calculated here must always equal the 'group_last' > > calculated above. ;-) (... so we might just use 'group_last' > > instead of 'last' in the following.) > > > >> + size = (uintptr_t) hostaddrs[last] + sizes[last] > >> + - (uintptr_t) hostaddrs[i]; > >> + struct_p = true; > >> + } > >> + break; > >> + > >> + case GOMP_MAP_ATTACH: > >> + size = sizeof (void *); > >> + break; > >> + > >> + default: > >> + size = sizes[i]; > >> + } > >> + > >> + n = lookup_host (acc_dev, hostaddrs[i], size); > >> + > > > >> + if (n && struct_p) > >> + { > >> + if (n->refcount != REFCOUNT_INFINITY) > >> + n->refcount += groupnum - 1; > >> + n->dynamic_refcount += groupnum - 1; > >> + gomp_mutex_unlock (&acc_dev->lock); > >> + } > > > > Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or > > is that just an optimization of the 'n && groupnum > 1' case below? > > > > Eh, OK, I think I see where this is going; the 'n && groupnum > 1' > case below might not necessarily take care of the 'groupnum - 1' > refcounts that we're filing here? Right. GOMP_MAP_STRUCT is a little special in this case. > >> + else if (n && groupnum == 1) > >> + { > >> + void *h = hostaddrs[i]; > >> + size_t s = sizes[i]; > >> + > >> + /* A standalone attach clause. */ > >> + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) > >> + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, > >> n, > >> + (uintptr_t) h, s, NULL); > >> + else if (h + s > (void *) n->host_end) > >> + { > >> + gomp_mutex_unlock (&acc_dev->lock); > >> + gomp_fatal ("[%p,+%d] not mapped", (void *)h, > >> (int)s); > >> + } > >> + > >> + assert (n->refcount != REFCOUNT_LINK); > >> + if (n->refcount != REFCOUNT_INFINITY) > >> + n->refcount++; > >> + n->dynamic_refcount++; > >> + > >> + gomp_mutex_unlock (&acc_dev->lock); > >> + } > > > >> + else if (n && groupnum > 1) > >> + { > >> + assert (n->refcount != REFCOUNT_INFINITY > >> + && n->refcount != REFCOUNT_LINK); > >> + > >> + bool processed = false; > >> + > >> + struct target_mem_desc *tgt = n->tgt; > >> + for (size_t j = 0; j < tgt->list_count; j++) > >> + if (tgt->list[j].key == n) > >> + { > >> + for (size_t k = 0; k < groupnum; k++) > >> + if (j + k < tgt->list_count && tgt->list[j + > >> k].key) > >> + { > >> + tgt->list[j + k].key->refcount++; > >> + tgt->list[j + k].key->dynamic_refcount++; > >> + } > >> + processed = true; > >> + } > >> + > >> + gomp_mutex_unlock (&acc_dev->lock); > >> + if (!processed) > >> + gomp_fatal ("dynamic refcount incrementing failed for > >> " > >> + "pointer/pset"); > >> + } > > > > Please add some text to explain the nested 'j', 'k' loops and their > > 'if' conditionals, and the 'groupnum' usage in the 'k' loop > > boundary. Should the 'k' loop maybe run 'for (size_t k = j; k < > > tgt->list_count; ++k)' (..., or is 'groupnum' relevant?), and in > > the loop body then use 'k' instead of 'j + k'? (Maybe I've now > > confused myself, staring at this for a while...) > > Audacious as I am sometimes, I did put a '__builtin_abort' right after > 'tgt->list[j].key == n' -- and it doesn't trigger one single time for > the current libgomp test cases, meaning this is all dead code? I'm > confused. Huh, I didn't expect that! Indeed that stanza appears to be dead code (at least with mapping clauses generated from current GCC). The reason is a late bug-fix to the manual deep copy code that strips GOMP_MAP_TO_PSET and GOMP_MAP_POINTER from OpenACC enter/exit mappings altogether. (In https://gcc.gnu.org/legacy-ml/gcc-patches/2019-12/msg01253.html). That means "grouped" mappings are actually only now used for GOMP_MAP_STRUCT, so actually even more of the find_group_last function is probably dead now too, modulo backward compatibility issues. Rewinding a bit, here is an explanation of the problem that the removal of those clauses fixes, in case we want to revisit that. With the attached patch (reverting the fix), the attached test case fails (e.g. compiled at -O0). The problem is that with a dynamic data lifetime, it's possible for an array descriptor on the stack to go out of scope before the array data it is associated with does. This might well be violating either Fortran rules or OpenACC semantics -- if that's the case, then we had no problem here. (I did see a similar problem "in the wild", but hadn't come up with a standalone test case until now.) The attached test case starts out with a explicit-shape array local. It passes this to a subroutine "enterdata_wrapper". This subroutine fabricates an assumed-shape array pointer to its argument (creating an array descriptor), and passes it to another subroutine "enterdata". The "enterdata" subroutine then performs an OpenACC "enter data" operation with the array -- whose data comes from the original explicit-shape array in the main program, but whose descriptor comes from the stack frame of the caller (i.e. "enterdata_wrapper"). This descriptor then goes out of scope before returning to the main program. The test case tries to fiddle with the stack layout by adding arbitrary other arrays, and does the same dance again with nested subroutines to perform an "exit data" operation. But now the address of the (new) descriptor is different, and the unmapping operation fails. In short -- OpenACC "enter data" operations can (could) create hidden dangling references to array descriptors, in some circumstances. So, the fix was to strip out GOMP_MAP_TO_PSET (and GOMP_MAP_POINTER, which I don't think has any meaning on these directives) from OpenACC "enter data" and "exit data" directives altogether. If an array has a descriptor when we get to a compute kernel, that descriptor is copied to the target anyway, *even for present clauses*, so passing the array descriptor to "enter data" descriptor doesn't appear to be necessary, even in cases where it stays in scope before unmapping from the target. So, questions: 1. Does the attached program violate Fortran semantics in some way? 2. Or OpenACC semantics? 3. Are there unintended side-effects of removing GOMP_MAP_TO_PSET and GOMP_MAP_POINTER from OpenACC enter/exit data directives? 4. Should the clauses be stripped from the equivalent OpenMP directives too? (FAOD, I'm not asking for review on the attached patch at this time.) HTH, Julian
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index ca42e0de640..7b52ce7d5c2 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1016,11 +1016,8 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Reference counts beyond those that represent genuine references in the - linked splay tree key/target memory structures, e.g. for multiple OpenACC - "present increment" operations (via "acc enter data") referring to the same - host-memory block. */ - uintptr_t virtual_refcount; + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; struct splay_tree_aux *aux; }; @@ -1153,7 +1150,6 @@ struct gomp_device_descr enum gomp_map_vars_kind { GOMP_MAP_VARS_OPENACC, - GOMP_MAP_VARS_OPENACC_ENTER_DATA, GOMP_MAP_VARS_TARGET, GOMP_MAP_VARS_DATA, GOMP_MAP_VARS_ENTER_DATA diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index c06b7341cbb..fff0d573f59 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -407,7 +407,7 @@ acc_map_data (void *h, void *d, size_t s) assert (tgt); splay_tree_key n = tgt->list[0].key; assert (n->refcount == 1); - assert (n->virtual_refcount == 0); + assert (n->dynamic_refcount == 0); /* Special reference counting behavior. */ n->refcount = REFCOUNT_INFINITY; @@ -454,7 +454,7 @@ acc_unmap_data (void *h) (void *) n->host_start, (int) host_size, (void *) h); } /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from - 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating + 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating the different 'REFCOUNT_INFINITY' cases, or simply separate 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' etc.)? */ @@ -475,14 +475,19 @@ acc_unmap_data (void *h) gomp_mutex_unlock (&acc_dev->lock); gomp_fatal ("cannot unmap target block"); } - else if (tgt->refcount > 1) - tgt->refcount--; - else + + if (tgt->refcount == 1) { - free (tgt->array); - free (tgt); + /* This is the last reference. Nullifying these fields prevents + 'gomp_unmap_tgt' via 'gomp_remove_var' from freeing the target + memory. */ + tgt->tgt_end = 0; + tgt->to_free = NULL; } + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (is_tgt_unmapped); + gomp_mutex_unlock (&acc_dev->lock); if (profiling_p) @@ -540,10 +545,8 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) assert (n->refcount != REFCOUNT_LINK); if (n->refcount != REFCOUNT_INFINITY) - { - n->refcount++; - n->virtual_refcount++; - } + n->refcount++; + n->dynamic_refcount++; gomp_mutex_unlock (&acc_dev->lock); } @@ -555,16 +558,18 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) goacc_aq aq = get_goacc_asyncqueue (async); - gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); + struct target_mem_desc *tgt + = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, + kinds, true, GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); + assert (tgt->list_count == 1); + n = tgt->list[0].key; + assert (n); + assert (n->refcount == 1); + assert (n->dynamic_refcount == 0); + n->dynamic_refcount++; - gomp_mutex_lock (&acc_dev->lock); - n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); - assert (n != NULL); - assert (n->tgt_offset == 0); - assert ((uintptr_t) hostaddrs[0] == n->host_start); - d = (void *) n->tgt->tgt_start; - gomp_mutex_unlock (&acc_dev->lock); + d = (void *) tgt->tgt_start; } if (profiling_p) @@ -683,23 +688,28 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) (void *) h, (int) s, (void *) n->host_start, (int) host_size); } + assert (n->refcount != REFCOUNT_LINK); + if (n->refcount != REFCOUNT_INFINITY + && n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } + bool finalize = (kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FORCE_FROM); if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->virtual_refcount; - n->virtual_refcount = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - - if (n->virtual_refcount > 0) + else if (n->dynamic_refcount) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->virtual_refcount--; + n->dynamic_refcount--; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) - n->refcount--; if (n->refcount == 0) { @@ -722,8 +732,16 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) gomp_remove_var_async (acc_dev, n, aq); else { + int num_mappings = 0; + /* If the target_mem_desc represents a single data mapping, we can + check that it is freed when this splay tree key's refcount + reaches zero. Otherwise (e.g. for a struct mapping with multiple + members), fall back to skipping the test. */ + for (int i = 0; i < n->tgt->list_count; i++) + if (n->tgt->list[i].key) + num_mappings++; bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); - assert (is_tgt_unmapped); + assert (num_mappings > 1 || is_tgt_unmapped); } } @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, { for (size_t i = 0; i < mapnum; i++) { - int group_last = find_group_last (i, mapnum, sizes, kinds); + splay_tree_key n; + size_t group_last = find_group_last (i, mapnum, sizes, kinds); + bool struct_p = false; + size_t size, groupnum = (group_last - i) + 1; - gomp_map_vars_async (acc_dev, aq, - (group_last - i) + 1, - &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, - GOMP_MAP_VARS_OPENACC_ENTER_DATA); + switch (kinds[i] & 0xff) + { + case GOMP_MAP_STRUCT: + { + int last = i + sizes[i]; + size = (uintptr_t) hostaddrs[last] + sizes[last] + - (uintptr_t) hostaddrs[i]; + struct_p = true; + } + break; + + case GOMP_MAP_ATTACH: + size = sizeof (void *); + break; + + default: + size = sizes[i]; + } + + n = lookup_host (acc_dev, hostaddrs[i], size); + + if (n && struct_p) + { + if (n->refcount != REFCOUNT_INFINITY) + n->refcount += groupnum - 1; + n->dynamic_refcount += groupnum - 1; + gomp_mutex_unlock (&acc_dev->lock); + } + else if (n && groupnum == 1) + { + void *h = hostaddrs[i]; + size_t s = sizes[i]; + + /* A standalone attach clause. */ + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, + (uintptr_t) h, s, NULL); + else if (h + s > (void *) n->host_end) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); + } + + assert (n->refcount != REFCOUNT_LINK); + if (n->refcount != REFCOUNT_INFINITY) + n->refcount++; + n->dynamic_refcount++; + + gomp_mutex_unlock (&acc_dev->lock); + } + else if (n && groupnum > 1) + { + assert (n->refcount != REFCOUNT_INFINITY + && n->refcount != REFCOUNT_LINK); + + bool processed = false; + + struct target_mem_desc *tgt = n->tgt; + for (size_t j = 0; j < tgt->list_count; j++) + if (tgt->list[j].key == n) + { + for (size_t k = 0; k < groupnum; k++) + if (j + k < tgt->list_count && tgt->list[j + k].key) + { + tgt->list[j + k].key->refcount++; + tgt->list[j + k].key->dynamic_refcount++; + } + processed = true; + } + + gomp_mutex_unlock (&acc_dev->lock); + if (!processed) + gomp_fatal ("dynamic refcount incrementing failed for " + "pointer/pset"); + } + else if (hostaddrs[i]) + { + gomp_mutex_unlock (&acc_dev->lock); + + struct target_mem_desc *tgt + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); + for (size_t j = 0; j < tgt->list_count; j++) + { + n = tgt->list[j].key; + if (n) + n->dynamic_refcount++; + } + } i = group_last; } @@ -1115,18 +1222,15 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->virtual_refcount; - n->virtual_refcount = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - - if (n->virtual_refcount > 0) + else if (n->dynamic_refcount) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->virtual_refcount--; + n->dynamic_refcount--; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) - n->refcount--; if (copyfrom && (kind != GOMP_MAP_FROM || n->refcount == 0)) @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, cur_node.host_end - cur_node.host_start); if (n->refcount == 0) - gomp_remove_var_async (acc_dev, n, aq); - } - break; - - case GOMP_MAP_STRUCT: - { - int elems = sizes[i]; - for (int j = 1; j <= elems; j++) { - struct splay_tree_key_s k; - k.host_start = (uintptr_t) hostaddrs[i + j]; - k.host_end = k.host_start + sizes[i + j]; - splay_tree_key str; - str = splay_tree_lookup (&acc_dev->mem_map, &k); - if (str) + if (aq) { - if (finalize) - { - if (str->refcount != REFCOUNT_INFINITY) - str->refcount -= str->virtual_refcount; - str->virtual_refcount = 0; - } - if (str->virtual_refcount > 0) + /* TODO The way the following code is currently + implemented, we need the 'is_tgt_unmapped' return + value from 'gomp_remove_var', so can't use + 'gomp_remove_var_async' here -- see the + 'gomp_unref_tgt' comment in + <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; + PR92881 -- so have to synchronize here. */ + if (!acc_dev->openacc.async.synchronize_func (aq)) { - if (str->refcount != REFCOUNT_INFINITY) - str->refcount--; - str->virtual_refcount--; + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("synchronize failed"); } - else if (str->refcount > 0 - && str->refcount != REFCOUNT_INFINITY) - str->refcount--; - if (str->refcount == 0) - gomp_remove_var_async (acc_dev, str, aq); } + int num_mappings = 0; + /* If the target_mem_desc represents a single data mapping, we + can check that it is freed when this splay tree key's + refcount reaches zero. Otherwise (e.g. for a struct + mapping with multiple members), fall back to skipping the + test. */ + for (int j = 0; j < n->tgt->list_count; j++) + if (n->tgt->list[j].key) + num_mappings++; + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (num_mappings > 1 || is_tgt_unmapped); } - i += elems; } break; + case GOMP_MAP_STRUCT: + continue; + default: gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", kind); diff --git a/libgomp/target.c b/libgomp/target.c index 36425477dcb..3f2becdae0e 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -666,8 +666,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA - || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; tgt->prev = NULL; struct gomp_coalesce_buf cbuf, *cbufp = NULL; @@ -1094,7 +1093,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].copy_from = false; tgt->list[i].always_copy_from = false; tgt->list[i].do_detach - = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA); + = (pragma_kind != GOMP_MAP_VARS_ENTER_DATA); n->refcount++; } else @@ -1155,7 +1154,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1294,20 +1293,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ - if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA - || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) - && tgt->refcount == 0) - { - /* If we're about to discard a target_mem_desc with no "structural" - references (tgt->refcount == 0), any splay keys linked in the tgt's - list must have their virtual refcount incremented to represent that - "lost" reference in order to implement the semantics of the OpenACC - "present increment" operation properly. */ - if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) - for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key) - tgt->list[i].key->virtual_refcount++; - + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { free (tgt); tgt = NULL; } @@ -1459,14 +1446,7 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, continue; bool do_unmap = false; - if (k->tgt == tgt - && k->virtual_refcount > 0 - && k->refcount != REFCOUNT_INFINITY) - { - k->virtual_refcount--; - k->refcount--; - } - else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) k->refcount--; else if (k->refcount == 1) { @@ -1631,7 +1611,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_table[i].start; k->refcount = REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -1665,7 +1645,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_var->start; k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -2935,7 +2915,7 @@ omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, k->tgt = tgt; k->tgt_offset = (uintptr_t) device_ptr + device_offset; k->refcount = REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c new file mode 100644 index 00000000000..4e6d06d48d5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c @@ -0,0 +1,31 @@ +/* Test dynamic unmapping of separate structure members. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <assert.h> +#include <openacc.h> + +struct s +{ + char a; + char b; +}; + +int main () +{ + struct s s; + +#pragma acc enter data create(s.a, s.b) + + assert (acc_is_present (&s.a, sizeof s.a)); + assert (acc_is_present (&s.b, sizeof s.b)); + +#pragma acc exit data delete(s.a) +#pragma acc exit data delete(s.b) + + assert (!acc_is_present (&s.a, sizeof s.a)); + assert (!acc_is_present (&s.b, sizeof s.b)); + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c new file mode 100644 index 00000000000..5539fd8d57f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c @@ -0,0 +1,31 @@ +/* Test dynamic unmapping of separate structure members. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <assert.h> +#include <openacc.h> + +struct s +{ + char a; + char b; +}; + +int main () +{ + struct s s; + +#pragma acc enter data create(s.a, s.b) + + assert (acc_is_present (&s.a, sizeof s.a)); + assert (acc_is_present (&s.b, sizeof s.b)); + + acc_delete (&s.a, sizeof s.a); + acc_delete (&s.b, sizeof s.b); + + assert (!acc_is_present (&s.a, sizeof s.a)); + assert (!acc_is_present (&s.b, sizeof s.b)); + + return 0; +} +