diff mbox series

: libgompd add parallel handle functions

Message ID CAKS6CCp=N2NKZLY1pBAs1rVeO2xy5rpsH=jn6zSi=ufserdWqg@mail.gmail.com
State New
Headers show
Series : libgompd add parallel handle functions | expand

Commit Message

Mohamed Sayed June 5, 2022, 11:48 p.m. UTC
This patch adds parallel region handles specified in section 5.5.3.
From examining libgomp code, I found that struct gomp_team describes the
parallel region.
The Thread handle gives the address of gomp_thread so, I tried to
access *team
gomp_thread->ts->team.
The parallel handle is the team pointer in team state.
I have a question about ompd_get_task_parallel_handle
https://www.openmp.org/spec-html/5.0/openmpsu218.html
How can i reach gomp_team from gomp_task
And the union in gomp_task has two entries gomp_sem_t and gomp_team

libgomp/ChangeLog

2022-06-06  Mohamed Sayed  <mohamedsayed22198@gmail.com>


* Makefile.am: (libgompd_la_SOURCES): Add ompd-parallel.c.
* Makefile.in: Regenerate.
* libgompd.map: Add ompd_get_curr_parallel_handle,
ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle
and ompd_parallel_handle_compare symbol versions.
* ompd-support.h:() : Add gompd_access (gomp_team_state, team) and
gompd_access (gomp_team, prev_ts).

Comments

Mohamed Sayed June 6, 2022, 3:13 p.m. UTC | #1
This is the final diff

On Mon, Jun 6, 2022 at 1:48 AM Mohamed Sayed <mohamedsayed22198@gmail.com>
wrote:

> This patch adds parallel region handles specified in section 5.5.3.
> From examining libgomp code, I found that struct gomp_team describes the
> parallel region.
> The Thread handle gives the address of gomp_thread so, I tried to
> access *team
> gomp_thread->ts->team.
> The parallel handle is the team pointer in team state.
> I have a question about ompd_get_task_parallel_handle
> https://www.openmp.org/spec-html/5.0/openmpsu218.html
> How can i reach gomp_team from gomp_task
> And the union in gomp_task has two entries gomp_sem_t and gomp_team
>
> libgomp/ChangeLog
>
> 2022-06-06  Mohamed Sayed  <mohamedsayed22198@gmail.com>
>
>
> * Makefile.am: (libgompd_la_SOURCES): Add ompd-parallel.c.
> * Makefile.in: Regenerate.
> * libgompd.map: Add ompd_get_curr_parallel_handle,
> ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle
> and ompd_parallel_handle_compare symbol versions.
> * ompd-support.h:() : Add gompd_access (gomp_team_state, team) and
> gompd_access (gomp_team, prev_ts).
>
>
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 6d913a93e7f..4e215450b25 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -94,7 +94,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c
 
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-parallel.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 40f896b5f03..ab66ad1c8f0 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -233,7 +233,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
 	oacc-target.lo ompd-support.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
-am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
+am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
+	ompd-parallel.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -583,7 +584,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c $(am__append_7)
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-parallel.c
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -800,6 +801,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-helper.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
diff --git a/libgomp/libgompd.map b/libgomp/libgompd.map
index 85bdc3695f6..1662dc56962 100644
--- a/libgomp/libgompd.map
+++ b/libgomp/libgompd.map
@@ -16,6 +16,10 @@ OMPD_5.1 {
     ompd_thread_handle_compare;
     ompd_get_thread_id;
     ompd_get_device_from_thread;
+    ompd_get_curr_parallel_handle;
+    ompd_get_enclosing_parallel_handle;
+    ompd_rel_parallel_handle;
+    ompd_parallel_handle_compare;
   local:
     *;
 };
diff --git a/libgomp/ompd-parallel.c b/libgomp/ompd-parallel.c
new file mode 100644
index 00000000000..3a278f7428a
--- /dev/null
+++ b/libgomp/ompd-parallel.c
@@ -0,0 +1,144 @@
+/* Copyright (C) The GNU Toolchain Authors.
+   Contributed by Mohamed Sayed <mohamedsayed22198@gmail.com>.
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains the implementation of functions defined in
+   section 5.5.6.  */
+
+#include "ompd-helper.h"
+
+ompd_rc_t 
+ompd_get_curr_parallel_handle (ompd_thread_handle_t *thread_handle, 
+			       ompd_parallel_handle_t **parallel_handle)
+{
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  CHECK (thread_handle->ah);
+
+  ompd_address_space_context_t *context = thread_handle->ah->context;
+  ompd_thread_context_t *thread_context = thread_handle->thread_context;
+
+  if (thread_context == NULL)
+    return ompd_rc_stale_handle;
+
+  ompd_address_t symbol_address = thread_handle->th;
+  ompd_address_t temp_symbol_address = {OMPD_SEGMENT_UNSPECIFIED, 0};
+  ompd_word_t temp_offset;
+  ompd_rc_t ret;
+
+  /* get ts offset.  */
+  GET_VALUE (context, thread_context, "gompd_access_gomp_thread_ts",
+  	     temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
+             temp_symbol_address);
+
+  symbol_address.address += temp_offset;
+
+  /* get team offset.  */
+  GET_VALUE (context, thread_context, "gompd_access_gomp_team_state_team",
+  	     temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
+             temp_symbol_address);
+
+  symbol_address.address += temp_offset;
+
+  ret = callbacks->read_memory (context, thread_context,
+  				&symbol_address, target_sizes.sizeof_pointer,
+  				&symbol_address.address);
+
+  CHECK_RET (ret); 
+  ret = callbacks->alloc_memory (sizeof (ompd_parallel_handle_t),
+  				 (void **) (parallel_handle));
+  CHECK_RET (ret); 
+
+  (*parallel_handle)->ah = thread_handle->ah;
+  (*parallel_handle)->th = symbol_address;
+  return ompd_rc_ok;                 		
+}
+
+ompd_rc_t
+ompd_get_enclosing_parallel_handle (ompd_parallel_handle_t *parallel_handle,
+				    ompd_parallel_handle_t **enc_par_handle)
+{
+  if (parallel_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  CHECK (parallel_handle->ah);
+
+  ompd_address_space_context_t *context = parallel_handle->ah->context;  
+  ompd_address_t symbol_address = parallel_handle->th;
+  ompd_address_t temp_symbol_address = {OMPD_SEGMENT_UNSPECIFIED, 0};
+  ompd_word_t temp_offset;
+  ompd_rc_t ret;
+
+  /* get prev_ts offset.  */
+  GET_VALUE (context, NULL, "gompd_access_gomp_team_prev_ts",
+  	     temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
+             temp_symbol_address);
+
+  symbol_address.address += temp_offset;
+
+  /* get team offset.  */
+  GET_VALUE (context, NULL, "gompd_access_gomp_team_state_team",
+  	     temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
+             temp_symbol_address);
+
+  symbol_address.address += temp_offset;
+
+  ret = callbacks->read_memory (context, NULL, &symbol_address,
+                                target_sizes.sizeof_pointer,
+                                &symbol_address.address);
+  CHECK_RET (ret); 
+  ret = callbacks->alloc_memory (sizeof (ompd_parallel_handle_t),
+                                 (void **) (enc_par_handle));
+  CHECK_RET (ret);
+
+  (*enc_par_handle)->ah = parallel_handle->ah;
+  (*enc_par_handle)->th = symbol_address; 
+  return ompd_rc_ok;
+}
+
+ompd_rc_t
+ompd_rel_parallel_handle (ompd_parallel_handle_t *parallel_handle)
+{
+  if (parallel_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  ompd_rc_t ret = callbacks->free_memory ((void *) (parallel_handle));
+
+  CHECK_RET (ret); 
+  return ompd_rc_ok;
+}
+
+ompd_rc_t 
+ompd_parallel_handle_compare (ompd_parallel_handle_t *parallel_handle_1,
+			      ompd_parallel_handle_t *parallel_handle_2,
+			      int *cmp_value)
+{
+  if (parallel_handle_1 == NULL || parallel_handle_2 == NULL)
+    return ompd_rc_stale_handle;
+
+  if (cmp_value == NULL)
+    return ompd_rc_bad_input;
+
+  if (parallel_handle_1->ah->kind != parallel_handle_2->ah->kind)
+    return ompd_rc_bad_input;
+
+  *cmp_value = parallel_handle_1->th.address - parallel_handle_2->th.address;
+  return ompd_rc_ok;
+}
diff --git a/libgomp/ompd-support.h b/libgomp/ompd-support.h
index 39d55161132..48a2e6133f5 100644
--- a/libgomp/ompd-support.h
+++ b/libgomp/ompd-support.h
@@ -83,12 +83,15 @@ extern __UINT64_TYPE__ gompd_state;
   gompd_access (gomp_thread_pool, threads) \
   gompd_access (gomp_thread, ts) \
   gompd_access (gomp_team_state, team_id) \
-  gompd_access (gomp_task, icv)
+  gompd_access (gomp_task, icv) \
+  gompd_access (gomp_team_state, team) \
+  gompd_access (gomp_team, prev_ts)
 
 #define GOMPD_SIZES(gompd_size) \
   gompd_size (gomp_thread) \
   gompd_size (gomp_task_icv) \
-  gompd_size (gomp_task)
+  gompd_size (gomp_task) 
+  
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
 #pragma GCC visibility pop
Jakub Jelinek June 6, 2022, 5:12 p.m. UTC | #2
On Mon, Jun 06, 2022 at 01:48:22AM +0200, Mohamed Sayed via Gcc-patches wrote:
> This patch adds parallel region handles specified in section 5.5.3.
> >From examining libgomp code, I found that struct gomp_team describes the
> parallel region.

I think it would be nice to first find out what the different
ompd_{parallel,thread,task}_handle_t should actually contain.

In GOMP, the first one maps to struct gomp_team, the middle one to
struct gomp_thread and the last one to struct gomp_task.

Functions that create those are:
For ompd_thread_handle_t that is ompd_get_thread_in_parallel and
ompd_get_thread_handle.
For ompd_parallel_handle_t that is ompd_get_curr_parallel_handle,
ompd_get_enclosing_parallel_handle and ompd_get_task_parallel_handle.
For ompd_task_handle_t that is ompd_get_curr_task_handle,
ompd_get_generating_task_handle, ompd_get_scheduling_task_handle and
ompd_get_task_in_parallel.

What those handles point to is something libgompd allocates using
the allocator callback and it is up to the library what it puts there,
typically it should contain the address of those corresponding
gomp structures and whatever else is needed (say address space handle
or some contexts).

> The Thread handle gives the address of gomp_thread so, I tried to
> access *team
> gomp_thread->ts->team.

Actually gomp_thread's ts.team (ts is not a pointer, but a nested struct).

> The parallel handle is the team pointer in team state.
> I have a question about ompd_get_task_parallel_handle
> https://www.openmp.org/spec-html/5.0/openmpsu218.html
> How can i reach gomp_team from gomp_taske
> And the union in gomp_task has two entries gomp_sem_t and gomp_team

So, for the 4 functions you want to implement:
ompd_get_curr_parallel_handle when you'll have struct gomp_thread *
and want struct gomp_team * you load thr->ts.team.
Note, the implicit parallel is usually represented by NULL in thr->ts.team,
that case then means it has just a single thread etc.

ompd_get_enclosing_parallel_handle when you'll have struct gomp_team *
and want the enclosing team.  team->prev_ts.team is what you are after,
if it is non-NULL, it is the enclosing parallel, if it is NULL, I think
one should verify e.g. host teams construct in that case, but
otherwise it is the implicit parallel that one probably needs to represent
somehow too.

ompd_get_task_parallel_handle when you'll have struct gomp_task *
and want the struct gomp_team it is in.
I'm afraid the library doesn't track this, it doesn't need it for anything.
One possibility to resolve this is perhaps if all functions that
allocate ompd_task_handle_t can't know the corresponding struct gomp_thread
too, then you could store in the private structure or ompd_task_handle_t
both struct gomp_task * and struct gomp_thread *.
If that is impossible, we could add such a pointer, but it would increase
both memory and runtime overhead of the library.

	Jakub
Mohamed Atef June 6, 2022, 5:32 p.m. UTC | #3
في الاثنين، ٦ يونيو، ٢٠٢٢ ٧:١٣ م Jakub Jelinek via Gcc-patches <
gcc-patches@gcc.gnu.org> كتب:

> On Mon, Jun 06, 2022 at 01:48:22AM +0200, Mohamed Sayed via Gcc-patches
> wrote:
> > This patch adds parallel region handles specified in section 5.5.3.
> > >From examining libgomp code, I found that struct gomp_team describes the
> > parallel region.
>
> I think it would be nice to first find out what the different
> ompd_{parallel,thread,task}_handle_t should actually contain.
>
> In GOMP, the first one maps to struct gomp_team, the middle one to
> struct gomp_thread and the last one to struct gomp_task.
>
> Functions that create those are:
> For ompd_thread_handle_t that is ompd_get_thread_in_parallel and
> ompd_get_thread_handle.
> For ompd_parallel_handle_t that is ompd_get_curr_parallel_handle,
> ompd_get_enclosing_parallel_handle and ompd_get_task_parallel_handle.
> For ompd_task_handle_t that is ompd_get_curr_task_handle,
> ompd_get_generating_task_handle, ompd_get_scheduling_task_handle and
> ompd_get_task_in_parallel.
>
> What those handles point to is something libgompd allocates using
> the allocator callback and it is up to the library what it puts there,
> typically it should contain the address of those corresponding
> gomp structures and whatever else is needed (say address space handle
> or some contexts).
>
> > The Thread handle gives the address of gomp_thread so, I tried to
> > access *team
> > gomp_thread->ts->team.
>
> Actually gomp_thread's ts.team (ts is not a pointer, but a nested struct).
>
I think the implementation is correct But there's a typo in the comment.

>
> > The parallel handle is the team pointer in team state.
> > I have a question about ompd_get_task_parallel_handle
> > https://www.openmp.org/spec-html/5.0/openmpsu218.html
> > How can i reach gomp_team from gomp_taske
> > And the union in gomp_task has two entries gomp_sem_t and gomp_team
>
> So, for the 4 functions you want to implement:
> ompd_get_curr_parallel_handle when you'll have struct gomp_thread *
> and want struct gomp_team * you load thr->ts.team.
> Note, the implicit parallel is usually represented by NULL in thr->ts.team,
> that case then means it has just a single thread etc.
>
> ompd_get_enclosing_parallel_handle when you'll have struct gomp_team *
> and want the enclosing team.  team->prev_ts.team is what you are after,
> if it is non-NULL, it is the enclosing parallel, if it is NULL, I think
> one should verify e.g. host teams construct in that case, but
> otherwise it is the implicit parallel that one probably needs to represent
> somehow too.
>
So for both cases one should read the value of *team and if it's NULL, the
function returns some error state (eg. ompd_rc_unavailable)

>
> ompd_get_task_parallel_handle when you'll have struct gomp_task *
> and want the struct gomp_team it is in.
> I'm afraid the library doesn't track this, it doesn't need it for anything.
> One possibility to resolve this is perhaps if all functions that
> allocate ompd_task_handle_t can't know the corresponding struct gomp_thread
> too, then you could store in the private structure or ompd_task_handle_t
> both struct gomp_task * and struct gomp_thread *.
>
I will ask the guys to try this if it's impossible then we delay this
function.

> If that is impossible, we could add such a pointer, but it would increase
> both memory and runtime overhead of the library.
>
>         Jakub
>
>
Jakub Jelinek June 6, 2022, 5:34 p.m. UTC | #4
On Mon, Jun 06, 2022 at 05:13:24PM +0200, Mohamed Sayed via Gcc-patches wrote:
> > 2022-06-06  Mohamed Sayed  <mohamedsayed22198@gmail.com>
> >
> >
> > * Makefile.am: (libgompd_la_SOURCES): Add ompd-parallel.c.

The ChangeLog formatting is wrong.  The above line should be:
	* Makefile.am (libgompd_la_SOURCES): Add ompd-parallel.c.
i.e. tab, * space filename and if there is something in the
file that is being modified, followed by space ( name )
All this then followed by : and description.

> > * Makefile.in: Regenerate.
> > * libgompd.map: Add ompd_get_curr_parallel_handle,
> > ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle
> > and ompd_parallel_handle_compare symbol versions.

We aren't very consistent on the map files, either it should be
	* libgompd.map (ompd_get_curr_parallel_handle,
	ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle,
	ompd_parallel_handle_compare): Export at OMPD_5.1.
or
	* libgompd.map (OMPD_5.1): Export ompd_get_curr_parallel_handle,
	ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle and
	ompd_parallel_handle_compare.

> > * ompd-support.h:() : Add gompd_access (gomp_team_state, team) and
> > gompd_access (gomp_team, prev_ts).

This is totally wrong.
You are modifying the GOMPD_FOREACH_ACCESS macro I think.
So you should say
	* ompd-support.h (GOMPD_FOREACH_ACCESS): Add
	gompd_access (gomp_team_state, team) and
	gompd_access (gomp_team, prev_ts).

> +    return ompd_rc_stale_handle;
> +
> +  CHECK (parallel_handle->ah);
> +
> +  ompd_address_space_context_t *context = parallel_handle->ah->context;  
> +  ompd_address_t symbol_address = parallel_handle->th;
> +  ompd_address_t temp_symbol_address = {OMPD_SEGMENT_UNSPECIFIED, 0};
> +  ompd_word_t temp_offset;
> +  ompd_rc_t ret;
> +
> +  /* get prev_ts offset.  */
> +  GET_VALUE (context, NULL, "gompd_access_gomp_team_prev_ts",
> +  	     temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
> +             temp_symbol_address);

Formatting.  You can see the two lines are badly indented, each one in
a different wrong way.  The indenting rule is that the lines after the first
one in this case should start aligned below context on the GET_VALUE
line, and each 8 spaces should be replaced by a tab.

> +
> +  symbol_address.address += temp_offset;
> +
> +  /* get team offset.  */
> +  GET_VALUE (context, NULL, "gompd_access_gomp_team_state_team",
> +  	     temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
> +             temp_symbol_address);

Likewise.
> +
> +  symbol_address.address += temp_offset;
> +
> +  ret = callbacks->read_memory (context, NULL, &symbol_address,
> +                                target_sizes.sizeof_pointer,
> +                                &symbol_address.address);
> +  CHECK_RET (ret); 
> +  ret = callbacks->alloc_memory (sizeof (ompd_parallel_handle_t),
> +                                 (void **) (enc_par_handle));

Why the ()s around enc_par_handle?
Anyway, seems like an aliasing violation, if the callback stores
through void **, then you should just pass it address of a local
void * variable and *enc_par_handle = (ompd_parallel_handle_t) var;

> +ompd_rc_t
> +ompd_rel_parallel_handle (ompd_parallel_handle_t *parallel_handle)
> +{
> +  if (parallel_handle == NULL)
> +    return ompd_rc_stale_handle;
> +
> +  ompd_rc_t ret = callbacks->free_memory ((void *) (parallel_handle));

Likewise (both aliasing and the unnecessary ()s.
>  #define GOMPD_SIZES(gompd_size) \
>    gompd_size (gomp_thread) \
>    gompd_size (gomp_task_icv) \
> -  gompd_size (gomp_task)
> +  gompd_size (gomp_task) 
> +  

Why?

	Jakub
Jakub Jelinek June 6, 2022, 5:59 p.m. UTC | #5
On Mon, Jun 06, 2022 at 07:32:31PM +0200, Mohamed Atef wrote:
> So for both cases one should read the value of *team and if it's NULL, the
> function returns some error state (eg. ompd_rc_unavailable)

No, I think for team NULL it should simply push something different
to ompd_parallel_handle_t, something that would mean for uses of the handle
that it is not a normal explicit parallel, but the implicit parallel
and handle those cases differently.
E.g. if one has
int
main ()
{
  #pragma omp parallel
  sleep (1024);
}
then when you get the explicit parallel's handle, there is struct gomp_team
one can store and work with, but its enclosing parallel isn't non-existent,
it is an implicit parallel, only its enclosing parallel doesn't exist.

Unfortunately, it isn't that easy.  We sometimes do create struct gomp_team
even for the implicit parallel.
See libgomp/target.c (GOMP_target_ext) which is there for cases like
int
main ()
{
  #pragma omp target nowait
  something;
  something_else;
  #pragma omp taskwait
}
where we want the asynchronous target to be really asynchronous with
something_else; and need struct gomp_team for that.
But in the case of artificial struct gomp_team for this case
thr->ts.level will be 0 rather than > 0.

> > ompd_get_task_parallel_handle when you'll have struct gomp_task *
> > and want the struct gomp_team it is in.
> > I'm afraid the library doesn't track this, it doesn't need it for anything.
> > One possibility to resolve this is perhaps if all functions that
> > allocate ompd_task_handle_t can't know the corresponding struct gomp_thread
> > too, then you could store in the private structure or ompd_task_handle_t
> > both struct gomp_task * and struct gomp_thread *.
> >
> I will ask the guys to try this if it's impossible then we delay this
> function.

Perhaps the function can be added but could just error unconditionally
until some solution is found.

BTW, when looking at the patch, I found I've missed some things in the first
already committed patch.

+  #define gompd_init_access(t, m)  \                                                                                                                                                 
+    gompd_access_##t##_##m = (__UINT64_TYPE__) & (((struct t *) NULL)->m);                                                                                                           
This is UB, should be using offsetof (struct t, m) instead.

Also, using __UINT64_TYPE__ for those offsets or sizes seems to be very
excessive, on x86_64-linux the largest struct from looking at debug info
is struct gomp_team right now with 1344 bytes.
So for the time being, I think using __UINT16_TYPE__ for all those sizes and
offsets should be 4 times as compact.

And, it would be nice to initialize those at least when possible at compile
time, not in gompd_load, offsetof of a non-VLA type is a constant
expression, similarly sizeof, so making all those const and initialized
directly would mean they don't waste a writable section (so all processes
can share those).  Probably it would be nice to stick them at least for ELF
into a names section so that they'd be together and not needing to be ever
touched unless OMPD is enabled.
Of course, I don't rule out the possibility of some values needing to be
initialized at runtime, they'd simply just not be const like the rest.

	Jakub
diff mbox series

Patch

diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 6d913a93e7f..4e215450b25 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -94,7 +94,7 @@  libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c
 
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-parallel.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 40f896b5f03..ab66ad1c8f0 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -233,7 +233,8 @@  am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
 	oacc-target.lo ompd-support.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
-am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
+am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
+	ompd-parallel.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -583,7 +584,7 @@  libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c $(am__append_7)
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-parallel.c
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -800,6 +801,7 @@  distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-helper.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
diff --git a/libgomp/libgompd.map b/libgomp/libgompd.map
index 85bdc3695f6..1662dc56962 100644
--- a/libgomp/libgompd.map
+++ b/libgomp/libgompd.map
@@ -16,6 +16,10 @@  OMPD_5.1 {
     ompd_thread_handle_compare;
     ompd_get_thread_id;
     ompd_get_device_from_thread;
+    ompd_get_curr_parallel_handle;
+    ompd_get_enclosing_parallel_handle;
+    ompd_rel_parallel_handle;
+    ompd_parallel_handle_compare;
   local:
     *;
 };
diff --git a/libgomp/ompd-support.h b/libgomp/ompd-support.h
index 39d55161132..48a2e6133f5 100644
--- a/libgomp/ompd-support.h
+++ b/libgomp/ompd-support.h
@@ -83,12 +83,15 @@  extern __UINT64_TYPE__ gompd_state;
   gompd_access (gomp_thread_pool, threads) \
   gompd_access (gomp_thread, ts) \
   gompd_access (gomp_team_state, team_id) \
-  gompd_access (gomp_task, icv)
+  gompd_access (gomp_task, icv) \
+  gompd_access (gomp_team_state, team) \
+  gompd_access (gomp_team, prev_ts)
 
 #define GOMPD_SIZES(gompd_size) \
   gompd_size (gomp_thread) \
   gompd_size (gomp_task_icv) \
-  gompd_size (gomp_task)
+  gompd_size (gomp_task) 
+  
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
 #pragma GCC visibility pop