Message ID | 4ef1a10a-7c14-f8ea-a8e8-d68274486039@codesourcery.com |
---|---|
State | New |
Headers | show |
Series | openmp: Retire nest-var ICV | expand |
Hello Kwok, hi Jakub, On 06.11.20 21:13, Kwok Cheung Yeung wrote: > In addition to deprecating the omp_(get|set)_nested() functions and > OMP_NESTED environment variable, OpenMP 5.0 also removes the nest-var > ICV altogether, defining it in terms of the max-active-levels-var ICV > instead. [...] Shouldn't libgomp/libgomp.texi be also updated? Tobias ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
On 06/11/2020 8:33 pm, Tobias Burnus wrote: > Hello Kwok, hi Jakub, > > On 06.11.20 21:13, Kwok Cheung Yeung wrote: > >> In addition to deprecating the omp_(get|set)_nested() functions and OMP_NESTED >> environment variable, OpenMP 5.0 also removes the nest-var ICV altogether, >> defining it in terms of the max-active-levels-var ICV instead. [...] > > Shouldn't libgomp/libgomp.texi be also updated? > > Tobias I have added some documentation regarding the relationship between the nesting setting and the current maximum number active levels. The documentation does not detail ICVs though, so we probably don't need to explicitly state that one is in terms of another? Is this version okay for trunk? Thanks Kwok commit b4feb16f3c84b8f82163a4cbba6a31d55fbb8e5b Author: Kwok Cheung Yeung <kcy@codesourcery.com> Date: Mon Nov 9 09:34:39 2020 -0800 openmp: Retire nest-var ICV for OpenMP 5.0 This removes the nest-var ICV, expressing nesting in terms of the max-active-levels-var ICV instead. 2020-11-09 Kwok Cheung Yeung <kcy@codesourcery.com> libgomp/ * env.c (gomp_global_icv): Remove nest_var field. (gomp_max_active_levels_var): Initialize to 1. (parse_boolean): Return true on success. (handle_omp_display_env): Express OMP_NESTED in terms of gomp_max_active_levels_var. (initialize_env): Set gomp_max_active_levels_var from OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and OMP_PROC_BIND. * icv.c (omp_set_nested): Express in terms of gomp_max_active_levels_var. (omp_get_nested): Likewise. * libgomp.h (struct gomp_task_icv): Remove nest_var field. * libgomp.texi (omp_get_nested): Update documentation. (omp_set_nested): Likewise. (OMP_MAX_ACTIVE_LEVELS): Likewise. (OMP_NESTED): Likewise. (OMP_NUM_THREADS): Likewise. (OMP_PROC_BIND): Likewise. * parallel.c (gomp_resolve_num_threads): Replace reference to nest_var with gomp_max_active_levels_var. * testsuite/libgomp.c/target-5.c: Remove additional options. (main): Remove references to omp_get_nested and omp_set_nested. diff --git a/libgomp/env.c b/libgomp/env.c index ab22525..75d0fe2 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = { .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, - .nest_var = false, .bind_var = omp_proc_bind_false, .target_data = NULL }; -unsigned long gomp_max_active_levels_var = gomp_supported_active_levels; +unsigned long gomp_max_active_levels_var = 1; bool gomp_cancel_var = false; enum gomp_target_offload_t gomp_target_offload_var = GOMP_TARGET_OFFLOAD_DEFAULT; @@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue) } /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ + result in VALUE. Return true if one was present and it was + successfully parsed. */ -static void +static bool parse_boolean (const char *name, bool *value) { const char *env; env = getenv (name); if (env == NULL) - return; + return false; while (isspace ((unsigned char) *env)) ++env; @@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value) while (isspace ((unsigned char) *env)) ++env; if (*env != '\0') - gomp_error ("Invalid value for environment variable %s", name); + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + return true; } /* Parse the OMP_WAIT_POLICY environment variable and return the value. */ @@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", - gomp_global_icv.nest_var ? "TRUE" : "FALSE"); + gomp_max_active_levels_var > 1 ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); for (i = 1; i < gomp_nthreads_var_list_len; i++) @@ -1417,16 +1421,11 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); - parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); - parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, - true); - if (gomp_max_active_levels_var > gomp_supported_active_levels) - gomp_max_active_levels_var = gomp_supported_active_levels; gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { @@ -1451,6 +1450,23 @@ initialize_env (void) &gomp_bind_var_list_len) && gomp_global_icv.bind_var == omp_proc_bind_false) ignore = true; + if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", + &gomp_max_active_levels_var, true)) + { + if (gomp_max_active_levels_var > gomp_supported_active_levels) + gomp_max_active_levels_var = gomp_supported_active_levels; + } + else + { + bool nested = true; + + /* OMP_NESTED is deprecated in OpenMP 5.0. */ + if (parse_boolean ("OMP_NESTED", &nested)) + gomp_max_active_levels_var = nested ? gomp_supported_active_levels + : 1; + else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1) + gomp_max_active_levels_var = gomp_supported_active_levels; + } /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always parsed if present in the environment. If OMP_PROC_BIND was set explicitly to false, don't populate places list though. If places diff --git a/libgomp/icv.c b/libgomp/icv.c index 8df15e3..f54ccb1 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -56,15 +56,16 @@ omp_get_dynamic (void) void omp_set_nested (int val) { - struct gomp_task_icv *icv = gomp_icv (true); - icv->nest_var = val; + if (val) + gomp_max_active_levels_var = gomp_supported_active_levels; + else if (gomp_max_active_levels_var > 1) + gomp_max_active_levels_var = 1; } int omp_get_nested (void) { - struct gomp_task_icv *icv = gomp_icv (false); - return icv->nest_var; + return gomp_max_active_levels_var > 1; } #pragma GCC diagnostic pop diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index da7ac03..05aa1e4 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -428,7 +428,6 @@ struct gomp_task_icv int default_device_var; unsigned int thread_limit_var; bool dyn_var; - bool nest_var; char bind_var; /* Internal ICV. */ struct target_mem_desc *target_data; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 6937063..9ad871b 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -489,8 +489,11 @@ represent their language-specific counterparts. Nested parallel regions may be initialized at startup by the @env{OMP_NESTED} environment variable or at runtime using -@code{omp_set_nested}. If undefined, nested parallel regions are -disabled by default. +@code{omp_set_nested}. Setting the maximum number of nested +regions to above one using the @env{OMP_MAX_ACTIVE_LEVELS} +environment variable or @code{omp_set_max_active_levels} will +also enable nesting. If undefined, nested parallel regions +are disabled by default. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -503,7 +506,8 @@ disabled by default. @end multitable @item @emph{See also}: -@ref{omp_set_nested}, @ref{OMP_NESTED} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11. @@ -964,6 +968,10 @@ are allowed to create new teams. The function takes the language-specific equivalent of @code{true} and @code{false}, where @code{true} enables dynamic adjustment of team sizes and @code{false} disables it. +Enabling nested parallel regions will also set the maximum number of +active nested regions to the maximum supported. Disabling nested parallel +regions will set the maximum number of active nested regions to one. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);} @@ -976,7 +984,8 @@ dynamic adjustment of team sizes and @code{false} disables it. @end multitable @item @emph{See also}: -@ref{OMP_NESTED}, @ref{omp_get_nested} +@ref{omp_get_nested}, @ref{omp_set_max_active_levels}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10. @@ -1502,10 +1511,11 @@ disabled by default. @item @emph{Description}: Specifies the initial value for the maximum number of nested parallel regions. The value of this variable shall be a positive integer. -If undefined, the number of active levels is unlimited. +If undefined, the number of active levels is one, which effectively +disables nested regions. @item @emph{See also}: -@ref{omp_set_max_active_levels} +@ref{omp_set_max_active_levels}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9 @@ -1541,11 +1551,13 @@ integer, and zero is allowed. If undefined, the default priority is @item @emph{Description}: Enable or disable nested parallel regions, i.e., whether team members are allowed to create new teams. The value of this environment variable -shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel -regions are disabled by default. +shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the number +of maximum active nested regions supported will by default be set to the +maximum supported, otherwise it will be set to one. If undefined, nested +parallel regions are disabled by default. @item @emph{See also}: -@ref{omp_set_nested} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6 @@ -1561,11 +1573,12 @@ regions are disabled by default. @item @emph{Description}: Specifies the default number of threads to use in parallel regions. The value of this variable shall be a comma-separated list of positive integers; -the value specified the number of threads to use for the corresponding nested -level. If undefined one thread per CPU is used. +the value specifies the number of threads to use for the corresponding nested +level. Specifying more than one item in the list will automatically enable +nesting by default. If undefined one thread per CPU is used. @item @emph{See also}: -@ref{omp_set_num_threads} +@ref{omp_set_num_threads}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2 @@ -1586,13 +1599,15 @@ the thread affinity policy for the corresponding nesting level. With @code{MASTER} the worker threads are in the same place partition as the master thread. With @code{CLOSE} those are kept close to the master thread in contiguous place partitions. And with @code{SPREAD} a sparse distribution -across the place partitions is used. +across the place partitions is used. Specifying more than one item in the +list will automatically enable nesting by default. When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when @env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise. @item @emph{See also}: -@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind} +@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY}, +@ref{OMP_NESTED}, @ref{OMP_PLACES} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4 diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 2fe4f573..9629470 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -53,7 +53,7 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) /* Accelerators with fixed thread counts require this to return 1 for nested parallel regions. */ #if !defined(__AMDGCN__) && !defined(__nvptx__) - && !icv->nest_var + && gomp_max_active_levels_var <= 1 #endif ) return 1; diff --git a/libgomp/testsuite/libgomp.c/target-5.c b/libgomp/testsuite/libgomp.c/target-5.c index 21a69ea..ec31f89 100644 --- a/libgomp/testsuite/libgomp.c/target-5.c +++ b/libgomp/testsuite/libgomp.c/target-5.c @@ -1,5 +1,3 @@ -/* { dg-additional-options "-Wno-deprecated-declarations" } */ - #include <omp.h> #include <stdlib.h> @@ -7,17 +5,14 @@ int main () { int d_o = omp_get_dynamic (); - int n_o = omp_get_nested (); omp_sched_t s_o; int c_o; omp_get_schedule (&s_o, &c_o); int m_o = omp_get_max_threads (); omp_set_dynamic (1); - omp_set_nested (1); omp_set_schedule (omp_sched_static, 2); omp_set_num_threads (4); int d = omp_get_dynamic (); - int n = omp_get_nested (); omp_sched_t s; int c; omp_get_schedule (&s, &c); @@ -30,13 +25,11 @@ main () int c_c; omp_get_schedule (&s_c, &c_c); if (d_o != omp_get_dynamic () - || n_o != omp_get_nested () || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ()) abort (); omp_set_dynamic (0); - omp_set_nested (0); omp_set_schedule (omp_sched_dynamic, 4); omp_set_num_threads (2); if (!omp_is_initial_device ()) @@ -48,7 +41,6 @@ main () int c_c; omp_get_schedule (&s_c, &c_c); if (d != omp_get_dynamic () - || n != omp_get_nested () || s != s_c || c != c_c || m != omp_get_max_threads ()) @@ -60,13 +52,11 @@ main () int c_c; omp_get_schedule (&s_c, &c_c); if (d_o != omp_get_dynamic () - || n_o != omp_get_nested () || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ()) abort (); omp_set_dynamic (0); - omp_set_nested (0); omp_set_schedule (omp_sched_dynamic, 4); omp_set_num_threads (2); if (!omp_is_initial_device ()) @@ -76,7 +66,6 @@ main () abort (); omp_get_schedule (&s_c, &c_c); if (d != omp_get_dynamic () - || n != omp_get_nested () || s != s_c || c != c_c || m != omp_get_max_threads ())
On Mon, Nov 09, 2020 at 05:43:20PM +0000, Kwok Cheung Yeung wrote: > commit b4feb16f3c84b8f82163a4cbba6a31d55fbb8e5b > Author: Kwok Cheung Yeung <kcy@codesourcery.com> > Date: Mon Nov 9 09:34:39 2020 -0800 > > openmp: Retire nest-var ICV for OpenMP 5.0 > > This removes the nest-var ICV, expressing nesting in terms of the > max-active-levels-var ICV instead. > > 2020-11-09 Kwok Cheung Yeung <kcy@codesourcery.com> > > libgomp/ > * env.c (gomp_global_icv): Remove nest_var field. > (gomp_max_active_levels_var): Initialize to 1. > (parse_boolean): Return true on success. > (handle_omp_display_env): Express OMP_NESTED in terms of > gomp_max_active_levels_var. > (initialize_env): Set gomp_max_active_levels_var from > OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and > OMP_PROC_BIND. > * icv.c (omp_set_nested): Express in terms of > gomp_max_active_levels_var. > (omp_get_nested): Likewise. > * libgomp.h (struct gomp_task_icv): Remove nest_var field. > * libgomp.texi (omp_get_nested): Update documentation. > (omp_set_nested): Likewise. > (OMP_MAX_ACTIVE_LEVELS): Likewise. > (OMP_NESTED): Likewise. > (OMP_NUM_THREADS): Likewise. > (OMP_PROC_BIND): Likewise. > * parallel.c (gomp_resolve_num_threads): Replace reference > to nest_var with gomp_max_active_levels_var. > * testsuite/libgomp.c/target-5.c: Remove additional options. > (main): Remove references to omp_get_nested and omp_set_nested. One thing is that max-active-levels-var in 5.0 is per-device, but in 5.1 per-data environment. The question is if we should implement the problematic 5.0 way or the 5.1 one. E.g.: #include <omp.h> #include <stdio.h> int main () { #pragma omp parallel { omp_set_nested (1); #pragma omp parallel num_threads(2) printf ("Hello, world!\n"); } } which used to be valid in 4.5 (where nest-var used to be per-data environment) is in 5.0 racy (and in 5.1 will not be racy again). Though, as these are deprecated APIs, perhaps we can just do the 5.0 way for now. > --- a/libgomp/libgomp.texi > +++ b/libgomp/libgomp.texi > @@ -489,8 +489,11 @@ represent their language-specific counterparts. > > Nested parallel regions may be initialized at startup by the > @env{OMP_NESTED} environment variable or at runtime using > -@code{omp_set_nested}. If undefined, nested parallel regions are > -disabled by default. > +@code{omp_set_nested}. Setting the maximum number of nested > +regions to above one using the @env{OMP_MAX_ACTIVE_LEVELS} > +environment variable or @code{omp_set_max_active_levels} will > +also enable nesting. If undefined, nested parallel regions > +are disabled by default. This doesn't really describe what env.c does. If undefined, then if OMP_NESTED is defined, it will be folloed, and if neither is defined, the code sets the default based on "OMP_NUM_THREADS or OMP_PROC_BIND is set to a comma-separated list of more than one value" as the spec says and only is disabled otherwise. > @@ -1502,10 +1511,11 @@ disabled by default. > @item @emph{Description}: > Specifies the initial value for the maximum number of nested parallel > regions. The value of this variable shall be a positive integer. > -If undefined, the number of active levels is unlimited. > +If undefined, the number of active levels is one, which effectively > +disables nested regions. Similarly. > > @item @emph{See also}: > -@ref{omp_set_max_active_levels} > +@ref{omp_set_max_active_levels}, @ref{OMP_NESTED} > > @item @emph{Reference}: > @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9 > @@ -1541,11 +1551,13 @@ integer, and zero is allowed. If undefined, the default priority is > @item @emph{Description}: > Enable or disable nested parallel regions, i.e., whether team members > are allowed to create new teams. The value of this environment variable > -shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel > -regions are disabled by default. > +shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the number > +of maximum active nested regions supported will by default be set to the > +maximum supported, otherwise it will be set to one. If undefined, nested > +parallel regions are disabled by default. Again. > --- a/libgomp/testsuite/libgomp.c/target-5.c > +++ b/libgomp/testsuite/libgomp.c/target-5.c > @@ -1,5 +1,3 @@ > -/* { dg-additional-options "-Wno-deprecated-declarations" } */ > - > #include <omp.h> > #include <stdlib.h> > > @@ -7,17 +5,14 @@ int > main () > { > int d_o = omp_get_dynamic (); > - int n_o = omp_get_nested (); > omp_sched_t s_o; > int c_o; > omp_get_schedule (&s_o, &c_o); > int m_o = omp_get_max_threads (); > omp_set_dynamic (1); > - omp_set_nested (1); > omp_set_schedule (omp_sched_static, 2); > omp_set_num_threads (4); > int d = omp_get_dynamic (); > - int n = omp_get_nested (); > omp_sched_t s; > int c; > omp_get_schedule (&s, &c); > @@ -30,13 +25,11 @@ main () > int c_c; > omp_get_schedule (&s_c, &c_c); > if (d_o != omp_get_dynamic () > - || n_o != omp_get_nested () > || s_o != s_c > || c_o != c_c > || m_o != omp_get_max_threads ()) > abort (); > omp_set_dynamic (0); > - omp_set_nested (0); > omp_set_schedule (omp_sched_dynamic, 4); > omp_set_num_threads (2); > if (!omp_is_initial_device ()) > @@ -48,7 +41,6 @@ main () > int c_c; > omp_get_schedule (&s_c, &c_c); > if (d != omp_get_dynamic () > - || n != omp_get_nested () > || s != s_c > || c != c_c > || m != omp_get_max_threads ()) > @@ -60,13 +52,11 @@ main () > int c_c; > omp_get_schedule (&s_c, &c_c); > if (d_o != omp_get_dynamic () > - || n_o != omp_get_nested () > || s_o != s_c > || c_o != c_c > || m_o != omp_get_max_threads ()) > abort (); > omp_set_dynamic (0); > - omp_set_nested (0); > omp_set_schedule (omp_sched_dynamic, 4); > omp_set_num_threads (2); > if (!omp_is_initial_device ()) > @@ -76,7 +66,6 @@ main () > abort (); > omp_get_schedule (&s_c, &c_c); > if (d != omp_get_dynamic () > - || n != omp_get_nested () > || s != s_c > || c != c_c > || m != omp_get_max_threads ()) Why does this testcase need updates? It doesn't seem to use omp_[sg]et_max_active_levels and so I don't see why it couldn't use omp_[sg]et_nested. Jakub
On 10/11/2020 6:01 pm, Jakub Jelinek wrote: > One thing is that max-active-levels-var in 5.0 is per-device, > but in 5.1 per-data environment. The question is if we should implement > the problematic 5.0 way or the 5.1 one. E.g.: > #include <omp.h> > #include <stdio.h> > > int > main () > { > #pragma omp parallel > { > omp_set_nested (1); > #pragma omp parallel num_threads(2) > printf ("Hello, world!\n"); > } > } > which used to be valid in 4.5 (where nest-var used to be per-data > environment) is in 5.0 racy (and in 5.1 will not be racy again). > Though, as these are deprecated APIs, perhaps we can just do the 5.0 way for > now. Since max-active-levels-var is still current in 5.1, I guess we might as well do it properly :-). I have now placed max-active-levels-var into gomp_task_icv. The definition of omp_get_nested in 5.1 refers to the active-level-var ICV which is currently not implemented, so the comparison is against omp_get_active_level() instead. >> --- a/libgomp/libgomp.texi >> +++ b/libgomp/libgomp.texi >> @@ -489,8 +489,11 @@ represent their language-specific counterparts. >> >> Nested parallel regions may be initialized at startup by the >> @env{OMP_NESTED} environment variable or at runtime using >> -@code{omp_set_nested}. If undefined, nested parallel regions are >> -disabled by default. >> +@code{omp_set_nested}. Setting the maximum number of nested >> +regions to above one using the @env{OMP_MAX_ACTIVE_LEVELS} >> +environment variable or @code{omp_set_max_active_levels} will >> +also enable nesting. If undefined, nested parallel regions >> +are disabled by default. > > This doesn't really describe what env.c does. If undefined, then > if OMP_NESTED is defined, it will be folloed, and if neither is > defined, the code sets the default based on > "OMP_NUM_THREADS or OMP_PROC_BIND is set to a > comma-separated list of more than one value" > as the spec says and only is disabled otherwise. > Similarly. > Again. I have changed these to more accurately describe what is happening. The descriptions are starting to get rather verbose though... >> --- a/libgomp/testsuite/libgomp.c/target-5.c >> +++ b/libgomp/testsuite/libgomp.c/target-5.c > > Why does this testcase need updates? > It doesn't seem to use omp_[sg]et_max_active_levels and so I don't see > why it couldn't use omp_[sg]et_nested. > The problem is with max-active-levels-var (which nesting is now in terms of) being per device rather than per data environment. The test expects the nested setting to go back to its previous value after leaving a DE that sets it to something else. Anyway, with max-active-levels-var now being per data environment, that is all moot now, and the test can remain unchanged. Is this version okay for trunk? Bootstrapped on x86_64 and libgomp tested with no regressions with nvptx offloading. Thanks Kwok commit bcaa3dbf1f130e3a2c7e6033a10be3f61221a951 Author: Kwok Cheung Yeung <kcy@codesourcery.com> Date: Thu Nov 12 13:42:28 2020 -0800 openmp: Retire nest-var ICV for OpenMP 5.1 This removes the nest-var ICV, expressing nesting in terms of the max-active-levels-var ICV instead. The max-active-levels-var ICV is now per data environment rather than per device. 2020-11-12 Kwok Cheung Yeung <kcy@codesourcery.com> libgomp/ * env.c (gomp_global_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_max_active_levels_var): Remove. (parse_boolean): Return true on success. (handle_omp_display_env): Express OMP_NESTED in terms of max_active_levels_var. (initialize_env): Set max_active_levels_var from OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and OMP_PROC_BIND. * icv.c (omp_set_nested): Express in terms of max_active_levels_var. (omp_get_nested): Likewise. (omp_set_max_active_levels): Use max_active_levels_var field instead of gomp_max_active_levels_var. (omp_get_max_active_levels): Likewise. * libgomp.h (struct gomp_task_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_max_active_levels_var): Delete. * libgomp.texi (omp_get_nested): Update documentation. (omp_set_nested): Likewise. (OMP_MAX_ACTIVE_LEVELS): Likewise. (OMP_NESTED): Likewise. (OMP_NUM_THREADS): Likewise. (OMP_PROC_BIND): Likewise. * parallel.c (gomp_resolve_num_threads): Replace reference to nest_var with max_active_levels_var. Use max_active_levels_var field instead of gomp_max_active_levels_var. diff --git a/libgomp/env.c b/libgomp/env.c index ab22525..b8ed1bd 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = { .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, - .nest_var = false, + .max_active_levels_var = 1, .bind_var = omp_proc_bind_false, .target_data = NULL }; -unsigned long gomp_max_active_levels_var = gomp_supported_active_levels; bool gomp_cancel_var = false; enum gomp_target_offload_t gomp_target_offload_var = GOMP_TARGET_OFFLOAD_DEFAULT; @@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue) } /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ + result in VALUE. Return true if one was present and it was + successfully parsed. */ -static void +static bool parse_boolean (const char *name, bool *value) { const char *env; env = getenv (name); if (env == NULL) - return; + return false; while (isspace ((unsigned char) *env)) ++env; @@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value) while (isspace ((unsigned char) *env)) ++env; if (*env != '\0') - gomp_error ("Invalid value for environment variable %s", name); + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + return true; } /* Parse the OMP_WAIT_POLICY environment variable and return the value. */ @@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", - gomp_global_icv.nest_var ? "TRUE" : "FALSE"); + gomp_global_icv.max_active_levels_var > 1 ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); for (i = 1; i < gomp_nthreads_var_list_len; i++) @@ -1345,7 +1349,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n", gomp_global_icv.thread_limit_var); fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n", - gomp_max_active_levels_var); + gomp_global_icv.max_active_levels_var); fprintf (stderr, " OMP_CANCELLATION = '%s'\n", gomp_cancel_var ? "TRUE" : "FALSE"); @@ -1417,16 +1421,11 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); - parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); - parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, - true); - if (gomp_max_active_levels_var > gomp_supported_active_levels) - gomp_max_active_levels_var = gomp_supported_active_levels; gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { @@ -1451,6 +1450,23 @@ initialize_env (void) &gomp_bind_var_list_len) && gomp_global_icv.bind_var == omp_proc_bind_false) ignore = true; + if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", + &gomp_global_icv.max_active_levels_var, true)) + { + if (gomp_global_icv.max_active_levels_var > gomp_supported_active_levels) + gomp_global_icv.max_active_levels_var = gomp_supported_active_levels; + } + else + { + bool nested = true; + + /* OMP_NESTED is deprecated in OpenMP 5.0. */ + if (parse_boolean ("OMP_NESTED", &nested)) + gomp_global_icv.max_active_levels_var = + nested ? gomp_supported_active_levels : 1; + else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1) + gomp_global_icv.max_active_levels_var = gomp_supported_active_levels; + } /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always parsed if present in the environment. If OMP_PROC_BIND was set explicitly to false, don't populate places list though. If places diff --git a/libgomp/icv.c b/libgomp/icv.c index 8df15e3..8860cd1 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -57,14 +57,18 @@ void omp_set_nested (int val) { struct gomp_task_icv *icv = gomp_icv (true); - icv->nest_var = val; + if (val) + icv->max_active_levels_var = gomp_supported_active_levels; + else if (icv->max_active_levels_var > 1) + icv->max_active_levels_var = 1; } int omp_get_nested (void) { struct gomp_task_icv *icv = gomp_icv (false); - return icv->nest_var; + return icv->max_active_levels_var > 1 + && icv->max_active_levels_var > omp_get_active_level (); } #pragma GCC diagnostic pop @@ -118,19 +122,21 @@ omp_get_thread_limit (void) void omp_set_max_active_levels (int max_levels) { + struct gomp_task_icv *icv = gomp_icv (false); if (max_levels >= 0) { if (max_levels <= gomp_supported_active_levels) - gomp_max_active_levels_var = max_levels; + icv->max_active_levels_var = max_levels; else - gomp_max_active_levels_var = gomp_supported_active_levels; + icv->max_active_levels_var = gomp_supported_active_levels; } } int omp_get_max_active_levels (void) { - return gomp_max_active_levels_var; + struct gomp_task_icv *icv = gomp_icv (false); + return icv->max_active_levels_var; } int diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 0cc3f4d..1d500bf 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -428,7 +428,7 @@ struct gomp_task_icv int default_device_var; unsigned int thread_limit_var; bool dyn_var; - bool nest_var; + unsigned long max_active_levels_var; char bind_var; /* Internal ICV. */ struct target_mem_desc *target_data; @@ -447,7 +447,6 @@ extern struct gomp_task_icv gomp_global_icv; #ifndef HAVE_SYNC_BUILTINS extern gomp_mutex_t gomp_managed_threads_lock; #endif -extern unsigned long gomp_max_active_levels_var; extern bool gomp_cancel_var; extern enum gomp_target_offload_t gomp_target_offload_var; extern int gomp_max_task_priority_var; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 6937063..473b191 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -487,10 +487,20 @@ This function returns @code{true} if nested parallel regions are enabled, @code{false} otherwise. Here, @code{true} and @code{false} represent their language-specific counterparts. -Nested parallel regions may be initialized at startup by the -@env{OMP_NESTED} environment variable or at runtime using -@code{omp_set_nested}. If undefined, nested parallel regions are -disabled by default. +The state of nested parallel regions at startup depends on several +environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined +and is set to greater than one, then nested parallel regions will be +enabled. If not defined, then the value of the @env{OMP_NESTED} +environment variable will be followed if defined. If neither are +defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} +are defined with a list of more than one value, then nested parallel +regions are enabled. If none of these are defined, then nested parallel +regions are disabled by default. + +Nested parallel regions can be enabled or disabled at runtime using +@code{omp_set_nested}, or by setting the maximum number of nested +regions with @code{omp_set_max_active_levels} to one to disable, or +above one to enable. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -503,7 +513,8 @@ disabled by default. @end multitable @item @emph{See also}: -@ref{omp_set_nested}, @ref{OMP_NESTED} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11. @@ -964,6 +975,10 @@ are allowed to create new teams. The function takes the language-specific equivalent of @code{true} and @code{false}, where @code{true} enables dynamic adjustment of team sizes and @code{false} disables it. +Enabling nested parallel regions will also set the maximum number of +active nested regions to the maximum supported. Disabling nested parallel +regions will set the maximum number of active nested regions to one. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);} @@ -976,7 +991,8 @@ dynamic adjustment of team sizes and @code{false} disables it. @end multitable @item @emph{See also}: -@ref{OMP_NESTED}, @ref{omp_get_nested} +@ref{omp_get_nested}, @ref{omp_set_max_active_levels}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10. @@ -1502,10 +1518,14 @@ disabled by default. @item @emph{Description}: Specifies the initial value for the maximum number of nested parallel regions. The value of this variable shall be a positive integer. -If undefined, the number of active levels is unlimited. +If undefined, then if @env{OMP_NESTED} is defined and set to true, or +if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to +a list with more than one item, the maximum number of nested parallel +regions will be initialized to the largest number supported, otherwise +it will be set to one. @item @emph{See also}: -@ref{omp_set_max_active_levels} +@ref{omp_set_max_active_levels}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9 @@ -1541,11 +1561,16 @@ integer, and zero is allowed. If undefined, the default priority is @item @emph{Description}: Enable or disable nested parallel regions, i.e., whether team members are allowed to create new teams. The value of this environment variable -shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel -regions are disabled by default. +shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the number +of maximum active nested regions supported will by default be set to the +maximum supported, otherwise it will be set to one. If +@env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this +setting. If both are undefined, nested parallel regions are enabled if +@env{OMP_NUM_THREADS} or @env{OMP_PROC_BINDS} are defined to a list with +more than one item, otherwise they are disabled by default. @item @emph{See also}: -@ref{omp_set_nested} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6 @@ -1561,11 +1586,12 @@ regions are disabled by default. @item @emph{Description}: Specifies the default number of threads to use in parallel regions. The value of this variable shall be a comma-separated list of positive integers; -the value specified the number of threads to use for the corresponding nested -level. If undefined one thread per CPU is used. +the value specifies the number of threads to use for the corresponding nested +level. Specifying more than one item in the list will automatically enable +nesting by default. If undefined one thread per CPU is used. @item @emph{See also}: -@ref{omp_set_num_threads} +@ref{omp_set_num_threads}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2 @@ -1586,13 +1612,15 @@ the thread affinity policy for the corresponding nesting level. With @code{MASTER} the worker threads are in the same place partition as the master thread. With @code{CLOSE} those are kept close to the master thread in contiguous place partitions. And with @code{SPREAD} a sparse distribution -across the place partitions is used. +across the place partitions is used. Specifying more than one item in the +list will automatically enable nesting by default. When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when @env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise. @item @emph{See also}: -@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind} +@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY}, +@ref{OMP_NESTED}, @ref{OMP_PLACES} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4 diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 2fe4f573..ebce492 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -53,11 +53,11 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) /* Accelerators with fixed thread counts require this to return 1 for nested parallel regions. */ #if !defined(__AMDGCN__) && !defined(__nvptx__) - && !icv->nest_var + && icv->max_active_levels_var <= 1 #endif ) return 1; - else if (thr->ts.active_level >= gomp_max_active_levels_var) + else if (thr->ts.active_level >= icv->max_active_levels_var) return 1; /* If NUM_THREADS not specified, use nthreads_var. */
On Thu, Nov 12, 2020 at 10:44:35PM +0000, Kwok Cheung Yeung wrote: > + /* OMP_NESTED is deprecated in OpenMP 5.0. */ > + if (parse_boolean ("OMP_NESTED", &nested)) > + gomp_global_icv.max_active_levels_var = > + nested ? gomp_supported_active_levels : 1; Formatting - = should be on the next line, indented 2 columns further from gomp_global_icv. > int > omp_get_nested (void) > { > struct gomp_task_icv *icv = gomp_icv (false); > - return icv->nest_var; > + return icv->max_active_levels_var > 1 > + && icv->max_active_levels_var > omp_get_active_level (); Formatting, should be: return (icv->max_active_levels_var > 1 && icv->max_active_levels_var > omp_get_active_level ()); > @@ -118,19 +122,21 @@ omp_get_thread_limit (void) > void > omp_set_max_active_levels (int max_levels) > { > + struct gomp_task_icv *icv = gomp_icv (false); Should be gomp_icv (true), because it modifies the ICVs rather than just querying them. And perhaps move it inside of the if (max_levels >= 0) if. > if (max_levels >= 0) > { > if (max_levels <= gomp_supported_active_levels) > - gomp_max_active_levels_var = max_levels; > + icv->max_active_levels_var = max_levels; > else > - gomp_max_active_levels_var = gomp_supported_active_levels; > + icv->max_active_levels_var = gomp_supported_active_levels; > } > } > --- a/libgomp/libgomp.h > +++ b/libgomp/libgomp.h > @@ -428,7 +428,7 @@ struct gomp_task_icv > int default_device_var; > unsigned int thread_limit_var; > bool dyn_var; > - bool nest_var; > + unsigned long max_active_levels_var; > char bind_var; > /* Internal ICV. */ > struct target_mem_desc *target_data; This is in __thread vars, so we can't waste space in useless paddings or overlong fields. On 64-bit arches, thread_limit_var ends on 64-bit boundary and target_data starts at that boundary, so with bool; unsigned long; char in between that means 24 bytes in between them including 14 bytes of padding. There is no point to make max_active_levels_var unsigned long, gomp_supported_active_levels is INT_MAX, so one can't have anything higher than that anyway. So, either bool dyn_var; char bind_var; int max_active_levels_var; which will be 8 bytes together on both 64-bit and 32-bit arches, or perhaps we could go even further and lower the max active levels to USHRT_MAX and use unsigned short, 65535 nesting levels is many thousands more than what a reasonable program should try, after all, only active levels (aka where there are 2 or more threads) count, so already 32 active levels when each level has 2 threads means 4 billion threads, and 64 active levels means 18 quintillion threads, so even 64 active levels are impossible in 64-bit address spaces even if each thread occupied just 1 byte rather than several pages. So, let's change gomp_supported_active_levels to say 255 and use bool dyn_var; unsigned char max_active_levels_var; char bind_var; Jakub
On 18/11/2020 11:41 am, Jakub Jelinek wrote: > On Thu, Nov 12, 2020 at 10:44:35PM +0000, Kwok Cheung Yeung wrote: >> + /* OMP_NESTED is deprecated in OpenMP 5.0. */ >> + if (parse_boolean ("OMP_NESTED", &nested)) >> + gomp_global_icv.max_active_levels_var = >> + nested ? gomp_supported_active_levels : 1; > > Formatting - = should be on the next line, indented 2 columns further from > gomp_global_icv. > Fixed. >> int >> omp_get_nested (void) >> { >> struct gomp_task_icv *icv = gomp_icv (false); >> - return icv->nest_var; >> + return icv->max_active_levels_var > 1 >> + && icv->max_active_levels_var > omp_get_active_level (); > > Formatting, should be: > return (icv->max_active_levels_var > 1 > && icv->max_active_levels_var > omp_get_active_level ()); > Fixed. >> @@ -118,19 +122,21 @@ omp_get_thread_limit (void) >> void >> omp_set_max_active_levels (int max_levels) >> { >> + struct gomp_task_icv *icv = gomp_icv (false); > > Should be gomp_icv (true), because it modifies the ICVs rather than > just querying them. And perhaps move it inside of the if (max_levels >= 0) > if. Done. > So, let's change gomp_supported_active_levels to say 255 and use > bool dyn_var; > unsigned char max_active_levels_var; > char bind_var; > Done (though I used UCHAR_MAX instead of 255). The change in type requires changing a format specifier from %lu to %u in handle_omp_display_env, and the use of a temporary when parsing OMP_MAX_ACTIVE_LEVELS in initialize_env. If there are no objections, I will commit this to master and OG10 shortly. Bootstrapped on x86_64 with no offloading, and tested libgomp with Nvidia offloading with no regressions. Thanks Kwok From a75481979c86aa1da5b5da641fc776bc71d156f7 Mon Sep 17 00:00:00 2001 From: Kwok Cheung Yeung <kcy@codesourcery.com> Date: Wed, 18 Nov 2020 10:02:00 -0800 Subject: [PATCH] openmp: Retire nest-var ICV for OpenMP 5.1 This removes the nest-var ICV, expressing nesting in terms of the max-active-levels-var ICV instead. The max-active-levels-var ICV is now per data environment rather than per device. 2020-11-18 Kwok Cheung Yeung <kcy@codesourcery.com> libgomp/ * env.c (gomp_global_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_max_active_levels_var): Remove. (parse_boolean): Return true on success. (handle_omp_display_env): Express OMP_NESTED in terms of max_active_levels_var. Change format specifier for max_active_levels_var. (initialize_env): Set max_active_levels_var from OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and OMP_PROC_BIND. * icv.c (omp_set_nested): Express in terms of max_active_levels_var. (omp_get_nested): Likewise. (omp_set_max_active_levels): Use max_active_levels_var field instead of gomp_max_active_levels_var. (omp_get_max_active_levels): Likewise. * libgomp.h (struct gomp_task_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_supported_active_levels): Set to UCHAR_MAX. (gomp_max_active_levels_var): Delete. * libgomp.texi (omp_get_nested): Update documentation. (omp_set_nested): Likewise. (OMP_MAX_ACTIVE_LEVELS): Likewise. (OMP_NESTED): Likewise. (OMP_NUM_THREADS): Likewise. (OMP_PROC_BIND): Likewise. * parallel.c (gomp_resolve_num_threads): Replace reference to nest_var with max_active_levels_var. Use max_active_levels_var field instead of gomp_max_active_levels_var. --- libgomp/env.c | 44 ++++++++++++++++++++++++++------------ libgomp/icv.c | 17 ++++++++++----- libgomp/libgomp.h | 5 ++--- libgomp/libgomp.texi | 60 ++++++++++++++++++++++++++++++++++++++-------------- libgomp/parallel.c | 4 ++-- 5 files changed, 90 insertions(+), 40 deletions(-) diff --git a/libgomp/env.c b/libgomp/env.c index ab22525..5a49ae6 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = { .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, - .nest_var = false, + .max_active_levels_var = 1, .bind_var = omp_proc_bind_false, .target_data = NULL }; -unsigned long gomp_max_active_levels_var = gomp_supported_active_levels; bool gomp_cancel_var = false; enum gomp_target_offload_t gomp_target_offload_var = GOMP_TARGET_OFFLOAD_DEFAULT; @@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue) } /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ + result in VALUE. Return true if one was present and it was + successfully parsed. */ -static void +static bool parse_boolean (const char *name, bool *value) { const char *env; env = getenv (name); if (env == NULL) - return; + return false; while (isspace ((unsigned char) *env)) ++env; @@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value) while (isspace ((unsigned char) *env)) ++env; if (*env != '\0') - gomp_error ("Invalid value for environment variable %s", name); + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + return true; } /* Parse the OMP_WAIT_POLICY environment variable and return the value. */ @@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", - gomp_global_icv.nest_var ? "TRUE" : "FALSE"); + gomp_global_icv.max_active_levels_var > 1 ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); for (i = 1; i < gomp_nthreads_var_list_len; i++) @@ -1344,8 +1348,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) wait_policy > 0 ? "ACTIVE" : "PASSIVE"); fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n", gomp_global_icv.thread_limit_var); - fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n", - gomp_max_active_levels_var); + fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%u'\n", + gomp_global_icv.max_active_levels_var); fprintf (stderr, " OMP_CANCELLATION = '%s'\n", gomp_cancel_var ? "TRUE" : "FALSE"); @@ -1410,6 +1414,7 @@ static void __attribute__((constructor)) initialize_env (void) { unsigned long thread_limit_var, stacksize = GOMP_DEFAULT_STACKSIZE; + unsigned long max_active_levels_var; int wait_policy; /* Do a compile time check that mkomp_h.pl did good job. */ @@ -1417,16 +1422,11 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); - parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); - parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, - true); - if (gomp_max_active_levels_var > gomp_supported_active_levels) - gomp_max_active_levels_var = gomp_supported_active_levels; gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { @@ -1451,6 +1451,22 @@ initialize_env (void) &gomp_bind_var_list_len) && gomp_global_icv.bind_var == omp_proc_bind_false) ignore = true; + if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", + &max_active_levels_var, true)) + gomp_global_icv.max_active_levels_var + = (max_active_levels_var > gomp_supported_active_levels) + ? gomp_supported_active_levels : max_active_levels_var; + else + { + bool nested = true; + + /* OMP_NESTED is deprecated in OpenMP 5.0. */ + if (parse_boolean ("OMP_NESTED", &nested)) + gomp_global_icv.max_active_levels_var + = nested ? gomp_supported_active_levels : 1; + else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1) + gomp_global_icv.max_active_levels_var = gomp_supported_active_levels; + } /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always parsed if present in the environment. If OMP_PROC_BIND was set explicitly to false, don't populate places list though. If places diff --git a/libgomp/icv.c b/libgomp/icv.c index 8df15e3..c0c0305 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -57,14 +57,18 @@ void omp_set_nested (int val) { struct gomp_task_icv *icv = gomp_icv (true); - icv->nest_var = val; + if (val) + icv->max_active_levels_var = gomp_supported_active_levels; + else if (icv->max_active_levels_var > 1) + icv->max_active_levels_var = 1; } int omp_get_nested (void) { struct gomp_task_icv *icv = gomp_icv (false); - return icv->nest_var; + return (icv->max_active_levels_var > 1 + && icv->max_active_levels_var > omp_get_active_level ()); } #pragma GCC diagnostic pop @@ -120,17 +124,20 @@ omp_set_max_active_levels (int max_levels) { if (max_levels >= 0) { + struct gomp_task_icv *icv = gomp_icv (true); + if (max_levels <= gomp_supported_active_levels) - gomp_max_active_levels_var = max_levels; + icv->max_active_levels_var = max_levels; else - gomp_max_active_levels_var = gomp_supported_active_levels; + icv->max_active_levels_var = gomp_supported_active_levels; } } int omp_get_max_active_levels (void) { - return gomp_max_active_levels_var; + struct gomp_task_icv *icv = gomp_icv (false); + return icv->max_active_levels_var; } int diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 0cc3f4d..070d29c 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -428,7 +428,7 @@ struct gomp_task_icv int default_device_var; unsigned int thread_limit_var; bool dyn_var; - bool nest_var; + unsigned char max_active_levels_var; char bind_var; /* Internal ICV. */ struct target_mem_desc *target_data; @@ -441,13 +441,12 @@ enum gomp_target_offload_t GOMP_TARGET_OFFLOAD_DISABLED }; -#define gomp_supported_active_levels INT_MAX +#define gomp_supported_active_levels UCHAR_MAX extern struct gomp_task_icv gomp_global_icv; #ifndef HAVE_SYNC_BUILTINS extern gomp_mutex_t gomp_managed_threads_lock; #endif -extern unsigned long gomp_max_active_levels_var; extern bool gomp_cancel_var; extern enum gomp_target_offload_t gomp_target_offload_var; extern int gomp_max_task_priority_var; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 6937063..473b191 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -487,10 +487,20 @@ This function returns @code{true} if nested parallel regions are enabled, @code{false} otherwise. Here, @code{true} and @code{false} represent their language-specific counterparts. -Nested parallel regions may be initialized at startup by the -@env{OMP_NESTED} environment variable or at runtime using -@code{omp_set_nested}. If undefined, nested parallel regions are -disabled by default. +The state of nested parallel regions at startup depends on several +environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined +and is set to greater than one, then nested parallel regions will be +enabled. If not defined, then the value of the @env{OMP_NESTED} +environment variable will be followed if defined. If neither are +defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} +are defined with a list of more than one value, then nested parallel +regions are enabled. If none of these are defined, then nested parallel +regions are disabled by default. + +Nested parallel regions can be enabled or disabled at runtime using +@code{omp_set_nested}, or by setting the maximum number of nested +regions with @code{omp_set_max_active_levels} to one to disable, or +above one to enable. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -503,7 +513,8 @@ disabled by default. @end multitable @item @emph{See also}: -@ref{omp_set_nested}, @ref{OMP_NESTED} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11. @@ -964,6 +975,10 @@ are allowed to create new teams. The function takes the language-specific equivalent of @code{true} and @code{false}, where @code{true} enables dynamic adjustment of team sizes and @code{false} disables it. +Enabling nested parallel regions will also set the maximum number of +active nested regions to the maximum supported. Disabling nested parallel +regions will set the maximum number of active nested regions to one. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);} @@ -976,7 +991,8 @@ dynamic adjustment of team sizes and @code{false} disables it. @end multitable @item @emph{See also}: -@ref{OMP_NESTED}, @ref{omp_get_nested} +@ref{omp_get_nested}, @ref{omp_set_max_active_levels}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10. @@ -1502,10 +1518,14 @@ disabled by default. @item @emph{Description}: Specifies the initial value for the maximum number of nested parallel regions. The value of this variable shall be a positive integer. -If undefined, the number of active levels is unlimited. +If undefined, then if @env{OMP_NESTED} is defined and set to true, or +if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to +a list with more than one item, the maximum number of nested parallel +regions will be initialized to the largest number supported, otherwise +it will be set to one. @item @emph{See also}: -@ref{omp_set_max_active_levels} +@ref{omp_set_max_active_levels}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9 @@ -1541,11 +1561,16 @@ integer, and zero is allowed. If undefined, the default priority is @item @emph{Description}: Enable or disable nested parallel regions, i.e., whether team members are allowed to create new teams. The value of this environment variable -shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel -regions are disabled by default. +shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the number +of maximum active nested regions supported will by default be set to the +maximum supported, otherwise it will be set to one. If +@env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this +setting. If both are undefined, nested parallel regions are enabled if +@env{OMP_NUM_THREADS} or @env{OMP_PROC_BINDS} are defined to a list with +more than one item, otherwise they are disabled by default. @item @emph{See also}: -@ref{omp_set_nested} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6 @@ -1561,11 +1586,12 @@ regions are disabled by default. @item @emph{Description}: Specifies the default number of threads to use in parallel regions. The value of this variable shall be a comma-separated list of positive integers; -the value specified the number of threads to use for the corresponding nested -level. If undefined one thread per CPU is used. +the value specifies the number of threads to use for the corresponding nested +level. Specifying more than one item in the list will automatically enable +nesting by default. If undefined one thread per CPU is used. @item @emph{See also}: -@ref{omp_set_num_threads} +@ref{omp_set_num_threads}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2 @@ -1586,13 +1612,15 @@ the thread affinity policy for the corresponding nesting level. With @code{MASTER} the worker threads are in the same place partition as the master thread. With @code{CLOSE} those are kept close to the master thread in contiguous place partitions. And with @code{SPREAD} a sparse distribution -across the place partitions is used. +across the place partitions is used. Specifying more than one item in the +list will automatically enable nesting by default. When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when @env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise. @item @emph{See also}: -@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind} +@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY}, +@ref{OMP_NESTED}, @ref{OMP_PLACES} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4 diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 2fe4f573..ebce492 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -53,11 +53,11 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) /* Accelerators with fixed thread counts require this to return 1 for nested parallel regions. */ #if !defined(__AMDGCN__) && !defined(__nvptx__) - && !icv->nest_var + && icv->max_active_levels_var <= 1 #endif ) return 1; - else if (thr->ts.active_level >= gomp_max_active_levels_var) + else if (thr->ts.active_level >= icv->max_active_levels_var) return 1; /* If NUM_THREADS not specified, use nthreads_var. */
On Wed, Nov 18, 2020 at 07:05:19PM +0000, Kwok Cheung Yeung wrote: > From a75481979c86aa1da5b5da641fc776bc71d156f7 Mon Sep 17 00:00:00 2001 > From: Kwok Cheung Yeung <kcy@codesourcery.com> > Date: Wed, 18 Nov 2020 10:02:00 -0800 > Subject: [PATCH] openmp: Retire nest-var ICV for OpenMP 5.1 > > This removes the nest-var ICV, expressing nesting in terms of the > max-active-levels-var ICV instead. The max-active-levels-var ICV > is now per data environment rather than per device. > > 2020-11-18 Kwok Cheung Yeung <kcy@codesourcery.com> > > libgomp/ > * env.c (gomp_global_icv): Remove nest_var field. Add > max_active_levels_var field. > (gomp_max_active_levels_var): Remove. > (parse_boolean): Return true on success. > (handle_omp_display_env): Express OMP_NESTED in terms of > max_active_levels_var. Change format specifier for > max_active_levels_var. > (initialize_env): Set max_active_levels_var from > OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and > OMP_PROC_BIND. > * icv.c (omp_set_nested): Express in terms of > max_active_levels_var. > (omp_get_nested): Likewise. > (omp_set_max_active_levels): Use max_active_levels_var field instead > of gomp_max_active_levels_var. > (omp_get_max_active_levels): Likewise. > * libgomp.h (struct gomp_task_icv): Remove nest_var field. Add > max_active_levels_var field. > (gomp_supported_active_levels): Set to UCHAR_MAX. > (gomp_max_active_levels_var): Delete. > * libgomp.texi (omp_get_nested): Update documentation. > (omp_set_nested): Likewise. > (OMP_MAX_ACTIVE_LEVELS): Likewise. > (OMP_NESTED): Likewise. > (OMP_NUM_THREADS): Likewise. > (OMP_PROC_BIND): Likewise. > * parallel.c (gomp_resolve_num_threads): Replace reference > to nest_var with max_active_levels_var. Use max_active_levels_var > field instead of gomp_max_active_levels_var. LGTM, thanks. Jakub
diff --git a/libgomp/env.c b/libgomp/env.c index ab22525..75d0fe2 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = { .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, - .nest_var = false, .bind_var = omp_proc_bind_false, .target_data = NULL }; -unsigned long gomp_max_active_levels_var = gomp_supported_active_levels; +unsigned long gomp_max_active_levels_var = 1; bool gomp_cancel_var = false; enum gomp_target_offload_t gomp_target_offload_var = GOMP_TARGET_OFFLOAD_DEFAULT; @@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue) } /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ + result in VALUE. Return true if one was present and it was + successfully parsed. */ -static void +static bool parse_boolean (const char *name, bool *value) { const char *env; env = getenv (name); if (env == NULL) - return; + return false; while (isspace ((unsigned char) *env)) ++env; @@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value) while (isspace ((unsigned char) *env)) ++env; if (*env != '\0') - gomp_error ("Invalid value for environment variable %s", name); + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + return true; } /* Parse the OMP_WAIT_POLICY environment variable and return the value. */ @@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", - gomp_global_icv.nest_var ? "TRUE" : "FALSE"); + gomp_max_active_levels_var > 1 ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); for (i = 1; i < gomp_nthreads_var_list_len; i++) @@ -1417,16 +1421,11 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); - parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); - parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, - true); - if (gomp_max_active_levels_var > gomp_supported_active_levels) - gomp_max_active_levels_var = gomp_supported_active_levels; gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { @@ -1451,6 +1450,23 @@ initialize_env (void) &gomp_bind_var_list_len) && gomp_global_icv.bind_var == omp_proc_bind_false) ignore = true; + if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", + &gomp_max_active_levels_var, true)) + { + if (gomp_max_active_levels_var > gomp_supported_active_levels) + gomp_max_active_levels_var = gomp_supported_active_levels; + } + else + { + bool nested = true; + + /* OMP_NESTED is deprecated in OpenMP 5.0. */ + if (parse_boolean ("OMP_NESTED", &nested)) + gomp_max_active_levels_var = nested ? gomp_supported_active_levels + : 1; + else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1) + gomp_max_active_levels_var = gomp_supported_active_levels; + } /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always parsed if present in the environment. If OMP_PROC_BIND was set explicitly to false, don't populate places list though. If places diff --git a/libgomp/icv.c b/libgomp/icv.c index 8df15e3..f54ccb1 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -56,15 +56,16 @@ omp_get_dynamic (void) void omp_set_nested (int val) { - struct gomp_task_icv *icv = gomp_icv (true); - icv->nest_var = val; + if (val) + gomp_max_active_levels_var = gomp_supported_active_levels; + else if (gomp_max_active_levels_var > 1) + gomp_max_active_levels_var = 1; } int omp_get_nested (void) { - struct gomp_task_icv *icv = gomp_icv (false); - return icv->nest_var; + return gomp_max_active_levels_var > 1; } #pragma GCC diagnostic pop diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index da7ac03..05aa1e4 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -428,7 +428,6 @@ struct gomp_task_icv int default_device_var; unsigned int thread_limit_var; bool dyn_var; - bool nest_var; char bind_var; /* Internal ICV. */ struct target_mem_desc *target_data; diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 2fe4f573..9629470 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -53,7 +53,7 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) /* Accelerators with fixed thread counts require this to return 1 for nested parallel regions. */ #if !defined(__AMDGCN__) && !defined(__nvptx__) - && !icv->nest_var + && gomp_max_active_levels_var <= 1 #endif ) return 1; diff --git a/libgomp/testsuite/libgomp.c/target-5.c b/libgomp/testsuite/libgomp.c/target-5.c index 21a69ea..ec31f89 100644 --- a/libgomp/testsuite/libgomp.c/target-5.c +++ b/libgomp/testsuite/libgomp.c/target-5.c @@ -1,5 +1,3 @@ -/* { dg-additional-options "-Wno-deprecated-declarations" } */ - #include <omp.h> #include <stdlib.h> @@ -7,17 +5,14 @@ int main () { int d_o = omp_get_dynamic (); - int n_o = omp_get_nested (); omp_sched_t s_o; int c_o; omp_get_schedule (&s_o, &c_o); int m_o = omp_get_max_threads (); omp_set_dynamic (1); - omp_set_nested (1); omp_set_schedule (omp_sched_static, 2); omp_set_num_threads (4); int d = omp_get_dynamic (); - int n = omp_get_nested (); omp_sched_t s; int c; omp_get_schedule (&s, &c); @@ -30,13 +25,11 @@ main () int c_c; omp_get_schedule (&s_c, &c_c); if (d_o != omp_get_dynamic () - || n_o != omp_get_nested () || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ()) abort (); omp_set_dynamic (0); - omp_set_nested (0); omp_set_schedule (omp_sched_dynamic, 4); omp_set_num_threads (2); if (!omp_is_initial_device ()) @@ -48,7 +41,6 @@ main () int c_c; omp_get_schedule (&s_c, &c_c); if (d != omp_get_dynamic () - || n != omp_get_nested () || s != s_c || c != c_c || m != omp_get_max_threads ()) @@ -60,13 +52,11 @@ main () int c_c; omp_get_schedule (&s_c, &c_c); if (d_o != omp_get_dynamic () - || n_o != omp_get_nested () || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ()) abort (); omp_set_dynamic (0); - omp_set_nested (0); omp_set_schedule (omp_sched_dynamic, 4); omp_set_num_threads (2); if (!omp_is_initial_device ()) @@ -76,7 +66,6 @@ main () abort (); omp_get_schedule (&s_c, &c_c); if (d != omp_get_dynamic () - || n != omp_get_nested () || s != s_c || c != c_c || m != omp_get_max_threads ())