diff mbox series

[3/8] libgomp: runtime support for target_device selector

Message ID 20240106185257.126445-4-sandra@codesourcery.com
State New
Headers show
Series OpenMP: Implement metadirective support | expand

Commit Message

Sandra Loosemore Jan. 6, 2024, 6:52 p.m. UTC
From: Kwok Cheung Yeung <kcy@codesourcery.com>

This patch implements the libgomp runtime support for the dynamic
target_device selector via the GOMP_evaluate_target_device function.

include/ChangeLog
	* cuda/cuda.h (CUdevice_attribute): Add definitions for
	CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR and
	CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR.

libgomp/ChangeLog
	* Makefile.am (libgomp_la_SOURCES): Add selector.c.
	* Makefile.in: Regenerate.
	* config/gcn/selector.c: New.
	* config/linux/selector.c: New.
	* config/linux/x86/selector.c: New.
	* config/nvptx/selector.c: New.
	* libgomp-plugin.h (GOMP_OFFLOAD_evaluate_device): New.
	* libgomp.h (struct gomp_device_descr): Add evaluate_device_func field.
	* libgomp.map (GOMP_5.1): Add GOMP_evaluate_target_device.
	* libgomp_g.h (GOMP_evaluate_current_device): New.
	(GOMP_evaluate_target_device): New.
	* oacc-host.c (host_evaluate_device): New.
	(host_dispatch): Initialize evaluate_device_func field to
	host_evaluate_device.
	* plugin/plugin-gcn.c (GOMP_OFFLOAD_evaluate_device): New.
	* plugin/plugin-nvptx.c (struct ptx_device): Add compute_major and
	compute_minor fields.
	(nvptx_open_device): Read compute capability information from device.
	(CHECK_ISA): New macro.
	(GOMP_OFFLOAD_evaluate_device): New.
	* selector.c: New.
	* target.c (GOMP_evaluate_target_device): New.
	(gomp_load_plugin_for_device): Load evaluate_device plugin function.
---
 include/cuda/cuda.h                 |   2 +
 libgomp/Makefile.am                 |   2 +-
 libgomp/Makefile.in                 |   5 +-
 libgomp/config/gcn/selector.c       |  57 +++++
 libgomp/config/linux/selector.c     |  43 ++++
 libgomp/config/linux/x86/selector.c | 325 ++++++++++++++++++++++++++++
 libgomp/config/nvptx/selector.c     |  65 ++++++
 libgomp/libgomp-plugin.h            |   2 +
 libgomp/libgomp.h                   |   1 +
 libgomp/libgomp.map                 |   1 +
 libgomp/libgomp_g.h                 |   8 +
 libgomp/oacc-host.c                 |  11 +
 libgomp/plugin/plugin-gcn.c         |  14 ++
 libgomp/plugin/plugin-nvptx.c       |  45 ++++
 libgomp/selector.c                  |  36 +++
 libgomp/target.c                    |  38 ++++
 16 files changed, 652 insertions(+), 3 deletions(-)
 create mode 100644 libgomp/config/gcn/selector.c
 create mode 100644 libgomp/config/linux/selector.c
 create mode 100644 libgomp/config/linux/x86/selector.c
 create mode 100644 libgomp/config/nvptx/selector.c
 create mode 100644 libgomp/selector.c

Comments

Tobias Burnus Jan. 6, 2024, 10:44 p.m. UTC | #1
Hi Sandra,

looks quite okay, but I have a couple of remarks:

Sandra Loosemore wrote:
> From: Kwok Cheung Yeung<kcy@codesourcery.com>
> 
> This patch implements the libgomp runtime support for the dynamic
> target_device selector via the GOMP_evaluate_target_device function.

...
> --- /dev/null
> +++ b/libgomp/config/gcn/selector.c
...
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> +			      const char *isa)
> +{
> +  if (kind && strcmp (kind, "gpu") != 0)
> +    return false;

This should also match: kind == nohost.

> +
> +  if (arch && strcmp (arch, "gcn") != 0)
> +    return false;

"amdgcn" missing - we support both for better compatibility with LLVM.

> +  if (!isa)
> +    return true;
> +
> +#ifdef __GCN3__
> +  if (strcmp (isa, "fiji") == 0 || strcmp (isa, "gfx803") == 0)
> +    return true;
> +#endif
> +
> +#ifdef __GCN5__
> +  if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") != 0
> +      || strcmp (isa, "gfx908") == 0)
> +    return true;
> +#endif

This misses gfx90a and gfx1030. Additionally, the "gfx906" comparison
uses != 0 instead of == 0 and therefore matches too much. Can you use

#ifdef __fiji__
#ifdef __gfx900__
etc.

instead?

> --- /dev/null
> +++ b/libgomp/config/linux/selector.c
...
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> +			      const char *isa)
> +{
> +  if (kind && strcmp (kind, "cpu") != 0)
> +    return false;

You also need to match "host".

> diff --git a/libgomp/config/linux/x86/selector.c b/libgomp/config/linux/x86/selector.c
> new file mode 100644
> index 00000000000..2b6c2ba165b
> --- /dev/null
> +++ b/libgomp/config/linux/x86/selector.c
...

> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> +			      const char *isa)
> +{
> +  if (kind && strcmp (kind, "cpu") != 0)
> +    return false;

This misses "host" as well.

> +  if (arch
> +      && strcmp (arch, "x86") != 0
> +      && strcmp (arch, "ia32") != 0
> +#ifdef __x86_64__
> +      && strcmp (arch, "x86_64") != 0
> +#endif
> +#ifdef __ILP32__
> +      && strcmp (arch, "x32") != 0
> +#endif
> +      && strcmp (arch, "i386") != 0
> +      && strcmp (arch, "i486") != 0
> +#ifndef __i486__
> +      && strcmp (arch, "i586") != 0
> +#endif
> +#if !defined (__i486__) && !defined (__i586__)
> +      && strcmp (arch, "i686") != 0
> +#endif

The 'i486' seems to lack a #ifdef __i486__ check.
And it seems to be such that
   i486 implies i386
   i586 implies i486 and i386
   etc.
if I understand ix86_omp_device_kind_arch_isa in
gcc/config/i386/i386-options.cc correctly.


There is of course the problem that the compilation flags used for
libgomp are very likely different from the compilation flags of the user
program, which in turn can differ between files.

Thus, I think we should update
   https://gcc.gnu.org/onlinedocs/libgomp/OpenMP-Context-Selectors.html
(a) the host compiler always also matches "cpu"
(b) We probably should state somewhere that:
     * on x86, both the arch = i486 to i686 and the isa flags depend on
       the command line arguments more than on the actual hardware.
     * that's especially true for dynamic selectors as the flags used
       can differ between 'compilation units' and also the flags used
       for the run-time library.
     * For nvptx: on the device side, the -march= value implies that all
       sm_* values lower than it are set.
       For target_device, the actual hardware is checked at run time,
       implying the highest of the gcc-manual listed -march= values is
       selected that the hardware actually supports at runtime.

For (b) we will have to find some better wording and possibly be less
precise, but I think some kind of warning/note is needed here.

> +  if (!isa)
> +    return true;
> +
> +#ifdef __WBNOINVD__
> +  if (strcmp (isa, "wbnoinvd") == 0) return true;
> +#endif

I think at least the following are missing:

-mavx10.1-256 and -mavx10.1-512
do not seem to have a #define
→ Maybe we should file a PR given that those
seem to be the only missing ones.

otherwise:

__AVX10_512BIT__ and "avx10-max-512bit"
__AVX10_1__ and "avx10.1"
__AMX_FP16__ and "amx-fp16"
__CMPCCXADD__ and "cmpccxadd"
__AVXNECONVERT__ and "avxneconvert"
__RAOINT__ and "raoint"
__PREFETCHI__ and "prefetchi"
__USER_MSR__ and "usermsr".
__EVEX256__ and "evex512".
__AVXVNNIINT8__ and "avxvnniint8"
__SM4__ and "sm4"
__SHA512__ and "sha512"
__SM3__ and "sm3"
__AVXVNNIINT16__ and "avxvnniint16"
__AMX_COMPLEX__ and "amx-complex"
__AVXIFMA__ and "avxifma"

and possibly some more, but the list might also be complete.


> +++ b/libgomp/config/nvptx/selector.c
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> +			      const char *isa)
> +{
> +  if (kind && strcmp (kind, "gpu") != 0)
> +    return false;
"nohost" missing.

> --- a/libgomp/libgomp.map
> +++ b/libgomp/libgomp.map
> @@ -414,6 +414,7 @@ GOMP_5.1 {
>   	GOMP_scope_start;
>   	GOMP_warning;
>   	GOMP_teams4;
> +	GOMP_evaluate_target_device;
>   } GOMP_5.0.1;

This looks wrong. In my understanding you cannot just add entries to
the symbol-version group of an older release; new symbols need a new
group in a new compiler release. In any case, I believe for GCC 14
it should be added to GOMP_5.1.2.

But if in doubt, ask Jakub, who knows this inside out.

> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c
> @@ -3984,6 +3984,20 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
>   		       GOMP_PLUGIN_target_task_completion, async_data);
>   }
>   
> +bool
> +GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
> +			      const char *arch, const char *isa)
> +{
> +  struct agent_info *agent = get_agent_info (device_num);
> +
> +  if (kind && strcmp (kind, "gpu") != 0)
> +    return false;

"nohost" missing

> +  if (arch && strcmp (arch, "gcn") != 0)
> +    return false;

"amdgcn" missing.

> index c04c3acd679..9dcd8a6f6eb 100644
> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c
...
> +bool
> +GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
> +			      const char *arch, const char *isa)
> +{
> +  if (kind && strcmp (kind, "gpu") != 0)
> +    return false;

"nohost" missing.

> +++ b/libgomp/selector.c
> @@ -0,0 +1,36 @@
> +/* Copyright (C) 2022 Free Software Foundation, Inc.
> +   Contributed by Mentor, a Siemens Business.
> +
> +   This file is part of the GNU Offloading and Multi Processing Library
> +   (libgomp).
> +
> +   Libgomp is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* This file contains a placeholder implementation of
> +   GOMP_evaluate_current_device.  */
> +
> +#include "libgomp.h"
> +
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> +			      const char *isa)
> +{
> +  return false;
> +}

Isn't this called in some cases on the host? If so, it should
support kind == "host" and kind == "cpu".

> diff --git a/libgomp/target.c b/libgomp/target.c
> index 1367e9cce6c..206987953dc 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -5088,6 +5088,43 @@ omp_pause_resource_all (omp_pause_resource_t kind)
>   ialias (omp_pause_resource)
>   ialias (omp_pause_resource_all)
>   
> +bool
> +GOMP_evaluate_target_device (int device_num, const char *kind,
> +			     const char *arch, const char *isa)
> +{
> +  bool result = true;
> +
> +  if (device_num < 0)
> +    device_num = omp_get_default_device ();

As mentioned with regards to 1/8, 'omp_initial_device == -1' according
to the OpenMP standard and there is additionally 'omp_invalid_device'.

Thanks,

Tobias
Tobias Burnus Jan. 7, 2024, 11:48 a.m. UTC | #2
Tobias Burnus wrote:
> Sandra Loosemore wrote:
>> From: Kwok Cheung Yeung<kcy@codesourcery.com>
>>
>> This patch implements the libgomp runtime support for the dynamic
>> target_device selector via the GOMP_evaluate_target_device function.

...

> +GOMP_evaluate_target_device (int device_num, const char *kind,
> +			     const char *arch, const char *isa)
> +{
> +  bool result = true;
> +
> +  if (device_num < 0)
> +    device_num = omp_get_default_device ();
> +
> +  if (kind && strcmp (kind, "any") == 0)
> +    kind = NULL;

I wonder whether we shouldn't be able to do an early return here,
given that:

"If trait-property 'any' is specified in the 'kind' trait-selector of 
the device selector set or the target_device selector sets, no other 
trait-property may be specified in the same selector set."

[From "Restrictions to context selectors are as follows:", here quoting 
TR12]

Tobias
diff mbox series

Patch

diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 114aba4e074..0d57bdd68e9 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -82,6 +82,8 @@  typedef enum {
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
   CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
   CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
 } CUdevice_attribute;
 
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 1871590596d..87658da2d5d 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -72,7 +72,7 @@  libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
 	oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
 	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c target-indirect.c
+	oacc-target.c target-indirect.c selector.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 56a6beab867..81de2ddb943 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,7 @@  am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
 	oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
 	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
-	oacc-target.lo target-indirect.lo $(am__objects_1)
+	oacc-target.lo target-indirect.lo selector.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +552,7 @@  libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c target-indirect.c $(am__append_3)
+	oacc-target.c target-indirect.c selector.c $(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -777,6 +777,7 @@  distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/selector.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
diff --git a/libgomp/config/gcn/selector.c b/libgomp/config/gcn/selector.c
new file mode 100644
index 00000000000..60793fc05d3
--- /dev/null
+++ b/libgomp/config/gcn/selector.c
@@ -0,0 +1,57 @@ 
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Mentor, a Siemens Business.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+   an AMD GCN GPU.  */
+
+#include "libgomp.h"
+#include <string.h>
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+
+  if (arch && strcmp (arch, "gcn") != 0)
+    return false;
+
+  if (!isa)
+    return true;
+
+#ifdef __GCN3__
+  if (strcmp (isa, "fiji") == 0 || strcmp (isa, "gfx803") == 0)
+    return true;
+#endif
+
+#ifdef __GCN5__
+  if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") != 0
+      || strcmp (isa, "gfx908") == 0)
+    return true;
+#endif
+
+  return false;
+}
diff --git a/libgomp/config/linux/selector.c b/libgomp/config/linux/selector.c
new file mode 100644
index 00000000000..84e59c7aabe
--- /dev/null
+++ b/libgomp/config/linux/selector.c
@@ -0,0 +1,43 @@ 
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Mentor, a Siemens Business.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains a generic implementation of
+   GOMP_evaluate_current_device when run on a Linux host.  */
+
+#include <string.h>
+#include "libgomp.h"
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  if (kind && strcmp (kind, "cpu") != 0)
+    return false;
+
+  if (!arch && !isa)
+    return true;
+
+  return false;
+}
diff --git a/libgomp/config/linux/x86/selector.c b/libgomp/config/linux/x86/selector.c
new file mode 100644
index 00000000000..2b6c2ba165b
--- /dev/null
+++ b/libgomp/config/linux/x86/selector.c
@@ -0,0 +1,325 @@ 
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Mentor, a Siemens Business.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+   an x86/x64-based Linux host.  */
+
+#include <string.h>
+#include "libgomp.h"
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  if (kind && strcmp (kind, "cpu") != 0)
+    return false;
+
+  if (arch
+      && strcmp (arch, "x86") != 0
+      && strcmp (arch, "ia32") != 0
+#ifdef __x86_64__
+      && strcmp (arch, "x86_64") != 0
+#endif
+#ifdef __ILP32__
+      && strcmp (arch, "x32") != 0
+#endif
+      && strcmp (arch, "i386") != 0
+      && strcmp (arch, "i486") != 0
+#ifndef __i486__
+      && strcmp (arch, "i586") != 0
+#endif
+#if !defined (__i486__) && !defined (__i586__)
+      && strcmp (arch, "i686") != 0
+#endif
+      )
+    return false;
+
+  if (!isa)
+    return true;
+
+#ifdef __WBNOINVD__
+  if (strcmp (isa, "wbnoinvd") == 0) return true;
+#endif
+#ifdef __AVX512VP2INTERSECT__
+  if (strcmp (isa, "avx512vp2intersect") == 0) return true;
+#endif
+#ifdef __MMX__
+  if (strcmp (isa, "mmx") == 0) return true;
+#endif
+#ifdef __3dNOW__
+  if (strcmp (isa, "3dnow") == 0) return true;
+#endif
+#ifdef __3dNOW_A__
+  if (strcmp (isa, "3dnowa") == 0) return true;
+#endif
+#ifdef __SSE__
+  if (strcmp (isa, "sse") == 0) return true;
+#endif
+#ifdef __SSE2__
+  if (strcmp (isa, "sse2") == 0) return true;
+#endif
+#ifdef __SSE3__
+  if (strcmp (isa, "sse3") == 0) return true;
+#endif
+#ifdef __SSSE3__
+  if (strcmp (isa, "ssse3") == 0) return true;
+#endif
+#ifdef __SSE4_1__
+  if (strcmp (isa, "sse4.1") == 0) return true;
+#endif
+#ifdef __SSE4_2__
+  if (strcmp (isa, "sse4") == 0 || strcmp (isa, "sse4.2") == 0) return true;
+#endif
+#ifdef __AES__
+  if (strcmp (isa, "aes") == 0) return true;
+#endif
+#ifdef __SHA__
+  if (strcmp (isa, "sha") == 0) return true;
+#endif
+#ifdef __PCLMUL__
+  if (strcmp (isa, "pclmul") == 0) return true;
+#endif
+#ifdef __AVX__
+  if (strcmp (isa, "avx") == 0) return true;
+#endif
+#ifdef __AVX2__
+  if (strcmp (isa, "avx2") == 0) return true;
+#endif
+#ifdef __AVX512F__
+  if (strcmp (isa, "avx512f") == 0) return true;
+#endif
+#ifdef __AVX512ER__
+  if (strcmp (isa, "avx512er") == 0) return true;
+#endif
+#ifdef __AVX512CD__
+  if (strcmp (isa, "avx512cd") == 0) return true;
+#endif
+#ifdef __AVX512PF__
+  if (strcmp (isa, "avx512pf") == 0) return true;
+#endif
+#ifdef __AVX512DQ__
+  if (strcmp (isa, "avx512dq") == 0) return true;
+#endif
+#ifdef __AVX512BW__
+  if (strcmp (isa, "avx512bw") == 0) return true;
+#endif
+#ifdef __AVX512VL__
+  if (strcmp (isa, "avx512vl") == 0) return true;
+#endif
+#ifdef __AVX512VBMI__
+  if (strcmp (isa, "avx512vbmi") == 0) return true;
+#endif
+#ifdef __AVX512IFMA__
+  if (strcmp (isa, "avx512ifma") == 0) return true;
+#endif
+#ifdef __AVX5124VNNIW__
+  if (strcmp (isa, "avx5124vnniw") == 0) return true;
+#endif
+#ifdef __AVX512VBMI2__
+  if (strcmp (isa, "avx512vbmi2") == 0) return true;
+#endif
+#ifdef __AVX512VNNI__
+  if (strcmp (isa, "avx512vnni") == 0) return true;
+#endif
+#ifdef __PCONFIG__
+  if (strcmp (isa, "pconfig") == 0) return true;
+#endif
+#ifdef __SGX__
+  if (strcmp (isa, "sgx") == 0) return true;
+#endif
+#ifdef __AVX5124FMAPS__
+  if (strcmp (isa, "avx5124fmaps") == 0) return true;
+#endif
+#ifdef __AVX512BITALG__
+  if (strcmp (isa, "avx512bitalg") == 0) return true;
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+  if (strcmp (isa, "avx512vpopcntdq") == 0) return true;
+#endif
+#ifdef __FMA__
+  if (strcmp (isa, "fma") == 0) return true;
+#endif
+#ifdef __RTM__
+  if (strcmp (isa, "rtm") == 0) return true;
+#endif
+#ifdef __SSE4A__
+  if (strcmp (isa, "sse4a") == 0) return true;
+#endif
+#ifdef __FMA4__
+  if (strcmp (isa, "fma4") == 0) return true;
+#endif
+#ifdef __XOP__
+  if (strcmp (isa, "xop") == 0) return true;
+#endif
+#ifdef __LWP__
+  if (strcmp (isa, "lwp") == 0) return true;
+#endif
+#ifdef __ABM__
+  if (strcmp (isa, "abm") == 0) return true;
+#endif
+#ifdef __BMI__
+  if (strcmp (isa, "bmi") == 0) return true;
+#endif
+#ifdef __BMI2__
+  if (strcmp (isa, "bmi2") == 0) return true;
+#endif
+#ifdef __LZCNT__
+  if (strcmp (isa, "lzcnt") == 0) return true;
+#endif
+#ifdef __TBM__
+  if (strcmp (isa, "tbm") == 0) return true;
+#endif
+#ifdef __CRC32__
+  if (strcmp (isa, "crc32") == 0) return true;
+#endif
+#ifdef __POPCNT__
+  if (strcmp (isa, "popcnt") == 0) return true;
+#endif
+#ifdef __FSGSBASE__
+  if (strcmp (isa, "fsgsbase") == 0) return true;
+#endif
+#ifdef __RDRND__
+  if (strcmp (isa, "rdrnd") == 0) return true;
+#endif
+#ifdef __F16C__
+  if (strcmp (isa, "f16c") == 0) return true;
+#endif
+#ifdef __RDSEED__
+  if (strcmp (isa, "rdseed") == 0) return true;
+#endif
+#ifdef __PRFCHW__
+  if (strcmp (isa, "prfchw") == 0) return true;
+#endif
+#ifdef __ADX__
+  if (strcmp (isa, "adx") == 0) return true;
+#endif
+#ifdef __FXSR__
+  if (strcmp (isa, "fxsr") == 0) return true;
+#endif
+#ifdef __XSAVE__
+  if (strcmp (isa, "xsave") == 0) return true;
+#endif
+#ifdef __XSAVEOPT__
+  if (strcmp (isa, "xsaveopt") == 0) return true;
+#endif
+#ifdef __PREFETCHWT1__
+  if (strcmp (isa, "prefetchwt1") == 0) return true;
+#endif
+#ifdef __CLFLUSHOPT__
+  if (strcmp (isa, "clflushopt") == 0) return true;
+#endif
+#ifdef __CLZERO__
+  if (strcmp (isa, "clzero") == 0) return true;
+#endif
+#ifdef __XSAVEC__
+  if (strcmp (isa, "xsavec") == 0) return true;
+#endif
+#ifdef __XSAVES__
+  if (strcmp (isa, "xsaves") == 0) return true;
+#endif
+#ifdef __CLWB__
+  if (strcmp (isa, "clwb") == 0) return true;
+#endif
+#ifdef __MWAITX__
+  if (strcmp (isa, "mwaitx") == 0) return true;
+#endif
+#ifdef __PKU__
+  if (strcmp (isa, "pku") == 0) return true;
+#endif
+#ifdef __RDPID__
+  if (strcmp (isa, "rdpid") == 0) return true;
+#endif
+#ifdef __GFNI__
+  if (strcmp (isa, "gfni") == 0) return true;
+#endif
+#ifdef __SHSTK__
+  if (strcmp (isa, "shstk") == 0) return true;
+#endif
+#ifdef __VAES__
+  if (strcmp (isa, "vaes") == 0) return true;
+#endif
+#ifdef __VPCLMULQDQ__
+  if (strcmp (isa, "vpclmulqdq") == 0) return true;
+#endif
+#ifdef __MOVDIRI__
+  if (strcmp (isa, "movdiri") == 0) return true;
+#endif
+#ifdef __MOVDIR64B__
+  if (strcmp (isa, "movdir64b") == 0) return true;
+#endif
+#ifdef __WAITPKG__
+  if (strcmp (isa, "waitpkg") == 0) return true;
+#endif
+#ifdef __CLDEMOTE__
+  if (strcmp (isa, "cldemote") == 0) return true;
+#endif
+#ifdef __SERIALIZE__
+  if (strcmp (isa, "serialize") == 0) return true;
+#endif
+#ifdef __PTWRITE__
+  if (strcmp (isa, "ptwrite") == 0) return true;
+#endif
+#ifdef __AVX512BF16__
+  if (strcmp (isa, "avx512bf16") == 0) return true;
+#endif
+#ifdef __AVX512FP16__
+  if (strcmp (isa, "avx512fp16") == 0) return true;
+#endif
+#ifdef __ENQCMD__
+  if (strcmp (isa, "enqcmd") == 0) return true;
+#endif
+#ifdef __TSXLDTRK__
+  if (strcmp (isa, "tsxldtrk") == 0) return true;
+#endif
+#ifdef __AMX_TILE__
+  if (strcmp (isa, "amx-tile") == 0) return true;
+#endif
+#ifdef __AMX_INT8__
+  if (strcmp (isa, "amx-int8") == 0) return true;
+#endif
+#ifdef __AMX_BF16__
+  if (strcmp (isa, "amx-bf16") == 0) return true;
+#endif
+#ifdef __LAHF_SAHF__
+  if (strcmp (isa, "sahf") == 0) return true;
+#endif
+#ifdef __MOVBE__
+  if (strcmp (isa, "movbe") == 0) return true;
+#endif
+#ifdef __UINTR__
+  if (strcmp (isa, "uintr") == 0) return true;
+#endif
+#ifdef __HRESET__
+  if (strcmp (isa, "hreset") == 0) return true;
+#endif
+#ifdef __KL__
+  if (strcmp (isa, "kl") == 0) return true;
+#endif
+#ifdef __WIDEKL__
+  if (strcmp (isa, "widekl") == 0) return true;
+#endif
+
+  return false;
+}
diff --git a/libgomp/config/nvptx/selector.c b/libgomp/config/nvptx/selector.c
new file mode 100644
index 00000000000..50b5f9020ac
--- /dev/null
+++ b/libgomp/config/nvptx/selector.c
@@ -0,0 +1,65 @@ 
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Mentor, a Siemens Business.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+   a Nvidia GPU.  */
+
+#include "libgomp.h"
+#include <string.h>
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+
+  if (arch && strcmp (arch, "nvptx") != 0)
+    return false;
+
+  if (!isa)
+    return true;
+
+  if (strcmp (isa, "sm_30") == 0)
+    return true;
+#if __PTX_SM__ >= 350
+  if (strcmp (isa, "sm_35") == 0)
+    return true;
+#endif
+#if __PTX_SM__ >= 530
+  if (strcmp (isa, "sm_53") == 0)
+    return true;
+#endif
+#if __PTX_SM__ >= 750
+  if (strcmp (isa, "sm_75") == 0)
+    return true;
+#endif
+#if __PTX_SM__ >= 800
+  if (strcmp (isa, "sm_80") == 0)
+    return true;
+#endif
+
+  return false;
+}
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 0c9c28c65cf..73f880ffa2f 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -152,6 +152,8 @@  extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *,
 extern bool GOMP_OFFLOAD_can_run (void *);
 extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
 extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
+extern bool GOMP_OFFLOAD_evaluate_device (int, const char *, const char *,
+					  const char *);
 
 extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
 				       void **, unsigned *, void *);
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index f98cccd8b66..9cb1313677f 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1415,6 +1415,7 @@  struct gomp_device_descr
   __typeof (GOMP_OFFLOAD_can_run) *can_run_func;
   __typeof (GOMP_OFFLOAD_run) *run_func;
   __typeof (GOMP_OFFLOAD_async_run) *async_run_func;
+  __typeof (GOMP_OFFLOAD_evaluate_device) *evaluate_device_func;
 
   /* Splay tree containing information about mapped memory regions.  */
   struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 65901dff235..2b7d49bd533 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -414,6 +414,7 @@  GOMP_5.1 {
 	GOMP_scope_start;
 	GOMP_warning;
 	GOMP_teams4;
+	GOMP_evaluate_target_device;
 } GOMP_5.0.1;
 
 GOMP_5.1.1 {
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index c0cc03ae61f..e9d60238e2b 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -337,6 +337,11 @@  extern void GOMP_single_copy_end (void *);
 
 extern void GOMP_scope_start (uintptr_t *);
 
+/* selector.c */
+
+extern bool GOMP_evaluate_current_device (const char *, const char *,
+					  const char *);
+
 /* target.c */
 
 extern void GOMP_target (int, void (*) (void *), const void *,
@@ -359,6 +364,9 @@  extern void GOMP_teams (unsigned int, unsigned int);
 extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
 extern void *GOMP_target_map_indirect_ptr (void *);
 
+extern bool GOMP_evaluate_target_device (int, const char *, const char *,
+					 const char *);
+
 /* teams.c */
 
 extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 5efdf7fb796..b6883850250 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -136,6 +136,16 @@  host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
   fn (vars);
 }
 
+/* Host entry for the evaluate_device hook in host_dispatch.  All arguments
+   are ignored and the body is marked unreachable, so this must never
+   actually be called.
+   NOTE(review): presumably GOMP_evaluate_target_device always handles the
+   host through GOMP_evaluate_current_device instead -- confirm.  */
+static bool
+host_evaluate_device (int device_num, const char *kind, const char *arch,
+		      const char *isa)
+{
+  (void) device_num;
+  (void) kind;
+  (void) arch;
+  (void) isa;
+
+  __builtin_unreachable ();
+  return false;
+}
+
 static void
 host_openacc_exec (void (*fn) (void *),
 		   size_t mapnum __attribute__ ((unused)),
@@ -285,6 +295,7 @@  static struct gomp_device_descr host_dispatch =
     .memcpy2d_func = NULL,
     .memcpy3d_func = NULL,
     .run_func = host_run,
+    .evaluate_device_func = host_evaluate_device,
 
     .mem_map = { NULL },
     .mem_map_rev = { NULL },
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index bc8131a6c2d..9025702c543 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3984,6 +3984,20 @@  GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
 		       GOMP_PLUGIN_target_task_completion, async_data);
 }
 
+/* Plugin hook for the target_device selector.  Return true if the agent
+   DEVICE_NUM satisfies the KIND, ARCH and ISA traits.  A NULL trait places
+   no constraint.  Only kind "gpu" and arch "gcn" are accepted; ISA is
+   translated with isa_code and compared against the agent's device_isa.  */
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+			      const char *arch, const char *isa)
+{
+  struct agent_info *agent = get_agent_info (device_num);
+
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+  if (arch && strcmp (arch, "gcn") != 0)
+    return false;
+  if (!isa)
+    return true;
+
+  /* NOTE(review): get_agent_info is defined outside this hunk; presumably it
+     yields NULL for an invalid or uninitialized device -- confirm.  Treat
+     that as "no match" instead of dereferencing a null pointer.  */
+  if (agent == NULL)
+    return false;
+
+  return isa_code (isa) == agent->device_isa;
+}
+
 /* }}} */
 /* {{{ OpenACC Plugin API  */
 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index c04c3acd679..9dcd8a6f6eb 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -317,6 +317,7 @@  struct ptx_device
   int max_threads_per_block;
   int max_threads_per_multiprocessor;
   int default_dims[GOMP_DIM_MAX];
+  int compute_major, compute_minor;
 
   /* Length as used by the CUDA Runtime API ('struct cudaDeviceProp').  */
   char name[256];
@@ -541,6 +542,14 @@  nvptx_open_device (int n)
   for (int i = 0; i != GOMP_DIM_MAX; i++)
     ptx_dev->default_dims[i] = 0;
 
+  CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+		  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
+  ptx_dev->compute_major = pi;
+
+  CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+		  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
+  ptx_dev->compute_minor = pi;
+
   CUDA_CALL_ERET (NULL, cuDeviceGetName, ptx_dev->name, sizeof ptx_dev->name,
 		  dev);
 
@@ -2312,3 +2321,39 @@  GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
 }
 
 /* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+/* Return true from the enclosing function if ISA names compute capability
+   MAJOR.MINOR (spelled "sm_<MAJOR><MINOR>") and DEVICE's compute capability
+   is at least that.  Capabilities are ordered by major number first, so the
+   minor numbers are only compared when the major numbers are equal;
+   comparing both independently would reject e.g. "sm_35" on an 8.0 device.
+   MAJOR and MINOR must be literal digits because they are stringized.
+   Expects `device' and `isa' to be in scope.  */
+#define CHECK_ISA(major, minor) \
+  do \
+    { \
+      if ((device->compute_major > (major) \
+	   || (device->compute_major == (major) \
+	       && device->compute_minor >= (minor))) \
+	  && strcmp (isa, "sm_" #major #minor) == 0) \
+	return true; \
+    } \
+  while (0)
+
+/* Plugin hook for the target_device selector.  Return true if the device
+   at index DEVICE_NUM of ptx_devices satisfies the KIND, ARCH and ISA
+   traits.  A NULL trait places no constraint.  Only kind "gpu" and arch
+   "nvptx" are accepted.  ISA matches when it is one of the "sm_XY" names
+   listed below and the device's compute capability is at least X.Y; any
+   other ISA string yields false.  */
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+			      const char *arch, const char *isa)
+{
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+  if (arch && strcmp (arch, "nvptx") != 0)
+    return false;
+  if (!isa)
+    return true;
+
+  struct ptx_device *device = ptx_devices[device_num];
+
+  CHECK_ISA (3, 0);
+  CHECK_ISA (3, 5);
+  CHECK_ISA (3, 7);
+  CHECK_ISA (5, 0);
+  CHECK_ISA (5, 2);
+  CHECK_ISA (5, 3);
+  CHECK_ISA (6, 0);
+  CHECK_ISA (6, 1);
+  CHECK_ISA (6, 2);
+  CHECK_ISA (7, 0);
+  CHECK_ISA (7, 2);
+  CHECK_ISA (7, 5);
+  CHECK_ISA (8, 0);
+  CHECK_ISA (8, 6);
+
+  return false;
+}
diff --git a/libgomp/selector.c b/libgomp/selector.c
new file mode 100644
index 00000000000..dc920ee065f
--- /dev/null
+++ b/libgomp/selector.c
@@ -0,0 +1,36 @@ 
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Mentor, a Siemens Business.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains a placeholder implementation of
+   GOMP_evaluate_current_device.  */
+
+#include "libgomp.h"
+
+/* Placeholder: KIND, ARCH and ISA are ignored and the current device is
+   never reported as matching.  */
+bool
+GOMP_evaluate_current_device (const char *kind __attribute__ ((unused)),
+			      const char *arch __attribute__ ((unused)),
+			      const char *isa __attribute__ ((unused)))
+{
+  const bool matches = false;
+
+  return matches;
+}
diff --git a/libgomp/target.c b/libgomp/target.c
index 1367e9cce6c..206987953dc 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -5088,6 +5088,43 @@  omp_pause_resource_all (omp_pause_resource_t kind)
 ialias (omp_pause_resource)
 ialias (omp_pause_resource_all)
 
+/* Evaluate a target_device selector against device DEVICE_NUM.
+
+   A negative DEVICE_NUM selects the default device.  KIND, ARCH and ISA are
+   the requested traits; NULL means "unconstrained", and a KIND of "any" is
+   treated the same as NULL.
+
+   If DEVICE_NUM is the device this code is running on, the question is
+   answered by GOMP_evaluate_current_device.  Otherwise, when running on the
+   initial device, it is forwarded to the evaluate_device hook of the
+   device's plugin; when running on a non-initial device the answer is
+   always false.  Returns false if the device cannot be resolved, and true
+   if the device has no evaluate_device hook.  */
+bool
+GOMP_evaluate_target_device (int device_num, const char *kind,
+			     const char *arch, const char *isa)
+{
+  bool result = true;
+
+  if (device_num < 0)
+    device_num = omp_get_default_device ();
+
+  if (kind && strcmp (kind, "any") == 0)
+    kind = NULL;
+
+  /* DEVICE_NUM is signed, and any of the trait strings may be NULL here;
+     passing a null pointer for %s is undefined, so print a marker.  */
+  gomp_debug (1, "%s: device_num = %d, kind=%s, arch=%s, isa=%s",
+	      __FUNCTION__, device_num, kind ? kind : "(null)",
+	      arch ? arch : "(null)", isa ? isa : "(null)");
+
+  if (omp_get_device_num () == device_num)
+    result = GOMP_evaluate_current_device (kind, arch, isa);
+  else
+    {
+      if (!omp_is_initial_device ())
+	/* Accelerators are not expected to know about other devices.  */
+	result = false;
+      else
+	{
+	  struct gomp_device_descr *device = resolve_device (device_num, true);
+	  if (device == NULL)
+	    result = false;
+	  else if (device->evaluate_device_func)
+	    /* Plugins index their own device tables, so hand them the
+	       per-plugin id rather than the global device number.  */
+	    result = device->evaluate_device_func (device->target_id, kind,
+						   arch, isa);
+	}
+    }
+
+  gomp_debug (1, " -> %s\n", result ? "true" : "false");
+  return result;
+}
+
 #ifdef PLUGIN_SUPPORT
 
 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
@@ -5140,6 +5177,7 @@  gomp_load_plugin_for_device (struct gomp_device_descr *device,
   DLSYM (free);
   DLSYM (dev2host);
   DLSYM (host2dev);
+  DLSYM (evaluate_device);
   DLSYM_OPT (memcpy2d, memcpy2d);
   DLSYM_OPT (memcpy3d, memcpy3d);
   device->capabilities = device->get_caps_func ();