diff mbox series

x86-64: Don't use SSE resolvers for ISA level 3 or above

Message ID 20240228175114.271103-1-hjl.tools@gmail.com
State New
Headers show
Series x86-64: Don't use SSE resolvers for ISA level 3 or above | expand

Commit Message

H.J. Lu Feb. 28, 2024, 5:51 p.m. UTC
When glibc is built with ISA level 3 or above enabled, SSE resolvers
aren't available and glibc fails to build:

ld: .../elf/librtld.os: in function `init_cpu_features':
.../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave'
ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
/usr/local/bin/ld: final link failed: bad value

For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor
_dl_tlsdesc_dynamic_fxsave and also exclude _dl_tlsdesc_dynamic_fxsave.

This fixes BZ #31429.
---
 sysdeps/x86/cpu-features.c  | 17 +++++++++++------
 sysdeps/x86_64/dl-tlsdesc.S | 15 +++++++++------
 2 files changed, 20 insertions(+), 12 deletions(-)

Comments

Noah Goldstein Feb. 28, 2024, 5:57 p.m. UTC | #1
On Wed, Feb 28, 2024 at 11:51 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> When glibc is built with ISA level 3 or above enabled, SSE resolvers
> aren't available and glibc fails to build:
>
> ld: .../elf/librtld.os: in function `init_cpu_features':
> .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave'
> ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
> /usr/local/bin/ld: final link failed: bad value
>
> For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor
> _dl_tlsdesc_dynamic_fxsave and also exclude _dl_tlsdesc_dynamic_fxsave.
>
> This fixes BZ #31429.
> ---
>  sysdeps/x86/cpu-features.c  | 17 +++++++++++------
>  sysdeps/x86_64/dl-tlsdesc.S | 15 +++++++++------
>  2 files changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index d71e8d3d2e..e7c7ece462 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -18,6 +18,7 @@
>
>  #include <dl-hwcap.h>
>  #include <libc-pointer-arith.h>
> +#include <isa-level.h>
>  #include <get-isa-level.h>
>  #include <cacheinfo.h>
>  #include <dl-cacheinfo.h>
> @@ -1195,7 +1196,9 @@ no_cpuid:
>                TUNABLE_CALLBACK (set_x86_shstk));
>  #endif
>
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
>    if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> +#endif
>      {
>        if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
>         {
> @@ -1216,22 +1219,24 @@ no_cpuid:
>  #endif
>         }
>      }
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
>    else
>      {
> -#ifdef __x86_64__
> +# ifdef __x86_64__
>        GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
> -# ifdef SHARED
> +#  ifdef SHARED
>        GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
> -# endif
> -#else
> -# ifdef SHARED
> +#  endif
> +# else
> +#  ifdef SHARED
>        if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
>         GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
>        else
>         GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
> +#  endif
>  # endif
> -#endif
>      }
> +#endif
>
>  #ifdef SHARED
>  # ifdef __x86_64__
> diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
> index ea69f5223a..057a10862a 100644
> --- a/sysdeps/x86_64/dl-tlsdesc.S
> +++ b/sysdeps/x86_64/dl-tlsdesc.S
> @@ -20,6 +20,7 @@
>  #include <tls.h>
>  #include <cpu-features-offsets.h>
>  #include <features-offsets.h>
> +#include <isa-level.h>
>  #include "tlsdesc.h"
>  #include "dl-trampoline-save.h"
>
> @@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak:
>         .size   _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>
>  #ifdef SHARED
> -# define USE_FXSAVE
> -# define STATE_SAVE_ALIGNMENT  16
> -# define _dl_tlsdesc_dynamic   _dl_tlsdesc_dynamic_fxsave
> -# include "dl-tlsdesc-dynamic.h"
> -# undef _dl_tlsdesc_dynamic
> -# undef USE_FXSAVE
> +# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> +#  define USE_FXSAVE
> +#  define STATE_SAVE_ALIGNMENT 16
> +#  define _dl_tlsdesc_dynamic  _dl_tlsdesc_dynamic_fxsave
> +#  include "dl-tlsdesc-dynamic.h"
> +#  undef _dl_tlsdesc_dynamic
> +#  undef USE_FXSAVE
> +# endif
>
>  # define USE_XSAVE
>  # define STATE_SAVE_ALIGNMENT  64
> --
> 2.43.2
>

LGTM.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Richard Henderson Feb. 29, 2024, 8:55 p.m. UTC | #2
On 2/28/24 07:51, H.J. Lu wrote:
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -18,6 +18,7 @@
>   
>   #include <dl-hwcap.h>
>   #include <libc-pointer-arith.h>
> +#include <isa-level.h>
>   #include <get-isa-level.h>
>   #include <cacheinfo.h>
>   #include <dl-cacheinfo.h>
> @@ -1195,7 +1196,9 @@ no_cpuid:
>   	       TUNABLE_CALLBACK (set_x86_shstk));
>   #endif
>   
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
>     if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> +#endif
>       {
>         if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
>   	{

Surely this can be written with IF and not IFDEF:

   if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
       || GLRO(dl_x86_cpu_features).xsave_state_size != 0)


r~
Sunil Pandey March 1, 2024, 2:40 a.m. UTC | #3
On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 2/28/24 07:51, H.J. Lu wrote:
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -18,6 +18,7 @@
> >
> >   #include <dl-hwcap.h>
> >   #include <libc-pointer-arith.h>
> > +#include <isa-level.h>
> >   #include <get-isa-level.h>
> >   #include <cacheinfo.h>
> >   #include <dl-cacheinfo.h>
> > @@ -1195,7 +1196,9 @@ no_cpuid:
> >              TUNABLE_CALLBACK (set_x86_shstk));
> >   #endif
> >
> > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> >     if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> > +#endif
> >       {
> >         if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> >       {
>
> Surely this can be written with IF and not IFDEF:
>
>    if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
>        || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
>

For sure it can be written like IF, but using IFDEF has the following perf
and code size advantage.

For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case

it will remove conditional

if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)

and else block code of this conditional during preprocessing.

--Sunil
Richard Henderson March 1, 2024, 10:22 p.m. UTC | #4
On 2/29/24 16:40, Sunil Pandey wrote:
> 
> 
> On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <richard.henderson@linaro.org 
> <mailto:richard.henderson@linaro.org>> wrote:
> 
>     On 2/28/24 07:51, H.J. Lu wrote:
>      > --- a/sysdeps/x86/cpu-features.c
>      > +++ b/sysdeps/x86/cpu-features.c
>      > @@ -18,6 +18,7 @@
>      >
>      >   #include <dl-hwcap.h>
>      >   #include <libc-pointer-arith.h>
>      > +#include <isa-level.h>
>      >   #include <get-isa-level.h>
>      >   #include <cacheinfo.h>
>      >   #include <dl-cacheinfo.h>
>      > @@ -1195,7 +1196,9 @@ no_cpuid:
>      >              TUNABLE_CALLBACK (set_x86_shstk));
>      >   #endif
>      >
>      > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
>      >     if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
>      > +#endif
>      >       {
>      >         if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
>      >       {
> 
>     Surely this can be written with IF and not IFDEF:
> 
>         if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
>             || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> 
> 
> For sure it can be written like IF, but using IFDEF has the following perf and code size 
> advantage.
> 
> For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case
> 
> it will remove conditional
> 
> if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> 
> and else block code of this conditional during preprocessing.

You miss the point -- both of these are constants and will be folded by the compiler. 
There is no perf or code size advantage for cpp.


r~
Sunil Pandey March 2, 2024, 12:18 a.m. UTC | #5
On Fri, Mar 1, 2024 at 2:23 PM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 2/29/24 16:40, Sunil Pandey wrote:
> >
> >
> > On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <
> richard.henderson@linaro.org
> > <mailto:richard.henderson@linaro.org>> wrote:
> >
> >     On 2/28/24 07:51, H.J. Lu wrote:
> >      > --- a/sysdeps/x86/cpu-features.c
> >      > +++ b/sysdeps/x86/cpu-features.c
> >      > @@ -18,6 +18,7 @@
> >      >
> >      >   #include <dl-hwcap.h>
> >      >   #include <libc-pointer-arith.h>
> >      > +#include <isa-level.h>
> >      >   #include <get-isa-level.h>
> >      >   #include <cacheinfo.h>
> >      >   #include <dl-cacheinfo.h>
> >      > @@ -1195,7 +1196,9 @@ no_cpuid:
> >      >              TUNABLE_CALLBACK (set_x86_shstk));
> >      >   #endif
> >      >
> >      > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> >      >     if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> >      > +#endif
> >      >       {
> >      >         if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> >      >       {
> >
> >     Surely this can be written with IF and not IFDEF:
> >
> >         if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
> >             || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> >
> >
> > For sure it can be written like IF, but using IFDEF has the following
> perf and code size
> > advantage.
> >
> > For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case
> >
> > it will remove conditional
> >
> > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> >
> > and else block code of this conditional during preprocessing.
>
> You miss the point -- both of these are constants and will be folded by
> the compiler.
> There is no perf or code size advantage for cpp.
>
>
Got it. Will submit a patch to fix it.

--Sunil
diff mbox series

Patch

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index d71e8d3d2e..e7c7ece462 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -18,6 +18,7 @@ 
 
 #include <dl-hwcap.h>
 #include <libc-pointer-arith.h>
+#include <isa-level.h>
 #include <get-isa-level.h>
 #include <cacheinfo.h>
 #include <dl-cacheinfo.h>
@@ -1195,7 +1196,9 @@  no_cpuid:
 	       TUNABLE_CALLBACK (set_x86_shstk));
 #endif
 
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
   if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+#endif
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
 	{
@@ -1216,22 +1219,24 @@  no_cpuid:
 #endif
 	}
     }
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
   else
     {
-#ifdef __x86_64__
+# ifdef __x86_64__
       GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
-# ifdef SHARED
+#  ifdef SHARED
       GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
-# endif
-#else
-# ifdef SHARED
+#  endif
+# else
+#  ifdef SHARED
       if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
 	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
       else
 	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
+#  endif
 # endif
-#endif
     }
+#endif
 
 #ifdef SHARED
 # ifdef __x86_64__
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index ea69f5223a..057a10862a 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -20,6 +20,7 @@ 
 #include <tls.h>
 #include <cpu-features-offsets.h>
 #include <features-offsets.h>
+#include <isa-level.h>
 #include "tlsdesc.h"
 #include "dl-trampoline-save.h"
 
@@ -79,12 +80,14 @@  _dl_tlsdesc_undefweak:
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 
 #ifdef SHARED
-# define USE_FXSAVE
-# define STATE_SAVE_ALIGNMENT	16
-# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
-# include "dl-tlsdesc-dynamic.h"
-# undef _dl_tlsdesc_dynamic
-# undef USE_FXSAVE
+# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+#  define USE_FXSAVE
+#  define STATE_SAVE_ALIGNMENT	16
+#  define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
+#  include "dl-tlsdesc-dynamic.h"
+#  undef _dl_tlsdesc_dynamic
+#  undef USE_FXSAVE
+# endif
 
 # define USE_XSAVE
 # define STATE_SAVE_ALIGNMENT	64