diff mbox series

x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch

Message ID 20240220165805.3629140-1-skpgkp2@gmail.com
State New
Headers show
Series x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch | expand

Commit Message

Sunil Pandey Feb. 20, 2024, 4:58 p.m. UTC
When glibc is built with FMA and AVX2 enabled by default, the resulting
glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
processors, including VMs, should also support FMA and vice versa.

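When glibc is built with SSE4.1 (but not both AVX2 and FMA) enabled by
default, keep only the SSE4.1 variants of the rounding functions (ceil,
floor, nearbyint, rint, roundeven, trunc) and exclude their generic C
variants.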

Fixes BZ 31335.
---
 config.h.in                                   |   5 +
 sysdeps/x86/configure                         |  77 +++++++++
 sysdeps/x86/configure.ac                      |  44 ++++++
 sysdeps/x86_64/fpu/multiarch/Makefile         | 147 +++++++++---------
 sysdeps/x86_64/fpu/multiarch/e_asin.c         |  18 ++-
 sysdeps/x86_64/fpu/multiarch/e_atan2.c        |  10 +-
 sysdeps/x86_64/fpu/multiarch/e_exp.c          |  12 +-
 sysdeps/x86_64/fpu/multiarch/e_exp2f.c        |  18 ++-
 sysdeps/x86_64/fpu/multiarch/e_expf.c         |  18 ++-
 sysdeps/x86_64/fpu/multiarch/e_log.c          |  12 +-
 sysdeps/x86_64/fpu/multiarch/e_log2.c         |  18 ++-
 sysdeps/x86_64/fpu/multiarch/e_log2f.c        |  18 ++-
 sysdeps/x86_64/fpu/multiarch/e_logf.c         |  18 ++-
 sysdeps/x86_64/fpu/multiarch/e_pow.c          |  12 +-
 sysdeps/x86_64/fpu/multiarch/e_powf.c         |  26 ++--
 sysdeps/x86_64/fpu/multiarch/s_atan.c         |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S     |  28 ++++
 sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S  |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_ceil.c         |  20 +--
 sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S    |  28 ++++
 sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_ceilf.c        |  20 +--
 sysdeps/x86_64/fpu/multiarch/s_cosf.c         |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_expm1.c        |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_floor-avx.S    |  28 ++++
 sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_floor.c        |  20 +--
 sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S   |  28 ++++
 .../x86_64/fpu/multiarch/s_floorf-sse4_1.S    |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_floorf.c       |  20 +--
 sysdeps/x86_64/fpu/multiarch/s_log1p.c        |  10 +-
 .../x86_64/fpu/multiarch/s_nearbyint-avx.S    |  28 ++++
 .../x86_64/fpu/multiarch/s_nearbyint-sse4_1.S |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_nearbyint.c    |  18 ++-
 .../x86_64/fpu/multiarch/s_nearbyintf-avx.S   |  28 ++++
 .../fpu/multiarch/s_nearbyintf-sse4_1.S       |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c   |  18 ++-
 sysdeps/x86_64/fpu/multiarch/s_rint-avx.S     |  28 ++++
 sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S  |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_rint.c         |  20 +--
 sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S    |  28 ++++
 sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_rintf.c        |  20 +--
 .../x86_64/fpu/multiarch/s_roundeven-avx.S    |  28 ++++
 .../x86_64/fpu/multiarch/s_roundeven-sse4_1.S |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_roundeven.c    |  18 ++-
 .../x86_64/fpu/multiarch/s_roundevenf-avx.S   |  28 ++++
 .../fpu/multiarch/s_roundevenf-sse4_1.S       |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_roundevenf.c   |  18 ++-
 sysdeps/x86_64/fpu/multiarch/s_sin.c          |  18 ++-
 sysdeps/x86_64/fpu/multiarch/s_sincos.c       |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_sincosf.c      |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_sinf.c         |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_tan.c          |  10 +-
 sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S    |  28 ++++
 sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_trunc.c        |  20 +--
 sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S   |  28 ++++
 .../x86_64/fpu/multiarch/s_truncf-sse4_1.S    |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_truncf.c       |  20 +--
 sysdeps/x86_64/fpu/multiarch/w_exp.c          |   6 +-
 sysdeps/x86_64/fpu/multiarch/w_log.c          |   6 +-
 sysdeps/x86_64/fpu/multiarch/w_pow.c          |   6 +-
 63 files changed, 974 insertions(+), 295 deletions(-)
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S

Comments

Noah Goldstein Feb. 20, 2024, 5:33 p.m. UTC | #1
On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>
> When glibc is built with FMA and AVX2 enabled by default, the resulting
> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> processors, including VMs, should also support FMA and vice versa.
>
> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> variant.
Not avx2 + FMA as well?
>
> Fixes BZ 31335.
> ---
>  config.h.in                                   |   5 +
>  sysdeps/x86/configure                         |  77 +++++++++
>  sysdeps/x86/configure.ac                      |  44 ++++++
>  sysdeps/x86_64/fpu/multiarch/Makefile         | 147 +++++++++---------
>  sysdeps/x86_64/fpu/multiarch/e_asin.c         |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/e_atan2.c        |  10 +-
>  sysdeps/x86_64/fpu/multiarch/e_exp.c          |  12 +-
>  sysdeps/x86_64/fpu/multiarch/e_exp2f.c        |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/e_expf.c         |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/e_log.c          |  12 +-
>  sysdeps/x86_64/fpu/multiarch/e_log2.c         |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/e_log2f.c        |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/e_logf.c         |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/e_pow.c          |  12 +-
>  sysdeps/x86_64/fpu/multiarch/e_powf.c         |  26 ++--
>  sysdeps/x86_64/fpu/multiarch/s_atan.c         |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S     |  28 ++++
>  sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S  |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_ceil.c         |  20 +--
>  sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S    |  28 ++++
>  sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_ceilf.c        |  20 +--
>  sysdeps/x86_64/fpu/multiarch/s_cosf.c         |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_expm1.c        |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_floor-avx.S    |  28 ++++
>  sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_floor.c        |  20 +--
>  sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S   |  28 ++++
>  .../x86_64/fpu/multiarch/s_floorf-sse4_1.S    |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_floorf.c       |  20 +--
>  sysdeps/x86_64/fpu/multiarch/s_log1p.c        |  10 +-
>  .../x86_64/fpu/multiarch/s_nearbyint-avx.S    |  28 ++++
>  .../x86_64/fpu/multiarch/s_nearbyint-sse4_1.S |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_nearbyint.c    |  18 ++-
>  .../x86_64/fpu/multiarch/s_nearbyintf-avx.S   |  28 ++++
>  .../fpu/multiarch/s_nearbyintf-sse4_1.S       |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c   |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/s_rint-avx.S     |  28 ++++
>  sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S  |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_rint.c         |  20 +--
>  sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S    |  28 ++++
>  sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_rintf.c        |  20 +--
>  .../x86_64/fpu/multiarch/s_roundeven-avx.S    |  28 ++++
>  .../x86_64/fpu/multiarch/s_roundeven-sse4_1.S |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_roundeven.c    |  18 ++-
>  .../x86_64/fpu/multiarch/s_roundevenf-avx.S   |  28 ++++
>  .../fpu/multiarch/s_roundevenf-sse4_1.S       |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_roundevenf.c   |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/s_sin.c          |  18 ++-
>  sysdeps/x86_64/fpu/multiarch/s_sincos.c       |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_sincosf.c      |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_sinf.c         |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_tan.c          |  10 +-
>  sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S    |  28 ++++
>  sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_trunc.c        |  20 +--
>  sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S   |  28 ++++
>  .../x86_64/fpu/multiarch/s_truncf-sse4_1.S    |  11 ++
>  sysdeps/x86_64/fpu/multiarch/s_truncf.c       |  20 +--
>  sysdeps/x86_64/fpu/multiarch/w_exp.c          |   6 +-
>  sysdeps/x86_64/fpu/multiarch/w_log.c          |   6 +-
>  sysdeps/x86_64/fpu/multiarch/w_pow.c          |   6 +-
>  63 files changed, 974 insertions(+), 295 deletions(-)
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
>
> diff --git a/config.h.in b/config.h.in
> index 2f0669e19b..0a9626cbe8 100644
> --- a/config.h.in
> +++ b/config.h.in
> @@ -292,4 +292,9 @@
>  /* Define if -mmovbe is enabled by default on x86.  */
>  #undef HAVE_X86_MOVBE
>
> +/* Define if -msse4.1 is enabled by default on x86.  */
> +#undef HAVE_X86_SSE4_1
> +
> +/* Define if -mavx2 and -mfma are enabled by default on x86.  */
> +#undef HAVE_X86_AVX2_FMA
>  #endif
> diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
> index 1f4c2d67fd..1c0e0d0640 100644
> --- a/sysdeps/x86/configure
> +++ b/sysdeps/x86/configure
> @@ -128,3 +128,80 @@ enable-x86-isa-level = $libc_cv_include_x86_isa_level"
>  printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
>
>
> +# Check if AVX2 and FMA are available.
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for AVX2 and FMA instruction support" >&5
> +printf %s "checking for AVX2 and FMA instruction support... " >&6; }
> +if test ${libc_cv_have_x86_avx2_fma+y}
> +then :
> +  printf %s "(cached) " >&6
> +else $as_nop
> +  cat > conftest.c <<EOF
> +#if !defined __AVX2__ || !defined __FMA__
> +# error AVX2 and/or FMA are disabled.
> +# if defined __AVX2__ || defined __FMA__
> +#  error Only one of AVX2 and FMA is enabled.
> +# endif
> +#endif
> +EOF
> +              if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&conftest.err'
> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> +  (eval $ac_try) 2>&5
> +  ac_status=$?
> +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; }; then
> +                libc_cv_have_x86_avx2_fma=yes
> +              else
> +                if { ac_try='grep -q "Only one of AVX2 and FMA is enabled" conftest.err'
> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> +  (eval $ac_try) 2>&5
> +  ac_status=$?
> +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; }; then
> +                  as_fn_error $? "Only one of AVX2 and FMA is enabled." "$LINENO" 5
> +                fi
> +                libc_cv_have_x86_avx2_fma=no
> +              fi
> +              rm -rf conftest*
> +fi
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_avx2_fma" >&5
> +printf "%s\n" "$libc_cv_have_x86_avx2_fma" >&6; }
> +if test $libc_cv_have_x86_avx2_fma = yes; then
> +  printf "%s\n" "#define HAVE_X86_AVX2_FMA 1" >>confdefs.h
> +
> +fi
> +config_vars="$config_vars
> +enable-avx2-fma = $libc_cv_have_x86_avx2_fma"
> +
> +# Check if SSE4.1 is available.
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SSE4.1 instruction support" >&5
> +printf %s "checking for SSE4.1 instruction support... " >&6; }
> +if test ${libc_cv_have_x86_sse4_1+y}
> +then :
> +  printf %s "(cached) " >&6
> +else $as_nop
> +  cat > conftest.c <<EOF
> +#if !defined __SSE4_1__
> +# error SSE4.1 is not available.
> +#endif
> +EOF
> +              if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&5'
> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> +  (eval $ac_try) 2>&5
> +  ac_status=$?
> +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; }; then
> +                libc_cv_have_x86_sse4_1=yes
> +              else
> +                libc_cv_have_x86_sse4_1=no
> +              fi
> +              rm -rf conftest*
> +fi
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_sse4_1" >&5
> +printf "%s\n" "$libc_cv_have_x86_sse4_1" >&6; }
> +if test $libc_cv_have_x86_sse4_1 = yes; then
> +  printf "%s\n" "#define HAVE_X86_SSE4_1 1" >>confdefs.h
> +
> +fi
> +config_vars="$config_vars
> +enable-sse4-1 = $libc_cv_have_x86_sse4_1"
> +
> diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
> index 437a50623b..df3db3fdc2 100644
> --- a/sysdeps/x86/configure.ac
> +++ b/sysdeps/x86/configure.ac
> @@ -87,3 +87,47 @@ LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
>
>  dnl Static PIE is supported.
>  AC_DEFINE(SUPPORT_STATIC_PIE)
> +
> +# Check if AVX2 and FMA are available.
> +AC_CACHE_CHECK([for AVX2 and FMA instruction support],
> +              libc_cv_have_x86_avx2_fma, [dnl
> +cat > conftest.c <<EOF
> +#if !defined __AVX2__ || !defined __FMA__
> +# error AVX2 and/or FMA are disabled.
> +# if defined __AVX2__ || defined __FMA__
> +#  error Only one of AVX2 and FMA is enabled.
> +# endif
> +#endif
> +EOF
> +              if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c 1>&conftest.err); then
> +                libc_cv_have_x86_avx2_fma=yes
> +              else
> +                if AC_TRY_COMMAND(grep -q "Only one of AVX2 and FMA is enabled" conftest.err); then
> +                  AC_MSG_ERROR([Only one of AVX2 and FMA is enabled.])
> +                fi
> +                libc_cv_have_x86_avx2_fma=no
> +              fi
> +              rm -rf conftest*])
> +if test $libc_cv_have_x86_avx2_fma = yes; then
> +  AC_DEFINE(HAVE_X86_AVX2_FMA)
> +fi
> +LIBC_CONFIG_VAR([enable-avx2-fma], [$libc_cv_have_x86_avx2_fma])
> +
> +# Check if SSE4.1 is available.
> +AC_CACHE_CHECK([for SSE4.1 instruction support],
> +              libc_cv_have_x86_sse4_1, [dnl
> +cat > conftest.c <<EOF
> +#if !defined __SSE4_1__
> +# error SSE4.1 is not available.
> +#endif
> +EOF
> +              if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c 1>&AS_MESSAGE_LOG_FD); then
> +                libc_cv_have_x86_sse4_1=yes
> +              else
> +                libc_cv_have_x86_sse4_1=no
> +              fi
> +              rm -rf conftest*])
> +if test $libc_cv_have_x86_sse4_1 = yes; then
> +  AC_DEFINE(HAVE_X86_SSE4_1)
> +fi
> +LIBC_CONFIG_VAR([enable-sse4-1], [$libc_cv_have_x86_sse4_1])
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> index e1a490dd98..5eeb106b79 100644
> --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> @@ -1,49 +1,4 @@
>  ifeq ($(subdir),math)
> -libm-sysdep_routines += \
> -  s_ceil-c \
> -  s_ceilf-c \
> -  s_floor-c \
> -  s_floorf-c \
> -  s_nearbyint-c \
> -  s_nearbyintf-c \
> -  s_rint-c \
> -  s_rintf-c \
> -  s_roundeven-c \
> -  s_roundevenf-c \
> -  s_trunc-c \
> -  s_truncf-c \
> -# libm-sysdep_routines
> -
> -libm-sysdep_routines += \
> -  s_ceil-sse4_1 \
> -  s_ceilf-sse4_1 \
> -  s_floor-sse4_1 \
> -  s_floorf-sse4_1 \
> -  s_nearbyint-sse4_1 \
> -  s_nearbyintf-sse4_1 \
> -  s_rint-sse4_1 \
> -  s_rintf-sse4_1 \
> -  s_roundeven-sse4_1 \
> -  s_roundevenf-sse4_1 \
> -  s_trunc-sse4_1 \
> -  s_truncf-sse4_1 \
> -# libm-sysdep_routines
> -
> -libm-sysdep_routines += \
> -  e_asin-fma \
> -  e_atan2-fma \
> -  e_exp-fma \
> -  e_log-fma \
> -  e_log2-fma \
> -  e_pow-fma \
> -  s_atan-fma \
> -  s_expm1-fma \
> -  s_log1p-fma \
> -  s_sin-fma \
> -  s_sincos-fma \
> -  s_tan-fma \
> -# libm-sysdep_routines
> -
>  CFLAGS-e_asin-fma.c = -mfma -mavx2
>  CFLAGS-e_atan2-fma.c = -mfma -mavx2
>  CFLAGS-e_exp-fma.c = -mfma -mavx2
> @@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
>  CFLAGS-s_tan-fma.c = -mfma -mavx2
>  CFLAGS-s_sincos-fma.c = -mfma -mavx2
>
> -libm-sysdep_routines += \
> -  s_cosf-sse2 \
> -  s_sincosf-sse2 \
> -  s_sinf-sse2 \
> -# libm-sysdep_routines
> -
> -libm-sysdep_routines += \
> -  e_exp2f-fma \
> -  e_expf-fma \
> -  e_log2f-fma \
> -  e_logf-fma \
> -  e_powf-fma \
> -  s_cosf-fma \
> -  s_sincosf-fma \
> -  s_sinf-fma \
> -# libm-sysdep_routines
> -
>  CFLAGS-e_exp2f-fma.c = -mfma -mavx2
>  CFLAGS-e_expf-fma.c = -mfma -mavx2
>  CFLAGS-e_log2f-fma.c = -mfma -mavx2
> @@ -83,17 +21,92 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
>  CFLAGS-s_cosf-fma.c = -mfma -mavx2
>  CFLAGS-s_sincosf-fma.c = -mfma -mavx2
>
> +ifeq ($(enable-avx2-fma),yes)
>  libm-sysdep_routines += \
> +  s_ceil-avx \
> +  s_ceilf-avx \
> +  s_floor-avx \
> +  s_floorf-avx \
> +  s_nearbyint-avx \
> +  s_nearbyintf-avx \
> +  s_rint-avx \
> +  s_rintf-avx \
> +  s_roundeven-avx \
> +  s_roundevenf-avx \
> +  s_trunc-avx \
> +  s_truncf-avx \
> +# libm-sysdep_routines
> +else
> +libm-sysdep_routines += \
> +  e_asin-fma \
>    e_asin-fma4 \
> +  e_atan2-avx \
> +  e_atan2-fma \
>    e_atan2-fma4 \
> +  e_exp-avx \
> +  e_exp-fma \
>    e_exp-fma4 \
> +  e_exp2f-fma \
> +  e_expf-fma \
> +  e_log-avx \
> +  e_log-fma \
>    e_log-fma4 \
> +  e_log2-fma \
> +  e_log2f-fma \
> +  e_logf-fma \
> +  e_pow-fma \
>    e_pow-fma4 \
> +  e_powf-fma \
> +  s_atan-avx \
> +  s_atan-fma \
>    s_atan-fma4 \
> +  s_ceil-sse4_1 \
> +  s_ceilf-sse4_1 \
> +  s_cosf-fma \
> +  s_cosf-sse2 \
> +  s_expm1-fma \
> +  s_floor-sse4_1 \
> +  s_floorf-sse4_1 \
> +  s_log1p-fma \
> +  s_nearbyint-sse4_1 \
> +  s_nearbyintf-sse4_1 \
> +  s_rint-sse4_1 \
> +  s_rintf-sse4_1 \
> +  s_roundeven-sse4_1 \
> +  s_roundevenf-sse4_1 \
> +  s_sin-avx \
> +  s_sin-fma \
>    s_sin-fma4 \
> +  s_sincos-avx \
> +  s_sincos-fma \
>    s_sincos-fma4 \
> +  s_sincosf-fma \
> +  s_sincosf-sse2 \
> +  s_sinf-fma \
> +  s_sinf-sse2 \
> +  s_tan-avx \
> +  s_tan-fma \
>    s_tan-fma4 \
> +  s_trunc-sse4_1 \
> +  s_truncf-sse4_1 \
>  # libm-sysdep_routines
> +ifeq ($(enable-sse4-1),no)
> +libm-sysdep_routines += \
> +  s_ceil-c \
> +  s_ceilf-c \
> +  s_floor-c \
> +  s_floorf-c \
> +  s_nearbyint-c \
> +  s_nearbyintf-c \
> +  s_rint-c \
> +  s_rintf-c \
> +  s_roundeven-c \
> +  s_roundevenf-c \
> +  s_trunc-c \
> +  s_truncf-c \
> +# libm-sysdep_routines
> +endif
> +endif
>
>  CFLAGS-e_asin-fma4.c = -mfma4
>  CFLAGS-e_atan2-fma4.c = -mfma4
> @@ -105,16 +118,6 @@ CFLAGS-s_sin-fma4.c = -mfma4
>  CFLAGS-s_tan-fma4.c = -mfma4
>  CFLAGS-s_sincos-fma4.c = -mfma4
>
> -libm-sysdep_routines += \
> -  e_atan2-avx \
> -  e_exp-avx \
> -  e_log-avx \
> -  s_atan-avx \
> -  s_sin-avx \
> -  s_sincos-avx \
> -  s_tan-avx \
> -# libm-sysdep_routines
> -
>  CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
>  CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
>  CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
> index 2eaa6c2c04..3c1654ba3e 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
> @@ -16,26 +16,28 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-finite.h>
>
>  extern double __redirect_ieee754_asin (double);
>  extern double __redirect_ieee754_acos (double);
>
> -#define SYMBOL_NAME ieee754_asin
> -#include "ifunc-fma4.h"
> +# define SYMBOL_NAME ieee754_asin
> +# include "ifunc-fma4.h"
>
>  libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
>                        IFUNC_SELECTOR ());
>  libm_alias_finite (__ieee754_asin, __asin)
>
> -#undef SYMBOL_NAME
> -#define SYMBOL_NAME ieee754_acos
> -#include "ifunc-fma4.h"
> +# undef SYMBOL_NAME
> +# define SYMBOL_NAME ieee754_acos
> +# include "ifunc-fma4.h"
>
>  libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
>                        IFUNC_SELECTOR ());
>  libm_alias_finite (__ieee754_acos, __acos)
>
> -#define __ieee754_acos __ieee754_acos_sse2
> -#define __ieee754_asin __ieee754_asin_sse2
> +# define __ieee754_acos __ieee754_acos_sse2
> +# define __ieee754_asin __ieee754_asin_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/e_asin.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
> index 17ee4f3c36..f48ab8762a 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
> @@ -16,16 +16,18 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-finite.h>
>
>  extern double __redirect_ieee754_atan2 (double, double);
>
> -#define SYMBOL_NAME ieee754_atan2
> -#include "ifunc-avx-fma4.h"
> +# define SYMBOL_NAME ieee754_atan2
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_ieee754_atan2,
>                        __ieee754_atan2, IFUNC_SELECTOR ());
>  libm_alias_finite (__ieee754_atan2, __atan2)
>
> -#define __ieee754_atan2 __ieee754_atan2_sse2
> +# define __ieee754_atan2 __ieee754_atan2_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/e_atan2.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
> index 406b7ebd44..034f5b894f 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <math.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <math.h>
> +# include <libm-alias-finite.h>
>
>  extern double __redirect_ieee754_exp (double);
>
> -#define SYMBOL_NAME ieee754_exp
> -#include "ifunc-avx-fma4.h"
> +# define SYMBOL_NAME ieee754_exp
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
>                        IFUNC_SELECTOR ());
>  libm_alias_finite (__ieee754_exp, __exp)
>
> -#define __exp __ieee754_exp_sse2
> +# define __exp __ieee754_exp_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/e_exp.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
> index 804fd6be85..74f92bfa0c 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
> @@ -16,25 +16,27 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
> +# include <libm-alias-finite.h>
>
>  extern float __redirect_exp2f (float);
>
> -#define SYMBOL_NAME exp2f
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME exp2f
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
>
> -#ifdef SHARED
> +# ifdef SHARED
>  versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
>  libm_alias_float_other (__exp2, exp2)
> -#else
> +# else
>  libm_alias_float (__exp2, exp2)
> -#endif
> +# endif
>
>  strong_alias (__exp2f, __ieee754_exp2f)
>  libm_alias_finite (__exp2f, __exp2f)
>
> -#define __exp2f __exp2f_sse2
> +# define __exp2f __exp2f_sse2
> +#endif
>  #include <sysdeps/ieee754/flt-32/e_exp2f.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
> index 4a7e2a5bce..e8d6f393ff 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
> @@ -16,28 +16,30 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
> +# include <libm-alias-finite.h>
>
>  extern float __redirect_expf (float);
>
> -#define SYMBOL_NAME expf
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME expf
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
>
> -#ifdef SHARED
> +# ifdef SHARED
>  __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
>    __attribute__ ((visibility ("hidden")));
>
>  versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
>  libm_alias_float_other (__exp, exp)
> -#else
> +# else
>  libm_alias_float (__exp, exp)
> -#endif
> +# endif
>
>  strong_alias (__expf, __ieee754_expf)
>  libm_alias_finite (__expf, __expf)
>
> -#define __expf __expf_sse2
> +# define __expf __expf_sse2
> +#endif
>  #include <sysdeps/ieee754/flt-32/e_expf.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
> index 067fbf58c3..3a678235d9 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_log.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <math.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <math.h>
> +# include <libm-alias-finite.h>
>
>  extern double __redirect_ieee754_log (double);
>
> -#define SYMBOL_NAME ieee754_log
> -#include "ifunc-avx-fma4.h"
> +# define SYMBOL_NAME ieee754_log
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
>                        IFUNC_SELECTOR ());
>  libm_alias_finite (__ieee754_log, __log)
>
> -#define __log __ieee754_log_sse2
> +# define __log __ieee754_log_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/e_log.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
> index 9c57a2f6cc..c032758b4e 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
> @@ -16,28 +16,30 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
> +# include <libm-alias-finite.h>
>
>  extern double __redirect_log2 (double);
>
> -#define SYMBOL_NAME log2
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME log2
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
>
> -#ifdef SHARED
> +# ifdef SHARED
>  __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
>    __attribute__ ((visibility ("hidden")));
>
>  versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
>  libm_alias_double_other (__log2, log2)
> -#else
> +# else
>  libm_alias_double (__log2, log2)
> -#endif
> +# endif
>
>  strong_alias (__log2, __ieee754_log2)
>  libm_alias_finite (__log2, __log2)
>
> -#define __log2 __log2_sse2
> +# define __log2 __log2_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/e_log2.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
> index 2b45c87f38..0f8d1f0abc 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
> @@ -16,28 +16,30 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
> +# include <libm-alias-finite.h>
>
>  extern float __redirect_log2f (float);
>
> -#define SYMBOL_NAME log2f
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME log2f
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
>
> -#ifdef SHARED
> +# ifdef SHARED
>  __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
>    __attribute__ ((visibility ("hidden")));
>
>  versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
>  libm_alias_float_other (__log2, log2)
> -#else
> +# else
>  libm_alias_float (__log2, log2)
> -#endif
> +# endif
>
>  strong_alias (__log2f, __ieee754_log2f)
>  libm_alias_finite (__log2f, __log2f)
>
> -#define __log2f __log2f_sse2
> +# define __log2f __log2f_sse2
> +#endif
>  #include <sysdeps/ieee754/flt-32/e_log2f.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
> index 97e23c8fea..9d94dd614f 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
> @@ -16,28 +16,30 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
> +# include <libm-alias-finite.h>
>
>  extern float __redirect_logf (float);
>
> -#define SYMBOL_NAME logf
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME logf
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
>
> -#ifdef SHARED
> +# ifdef SHARED
>  __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
>    __attribute__ ((visibility ("hidden")));
>
>  versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
>  libm_alias_float_other (__log, log)
> -#else
> +# else
>  libm_alias_float (__log, log)
> -#endif
> +# endif
>
>  strong_alias (__logf, __ieee754_logf)
>  libm_alias_finite (__logf, __logf)
>
> -#define __logf __logf_sse2
> +# define __logf __logf_sse2
> +#endif
>  #include <sysdeps/ieee754/flt-32/e_logf.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
> index 42618e7112..07436d420c 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <math.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <math.h>
> +# include <libm-alias-finite.h>
>
>  extern double __redirect_ieee754_pow (double, double);
>
> -#define SYMBOL_NAME ieee754_pow
> -#include "ifunc-fma4.h"
> +# define SYMBOL_NAME ieee754_pow
> +# include "ifunc-fma4.h"
>
>  libc_ifunc_redirected (__redirect_ieee754_pow,
>                        __ieee754_pow, IFUNC_SELECTOR ());
>  libm_alias_finite (__ieee754_pow, __pow)
>
> -#define __pow __ieee754_pow_sse2
> +# define __pow __ieee754_pow_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/e_pow.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
> index 8e6ce13cc1..c64c8a4302 100644
> --- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
> @@ -16,31 +16,33 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> -#include <libm-alias-finite.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
> +# include <libm-alias-finite.h>
>
> -#define powf __redirect_powf
> -#define __DECL_SIMD___redirect_powf
> -#include <math.h>
> -#undef powf
> +# define powf __redirect_powf
> +# define __DECL_SIMD___redirect_powf
> +# include <math.h>
> +# undef powf
>
> -#define SYMBOL_NAME powf
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME powf
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
>
> -#ifdef SHARED
> +# ifdef SHARED
>  __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
>    __attribute__ ((visibility ("hidden")));
>
>  versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
>  libm_alias_float_other (__pow, pow)
> -#else
> +# else
>  libm_alias_float (__pow, pow)
> -#endif
> +# endif
>
>  strong_alias (__powf, __ieee754_powf)
>  libm_alias_finite (__powf, __powf)
>
> -#define __powf __powf_sse2
> +# define __powf __powf_sse2
> +#endif
>  #include <sysdeps/ieee754/flt-32/e_powf.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
> index 71bad096a9..f9ec4e7b37 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
> @@ -16,15 +16,17 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
>  extern double __redirect_atan (double);
>
> -#define SYMBOL_NAME atan
> -#include "ifunc-avx-fma4.h"
> +# define SYMBOL_NAME atan
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
>  libm_alias_double (__atan, atan)
>
> -#define __atan __atan_sse2
> +# define __atan __atan_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/s_atan.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
> new file mode 100644
> index 0000000000..e6c1106753
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of ceil function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-double.h>
> +
> +       .text
> +ENTRY(__ceil)
> +       vroundsd $10, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__ceil)
> +
> +libm_alias_double (__ceil, ceil)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
> index 64119011ad..4be069b8da 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-double.h>
> +# define __ceil_sse41 __ceil
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__ceil_sse41)
>         roundsd $10, %xmm0, %xmm0
>         ret
>  END(__ceil_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_double (__ceil, ceil)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
> index cc028addee..0199863c8f 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-double.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-double.h>
>
> -#define ceil __redirect_ceil
> -#define __ceil __redirect___ceil
> -#include <math.h>
> -#undef ceil
> -#undef __ceil
> +# define ceil __redirect_ceil
> +# define __ceil __redirect___ceil
> +# include <math.h>
> +# undef ceil
> +# undef __ceil
>
> -#define SYMBOL_NAME ceil
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME ceil
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
>  libm_alias_double (__ceil, ceil)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
> new file mode 100644
> index 0000000000..b4d8ac0455
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of ceilf function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-float.h>
> +
> +       .text
> +ENTRY(__ceilf)
> +       vroundss $10, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__ceilf)
> +
> +libm_alias_float (__ceil, ceil)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
> index dd9a9f6b71..1a85e9c925 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-float.h>
> +# define __ceilf_sse41 __ceilf
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__ceilf_sse41)
>         roundss $10, %xmm0, %xmm0
>         ret
>  END(__ceilf_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_float (__ceil, ceil)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
> index 97a0ca7d19..dfce9225dd 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-float.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-float.h>
>
> -#define ceilf __redirect_ceilf
> -#define __ceilf __redirect___ceilf
> -#include <math.h>
> -#undef ceilf
> -#undef __ceilf
> +# define ceilf __redirect_ceilf
> +# define __ceilf __redirect___ceilf
> +# include <math.h>
> +# undef ceilf
> +# undef __ceilf
>
> -#define SYMBOL_NAME ceilf
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME ceilf
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
>  libm_alias_float (__ceil, ceil)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
> index 2703c576df..9be9327b80 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
> @@ -16,13 +16,17 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
>
>  extern float __redirect_cosf (float);
>
> -#define SYMBOL_NAME cosf
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME cosf
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
>
>  libm_alias_float (__cos, cos)
> +#else
> +# include <sysdeps/ieee754/flt-32/s_cosf.c>
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
> index 8a2d69f9b2..1ed45245cb 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
> @@ -16,21 +16,23 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
>  extern double __redirect_expm1 (double);
>
> -#define SYMBOL_NAME expm1
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME expm1
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
>  libm_alias_double (__expm1, expm1)
>
> -#define __expm1 __expm1_sse2
> +# define __expm1 __expm1_sse2
>
>  /* NB: __expm1 may be expanded to __expm1_sse2 in the following
>     prototypes.  */
>  extern long double __expm1l (long double);
>  extern long double __expm1f128 (long double);
>
> +#endif
>  #include <sysdeps/ieee754/dbl-64/s_expm1.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
> new file mode 100644
> index 0000000000..ff74b5a8bf
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of floor function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-double.h>
> +
> +       .text
> +ENTRY(__floor)
> +       vroundsd $9, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__floor)
> +
> +libm_alias_double (__floor, floor)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
> index 2f7521f39f..957d018177 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-double.h>
> +# define __floor_sse41 __floor
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__floor_sse41)
>         roundsd $9, %xmm0, %xmm0
>         ret
>  END(__floor_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_double (__floor, floor)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
> index 8cebd48e10..a30c88671e 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-double.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-double.h>
>
> -#define floor __redirect_floor
> -#define __floor __redirect___floor
> -#include <math.h>
> -#undef floor
> -#undef __floor
> +# define floor __redirect_floor
> +# define __floor __redirect___floor
> +# include <math.h>
> +# undef floor
> +# undef __floor
>
> -#define SYMBOL_NAME floor
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME floor
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
>  libm_alias_double (__floor, floor)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
> new file mode 100644
> index 0000000000..c378baae8e
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of floorf function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-float.h>
> +
> +       .text
> +ENTRY(__floorf)
> +       vroundss $9, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__floorf)
> +
> +libm_alias_float (__floor, floor)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
> index 5f6020d27d..eacabe167c 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-float.h>
> +# define __floorf_sse41 __floorf
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__floorf_sse41)
>         roundss $9, %xmm0, %xmm0
>         ret
>  END(__floorf_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_float (__floor, floor)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
> index a14e18b03c..6531b78443 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-float.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-float.h>
>
> -#define floorf __redirect_floorf
> -#define __floorf __redirect___floorf
> -#include <math.h>
> -#undef floorf
> -#undef __floorf
> +# define floorf __redirect_floorf
> +# define __floorf __redirect___floorf
> +# include <math.h>
> +# undef floorf
> +# undef __floorf
>
> -#define SYMBOL_NAME floorf
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME floorf
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
>  libm_alias_float (__floor, floor)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
> index a8e1a3f21b..76e1672e2d 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
> @@ -16,14 +16,16 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
>  extern double __redirect_log1p (double);
>
> -#define SYMBOL_NAME log1p
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME log1p
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
>
> -#define __log1p __log1p_sse2
> +# define __log1p __log1p_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/s_log1p.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
> new file mode 100644
> index 0000000000..5bfdf73c28
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of nearbyint function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-double.h>
> +
> +       .text
> +ENTRY(__nearbyint)
> +       vroundsd $0xc, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__nearbyint)
> +
> +libm_alias_double (__nearbyint, nearbyint)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
> index 674f7eb40a..ee0b17e470 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-double.h>
> +# define __nearbyint_sse41 __nearbyint
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__nearbyint_sse41)
>         roundsd $0xc, %xmm0, %xmm0
>         ret
>  END(__nearbyint_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_double (__nearbyint, nearbyint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> index 693e42dd4e..649a9df869 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
> -#define nearbyint __redirect_nearbyint
> -#define __nearbyint __redirect___nearbyint
> -#include <math.h>
> -#undef nearbyint
> -#undef __nearbyint
> +# define nearbyint __redirect_nearbyint
> +# define __nearbyint __redirect___nearbyint
> +# include <math.h>
> +# undef nearbyint
> +# undef __nearbyint
>
> -#define SYMBOL_NAME nearbyint
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME nearbyint
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
>                        IFUNC_SELECTOR ());
>  libm_alias_double (__nearbyint, nearbyint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
> new file mode 100644
> index 0000000000..1dbaed0324
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of nearbyintf function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-float.h>
> +
> +       .text
> +ENTRY(__nearbyintf)
> +       vroundss $0xc, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__nearbyintf)
> +
> +libm_alias_float (__nearbyint, nearbyint)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> index 5892bd7563..8b3e307b78 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-float.h>
> +# define __nearbyintf_sse41 __nearbyintf
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__nearbyintf_sse41)
>         roundss $0xc, %xmm0, %xmm0
>         ret
>  END(__nearbyintf_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_float (__nearbyint, nearbyint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> index a0ac009f4b..7762467ad9 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
>
> -#define nearbyintf __redirect_nearbyintf
> -#define __nearbyintf __redirect___nearbyintf
> -#include <math.h>
> -#undef nearbyintf
> -#undef __nearbyintf
> +# define nearbyintf __redirect_nearbyintf
> +# define __nearbyintf __redirect___nearbyintf
> +# include <math.h>
> +# undef nearbyintf
> +# undef __nearbyintf
>
> -#define SYMBOL_NAME nearbyintf
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME nearbyintf
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
>                        IFUNC_SELECTOR ());
>  libm_alias_float (__nearbyint, nearbyint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
> new file mode 100644
> index 0000000000..2b403b331f
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of rint function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-double.h>
> +
> +       .text
> +ENTRY(__rint)
> +       vroundsd $4, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__rint)
> +
> +libm_alias_double (__rint, rint)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> index 405372991b..4c7c1c37de 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-double.h>
> +# define __rint_sse41 __rint
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__rint_sse41)
>         roundsd $4, %xmm0, %xmm0
>         ret
>  END(__rint_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_double (__rint, rint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
> index 754c87e004..49693c9728 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-double.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-double.h>
>
> -#define rint __redirect_rint
> -#define __rint __redirect___rint
> -#include <math.h>
> -#undef rint
> -#undef __rint
> +# define rint __redirect_rint
> +# define __rint __redirect___rint
> +# include <math.h>
> +# undef rint
> +# undef __rint
>
> -#define SYMBOL_NAME rint
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME rint
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
>  libm_alias_double (__rint, rint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
> new file mode 100644
> index 0000000000..171c2867f4
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of rintf function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-float.h>
> +
> +       .text
> +ENTRY(__rintf)
> +       vroundss $4, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__rintf)
> +
> +libm_alias_float (__rint, rint)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> index 8ac67ce767..55443d7238 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-float.h>
> +# define __rintf_sse41 __rintf
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__rintf_sse41)
>         roundss $4, %xmm0, %xmm0
>         ret
>  END(__rintf_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_float (__rint, rint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> index e9d6b7a5f2..c7cf09701d 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-float.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-float.h>
>
> -#define rintf __redirect_rintf
> -#define __rintf __redirect___rintf
> -#include <math.h>
> -#undef rintf
> -#undef __rintf
> +# define rintf __redirect_rintf
> +# define __rintf __redirect___rintf
> +# include <math.h>
> +# undef rintf
> +# undef __rintf
>
> -#define SYMBOL_NAME rintf
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME rintf
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
>  libm_alias_float (__rint, rint)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
> new file mode 100644
> index 0000000000..576790355c
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of roundeven function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-double.h>
> +
> +       .text
> +ENTRY(__roundeven)
> +       vroundsd $8, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__roundeven)
> +
> +libm_alias_double (__roundeven, roundeven)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> index 5ef102336b..f0644cce81 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-double.h>
> +# define __roundeven_sse41 __roundeven
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__roundeven_sse41)
>         roundsd $8, %xmm0, %xmm0
>         ret
>  END(__roundeven_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_double (__roundeven, roundeven)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> index 8737b32e26..a250297918 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> @@ -16,16 +16,18 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
> -#define roundeven __redirect_roundeven
> -#define __roundeven __redirect___roundeven
> -#include <math.h>
> -#undef roundeven
> -#undef __roundeven
> +# define roundeven __redirect_roundeven
> +# define __roundeven __redirect___roundeven
> +# include <math.h>
> +# undef roundeven
> +# undef __roundeven
>
> -#define SYMBOL_NAME roundeven
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME roundeven
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
>  libm_alias_double (__roundeven, roundeven)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
> new file mode 100644
> index 0000000000..42c359f4cd
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of roundevenf function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-float.h>
> +
> +       .text
> +ENTRY(__roundevenf)
> +       vroundss $8, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__roundevenf)
> +
> +libm_alias_float (__roundeven, roundeven)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> index 792c90ba07..d1dd6b0e8b 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> @@ -17,8 +17,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-float.h>
> +# define __roundevenf_sse41 __roundevenf
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__roundevenf_sse41)
>         roundss $8, %xmm0, %xmm0
>         ret
>  END(__roundevenf_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_float (__roundeven, roundeven)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> index e96016a4d5..534941e67f 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> @@ -16,16 +16,18 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
>
> -#define roundevenf __redirect_roundevenf
> -#define __roundevenf __redirect___roundevenf
> -#include <math.h>
> -#undef roundevenf
> -#undef __roundevenf
> +# define roundevenf __redirect_roundevenf
> +# define __roundevenf __redirect___roundevenf
> +# include <math.h>
> +# undef roundevenf
> +# undef __roundevenf
>
> -#define SYMBOL_NAME roundevenf
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME roundevenf
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
>  libm_alias_float (__roundeven, roundeven)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
> index 355cc0092e..21eaa5e984 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
> @@ -16,24 +16,26 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
>  extern double __redirect_sin (double);
>  extern double __redirect_cos (double);
>
> -#define SYMBOL_NAME sin
> -#include "ifunc-avx-fma4.h"
> +# define SYMBOL_NAME sin
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
>  libm_alias_double (__sin, sin)
>
> -#undef SYMBOL_NAME
> -#define SYMBOL_NAME cos
> -#include "ifunc-avx-fma4.h"
> +# undef SYMBOL_NAME
> +# define SYMBOL_NAME cos
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
>  libm_alias_double (__cos, cos)
>
> -#define __cos __cos_sse2
> -#define __sin __sin_sse2
> +# define __cos __cos_sse2
> +# define __sin __sin_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/s_sin.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> index 70107e999c..729163cdde 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> @@ -16,15 +16,17 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
>  extern void __redirect_sincos (double, double *, double *);
>
> -#define SYMBOL_NAME sincos
> -#include "ifunc-fma4.h"
> +# define SYMBOL_NAME sincos
> +# include "ifunc-fma4.h"
>
>  libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
>  libm_alias_double (__sincos, sincos)
>
> -#define __sincos __sincos_sse2
> +# define __sincos __sincos_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/s_sincos.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> index 80bc028451..136dd62c81 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> @@ -16,13 +16,17 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
>
>  extern void __redirect_sincosf (float, float *, float *);
>
> -#define SYMBOL_NAME sincosf
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME sincosf
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
>
>  libm_alias_float (__sincos, sincos)
> +#else
> +# include <sysdeps/ieee754/flt-32/s_sincosf.c>
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> index a32b9e9550..fabbf55604 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> @@ -16,13 +16,17 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-float.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-float.h>
>
>  extern float __redirect_sinf (float);
>
> -#define SYMBOL_NAME sinf
> -#include "ifunc-fma.h"
> +# define SYMBOL_NAME sinf
> +# include "ifunc-fma.h"
>
>  libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
>
>  libm_alias_float (__sin, sin)
> +#else
> +# include <sysdeps/ieee754/flt-32/s_sinf.c>
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
> index f9a2474a13..c85e327ff8 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
> @@ -16,15 +16,17 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <libm-alias-double.h>
> +#ifndef HAVE_X86_AVX2_FMA
> +# include <libm-alias-double.h>
>
>  extern double __redirect_tan (double);
>
> -#define SYMBOL_NAME tan
> -#include "ifunc-avx-fma4.h"
> +# define SYMBOL_NAME tan
> +# include "ifunc-avx-fma4.h"
>
>  libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
>  libm_alias_double (__tan, tan)
>
> -#define __tan __tan_sse2
> +# define __tan __tan_sse2
> +#endif
>  #include <sysdeps/ieee754/dbl-64/s_tan.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
> new file mode 100644
> index 0000000000..b3e87e9606
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of trunc function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-double.h>
> +
> +       .text
> +ENTRY(__trunc)
> +       vroundsd $11, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__trunc)
> +
> +libm_alias_double (__trunc, trunc)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> index b496a6ef49..062cd1fb36 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> @@ -18,8 +18,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-double.h>
> +# define __trunc_sse41 __trunc
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__trunc_sse41)
>         roundsd $11, %xmm0, %xmm0
>         ret
>  END(__trunc_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_double (__trunc, trunc)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> index 9bc9df8744..568e818826 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-double.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-double.h>
>
> -#define trunc __redirect_trunc
> -#define __trunc __redirect___trunc
> -#include <math.h>
> -#undef trunc
> -#undef __trunc
> +# define trunc __redirect_trunc
> +# define __trunc __redirect___trunc
> +# include <math.h>
> +# undef trunc
> +# undef __trunc
>
> -#define SYMBOL_NAME trunc
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME trunc
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
>  libm_alias_double (__trunc, trunc)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
> new file mode 100644
> index 0000000000..f31ac7d7f7
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
> @@ -0,0 +1,28 @@
> +/* AVX implementation of truncf function.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <libm-alias-float.h>
> +
> +       .text
> +ENTRY(__truncf)
> +       vroundss $11, %xmm0, %xmm0, %xmm0
> +       ret
> +END(__truncf)
> +
> +libm_alias_float (__trunc, trunc)
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> index 22e9a83307..ecd0ae5c05 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> @@ -18,8 +18,19 @@
>
>  #include <sysdep.h>
>
> +#ifdef HAVE_X86_SSE4_1
> +# include <libm-alias-float.h>
> +# define __truncf_sse41 __truncf
> +       .text
> +#else
>         .section .text.sse4.1,"ax",@progbits
> +#endif
> +
>  ENTRY(__truncf_sse41)
>         roundss $11, %xmm0, %xmm0
>         ret
>  END(__truncf_sse41)
> +
> +#ifdef HAVE_X86_SSE4_1
> +libm_alias_float (__trunc, trunc)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> index dae01d166a..57783c805a 100644
> --- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> @@ -16,17 +16,19 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define NO_MATH_REDIRECT
> -#include <libm-alias-float.h>
> +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> +# define NO_MATH_REDIRECT
> +# include <libm-alias-float.h>
>
> -#define truncf __redirect_truncf
> -#define __truncf __redirect___truncf
> -#include <math.h>
> -#undef truncf
> -#undef __truncf
> +# define truncf __redirect_truncf
> +# define __truncf __redirect___truncf
> +# include <math.h>
> +# undef truncf
> +# undef __truncf
>
> -#define SYMBOL_NAME truncf
> -#include "ifunc-sse4_1.h"
> +# define SYMBOL_NAME truncf
> +# include "ifunc-sse4_1.h"
>
>  libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
>  libm_alias_float (__trunc, trunc)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c
> index 27eee98a0a..fb2045e6cf 100644
> --- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
> +++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
> @@ -1 +1,5 @@
> -#include <sysdeps/../math/w_exp.c>
> +#ifdef HAVE_X86_AVX2_FMA
> +# include <sysdeps/ieee754/dbl-64/w_exp.c>
> +#else
> +# include <sysdeps/../math/w_exp.c>
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c
> index 9b2b018711..b85be8221e 100644
> --- a/sysdeps/x86_64/fpu/multiarch/w_log.c
> +++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
> @@ -1 +1,5 @@
> -#include <sysdeps/../math/w_log.c>
> +#ifdef HAVE_X86_AVX2_FMA
> +# include <sysdeps/ieee754/dbl-64/w_log.c>
> +#else
> +# include <sysdeps/../math/w_log.c>
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c
> index b50c1988de..849f4f97ff 100644
> --- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
> +++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
> @@ -1 +1,5 @@
> -#include <sysdeps/../math/w_pow.c>
> +#ifdef HAVE_X86_AVX2_FMA
> +# include <sysdeps/ieee754/dbl-64/w_pow.c>
> +#else
> +# include <sysdeps/../math/w_pow.c>
> +#endif
> --
> 2.43.0
>
Sunil Pandey Feb. 20, 2024, 5:51 p.m. UTC | #2
On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com>
wrote:

> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >
> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > processors, including VMs, should also support FMA and vice versa.
> >
> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > variant.
> Not avx2 + FMA as well?


Correct.  The logic is as follows:
if (built with AVX2+FMA): keep only the AVX2+FMA variants.
else if (built with SSE4.1): keep only the SSE4.1 variants.
else: no change.

>
> > Fixes BZ 31335.
> > ---
> >  config.h.in                                   |   5 +
> >  sysdeps/x86/configure                         |  77 +++++++++
> >  sysdeps/x86/configure.ac                      |  44 ++++++
> >  sysdeps/x86_64/fpu/multiarch/Makefile         | 147 +++++++++---------
> >  sysdeps/x86_64/fpu/multiarch/e_asin.c         |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/e_atan2.c        |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/e_exp.c          |  12 +-
> >  sysdeps/x86_64/fpu/multiarch/e_exp2f.c        |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/e_expf.c         |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/e_log.c          |  12 +-
> >  sysdeps/x86_64/fpu/multiarch/e_log2.c         |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/e_log2f.c        |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/e_logf.c         |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/e_pow.c          |  12 +-
> >  sysdeps/x86_64/fpu/multiarch/e_powf.c         |  26 ++--
> >  sysdeps/x86_64/fpu/multiarch/s_atan.c         |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S     |  28 ++++
> >  sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S  |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_ceil.c         |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S    |  28 ++++
> >  sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_ceilf.c        |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/s_cosf.c         |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_expm1.c        |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_floor-avx.S    |  28 ++++
> >  sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_floor.c        |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S   |  28 ++++
> >  .../x86_64/fpu/multiarch/s_floorf-sse4_1.S    |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_floorf.c       |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/s_log1p.c        |  10 +-
> >  .../x86_64/fpu/multiarch/s_nearbyint-avx.S    |  28 ++++
> >  .../x86_64/fpu/multiarch/s_nearbyint-sse4_1.S |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_nearbyint.c    |  18 ++-
> >  .../x86_64/fpu/multiarch/s_nearbyintf-avx.S   |  28 ++++
> >  .../fpu/multiarch/s_nearbyintf-sse4_1.S       |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c   |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/s_rint-avx.S     |  28 ++++
> >  sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S  |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_rint.c         |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S    |  28 ++++
> >  sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_rintf.c        |  20 +--
> >  .../x86_64/fpu/multiarch/s_roundeven-avx.S    |  28 ++++
> >  .../x86_64/fpu/multiarch/s_roundeven-sse4_1.S |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_roundeven.c    |  18 ++-
> >  .../x86_64/fpu/multiarch/s_roundevenf-avx.S   |  28 ++++
> >  .../fpu/multiarch/s_roundevenf-sse4_1.S       |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_roundevenf.c   |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/s_sin.c          |  18 ++-
> >  sysdeps/x86_64/fpu/multiarch/s_sincos.c       |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_sincosf.c      |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_sinf.c         |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_tan.c          |  10 +-
> >  sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S    |  28 ++++
> >  sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_trunc.c        |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S   |  28 ++++
> >  .../x86_64/fpu/multiarch/s_truncf-sse4_1.S    |  11 ++
> >  sysdeps/x86_64/fpu/multiarch/s_truncf.c       |  20 +--
> >  sysdeps/x86_64/fpu/multiarch/w_exp.c          |   6 +-
> >  sysdeps/x86_64/fpu/multiarch/w_log.c          |   6 +-
> >  sysdeps/x86_64/fpu/multiarch/w_pow.c          |   6 +-
> >  63 files changed, 974 insertions(+), 295 deletions(-)
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
> >
> > diff --git a/config.h.in b/config.h.in
> > index 2f0669e19b..0a9626cbe8 100644
> > --- a/config.h.in
> > +++ b/config.h.in
> > @@ -292,4 +292,9 @@
> >  /* Define if -mmovbe is enabled by default on x86.  */
> >  #undef HAVE_X86_MOVBE
> >
> > +/* Define if -msse4.1 is enabled by default on x86.  */
> > +#undef HAVE_X86_SSE4_1
> > +
> > +/* Define if -mavx2 and -mfma are enabled by default on x86.  */
> > +#undef HAVE_X86_AVX2_FMA
> >  #endif
> > diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
> > index 1f4c2d67fd..1c0e0d0640 100644
> > --- a/sysdeps/x86/configure
> > +++ b/sysdeps/x86/configure
> > @@ -128,3 +128,80 @@ enable-x86-isa-level =
> $libc_cv_include_x86_isa_level"
> >  printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
> >
> >
> > +# Check if AVX2 and FMA are available.
> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for AVX2 and FMA
> instruction support" >&5
> > +printf %s "checking for AVX2 and FMA instruction support... " >&6; }
> > +if test ${libc_cv_have_x86_avx2_fma+y}
> > +then :
> > +  printf %s "(cached) " >&6
> > +else $as_nop
> > +  cat > conftest.c <<EOF
> > +#if !defined __AVX2__ || !defined __FMA__
> > +# error AVX2 and/or FMA are disabled.
> > +# if defined __AVX2__ || defined __FMA__
> > +#  error Only one of AVX2 and FMA is enabled.
> > +# endif
> > +#endif
> > +EOF
> > +              if { ac_try='${CC-cc} -c $CFLAGS conftest.c
> 1>&conftest.err'
> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > +  (eval $ac_try) 2>&5
> > +  ac_status=$?
> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > +  test $ac_status = 0; }; }; then
> > +                libc_cv_have_x86_avx2_fma=yes
> > +              else
> > +                if { ac_try='grep -q "Only one of AVX2 and FMA is
> enabled" conftest.err'
> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > +  (eval $ac_try) 2>&5
> > +  ac_status=$?
> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > +  test $ac_status = 0; }; }; then
> > +                  as_fn_error $? "Only one of AVX2 and FMA is enabled."
> "$LINENO" 5
> > +                fi
> > +                libc_cv_have_x86_avx2_fma=no
> > +              fi
> > +              rm -rf conftest*
> > +fi
> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result:
> $libc_cv_have_x86_avx2_fma" >&5
> > +printf "%s\n" "$libc_cv_have_x86_avx2_fma" >&6; }
> > +if test $libc_cv_have_x86_avx2_fma = yes; then
> > +  printf "%s\n" "#define HAVE_X86_AVX2_FMA 1" >>confdefs.h
> > +
> > +fi
> > +config_vars="$config_vars
> > +enable-avx2-fma = $libc_cv_have_x86_avx2_fma"
> > +
> > +# Check if SSE4.1 is available.
> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SSE4.1
> instruction support" >&5
> > +printf %s "checking for SSE4.1 instruction support... " >&6; }
> > +if test ${libc_cv_have_x86_sse4_1+y}
> > +then :
> > +  printf %s "(cached) " >&6
> > +else $as_nop
> > +  cat > conftest.c <<EOF
> > +#if !defined __SSE4_1__
> > +# error SSE4.1 is not available.
> > +#endif
> > +EOF
> > +              if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&5'
> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > +  (eval $ac_try) 2>&5
> > +  ac_status=$?
> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > +  test $ac_status = 0; }; }; then
> > +                libc_cv_have_x86_sse4_1=yes
> > +              else
> > +                libc_cv_have_x86_sse4_1=no
> > +              fi
> > +              rm -rf conftest*
> > +fi
> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result:
> $libc_cv_have_x86_sse4_1" >&5
> > +printf "%s\n" "$libc_cv_have_x86_sse4_1" >&6; }
> > +if test $libc_cv_have_x86_sse4_1 = yes; then
> > +  printf "%s\n" "#define HAVE_X86_SSE4_1 1" >>confdefs.h
> > +
> > +fi
> > +config_vars="$config_vars
> > +enable-sse4-1 = $libc_cv_have_x86_sse4_1"
> > +
> > diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
> > index 437a50623b..df3db3fdc2 100644
> > --- a/sysdeps/x86/configure.ac
> > +++ b/sysdeps/x86/configure.ac
> > @@ -87,3 +87,47 @@ LIBC_CONFIG_VAR([enable-x86-isa-level],
> [$libc_cv_include_x86_isa_level])
> >
> >  dnl Static PIE is supported.
> >  AC_DEFINE(SUPPORT_STATIC_PIE)
> > +
> > +# Check if AVX2 and FMA are available.
> > +AC_CACHE_CHECK([for AVX2 and FMA instruction support],
> > +              libc_cv_have_x86_avx2_fma, [dnl
> > +cat > conftest.c <<EOF
> > +#if !defined __AVX2__ || !defined __FMA__
> > +# error AVX2 and/or FMA are disabled.
> > +# if defined __AVX2__ || defined __FMA__
> > +#  error Only one of AVX2 and FMA is enabled.
> > +# endif
> > +#endif
> > +EOF
> > +              if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c
> 1>&conftest.err); then
> > +                libc_cv_have_x86_avx2_fma=yes
> > +              else
> > +                if AC_TRY_COMMAND(grep -q "Only one of AVX2 and FMA is
> enabled" conftest.err); then
> > +                  AC_MSG_ERROR([Only one of AVX2 and FMA is enabled.])
> > +                fi
> > +                libc_cv_have_x86_avx2_fma=no
> > +              fi
> > +              rm -rf conftest*])
> > +if test $libc_cv_have_x86_avx2_fma = yes; then
> > +  AC_DEFINE(HAVE_X86_AVX2_FMA)
> > +fi
> > +LIBC_CONFIG_VAR([enable-avx2-fma], [$libc_cv_have_x86_avx2_fma])
> > +
> > +# Check if SSE4.1 is available.
> > +AC_CACHE_CHECK([for SSE4.1 instruction support],
> > +              libc_cv_have_x86_sse4_1, [dnl
> > +cat > conftest.c <<EOF
> > +#if !defined __SSE4_1__
> > +# error SSE4.1 is not available.
> > +#endif
> > +EOF
> > +              if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c
> 1>&AS_MESSAGE_LOG_FD); then
> > +                libc_cv_have_x86_sse4_1=yes
> > +              else
> > +                libc_cv_have_x86_sse4_1=no
> > +              fi
> > +              rm -rf conftest*])
> > +if test $libc_cv_have_x86_sse4_1 = yes; then
> > +  AC_DEFINE(HAVE_X86_SSE4_1)
> > +fi
> > +LIBC_CONFIG_VAR([enable-sse4-1], [$libc_cv_have_x86_sse4_1])
> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile
> b/sysdeps/x86_64/fpu/multiarch/Makefile
> > index e1a490dd98..5eeb106b79 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> > @@ -1,49 +1,4 @@
> >  ifeq ($(subdir),math)
> > -libm-sysdep_routines += \
> > -  s_ceil-c \
> > -  s_ceilf-c \
> > -  s_floor-c \
> > -  s_floorf-c \
> > -  s_nearbyint-c \
> > -  s_nearbyintf-c \
> > -  s_rint-c \
> > -  s_rintf-c \
> > -  s_roundeven-c \
> > -  s_roundevenf-c \
> > -  s_trunc-c \
> > -  s_truncf-c \
> > -# libm-sysdep_routines
> > -
> > -libm-sysdep_routines += \
> > -  s_ceil-sse4_1 \
> > -  s_ceilf-sse4_1 \
> > -  s_floor-sse4_1 \
> > -  s_floorf-sse4_1 \
> > -  s_nearbyint-sse4_1 \
> > -  s_nearbyintf-sse4_1 \
> > -  s_rint-sse4_1 \
> > -  s_rintf-sse4_1 \
> > -  s_roundeven-sse4_1 \
> > -  s_roundevenf-sse4_1 \
> > -  s_trunc-sse4_1 \
> > -  s_truncf-sse4_1 \
> > -# libm-sysdep_routines
> > -
> > -libm-sysdep_routines += \
> > -  e_asin-fma \
> > -  e_atan2-fma \
> > -  e_exp-fma \
> > -  e_log-fma \
> > -  e_log2-fma \
> > -  e_pow-fma \
> > -  s_atan-fma \
> > -  s_expm1-fma \
> > -  s_log1p-fma \
> > -  s_sin-fma \
> > -  s_sincos-fma \
> > -  s_tan-fma \
> > -# libm-sysdep_routines
> > -
> >  CFLAGS-e_asin-fma.c = -mfma -mavx2
> >  CFLAGS-e_atan2-fma.c = -mfma -mavx2
> >  CFLAGS-e_exp-fma.c = -mfma -mavx2
> > @@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
> >  CFLAGS-s_tan-fma.c = -mfma -mavx2
> >  CFLAGS-s_sincos-fma.c = -mfma -mavx2
> >
> > -libm-sysdep_routines += \
> > -  s_cosf-sse2 \
> > -  s_sincosf-sse2 \
> > -  s_sinf-sse2 \
> > -# libm-sysdep_routines
> > -
> > -libm-sysdep_routines += \
> > -  e_exp2f-fma \
> > -  e_expf-fma \
> > -  e_log2f-fma \
> > -  e_logf-fma \
> > -  e_powf-fma \
> > -  s_cosf-fma \
> > -  s_sincosf-fma \
> > -  s_sinf-fma \
> > -# libm-sysdep_routines
> > -
> >  CFLAGS-e_exp2f-fma.c = -mfma -mavx2
> >  CFLAGS-e_expf-fma.c = -mfma -mavx2
> >  CFLAGS-e_log2f-fma.c = -mfma -mavx2
> > @@ -83,17 +21,92 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
> >  CFLAGS-s_cosf-fma.c = -mfma -mavx2
> >  CFLAGS-s_sincosf-fma.c = -mfma -mavx2
> >
> > +ifeq ($(enable-avx2-fma),yes)
> >  libm-sysdep_routines += \
> > +  s_ceil-avx \
> > +  s_ceilf-avx \
> > +  s_floor-avx \
> > +  s_floorf-avx \
> > +  s_nearbyint-avx \
> > +  s_nearbyintf-avx \
> > +  s_rint-avx \
> > +  s_rintf-avx \
> > +  s_roundeven-avx \
> > +  s_roundevenf-avx \
> > +  s_trunc-avx \
> > +  s_truncf-avx \
> > +# libm-sysdep_routines
> > +else
> > +libm-sysdep_routines += \
> > +  e_asin-fma \
> >    e_asin-fma4 \
> > +  e_atan2-avx \
> > +  e_atan2-fma \
> >    e_atan2-fma4 \
> > +  e_exp-avx \
> > +  e_exp-fma \
> >    e_exp-fma4 \
> > +  e_exp2f-fma \
> > +  e_expf-fma \
> > +  e_log-avx \
> > +  e_log-fma \
> >    e_log-fma4 \
> > +  e_log2-fma \
> > +  e_log2f-fma \
> > +  e_logf-fma \
> > +  e_pow-fma \
> >    e_pow-fma4 \
> > +  e_powf-fma \
> > +  s_atan-avx \
> > +  s_atan-fma \
> >    s_atan-fma4 \
> > +  s_ceil-sse4_1 \
> > +  s_ceilf-sse4_1 \
> > +  s_cosf-fma \
> > +  s_cosf-sse2 \
> > +  s_expm1-fma \
> > +  s_floor-sse4_1 \
> > +  s_floorf-sse4_1 \
> > +  s_log1p-fma \
> > +  s_nearbyint-sse4_1 \
> > +  s_nearbyintf-sse4_1 \
> > +  s_rint-sse4_1 \
> > +  s_rintf-sse4_1 \
> > +  s_roundeven-sse4_1 \
> > +  s_roundevenf-sse4_1 \
> > +  s_sin-avx \
> > +  s_sin-fma \
> >    s_sin-fma4 \
> > +  s_sincos-avx \
> > +  s_sincos-fma \
> >    s_sincos-fma4 \
> > +  s_sincosf-fma \
> > +  s_sincosf-sse2 \
> > +  s_sinf-fma \
> > +  s_sinf-sse2 \
> > +  s_tan-avx \
> > +  s_tan-fma \
> >    s_tan-fma4 \
> > +  s_trunc-sse4_1 \
> > +  s_truncf-sse4_1 \
> >  # libm-sysdep_routines
> > +ifeq ($(enable-sse4-1),no)
> > +libm-sysdep_routines += \
> > +  s_ceil-c \
> > +  s_ceilf-c \
> > +  s_floor-c \
> > +  s_floorf-c \
> > +  s_nearbyint-c \
> > +  s_nearbyintf-c \
> > +  s_rint-c \
> > +  s_rintf-c \
> > +  s_roundeven-c \
> > +  s_roundevenf-c \
> > +  s_trunc-c \
> > +  s_truncf-c \
> > +# libm-sysdep_routines
> > +endif
> > +endif
> >
> >  CFLAGS-e_asin-fma4.c = -mfma4
> >  CFLAGS-e_atan2-fma4.c = -mfma4
> > @@ -105,16 +118,6 @@ CFLAGS-s_sin-fma4.c = -mfma4
> >  CFLAGS-s_tan-fma4.c = -mfma4
> >  CFLAGS-s_sincos-fma4.c = -mfma4
> >
> > -libm-sysdep_routines += \
> > -  e_atan2-avx \
> > -  e_exp-avx \
> > -  e_log-avx \
> > -  s_atan-avx \
> > -  s_sin-avx \
> > -  s_sincos-avx \
> > -  s_tan-avx \
> > -# libm-sysdep_routines
> > -
> >  CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
> >  CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
> >  CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c
> b/sysdeps/x86_64/fpu/multiarch/e_asin.c
> > index 2eaa6c2c04..3c1654ba3e 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
> > @@ -16,26 +16,28 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-finite.h>
> >
> >  extern double __redirect_ieee754_asin (double);
> >  extern double __redirect_ieee754_acos (double);
> >
> > -#define SYMBOL_NAME ieee754_asin
> > -#include "ifunc-fma4.h"
> > +# define SYMBOL_NAME ieee754_asin
> > +# include "ifunc-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
> >                        IFUNC_SELECTOR ());
> >  libm_alias_finite (__ieee754_asin, __asin)
> >
> > -#undef SYMBOL_NAME
> > -#define SYMBOL_NAME ieee754_acos
> > -#include "ifunc-fma4.h"
> > +# undef SYMBOL_NAME
> > +# define SYMBOL_NAME ieee754_acos
> > +# include "ifunc-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
> >                        IFUNC_SELECTOR ());
> >  libm_alias_finite (__ieee754_acos, __acos)
> >
> > -#define __ieee754_acos __ieee754_acos_sse2
> > -#define __ieee754_asin __ieee754_asin_sse2
> > +# define __ieee754_acos __ieee754_acos_sse2
> > +# define __ieee754_asin __ieee754_asin_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/e_asin.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
> > index 17ee4f3c36..f48ab8762a 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
> > @@ -16,16 +16,18 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-finite.h>
> >
> >  extern double __redirect_ieee754_atan2 (double, double);
> >
> > -#define SYMBOL_NAME ieee754_atan2
> > -#include "ifunc-avx-fma4.h"
> > +# define SYMBOL_NAME ieee754_atan2
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_ieee754_atan2,
> >                        __ieee754_atan2, IFUNC_SELECTOR ());
> >  libm_alias_finite (__ieee754_atan2, __atan2)
> >
> > -#define __ieee754_atan2 __ieee754_atan2_sse2
> > +# define __ieee754_atan2 __ieee754_atan2_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/e_atan2.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
> > index 406b7ebd44..034f5b894f 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <math.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <math.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern double __redirect_ieee754_exp (double);
> >
> > -#define SYMBOL_NAME ieee754_exp
> > -#include "ifunc-avx-fma4.h"
> > +# define SYMBOL_NAME ieee754_exp
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
> >                        IFUNC_SELECTOR ());
> >  libm_alias_finite (__ieee754_exp, __exp)
> >
> > -#define __exp __ieee754_exp_sse2
> > +# define __exp __ieee754_exp_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/e_exp.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
> > index 804fd6be85..74f92bfa0c 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
> > @@ -16,25 +16,27 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern float __redirect_exp2f (float);
> >
> > -#define SYMBOL_NAME exp2f
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME exp2f
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
> >
> > -#ifdef SHARED
> > +# ifdef SHARED
> >  versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
> >  libm_alias_float_other (__exp2, exp2)
> > -#else
> > +# else
> >  libm_alias_float (__exp2, exp2)
> > -#endif
> > +# endif
> >
> >  strong_alias (__exp2f, __ieee754_exp2f)
> >  libm_alias_finite (__exp2f, __exp2f)
> >
> > -#define __exp2f __exp2f_sse2
> > +# define __exp2f __exp2f_sse2
> > +#endif
> >  #include <sysdeps/ieee754/flt-32/e_exp2f.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
> > index 4a7e2a5bce..e8d6f393ff 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
> > @@ -16,28 +16,30 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern float __redirect_expf (float);
> >
> > -#define SYMBOL_NAME expf
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME expf
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
> >
> > -#ifdef SHARED
> > +# ifdef SHARED
> >  __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
> >    __attribute__ ((visibility ("hidden")));
> >
> >  versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
> >  libm_alias_float_other (__exp, exp)
> > -#else
> > +# else
> >  libm_alias_float (__exp, exp)
> > -#endif
> > +# endif
> >
> >  strong_alias (__expf, __ieee754_expf)
> >  libm_alias_finite (__expf, __expf)
> >
> > -#define __expf __expf_sse2
> > +# define __expf __expf_sse2
> > +#endif
> >  #include <sysdeps/ieee754/flt-32/e_expf.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
> > index 067fbf58c3..3a678235d9 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_log.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <math.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <math.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern double __redirect_ieee754_log (double);
> >
> > -#define SYMBOL_NAME ieee754_log
> > -#include "ifunc-avx-fma4.h"
> > +# define SYMBOL_NAME ieee754_log
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
> >                        IFUNC_SELECTOR ());
> >  libm_alias_finite (__ieee754_log, __log)
> >
> > -#define __log __ieee754_log_sse2
> > +# define __log __ieee754_log_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/e_log.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
> > index 9c57a2f6cc..c032758b4e 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
> > @@ -16,28 +16,30 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern double __redirect_log2 (double);
> >
> > -#define SYMBOL_NAME log2
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME log2
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
> >
> > -#ifdef SHARED
> > +# ifdef SHARED
> >  __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
> >    __attribute__ ((visibility ("hidden")));
> >
> >  versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
> >  libm_alias_double_other (__log2, log2)
> > -#else
> > +# else
> >  libm_alias_double (__log2, log2)
> > -#endif
> > +# endif
> >
> >  strong_alias (__log2, __ieee754_log2)
> >  libm_alias_finite (__log2, __log2)
> >
> > -#define __log2 __log2_sse2
> > +# define __log2 __log2_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/e_log2.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
> > index 2b45c87f38..0f8d1f0abc 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
> > @@ -16,28 +16,30 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern float __redirect_log2f (float);
> >
> > -#define SYMBOL_NAME log2f
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME log2f
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
> >
> > -#ifdef SHARED
> > +# ifdef SHARED
> >  __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
> >    __attribute__ ((visibility ("hidden")));
> >
> >  versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
> >  libm_alias_float_other (__log2, log2)
> > -#else
> > +# else
> >  libm_alias_float (__log2, log2)
> > -#endif
> > +# endif
> >
> >  strong_alias (__log2f, __ieee754_log2f)
> >  libm_alias_finite (__log2f, __log2f)
> >
> > -#define __log2f __log2f_sse2
> > +# define __log2f __log2f_sse2
> > +#endif
> >  #include <sysdeps/ieee754/flt-32/e_log2f.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
> > index 97e23c8fea..9d94dd614f 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
> > @@ -16,28 +16,30 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern float __redirect_logf (float);
> >
> > -#define SYMBOL_NAME logf
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME logf
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
> >
> > -#ifdef SHARED
> > +# ifdef SHARED
> >  __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
> >    __attribute__ ((visibility ("hidden")));
> >
> >  versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
> >  libm_alias_float_other (__log, log)
> > -#else
> > +# else
> >  libm_alias_float (__log, log)
> > -#endif
> > +# endif
> >
> >  strong_alias (__logf, __ieee754_logf)
> >  libm_alias_finite (__logf, __logf)
> >
> > -#define __logf __logf_sse2
> > +# define __logf __logf_sse2
> > +#endif
> >  #include <sysdeps/ieee754/flt-32/e_logf.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
> > index 42618e7112..07436d420c 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <math.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <math.h>
> > +# include <libm-alias-finite.h>
> >
> >  extern double __redirect_ieee754_pow (double, double);
> >
> > -#define SYMBOL_NAME ieee754_pow
> > -#include "ifunc-fma4.h"
> > +# define SYMBOL_NAME ieee754_pow
> > +# include "ifunc-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_ieee754_pow,
> >                        __ieee754_pow, IFUNC_SELECTOR ());
> >  libm_alias_finite (__ieee754_pow, __pow)
> >
> > -#define __pow __ieee754_pow_sse2
> > +# define __pow __ieee754_pow_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/e_pow.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
> > index 8e6ce13cc1..c64c8a4302 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
> > @@ -16,31 +16,33 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > -#include <libm-alias-finite.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> > +# include <libm-alias-finite.h>
> >
> > -#define powf __redirect_powf
> > -#define __DECL_SIMD___redirect_powf
> > -#include <math.h>
> > -#undef powf
> > +# define powf __redirect_powf
> > +# define __DECL_SIMD___redirect_powf
> > +# include <math.h>
> > +# undef powf
> >
> > -#define SYMBOL_NAME powf
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME powf
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
> >
> > -#ifdef SHARED
> > +# ifdef SHARED
> >  __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
> >    __attribute__ ((visibility ("hidden")));
> >
> >  versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
> >  libm_alias_float_other (__pow, pow)
> > -#else
> > +# else
> >  libm_alias_float (__pow, pow)
> > -#endif
> > +# endif
> >
> >  strong_alias (__powf, __ieee754_powf)
> >  libm_alias_finite (__powf, __powf)
> >
> > -#define __powf __powf_sse2
> > +# define __powf __powf_sse2
> > +#endif
> >  #include <sysdeps/ieee754/flt-32/e_powf.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
> > index 71bad096a9..f9ec4e7b37 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
> > @@ -16,15 +16,17 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> >  extern double __redirect_atan (double);
> >
> > -#define SYMBOL_NAME atan
> > -#include "ifunc-avx-fma4.h"
> > +# define SYMBOL_NAME atan
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
> >  libm_alias_double (__atan, atan)
> >
> > -#define __atan __atan_sse2
> > +# define __atan __atan_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/s_atan.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
> > new file mode 100644
> > index 0000000000..e6c1106753
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of ceil function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-double.h>
> > +
> > +       .text
> > +ENTRY(__ceil)
> > +       vroundsd $10, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__ceil)
> > +
> > +libm_alias_double (__ceil, ceil)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
> > index 64119011ad..4be069b8da 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-double.h>
> > +# define __ceil_sse41 __ceil
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__ceil_sse41)
> >         roundsd $10, %xmm0, %xmm0
> >         ret
> >  END(__ceil_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_double (__ceil, ceil)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
> > index cc028addee..0199863c8f 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-double.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-double.h>
> >
> > -#define ceil __redirect_ceil
> > -#define __ceil __redirect___ceil
> > -#include <math.h>
> > -#undef ceil
> > -#undef __ceil
> > +# define ceil __redirect_ceil
> > +# define __ceil __redirect___ceil
> > +# include <math.h>
> > +# undef ceil
> > +# undef __ceil
> >
> > -#define SYMBOL_NAME ceil
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME ceil
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
> >  libm_alias_double (__ceil, ceil)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
> > new file mode 100644
> > index 0000000000..b4d8ac0455
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of ceilf function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-float.h>
> > +
> > +       .text
> > +ENTRY(__ceilf)
> > +       vroundss $10, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__ceilf)
> > +
> > +libm_alias_float (__ceil, ceil)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
> > index dd9a9f6b71..1a85e9c925 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-float.h>
> > +# define __ceilf_sse41 __ceilf
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__ceilf_sse41)
> >         roundss $10, %xmm0, %xmm0
> >         ret
> >  END(__ceilf_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_float (__ceil, ceil)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
> > index 97a0ca7d19..dfce9225dd 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-float.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-float.h>
> >
> > -#define ceilf __redirect_ceilf
> > -#define __ceilf __redirect___ceilf
> > -#include <math.h>
> > -#undef ceilf
> > -#undef __ceilf
> > +# define ceilf __redirect_ceilf
> > +# define __ceilf __redirect___ceilf
> > +# include <math.h>
> > +# undef ceilf
> > +# undef __ceilf
> >
> > -#define SYMBOL_NAME ceilf
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME ceilf
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
> >  libm_alias_float (__ceil, ceil)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
> > index 2703c576df..9be9327b80 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
> > @@ -16,13 +16,17 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> >
> >  extern float __redirect_cosf (float);
> >
> > -#define SYMBOL_NAME cosf
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME cosf
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
> >
> >  libm_alias_float (__cos, cos)
> > +#else
> > +# include <sysdeps/ieee754/flt-32/s_cosf.c>
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
> > index 8a2d69f9b2..1ed45245cb 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
> > @@ -16,21 +16,23 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> >  extern double __redirect_expm1 (double);
> >
> > -#define SYMBOL_NAME expm1
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME expm1
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
> >  libm_alias_double (__expm1, expm1)
> >
> > -#define __expm1 __expm1_sse2
> > +# define __expm1 __expm1_sse2
> >
> >  /* NB: __expm1 may be expanded to __expm1_sse2 in the following
> >     prototypes.  */
> >  extern long double __expm1l (long double);
> >  extern long double __expm1f128 (long double);
> >
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/s_expm1.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
> > new file mode 100644
> > index 0000000000..ff74b5a8bf
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of floor function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-double.h>
> > +
> > +       .text
> > +ENTRY(__floor)
> > +       vroundsd $9, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__floor)
> > +
> > +libm_alias_double (__floor, floor)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
> > index 2f7521f39f..957d018177 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-double.h>
> > +# define __floor_sse41 __floor
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__floor_sse41)
> >         roundsd $9, %xmm0, %xmm0
> >         ret
> >  END(__floor_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_double (__floor, floor)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
> > index 8cebd48e10..a30c88671e 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-double.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-double.h>
> >
> > -#define floor __redirect_floor
> > -#define __floor __redirect___floor
> > -#include <math.h>
> > -#undef floor
> > -#undef __floor
> > +# define floor __redirect_floor
> > +# define __floor __redirect___floor
> > +# include <math.h>
> > +# undef floor
> > +# undef __floor
> >
> > -#define SYMBOL_NAME floor
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME floor
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
> >  libm_alias_double (__floor, floor)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
> > new file mode 100644
> > index 0000000000..c378baae8e
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of floorf function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-float.h>
> > +
> > +       .text
> > +ENTRY(__floorf)
> > +       vroundss $9, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__floorf)
> > +
> > +libm_alias_float (__floor, floor)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
> > index 5f6020d27d..eacabe167c 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-float.h>
> > +# define __floorf_sse41 __floorf
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__floorf_sse41)
> >         roundss $9, %xmm0, %xmm0
> >         ret
> >  END(__floorf_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_float (__floor, floor)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
> > index a14e18b03c..6531b78443 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-float.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-float.h>
> >
> > -#define floorf __redirect_floorf
> > -#define __floorf __redirect___floorf
> > -#include <math.h>
> > -#undef floorf
> > -#undef __floorf
> > +# define floorf __redirect_floorf
> > +# define __floorf __redirect___floorf
> > +# include <math.h>
> > +# undef floorf
> > +# undef __floorf
> >
> > -#define SYMBOL_NAME floorf
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME floorf
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
> >  libm_alias_float (__floor, floor)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
> > index a8e1a3f21b..76e1672e2d 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
> > @@ -16,14 +16,16 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> >  extern double __redirect_log1p (double);
> >
> > -#define SYMBOL_NAME log1p
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME log1p
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
> >
> > -#define __log1p __log1p_sse2
> > +# define __log1p __log1p_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/s_log1p.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
> > new file mode 100644
> > index 0000000000..5bfdf73c28
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of nearbyint function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-double.h>
> > +
> > +       .text
> > +ENTRY(__nearbyint)
> > +       vroundsd $0xc, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__nearbyint)
> > +
> > +libm_alias_double (__nearbyint, nearbyint)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
> > index 674f7eb40a..ee0b17e470 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-double.h>
> > +# define __nearbyint_sse41 __nearbyint
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__nearbyint_sse41)
> >         roundsd $0xc, %xmm0, %xmm0
> >         ret
> >  END(__nearbyint_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_double (__nearbyint, nearbyint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> > index 693e42dd4e..649a9df869 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> > -#define nearbyint __redirect_nearbyint
> > -#define __nearbyint __redirect___nearbyint
> > -#include <math.h>
> > -#undef nearbyint
> > -#undef __nearbyint
> > +# define nearbyint __redirect_nearbyint
> > +# define __nearbyint __redirect___nearbyint
> > +# include <math.h>
> > +# undef nearbyint
> > +# undef __nearbyint
> >
> > -#define SYMBOL_NAME nearbyint
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME nearbyint
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
> >                        IFUNC_SELECTOR ());
> >  libm_alias_double (__nearbyint, nearbyint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
> > new file mode 100644
> > index 0000000000..1dbaed0324
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of nearbyintf function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-float.h>
> > +
> > +       .text
> > +ENTRY(__nearbyintf)
> > +       vroundss $0xc, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__nearbyintf)
> > +
> > +libm_alias_float (__nearbyint, nearbyint)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> > index 5892bd7563..8b3e307b78 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-float.h>
> > +# define __nearbyintf_sse41 __nearbyintf
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__nearbyintf_sse41)
> >         roundss $0xc, %xmm0, %xmm0
> >         ret
> >  END(__nearbyintf_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_float (__nearbyint, nearbyint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> > index a0ac009f4b..7762467ad9 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> >
> > -#define nearbyintf __redirect_nearbyintf
> > -#define __nearbyintf __redirect___nearbyintf
> > -#include <math.h>
> > -#undef nearbyintf
> > -#undef __nearbyintf
> > +# define nearbyintf __redirect_nearbyintf
> > +# define __nearbyintf __redirect___nearbyintf
> > +# include <math.h>
> > +# undef nearbyintf
> > +# undef __nearbyintf
> >
> > -#define SYMBOL_NAME nearbyintf
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME nearbyintf
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
> >                        IFUNC_SELECTOR ());
> >  libm_alias_float (__nearbyint, nearbyint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
> > new file mode 100644
> > index 0000000000..2b403b331f
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of rint function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-double.h>
> > +
> > +       .text
> > +ENTRY(__rint)
> > +       vroundsd $4, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__rint)
> > +
> > +libm_alias_double (__rint, rint)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> > index 405372991b..4c7c1c37de 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-double.h>
> > +# define __rint_sse41 __rint
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__rint_sse41)
> >         roundsd $4, %xmm0, %xmm0
> >         ret
> >  END(__rint_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_double (__rint, rint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c
> b/sysdeps/x86_64/fpu/multiarch/s_rint.c
> > index 754c87e004..49693c9728 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-double.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-double.h>
> >
> > -#define rint __redirect_rint
> > -#define __rint __redirect___rint
> > -#include <math.h>
> > -#undef rint
> > -#undef __rint
> > +# define rint __redirect_rint
> > +# define __rint __redirect___rint
> > +# include <math.h>
> > +# undef rint
> > +# undef __rint
> >
> > -#define SYMBOL_NAME rint
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME rint
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
> >  libm_alias_double (__rint, rint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
> > new file mode 100644
> > index 0000000000..171c2867f4
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of rintf function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-float.h>
> > +
> > +       .text
> > +ENTRY(__rintf)
> > +       vroundss $4, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__rintf)
> > +
> > +libm_alias_float (__rint, rint)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> > index 8ac67ce767..55443d7238 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-float.h>
> > +# define __rintf_sse41 __rintf
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__rintf_sse41)
> >         roundss $4, %xmm0, %xmm0
> >         ret
> >  END(__rintf_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_float (__rint, rint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> > index e9d6b7a5f2..c7cf09701d 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-float.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-float.h>
> >
> > -#define rintf __redirect_rintf
> > -#define __rintf __redirect___rintf
> > -#include <math.h>
> > -#undef rintf
> > -#undef __rintf
> > +# define rintf __redirect_rintf
> > +# define __rintf __redirect___rintf
> > +# include <math.h>
> > +# undef rintf
> > +# undef __rintf
> >
> > -#define SYMBOL_NAME rintf
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME rintf
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
> >  libm_alias_float (__rint, rint)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
> > new file mode 100644
> > index 0000000000..576790355c
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of roundeven function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-double.h>
> > +
> > +       .text
> > +ENTRY(__roundeven)
> > +       vroundsd $8, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__roundeven)
> > +
> > +libm_alias_double (__roundeven, roundeven)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> > index 5ef102336b..f0644cce81 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-double.h>
> > +# define __roundeven_sse41 __roundeven
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__roundeven_sse41)
> >         roundsd $8, %xmm0, %xmm0
> >         ret
> >  END(__roundeven_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_double (__roundeven, roundeven)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> > index 8737b32e26..a250297918 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
> > @@ -16,16 +16,18 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> > -#define roundeven __redirect_roundeven
> > -#define __roundeven __redirect___roundeven
> > -#include <math.h>
> > -#undef roundeven
> > -#undef __roundeven
> > +# define roundeven __redirect_roundeven
> > +# define __roundeven __redirect___roundeven
> > +# include <math.h>
> > +# undef roundeven
> > +# undef __roundeven
> >
> > -#define SYMBOL_NAME roundeven
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME roundeven
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
> >  libm_alias_double (__roundeven, roundeven)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
> > new file mode 100644
> > index 0000000000..42c359f4cd
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of roundevenf function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-float.h>
> > +
> > +       .text
> > +ENTRY(__roundevenf)
> > +       vroundss $8, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__roundevenf)
> > +
> > +libm_alias_float (__roundeven, roundeven)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> > index 792c90ba07..d1dd6b0e8b 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
> > @@ -17,8 +17,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-float.h>
> > +# define __roundevenf_sse41 __roundevenf
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__roundevenf_sse41)
> >         roundss $8, %xmm0, %xmm0
> >         ret
> >  END(__roundevenf_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_float (__roundeven, roundeven)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> > index e96016a4d5..534941e67f 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
> > @@ -16,16 +16,18 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> >
> > -#define roundevenf __redirect_roundevenf
> > -#define __roundevenf __redirect___roundevenf
> > -#include <math.h>
> > -#undef roundevenf
> > -#undef __roundevenf
> > +# define roundevenf __redirect_roundevenf
> > +# define __roundevenf __redirect___roundevenf
> > +# include <math.h>
> > +# undef roundevenf
> > +# undef __roundevenf
> >
> > -#define SYMBOL_NAME roundevenf
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME roundevenf
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
> >  libm_alias_float (__roundeven, roundeven)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c
> b/sysdeps/x86_64/fpu/multiarch/s_sin.c
> > index 355cc0092e..21eaa5e984 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
> > @@ -16,24 +16,26 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> >  extern double __redirect_sin (double);
> >  extern double __redirect_cos (double);
> >
> > -#define SYMBOL_NAME sin
> > -#include "ifunc-avx-fma4.h"
> > +# define SYMBOL_NAME sin
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
> >  libm_alias_double (__sin, sin)
> >
> > -#undef SYMBOL_NAME
> > -#define SYMBOL_NAME cos
> > -#include "ifunc-avx-fma4.h"
> > +# undef SYMBOL_NAME
> > +# define SYMBOL_NAME cos
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
> >  libm_alias_double (__cos, cos)
> >
> > -#define __cos __cos_sse2
> > -#define __sin __sin_sse2
> > +# define __cos __cos_sse2
> > +# define __sin __sin_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/s_sin.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> > index 70107e999c..729163cdde 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
> > @@ -16,15 +16,17 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> >  extern void __redirect_sincos (double, double *, double *);
> >
> > -#define SYMBOL_NAME sincos
> > -#include "ifunc-fma4.h"
> > +# define SYMBOL_NAME sincos
> > +# include "ifunc-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
> >  libm_alias_double (__sincos, sincos)
> >
> > -#define __sincos __sincos_sse2
> > +# define __sincos __sincos_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/s_sincos.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> > index 80bc028451..136dd62c81 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
> > @@ -16,13 +16,17 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> >
> >  extern void __redirect_sincosf (float, float *, float *);
> >
> > -#define SYMBOL_NAME sincosf
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME sincosf
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
> >
> >  libm_alias_float (__sincos, sincos)
> > +#else
> > +# include <sysdeps/ieee754/flt-32/s_sincosf.c>
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> > index a32b9e9550..fabbf55604 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> > @@ -16,13 +16,17 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-float.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-float.h>
> >
> >  extern float __redirect_sinf (float);
> >
> > -#define SYMBOL_NAME sinf
> > -#include "ifunc-fma.h"
> > +# define SYMBOL_NAME sinf
> > +# include "ifunc-fma.h"
> >
> >  libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
> >
> >  libm_alias_float (__sin, sin)
> > +#else
> > +# include <sysdeps/ieee754/flt-32/s_sinf.c>
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c
> b/sysdeps/x86_64/fpu/multiarch/s_tan.c
> > index f9a2474a13..c85e327ff8 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
> > @@ -16,15 +16,17 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <libm-alias-double.h>
> > +#ifndef HAVE_X86_AVX2_FMA
> > +# include <libm-alias-double.h>
> >
> >  extern double __redirect_tan (double);
> >
> > -#define SYMBOL_NAME tan
> > -#include "ifunc-avx-fma4.h"
> > +# define SYMBOL_NAME tan
> > +# include "ifunc-avx-fma4.h"
> >
> >  libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
> >  libm_alias_double (__tan, tan)
> >
> > -#define __tan __tan_sse2
> > +# define __tan __tan_sse2
> > +#endif
> >  #include <sysdeps/ieee754/dbl-64/s_tan.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
> > new file mode 100644
> > index 0000000000..b3e87e9606
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of trunc function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-double.h>
> > +
> > +       .text
> > +ENTRY(__trunc)
> > +       vroundsd $11, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__trunc)
> > +
> > +libm_alias_double (__trunc, trunc)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> > index b496a6ef49..062cd1fb36 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
> > @@ -18,8 +18,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-double.h>
> > +# define __trunc_sse41 __trunc
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__trunc_sse41)
> >         roundsd $11, %xmm0, %xmm0
> >         ret
> >  END(__trunc_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_double (__trunc, trunc)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> > index 9bc9df8744..568e818826 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-double.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-double.h>
> >
> > -#define trunc __redirect_trunc
> > -#define __trunc __redirect___trunc
> > -#include <math.h>
> > -#undef trunc
> > -#undef __trunc
> > +# define trunc __redirect_trunc
> > +# define __trunc __redirect___trunc
> > +# include <math.h>
> > +# undef trunc
> > +# undef __trunc
> >
> > -#define SYMBOL_NAME trunc
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME trunc
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
> >  libm_alias_double (__trunc, trunc)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
> b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
> > new file mode 100644
> > index 0000000000..f31ac7d7f7
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
> > @@ -0,0 +1,28 @@
> > +/* AVX implementation of truncf function.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +#include <libm-alias-float.h>
> > +
> > +       .text
> > +ENTRY(__truncf)
> > +       vroundss $11, %xmm0, %xmm0, %xmm0
> > +       ret
> > +END(__truncf)
> > +
> > +libm_alias_float (__trunc, trunc)
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> > index 22e9a83307..ecd0ae5c05 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
> > @@ -18,8 +18,19 @@
> >
> >  #include <sysdep.h>
> >
> > +#ifdef HAVE_X86_SSE4_1
> > +# include <libm-alias-float.h>
> > +# define __truncf_sse41 __truncf
> > +       .text
> > +#else
> >         .section .text.sse4.1,"ax",@progbits
> > +#endif
> > +
> >  ENTRY(__truncf_sse41)
> >         roundss $11, %xmm0, %xmm0
> >         ret
> >  END(__truncf_sse41)
> > +
> > +#ifdef HAVE_X86_SSE4_1
> > +libm_alias_float (__trunc, trunc)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> > index dae01d166a..57783c805a 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
> > @@ -16,17 +16,19 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define NO_MATH_REDIRECT
> > -#include <libm-alias-float.h>
> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
> > +# define NO_MATH_REDIRECT
> > +# include <libm-alias-float.h>
> >
> > -#define truncf __redirect_truncf
> > -#define __truncf __redirect___truncf
> > -#include <math.h>
> > -#undef truncf
> > -#undef __truncf
> > +# define truncf __redirect_truncf
> > +# define __truncf __redirect___truncf
> > +# include <math.h>
> > +# undef truncf
> > +# undef __truncf
> >
> > -#define SYMBOL_NAME truncf
> > -#include "ifunc-sse4_1.h"
> > +# define SYMBOL_NAME truncf
> > +# include "ifunc-sse4_1.h"
> >
> >  libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
> >  libm_alias_float (__trunc, trunc)
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c
> b/sysdeps/x86_64/fpu/multiarch/w_exp.c
> > index 27eee98a0a..fb2045e6cf 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
> > @@ -1 +1,5 @@
> > -#include <sysdeps/../math/w_exp.c>
> > +#ifdef HAVE_X86_AVX2_FMA
> > +# include <sysdeps/ieee754/dbl-64/w_exp.c>
> > +#else
> > +# include <sysdeps/../math/w_exp.c>
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c
> b/sysdeps/x86_64/fpu/multiarch/w_log.c
> > index 9b2b018711..b85be8221e 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/w_log.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
> > @@ -1 +1,5 @@
> > -#include <sysdeps/../math/w_log.c>
> > +#ifdef HAVE_X86_AVX2_FMA
> > +# include <sysdeps/ieee754/dbl-64/w_log.c>
> > +#else
> > +# include <sysdeps/../math/w_log.c>
> > +#endif
> > diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c
> b/sysdeps/x86_64/fpu/multiarch/w_pow.c
> > index b50c1988de..849f4f97ff 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
> > +++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
> > @@ -1 +1,5 @@
> > -#include <sysdeps/../math/w_pow.c>
> > +#ifdef HAVE_X86_AVX2_FMA
> > +# include <sysdeps/ieee754/dbl-64/w_pow.c>
> > +#else
> > +# include <sysdeps/../math/w_pow.c>
> > +#endif
> > --
> > 2.43.0
> >
>
Noah Goldstein Feb. 20, 2024, 5:56 p.m. UTC | #3
On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>
>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>> >
>> > When glibc is built with FMA and AVX2 enabled by default, the resulting
>> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
>> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
>> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
>> > processors, including VMs, should also support FMA and vice versa.
>> >
>> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
>> > variant.
>> Not avx2 + FMA as well?
>
>
> Correct. Logic is as follows
> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> else if (build with SSE4.1): Keep SSE4.1 variants only.
What if someone builds with SSE4.1 as a minimum but then
runs on AVX2+ machines?
For string/memory at least, we only skip the impls that the built
ISA level precludes; we don't assume the built ISA level also stands
as the max.
> else: No change.
>
>> >
>> > Fixes BZ 31335.
>> > ---
>> >  config.h.in                                   |   5 +
>> >  sysdeps/x86/configure                         |  77 +++++++++
>> >  sysdeps/x86/configure.ac                      |  44 ++++++
>> >  sysdeps/x86_64/fpu/multiarch/Makefile         | 147 +++++++++---------
>> >  sysdeps/x86_64/fpu/multiarch/e_asin.c         |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/e_atan2.c        |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/e_exp.c          |  12 +-
>> >  sysdeps/x86_64/fpu/multiarch/e_exp2f.c        |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/e_expf.c         |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/e_log.c          |  12 +-
>> >  sysdeps/x86_64/fpu/multiarch/e_log2.c         |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/e_log2f.c        |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/e_logf.c         |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/e_pow.c          |  12 +-
>> >  sysdeps/x86_64/fpu/multiarch/e_powf.c         |  26 ++--
>> >  sysdeps/x86_64/fpu/multiarch/s_atan.c         |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S     |  28 ++++
>> >  sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S  |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_ceil.c         |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S    |  28 ++++
>> >  sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_ceilf.c        |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/s_cosf.c         |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_expm1.c        |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_floor-avx.S    |  28 ++++
>> >  sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_floor.c        |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S   |  28 ++++
>> >  .../x86_64/fpu/multiarch/s_floorf-sse4_1.S    |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_floorf.c       |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/s_log1p.c        |  10 +-
>> >  .../x86_64/fpu/multiarch/s_nearbyint-avx.S    |  28 ++++
>> >  .../x86_64/fpu/multiarch/s_nearbyint-sse4_1.S |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_nearbyint.c    |  18 ++-
>> >  .../x86_64/fpu/multiarch/s_nearbyintf-avx.S   |  28 ++++
>> >  .../fpu/multiarch/s_nearbyintf-sse4_1.S       |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c   |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/s_rint-avx.S     |  28 ++++
>> >  sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S  |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_rint.c         |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S    |  28 ++++
>> >  sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_rintf.c        |  20 +--
>> >  .../x86_64/fpu/multiarch/s_roundeven-avx.S    |  28 ++++
>> >  .../x86_64/fpu/multiarch/s_roundeven-sse4_1.S |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_roundeven.c    |  18 ++-
>> >  .../x86_64/fpu/multiarch/s_roundevenf-avx.S   |  28 ++++
>> >  .../fpu/multiarch/s_roundevenf-sse4_1.S       |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_roundevenf.c   |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/s_sin.c          |  18 ++-
>> >  sysdeps/x86_64/fpu/multiarch/s_sincos.c       |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_sincosf.c      |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_sinf.c         |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_tan.c          |  10 +-
>> >  sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S    |  28 ++++
>> >  sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_trunc.c        |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S   |  28 ++++
>> >  .../x86_64/fpu/multiarch/s_truncf-sse4_1.S    |  11 ++
>> >  sysdeps/x86_64/fpu/multiarch/s_truncf.c       |  20 +--
>> >  sysdeps/x86_64/fpu/multiarch/w_exp.c          |   6 +-
>> >  sysdeps/x86_64/fpu/multiarch/w_log.c          |   6 +-
>> >  sysdeps/x86_64/fpu/multiarch/w_pow.c          |   6 +-
>> >  63 files changed, 974 insertions(+), 295 deletions(-)
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
>> >  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
>> >
>> > diff --git a/config.h.in b/config.h.in
>> > index 2f0669e19b..0a9626cbe8 100644
>> > --- a/config.h.in
>> > +++ b/config.h.in
>> > @@ -292,4 +292,9 @@
>> >  /* Define if -mmovbe is enabled by default on x86.  */
>> >  #undef HAVE_X86_MOVBE
>> >
>> > +/* Define if -msse4.1 is enabled by default on x86.  */
>> > +#undef HAVE_X86_SSE4_1
>> > +
>> > +/* Define if -mavx2 and -mfma are enabled by default on x86.  */
>> > +#undef HAVE_X86_AVX2_FMA
>> >  #endif
>> > diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
>> > index 1f4c2d67fd..1c0e0d0640 100644
>> > --- a/sysdeps/x86/configure
>> > +++ b/sysdeps/x86/configure
>> > @@ -128,3 +128,80 @@ enable-x86-isa-level = $libc_cv_include_x86_isa_level"
>> >  printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
>> >
>> >
>> > +# Check if AVX2 and FMA are available.
>> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for AVX2 and FMA instruction support" >&5
>> > +printf %s "checking for AVX2 and FMA instruction support... " >&6; }
>> > +if test ${libc_cv_have_x86_avx2_fma+y}
>> > +then :
>> > +  printf %s "(cached) " >&6
>> > +else $as_nop
>> > +  cat > conftest.c <<EOF
>> > +#if !defined __AVX2__ || !defined __FMA__
>> > +# error AVX2 and/or FMA are disabled.
>> > +# if defined __AVX2__ || defined __FMA__
>> > +#  error Only one of AVX2 and FMA is enabled.
>> > +# endif
>> > +#endif
>> > +EOF
>> > +              if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&conftest.err'
>> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>> > +  (eval $ac_try) 2>&5
>> > +  ac_status=$?
>> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>> > +  test $ac_status = 0; }; }; then
>> > +                libc_cv_have_x86_avx2_fma=yes
>> > +              else
>> > +                if { ac_try='grep -q "Only one of AVX2 and FMA is enabled" conftest.err'
>> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>> > +  (eval $ac_try) 2>&5
>> > +  ac_status=$?
>> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>> > +  test $ac_status = 0; }; }; then
>> > +                  as_fn_error $? "Only one of AVX2 and FMA is enabled." "$LINENO" 5
>> > +                fi
>> > +                libc_cv_have_x86_avx2_fma=no
>> > +              fi
>> > +              rm -rf conftest*
>> > +fi
>> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_avx2_fma" >&5
>> > +printf "%s\n" "$libc_cv_have_x86_avx2_fma" >&6; }
>> > +if test $libc_cv_have_x86_avx2_fma = yes; then
>> > +  printf "%s\n" "#define HAVE_X86_AVX2_FMA 1" >>confdefs.h
>> > +
>> > +fi
>> > +config_vars="$config_vars
>> > +enable-avx2-fma = $libc_cv_have_x86_avx2_fma"
>> > +
>> > +# Check if SSE4.1 is available.
>> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SSE4.1 instruction support" >&5
>> > +printf %s "checking for SSE4.1 instruction support... " >&6; }
>> > +if test ${libc_cv_have_x86_sse4_1+y}
>> > +then :
>> > +  printf %s "(cached) " >&6
>> > +else $as_nop
>> > +  cat > conftest.c <<EOF
>> > +#if !defined __SSE4_1__
>> > +# error SSE4.1 is not available.
>> > +#endif
>> > +EOF
>> > +              if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&5'
>> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>> > +  (eval $ac_try) 2>&5
>> > +  ac_status=$?
>> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>> > +  test $ac_status = 0; }; }; then
>> > +                libc_cv_have_x86_sse4_1=yes
>> > +              else
>> > +                libc_cv_have_x86_sse4_1=no
>> > +              fi
>> > +              rm -rf conftest*
>> > +fi
>> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_sse4_1" >&5
>> > +printf "%s\n" "$libc_cv_have_x86_sse4_1" >&6; }
>> > +if test $libc_cv_have_x86_sse4_1 = yes; then
>> > +  printf "%s\n" "#define HAVE_X86_SSE4_1 1" >>confdefs.h
>> > +
>> > +fi
>> > +config_vars="$config_vars
>> > +enable-sse4-1 = $libc_cv_have_x86_sse4_1"
>> > +
>> > diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
>> > index 437a50623b..df3db3fdc2 100644
>> > --- a/sysdeps/x86/configure.ac
>> > +++ b/sysdeps/x86/configure.ac
>> > @@ -87,3 +87,47 @@ LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
>> >
>> >  dnl Static PIE is supported.
>> >  AC_DEFINE(SUPPORT_STATIC_PIE)
>> > +
>> > +# Check if AVX2 and FMA are available.
>> > +AC_CACHE_CHECK([for AVX2 and FMA instruction support],
>> > +              libc_cv_have_x86_avx2_fma, [dnl
>> > +cat > conftest.c <<EOF
>> > +#if !defined __AVX2__ || !defined __FMA__
>> > +# error AVX2 and/or FMA are disabled.
>> > +# if defined __AVX2__ || defined __FMA__
>> > +#  error Only one of AVX2 and FMA is enabled.
>> > +# endif
>> > +#endif
>> > +EOF
>> > +              if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c 1>&conftest.err); then
>> > +                libc_cv_have_x86_avx2_fma=yes
>> > +              else
>> > +                if AC_TRY_COMMAND(grep -q "Only one of AVX2 and FMA is enabled" conftest.err); then
>> > +                  AC_MSG_ERROR([Only one of AVX2 and FMA is enabled.])
>> > +                fi
>> > +                libc_cv_have_x86_avx2_fma=no
>> > +              fi
>> > +              rm -rf conftest*])
>> > +if test $libc_cv_have_x86_avx2_fma = yes; then
>> > +  AC_DEFINE(HAVE_X86_AVX2_FMA)
>> > +fi
>> > +LIBC_CONFIG_VAR([enable-avx2-fma], [$libc_cv_have_x86_avx2_fma])
>> > +
>> > +# Check if SSE4.1 is available.
>> > +AC_CACHE_CHECK([for SSE4.1 instruction support],
>> > +              libc_cv_have_x86_sse4_1, [dnl
>> > +cat > conftest.c <<EOF
>> > +#if !defined __SSE4_1__
>> > +# error SSE4.1 is not available.
>> > +#endif
>> > +EOF
>> > +              if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c 1>&AS_MESSAGE_LOG_FD); then
>> > +                libc_cv_have_x86_sse4_1=yes
>> > +              else
>> > +                libc_cv_have_x86_sse4_1=no
>> > +              fi
>> > +              rm -rf conftest*])
>> > +if test $libc_cv_have_x86_sse4_1 = yes; then
>> > +  AC_DEFINE(HAVE_X86_SSE4_1)
>> > +fi
>> > +LIBC_CONFIG_VAR([enable-sse4-1], [$libc_cv_have_x86_sse4_1])
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
>> > index e1a490dd98..5eeb106b79 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile
>> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
>> > @@ -1,49 +1,4 @@
>> >  ifeq ($(subdir),math)
>> > -libm-sysdep_routines += \
>> > -  s_ceil-c \
>> > -  s_ceilf-c \
>> > -  s_floor-c \
>> > -  s_floorf-c \
>> > -  s_nearbyint-c \
>> > -  s_nearbyintf-c \
>> > -  s_rint-c \
>> > -  s_rintf-c \
>> > -  s_roundeven-c \
>> > -  s_roundevenf-c \
>> > -  s_trunc-c \
>> > -  s_truncf-c \
>> > -# libm-sysdep_routines
>> > -
>> > -libm-sysdep_routines += \
>> > -  s_ceil-sse4_1 \
>> > -  s_ceilf-sse4_1 \
>> > -  s_floor-sse4_1 \
>> > -  s_floorf-sse4_1 \
>> > -  s_nearbyint-sse4_1 \
>> > -  s_nearbyintf-sse4_1 \
>> > -  s_rint-sse4_1 \
>> > -  s_rintf-sse4_1 \
>> > -  s_roundeven-sse4_1 \
>> > -  s_roundevenf-sse4_1 \
>> > -  s_trunc-sse4_1 \
>> > -  s_truncf-sse4_1 \
>> > -# libm-sysdep_routines
>> > -
>> > -libm-sysdep_routines += \
>> > -  e_asin-fma \
>> > -  e_atan2-fma \
>> > -  e_exp-fma \
>> > -  e_log-fma \
>> > -  e_log2-fma \
>> > -  e_pow-fma \
>> > -  s_atan-fma \
>> > -  s_expm1-fma \
>> > -  s_log1p-fma \
>> > -  s_sin-fma \
>> > -  s_sincos-fma \
>> > -  s_tan-fma \
>> > -# libm-sysdep_routines
>> > -
>> >  CFLAGS-e_asin-fma.c = -mfma -mavx2
>> >  CFLAGS-e_atan2-fma.c = -mfma -mavx2
>> >  CFLAGS-e_exp-fma.c = -mfma -mavx2
>> > @@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
>> >  CFLAGS-s_tan-fma.c = -mfma -mavx2
>> >  CFLAGS-s_sincos-fma.c = -mfma -mavx2
>> >
>> > -libm-sysdep_routines += \
>> > -  s_cosf-sse2 \
>> > -  s_sincosf-sse2 \
>> > -  s_sinf-sse2 \
>> > -# libm-sysdep_routines
>> > -
>> > -libm-sysdep_routines += \
>> > -  e_exp2f-fma \
>> > -  e_expf-fma \
>> > -  e_log2f-fma \
>> > -  e_logf-fma \
>> > -  e_powf-fma \
>> > -  s_cosf-fma \
>> > -  s_sincosf-fma \
>> > -  s_sinf-fma \
>> > -# libm-sysdep_routines
>> > -
>> >  CFLAGS-e_exp2f-fma.c = -mfma -mavx2
>> >  CFLAGS-e_expf-fma.c = -mfma -mavx2
>> >  CFLAGS-e_log2f-fma.c = -mfma -mavx2
>> > @@ -83,17 +21,92 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
>> >  CFLAGS-s_cosf-fma.c = -mfma -mavx2
>> >  CFLAGS-s_sincosf-fma.c = -mfma -mavx2
>> >
>> > +ifeq ($(enable-avx2-fma),yes)
>> >  libm-sysdep_routines += \
>> > +  s_ceil-avx \
>> > +  s_ceilf-avx \
>> > +  s_floor-avx \
>> > +  s_floorf-avx \
>> > +  s_nearbyint-avx \
>> > +  s_nearbyintf-avx \
>> > +  s_rint-avx \
>> > +  s_rintf-avx \
>> > +  s_roundeven-avx \
>> > +  s_roundevenf-avx \
>> > +  s_trunc-avx \
>> > +  s_truncf-avx \
>> > +# libm-sysdep_routines
>> > +else
>> > +libm-sysdep_routines += \
>> > +  e_asin-fma \
>> >    e_asin-fma4 \
>> > +  e_atan2-avx \
>> > +  e_atan2-fma \
>> >    e_atan2-fma4 \
>> > +  e_exp-avx \
>> > +  e_exp-fma \
>> >    e_exp-fma4 \
>> > +  e_exp2f-fma \
>> > +  e_expf-fma \
>> > +  e_log-avx \
>> > +  e_log-fma \
>> >    e_log-fma4 \
>> > +  e_log2-fma \
>> > +  e_log2f-fma \
>> > +  e_logf-fma \
>> > +  e_pow-fma \
>> >    e_pow-fma4 \
>> > +  e_powf-fma \
>> > +  s_atan-avx \
>> > +  s_atan-fma \
>> >    s_atan-fma4 \
>> > +  s_ceil-sse4_1 \
>> > +  s_ceilf-sse4_1 \
>> > +  s_cosf-fma \
>> > +  s_cosf-sse2 \
>> > +  s_expm1-fma \
>> > +  s_floor-sse4_1 \
>> > +  s_floorf-sse4_1 \
>> > +  s_log1p-fma \
>> > +  s_nearbyint-sse4_1 \
>> > +  s_nearbyintf-sse4_1 \
>> > +  s_rint-sse4_1 \
>> > +  s_rintf-sse4_1 \
>> > +  s_roundeven-sse4_1 \
>> > +  s_roundevenf-sse4_1 \
>> > +  s_sin-avx \
>> > +  s_sin-fma \
>> >    s_sin-fma4 \
>> > +  s_sincos-avx \
>> > +  s_sincos-fma \
>> >    s_sincos-fma4 \
>> > +  s_sincosf-fma \
>> > +  s_sincosf-sse2 \
>> > +  s_sinf-fma \
>> > +  s_sinf-sse2 \
>> > +  s_tan-avx \
>> > +  s_tan-fma \
>> >    s_tan-fma4 \
>> > +  s_trunc-sse4_1 \
>> > +  s_truncf-sse4_1 \
>> >  # libm-sysdep_routines
>> > +ifeq ($(enable-sse4-1),no)
>> > +libm-sysdep_routines += \
>> > +  s_ceil-c \
>> > +  s_ceilf-c \
>> > +  s_floor-c \
>> > +  s_floorf-c \
>> > +  s_nearbyint-c \
>> > +  s_nearbyintf-c \
>> > +  s_rint-c \
>> > +  s_rintf-c \
>> > +  s_roundeven-c \
>> > +  s_roundevenf-c \
>> > +  s_trunc-c \
>> > +  s_truncf-c \
>> > +# libm-sysdep_routines
>> > +endif
>> > +endif
>> >
>> >  CFLAGS-e_asin-fma4.c = -mfma4
>> >  CFLAGS-e_atan2-fma4.c = -mfma4
>> > @@ -105,16 +118,6 @@ CFLAGS-s_sin-fma4.c = -mfma4
>> >  CFLAGS-s_tan-fma4.c = -mfma4
>> >  CFLAGS-s_sincos-fma4.c = -mfma4
>> >
>> > -libm-sysdep_routines += \
>> > -  e_atan2-avx \
>> > -  e_exp-avx \
>> > -  e_log-avx \
>> > -  s_atan-avx \
>> > -  s_sin-avx \
>> > -  s_sincos-avx \
>> > -  s_tan-avx \
>> > -# libm-sysdep_routines
>> > -
>> >  CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
>> >  CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
>> >  CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
>> > index 2eaa6c2c04..3c1654ba3e 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
>> > @@ -16,26 +16,28 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern double __redirect_ieee754_asin (double);
>> >  extern double __redirect_ieee754_acos (double);
>> >
>> > -#define SYMBOL_NAME ieee754_asin
>> > -#include "ifunc-fma4.h"
>> > +# define SYMBOL_NAME ieee754_asin
>> > +# include "ifunc-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
>> >                        IFUNC_SELECTOR ());
>> >  libm_alias_finite (__ieee754_asin, __asin)
>> >
>> > -#undef SYMBOL_NAME
>> > -#define SYMBOL_NAME ieee754_acos
>> > -#include "ifunc-fma4.h"
>> > +# undef SYMBOL_NAME
>> > +# define SYMBOL_NAME ieee754_acos
>> > +# include "ifunc-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
>> >                        IFUNC_SELECTOR ());
>> >  libm_alias_finite (__ieee754_acos, __acos)
>> >
>> > -#define __ieee754_acos __ieee754_acos_sse2
>> > -#define __ieee754_asin __ieee754_asin_sse2
>> > +# define __ieee754_acos __ieee754_acos_sse2
>> > +# define __ieee754_asin __ieee754_asin_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/e_asin.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
>> > index 17ee4f3c36..f48ab8762a 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
>> > @@ -16,16 +16,18 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern double __redirect_ieee754_atan2 (double, double);
>> >
>> > -#define SYMBOL_NAME ieee754_atan2
>> > -#include "ifunc-avx-fma4.h"
>> > +# define SYMBOL_NAME ieee754_atan2
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ieee754_atan2,
>> >                        __ieee754_atan2, IFUNC_SELECTOR ());
>> >  libm_alias_finite (__ieee754_atan2, __atan2)
>> >
>> > -#define __ieee754_atan2 __ieee754_atan2_sse2
>> > +# define __ieee754_atan2 __ieee754_atan2_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/e_atan2.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
>> > index 406b7ebd44..034f5b894f 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <math.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <math.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern double __redirect_ieee754_exp (double);
>> >
>> > -#define SYMBOL_NAME ieee754_exp
>> > -#include "ifunc-avx-fma4.h"
>> > +# define SYMBOL_NAME ieee754_exp
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
>> >                        IFUNC_SELECTOR ());
>> >  libm_alias_finite (__ieee754_exp, __exp)
>> >
>> > -#define __exp __ieee754_exp_sse2
>> > +# define __exp __ieee754_exp_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/e_exp.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
>> > index 804fd6be85..74f92bfa0c 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
>> > @@ -16,25 +16,27 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern float __redirect_exp2f (float);
>> >
>> > -#define SYMBOL_NAME exp2f
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME exp2f
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
>> >
>> > -#ifdef SHARED
>> > +# ifdef SHARED
>> >  versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
>> >  libm_alias_float_other (__exp2, exp2)
>> > -#else
>> > +# else
>> >  libm_alias_float (__exp2, exp2)
>> > -#endif
>> > +# endif
>> >
>> >  strong_alias (__exp2f, __ieee754_exp2f)
>> >  libm_alias_finite (__exp2f, __exp2f)
>> >
>> > -#define __exp2f __exp2f_sse2
>> > +# define __exp2f __exp2f_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/flt-32/e_exp2f.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
>> > index 4a7e2a5bce..e8d6f393ff 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
>> > @@ -16,28 +16,30 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern float __redirect_expf (float);
>> >
>> > -#define SYMBOL_NAME expf
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME expf
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
>> >
>> > -#ifdef SHARED
>> > +# ifdef SHARED
>> >  __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
>> >    __attribute__ ((visibility ("hidden")));
>> >
>> >  versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
>> >  libm_alias_float_other (__exp, exp)
>> > -#else
>> > +# else
>> >  libm_alias_float (__exp, exp)
>> > -#endif
>> > +# endif
>> >
>> >  strong_alias (__expf, __ieee754_expf)
>> >  libm_alias_finite (__expf, __expf)
>> >
>> > -#define __expf __expf_sse2
>> > +# define __expf __expf_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/flt-32/e_expf.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
>> > index 067fbf58c3..3a678235d9 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_log.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <math.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <math.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern double __redirect_ieee754_log (double);
>> >
>> > -#define SYMBOL_NAME ieee754_log
>> > -#include "ifunc-avx-fma4.h"
>> > +# define SYMBOL_NAME ieee754_log
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
>> >                        IFUNC_SELECTOR ());
>> >  libm_alias_finite (__ieee754_log, __log)
>> >
>> > -#define __log __ieee754_log_sse2
>> > +# define __log __ieee754_log_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/e_log.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
>> > index 9c57a2f6cc..c032758b4e 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
>> > @@ -16,28 +16,30 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern double __redirect_log2 (double);
>> >
>> > -#define SYMBOL_NAME log2
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME log2
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
>> >
>> > -#ifdef SHARED
>> > +# ifdef SHARED
>> >  __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
>> >    __attribute__ ((visibility ("hidden")));
>> >
>> >  versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
>> >  libm_alias_double_other (__log2, log2)
>> > -#else
>> > +# else
>> >  libm_alias_double (__log2, log2)
>> > -#endif
>> > +# endif
>> >
>> >  strong_alias (__log2, __ieee754_log2)
>> >  libm_alias_finite (__log2, __log2)
>> >
>> > -#define __log2 __log2_sse2
>> > +# define __log2 __log2_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/e_log2.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
>> > index 2b45c87f38..0f8d1f0abc 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
>> > @@ -16,28 +16,30 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern float __redirect_log2f (float);
>> >
>> > -#define SYMBOL_NAME log2f
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME log2f
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
>> >
>> > -#ifdef SHARED
>> > +# ifdef SHARED
>> >  __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
>> >    __attribute__ ((visibility ("hidden")));
>> >
>> >  versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
>> >  libm_alias_float_other (__log2, log2)
>> > -#else
>> > +# else
>> >  libm_alias_float (__log2, log2)
>> > -#endif
>> > +# endif
>> >
>> >  strong_alias (__log2f, __ieee754_log2f)
>> >  libm_alias_finite (__log2f, __log2f)
>> >
>> > -#define __log2f __log2f_sse2
>> > +# define __log2f __log2f_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/flt-32/e_log2f.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
>> > index 97e23c8fea..9d94dd614f 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
>> > @@ -16,28 +16,30 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern float __redirect_logf (float);
>> >
>> > -#define SYMBOL_NAME logf
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME logf
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
>> >
>> > -#ifdef SHARED
>> > +# ifdef SHARED
>> >  __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
>> >    __attribute__ ((visibility ("hidden")));
>> >
>> >  versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
>> >  libm_alias_float_other (__log, log)
>> > -#else
>> > +# else
>> >  libm_alias_float (__log, log)
>> > -#endif
>> > +# endif
>> >
>> >  strong_alias (__logf, __ieee754_logf)
>> >  libm_alias_finite (__logf, __logf)
>> >
>> > -#define __logf __logf_sse2
>> > +# define __logf __logf_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/flt-32/e_logf.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
>> > index 42618e7112..07436d420c 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <math.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <math.h>
>> > +# include <libm-alias-finite.h>
>> >
>> >  extern double __redirect_ieee754_pow (double, double);
>> >
>> > -#define SYMBOL_NAME ieee754_pow
>> > -#include "ifunc-fma4.h"
>> > +# define SYMBOL_NAME ieee754_pow
>> > +# include "ifunc-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ieee754_pow,
>> >                        __ieee754_pow, IFUNC_SELECTOR ());
>> >  libm_alias_finite (__ieee754_pow, __pow)
>> >
>> > -#define __pow __ieee754_pow_sse2
>> > +# define __pow __ieee754_pow_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/e_pow.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
>> > index 8e6ce13cc1..c64c8a4302 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
>> > @@ -16,31 +16,33 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > -#include <libm-alias-finite.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> > +# include <libm-alias-finite.h>
>> >
>> > -#define powf __redirect_powf
>> > -#define __DECL_SIMD___redirect_powf
>> > -#include <math.h>
>> > -#undef powf
>> > +# define powf __redirect_powf
>> > +# define __DECL_SIMD___redirect_powf
>> > +# include <math.h>
>> > +# undef powf
>> >
>> > -#define SYMBOL_NAME powf
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME powf
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
>> >
>> > -#ifdef SHARED
>> > +# ifdef SHARED
>> >  __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
>> >    __attribute__ ((visibility ("hidden")));
>> >
>> >  versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
>> >  libm_alias_float_other (__pow, pow)
>> > -#else
>> > +# else
>> >  libm_alias_float (__pow, pow)
>> > -#endif
>> > +# endif
>> >
>> >  strong_alias (__powf, __ieee754_powf)
>> >  libm_alias_finite (__powf, __powf)
>> >
>> > -#define __powf __powf_sse2
>> > +# define __powf __powf_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/flt-32/e_powf.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
>> > index 71bad096a9..f9ec4e7b37 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
>> > @@ -16,15 +16,17 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> >  extern double __redirect_atan (double);
>> >
>> > -#define SYMBOL_NAME atan
>> > -#include "ifunc-avx-fma4.h"
>> > +# define SYMBOL_NAME atan
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
>> >  libm_alias_double (__atan, atan)
>> >
>> > -#define __atan __atan_sse2
>> > +# define __atan __atan_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/s_atan.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
>> > new file mode 100644
>> > index 0000000000..e6c1106753
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of ceil function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-double.h>
>> > +
>> > +       .text
>> > +ENTRY(__ceil)
>> > +       vroundsd $10, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__ceil)
>> > +
>> > +libm_alias_double (__ceil, ceil)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
>> > index 64119011ad..4be069b8da 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-double.h>
>> > +# define __ceil_sse41 __ceil
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__ceil_sse41)
>> >         roundsd $10, %xmm0, %xmm0
>> >         ret
>> >  END(__ceil_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_double (__ceil, ceil)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
>> > index cc028addee..0199863c8f 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-double.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-double.h>
>> >
>> > -#define ceil __redirect_ceil
>> > -#define __ceil __redirect___ceil
>> > -#include <math.h>
>> > -#undef ceil
>> > -#undef __ceil
>> > +# define ceil __redirect_ceil
>> > +# define __ceil __redirect___ceil
>> > +# include <math.h>
>> > +# undef ceil
>> > +# undef __ceil
>> >
>> > -#define SYMBOL_NAME ceil
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME ceil
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
>> >  libm_alias_double (__ceil, ceil)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
>> > new file mode 100644
>> > index 0000000000..b4d8ac0455
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of ceilf function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-float.h>
>> > +
>> > +       .text
>> > +ENTRY(__ceilf)
>> > +       vroundss $10, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__ceilf)
>> > +
>> > +libm_alias_float (__ceil, ceil)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
>> > index dd9a9f6b71..1a85e9c925 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-float.h>
>> > +# define __ceilf_sse41 __ceilf
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__ceilf_sse41)
>> >         roundss $10, %xmm0, %xmm0
>> >         ret
>> >  END(__ceilf_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_float (__ceil, ceil)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
>> > index 97a0ca7d19..dfce9225dd 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-float.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-float.h>
>> >
>> > -#define ceilf __redirect_ceilf
>> > -#define __ceilf __redirect___ceilf
>> > -#include <math.h>
>> > -#undef ceilf
>> > -#undef __ceilf
>> > +# define ceilf __redirect_ceilf
>> > +# define __ceilf __redirect___ceilf
>> > +# include <math.h>
>> > +# undef ceilf
>> > +# undef __ceilf
>> >
>> > -#define SYMBOL_NAME ceilf
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME ceilf
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
>> >  libm_alias_float (__ceil, ceil)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
>> > index 2703c576df..9be9327b80 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
>> > @@ -16,13 +16,17 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> >
>> >  extern float __redirect_cosf (float);
>> >
>> > -#define SYMBOL_NAME cosf
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME cosf
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
>> >
>> >  libm_alias_float (__cos, cos)
>> > +#else
>> > +# include <sysdeps/ieee754/flt-32/s_cosf.c>
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
>> > index 8a2d69f9b2..1ed45245cb 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
>> > @@ -16,21 +16,23 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> >  extern double __redirect_expm1 (double);
>> >
>> > -#define SYMBOL_NAME expm1
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME expm1
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
>> >  libm_alias_double (__expm1, expm1)
>> >
>> > -#define __expm1 __expm1_sse2
>> > +# define __expm1 __expm1_sse2
>> >
>> >  /* NB: __expm1 may be expanded to __expm1_sse2 in the following
>> >     prototypes.  */
>> >  extern long double __expm1l (long double);
>> >  extern long double __expm1f128 (long double);
>> >
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/s_expm1.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
>> > new file mode 100644
>> > index 0000000000..ff74b5a8bf
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of floor function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-double.h>
>> > +
>> > +       .text
>> > +ENTRY(__floor)
>> > +       vroundsd $9, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__floor)
>> > +
>> > +libm_alias_double (__floor, floor)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
>> > index 2f7521f39f..957d018177 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-double.h>
>> > +# define __floor_sse41 __floor
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__floor_sse41)
>> >         roundsd $9, %xmm0, %xmm0
>> >         ret
>> >  END(__floor_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_double (__floor, floor)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
>> > index 8cebd48e10..a30c88671e 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-double.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-double.h>
>> >
>> > -#define floor __redirect_floor
>> > -#define __floor __redirect___floor
>> > -#include <math.h>
>> > -#undef floor
>> > -#undef __floor
>> > +# define floor __redirect_floor
>> > +# define __floor __redirect___floor
>> > +# include <math.h>
>> > +# undef floor
>> > +# undef __floor
>> >
>> > -#define SYMBOL_NAME floor
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME floor
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
>> >  libm_alias_double (__floor, floor)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
>> > new file mode 100644
>> > index 0000000000..c378baae8e
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of floorf function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-float.h>
>> > +
>> > +       .text
>> > +ENTRY(__floorf)
>> > +       vroundss $9, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__floorf)
>> > +
>> > +libm_alias_float (__floor, floor)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
>> > index 5f6020d27d..eacabe167c 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-float.h>
>> > +# define __floorf_sse41 __floorf
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__floorf_sse41)
>> >         roundss $9, %xmm0, %xmm0
>> >         ret
>> >  END(__floorf_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_float (__floor, floor)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
>> > index a14e18b03c..6531b78443 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-float.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-float.h>
>> >
>> > -#define floorf __redirect_floorf
>> > -#define __floorf __redirect___floorf
>> > -#include <math.h>
>> > -#undef floorf
>> > -#undef __floorf
>> > +# define floorf __redirect_floorf
>> > +# define __floorf __redirect___floorf
>> > +# include <math.h>
>> > +# undef floorf
>> > +# undef __floorf
>> >
>> > -#define SYMBOL_NAME floorf
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME floorf
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
>> >  libm_alias_float (__floor, floor)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
>> > index a8e1a3f21b..76e1672e2d 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
>> > @@ -16,14 +16,16 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> >  extern double __redirect_log1p (double);
>> >
>> > -#define SYMBOL_NAME log1p
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME log1p
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
>> >
>> > -#define __log1p __log1p_sse2
>> > +# define __log1p __log1p_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/s_log1p.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
>> > new file mode 100644
>> > index 0000000000..5bfdf73c28
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of nearbyint function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-double.h>
>> > +
>> > +       .text
>> > +ENTRY(__nearbyint)
>> > +       vroundsd $0xc, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__nearbyint)
>> > +
>> > +libm_alias_double (__nearbyint, nearbyint)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
>> > index 674f7eb40a..ee0b17e470 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-double.h>
>> > +# define __nearbyint_sse41 __nearbyint
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__nearbyint_sse41)
>> >         roundsd $0xc, %xmm0, %xmm0
>> >         ret
>> >  END(__nearbyint_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_double (__nearbyint, nearbyint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
>> > index 693e42dd4e..649a9df869 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> > -#define nearbyint __redirect_nearbyint
>> > -#define __nearbyint __redirect___nearbyint
>> > -#include <math.h>
>> > -#undef nearbyint
>> > -#undef __nearbyint
>> > +# define nearbyint __redirect_nearbyint
>> > +# define __nearbyint __redirect___nearbyint
>> > +# include <math.h>
>> > +# undef nearbyint
>> > +# undef __nearbyint
>> >
>> > -#define SYMBOL_NAME nearbyint
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME nearbyint
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
>> >                        IFUNC_SELECTOR ());
>> >  libm_alias_double (__nearbyint, nearbyint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
>> > new file mode 100644
>> > index 0000000000..1dbaed0324
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of nearbyintf function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-float.h>
>> > +
>> > +       .text
>> > +ENTRY(__nearbyintf)
>> > +       vroundss $0xc, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__nearbyintf)
>> > +
>> > +libm_alias_float (__nearbyint, nearbyint)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
>> > index 5892bd7563..8b3e307b78 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-float.h>
>> > +# define __nearbyintf_sse41 __nearbyintf
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__nearbyintf_sse41)
>> >         roundss $0xc, %xmm0, %xmm0
>> >         ret
>> >  END(__nearbyintf_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_float (__nearbyint, nearbyint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
>> > index a0ac009f4b..7762467ad9 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> >
>> > -#define nearbyintf __redirect_nearbyintf
>> > -#define __nearbyintf __redirect___nearbyintf
>> > -#include <math.h>
>> > -#undef nearbyintf
>> > -#undef __nearbyintf
>> > +# define nearbyintf __redirect_nearbyintf
>> > +# define __nearbyintf __redirect___nearbyintf
>> > +# include <math.h>
>> > +# undef nearbyintf
>> > +# undef __nearbyintf
>> >
>> > -#define SYMBOL_NAME nearbyintf
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME nearbyintf
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
>> >                        IFUNC_SELECTOR ());
>> >  libm_alias_float (__nearbyint, nearbyint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
>> > new file mode 100644
>> > index 0000000000..2b403b331f
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of rint function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-double.h>
>> > +
>> > +       .text
>> > +ENTRY(__rint)
>> > +       vroundsd $4, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__rint)
>> > +
>> > +libm_alias_double (__rint, rint)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
>> > index 405372991b..4c7c1c37de 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-double.h>
>> > +# define __rint_sse41 __rint
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__rint_sse41)
>> >         roundsd $4, %xmm0, %xmm0
>> >         ret
>> >  END(__rint_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_double (__rint, rint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
>> > index 754c87e004..49693c9728 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-double.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-double.h>
>> >
>> > -#define rint __redirect_rint
>> > -#define __rint __redirect___rint
>> > -#include <math.h>
>> > -#undef rint
>> > -#undef __rint
>> > +# define rint __redirect_rint
>> > +# define __rint __redirect___rint
>> > +# include <math.h>
>> > +# undef rint
>> > +# undef __rint
>> >
>> > -#define SYMBOL_NAME rint
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME rint
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
>> >  libm_alias_double (__rint, rint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
>> > new file mode 100644
>> > index 0000000000..171c2867f4
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of rintf function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-float.h>
>> > +
>> > +       .text
>> > +ENTRY(__rintf)
>> > +       vroundss $4, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__rintf)
>> > +
>> > +libm_alias_float (__rint, rint)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
>> > index 8ac67ce767..55443d7238 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-float.h>
>> > +# define __rintf_sse41 __rintf
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__rintf_sse41)
>> >         roundss $4, %xmm0, %xmm0
>> >         ret
>> >  END(__rintf_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_float (__rint, rint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
>> > index e9d6b7a5f2..c7cf09701d 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-float.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-float.h>
>> >
>> > -#define rintf __redirect_rintf
>> > -#define __rintf __redirect___rintf
>> > -#include <math.h>
>> > -#undef rintf
>> > -#undef __rintf
>> > +# define rintf __redirect_rintf
>> > +# define __rintf __redirect___rintf
>> > +# include <math.h>
>> > +# undef rintf
>> > +# undef __rintf
>> >
>> > -#define SYMBOL_NAME rintf
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME rintf
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
>> >  libm_alias_float (__rint, rint)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
>> > new file mode 100644
>> > index 0000000000..576790355c
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of roundeven function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-double.h>
>> > +
>> > +       .text
>> > +ENTRY(__roundeven)
>> > +       vroundsd $8, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__roundeven)
>> > +
>> > +libm_alias_double (__roundeven, roundeven)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
>> > index 5ef102336b..f0644cce81 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-double.h>
>> > +# define __roundeven_sse41 __roundeven
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__roundeven_sse41)
>> >         roundsd $8, %xmm0, %xmm0
>> >         ret
>> >  END(__roundeven_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_double (__roundeven, roundeven)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
>> > index 8737b32e26..a250297918 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
>> > @@ -16,16 +16,18 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> > -#define roundeven __redirect_roundeven
>> > -#define __roundeven __redirect___roundeven
>> > -#include <math.h>
>> > -#undef roundeven
>> > -#undef __roundeven
>> > +# define roundeven __redirect_roundeven
>> > +# define __roundeven __redirect___roundeven
>> > +# include <math.h>
>> > +# undef roundeven
>> > +# undef __roundeven
>> >
>> > -#define SYMBOL_NAME roundeven
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME roundeven
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
>> >  libm_alias_double (__roundeven, roundeven)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
>> > new file mode 100644
>> > index 0000000000..42c359f4cd
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of roundevenf function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-float.h>
>> > +
>> > +       .text
>> > +ENTRY(__roundevenf)
>> > +       vroundss $8, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__roundevenf)
>> > +
>> > +libm_alias_float (__roundeven, roundeven)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
>> > index 792c90ba07..d1dd6b0e8b 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
>> > @@ -17,8 +17,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-float.h>
>> > +# define __roundevenf_sse41 __roundevenf
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__roundevenf_sse41)
>> >         roundss $8, %xmm0, %xmm0
>> >         ret
>> >  END(__roundevenf_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_float (__roundeven, roundeven)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
>> > index e96016a4d5..534941e67f 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
>> > @@ -16,16 +16,18 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> >
>> > -#define roundevenf __redirect_roundevenf
>> > -#define __roundevenf __redirect___roundevenf
>> > -#include <math.h>
>> > -#undef roundevenf
>> > -#undef __roundevenf
>> > +# define roundevenf __redirect_roundevenf
>> > +# define __roundevenf __redirect___roundevenf
>> > +# include <math.h>
>> > +# undef roundevenf
>> > +# undef __roundevenf
>> >
>> > -#define SYMBOL_NAME roundevenf
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME roundevenf
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
>> >  libm_alias_float (__roundeven, roundeven)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
>> > index 355cc0092e..21eaa5e984 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
>> > @@ -16,24 +16,26 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> >  extern double __redirect_sin (double);
>> >  extern double __redirect_cos (double);
>> >
>> > -#define SYMBOL_NAME sin
>> > -#include "ifunc-avx-fma4.h"
>> > +# define SYMBOL_NAME sin
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
>> >  libm_alias_double (__sin, sin)
>> >
>> > -#undef SYMBOL_NAME
>> > -#define SYMBOL_NAME cos
>> > -#include "ifunc-avx-fma4.h"
>> > +# undef SYMBOL_NAME
>> > +# define SYMBOL_NAME cos
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
>> >  libm_alias_double (__cos, cos)
>> >
>> > -#define __cos __cos_sse2
>> > -#define __sin __sin_sse2
>> > +# define __cos __cos_sse2
>> > +# define __sin __sin_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/s_sin.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
>> > index 70107e999c..729163cdde 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
>> > @@ -16,15 +16,17 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> >  extern void __redirect_sincos (double, double *, double *);
>> >
>> > -#define SYMBOL_NAME sincos
>> > -#include "ifunc-fma4.h"
>> > +# define SYMBOL_NAME sincos
>> > +# include "ifunc-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
>> >  libm_alias_double (__sincos, sincos)
>> >
>> > -#define __sincos __sincos_sse2
>> > +# define __sincos __sincos_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/s_sincos.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
>> > index 80bc028451..136dd62c81 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
>> > @@ -16,13 +16,17 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> >
>> >  extern void __redirect_sincosf (float, float *, float *);
>> >
>> > -#define SYMBOL_NAME sincosf
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME sincosf
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
>> >
>> >  libm_alias_float (__sincos, sincos)
>> > +#else
>> > +# include <sysdeps/ieee754/flt-32/s_sincosf.c>
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
>> > index a32b9e9550..fabbf55604 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
>> > @@ -16,13 +16,17 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-float.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-float.h>
>> >
>> >  extern float __redirect_sinf (float);
>> >
>> > -#define SYMBOL_NAME sinf
>> > -#include "ifunc-fma.h"
>> > +# define SYMBOL_NAME sinf
>> > +# include "ifunc-fma.h"
>> >
>> >  libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
>> >
>> >  libm_alias_float (__sin, sin)
>> > +#else
>> > +# include <sysdeps/ieee754/flt-32/s_sinf.c>
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
>> > index f9a2474a13..c85e327ff8 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
>> > @@ -16,15 +16,17 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#include <libm-alias-double.h>
>> > +#ifndef HAVE_X86_AVX2_FMA
>> > +# include <libm-alias-double.h>
>> >
>> >  extern double __redirect_tan (double);
>> >
>> > -#define SYMBOL_NAME tan
>> > -#include "ifunc-avx-fma4.h"
>> > +# define SYMBOL_NAME tan
>> > +# include "ifunc-avx-fma4.h"
>> >
>> >  libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
>> >  libm_alias_double (__tan, tan)
>> >
>> > -#define __tan __tan_sse2
>> > +# define __tan __tan_sse2
>> > +#endif
>> >  #include <sysdeps/ieee754/dbl-64/s_tan.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
>> > new file mode 100644
>> > index 0000000000..b3e87e9606
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of trunc function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-double.h>
>> > +
>> > +       .text
>> > +ENTRY(__trunc)
>> > +       vroundsd $11, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__trunc)
>> > +
>> > +libm_alias_double (__trunc, trunc)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
>> > index b496a6ef49..062cd1fb36 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
>> > @@ -18,8 +18,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-double.h>
>> > +# define __trunc_sse41 __trunc
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__trunc_sse41)
>> >         roundsd $11, %xmm0, %xmm0
>> >         ret
>> >  END(__trunc_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_double (__trunc, trunc)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
>> > index 9bc9df8744..568e818826 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-double.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-double.h>
>> >
>> > -#define trunc __redirect_trunc
>> > -#define __trunc __redirect___trunc
>> > -#include <math.h>
>> > -#undef trunc
>> > -#undef __trunc
>> > +# define trunc __redirect_trunc
>> > +# define __trunc __redirect___trunc
>> > +# include <math.h>
>> > +# undef trunc
>> > +# undef __trunc
>> >
>> > -#define SYMBOL_NAME trunc
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME trunc
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
>> >  libm_alias_double (__trunc, trunc)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
>> > new file mode 100644
>> > index 0000000000..f31ac7d7f7
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
>> > @@ -0,0 +1,28 @@
>> > +/* AVX implementation of truncf function.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <https://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>> > +#include <libm-alias-float.h>
>> > +
>> > +       .text
>> > +ENTRY(__truncf)
>> > +       vroundss $11, %xmm0, %xmm0, %xmm0
>> > +       ret
>> > +END(__truncf)
>> > +
>> > +libm_alias_float (__trunc, trunc)
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
>> > index 22e9a83307..ecd0ae5c05 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
>> > @@ -18,8 +18,19 @@
>> >
>> >  #include <sysdep.h>
>> >
>> > +#ifdef HAVE_X86_SSE4_1
>> > +# include <libm-alias-float.h>
>> > +# define __truncf_sse41 __truncf
>> > +       .text
>> > +#else
>> >         .section .text.sse4.1,"ax",@progbits
>> > +#endif
>> > +
>> >  ENTRY(__truncf_sse41)
>> >         roundss $11, %xmm0, %xmm0
>> >         ret
>> >  END(__truncf_sse41)
>> > +
>> > +#ifdef HAVE_X86_SSE4_1
>> > +libm_alias_float (__trunc, trunc)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
>> > index dae01d166a..57783c805a 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
>> > @@ -16,17 +16,19 @@
>> >     License along with the GNU C Library; if not, see
>> >     <https://www.gnu.org/licenses/>.  */
>> >
>> > -#define NO_MATH_REDIRECT
>> > -#include <libm-alias-float.h>
>> > +#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
>> > +# define NO_MATH_REDIRECT
>> > +# include <libm-alias-float.h>
>> >
>> > -#define truncf __redirect_truncf
>> > -#define __truncf __redirect___truncf
>> > -#include <math.h>
>> > -#undef truncf
>> > -#undef __truncf
>> > +# define truncf __redirect_truncf
>> > +# define __truncf __redirect___truncf
>> > +# include <math.h>
>> > +# undef truncf
>> > +# undef __truncf
>> >
>> > -#define SYMBOL_NAME truncf
>> > -#include "ifunc-sse4_1.h"
>> > +# define SYMBOL_NAME truncf
>> > +# include "ifunc-sse4_1.h"
>> >
>> >  libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
>> >  libm_alias_float (__trunc, trunc)
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c
>> > index 27eee98a0a..fb2045e6cf 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
>> > @@ -1 +1,5 @@
>> > -#include <sysdeps/../math/w_exp.c>
>> > +#ifdef HAVE_X86_AVX2_FMA
>> > +# include <sysdeps/ieee754/dbl-64/w_exp.c>
>> > +#else
>> > +# include <sysdeps/../math/w_exp.c>
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c
>> > index 9b2b018711..b85be8221e 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/w_log.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
>> > @@ -1 +1,5 @@
>> > -#include <sysdeps/../math/w_log.c>
>> > +#ifdef HAVE_X86_AVX2_FMA
>> > +# include <sysdeps/ieee754/dbl-64/w_log.c>
>> > +#else
>> > +# include <sysdeps/../math/w_log.c>
>> > +#endif
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c
>> > index b50c1988de..849f4f97ff 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
>> > +++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
>> > @@ -1 +1,5 @@
>> > -#include <sysdeps/../math/w_pow.c>
>> > +#ifdef HAVE_X86_AVX2_FMA
>> > +# include <sysdeps/ieee754/dbl-64/w_pow.c>
>> > +#else
>> > +# include <sysdeps/../math/w_pow.c>
>> > +#endif
>> > --
>> > 2.43.0
>> >
H.J. Lu Feb. 20, 2024, 6:04 p.m. UTC | #4
On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >
> >
> >
> > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>
> >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >> >
> >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> >> > processors, including VMs, should also support FMA and vice versa.
> >> >
> >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> >> > variant.
> >> Not avx2 + FMA as well?
> >
> >
> > Correct. Logic is as follows
> > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > else if (build with SSE4.1): Keep SSE4.1 variants only.
> What if someone builds with sse4.1 as a minimum but then
> runs on avx2+ machines?

Only SSE4.1 variant will be used in this case.   Both SSE4.1
and AVX versions only have a single instruction.  This matches
the compiler builtin function of SSE4.1 and AVX.
Noah Goldstein Feb. 20, 2024, 6:07 p.m. UTC | #5
On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > >
> > >
> > >
> > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>
> > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > >> >
> > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > >> > processors, including VMs, should also support FMA and vice versa.
> > >> >
> > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > >> > variant.
> > >> Not avx2 + FMA as well?
> > >
> > >
> > > Correct. Logic is as follows
> > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > What if someone builds with sse4.1 as a minimum but then
> > runs on avx2+ machines?
>
> Only SSE4.1 variant will be used in this case.   Both SSE4.1
> and AVX versions only have a single instruction.  This matches
> the compiler builtin function of SS4.1 and AVX.

If they are all the same, what's the rationale for having an
AVX version at all?
>
>
> --
> H.J.
H.J. Lu Feb. 20, 2024, 6:13 p.m. UTC | #6
On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > > >
> > > >
> > > >
> > > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >>
> > > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > > >> >
> > > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > > >> > processors, including VMs, should also support FMA and vice versa.
> > > >> >
> > > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > > >> > variant.
> > > >> Not avx2 + FMA as well?
> > > >
> > > >
> > > > Correct. Logic is as follows
> > > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > > What if someone builds with sse4.1 as a minimum but then
> > > runs on avx2+ machines?
> >
> > Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > and AVX versions only have a single instruction.  This matches
> > the compiler builtin function of SS4.1 and AVX.
>
> if they are all the same, whats the rationale for having an
> avx version at all?

They aren't the same.  For ceil, it is

roundsd $10, %xmm0, %xmm0
ret

vs

vroundsd $10, %xmm0, %xmm0, %xmm0
ret

You get the same things with

return __builtin_ceil (x);
Noah Goldstein Feb. 20, 2024, 6:19 p.m. UTC | #7
On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > > > >
> > > > >
> > > > >
> > > > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > >>
> > > > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > > > >> >
> > > > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > > > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > > > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > > > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > > > >> > processors, including VMs, should also support FMA and vice versa.
> > > > >> >
> > > > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > > > >> > variant.
> > > > >> Not avx2 + FMA as well?
> > > > >
> > > > >
> > > > > Correct. Logic is as follows
> > > > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > > > What if someone builds with sse4.1 as a minimum but then
> > > > runs on avx2+ machines?
> > >
> > > Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > > and AVX versions only have a single instruction.  This matches
> > > the compiler builtin function of SS4.1 and AVX.
> >
> > if they are all the same, whats the rationale for having an
> > avx version at all?
>
> They aren't the same.  For ceil, it is
>
> roundsd $10, %xmm0, %xmm0
> ret
>
> vs
>
> vroundsd $10, %xmm0, %xmm0, %xmm0
> ret
>
> You get the same things with
>
> return __builtin_ceil (x);

I mean, if the SSE4.1 and AVX versions are of equal quality,
why not just remove the AVX impls and use the SSE4.1 impls
on AVX targets?
>
>
> --
> H.J.
H.J. Lu Feb. 20, 2024, 6:27 p.m. UTC | #8
On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > >
> > > > > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > >>
> > > > > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > > > > >> >
> > > > > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > > > > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > > > > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > > > > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > > > > >> > processors, including VMs, should also support FMA and vice versa.
> > > > > >> >
> > > > > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > > > > >> > variant.
> > > > > >> Not avx2 + FMA as well?
> > > > > >
> > > > > >
> > > > > > Correct. Logic is as follows
> > > > > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > > > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > > > > What if someone builds with sse4.1 as a minimum but then
> > > > > runs on avx2+ machines?
> > > >
> > > > Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > > > and AVX versions only have a single instruction.  This matches
> > > > the compiler builtin function of SS4.1 and AVX.
> > >
> > > if they are all the same, whats the rationale for having an
> > > avx version at all?
> >
> > They aren't the same.  For ceil, it is
> >
> > roundsd $10, %xmm0, %xmm0
> > ret
> >
> > vs
> >
> > vroundsd $10, %xmm0, %xmm0, %xmm0
> > ret
> >
> > You get the same things with
> >
> > return __builtin_ceil (x);
>
> I mean if they are equal quality sse4.1 / avx,
> why not just remove the avx impls are using sse4.1 impls
> on avx targets?

If glibc is compiled with AVX, we should use the AVX version if
appropriate.   Since the minimum GCC for glibc build can't inline
 __builtin_ceil, we inline  __builtin_ceil by hand.
Noah Goldstein Feb. 20, 2024, 6:32 p.m. UTC | #9
On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > > >>
> > > > > > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > > > > > >> >
> > > > > > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > > > > > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > > > > > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > > > > > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > > > > > >> > processors, including VMs, should also support FMA and vice versa.
> > > > > > >> >
> > > > > > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > > > > > >> > variant.
> > > > > > >> Not avx2 + FMA as well?
> > > > > > >
> > > > > > >
> > > > > > > Correct. Logic is as follows
> > > > > > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > > > > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > > > > > What if someone builds with sse4.1 as a minimum but then
> > > > > > runs on avx2+ machines?
> > > > >
> > > > > Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > > > > and AVX versions only have a single instruction.  This matches
> > > > > the compiler builtin function of SS4.1 and AVX.
> > > >
> > > > if they are all the same, whats the rationale for having an
> > > > avx version at all?
> > >
> > > They aren't the same.  For ceil, it is
> > >
> > > roundsd $10, %xmm0, %xmm0
> > > ret
> > >
> > > vs
> > >
> > > vroundsd $10, %xmm0, %xmm0, %xmm0
> > > ret
> > >
> > > You get the same things with
> > >
> > > return __builtin_ceil (x);
> >
> > I mean if they are equal quality sse4.1 / avx,
> > why not just remove the avx impls are using sse4.1 impls
> > on avx targets?
>
> If glibc is compiled with AVX, we should use the AVX version if
> appropriate.   Since the minimum GCC for glibc build can't inline
>  __builtin_ceil, we inline  __builtin_ceil by hand.
That holds if compiled with AVX, but for a generic target do we need to hold
onto the AVX versions for any reason?
>
> --
> H.J.
H.J. Lu Feb. 20, 2024, 6:36 p.m. UTC | #10
On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > >
> > > > > On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > > >
> > > > > > > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > > > >>
> > > > > > > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > > > > > > >> >
> > > > > > > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > > > > > > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > > > > > > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > > > > > > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > > > > > > >> > processors, including VMs, should also support FMA and vice versa.
> > > > > > > >> >
> > > > > > > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > > > > > > >> > variant.
> > > > > > > >> Not avx2 + FMA as well?
> > > > > > > >
> > > > > > > >
> > > > > > > > Correct. Logic is as follows
> > > > > > > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > > > > > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > > > > > > What if someone builds with sse4.1 as a minimum but then
> > > > > > > runs on avx2+ machines?
> > > > > >
> > > > > > Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > > > > > and AVX versions only have a single instruction.  This matches
> > > > > > the compiler builtin function of SS4.1 and AVX.
> > > > >
> > > > > if they are all the same, whats the rationale for having an
> > > > > avx version at all?
> > > >
> > > > They aren't the same.  For ceil, it is
> > > >
> > > > roundsd $10, %xmm0, %xmm0
> > > > ret
> > > >
> > > > vs
> > > >
> > > > vroundsd $10, %xmm0, %xmm0, %xmm0
> > > > ret
> > > >
> > > > You get the same things with
> > > >
> > > > return __builtin_ceil (x);
> > >
> > > I mean if they are equal quality sse4.1 / avx,
> > > why not just remove the avx impls are using sse4.1 impls
> > > on avx targets?
> >
> > If glibc is compiled with AVX, we should use the AVX version if
> > appropriate.   Since the minimum GCC for glibc build can't inline
> >  __builtin_ceil, we inline  __builtin_ceil by hand.
> if compiled with avx, but for generic target do we need to hold
> onto avx versions for any reason?

I don't understand what you were asking.   This patch leads to the same
assembly code generated from

double
__ceil (double x)
{
  return __builtin_ceil (x);
}

by a GCC which can inline __builtin_ceil, compiling with -msse4.1 or -mavx.
Noah Goldstein Feb. 20, 2024, 6:38 p.m. UTC | #11
On Tue, Feb 20, 2024 at 6:37 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > > >
> > > > > > > On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > > > >
> > > > > > > > On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > > > > > >>
> > > > > > > > >> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > > > > > > > >> >
> > > > > > > > >> > When glibc is built with FMA and AVX2 enabled by default, the resulting
> > > > > > > > >> > glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > > > > > > > >> > FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > > > > > > > >> > default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > > > > > > > >> > processors, including VMs, should also support FMA and vice versa.
> > > > > > > > >> >
> > > > > > > > >> > When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > > > > > > > >> > variant.
> > > > > > > > >> Not avx2 + FMA as well?
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Correct. Logic is as follows
> > > > > > > > > If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > > > > > > > > else if (build with SSE4.1): Keep SSE4.1 variants only.
> > > > > > > > What if someone builds with sse4.1 as a minimum but then
> > > > > > > > runs on avx2+ machines?
> > > > > > >
> > > > > > > Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > > > > > > and AVX versions only have a single instruction.  This matches
> > > > > > > the compiler builtin function of SS4.1 and AVX.
> > > > > >
> > > > > > if they are all the same, whats the rationale for having an
> > > > > > avx version at all?
> > > > >
> > > > > They aren't the same.  For ceil, it is
> > > > >
> > > > > roundsd $10, %xmm0, %xmm0
> > > > > ret
> > > > >
> > > > > vs
> > > > >
> > > > > vroundsd $10, %xmm0, %xmm0, %xmm0
> > > > > ret
> > > > >
> > > > > You get the same things with
> > > > >
> > > > > return __builtin_ceil (x);
> > > >
> > > > I mean if they are equal quality sse4.1 / avx,
> > > > why not just remove the avx impls are using sse4.1 impls
> > > > on avx targets?
> > >
> > > If glibc is compiled with AVX, we should use the AVX version if
> > > appropriate.   Since the minimum GCC for glibc build can't inline
> > >  __builtin_ceil, we inline  __builtin_ceil by hand.
> > if compiled with avx, but for generic target do we need to hold
> > onto avx versions for any reason?
>
> I don't understand what you were asking.   This patch leads to the same
> assembly code generated from
>
> double
> __ceil (double x)
> {
>   return __builtin_ceil (x);
> }
>
> by a GCC which can inline __builtin_ceil, compiling with -msse4.1 or -mavx.

Ahh, I had a misunderstanding. Okay, it's clear; sse4.1 makes sense.
>
> --
> H.J.
Adhemerval Zanella Netto Feb. 20, 2024, 6:48 p.m. UTC | #12
On 20/02/24 15:36, H.J. Lu wrote:
> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>
>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>
>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>
>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>
>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>
>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>
>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>
>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
>>>>>>>>>>>
>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
>>>>>>>>>>> variant.
>>>>>>>>>> Not avx2 + FMA as well?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Correct. Logic is as follows
>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
>>>>>>>> What if someone builds with sse4.1 as a minimum but then
>>>>>>>> runs on avx2+ machines?
>>>>>>>
>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
>>>>>>> and AVX versions only have a single instruction.  This matches
>>>>>>> the compiler builtin function of SS4.1 and AVX.
>>>>>>
>>>>>> if they are all the same, whats the rationale for having an
>>>>>> avx version at all?
>>>>>
>>>>> They aren't the same.  For ceil, it is
>>>>>
>>>>> roundsd $10, %xmm0, %xmm0
>>>>> ret
>>>>>
>>>>> vs
>>>>>
>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
>>>>> ret
>>>>>
>>>>> You get the same things with
>>>>>
>>>>> return __builtin_ceil (x);
>>>>
>>>> I mean if they are equal quality sse4.1 / avx,
>>>> why not just remove the avx impls are using sse4.1 impls
>>>> on avx targets?
>>>
>>> If glibc is compiled with AVX, we should use the AVX version if
>>> appropriate.   Since the minimum GCC for glibc build can't inline
>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
>> if compiled with avx, but for generic target do we need to hold
>> onto avx versions for any reason?
> 
> I don't understand what you were asking.   This patch leads to the same
> assembly code generated from
> 
> double
> __ceil (double x)
> {
>   return __builtin_ceil (x);
> }

Wouldn't it make sense to follow the already defined x86_64 ABI versions and
provide the ifunc variants based on the ABI used?

I am kinda worried by the multiple configuration permutations we will have
to build/check.
H.J. Lu Feb. 20, 2024, 6:54 p.m. UTC | #13
On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 20/02/24 15:36, H.J. Lu wrote:
> > On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>
> >> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>
> >>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>
> >>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>
> >>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>
> >>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>>>
> >>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>
> >>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >>>>>>>>>>>
> >>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
> >>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> >>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> >>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> >>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
> >>>>>>>>>>>
> >>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> >>>>>>>>>>> variant.
> >>>>>>>>>> Not avx2 + FMA as well?
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Correct. Logic is as follows
> >>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> >>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
> >>>>>>>> What if someone builds with sse4.1 as a minimum but then
> >>>>>>>> runs on avx2+ machines?
> >>>>>>>
> >>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
> >>>>>>> and AVX versions only have a single instruction.  This matches
> >>>>>>> the compiler builtin function of SS4.1 and AVX.
> >>>>>>
> >>>>>> if they are all the same, whats the rationale for having an
> >>>>>> avx version at all?
> >>>>>
> >>>>> They aren't the same.  For ceil, it is
> >>>>>
> >>>>> roundsd $10, %xmm0, %xmm0
> >>>>> ret
> >>>>>
> >>>>> vs
> >>>>>
> >>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
> >>>>> ret
> >>>>>
> >>>>> You get the same things with
> >>>>>
> >>>>> return __builtin_ceil (x);
> >>>>
> >>>> I mean if they are equal quality sse4.1 / avx,
> >>>> why not just remove the avx impls are using sse4.1 impls
> >>>> on avx targets?
> >>>
> >>> If glibc is compiled with AVX, we should use the AVX version if
> >>> appropriate.   Since the minimum GCC for glibc build can't inline
> >>>  __builtin_ceil, we inline  __builtin_ceil by hand.
> >> if compiled with avx, but for generic target do we need to hold
> >> onto avx versions for any reason?
> >
> > I don't understand what you were asking.   This patch leads to the same
> > assembly code generated from
> >
> > double
> > __ceil (double x)
> > {
> >   return __builtin_ceil (x);
> > }
>
> Wouldn't make sense to follow the already define x86_64 ABI versions and
> provided the ifunc variants based on the ABI uses?

There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
will exclude ISA level N-1 or older variants in IFUNC selection.

> I am kinda worried by the multiple configuration permutations we will have
> to build/check.
Adhemerval Zanella Netto Feb. 20, 2024, 7:02 p.m. UTC | #14
On 20/02/24 15:54, H.J. Lu wrote:
> On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 20/02/24 15:36, H.J. Lu wrote:
>>> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>
>>>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>
>>>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>
>>>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>
>>>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>
>>>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>
>>>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
>>>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
>>>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
>>>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
>>>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
>>>>>>>>>>>>>
>>>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
>>>>>>>>>>>>> variant.
>>>>>>>>>>>> Not avx2 + FMA as well?
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Correct. Logic is as follows
>>>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
>>>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
>>>>>>>>>> What if someone builds with sse4.1 as a minimum but then
>>>>>>>>>> runs on avx2+ machines?
>>>>>>>>>
>>>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
>>>>>>>>> and AVX versions only have a single instruction.  This matches
>>>>>>>>> the compiler builtin function of SS4.1 and AVX.
>>>>>>>>
>>>>>>>> if they are all the same, whats the rationale for having an
>>>>>>>> avx version at all?
>>>>>>>
>>>>>>> They aren't the same.  For ceil, it is
>>>>>>>
>>>>>>> roundsd $10, %xmm0, %xmm0
>>>>>>> ret
>>>>>>>
>>>>>>> vs
>>>>>>>
>>>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
>>>>>>> ret
>>>>>>>
>>>>>>> You get the same things with
>>>>>>>
>>>>>>> return __builtin_ceil (x);
>>>>>>
>>>>>> I mean if they are equal quality sse4.1 / avx,
>>>>>> why not just remove the avx impls are using sse4.1 impls
>>>>>> on avx targets?
>>>>>
>>>>> If glibc is compiled with AVX, we should use the AVX version if
>>>>> appropriate.   Since the minimum GCC for glibc build can't inline
>>>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
>>>> if compiled with avx, but for generic target do we need to hold
>>>> onto avx versions for any reason?
>>>
>>> I don't understand what you were asking.   This patch leads to the same
>>> assembly code generated from
>>>
>>> double
>>> __ceil (double x)
>>> {
>>>   return __builtin_ceil (x);
>>> }
>>
>> Wouldn't make sense to follow the already define x86_64 ABI versions and
>> provided the ifunc variants based on the ABI uses?
> 
> There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
> and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
> will exclude ISA level N-1 or older variants in IFUNC selection.
> 

I mean, why not use the MINIMUM_X86_ISA_LEVEL to define whether to provide/build
the variants instead of adding two new configure checks?
H.J. Lu Feb. 20, 2024, 7:10 p.m. UTC | #15
On Tue, Feb 20, 2024 at 11:02 AM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 20/02/24 15:54, H.J. Lu wrote:
> > On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
> > <adhemerval.zanella@linaro.org> wrote:
> >>
> >>
> >>
> >> On 20/02/24 15:36, H.J. Lu wrote:
> >>> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>
> >>>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>
> >>>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>
> >>>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>>>
> >>>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>
> >>>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>>>>>
> >>>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>>>>
> >>>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
> >>>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> >>>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> >>>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> >>>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> >>>>>>>>>>>>> variant.
> >>>>>>>>>>>> Not avx2 + FMA as well?
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> Correct. Logic is as follows
> >>>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> >>>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
> >>>>>>>>>> What if someone builds with sse4.1 as a minimum but then
> >>>>>>>>>> runs on avx2+ machines?
> >>>>>>>>>
> >>>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
> >>>>>>>>> and AVX versions only have a single instruction.  This matches
> >>>>>>>>> the compiler builtin function of SS4.1 and AVX.
> >>>>>>>>
> >>>>>>>> if they are all the same, whats the rationale for having an
> >>>>>>>> avx version at all?
> >>>>>>>
> >>>>>>> They aren't the same.  For ceil, it is
> >>>>>>>
> >>>>>>> roundsd $10, %xmm0, %xmm0
> >>>>>>> ret
> >>>>>>>
> >>>>>>> vs
> >>>>>>>
> >>>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
> >>>>>>> ret
> >>>>>>>
> >>>>>>> You get the same things with
> >>>>>>>
> >>>>>>> return __builtin_ceil (x);
> >>>>>>
> >>>>>> I mean if they are equal quality sse4.1 / avx,
> >>>>>> why not just remove the avx impls are using sse4.1 impls
> >>>>>> on avx targets?
> >>>>>
> >>>>> If glibc is compiled with AVX, we should use the AVX version if
> >>>>> appropriate.   Since the minimum GCC for glibc build can't inline
> >>>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
> >>>> if compiled with avx, but for generic target do we need to hold
> >>>> onto avx versions for any reason?
> >>>
> >>> I don't understand what you were asking.   This patch leads to the same
> >>> assembly code generated from
> >>>
> >>> double
> >>> __ceil (double x)
> >>> {
> >>>   return __builtin_ceil (x);
> >>> }
> >>
> >> Wouldn't make sense to follow the already define x86_64 ABI versions and
> >> provided the ifunc variants based on the ABI uses?
> >
> > There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
> > and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
> > will exclude ISA level N-1 or older variants in IFUNC selection.
> >
>
> I mean, why not use the MINIMUM_X86_ISA_LEVEL to define whether to provide/build
> the variants instead of adding two new configure checks?

One issue is that the minimum GCC (GCC 6?) doesn't support -march=x86-64-vN.
Another reason is that these math functions don't need the full ISA
level instructions.
Adhemerval Zanella Netto Feb. 20, 2024, 7:56 p.m. UTC | #16
On 20/02/24 16:10, H.J. Lu wrote:
> On Tue, Feb 20, 2024 at 11:02 AM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 20/02/24 15:54, H.J. Lu wrote:
>>> On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
>>> <adhemerval.zanella@linaro.org> wrote:
>>>>
>>>>
>>>>
>>>> On 20/02/24 15:36, H.J. Lu wrote:
>>>>> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>
>>>>>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>
>>>>>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>
>>>>>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>
>>>>>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
>>>>>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
>>>>>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
>>>>>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
>>>>>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
>>>>>>>>>>>>>>> variant.
>>>>>>>>>>>>>> Not avx2 + FMA as well?
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Correct. Logic is as follows
>>>>>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
>>>>>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
>>>>>>>>>>>> What if someone builds with sse4.1 as a minimum but then
>>>>>>>>>>>> runs on avx2+ machines?
>>>>>>>>>>>
>>>>>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
>>>>>>>>>>> and AVX versions only have a single instruction.  This matches
>>>>>>>>>>> the compiler builtin function of SS4.1 and AVX.
>>>>>>>>>>
>>>>>>>>>> if they are all the same, whats the rationale for having an
>>>>>>>>>> avx version at all?
>>>>>>>>>
>>>>>>>>> They aren't the same.  For ceil, it is
>>>>>>>>>
>>>>>>>>> roundsd $10, %xmm0, %xmm0
>>>>>>>>> ret
>>>>>>>>>
>>>>>>>>> vs
>>>>>>>>>
>>>>>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
>>>>>>>>> ret
>>>>>>>>>
>>>>>>>>> You get the same things with
>>>>>>>>>
>>>>>>>>> return __builtin_ceil (x);
>>>>>>>>
>>>>>>>> I mean if they are equal quality sse4.1 / avx,
>>>>>>>> why not just remove the avx impls are using sse4.1 impls
>>>>>>>> on avx targets?
>>>>>>>
>>>>>>> If glibc is compiled with AVX, we should use the AVX version if
>>>>>>> appropriate.   Since the minimum GCC for glibc build can't inline
>>>>>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
>>>>>> if compiled with avx, but for generic target do we need to hold
>>>>>> onto avx versions for any reason?
>>>>>
>>>>> I don't understand what you were asking.   This patch leads to the same
>>>>> assembly code generated from
>>>>>
>>>>> double
>>>>> __ceil (double x)
>>>>> {
>>>>>   return __builtin_ceil (x);
>>>>> }
>>>>
>>>> Wouldn't make sense to follow the already define x86_64 ABI versions and
>>>> provided the ifunc variants based on the ABI uses?
>>>
>>> There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
>>> and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
>>> will exclude ISA level N-1 or older variants in IFUNC selection.
>>>
>>
>> I mean, why not use the MINIMUM_X86_ISA_LEVEL to define whether to provide/build
>> the variants instead of adding two new configure checks?
> 
> One issue is that the minimum GCC (GCC 6?) doesn't support -march=x86-64-vN.
> Another reason is that these math functions don't need the full ISA
> level instructions.
> 

But afaiu gcc support for -march=x86-64-vN does not really matter; isa-level.h
will define MINIMUM_X86_ISA_LEVEL based on the compiler preprocessor, which
should be compatible with GCC 6.

So instead of checking for HAVE_X86_AVX2_FMA, it would check for
MINIMUM_X86_ISA_LEVEL >= 3. It does not work for C files that explicitly use
-march, but I don't think that is the case here.
Adhemerval Zanella Netto Feb. 20, 2024, 8:03 p.m. UTC | #17
On 20/02/24 16:56, Adhemerval Zanella Netto wrote:
> 
> 
> On 20/02/24 16:10, H.J. Lu wrote:
>> On Tue, Feb 20, 2024 at 11:02 AM Adhemerval Zanella Netto
>> <adhemerval.zanella@linaro.org> wrote:
>>>
>>>
>>>
>>> On 20/02/24 15:54, H.J. Lu wrote:
>>>> On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 20/02/24 15:36, H.J. Lu wrote:
>>>>>> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>
>>>>>>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>
>>>>>>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>
>>>>>>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
>>>>>>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
>>>>>>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
>>>>>>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
>>>>>>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
>>>>>>>>>>>>>>>> variant.
>>>>>>>>>>>>>>> Not avx2 + FMA as well?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Correct. Logic is as follows
>>>>>>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
>>>>>>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
>>>>>>>>>>>>> What if someone builds with sse4.1 as a minimum but then
>>>>>>>>>>>>> runs on avx2+ machines?
>>>>>>>>>>>>
>>>>>>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
>>>>>>>>>>>> and AVX versions only have a single instruction.  This matches
>>>>>>>>>>>> the compiler builtin function of SS4.1 and AVX.
>>>>>>>>>>>
>>>>>>>>>>> if they are all the same, whats the rationale for having an
>>>>>>>>>>> avx version at all?
>>>>>>>>>>
>>>>>>>>>> They aren't the same.  For ceil, it is
>>>>>>>>>>
>>>>>>>>>> roundsd $10, %xmm0, %xmm0
>>>>>>>>>> ret
>>>>>>>>>>
>>>>>>>>>> vs
>>>>>>>>>>
>>>>>>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
>>>>>>>>>> ret
>>>>>>>>>>
>>>>>>>>>> You get the same things with
>>>>>>>>>>
>>>>>>>>>> return __builtin_ceil (x);
>>>>>>>>>
>>>>>>>>> I mean if they are equal quality sse4.1 / avx,
>>>>>>>>> why not just remove the avx impls are using sse4.1 impls
>>>>>>>>> on avx targets?
>>>>>>>>
>>>>>>>> If glibc is compiled with AVX, we should use the AVX version if
>>>>>>>> appropriate.   Since the minimum GCC for glibc build can't inline
>>>>>>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
>>>>>>> if compiled with avx, but for generic target do we need to hold
>>>>>>> onto avx versions for any reason?
>>>>>>
>>>>>> I don't understand what you were asking.   This patch leads to the same
>>>>>> assembly code generated from
>>>>>>
>>>>>> double
>>>>>> __ceil (double x)
>>>>>> {
>>>>>>   return __builtin_ceil (x);
>>>>>> }
>>>>>
>>>>> Wouldn't make sense to follow the already define x86_64 ABI versions and
>>>>> provided the ifunc variants based on the ABI uses?
>>>>
>>>> There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
>>>> and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
>>>> will exclude ISA level N-1 or older variants in IFUNC selection.
>>>>
>>>
>>> I mean, why not use the MINIMUM_X86_ISA_LEVEL to define whether to provide/build
>>> the variants instead of adding two new configure checks?
>>
>> One issue is that the minimum GCC (GCC 6?) doesn't support -march=x86-64-vN.
>> Another reason is that these math functions don't need the full ISA
>> level instructions.
>>
> 
> But afaiu gcc support for -march=x86_64-vN does not really matter, isa-level.h
> will define MINIMUM_X86_ISA_LEVEL based compiler preprocessor that should be
> compatible with GCC 6.
> 
> So instead of checking for HAVE_X86_AVX2_FMA, it would be for
> MINIMUM_X86_ISA_LEVEL >= 3. It does not work for C files that explicit uses
> -march, but I don't that is the case here.

I understand that math support is not really tied to x86_64-vX, but I also think
that there is no strong reason not to use the same logic libc.so is already
using for its ifunc variants to provide the ifunc variants in libm.so.

It simplifies testing and minimizes the build permutations, since I know that
checking -march=x86_64-v{1,2,3,4} should suffice instead of adding
-mfma, etc.

So I also think that instead of libc_cv_have_x86_avx2_fma/libc_cv_have_x86_sse4_1,
it should check for libc_cv_x86_64_vN (similar to what isa-level.h does).  Also,
I don't see much point in adding support for 32 bits.
Noah Goldstein Feb. 20, 2024, 8:18 p.m. UTC | #18
On Tue, Feb 20, 2024 at 8:03 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 20/02/24 16:56, Adhemerval Zanella Netto wrote:
> >
> >
> > On 20/02/24 16:10, H.J. Lu wrote:
> >> On Tue, Feb 20, 2024 at 11:02 AM Adhemerval Zanella Netto
> >> <adhemerval.zanella@linaro.org> wrote:
> >>>
> >>>
> >>>
> >>> On 20/02/24 15:54, H.J. Lu wrote:
> >>>> On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
> >>>> <adhemerval.zanella@linaro.org> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 20/02/24 15:36, H.J. Lu wrote:
> >>>>>> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>
> >>>>>>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>>>>
> >>>>>>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>
> >>>>>>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>>>
> >>>>>>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>>>>>>>>>
> >>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
> >>>>>>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> >>>>>>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> >>>>>>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> >>>>>>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> >>>>>>>>>>>>>>>> variant.
> >>>>>>>>>>>>>>> Not avx2 + FMA as well?
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> Correct. Logic is as follows
> >>>>>>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> >>>>>>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
> >>>>>>>>>>>>> What if someone builds with sse4.1 as a minimum but then
> >>>>>>>>>>>>> runs on avx2+ machines?
> >>>>>>>>>>>>
> >>>>>>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
> >>>>>>>>>>>> and AVX versions only have a single instruction.  This matches
> >>>>>>>>>>>> the compiler builtin function of SS4.1 and AVX.
> >>>>>>>>>>>
> >>>>>>>>>>> if they are all the same, whats the rationale for having an
> >>>>>>>>>>> avx version at all?
> >>>>>>>>>>
> >>>>>>>>>> They aren't the same.  For ceil, it is
> >>>>>>>>>>
> >>>>>>>>>> roundsd $10, %xmm0, %xmm0
> >>>>>>>>>> ret
> >>>>>>>>>>
> >>>>>>>>>> vs
> >>>>>>>>>>
> >>>>>>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
> >>>>>>>>>> ret
> >>>>>>>>>>
> >>>>>>>>>> You get the same things with
> >>>>>>>>>>
> >>>>>>>>>> return __builtin_ceil (x);
> >>>>>>>>>
> >>>>>>>>> I mean if they are equal quality sse4.1 / avx,
> >>>>>>>>> why not just remove the avx impls are using sse4.1 impls
> >>>>>>>>> on avx targets?
> >>>>>>>>
> >>>>>>>> If glibc is compiled with AVX, we should use the AVX version if
> >>>>>>>> appropriate.   Since the minimum GCC for glibc build can't inline
> >>>>>>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
> >>>>>>> if compiled with avx, but for generic target do we need to hold
> >>>>>>> onto avx versions for any reason?
> >>>>>>
> >>>>>> I don't understand what you were asking.   This patch leads to the same
> >>>>>> assembly code generated from
> >>>>>>
> >>>>>> double
> >>>>>> __ceil (double x)
> >>>>>> {
> >>>>>>   return __builtin_ceil (x);
> >>>>>> }
> >>>>>
> >>>>> Wouldn't make sense to follow the already define x86_64 ABI versions and
> >>>>> provided the ifunc variants based on the ABI uses?
> >>>>
> >>>> There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
> >>>> and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
> >>>> will exclude ISA level N-1 or older variants in IFUNC selection.
> >>>>
> >>>
> >>> I mean, why not use the MINIMUM_X86_ISA_LEVEL to define whether to provide/build
> >>> the variants instead of adding two new configure checks?
> >>
> >> One issue is that the minimum GCC (GCC 6?) doesn't support -march=x86-64-vN.
> >> Another reason is that these math functions don't need the full ISA
> >> level instructions.
> >>
> >
> > But afaiu gcc support for -march=x86_64-vN does not really matter, isa-level.h
> > will define MINIMUM_X86_ISA_LEVEL based compiler preprocessor that should be
> > compatible with GCC 6.
> >
> > So instead of checking for HAVE_X86_AVX2_FMA, it would be for
> > MINIMUM_X86_ISA_LEVEL >= 3. It does not work for C files that explicit uses
> > -march, but I don't that is the case here.
>
> I understand that math support is not really tied to x86_64-vX, but I also think
> that there is not strong reason to also use the same logic libc.so is already
> using on ifunc variant to provide the ifunc variants on libm.so.
>
> It simplifies the testing and minimize the build permutation, since I know that
> checking for -march=x86_64-v{1,2,3,4} should be suffice instead of adding
> -mfma, etc.
>
+1
> So I also think that instead of libc_cv_have_x86_avx2_fma/libc_cv_have_x86_sse4_1,
> it should check for libc_cv_x86_64_vN (similar to what isa-level.h does).  Also,
> I don't see much point in adding support for 32 bits.
H.J. Lu Feb. 20, 2024, 8:27 p.m. UTC | #19
On Tue, Feb 20, 2024 at 12:18 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Feb 20, 2024 at 8:03 PM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
> >
> >
> >
> > On 20/02/24 16:56, Adhemerval Zanella Netto wrote:
> > >
> > >
> > > On 20/02/24 16:10, H.J. Lu wrote:
> > >> On Tue, Feb 20, 2024 at 11:02 AM Adhemerval Zanella Netto
> > >> <adhemerval.zanella@linaro.org> wrote:
> > >>>
> > >>>
> > >>>
> > >>> On 20/02/24 15:54, H.J. Lu wrote:
> > >>>> On Tue, Feb 20, 2024 at 10:48 AM Adhemerval Zanella Netto
> > >>>> <adhemerval.zanella@linaro.org> wrote:
> > >>>>>
> > >>>>>
> > >>>>>
> > >>>>> On 20/02/24 15:36, H.J. Lu wrote:
> > >>>>>> On Tue, Feb 20, 2024 at 10:32 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>>>>>>
> > >>>>>>> On Tue, Feb 20, 2024 at 6:28 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >>>>>>>>
> > >>>>>>>> On Tue, Feb 20, 2024 at 10:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>>>>>>>>
> > >>>>>>>>> On Tue, Feb 20, 2024 at 6:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >>>>>>>>>>
> > >>>>>>>>>> On Tue, Feb 20, 2024 at 10:07 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>>>>>>>>>>
> > >>>>>>>>>>> On Tue, Feb 20, 2024 at 6:05 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >>>>>>>>>>>>
> > >>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:56 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>>>>>>>>>>>>
> > >>>>>>>>>>>>> On Tue, Feb 20, 2024 at 5:51 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 9:34 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>> On Tue, Feb 20, 2024 at 4:58 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> > >>>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>>> When glibc is built with FMA and AVX2 enabled by default, the resulting
> > >>>>>>>>>>>>>>>> glibc binaries won't run on SSE or FMA4 processors.  Exclude SSE, AVX and
> > >>>>>>>>>>>>>>>> FMA4 variants in libm multiarch when both FMA and AVX2 are enabled by
> > >>>>>>>>>>>>>>>> default.  Disallow glibc build with only AVX2 or FMA enabled as all AVX2
> > >>>>>>>>>>>>>>>> processors, including VMs, should also support FMA and vice versa.
> > >>>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>>> When glibc is built with SSE4.1 enabled by default, only keep SSE4.1
> > >>>>>>>>>>>>>>>> variant.
> > >>>>>>>>>>>>>>> Not avx2 + FMA as well?
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>> Correct. Logic is as follows
> > >>>>>>>>>>>>>> If (build with AVX2+FMA): Keep AVX2+FMA variants only.
> > >>>>>>>>>>>>>> else if (build with SSE4.1): Keep SSE4.1 variants only.
> > >>>>>>>>>>>>> What if someone builds with sse4.1 as a minimum but then
> > >>>>>>>>>>>>> runs on avx2+ machines?
> > >>>>>>>>>>>>
> > >>>>>>>>>>>> Only SSE4.1 variant will be used in this case.   Both SSE4.1
> > >>>>>>>>>>>> and AVX versions only have a single instruction.  This matches
> > >>>>>>>>>>>> the compiler builtin function of SS4.1 and AVX.
> > >>>>>>>>>>>
> > >>>>>>>>>>> if they are all the same, whats the rationale for having an
> > >>>>>>>>>>> avx version at all?
> > >>>>>>>>>>
> > >>>>>>>>>> They aren't the same.  For ceil, it is
> > >>>>>>>>>>
> > >>>>>>>>>> roundsd $10, %xmm0, %xmm0
> > >>>>>>>>>> ret
> > >>>>>>>>>>
> > >>>>>>>>>> vs
> > >>>>>>>>>>
> > >>>>>>>>>> vroundsd $10, %xmm0, %xmm0, %xmm0
> > >>>>>>>>>> ret
> > >>>>>>>>>>
> > >>>>>>>>>> You get the same things with
> > >>>>>>>>>>
> > >>>>>>>>>> return __builtin_ceil (x);
> > >>>>>>>>>
> > >>>>>>>>> I mean if they are equal quality sse4.1 / avx,
> > >>>>>>>>> why not just remove the avx impls are using sse4.1 impls
> > >>>>>>>>> on avx targets?
> > >>>>>>>>
> > >>>>>>>> If glibc is compiled with AVX, we should use the AVX version if
> > >>>>>>>> appropriate.   Since the minimum GCC for glibc build can't inline
> > >>>>>>>>  __builtin_ceil, we inline  __builtin_ceil by hand.
> > >>>>>>> if compiled with avx, but for generic target do we need to hold
> > >>>>>>> onto avx versions for any reason?
> > >>>>>>
> > >>>>>> I don't understand what you were asking.   This patch leads to the same
> > >>>>>> assembly code generated from
> > >>>>>>
> > >>>>>> double
> > >>>>>> __ceil (double x)
> > >>>>>> {
> > >>>>>>   return __builtin_ceil (x);
> > >>>>>> }
> > >>>>>
> > >>>>> Wouldn't make sense to follow the already define x86_64 ABI versions and
> > >>>>> provided the ifunc variants based on the ABI uses?
> > >>>>
> > >>>> There are no conflicts here.  For these math functions, ISA level 2 == SSE4.1
> > >>>> and ISA level 3 == AVX2 + FMA.   If glibc is built with ISA level N, this patch
> > >>>> will exclude ISA level N-1 or older variants in IFUNC selection.
> > >>>>
> > >>>
> > >>> I mean, why not use the MINIMUM_X86_ISA_LEVEL to define whether to provide/build
> > >>> the variants instead of adding two new configure checks?
> > >>
> > >> One issue is that the minimum GCC (GCC 6?) doesn't support -march=x86-64-vN.
> > >> Another reason is that these math functions don't need the full ISA
> > >> level instructions.
> > >>
> > >
> > > But afaiu gcc support for -march=x86_64-vN does not really matter, isa-level.h
> > > will define MINIMUM_X86_ISA_LEVEL based compiler preprocessor that should be
> > > compatible with GCC 6.
> > >
> > > So instead of checking for HAVE_X86_AVX2_FMA, it would be for
> > > MINIMUM_X86_ISA_LEVEL >= 3. It does not work for C files that explicit uses
> > > -march, but I don't that is the case here.
> >
> > I understand that math support is not really tied to x86_64-vX, but I also think
> > that there is not strong reason to also use the same logic libc.so is already
> > using on ifunc variant to provide the ifunc variants on libm.so.
> >
> > It simplifies the testing and minimize the build permutation, since I know that
> > checking for -march=x86_64-v{1,2,3,4} should be suffice instead of adding
> > -mfma, etc.
> >
> +1
> > So I also think that instead of libc_cv_have_x86_avx2_fma/libc_cv_have_x86_sse4_1,
> > it should check for libc_cv_x86_64_vN (similar to what isa-level.h does).  Also,
> > I don't see much point in adding support for 32 bits.

We can check INCLUDE_X86_ISA_LEVEL >= 3 instead of HAVE_X86_AVX2_FMA.
But "enable-x86-isa-level = yes" isn't sufficient.  We need
have-x86-isa-level-N or something like it for Makefiles.
diff mbox series

Patch

diff --git a/config.h.in b/config.h.in
index 2f0669e19b..0a9626cbe8 100644
--- a/config.h.in
+++ b/config.h.in
@@ -292,4 +292,9 @@ 
 /* Define if -mmovbe is enabled by default on x86.  */
 #undef HAVE_X86_MOVBE
 
+/* Define if -msse4.1 is enabled by default on x86.  */
+#undef HAVE_X86_SSE4_1
+
+/* Define if -mavx2 and -mfma are enabled by default on x86.  */
+#undef HAVE_X86_AVX2_FMA
 #endif
diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
index 1f4c2d67fd..1c0e0d0640 100644
--- a/sysdeps/x86/configure
+++ b/sysdeps/x86/configure
@@ -128,3 +128,80 @@  enable-x86-isa-level = $libc_cv_include_x86_isa_level"
 printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
 
 
+# Check if AVX2 and FMA are available.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for AVX2 and FMA instruction support" >&5
+printf %s "checking for AVX2 and FMA instruction support... " >&6; }
+if test ${libc_cv_have_x86_avx2_fma+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat > conftest.c <<EOF
+#if !defined __AVX2__ || !defined __FMA__
+# error AVX2 and/or FMA are disabled.
+# if defined __AVX2__ || defined __FMA__
+#  error Only one of AVX2 and FMA is enabled.
+# endif
+#endif
+EOF
+	       if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&conftest.err'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+		 libc_cv_have_x86_avx2_fma=yes
+	       else
+		 if { ac_try='grep -q "Only one of AVX2 and FMA is enabled" conftest.err'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+		   as_fn_error $? "Only one of AVX2 and FMA is enabled." "$LINENO" 5
+		 fi
+		 libc_cv_have_x86_avx2_fma=no
+	       fi
+	       rm -rf conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_avx2_fma" >&5
+printf "%s\n" "$libc_cv_have_x86_avx2_fma" >&6; }
+if test $libc_cv_have_x86_avx2_fma = yes; then
+  printf "%s\n" "#define HAVE_X86_AVX2_FMA 1" >>confdefs.h
+
+fi
+config_vars="$config_vars
+enable-avx2-fma = $libc_cv_have_x86_avx2_fma"
+
+# Check if SSE4.1 is available.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SSE4.1 instruction support" >&5
+printf %s "checking for SSE4.1 instruction support... " >&6; }
+if test ${libc_cv_have_x86_sse4_1+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat > conftest.c <<EOF
+#if !defined __SSE4_1__
+# error SSE4.1 is not available.
+#endif
+EOF
+	       if { ac_try='${CC-cc} -c $CFLAGS conftest.c 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+		 libc_cv_have_x86_sse4_1=yes
+	       else
+		 libc_cv_have_x86_sse4_1=no
+	       fi
+	       rm -rf conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_sse4_1" >&5
+printf "%s\n" "$libc_cv_have_x86_sse4_1" >&6; }
+if test $libc_cv_have_x86_sse4_1 = yes; then
+  printf "%s\n" "#define HAVE_X86_SSE4_1 1" >>confdefs.h
+
+fi
+config_vars="$config_vars
+enable-sse4-1 = $libc_cv_have_x86_sse4_1"
+
diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
index 437a50623b..df3db3fdc2 100644
--- a/sysdeps/x86/configure.ac
+++ b/sysdeps/x86/configure.ac
@@ -87,3 +87,47 @@  LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
 
 dnl Static PIE is supported.
 AC_DEFINE(SUPPORT_STATIC_PIE)
+
+# Check if AVX2 and FMA are available.
+AC_CACHE_CHECK([for AVX2 and FMA instruction support],
+	       libc_cv_have_x86_avx2_fma, [dnl
+cat > conftest.c <<EOF
+#if !defined __AVX2__ || !defined __FMA__
+# error AVX2 and/or FMA are disabled.
+# if defined __AVX2__ || defined __FMA__
+#  error Only one of AVX2 and FMA is enabled.
+# endif
+#endif
+EOF
+	       if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c 1>&conftest.err); then
+		 libc_cv_have_x86_avx2_fma=yes
+	       else
+		 if AC_TRY_COMMAND(grep -q "Only one of AVX2 and FMA is enabled" conftest.err); then
+		   AC_MSG_ERROR([Only one of AVX2 and FMA is enabled.])
+		 fi
+		 libc_cv_have_x86_avx2_fma=no
+	       fi
+	       rm -rf conftest*])
+if test $libc_cv_have_x86_avx2_fma = yes; then
+  AC_DEFINE(HAVE_X86_AVX2_FMA)
+fi
+LIBC_CONFIG_VAR([enable-avx2-fma], [$libc_cv_have_x86_avx2_fma])
+
+# Check if SSE4.1 is available.
+AC_CACHE_CHECK([for SSE4.1 instruction support],
+	       libc_cv_have_x86_sse4_1, [dnl
+cat > conftest.c <<EOF
+#if !defined __SSE4_1__
+# error SSE4.1 is not available.
+#endif
+EOF
+	       if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.c 1>&AS_MESSAGE_LOG_FD); then
+		 libc_cv_have_x86_sse4_1=yes
+	       else
+		 libc_cv_have_x86_sse4_1=no
+	       fi
+	       rm -rf conftest*])
+if test $libc_cv_have_x86_sse4_1 = yes; then
+  AC_DEFINE(HAVE_X86_SSE4_1)
+fi
+LIBC_CONFIG_VAR([enable-sse4-1], [$libc_cv_have_x86_sse4_1])
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index e1a490dd98..5eeb106b79 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,49 +1,4 @@ 
 ifeq ($(subdir),math)
-libm-sysdep_routines += \
-  s_ceil-c \
-  s_ceilf-c \
-  s_floor-c \
-  s_floorf-c \
-  s_nearbyint-c \
-  s_nearbyintf-c \
-  s_rint-c \
-  s_rintf-c \
-  s_roundeven-c \
-  s_roundevenf-c \
-  s_trunc-c \
-  s_truncf-c \
-# libm-sysdep_routines
-
-libm-sysdep_routines += \
-  s_ceil-sse4_1 \
-  s_ceilf-sse4_1 \
-  s_floor-sse4_1 \
-  s_floorf-sse4_1 \
-  s_nearbyint-sse4_1 \
-  s_nearbyintf-sse4_1 \
-  s_rint-sse4_1 \
-  s_rintf-sse4_1 \
-  s_roundeven-sse4_1 \
-  s_roundevenf-sse4_1 \
-  s_trunc-sse4_1 \
-  s_truncf-sse4_1 \
-# libm-sysdep_routines
-
-libm-sysdep_routines += \
-  e_asin-fma \
-  e_atan2-fma \
-  e_exp-fma \
-  e_log-fma \
-  e_log2-fma \
-  e_pow-fma \
-  s_atan-fma \
-  s_expm1-fma \
-  s_log1p-fma \
-  s_sin-fma \
-  s_sincos-fma \
-  s_tan-fma \
-# libm-sysdep_routines
-
 CFLAGS-e_asin-fma.c = -mfma -mavx2
 CFLAGS-e_atan2-fma.c = -mfma -mavx2
 CFLAGS-e_exp-fma.c = -mfma -mavx2
@@ -57,23 +12,6 @@  CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 CFLAGS-s_sincos-fma.c = -mfma -mavx2
 
-libm-sysdep_routines += \
-  s_cosf-sse2 \
-  s_sincosf-sse2 \
-  s_sinf-sse2 \
-# libm-sysdep_routines
-
-libm-sysdep_routines += \
-  e_exp2f-fma \
-  e_expf-fma \
-  e_log2f-fma \
-  e_logf-fma \
-  e_powf-fma \
-  s_cosf-fma \
-  s_sincosf-fma \
-  s_sinf-fma \
-# libm-sysdep_routines
-
 CFLAGS-e_exp2f-fma.c = -mfma -mavx2
 CFLAGS-e_expf-fma.c = -mfma -mavx2
 CFLAGS-e_log2f-fma.c = -mfma -mavx2
@@ -83,17 +21,92 @@  CFLAGS-s_sinf-fma.c = -mfma -mavx2
 CFLAGS-s_cosf-fma.c = -mfma -mavx2
 CFLAGS-s_sincosf-fma.c = -mfma -mavx2
 
+ifeq ($(enable-avx2-fma),yes)
 libm-sysdep_routines += \
+  s_ceil-avx \
+  s_ceilf-avx \
+  s_floor-avx \
+  s_floorf-avx \
+  s_nearbyint-avx \
+  s_nearbyintf-avx \
+  s_rint-avx \
+  s_rintf-avx \
+  s_roundeven-avx \
+  s_roundevenf-avx \
+  s_trunc-avx \
+  s_truncf-avx \
+# libm-sysdep_routines
+else
+libm-sysdep_routines += \
+  e_asin-fma \
   e_asin-fma4 \
+  e_atan2-avx \
+  e_atan2-fma \
   e_atan2-fma4 \
+  e_exp-avx \
+  e_exp-fma \
   e_exp-fma4 \
+  e_exp2f-fma \
+  e_expf-fma \
+  e_log-avx \
+  e_log-fma \
   e_log-fma4 \
+  e_log2-fma \
+  e_log2f-fma \
+  e_logf-fma \
+  e_pow-fma \
   e_pow-fma4 \
+  e_powf-fma \
+  s_atan-avx \
+  s_atan-fma \
   s_atan-fma4 \
+  s_ceil-sse4_1 \
+  s_ceilf-sse4_1 \
+  s_cosf-fma \
+  s_cosf-sse2 \
+  s_expm1-fma \
+  s_floor-sse4_1 \
+  s_floorf-sse4_1 \
+  s_log1p-fma \
+  s_nearbyint-sse4_1 \
+  s_nearbyintf-sse4_1 \
+  s_rint-sse4_1 \
+  s_rintf-sse4_1 \
+  s_roundeven-sse4_1 \
+  s_roundevenf-sse4_1 \
+  s_sin-avx \
+  s_sin-fma \
   s_sin-fma4 \
+  s_sincos-avx \
+  s_sincos-fma \
   s_sincos-fma4 \
+  s_sincosf-fma \
+  s_sincosf-sse2 \
+  s_sinf-fma \
+  s_sinf-sse2 \
+  s_tan-avx \
+  s_tan-fma \
   s_tan-fma4 \
+  s_trunc-sse4_1 \
+  s_truncf-sse4_1 \
 # libm-sysdep_routines
+ifeq ($(enable-sse4-1),no)
+libm-sysdep_routines += \
+  s_ceil-c \
+  s_ceilf-c \
+  s_floor-c \
+  s_floorf-c \
+  s_nearbyint-c \
+  s_nearbyintf-c \
+  s_rint-c \
+  s_rintf-c \
+  s_roundeven-c \
+  s_roundevenf-c \
+  s_trunc-c \
+  s_truncf-c \
+# libm-sysdep_routines
+endif
+endif
 
 CFLAGS-e_asin-fma4.c = -mfma4
 CFLAGS-e_atan2-fma4.c = -mfma4
@@ -105,16 +118,6 @@  CFLAGS-s_sin-fma4.c = -mfma4
 CFLAGS-s_tan-fma4.c = -mfma4
 CFLAGS-s_sincos-fma4.c = -mfma4
 
-libm-sysdep_routines += \
-  e_atan2-avx \
-  e_exp-avx \
-  e_log-avx \
-  s_atan-avx \
-  s_sin-avx \
-  s_sincos-avx \
-  s_tan-avx \
-# libm-sysdep_routines
-
 CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
index 2eaa6c2c04..3c1654ba3e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
@@ -16,26 +16,28 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_asin (double);
 extern double __redirect_ieee754_acos (double);
 
-#define SYMBOL_NAME ieee754_asin
-#include "ifunc-fma4.h"
+# define SYMBOL_NAME ieee754_asin
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_asin, __asin)
 
-#undef SYMBOL_NAME
-#define SYMBOL_NAME ieee754_acos
-#include "ifunc-fma4.h"
+# undef SYMBOL_NAME
+# define SYMBOL_NAME ieee754_acos
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_acos, __acos)
 
-#define __ieee754_acos __ieee754_acos_sse2
-#define __ieee754_asin __ieee754_asin_sse2
+# define __ieee754_acos __ieee754_acos_sse2
+# define __ieee754_asin __ieee754_asin_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 17ee4f3c36..f48ab8762a 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -16,16 +16,18 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_atan2 (double, double);
 
-#define SYMBOL_NAME ieee754_atan2
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME ieee754_atan2
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_atan2,
 		       __ieee754_atan2, IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_atan2, __atan2)
 
-#define __ieee754_atan2 __ieee754_atan2_sse2
+# define __ieee754_atan2 __ieee754_atan2_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index 406b7ebd44..034f5b894f 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <math.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <math.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_exp (double);
 
-#define SYMBOL_NAME ieee754_exp
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME ieee754_exp
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_exp, __exp)
 
-#define __exp __ieee754_exp_sse2
+# define __exp __ieee754_exp_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
index 804fd6be85..74f92bfa0c 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
@@ -16,25 +16,27 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_exp2f (float);
 
-#define SYMBOL_NAME exp2f
-#include "ifunc-fma.h"
+# define SYMBOL_NAME exp2f
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
 libm_alias_float_other (__exp2, exp2)
-#else
+# else
 libm_alias_float (__exp2, exp2)
-#endif
+# endif
 
 strong_alias (__exp2f, __ieee754_exp2f)
 libm_alias_finite (__exp2f, __exp2f)
 
-#define __exp2f __exp2f_sse2
+# define __exp2f __exp2f_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_exp2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
index 4a7e2a5bce..e8d6f393ff 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
@@ -16,28 +16,30 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_expf (float);
 
-#define SYMBOL_NAME expf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME expf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
 libm_alias_float_other (__exp, exp)
-#else
+# else
 libm_alias_float (__exp, exp)
-#endif
+# endif
 
 strong_alias (__expf, __ieee754_expf)
 libm_alias_finite (__expf, __expf)
 
-#define __expf __expf_sse2
+# define __expf __expf_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index 067fbf58c3..3a678235d9 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <math.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <math.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_log (double);
 
-#define SYMBOL_NAME ieee754_log
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME ieee754_log
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_log, __log)
 
-#define __log __ieee754_log_sse2
+# define __log __ieee754_log_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
index 9c57a2f6cc..c032758b4e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
@@ -16,28 +16,30 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-double.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_log2 (double);
 
-#define SYMBOL_NAME log2
-#include "ifunc-fma.h"
+# define SYMBOL_NAME log2
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
 libm_alias_double_other (__log2, log2)
-#else
+# else
 libm_alias_double (__log2, log2)
-#endif
+# endif
 
 strong_alias (__log2, __ieee754_log2)
 libm_alias_finite (__log2, __log2)
 
-#define __log2 __log2_sse2
+# define __log2 __log2_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_log2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
index 2b45c87f38..0f8d1f0abc 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
@@ -16,28 +16,30 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_log2f (float);
 
-#define SYMBOL_NAME log2f
-#include "ifunc-fma.h"
+# define SYMBOL_NAME log2f
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
 libm_alias_float_other (__log2, log2)
-#else
+# else
 libm_alias_float (__log2, log2)
-#endif
+# endif
 
 strong_alias (__log2f, __ieee754_log2f)
 libm_alias_finite (__log2f, __log2f)
 
-#define __log2f __log2f_sse2
+# define __log2f __log2f_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_log2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
index 97e23c8fea..9d94dd614f 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
@@ -16,28 +16,30 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_logf (float);
 
-#define SYMBOL_NAME logf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME logf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
 libm_alias_float_other (__log, log)
-#else
+# else
 libm_alias_float (__log, log)
-#endif
+# endif
 
 strong_alias (__logf, __ieee754_logf)
 libm_alias_finite (__logf, __logf)
 
-#define __logf __logf_sse2
+# define __logf __logf_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index 42618e7112..07436d420c 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <math.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <math.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_pow (double, double);
 
-#define SYMBOL_NAME ieee754_pow
-#include "ifunc-fma4.h"
+# define SYMBOL_NAME ieee754_pow
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_pow,
 		       __ieee754_pow, IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_pow, __pow)
 
-#define __pow __ieee754_pow_sse2
+# define __pow __ieee754_pow_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
index 8e6ce13cc1..c64c8a4302 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
@@ -16,31 +16,33 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
-#define powf __redirect_powf
-#define __DECL_SIMD___redirect_powf
-#include <math.h>
-#undef powf
+# define powf __redirect_powf
+# define __DECL_SIMD___redirect_powf
+# include <math.h>
+# undef powf
 
-#define SYMBOL_NAME powf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME powf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
 libm_alias_float_other (__pow, pow)
-#else
+# else
 libm_alias_float (__pow, pow)
-#endif
+# endif
 
 strong_alias (__powf, __ieee754_powf)
 libm_alias_finite (__powf, __powf)
 
-#define __powf __powf_sse2
+# define __powf __powf_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 71bad096a9..f9ec4e7b37 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -16,15 +16,17 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-double.h>
 
 extern double __redirect_atan (double);
 
-#define SYMBOL_NAME atan
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME atan
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
 libm_alias_double (__atan, atan)
 
-#define __atan __atan_sse2
+# define __atan __atan_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
new file mode 100644
index 0000000000..e6c1106753
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of ceil function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__ceil)
+	vroundsd $10, %xmm0, %xmm0, %xmm0
+	ret
+END(__ceil)
+
+libm_alias_double (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
index 64119011ad..4be069b8da 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-double.h>
+# define __ceil_sse41 __ceil
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__ceil_sse41)
 	roundsd	$10, %xmm0, %xmm0
 	ret
 END(__ceil_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_double (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
index cc028addee..0199863c8f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define ceil __redirect_ceil
-#define __ceil __redirect___ceil
-#include <math.h>
-#undef ceil
-#undef __ceil
+# define ceil __redirect_ceil
+# define __ceil __redirect___ceil
+# include <math.h>
+# undef ceil
+# undef __ceil
 
-#define SYMBOL_NAME ceil
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME ceil
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
 libm_alias_double (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
new file mode 100644
index 0000000000..b4d8ac0455
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of ceilf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__ceilf)
+	vroundss $10, %xmm0, %xmm0, %xmm0
+	ret
+END(__ceilf)
+
+libm_alias_float (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
index dd9a9f6b71..1a85e9c925 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-float.h>
+# define __ceilf_sse41 __ceilf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__ceilf_sse41)
 	roundss	$10, %xmm0, %xmm0
 	ret
 END(__ceilf_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_float (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
index 97a0ca7d19..dfce9225dd 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define ceilf __redirect_ceilf
-#define __ceilf __redirect___ceilf
-#include <math.h>
-#undef ceilf
-#undef __ceilf
+# define ceilf __redirect_ceilf
+# define __ceilf __redirect___ceilf
+# include <math.h>
+# undef ceilf
+# undef __ceilf
 
-#define SYMBOL_NAME ceilf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME ceilf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
 libm_alias_float (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
index 2703c576df..9be9327b80 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
@@ -16,13 +16,17 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
 
 extern float __redirect_cosf (float);
 
-#define SYMBOL_NAME cosf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME cosf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
 
 libm_alias_float (__cos, cos)
+#else
+# include <sysdeps/ieee754/flt-32/s_cosf.c>
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
index 8a2d69f9b2..1ed45245cb 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
@@ -16,21 +16,23 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-double.h>
 
 extern double __redirect_expm1 (double);
 
-#define SYMBOL_NAME expm1
-#include "ifunc-fma.h"
+# define SYMBOL_NAME expm1
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
 libm_alias_double (__expm1, expm1)
 
-#define __expm1 __expm1_sse2
+# define __expm1 __expm1_sse2
 
 /* NB: __expm1 may be expanded to __expm1_sse2 in the following
    prototypes.  */
 extern long double __expm1l (long double);
 extern long double __expm1f128 (long double);
 
+#endif
 #include <sysdeps/ieee754/dbl-64/s_expm1.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
new file mode 100644
index 0000000000..ff74b5a8bf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of floor function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__floor)
+	vroundsd $9, %xmm0, %xmm0, %xmm0
+	ret
+END(__floor)
+
+libm_alias_double (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
index 2f7521f39f..957d018177 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-double.h>
+# define __floor_sse41 __floor
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__floor_sse41)
 	roundsd	$9, %xmm0, %xmm0
 	ret
 END(__floor_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_double (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
index 8cebd48e10..a30c88671e 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define floor __redirect_floor
-#define __floor __redirect___floor
-#include <math.h>
-#undef floor
-#undef __floor
+# define floor __redirect_floor
+# define __floor __redirect___floor
+# include <math.h>
+# undef floor
+# undef __floor
 
-#define SYMBOL_NAME floor
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME floor
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
 libm_alias_double (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
new file mode 100644
index 0000000000..c378baae8e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of floorf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__floorf)
+	vroundss $9, %xmm0, %xmm0, %xmm0
+	ret
+END(__floorf)
+
+libm_alias_float (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
index 5f6020d27d..eacabe167c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-float.h>
+# define __floorf_sse41 __floorf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__floorf_sse41)
 	roundss	$9, %xmm0, %xmm0
 	ret
 END(__floorf_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_float (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
index a14e18b03c..6531b78443 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define floorf __redirect_floorf
-#define __floorf __redirect___floorf
-#include <math.h>
-#undef floorf
-#undef __floorf
+# define floorf __redirect_floorf
+# define __floorf __redirect___floorf
+# include <math.h>
+# undef floorf
+# undef __floorf
 
-#define SYMBOL_NAME floorf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME floorf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
 libm_alias_float (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
index a8e1a3f21b..76e1672e2d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
@@ -16,14 +16,16 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#ifndef HAVE_X86_AVX2_FMA
+# include <libm-alias-double.h>
 
 extern double __redirect_log1p (double);
 
-#define SYMBOL_NAME log1p
-#include "ifunc-fma.h"
+# define SYMBOL_NAME log1p
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
 
-#define __log1p __log1p_sse2
+# define __log1p __log1p_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_log1p.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
new file mode 100644
index 0000000000..5bfdf73c28
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of nearbyint function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__nearbyint)
+	vroundsd $0xc, %xmm0, %xmm0, %xmm0
+	ret
+END(__nearbyint)
+
+libm_alias_double (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
index 674f7eb40a..ee0b17e470 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-double.h>
+# define __nearbyint_sse41 __nearbyint
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__nearbyint_sse41)
 	roundsd	$0xc, %xmm0, %xmm0
 	ret
 END(__nearbyint_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_double (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
index 693e42dd4e..649a9df869 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# include <libm-alias-double.h>
 
-#define nearbyint __redirect_nearbyint
-#define __nearbyint __redirect___nearbyint
-#include <math.h>
-#undef nearbyint
-#undef __nearbyint
+# define nearbyint __redirect_nearbyint
+# define __nearbyint __redirect___nearbyint
+# include <math.h>
+# undef nearbyint
+# undef __nearbyint
 
-#define SYMBOL_NAME nearbyint
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME nearbyint
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
 		       IFUNC_SELECTOR ());
 libm_alias_double (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
new file mode 100644
index 0000000000..1dbaed0324
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of nearbyintf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__nearbyintf)
+	vroundss $0xc, %xmm0, %xmm0, %xmm0
+	ret
+END(__nearbyintf)
+
+libm_alias_float (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
index 5892bd7563..8b3e307b78 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-float.h>
+# define __nearbyintf_sse41 __nearbyintf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__nearbyintf_sse41)
 	roundss	$0xc, %xmm0, %xmm0
 	ret
 END(__nearbyintf_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_float (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
index a0ac009f4b..7762467ad9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
 
-#define nearbyintf __redirect_nearbyintf
-#define __nearbyintf __redirect___nearbyintf
-#include <math.h>
-#undef nearbyintf
-#undef __nearbyintf
+# define nearbyintf __redirect_nearbyintf
+# define __nearbyintf __redirect___nearbyintf
+# include <math.h>
+# undef nearbyintf
+# undef __nearbyintf
 
-#define SYMBOL_NAME nearbyintf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME nearbyintf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
 		       IFUNC_SELECTOR ());
 libm_alias_float (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
new file mode 100644
index 0000000000..2b403b331f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of rint function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__rint)
+	vroundsd $4, %xmm0, %xmm0, %xmm0
+	ret
+END(__rint)
+
+libm_alias_double (__rint, rint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
index 405372991b..4c7c1c37de 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-double.h>
+# define __rint_sse41 __rint
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__rint_sse41)
 	roundsd	$4, %xmm0, %xmm0
 	ret
 END(__rint_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_double (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
index 754c87e004..49693c9728 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define rint __redirect_rint
-#define __rint __redirect___rint
-#include <math.h>
-#undef rint
-#undef __rint
+# define rint __redirect_rint
+# define __rint __redirect___rint
+# include <math.h>
+# undef rint
+# undef __rint
 
-#define SYMBOL_NAME rint
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME rint
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
 libm_alias_double (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
new file mode 100644
index 0000000000..171c2867f4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of rintf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__rintf)
+	vroundss $4, %xmm0, %xmm0, %xmm0
+	ret
+END(__rintf)
+
+libm_alias_float (__rint, rint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
index 8ac67ce767..55443d7238 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-float.h>
+# define __rintf_sse41 __rintf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__rintf_sse41)
 	roundss	$4, %xmm0, %xmm0
 	ret
 END(__rintf_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_float (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
index e9d6b7a5f2..c7cf09701d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define rintf __redirect_rintf
-#define __rintf __redirect___rintf
-#include <math.h>
-#undef rintf
-#undef __rintf
+# define rintf __redirect_rintf
+# define __rintf __redirect___rintf
+# include <math.h>
+# undef rintf
+# undef __rintf
 
-#define SYMBOL_NAME rintf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME rintf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
 libm_alias_float (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
new file mode 100644
index 0000000000..576790355c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of roundeven function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__roundeven)
+	vroundsd $8, %xmm0, %xmm0, %xmm0
+	ret
+END(__roundeven)
+
+libm_alias_double (__roundeven, roundeven)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
index 5ef102336b..f0644cce81 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-double.h>
+# define __roundeven_sse41 __roundeven
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__roundeven_sse41)
 	roundsd	$8, %xmm0, %xmm0
 	ret
 END(__roundeven_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_double (__roundeven, roundeven)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
index 8737b32e26..a250297918 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
@@ -16,16 +16,18 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# include <libm-alias-double.h>
 
-#define roundeven __redirect_roundeven
-#define __roundeven __redirect___roundeven
-#include <math.h>
-#undef roundeven
-#undef __roundeven
+# define roundeven __redirect_roundeven
+# define __roundeven __redirect___roundeven
+# include <math.h>
+# undef roundeven
+# undef __roundeven
 
-#define SYMBOL_NAME roundeven
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME roundeven
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
 libm_alias_double (__roundeven, roundeven)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
new file mode 100644
index 0000000000..42c359f4cd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of roundevenf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__roundevenf)	/* Rounds the float in %xmm0; result is left in %xmm0.  */
+	vroundss $8, %xmm0, %xmm0, %xmm0	/* imm8=8: round to nearest even, inexact exception suppressed (Intel SDM, ROUNDSS).  */
+	ret
+END(__roundevenf)
+
+libm_alias_float (__roundeven, roundeven)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
index 792c90ba07..d1dd6b0e8b 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
@@ -17,8 +17,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-float.h>
+# define __roundevenf_sse41 __roundevenf	/* ENTRY/END below then define __roundevenf itself.  */
+	.text
+#else /* !HAVE_X86_SSE4_1 */
 	.section .text.sse4.1,"ax",@progbits
+#endif /* HAVE_X86_SSE4_1 */
+
 ENTRY(__roundevenf_sse41)
 	roundss	$8, %xmm0, %xmm0
 	ret
 END(__roundevenf_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_float (__roundeven, roundeven)
+#endif /* HAVE_X86_SSE4_1 */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
index e96016a4d5..534941e67f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
@@ -16,16 +16,18 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# include <libm-alias-float.h>
 
-#define roundevenf __redirect_roundevenf
-#define __roundevenf __redirect___roundevenf
-#include <math.h>
-#undef roundevenf
-#undef __roundevenf
+# define roundevenf __redirect_roundevenf
+# define __roundevenf __redirect___roundevenf
+# include <math.h>	/* Read with roundevenf/__roundevenf renamed to __redirect_*.  */
+# undef roundevenf
+# undef __roundevenf
 
-#define SYMBOL_NAME roundevenf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME roundevenf
+# include "ifunc-sse4_1.h"	/* Presumably defines IFUNC_SELECTOR used below -- confirm.  */
 
 libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
 libm_alias_float (__roundeven, roundeven)
+#endif /* !HAVE_X86_SSE4_1 && !HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 355cc0092e..21eaa5e984 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -16,24 +16,26 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#ifndef HAVE_X86_AVX2_FMA	/* ifunc dispatch only when AVX2+FMA is not the default.  */
+# include <libm-alias-double.h>
 
 extern double __redirect_sin (double);
 extern double __redirect_cos (double);
 
-#define SYMBOL_NAME sin
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME sin
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
 libm_alias_double (__sin, sin)
 
-#undef SYMBOL_NAME
-#define SYMBOL_NAME cos
-#include "ifunc-avx-fma4.h"
+# undef SYMBOL_NAME
+# define SYMBOL_NAME cos
+# include "ifunc-avx-fma4.h"	/* Included a second time, now with SYMBOL_NAME set to cos.  */
 
 libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
 libm_alias_double (__cos, cos)
 
-#define __cos __cos_sse2
-#define __sin __sin_sse2
+# define __cos __cos_sse2	/* Renames __cos in the generic source included below.  */
+# define __sin __sin_sse2	/* Likewise for __sin.  */
+#endif /* !HAVE_X86_AVX2_FMA */
 #include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
index 70107e999c..729163cdde 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
@@ -16,15 +16,17 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#ifndef HAVE_X86_AVX2_FMA	/* ifunc dispatch only when AVX2+FMA is not the default.  */
+# include <libm-alias-double.h>
 
 extern void __redirect_sincos (double, double *, double *);
 
-#define SYMBOL_NAME sincos
-#include "ifunc-fma4.h"
+# define SYMBOL_NAME sincos
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
 libm_alias_double (__sincos, sincos)
 
-#define __sincos __sincos_sse2
+# define __sincos __sincos_sse2	/* Renames __sincos in the generic source included below.  */
+#endif /* !HAVE_X86_AVX2_FMA */
 #include <sysdeps/ieee754/dbl-64/s_sincos.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
index 80bc028451..136dd62c81 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
@@ -16,13 +16,17 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#ifndef HAVE_X86_AVX2_FMA	/* ifunc dispatch only when AVX2+FMA is not the default.  */
+# include <libm-alias-float.h>
 
 extern void __redirect_sincosf (float, float *, float *);
 
-#define SYMBOL_NAME sincosf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME sincosf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
 
 libm_alias_float (__sincos, sincos)
+#else /* HAVE_X86_AVX2_FMA: compile the generic flt-32 source directly.  */
+# include <sysdeps/ieee754/flt-32/s_sincosf.c>
+#endif /* !HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
index a32b9e9550..fabbf55604 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
@@ -16,13 +16,17 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#ifndef HAVE_X86_AVX2_FMA	/* ifunc dispatch only when AVX2+FMA is not the default.  */
+# include <libm-alias-float.h>
 
 extern float __redirect_sinf (float);
 
-#define SYMBOL_NAME sinf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME sinf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
 
 libm_alias_float (__sin, sin)
+#else /* HAVE_X86_AVX2_FMA: compile the generic flt-32 source directly.  */
+# include <sysdeps/ieee754/flt-32/s_sinf.c>
+#endif /* !HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index f9a2474a13..c85e327ff8 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -16,15 +16,17 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#ifndef HAVE_X86_AVX2_FMA	/* ifunc dispatch only when AVX2+FMA is not the default.  */
+# include <libm-alias-double.h>
 
 extern double __redirect_tan (double);
 
-#define SYMBOL_NAME tan
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME tan
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
 libm_alias_double (__tan, tan)
 
-#define __tan __tan_sse2
+# define __tan __tan_sse2	/* Renames __tan in the generic source included below.  */
+#endif /* !HAVE_X86_AVX2_FMA */
 #include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
new file mode 100644
index 0000000000..b3e87e9606
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of trunc function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__trunc)	/* Truncates the double in %xmm0; result is left in %xmm0.  */
+	vroundsd $11, %xmm0, %xmm0, %xmm0	/* imm8=11: round toward zero, inexact exception suppressed (Intel SDM, ROUNDSD).  */
+	ret
+END(__trunc)
+
+libm_alias_double (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
index b496a6ef49..062cd1fb36 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
@@ -18,8 +18,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-double.h>
+# define __trunc_sse41 __trunc	/* ENTRY/END below then define __trunc itself.  */
+	.text
+#else /* !HAVE_X86_SSE4_1 */
 	.section .text.sse4.1,"ax",@progbits
+#endif /* HAVE_X86_SSE4_1 */
+
 ENTRY(__trunc_sse41)
 	roundsd	$11, %xmm0, %xmm0
 	ret
 END(__trunc_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_double (__trunc, trunc)
+#endif /* HAVE_X86_SSE4_1 */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
index 9bc9df8744..568e818826 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define trunc __redirect_trunc
-#define __trunc __redirect___trunc
-#include <math.h>
-#undef trunc
-#undef __trunc
+# define trunc __redirect_trunc
+# define __trunc __redirect___trunc
+# include <math.h>	/* Read with trunc/__trunc renamed to __redirect_*.  */
+# undef trunc
+# undef __trunc
 
-#define SYMBOL_NAME trunc
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME trunc
+# include "ifunc-sse4_1.h"	/* Presumably defines IFUNC_SELECTOR used below -- confirm.  */
 
 libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
 libm_alias_double (__trunc, trunc)
+#endif /* !HAVE_X86_SSE4_1 && !HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
new file mode 100644
index 0000000000..f31ac7d7f7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
@@ -0,0 +1,28 @@ 
+/* AVX implementation of truncf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__truncf)	/* Truncates the float in %xmm0; result is left in %xmm0.  */
+	vroundss $11, %xmm0, %xmm0, %xmm0	/* imm8=11: round toward zero, inexact exception suppressed (Intel SDM, ROUNDSS).  */
+	ret
+END(__truncf)
+
+libm_alias_float (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
index 22e9a83307..ecd0ae5c05 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
@@ -18,8 +18,19 @@ 
 
 #include <sysdep.h>
 
+#ifdef HAVE_X86_SSE4_1
+# include <libm-alias-float.h>
+# define __truncf_sse41 __truncf	/* ENTRY/END below then define __truncf itself.  */
+	.text
+#else /* !HAVE_X86_SSE4_1 */
 	.section .text.sse4.1,"ax",@progbits
+#endif /* HAVE_X86_SSE4_1 */
+
 ENTRY(__truncf_sse41)
 	roundss	$11, %xmm0, %xmm0
 	ret
 END(__truncf_sse41)
+
+#ifdef HAVE_X86_SSE4_1
+libm_alias_float (__trunc, trunc)
+#endif /* HAVE_X86_SSE4_1 */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
index dae01d166a..57783c805a 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
@@ -16,17 +16,19 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#if !defined HAVE_X86_SSE4_1 && !defined HAVE_X86_AVX2_FMA
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define truncf __redirect_truncf
-#define __truncf __redirect___truncf
-#include <math.h>
-#undef truncf
-#undef __truncf
+# define truncf __redirect_truncf
+# define __truncf __redirect___truncf
+# include <math.h>	/* Read with truncf/__truncf renamed to __redirect_*.  */
+# undef truncf
+# undef __truncf
 
-#define SYMBOL_NAME truncf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME truncf
+# include "ifunc-sse4_1.h"	/* Presumably defines IFUNC_SELECTOR used below -- confirm.  */
 
 libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
 libm_alias_float (__trunc, trunc)
+#endif /* !HAVE_X86_SSE4_1 && !HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c
index 27eee98a0a..fb2045e6cf 100644
--- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
@@ -1 +1,5 @@ 
-#include <sysdeps/../math/w_exp.c>
+#ifdef HAVE_X86_AVX2_FMA	/* AVX2 and FMA are enabled by default.  */
+# include <sysdeps/ieee754/dbl-64/w_exp.c>
+#else /* !HAVE_X86_AVX2_FMA */
+# include <sysdeps/../math/w_exp.c>
+#endif /* HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c
index 9b2b018711..b85be8221e 100644
--- a/sysdeps/x86_64/fpu/multiarch/w_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
@@ -1 +1,5 @@ 
-#include <sysdeps/../math/w_log.c>
+#ifdef HAVE_X86_AVX2_FMA	/* AVX2 and FMA are enabled by default.  */
+# include <sysdeps/ieee754/dbl-64/w_log.c>
+#else /* !HAVE_X86_AVX2_FMA */
+# include <sysdeps/../math/w_log.c>
+#endif /* HAVE_X86_AVX2_FMA */
diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c
index b50c1988de..849f4f97ff 100644
--- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
@@ -1 +1,5 @@ 
-#include <sysdeps/../math/w_pow.c>
+#ifdef HAVE_X86_AVX2_FMA	/* AVX2 and FMA are enabled by default.  */
+# include <sysdeps/ieee754/dbl-64/w_pow.c>
+#else /* !HAVE_X86_AVX2_FMA */
+# include <sysdeps/../math/w_pow.c>
+#endif /* HAVE_X86_AVX2_FMA */