diff mbox series

[v2,06/10] i386: Use generic exp10

Message ID 20240327194024.1409677-7-adhemerval.zanella@linaro.org
State New
Headers show
Series Fix some libm static issues | expand

Commit Message

Adhemerval Zanella Netto March 27, 2024, 7:40 p.m. UTC
The resulting performance is slightly better (Ryzen 5900, gcc 13.2.1):

 * master
  "exp10": {
   "": {
    "duration": 3.70091e+09,
    "iterations": 5.8534e+07,
    "max": 91.279,
    "min": 62.6225,
    "mean": 63.2267
   }
  }

 * patch
  "exp10": {
   "": {
    "duration": 3.70793e+09,
    "iterations": 6.328e+07,
    "max": 259.592,
    "min": 52.1145,
    "mean": 58.5957
   }
  }

Checked on i686-linux-gnu.
---
 sysdeps/i386/fpu/Versions                 |  1 +
 sysdeps/i386/fpu/e_exp10.S                | 51 -----------------------
 sysdeps/i386/fpu/e_exp10.c                |  2 +
 sysdeps/i386/fpu/e_exp_data.c             |  1 -
 sysdeps/i386/fpu/w_exp10_compat.c         |  8 ----
 sysdeps/ieee754/dbl-64/e_exp10.c          |  7 +++-
 sysdeps/mach/hurd/i386/libm.abilist       |  1 +
 sysdeps/unix/sysv/linux/i386/libm.abilist |  1 +
 8 files changed, 10 insertions(+), 62 deletions(-)
 delete mode 100644 sysdeps/i386/fpu/e_exp10.S
 create mode 100644 sysdeps/i386/fpu/e_exp10.c
 delete mode 100644 sysdeps/i386/fpu/e_exp_data.c
 delete mode 100644 sysdeps/i386/fpu/w_exp10_compat.c

Comments

H.J. Lu March 27, 2024, 8:14 p.m. UTC | #1
On Wed, Mar 27, 2024 at 12:40 PM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
> The resulting performance is slight better (Ryzen 5900, gcc 13.2.1):
>
>  * master
>   "exp10": {
>    "": {
>     "duration": 3.70091e+09,
>     "iterations": 5.8534e+07,
>     "max": 91.279,
>     "min": 62.6225,
>     "mean": 63.2267
>    }
>   }
>
>  * patch
>   "exp10": {
>    "": {
>     "duration": 3.70793e+09,
>     "iterations": 6.328e+07,
>     "max": 259.592,
>     "min": 52.1145,
>     "mean": 58.5957
>    }
>   }
>
> Checked on i686-linux-gnu.
> ---
>  sysdeps/i386/fpu/Versions                 |  1 +
>  sysdeps/i386/fpu/e_exp10.S                | 51 -----------------------
>  sysdeps/i386/fpu/e_exp10.c                |  2 +
>  sysdeps/i386/fpu/e_exp_data.c             |  1 -
>  sysdeps/i386/fpu/w_exp10_compat.c         |  8 ----
>  sysdeps/ieee754/dbl-64/e_exp10.c          |  7 +++-
>  sysdeps/mach/hurd/i386/libm.abilist       |  1 +
>  sysdeps/unix/sysv/linux/i386/libm.abilist |  1 +
>  8 files changed, 10 insertions(+), 62 deletions(-)
>  delete mode 100644 sysdeps/i386/fpu/e_exp10.S
>  create mode 100644 sysdeps/i386/fpu/e_exp10.c
>  delete mode 100644 sysdeps/i386/fpu/e_exp_data.c
>  delete mode 100644 sysdeps/i386/fpu/w_exp10_compat.c
>
> diff --git a/sysdeps/i386/fpu/Versions b/sysdeps/i386/fpu/Versions
> index 9509f9b7c7..7326f25583 100644
> --- a/sysdeps/i386/fpu/Versions
> +++ b/sysdeps/i386/fpu/Versions
> @@ -5,6 +5,7 @@ libm {
>    }
>    GLIBC_2.40 {
>      # No SVID compatible error handling.
> +    exp10;
>      fmod; fmodf;
>    }
>  }
> diff --git a/sysdeps/i386/fpu/e_exp10.S b/sysdeps/i386/fpu/e_exp10.S
> deleted file mode 100644
> index 902f70b77f..0000000000
> --- a/sysdeps/i386/fpu/e_exp10.S
> +++ /dev/null
> @@ -1,51 +0,0 @@
> -
> -#include <machine/asm.h>
> -#include <i386-math-asm.h>
> -#include <libm-alias-finite.h>
> -
> -DEFINE_DBL_MIN
> -
> -#ifdef PIC
> -# define MO(op) op##@GOTOFF(%ecx)
> -#else
> -# define MO(op) op
> -#endif
> -
> -       .text
> -/* 10^x = 2^(x * log2(10)) */
> -ENTRY(__ieee754_exp10)
> -#ifdef  PIC
> -       LOAD_PIC_REG (cx)
> -#endif
> -       fldl    4(%esp)
> -/* I added the following ugly construct because exp(+-Inf) resulted
> -   in NaN.  The ugliness results from the bright minds at Intel.
> -   For the i686 the code can be written better.
> -   -- drepper@cygnus.com.  */
> -       fxam                            /* Is NaN or +-Inf?  */
> -       fstsw   %ax
> -       movb    $0x45, %dh
> -       andb    %ah, %dh
> -       cmpb    $0x05, %dh
> -       je      1f                      /* Is +-Inf, jump.  */
> -       fldl2t
> -       fmulp                           /* x * log2(10) */
> -       fld     %st
> -       frndint                         /* int(x * log2(10)) */
> -       fsubr   %st,%st(1)              /* fract(x * log2(10)) */
> -       fxch
> -       f2xm1                           /* 2^(fract(x * log2(10))) - 1 */
> -       fld1
> -       faddp                           /* 2^(fract(x * log2(10))) */
> -       fscale                          /* e^x */
> -       fstp    %st(1)
> -       DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
> -       ret
> -
> -1:     testl   $0x200, %eax            /* Test sign.  */
> -       jz      2f                      /* If positive, jump.  */
> -       fstp    %st
> -       fldz                            /* Set result to 0.  */
> -2:     ret
> -END (__ieee754_exp10)
> -libm_alias_finite (__ieee754_exp10, __exp10)
> diff --git a/sysdeps/i386/fpu/e_exp10.c b/sysdeps/i386/fpu/e_exp10.c
> new file mode 100644
> index 0000000000..340254fc6e
> --- /dev/null
> +++ b/sysdeps/i386/fpu/e_exp10.c
> @@ -0,0 +1,2 @@
> +#define EXP10_VERSION GLIBC_2_40
> +#include <sysdeps/ieee754/dbl-64/e_exp10.c>
> diff --git a/sysdeps/i386/fpu/e_exp_data.c b/sysdeps/i386/fpu/e_exp_data.c
> deleted file mode 100644
> index 1cc8931700..0000000000
> --- a/sysdeps/i386/fpu/e_exp_data.c
> +++ /dev/null
> @@ -1 +0,0 @@
> -/* Not needed.  */
> diff --git a/sysdeps/i386/fpu/w_exp10_compat.c b/sysdeps/i386/fpu/w_exp10_compat.c
> deleted file mode 100644
> index 49a0e03385..0000000000
> --- a/sysdeps/i386/fpu/w_exp10_compat.c
> +++ /dev/null
> @@ -1,8 +0,0 @@
> -/* i386 provides an optimized __ieee754_exp10.  */
> -#ifdef SHARED
> -# define NO_COMPAT_NEEDED 1
> -# include <math/w_exp10_compat.c>
> -#else
> -# include <math-type-macros-double.h>
> -# include <w_exp10_template.c>
> -#endif
> diff --git a/sysdeps/ieee754/dbl-64/e_exp10.c b/sysdeps/ieee754/dbl-64/e_exp10.c
> index 225fc74c4c..c63b852f72 100644
> --- a/sysdeps/ieee754/dbl-64/e_exp10.c
> +++ b/sysdeps/ieee754/dbl-64/e_exp10.c
> @@ -99,7 +99,7 @@ __exp10 (double x)
>
>    /* Reduce x: z = x * N / log10(2), k = round(z).  */
>    double_t z = __exp_data.invlog10_2N * x;
> -  double_t kd;
> +  double kd;
>    int64_t ki;
>  #if TOINT_INTRINSICS
>    kd = roundtoint (z);
> @@ -147,7 +147,10 @@ __exp10 (double x)
>  strong_alias (__exp10, __ieee754_exp10)
>  libm_alias_finite (__ieee754_exp10, __exp10)
>  #if LIBM_SVID_COMPAT
> -versioned_symbol (libm, __exp10, exp10, GLIBC_2_39);
> +# ifndef EXP10_VERSION
> +#  define EXP10_VERSION GLIBC_2_39
> +# endif
> +versioned_symbol (libm, __exp10, exp10, EXP10_VERSION);
>  libm_alias_double_other (__exp10, exp10)
>  #else
>  libm_alias_double (__exp10, exp10)
> diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist
> index 88e7538e51..01c5633663 100644
> --- a/sysdeps/mach/hurd/i386/libm.abilist
> +++ b/sysdeps/mach/hurd/i386/libm.abilist
> @@ -1181,5 +1181,6 @@ GLIBC_2.35 fsqrt F
>  GLIBC_2.35 fsqrtl F
>  GLIBC_2.35 hypot F
>  GLIBC_2.35 hypotf F
> +GLIBC_2.40 exp10 F
>  GLIBC_2.40 fmod F
>  GLIBC_2.40 fmodf F
> diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist
> index c99c60161d..3413cfdbe7 100644
> --- a/sysdeps/unix/sysv/linux/i386/libm.abilist
> +++ b/sysdeps/unix/sysv/linux/i386/libm.abilist
> @@ -1188,5 +1188,6 @@ GLIBC_2.35 fsqrt F
>  GLIBC_2.35 fsqrtl F
>  GLIBC_2.35 hypot F
>  GLIBC_2.35 hypotf F
> +GLIBC_2.40 exp10 F
>  GLIBC_2.40 fmod F
>  GLIBC_2.40 fmodf F
> --
> 2.34.1
>

Also need a bug report.
diff mbox series

Patch

diff --git a/sysdeps/i386/fpu/Versions b/sysdeps/i386/fpu/Versions
index 9509f9b7c7..7326f25583 100644
--- a/sysdeps/i386/fpu/Versions
+++ b/sysdeps/i386/fpu/Versions
@@ -5,6 +5,7 @@  libm {
   }
   GLIBC_2.40 {
     # No SVID compatible error handling.
+    exp10;
     fmod; fmodf;
   }
 }
diff --git a/sysdeps/i386/fpu/e_exp10.S b/sysdeps/i386/fpu/e_exp10.S
deleted file mode 100644
index 902f70b77f..0000000000
--- a/sysdeps/i386/fpu/e_exp10.S
+++ /dev/null
@@ -1,51 +0,0 @@ 
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_DBL_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-#else
-# define MO(op) op
-#endif
-
-	.text
-/* 10^x = 2^(x * log2(10)) */
-ENTRY(__ieee754_exp10)
-#ifdef  PIC
-	LOAD_PIC_REG (cx)
-#endif
-	fldl	4(%esp)
-/* I added the following ugly construct because exp(+-Inf) resulted
-   in NaN.  The ugliness results from the bright minds at Intel.
-   For the i686 the code can be written better.
-   -- drepper@cygnus.com.  */
-	fxam				/* Is NaN or +-Inf?  */
-	fstsw	%ax
-	movb	$0x45, %dh
-	andb	%ah, %dh
-	cmpb	$0x05, %dh
-	je	1f			/* Is +-Inf, jump.  */
-	fldl2t
-	fmulp				/* x * log2(10) */
-	fld	%st
-	frndint				/* int(x * log2(10)) */
-	fsubr	%st,%st(1)		/* fract(x * log2(10)) */
-	fxch
-	f2xm1				/* 2^(fract(x * log2(10))) - 1 */
-	fld1
-	faddp				/* 2^(fract(x * log2(10))) */
-	fscale				/* e^x */
-	fstp	%st(1)
-	DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
-	ret
-
-1:	testl	$0x200, %eax		/* Test sign.  */
-	jz	2f			/* If positive, jump.  */
-	fstp	%st
-	fldz				/* Set result to 0.  */
-2:	ret
-END (__ieee754_exp10)
-libm_alias_finite (__ieee754_exp10, __exp10)
diff --git a/sysdeps/i386/fpu/e_exp10.c b/sysdeps/i386/fpu/e_exp10.c
new file mode 100644
index 0000000000..340254fc6e
--- /dev/null
+++ b/sysdeps/i386/fpu/e_exp10.c
@@ -0,0 +1,2 @@ 
+#define EXP10_VERSION GLIBC_2_40
+#include <sysdeps/ieee754/dbl-64/e_exp10.c>
diff --git a/sysdeps/i386/fpu/e_exp_data.c b/sysdeps/i386/fpu/e_exp_data.c
deleted file mode 100644
index 1cc8931700..0000000000
--- a/sysdeps/i386/fpu/e_exp_data.c
+++ /dev/null
@@ -1 +0,0 @@ 
-/* Not needed.  */
diff --git a/sysdeps/i386/fpu/w_exp10_compat.c b/sysdeps/i386/fpu/w_exp10_compat.c
deleted file mode 100644
index 49a0e03385..0000000000
--- a/sysdeps/i386/fpu/w_exp10_compat.c
+++ /dev/null
@@ -1,8 +0,0 @@ 
-/* i386 provides an optimized __ieee754_exp10.  */
-#ifdef SHARED
-# define NO_COMPAT_NEEDED 1
-# include <math/w_exp10_compat.c>
-#else
-# include <math-type-macros-double.h>
-# include <w_exp10_template.c>
-#endif
diff --git a/sysdeps/ieee754/dbl-64/e_exp10.c b/sysdeps/ieee754/dbl-64/e_exp10.c
index 225fc74c4c..c63b852f72 100644
--- a/sysdeps/ieee754/dbl-64/e_exp10.c
+++ b/sysdeps/ieee754/dbl-64/e_exp10.c
@@ -99,7 +99,7 @@  __exp10 (double x)
 
   /* Reduce x: z = x * N / log10(2), k = round(z).  */
   double_t z = __exp_data.invlog10_2N * x;
-  double_t kd;
+  double kd;
   int64_t ki;
 #if TOINT_INTRINSICS
   kd = roundtoint (z);
@@ -147,7 +147,10 @@  __exp10 (double x)
 strong_alias (__exp10, __ieee754_exp10)
 libm_alias_finite (__ieee754_exp10, __exp10)
 #if LIBM_SVID_COMPAT
-versioned_symbol (libm, __exp10, exp10, GLIBC_2_39);
+# ifndef EXP10_VERSION
+#  define EXP10_VERSION GLIBC_2_39
+# endif
+versioned_symbol (libm, __exp10, exp10, EXP10_VERSION);
 libm_alias_double_other (__exp10, exp10)
 #else
 libm_alias_double (__exp10, exp10)
diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist
index 88e7538e51..01c5633663 100644
--- a/sysdeps/mach/hurd/i386/libm.abilist
+++ b/sysdeps/mach/hurd/i386/libm.abilist
@@ -1181,5 +1181,6 @@  GLIBC_2.35 fsqrt F
 GLIBC_2.35 fsqrtl F
 GLIBC_2.35 hypot F
 GLIBC_2.35 hypotf F
+GLIBC_2.40 exp10 F
 GLIBC_2.40 fmod F
 GLIBC_2.40 fmodf F
diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist
index c99c60161d..3413cfdbe7 100644
--- a/sysdeps/unix/sysv/linux/i386/libm.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libm.abilist
@@ -1188,5 +1188,6 @@  GLIBC_2.35 fsqrt F
 GLIBC_2.35 fsqrtl F
 GLIBC_2.35 hypot F
 GLIBC_2.35 hypotf F
+GLIBC_2.40 exp10 F
 GLIBC_2.40 fmod F
 GLIBC_2.40 fmodf F