Message ID | 20200409195945.10393-3-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | [1/5] benchtests: Add exp10f benchmark | expand |
Ping.

On 09/04/2020 16:59, Adhemerval Zanella wrote:
> The generic implementation is twice as fast. Using the exp10f
> benchmark:
>
> * master:
> "exp10f": {
>   "": {
>     "duration": 4.25753e+09,
>     "iterations": 3.3376e+07,
>     "max": 1414.77,
>     "min": 103.649,
>     "mean": 127.563
>   }
> }
>
> * patched:
> "exp10f": {
>   "": {
>     "duration": 4.05755e+09,
>     "iterations": 6.9136e+07,
>     "max": 1489.64,
>     "min": 50.95,
>     "mean": 58.6894
>   }
> }
>
> Checked on i686-linux-gnu.
> ---
> sysdeps/i386/fpu/e_exp10f.S | 54 -------------------------------------
> 1 file changed, 54 deletions(-)
> delete mode 100644 sysdeps/i386/fpu/e_exp10f.S
>
> diff --git a/sysdeps/i386/fpu/e_exp10f.S b/sysdeps/i386/fpu/e_exp10f.S
> deleted file mode 100644
> index 196ce8744a..0000000000
> --- a/sysdeps/i386/fpu/e_exp10f.S
> +++ /dev/null
> @@ -1,54 +0,0 @@
> -/*
> - * Written by Ulrich Drepper.
> - */
> -
> -#include <machine/asm.h>
> -#include <i386-math-asm.h>
> -#include <libm-alias-finite.h>
> -
> -DEFINE_FLT_MIN
> -
> -#ifdef PIC
> -# define MO(op) op##@GOTOFF(%ecx)
> -#else
> -# define MO(op) op
> -#endif
> -
> -	.text
> -/* 10^x = 2^(x * log2(10)) */
> -ENTRY(__ieee754_exp10f)
> -#ifdef PIC
> -	LOAD_PIC_REG (cx)
> -#endif
> -	flds	4(%esp)
> -/* I added the following ugly construct because exp(+-Inf) resulted
> -   in NaN. The ugliness results from the bright minds at Intel.
> -   For the i686 the code can be written better.
> -   -- drepper@cygnus.com. */
> -	fxam	/* Is NaN or +-Inf? */
> -	fstsw	%ax
> -	movb	$0x45, %dh
> -	andb	%ah, %dh
> -	cmpb	$0x05, %dh
> -	je	1f	/* Is +-Inf, jump. */
> -	fldl2t
> -	fmulp	/* x * log2(10) */
> -	fld	%st
> -	frndint	/* int(x * log2(10)) */
> -	fsubr	%st,%st(1)	/* fract(x * log2(10)) */
> -	fxch
> -	f2xm1	/* 2^(fract(x * log2(10))) - 1 */
> -	fld1
> -	faddp	/* 2^(fract(x * log2(10))) */
> -	fscale	/* e^x */
> -	fstp	%st(1)
> -	FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
> -	ret
> -
> -1:	testl	$0x200, %eax	/* Test sign. */
> -	jz	2f	/* If positive, jump. */
> -	fstp	%st
> -	fldz	/* Set result to 0. */
> -2:	ret
> -END (__ieee754_exp10f)
> -libm_alias_finite (__ieee754_exp10f, __exp10f)
>
Ping (x2).

On 29/04/2020 14:11, Adhemerval Zanella wrote:
> Ping.
>
> On 09/04/2020 16:59, Adhemerval Zanella wrote:
>> The generic implementation is twice as fast. Using the exp10f
>> benchmark:
>>
>> * master:
>> "exp10f": {
>>   "": {
>>     "duration": 4.25753e+09,
>>     "iterations": 3.3376e+07,
>>     "max": 1414.77,
>>     "min": 103.649,
>>     "mean": 127.563
>>   }
>> }
>>
>> * patched:
>> "exp10f": {
>>   "": {
>>     "duration": 4.05755e+09,
>>     "iterations": 6.9136e+07,
>>     "max": 1489.64,
>>     "min": 50.95,
>>     "mean": 58.6894
>>   }
>> }
>>
>> Checked on i686-linux-gnu.
>> ---
>> sysdeps/i386/fpu/e_exp10f.S | 54 -------------------------------------
>> 1 file changed, 54 deletions(-)
>> delete mode 100644 sysdeps/i386/fpu/e_exp10f.S
>>
>> diff --git a/sysdeps/i386/fpu/e_exp10f.S b/sysdeps/i386/fpu/e_exp10f.S
>> deleted file mode 100644
>> index 196ce8744a..0000000000
>> --- a/sysdeps/i386/fpu/e_exp10f.S
>> +++ /dev/null
>> @@ -1,54 +0,0 @@
>> -/*
>> - * Written by Ulrich Drepper.
>> - */
>> -
>> -#include <machine/asm.h>
>> -#include <i386-math-asm.h>
>> -#include <libm-alias-finite.h>
>> -
>> -DEFINE_FLT_MIN
>> -
>> -#ifdef PIC
>> -# define MO(op) op##@GOTOFF(%ecx)
>> -#else
>> -# define MO(op) op
>> -#endif
>> -
>> -	.text
>> -/* 10^x = 2^(x * log2(10)) */
>> -ENTRY(__ieee754_exp10f)
>> -#ifdef PIC
>> -	LOAD_PIC_REG (cx)
>> -#endif
>> -	flds	4(%esp)
>> -/* I added the following ugly construct because exp(+-Inf) resulted
>> -   in NaN. The ugliness results from the bright minds at Intel.
>> -   For the i686 the code can be written better.
>> -   -- drepper@cygnus.com. */
>> -	fxam	/* Is NaN or +-Inf? */
>> -	fstsw	%ax
>> -	movb	$0x45, %dh
>> -	andb	%ah, %dh
>> -	cmpb	$0x05, %dh
>> -	je	1f	/* Is +-Inf, jump. */
>> -	fldl2t
>> -	fmulp	/* x * log2(10) */
>> -	fld	%st
>> -	frndint	/* int(x * log2(10)) */
>> -	fsubr	%st,%st(1)	/* fract(x * log2(10)) */
>> -	fxch
>> -	f2xm1	/* 2^(fract(x * log2(10))) - 1 */
>> -	fld1
>> -	faddp	/* 2^(fract(x * log2(10))) */
>> -	fscale	/* e^x */
>> -	fstp	%st(1)
>> -	FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
>> -	ret
>> -
>> -1:	testl	$0x200, %eax	/* Test sign. */
>> -	jz	2f	/* If positive, jump. */
>> -	fstp	%st
>> -	fldz	/* Set result to 0. */
>> -2:	ret
>> -END (__ieee754_exp10f)
>> -libm_alias_finite (__ieee754_exp10f, __exp10f)
>>
```diff
diff --git a/sysdeps/i386/fpu/e_exp10f.S b/sysdeps/i386/fpu/e_exp10f.S
deleted file mode 100644
index 196ce8744a..0000000000
--- a/sysdeps/i386/fpu/e_exp10f.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Written by Ulrich Drepper.
- */
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_FLT_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-#else
-# define MO(op) op
-#endif
-
-	.text
-/* 10^x = 2^(x * log2(10)) */
-ENTRY(__ieee754_exp10f)
-#ifdef PIC
-	LOAD_PIC_REG (cx)
-#endif
-	flds	4(%esp)
-/* I added the following ugly construct because exp(+-Inf) resulted
-   in NaN. The ugliness results from the bright minds at Intel.
-   For the i686 the code can be written better.
-   -- drepper@cygnus.com. */
-	fxam	/* Is NaN or +-Inf? */
-	fstsw	%ax
-	movb	$0x45, %dh
-	andb	%ah, %dh
-	cmpb	$0x05, %dh
-	je	1f	/* Is +-Inf, jump. */
-	fldl2t
-	fmulp	/* x * log2(10) */
-	fld	%st
-	frndint	/* int(x * log2(10)) */
-	fsubr	%st,%st(1)	/* fract(x * log2(10)) */
-	fxch
-	f2xm1	/* 2^(fract(x * log2(10))) - 1 */
-	fld1
-	faddp	/* 2^(fract(x * log2(10))) */
-	fscale	/* e^x */
-	fstp	%st(1)
-	FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
-	ret
-
-1:	testl	$0x200, %eax	/* Test sign. */
-	jz	2f	/* If positive, jump. */
-	fstp	%st
-	fldz	/* Set result to 0. */
-2:	ret
-END (__ieee754_exp10f)
-libm_alias_finite (__ieee754_exp10f, __exp10f)
```