Message ID | 20230309183312.205763-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86-64: Add x87 fmod and remainder [BZ #30179] | expand |
On Thu, Mar 9, 2023 at 12:33 PM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > X87 (fprem/fprem1) implementations of fmod and remainder are much faster > than generic fmod and remainder. Add e_fmod.S, e_fmodf.S, e_remainder.S > and e_remainderf.S with fprem/fprem1. This fixes BZ #30179. > --- > sysdeps/x86_64/fpu/e_fmod.S | 22 ++++++++++++++++++++++ > sysdeps/x86_64/fpu/e_fmodf.S | 22 ++++++++++++++++++++++ > sysdeps/x86_64/fpu/e_remainder.S | 22 ++++++++++++++++++++++ > sysdeps/x86_64/fpu/e_remainderf.S | 22 ++++++++++++++++++++++ > 4 files changed, 88 insertions(+) > create mode 100644 sysdeps/x86_64/fpu/e_fmod.S > create mode 100644 sysdeps/x86_64/fpu/e_fmodf.S > create mode 100644 sysdeps/x86_64/fpu/e_remainder.S > create mode 100644 sysdeps/x86_64/fpu/e_remainderf.S > > diff --git a/sysdeps/x86_64/fpu/e_fmod.S b/sysdeps/x86_64/fpu/e_fmod.S > new file mode 100644 > index 0000000000..4bdc8a1ab0 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/e_fmod.S > @@ -0,0 +1,22 @@ > +/* > + * Public domain. > + */ > + > +#include <machine/asm.h> > +#include <libm-alias-finite.h> > + > +ENTRY(__ieee754_fmod) > + movsd %xmm0, -16(%rsp) > + movsd %xmm1, -8(%rsp) > + fldl -8(%rsp) > + fldl -16(%rsp) > +1: fprem > + fstsw %ax > + sahf > + jp 1b For all functions can you replace `sahf; jp` with `testl $0x400, %eax; jnz`? > + fstp %st(1) > + fstpl -8(%rsp) > + movsd -8(%rsp), %xmm0 > + ret > +END (__ieee754_fmod) > +libm_alias_finite (__ieee754_fmod, __fmod) > diff --git a/sysdeps/x86_64/fpu/e_fmodf.S b/sysdeps/x86_64/fpu/e_fmodf.S > new file mode 100644 > index 0000000000..6f76daff01 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/e_fmodf.S > @@ -0,0 +1,22 @@ > +/* > + * Public domain. 
> + */ > + > +#include <machine/asm.h> > +#include <libm-alias-finite.h> > + > +ENTRY(__ieee754_fmodf) > + movss %xmm0, -8(%rsp) > + movss %xmm1, -4(%rsp) > + flds -4(%rsp) > + flds -8(%rsp) > +1: fprem > + fstsw %ax > + sahf > + jp 1b > + fstp %st(1) > + fstps -4(%rsp) > + movss -4(%rsp), %xmm0 > + ret > +END (__ieee754_fmodf) > +libm_alias_finite (__ieee754_fmodf, __fmodf) > diff --git a/sysdeps/x86_64/fpu/e_remainder.S b/sysdeps/x86_64/fpu/e_remainder.S > new file mode 100644 > index 0000000000..be2184f25a > --- /dev/null > +++ b/sysdeps/x86_64/fpu/e_remainder.S > @@ -0,0 +1,22 @@ > +/* > + * Public domain. > + */ > + > +#include <machine/asm.h> > +#include <libm-alias-finite.h> > + > +ENTRY(__ieee754_remainder) > + movsd %xmm0, -16(%rsp) > + movsd %xmm1, -8(%rsp) > + fldl -8(%rsp) > + fldl -16(%rsp) > +1: fprem1 > + fstsw %ax > + sahf > + jp 1b > + fstp %st(1) > + fstpl -8(%rsp) > + movsd -8(%rsp), %xmm0 > + ret > +END (__ieee754_remainder) > +libm_alias_finite (__ieee754_remainder, __remainder) > diff --git a/sysdeps/x86_64/fpu/e_remainderf.S b/sysdeps/x86_64/fpu/e_remainderf.S > new file mode 100644 > index 0000000000..42972d3f84 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/e_remainderf.S > @@ -0,0 +1,22 @@ > +/* > + * Public domain. > + */ > + > +#include <machine/asm.h> > +#include <libm-alias-finite.h> > + > +ENTRY(__ieee754_remainderf) > + movss %xmm0, -8(%rsp) > + movss %xmm1, -4(%rsp) > + flds -4(%rsp) > + flds -8(%rsp) > +1: fprem1 > + fstsw %ax > + sahf > + jp 1b > + fstp %st(1) > + fstps -4(%rsp) > + movss -4(%rsp), %xmm0 > + ret > +END (__ieee754_remainderf) > +libm_alias_finite (__ieee754_remainderf, __remainderf) > -- > 2.39.2 >
On Thu, Mar 9, 2023 at 6:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Thu, Mar 9, 2023 at 12:33 PM H.J. Lu via Libc-alpha > <libc-alpha@sourceware.org> wrote: > > > > X87 (fprem/fprem1) implementations of fmod and remainder are much faster > > than generic fmod and remainder. Add e_fmod.S, e_fmodf.S, e_remainder.S > > and e_remainderf.S with fprem/fprem1. This fixes BZ #30179. > > --- > > sysdeps/x86_64/fpu/e_fmod.S | 22 ++++++++++++++++++++++ > > sysdeps/x86_64/fpu/e_fmodf.S | 22 ++++++++++++++++++++++ > > sysdeps/x86_64/fpu/e_remainder.S | 22 ++++++++++++++++++++++ > > sysdeps/x86_64/fpu/e_remainderf.S | 22 ++++++++++++++++++++++ > > 4 files changed, 88 insertions(+) > > create mode 100644 sysdeps/x86_64/fpu/e_fmod.S > > create mode 100644 sysdeps/x86_64/fpu/e_fmodf.S > > create mode 100644 sysdeps/x86_64/fpu/e_remainder.S > > create mode 100644 sysdeps/x86_64/fpu/e_remainderf.S > > > > diff --git a/sysdeps/x86_64/fpu/e_fmod.S b/sysdeps/x86_64/fpu/e_fmod.S > > new file mode 100644 > > index 0000000000..4bdc8a1ab0 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/e_fmod.S > > @@ -0,0 +1,22 @@ > > +/* > > + * Public domain. > > + */ > > + > > +#include <machine/asm.h> > > +#include <libm-alias-finite.h> > > + > > +ENTRY(__ieee754_fmod) > > + movsd %xmm0, -16(%rsp) > > + movsd %xmm1, -8(%rsp) > > + fldl -8(%rsp) > > + fldl -16(%rsp) > > +1: fprem > > + fstsw %ax > > + sahf > > + jp 1b > For all functions can you replace `sahf; jp` with `testl $0x400, %eax; jnz`? Yes. SAHF isn't available for all x86-64 CPUs. > > > + fstp %st(1) > > + fstpl -8(%rsp) > > + movsd -8(%rsp), %xmm0 > > + ret > > +END (__ieee754_fmod) > > +libm_alias_finite (__ieee754_fmod, __fmod) > > diff --git a/sysdeps/x86_64/fpu/e_fmodf.S b/sysdeps/x86_64/fpu/e_fmodf.S > > new file mode 100644 > > index 0000000000..6f76daff01 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/e_fmodf.S > > @@ -0,0 +1,22 @@ > > +/* > > + * Public domain. 
> > + */ > > + > > +#include <machine/asm.h> > > +#include <libm-alias-finite.h> > > + > > +ENTRY(__ieee754_fmodf) > > + movss %xmm0, -8(%rsp) > > + movss %xmm1, -4(%rsp) > > + flds -4(%rsp) > > + flds -8(%rsp) > > +1: fprem > > + fstsw %ax > > + sahf > > + jp 1b > > + fstp %st(1) > > + fstps -4(%rsp) > > + movss -4(%rsp), %xmm0 > > + ret > > +END (__ieee754_fmodf) > > +libm_alias_finite (__ieee754_fmodf, __fmodf) > > diff --git a/sysdeps/x86_64/fpu/e_remainder.S b/sysdeps/x86_64/fpu/e_remainder.S > > new file mode 100644 > > index 0000000000..be2184f25a > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/e_remainder.S > > @@ -0,0 +1,22 @@ > > +/* > > + * Public domain. > > + */ > > + > > +#include <machine/asm.h> > > +#include <libm-alias-finite.h> > > + > > +ENTRY(__ieee754_remainder) > > + movsd %xmm0, -16(%rsp) > > + movsd %xmm1, -8(%rsp) > > + fldl -8(%rsp) > > + fldl -16(%rsp) > > +1: fprem1 > > + fstsw %ax > > + sahf > > + jp 1b > > + fstp %st(1) > > + fstpl -8(%rsp) > > + movsd -8(%rsp), %xmm0 > > + ret > > +END (__ieee754_remainder) > > +libm_alias_finite (__ieee754_remainder, __remainder) > > diff --git a/sysdeps/x86_64/fpu/e_remainderf.S b/sysdeps/x86_64/fpu/e_remainderf.S > > new file mode 100644 > > index 0000000000..42972d3f84 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/e_remainderf.S > > @@ -0,0 +1,22 @@ > > +/* > > + * Public domain. > > + */ > > + > > +#include <machine/asm.h> > > +#include <libm-alias-finite.h> > > + > > +ENTRY(__ieee754_remainderf) > > + movss %xmm0, -8(%rsp) > > + movss %xmm1, -4(%rsp) > > + flds -4(%rsp) > > + flds -8(%rsp) > > +1: fprem1 > > + fstsw %ax > > + sahf > > + jp 1b > > + fstp %st(1) > > + fstps -4(%rsp) > > + movss -4(%rsp), %xmm0 > > + ret > > +END (__ieee754_remainderf) > > +libm_alias_finite (__ieee754_remainderf, __remainderf) > > -- > > 2.39.2 > >
diff --git a/sysdeps/x86_64/fpu/e_fmod.S b/sysdeps/x86_64/fpu/e_fmod.S
new file mode 100644
index 0000000000..4bdc8a1ab0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/e_fmod.S
@@ -0,0 +1,26 @@
+/*
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <libm-alias-finite.h>
+
+/* double __ieee754_fmod (double x, double y)
+   In: x in %xmm0, y in %xmm1.  Out: result in %xmm0.
+   The operands reach the x87 unit through scratch slots just below
+   %rsp (no stack adjustment, leaf routine).  Clobbers %eax and flags.  */
+ENTRY(__ieee754_fmod)
+	movsd	%xmm0, -16(%rsp)	/* spill x */
+	movsd	%xmm1, -8(%rsp)		/* spill y */
+	fldl	-8(%rsp)		/* st(0) = y */
+	fldl	-16(%rsp)		/* st(0) = x, st(1) = y */
+1:	fprem				/* partial remainder st(0) by st(1) */
+	fstsw	%ax			/* x87 status word -> %ax */
+	testl	$0x400, %eax		/* C2 (bit 10): reduction incomplete */
+	jnz	1b			/* avoids SAHF, optional in 64-bit mode */
+	fstp	%st(1)			/* drop y, keep the remainder */
+	fstpl	-8(%rsp)		/* round-trip result back to SSE */
+	movsd	-8(%rsp), %xmm0
+	ret
+END (__ieee754_fmod)
+libm_alias_finite (__ieee754_fmod, __fmod)
diff --git a/sysdeps/x86_64/fpu/e_fmodf.S b/sysdeps/x86_64/fpu/e_fmodf.S
new file mode 100644
index 0000000000..6f76daff01
--- /dev/null
+++ b/sysdeps/x86_64/fpu/e_fmodf.S
@@ -0,0 +1,26 @@
+/*
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <libm-alias-finite.h>
+
+/* float __ieee754_fmodf (float x, float y)
+   In: x in %xmm0, y in %xmm1.  Out: result in %xmm0.
+   The operands reach the x87 unit through scratch slots just below
+   %rsp (no stack adjustment, leaf routine).  Clobbers %eax and flags.  */
+ENTRY(__ieee754_fmodf)
+	movss	%xmm0, -8(%rsp)		/* spill x */
+	movss	%xmm1, -4(%rsp)		/* spill y */
+	flds	-4(%rsp)		/* st(0) = y */
+	flds	-8(%rsp)		/* st(0) = x, st(1) = y */
+1:	fprem				/* partial remainder st(0) by st(1) */
+	fstsw	%ax			/* x87 status word -> %ax */
+	testl	$0x400, %eax		/* C2 (bit 10): reduction incomplete */
+	jnz	1b			/* avoids SAHF, optional in 64-bit mode */
+	fstp	%st(1)			/* drop y, keep the remainder */
+	fstps	-4(%rsp)		/* round-trip result back to SSE */
+	movss	-4(%rsp), %xmm0
+	ret
+END (__ieee754_fmodf)
+libm_alias_finite (__ieee754_fmodf, __fmodf)
diff --git a/sysdeps/x86_64/fpu/e_remainder.S b/sysdeps/x86_64/fpu/e_remainder.S
new file mode 100644
index 0000000000..be2184f25a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/e_remainder.S
@@ -0,0 +1,26 @@
+/*
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <libm-alias-finite.h>
+
+/* double __ieee754_remainder (double x, double y)
+   In: x in %xmm0, y in %xmm1.  Out: result in %xmm0.
+   The operands reach the x87 unit through scratch slots just below
+   %rsp (no stack adjustment, leaf routine).  Clobbers %eax and flags.  */
+ENTRY(__ieee754_remainder)
+	movsd	%xmm0, -16(%rsp)	/* spill x */
+	movsd	%xmm1, -8(%rsp)		/* spill y */
+	fldl	-8(%rsp)		/* st(0) = y */
+	fldl	-16(%rsp)		/* st(0) = x, st(1) = y */
+1:	fprem1				/* IEEE partial remainder st(0) by st(1) */
+	fstsw	%ax			/* x87 status word -> %ax */
+	testl	$0x400, %eax		/* C2 (bit 10): reduction incomplete */
+	jnz	1b			/* avoids SAHF, optional in 64-bit mode */
+	fstp	%st(1)			/* drop y, keep the remainder */
+	fstpl	-8(%rsp)		/* round-trip result back to SSE */
+	movsd	-8(%rsp), %xmm0
+	ret
+END (__ieee754_remainder)
+libm_alias_finite (__ieee754_remainder, __remainder)
diff --git a/sysdeps/x86_64/fpu/e_remainderf.S b/sysdeps/x86_64/fpu/e_remainderf.S
new file mode 100644
index 0000000000..42972d3f84
--- /dev/null
+++ b/sysdeps/x86_64/fpu/e_remainderf.S
@@ -0,0 +1,26 @@
+/*
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <libm-alias-finite.h>
+
+/* float __ieee754_remainderf (float x, float y)
+   In: x in %xmm0, y in %xmm1.  Out: result in %xmm0.
+   The operands reach the x87 unit through scratch slots just below
+   %rsp (no stack adjustment, leaf routine).  Clobbers %eax and flags.  */
+ENTRY(__ieee754_remainderf)
+	movss	%xmm0, -8(%rsp)		/* spill x */
+	movss	%xmm1, -4(%rsp)		/* spill y */
+	flds	-4(%rsp)		/* st(0) = y */
+	flds	-8(%rsp)		/* st(0) = x, st(1) = y */
+1:	fprem1				/* IEEE partial remainder st(0) by st(1) */
+	fstsw	%ax			/* x87 status word -> %ax */
+	testl	$0x400, %eax		/* C2 (bit 10): reduction incomplete */
+	jnz	1b			/* avoids SAHF, optional in 64-bit mode */
+	fstp	%st(1)			/* drop y, keep the remainder */
+	fstps	-4(%rsp)		/* round-trip result back to SSE */
+	movss	-4(%rsp), %xmm0
+	ret
+END (__ieee754_remainderf)
+libm_alias_finite (__ieee754_remainderf, __remainderf)