Message ID | 20210301175140.29109-1-rzinsly@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v2,1/3] powerpc: Add optimized ilogb* for POWER9 | expand |
On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote: > Changes since v1: > - Move the builtins definitions to powerpc's math_private.h. > - Check if the correct GCC version is used. > > --8<--- > > The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract > the exponent from a double-precision and quad-precision floating-point > respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128. > --- > sysdeps/powerpc/fpu/math_private.h | 20 +++++++++++- > .../powerpc64/le/fpu/w_ilogb_template.c | 31 +++++++++++++++++++ > 2 files changed, 50 insertions(+), 1 deletion(-) > create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > > diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h > index 91b1361749..accc28d091 100644 > --- a/sysdeps/powerpc/fpu/math_private.h > +++ b/sysdeps/powerpc/fpu/math_private.h > @@ -25,7 +25,23 @@ > > #include_next <math_private.h> > > -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128 > +#ifdef _ARCH_PWR9 > + > +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb > +#define __builtin_ilogbf __builtin_ilogb > + > +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128 > +#define __builtin_ilogbl __builtin_ilogbf128 > + > +#define __builtin_test_dc_ilogb(x, y) \ > + __builtin_vsx_scalar_test_data_class_dp(x, y) > +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff > + > +#define __builtin_test_dc_ilogbf128(x, y) \ > + __builtin_vsx_scalar_test_data_class_qp(x, y) > +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff > + > +#if __HAVE_DISTINCT_FLOAT128 > extern __always_inline _Float128 > __ieee754_sqrtf128 (_Float128 __x) > { > @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x) > } > #endif > > +#endif /* _ARCH_PWR9 */ > + > #endif /* _PPC_MATH_PRIVATE_H_ */ > diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > new file mode 100644 > index 0000000000..17ac7809e1 > --- /dev/null > +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > @@ -0,0 +1,31 @@ > +/* The builtins used are only available with GCC 8.0 or newer. */ > +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0) I wonder if it would be better to use __glibc_has_builtin () for the builtins on which you depend, rather than testing for a specific GCC level. (Same for patch 2/3.) PC > +#include <math.h> > +#include <errno.h> > +#include <limits.h> > +#include <math_private.h> > +#include <fenv.h> > + > +int > +M_DECL_FUNC (__ilogb) (FLOAT x) > +{ > + int r; > + /* Check for exceptional cases. */ > + if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f)) > + r = M_SUF (__builtin_ilogb) (x); > + else > + /* Fallback to the generic ilogb if x is NaN, Inf or subnormal. */ > + r = M_SUF (__ieee754_ilogb) (x); > + if (__builtin_expect (r == FP_ILOGB0, 0) > + || __builtin_expect (r == FP_ILOGBNAN, 0) > + || __builtin_expect (r == INT_MAX, 0)) > + { > + __set_errno (EDOM); > + __feraiseexcept (FE_INVALID); > + } > + return r; > +} > +declare_mgen_alias (__ilogb, ilogb) > +#else > +#include <math/w_ilogb_template.c> > +#endif > -- > 2.29.2 >
On 3/1/21 11:51 AM, Raphael Moreira Zinsly wrote: > Changes since v1: > - Move the builtins definitions to powerpc's math_private.h. > - Check if the correct GCC version is used. > > --8<--- > > The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract > the exponent from a double-precision and quad-precision floating-point > respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128. > --- > sysdeps/powerpc/fpu/math_private.h | 20 +++++++++++- > .../powerpc64/le/fpu/w_ilogb_template.c | 31 +++++++++++++++++++ > 2 files changed, 50 insertions(+), 1 deletion(-) > create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > > diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h > index 91b1361749..accc28d091 100644 > --- a/sysdeps/powerpc/fpu/math_private.h > +++ b/sysdeps/powerpc/fpu/math_private.h > @@ -25,7 +25,23 @@ > > #include_next <math_private.h> > > -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128 > +#ifdef _ARCH_PWR9 > + > +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb > +#define __builtin_ilogbf __builtin_ilogb > + > +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128 > +#define __builtin_ilogbl __builtin_ilogbf128 I suspect this converting an ibm128 value to float128. Can you verify whether this is the case, and if so, exclude ibm long doubles from this optimization? (Note, libm should be building long double == ibm128) Otherwise, LGTM. > + > +#define __builtin_test_dc_ilogb(x, y) \ > + __builtin_vsx_scalar_test_data_class_dp(x, y) > +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff > + > +#define __builtin_test_dc_ilogbf128(x, y) \ > + __builtin_vsx_scalar_test_data_class_qp(x, y) > +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff > + > +#if __HAVE_DISTINCT_FLOAT128 > extern __always_inline _Float128 > __ieee754_sqrtf128 (_Float128 __x) > { > @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x) > } > #endif > > +#endif /* _ARCH_PWR9 */ > + > #endif /* _PPC_MATH_PRIVATE_H_ */ > diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > new file mode 100644 > index 0000000000..17ac7809e1 > --- /dev/null > +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > @@ -0,0 +1,31 @@ > +/* The builtins used are only available with GCC 8.0 or newer. */ > +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0) > +#include <math.h> > +#include <errno.h> > +#include <limits.h> > +#include <math_private.h> > +#include <fenv.h> > + > +int > +M_DECL_FUNC (__ilogb) (FLOAT x) > +{ > + int r; > + /* Check for exceptional cases. */ > + if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f)) > + r = M_SUF (__builtin_ilogb) (x); > + else > + /* Fallback to the generic ilogb if x is NaN, Inf or subnormal. */ > + r = M_SUF (__ieee754_ilogb) (x); > + if (__builtin_expect (r == FP_ILOGB0, 0) > + || __builtin_expect (r == FP_ILOGBNAN, 0) > + || __builtin_expect (r == INT_MAX, 0)) > + { > + __set_errno (EDOM); > + __feraiseexcept (FE_INVALID); > + } > + return r; > +} > +declare_mgen_alias (__ilogb, ilogb) > +#else > +#include <math/w_ilogb_template.c> > +#endif >
On 01/03/2021 22:27, Paul A. Clarke wrote: > On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote: >> Changes since v1: >> - Move the builtins definitions to powerpc's math_private.h. >> - Check if the correct GCC version is used. >> >> --8<--- >> >> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract >> the exponent from a double-precision and quad-precision floating-point >> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128. >> --- >> sysdeps/powerpc/fpu/math_private.h | 20 +++++++++++- >> .../powerpc64/le/fpu/w_ilogb_template.c | 31 +++++++++++++++++++ >> 2 files changed, 50 insertions(+), 1 deletion(-) >> create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c >> >> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h >> index 91b1361749..accc28d091 100644 >> --- a/sysdeps/powerpc/fpu/math_private.h >> +++ b/sysdeps/powerpc/fpu/math_private.h >> @@ -25,7 +25,23 @@ >> >> #include_next <math_private.h> >> >> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128 >> +#ifdef _ARCH_PWR9 >> + >> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb >> +#define __builtin_ilogbf __builtin_ilogb >> + >> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128 >> +#define __builtin_ilogbl __builtin_ilogbf128 >> + >> +#define __builtin_test_dc_ilogb(x, y) \ >> + __builtin_vsx_scalar_test_data_class_dp(x, y) >> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff >> + >> +#define __builtin_test_dc_ilogbf128(x, y) \ >> + __builtin_vsx_scalar_test_data_class_qp(x, y) >> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff >> + >> +#if __HAVE_DISTINCT_FLOAT128 >> extern __always_inline _Float128 >> __ieee754_sqrtf128 (_Float128 __x) >> { >> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x) >> } >> #endif >> >> +#endif /* _ARCH_PWR9 */ >> + >> #endif /* _PPC_MATH_PRIVATE_H_ */ >> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c >> new file mode 100644 >> index 0000000000..17ac7809e1 >> --- /dev/null >> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c >> @@ -0,0 +1,31 @@ >> +/* The builtins used are only available with GCC 8.0 or newer. */ >> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0) > > I wonder if it would be better to use __glibc_has_builtin () for the > builtins on which you depend, rather than testing for a specific GCC level. > I didn't find a __glibc_has_builtin definition, do you mean the preprocessor's __has_builtin()? I believe it's not available on GCC 8.0. > (Same for patch 2/3.) > > PC
On Wed, Mar 03, 2021 at 01:23:43PM -0300, Raphael M Zinsly via Libc-alpha wrote: > On 01/03/2021 22:27, Paul A. Clarke wrote: > > On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote: > > > Changes since v1: > > > - Move the builtins definitions to powerpc's math_private.h. > > > - Check if the correct GCC version is used. > > > > > > --8<--- > > > > > > The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract > > > the exponent from a double-precision and quad-precision floating-point > > > respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128. > > > --- > > > sysdeps/powerpc/fpu/math_private.h | 20 +++++++++++- > > > .../powerpc64/le/fpu/w_ilogb_template.c | 31 +++++++++++++++++++ > > > 2 files changed, 50 insertions(+), 1 deletion(-) > > > create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > > > > > > diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h > > > index 91b1361749..accc28d091 100644 > > > --- a/sysdeps/powerpc/fpu/math_private.h > > > +++ b/sysdeps/powerpc/fpu/math_private.h > > > @@ -25,7 +25,23 @@ > > > > > > #include_next <math_private.h> > > > > > > -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128 > > > +#ifdef _ARCH_PWR9 > > > + > > > +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb > > > +#define __builtin_ilogbf __builtin_ilogb > > > + > > > +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128 > > > +#define __builtin_ilogbl __builtin_ilogbf128 > > > + > > > +#define __builtin_test_dc_ilogb(x, y) \ > > > + __builtin_vsx_scalar_test_data_class_dp(x, y) > > > +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff > > > + > > > +#define __builtin_test_dc_ilogbf128(x, y) \ > > > + __builtin_vsx_scalar_test_data_class_qp(x, y) > > > +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff > > > + > > > +#if __HAVE_DISTINCT_FLOAT128 > > > extern __always_inline _Float128 > > > __ieee754_sqrtf128 (_Float128 __x) > > > { > > > @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x) > > > } > > > #endif > > > > > > +#endif /* _ARCH_PWR9 */ > > > + > > > #endif /* _PPC_MATH_PRIVATE_H_ */ > > > diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > > > new file mode 100644 > > > index 0000000000..17ac7809e1 > > > --- /dev/null > > > +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c > > > @@ -0,0 +1,31 @@ > > > +/* The builtins used are only available with GCC 8.0 or newer. */ > > > +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0) > > > > I wonder if it would be better to use __glibc_has_builtin () for the > > builtins on which you depend, rather than testing for a specific GCC level. > > > > I didn't find a __glibc_has_builtin definition, do you mean the > preprocessor's __has_builtin()? I believe it's not available on GCC 8.0. misc/sys/cdefs.h: -- /* Compilers that lack __has_attribute may object to #if defined __has_attribute && __has_attribute (...) even though they do not need to evaluate the right-hand side of the &&. Similarly for __has_builtin, etc. */ [...] #ifdef __has_builtin # define __glibc_has_builtin(name) __has_builtin (name) #else # define __glibc_has_builtin(name) 0 #endif -- ... so, it covers pre-GCC8 by always saying "nope". PC
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h index 91b1361749..accc28d091 100644 --- a/sysdeps/powerpc/fpu/math_private.h +++ b/sysdeps/powerpc/fpu/math_private.h @@ -25,7 +25,23 @@ #include_next <math_private.h> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128 +#ifdef _ARCH_PWR9 + +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb +#define __builtin_ilogbf __builtin_ilogb + +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128 +#define __builtin_ilogbl __builtin_ilogbf128 + +#define __builtin_test_dc_ilogb(x, y) \ + __builtin_vsx_scalar_test_data_class_dp(x, y) +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff + +#define __builtin_test_dc_ilogbf128(x, y) \ + __builtin_vsx_scalar_test_data_class_qp(x, y) +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff + +#if __HAVE_DISTINCT_FLOAT128 extern __always_inline _Float128 __ieee754_sqrtf128 (_Float128 __x) { @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x) } #endif +#endif /* _ARCH_PWR9 */ + #endif /* _PPC_MATH_PRIVATE_H_ */ diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c new file mode 100644 index 0000000000..17ac7809e1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c @@ -0,0 +1,31 @@ +/* The builtins used are only available with GCC 8.0 or newer. */ +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0) +#include <math.h> +#include <errno.h> +#include <limits.h> +#include <math_private.h> +#include <fenv.h> + +int +M_DECL_FUNC (__ilogb) (FLOAT x) +{ + int r; + /* Check for exceptional cases. */ + if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f)) + r = M_SUF (__builtin_ilogb) (x); + else + /* Fallback to the generic ilogb if x is NaN, Inf or subnormal. */ + r = M_SUF (__ieee754_ilogb) (x); + if (__builtin_expect (r == FP_ILOGB0, 0) + || __builtin_expect (r == FP_ILOGBNAN, 0) + || __builtin_expect (r == INT_MAX, 0)) + { + __set_errno (EDOM); + __feraiseexcept (FE_INVALID); + } + return r; +} +declare_mgen_alias (__ilogb, ilogb) +#else +#include <math/w_ilogb_template.c> +#endif