Message ID | 20210226160814.24963-1-rzinsly@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [1/3] powerpc: Add optimized ilogb* for POWER9 | expand |
Benchtests results without and with this patch on a POWER9:

without:
"ilogbf128": {
  "subnormal": { "duration": 5.10445e+08, "iterations": 2.5826e+07, "max": 39.454, "min": 3.098, "mean": 19.7648 },
  "normal": { "duration": 5.02611e+08, "iterations": 1.54482e+08, "max": 6.037, "min": 3.241, "mean": 3.25352 }
},
"ilogb": {
  "subnormal": { "duration": 5.09116e+08, "iterations": 4.7656e+07, "max": 33.015, "min": 3.042, "mean": 10.6831 },
  "normal": { "duration": 5.01558e+08, "iterations": 1.72702e+08, "max": 5.6, "min": 2.893, "mean": 2.90418 }
},
"ilogbf": {
  "subnormal": { "duration": 5.0847e+08, "iterations": 5.8494e+07, "max": 15.928, "min": 3.032, "mean": 8.69269 },
  "normal": { "duration": 5.00965e+08, "iterations": 1.80536e+08, "max": 10.71, "min": 2.756, "mean": 2.77488 }
}

with:
"ilogbf128": {
  "subnormal": { "duration": 5.10468e+08, "iterations": 2.534e+07, "max": 38.864, "min": 3.379, "mean": 20.1448 },
  "normal": { "duration": 4.98668e+08, "iterations": 2.1883e+08, "max": 16.539, "min": 2.215, "mean": 2.27879 }
},
"ilogb": {
  "subnormal": { "duration": 5.0921e+08, "iterations": 4.6646e+07, "max": 21.122, "min": 3.477, "mean": 10.9165 },
  "normal": { "duration": 4.98203e+08, "iterations": 2.2731e+08, "max": 4.944, "min": 2.12, "mean": 2.19173 }
},
"ilogbf": {
  "subnormal": { "duration": 4.9848e+08, "iterations": 2.2245e+08, "max": 11.685, "min": 2.16, "mean": 2.24086 },
  "normal": { "duration": 4.98546e+08, "iterations": 2.22032e+08, "max": 5.855, "min": 2.166, "mean": 2.24538 }
}
On 2/26/21 10:08 AM, Raphael Moreira Zinsly wrote:
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>  .../powerpc64/le/fpu/multiarch/math_private.h | 14 +++++++++
>  .../powerpc64/le/fpu/w_ilogb_template.c | 29 +++++++++++++++++++
>  2 files changed, 43 insertions(+)
>  create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
> index 0970709cff..955497e98e 100644
> --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
> @@ -1,6 +1,20 @@
>  #ifndef MATH_PRIVATE_PPC64LE_MA
>  #define MATH_PRIVATE_PPC64LE_MA 1
>
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> +#define __builtin_ilogbl __builtin_ilogbf128
> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +  __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +  __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
>  #include_next <math_private.h>

Should these be placed in the powerpc math_private.h? I suspect this
will not build if multiarch is disabled.

Also, have you tested with the minimum version of GCC required for
glibc? I am not sure when these builtins were added.
On 26/02/2021 13:16, Paul E Murphy wrote:
>
>
> On 2/26/21 10:08 AM, Raphael Moreira Zinsly wrote:
>> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
>> the exponent from a double-precision and quad-precision floating-point
>> respectively, thus they can be used to improve ilogb, ilogbf and
>> ilogbf128.
>> ---
>>  .../powerpc64/le/fpu/multiarch/math_private.h | 14 +++++++++
>>  .../powerpc64/le/fpu/w_ilogb_template.c | 29 +++++++++++++++++++
>>  2 files changed, 43 insertions(+)
>>  create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>>
>> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> index 0970709cff..955497e98e 100644
>> --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> @@ -1,6 +1,20 @@
>>  #ifndef MATH_PRIVATE_PPC64LE_MA
>>  #define MATH_PRIVATE_PPC64LE_MA 1
>> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
>> +#define __builtin_ilogbf __builtin_ilogb
>> +
>> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
>> +#define __builtin_ilogbl __builtin_ilogbf128
>> +
>> +#define __builtin_test_dc_ilogb(x, y) \
>> +  __builtin_vsx_scalar_test_data_class_dp(x, y)
>> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
>> +
>> +#define __builtin_test_dc_ilogbf128(x, y) \
>> +  __builtin_vsx_scalar_test_data_class_qp(x, y)
>> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) -
>> 0x3fff
>> +
>>  #include_next <math_private.h>
>
> Should these be placed in the powerpc math_private.h? I suspect this
> will not build if multiarch is disabled.

Thanks for catching that, I'll fix that in the next version.

>
> Also, have you tested with the minimum version of GCC required for
> glibc? I am not sure when these builtins were added.
You are right: this doesn't work on GCC older than 8.4. I'll add a guard for that in my V2. Thanks,
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
index 0970709cff..955497e98e 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
@@ -1,6 +1,20 @@
 #ifndef MATH_PRIVATE_PPC64LE_MA
 #define MATH_PRIVATE_PPC64LE_MA 1
 
+#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
+#define __builtin_ilogbf __builtin_ilogb
+
+#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
+#define __builtin_ilogbl __builtin_ilogbf128
+
+#define __builtin_test_dc_ilogb(x, y) \
+  __builtin_vsx_scalar_test_data_class_dp(x, y)
+#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
+
+#define __builtin_test_dc_ilogbf128(x, y) \
+  __builtin_vsx_scalar_test_data_class_qp(x, y)
+#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
+
 #include_next <math_private.h>
 
 #if defined (_F128_ENABLE_IFUNC)
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
new file mode 100644
index 0000000000..3d76a3d0fb
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
@@ -0,0 +1,29 @@
+#ifdef _ARCH_PWR9
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+#include <math_private.h>
+#include <fenv.h>
+
+int
+M_DECL_FUNC (__ilogb) (FLOAT x)
+{
+  int r;
+  /* Check for exceptional cases. */
+  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
+    r = M_SUF (__builtin_ilogb) (x);
+  else
+    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal. */
+    r = M_SUF (__ieee754_ilogb) (x);
+  if (__builtin_expect (r == FP_ILOGB0, 0)
+      || __builtin_expect (r == FP_ILOGBNAN, 0)
+      || __builtin_expect (r == INT_MAX, 0))
+    {
+      __set_errno (EDOM);
+      __feraiseexcept (FE_INVALID);
+    }
+  return r;
+}
+declare_mgen_alias (__ilogb, ilogb)
+
+#endif