Message ID | 54C2A402.8090007@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On 01/23/2015 02:41 PM, Adhemerval Zanella wrote: > Some powerpc64 processors (e5500 core for instance) do not provide the > fsqrt instruction, however the current check in math_private.h is > __WORDSIZE and _ARCH_PWR4 (ISA 2.02). This patch changes it to use > the compiler flag _ARCH_PPCSQ (which is the same condition GCC uses to > decide whether to generate fsqrt instruction). > > It fixes BZ#16576. > > Tested on powerpc64/powerpc32 with --with-cpu=power4 and with --with-cpu=powerpc64. > The powerpc64 testcase showed some ULPs and exceptions issues when using > the software implementation and I will track them in other bug reports. > > Carlos, I would like to push to 2.21. OK for 2.21. > -- > > [BZ #16576] > * sysdeps/powerpc/fpu/math_private.h [__CPU_HAS_FSQRT]: Remove define > and use _ARCH_PPCSQ instead. > (__ieee754_sqrt): Likewise. > (__ieee754_sqrtf): Likewise. > * sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Build only if > _ARCH_PPCSQ is not defined. > (__ieee754_sqrt): Use _ARCH_PPCSQ to select whether to use hardware > fsqrt instruction. > * sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Build only if > _ARCH_PPCSQ is not defined. > (__ieee754_sqrtf): Use _ARCH_PPCSQ to select whether to use hardware > fsqrts instruction. > * sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Remove file. > * sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise. 
> > -- > > diff --git a/NEWS b/NEWS > index 903d925..ed15176 100644 > --- a/NEWS > +++ b/NEWS > @@ -10,15 +10,15 @@ Version 2.21 > * The following bugs are resolved with this release: > > 6652, 10672, 12674, 12847, 12926, 13862, 14132, 14138, 14171, 14498, > - 15215, 15884, 16009, 16418, 16191, 16469, 16617, 16619, 16657, 16740, > - 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411, 17460, > - 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571, 17572, > - 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601, 17608, > - 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657, 17658, > - 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723, 17724, > - 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775, 17777, > - 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806, 17834, > - 17844, 17848. > + 15215, 15884, 16009, 16418, 16191, 16469, 16576, 16617, 16619, 16657, > + 16740, 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411, > + 17460, 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571, > + 17572, 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601, > + 17608, 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657, > + 17658, 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723, > + 17724, 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775, > + 17777, 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806, > + 17834, 17844, 17848. > > * A new semaphore algorithm has been implemented in generic C code for all > machines. 
Previous custom assembly implementations of semaphore were > diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c > index ba77ae5..4a854a1 100644 > --- a/sysdeps/powerpc/fpu/e_sqrt.c > +++ b/sysdeps/powerpc/fpu/e_sqrt.c > @@ -24,6 +24,7 @@ > #include <sysdep.h> > #include <ldsodefs.h> > > +#ifndef _ARCH_PPCSQ > static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */ > static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; > static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; > @@ -152,6 +153,7 @@ __slow_ieee754_sqrt (double x) > } > return f_wash (x); > } > +#endif /* _ARCH_PPCSQ */ > > #undef __ieee754_sqrt > double > @@ -159,16 +161,11 @@ __ieee754_sqrt (double x) > { > double z; > > - /* If the CPU is 64-bit we can use the optional FP instructions. */ > - if (__CPU_HAS_FSQRT) > - { > - /* Volatile is required to prevent the compiler from moving the > - fsqrt instruction above the branch. */ > - __asm __volatile (" fsqrt %0,%1\n" > - :"=f" (z):"f" (x)); > - } > - else > - z = __slow_ieee754_sqrt (x); > +#ifdef _ARCH_PPCSQ > + asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x)); > +#else > + z = __slow_ieee754_sqrt (x); > +#endif > > return z; > } > diff --git a/sysdeps/powerpc/fpu/e_sqrtf.c b/sysdeps/powerpc/fpu/e_sqrtf.c > index 5cf47f7..034b6f5 100644 > --- a/sysdeps/powerpc/fpu/e_sqrtf.c > +++ b/sysdeps/powerpc/fpu/e_sqrtf.c > @@ -24,6 +24,7 @@ > #include <sysdep.h> > #include <ldsodefs.h> > > +#ifndef _ARCH_PPCSQ > static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */ > static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; > static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; > @@ -128,6 +129,7 @@ __slow_ieee754_sqrtf (float x) > } > return f_washf (x); > } > +#endif /* _ARCH_PPCSQ */ > > #undef __ieee754_sqrtf > float > @@ -135,16 +137,11 @@ __ieee754_sqrtf (float x) > { > double z; > > - /* If the CPU is 64-bit we can use the optional FP instructions. 
*/ > - if (__CPU_HAS_FSQRT) > - { > - /* Volatile is required to prevent the compiler from moving the > - fsqrt instruction above the branch. */ > - __asm __volatile (" fsqrts %0,%1\n" > - :"=f" (z):"f" (x)); > - } > - else > - z = __slow_ieee754_sqrtf (x); > +#ifdef _ARCH_PPCSQ > + asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x)); > +#else > + z = __slow_ieee754_sqrtf (x); > +#endif > > return z; > } > diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h > index 6631535..37e7456 100644 > --- a/sysdeps/powerpc/fpu/math_private.h > +++ b/sysdeps/powerpc/fpu/math_private.h > @@ -25,26 +25,17 @@ > #include <fenv_private.h> > #include_next <math_private.h> > > -# if __WORDSIZE == 64 || defined _ARCH_PWR4 > -# define __CPU_HAS_FSQRT 1 > -# else > -# define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0) > -# endif > - > extern double __slow_ieee754_sqrt (double); > extern __always_inline double > __ieee754_sqrt (double __x) > { > double __z; > > - if (__CPU_HAS_FSQRT) > - { > - /* Volatile is required to prevent the compiler from moving the > - fsqrt instruction above the branch. */ > - __asm __volatile ("fsqrt %0,%1" : "=f" (__z) : "f" (__x)); > - } > - else > - __z = __slow_ieee754_sqrt(__x); > +#ifdef _ARCH_PPCSQ > + asm ("fsqrt %0,%1" : "=f" (__z) : "f" (__x)); > +#else > + __z = __slow_ieee754_sqrt(__x); > +#endif > > return __z; > } > @@ -55,14 +46,11 @@ __ieee754_sqrtf (float __x) > { > float __z; > > - if (__CPU_HAS_FSQRT) > - { > - /* Volatile is required to prevent the compiler from moving the > - fsqrts instruction above the branch. 
*/ > - __asm __volatile ("fsqrts %0,%1" : "=f" (__z) : "f" (__x)); > - } > - else > - __z = __slow_ieee754_sqrtf(__x); > +#ifdef _ARCH_PPCSQ > + asm ("fsqrts %0,%1" : "=f" (__z) : "f" (__x)); > +#else > + __z = __slow_ieee754_sqrtf(__x); > +#endif > > return __z; > } > diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c > deleted file mode 100644 > index 796388e..0000000 > --- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c > +++ /dev/null > @@ -1,30 +0,0 @@ > -/* Double-precision floating point square root. > - Copyright (C) 1997-2015 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <http://www.gnu.org/licenses/>. */ > - > -#include <math.h> > -#include <math_private.h> > - > -#undef __ieee754_sqrt > -double > -__ieee754_sqrt (double x) > -{ > - double z; > - __asm __volatile ("fsqrt %0,%1" : "=f" (z) : "f" (x)); > - return z; > -} > -strong_alias (__ieee754_sqrt, __sqrt_finite) > diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c > deleted file mode 100644 > index 5502525..0000000 > --- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c > +++ /dev/null > @@ -1,30 +0,0 @@ > -/* Single-precision floating point square root. > - Copyright (C) 1997-2015 Free Software Foundation, Inc. 
> - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <http://www.gnu.org/licenses/>. */ > - > -#include <math.h> > -#include <math_private.h> > - > -#undef __ieee754_sqrtf > -float > -__ieee754_sqrtf (float x) > -{ > - double z; > - __asm ("fsqrts %0,%1" : "=f" (z) : "f" (x)); > - return z; > -} > -strong_alias (__ieee754_sqrtf, __sqrtf_finite) >
diff --git a/NEWS b/NEWS index 903d925..ed15176 100644 --- a/NEWS +++ b/NEWS @@ -10,15 +10,15 @@ Version 2.21 * The following bugs are resolved with this release: 6652, 10672, 12674, 12847, 12926, 13862, 14132, 14138, 14171, 14498, - 15215, 15884, 16009, 16418, 16191, 16469, 16617, 16619, 16657, 16740, - 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411, 17460, - 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571, 17572, - 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601, 17608, - 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657, 17658, - 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723, 17724, - 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775, 17777, - 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806, 17834, - 17844, 17848. + 15215, 15884, 16009, 16418, 16191, 16469, 16576, 16617, 16619, 16657, + 16740, 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411, + 17460, 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571, + 17572, 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601, + 17608, 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657, + 17658, 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723, + 17724, 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775, + 17777, 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806, + 17834, 17844, 17848. * A new semaphore algorithm has been implemented in generic C code for all machines. 
Previous custom assembly implementations of semaphore were diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c index ba77ae5..4a854a1 100644 --- a/sysdeps/powerpc/fpu/e_sqrt.c +++ b/sysdeps/powerpc/fpu/e_sqrt.c @@ -24,6 +24,7 @@ #include <sysdep.h> #include <ldsodefs.h> +#ifndef _ARCH_PPCSQ static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */ static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; @@ -152,6 +153,7 @@ __slow_ieee754_sqrt (double x) } return f_wash (x); } +#endif /* _ARCH_PPCSQ */ #undef __ieee754_sqrt double @@ -159,16 +161,11 @@ __ieee754_sqrt (double x) { double z; - /* If the CPU is 64-bit we can use the optional FP instructions. */ - if (__CPU_HAS_FSQRT) - { - /* Volatile is required to prevent the compiler from moving the - fsqrt instruction above the branch. */ - __asm __volatile (" fsqrt %0,%1\n" - :"=f" (z):"f" (x)); - } - else - z = __slow_ieee754_sqrt (x); +#ifdef _ARCH_PPCSQ + asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x)); +#else + z = __slow_ieee754_sqrt (x); +#endif return z; } diff --git a/sysdeps/powerpc/fpu/e_sqrtf.c b/sysdeps/powerpc/fpu/e_sqrtf.c index 5cf47f7..034b6f5 100644 --- a/sysdeps/powerpc/fpu/e_sqrtf.c +++ b/sysdeps/powerpc/fpu/e_sqrtf.c @@ -24,6 +24,7 @@ #include <sysdep.h> #include <ldsodefs.h> +#ifndef _ARCH_PPCSQ static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */ static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; @@ -128,6 +129,7 @@ __slow_ieee754_sqrtf (float x) } return f_washf (x); } +#endif /* _ARCH_PPCSQ */ #undef __ieee754_sqrtf float @@ -135,16 +137,11 @@ __ieee754_sqrtf (float x) { double z; - /* If the CPU is 64-bit we can use the optional FP instructions. */ - if (__CPU_HAS_FSQRT) - { - /* Volatile is required to prevent the compiler from moving the - fsqrt instruction above the branch. 
*/ - __asm __volatile (" fsqrts %0,%1\n" - :"=f" (z):"f" (x)); - } - else - z = __slow_ieee754_sqrtf (x); +#ifdef _ARCH_PPCSQ + asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x)); +#else + z = __slow_ieee754_sqrtf (x); +#endif return z; } diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h index 6631535..37e7456 100644 --- a/sysdeps/powerpc/fpu/math_private.h +++ b/sysdeps/powerpc/fpu/math_private.h @@ -25,26 +25,17 @@ #include <fenv_private.h> #include_next <math_private.h> -# if __WORDSIZE == 64 || defined _ARCH_PWR4 -# define __CPU_HAS_FSQRT 1 -# else -# define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0) -# endif - extern double __slow_ieee754_sqrt (double); extern __always_inline double __ieee754_sqrt (double __x) { double __z; - if (__CPU_HAS_FSQRT) - { - /* Volatile is required to prevent the compiler from moving the - fsqrt instruction above the branch. */ - __asm __volatile ("fsqrt %0,%1" : "=f" (__z) : "f" (__x)); - } - else - __z = __slow_ieee754_sqrt(__x); +#ifdef _ARCH_PPCSQ + asm ("fsqrt %0,%1" : "=f" (__z) : "f" (__x)); +#else + __z = __slow_ieee754_sqrt(__x); +#endif return __z; } @@ -55,14 +46,11 @@ __ieee754_sqrtf (float __x) { float __z; - if (__CPU_HAS_FSQRT) - { - /* Volatile is required to prevent the compiler from moving the - fsqrts instruction above the branch. */ - __asm __volatile ("fsqrts %0,%1" : "=f" (__z) : "f" (__x)); - } - else - __z = __slow_ieee754_sqrtf(__x); +#ifdef _ARCH_PPCSQ + asm ("fsqrts %0,%1" : "=f" (__z) : "f" (__x)); +#else + __z = __slow_ieee754_sqrtf(__x); +#endif return __z; } diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c deleted file mode 100644 index 796388e..0000000 --- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Double-precision floating point square root. - Copyright (C) 1997-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <math.h> -#include <math_private.h> - -#undef __ieee754_sqrt -double -__ieee754_sqrt (double x) -{ - double z; - __asm __volatile ("fsqrt %0,%1" : "=f" (z) : "f" (x)); - return z; -} -strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c deleted file mode 100644 index 5502525..0000000 --- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Single-precision floating point square root. - Copyright (C) 1997-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <math.h> -#include <math_private.h> - -#undef __ieee754_sqrtf -float -__ieee754_sqrtf (float x) -{ - double z; - __asm ("fsqrts %0,%1" : "=f" (z) : "f" (x)); - return z; -} -strong_alias (__ieee754_sqrtf, __sqrtf_finite)