Message ID | 1502215837.3962.127.camel@cavium.com |
---|---|
State | New |
Headers | show |
On 08/08/2017 11:10 AM, Steve Ellcey wrote: > @@ -44,6 +66,32 @@ __CONCATX(__,FUNC) (ITYPE x) > { > OTYPE result; > ITYPE temp; > + > +#if IREG_SIZE == 64 && OREG_SIZE == 32 > + if (__builtin_fabs (x) > INT32_MAX) > + { > + /* Converting large values to a 32 bit int may cause the frintx/fcvtza > + sequence to set both FE_INVALID and FE_INEXACT. To avoid this > + we save and restore the FE and only set one or the other. */ > + > + fenv_t env; > + int feflags; > + > + libc_feholdexcept (&env); > + asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t" > + "fcvtzs" "\t%" OREGS "0, %" IREGS "1" > + : "=r" (result), "=w" (temp) : "w" (x) ); > + feflags = libc_fetestexcept (FE_INVALID | FE_INEXACT); > + libc_fesetenv (&env); > + > + if (feflags & FE_INVALID) > + feraiseexcept (FE_INVALID); > + else if (feflags & FE_INEXACT) > + feraiseexcept (FE_INEXACT); > + > + return result; > + } > +#endif Surely it is simply better to do the conversion in one step, getting the proper flags set the first time. Like so. r~ diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c index 8c61a039bf..a6ac070fa6 100644 --- a/sysdeps/aarch64/fpu/s_lrint.c +++ b/sysdeps/aarch64/fpu/s_lrint.c @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <math.h> +#include <get-rounding-mode.h> #ifndef FUNC # define FUNC lrint @@ -43,10 +44,25 @@ OTYPE __CONCATX(__,FUNC) (ITYPE x) { OTYPE result; - ITYPE temp; - asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t" - "fcvtzs" "\t%" OREGS "0, %" IREGS "1" - : "=r" (result), "=w" (temp) : "w" (x) ); + switch (get_rounding_mode ()) + { + case FE_TONEAREST: + asm volatile ("fcvtns" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result) : "w" (x)); + break; + case FE_UPWARD: + asm volatile ("fcvtps" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result) : "w" (x)); + break; + case FE_DOWNWARD: + asm volatile ("fcvtms" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result) : "w" (x)); + break; + default: + case FE_TOWARDZERO: + asm volatile ("fcvtzs" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result) : "w" (x)); + } return result; }
On 28/08/17 15:52, Richard Henderson wrote: > Surely it is simply better to do the conversion in one step, getting the proper > flags set the first time. Like so. > Hm, true for ILP32, but on LP64 you don't want the unnecessary FPCR access and the dispatch on the rounding mode.
diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c index c0d0d0e..57821c0 100644 --- a/sysdeps/aarch64/fpu/s_llrint.c +++ b/sysdeps/aarch64/fpu/s_llrint.c @@ -18,4 +18,5 @@ #define FUNC llrint #define OTYPE long long int +#define OREG_SIZE 64 #include <s_lrint.c> diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c index 67724c6..98ed4f8 100644 --- a/sysdeps/aarch64/fpu/s_llrintf.c +++ b/sysdeps/aarch64/fpu/s_llrintf.c @@ -18,6 +18,7 @@ #define FUNC llrintf #define ITYPE float -#define IREGS "s" +#define IREG_SIZE 32 #define OTYPE long long int +#define OREG_SIZE 64 #include <s_lrint.c> diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c index ed4b192..ef7aedf 100644 --- a/sysdeps/aarch64/fpu/s_llround.c +++ b/sysdeps/aarch64/fpu/s_llround.c @@ -18,4 +18,5 @@ #define FUNC llround #define OTYPE long long int +#define OREG_SIZE 64 #include <s_lround.c> diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c index 360ce8b..294f0f4 100644 --- a/sysdeps/aarch64/fpu/s_llroundf.c +++ b/sysdeps/aarch64/fpu/s_llroundf.c @@ -18,6 +18,7 @@ #define FUNC llroundf #define ITYPE float -#define IREGS "s" +#define IREG_SIZE 32 #define OTYPE long long int +#define OREG_SIZE 64 #include <s_lround.c> diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c index 8c61a03..ed0135c 100644 --- a/sysdeps/aarch64/fpu/s_lrint.c +++ b/sysdeps/aarch64/fpu/s_lrint.c @@ -16,7 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ +#include <math_private.h> #include <math.h> +#include <fenv.h> +#include <stdint.h> #ifndef FUNC # define FUNC lrint @@ -24,18 +27,37 @@ #ifndef ITYPE # define ITYPE double -# define IREGS "d" +# define IREG_SIZE 64 #else -# ifndef IREGS -# error IREGS not defined +# ifndef IREG_SIZE +# error IREG_SIZE not defined # endif #endif #ifndef OTYPE # define OTYPE long int +# ifdef __ILP32__ +# define OREG_SIZE 32 +# else +# define OREG_SIZE 64 +# endif +#else +# ifndef OREG_SIZE +# error OREG_SIZE not defined +# endif +#endif + +#if IREG_SIZE == 32 +# define IREGS "s" +#else +# define IREGS "d" #endif -#define OREGS "x" +#if OREG_SIZE == 32 +# define OREGS "w" +#else +# define OREGS "x" +#endif #define __CONCATX(a,b) __CONCAT(a,b) @@ -44,6 +66,32 @@ __CONCATX(__,FUNC) (ITYPE x) { OTYPE result; ITYPE temp; + +#if IREG_SIZE == 64 && OREG_SIZE == 32 + if (__builtin_fabs (x) > INT32_MAX) + { + /* Converting large values to a 32 bit int may cause the frintx/fcvtza + sequence to set both FE_INVALID and FE_INEXACT. To avoid this + we save and restore the FE and only set one or the other. 
*/ + + fenv_t env; + int feflags; + + libc_feholdexcept (&env); + asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t" + "fcvtzs" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result), "=w" (temp) : "w" (x) ); + feflags = libc_fetestexcept (FE_INVALID | FE_INEXACT); + libc_fesetenv (&env); + + if (feflags & FE_INVALID) + feraiseexcept (FE_INVALID); + else if (feflags & FE_INEXACT) + feraiseexcept (FE_INEXACT); + + return result; + } +#endif asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t" "fcvtzs" "\t%" OREGS "0, %" IREGS "1" : "=r" (result), "=w" (temp) : "w" (x) ); diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c index a995e4b..2e73271 100644 --- a/sysdeps/aarch64/fpu/s_lrintf.c +++ b/sysdeps/aarch64/fpu/s_lrintf.c @@ -18,5 +18,5 @@ #define FUNC lrintf #define ITYPE float -#define IREGS "s" +#define IREG_SIZE 32 #include <s_lrint.c> diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c index 9be9e7f..1f77d82 100644 --- a/sysdeps/aarch64/fpu/s_lround.c +++ b/sysdeps/aarch64/fpu/s_lround.c @@ -24,18 +24,37 @@ #ifndef ITYPE # define ITYPE double -# define IREGS "d" +# define IREG_SIZE 64 #else -# ifndef IREGS -# error IREGS not defined +# ifndef IREG_SIZE +# error IREG_SIZE not defined # endif #endif #ifndef OTYPE # define OTYPE long int +# ifdef __ILP32__ +# define OREG_SIZE 32 +# else +# define OREG_SIZE 64 +# endif +#else +# ifndef OREG_SIZE +# error OREG_SIZE not defined +# endif +#endif + +#if IREG_SIZE == 32 +# define IREGS "s" +#else +# define IREGS "d" #endif -#define OREGS "x" +#if OREG_SIZE == 32 +# define OREGS "w" +#else +# define OREGS "x" +#endif #define __CONCATX(a,b) __CONCAT(a,b) diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c index 4a066d4..b30ddb6 100644 --- a/sysdeps/aarch64/fpu/s_lroundf.c +++ b/sysdeps/aarch64/fpu/s_lroundf.c @@ -18,5 +18,5 @@ #define FUNC lroundf #define ITYPE float -#define IREGS "s" +#define IREG_SIZE 32 #include <s_lround.c>