[3/4] Add ILP32 support to aarch64

Submitted by Steve Ellcey on Aug. 8, 2017, 6:10 p.m.

Details

Message ID 1502215837.3962.127.camel@cavium.com
State New
Headers show

Commit Message

Steve Ellcey Aug. 8, 2017, 6:10 p.m.
On Tue, 2017-08-08 at 16:01 +0100, Szabolcs Nagy wrote:

> > +#if IREG_SIZE == 64 && OREG_SIZE == 32
> > +  if (__builtin_fabs (x) > INT32_MAX - 2)
> i don't understand the -2 here.

I was confused and trying to handle the fact that fabs(INT32_MIN) !=
INT32_MAX.  I have removed the -2 and am just comparing to INT32_MAX
and that seems to work fine.  Since fabs(INT32_MIN) is greater than
INT32_MAX we may unnecessarily enter this if statement for values
between INT32_MIN and INT32_MIN+1 but that should not cause any
failures, just a slowdown.

> > +    {
> > +      /* Converting large values to a 32 bit in may cause the
> > frintx/fcvtza
> s/in/int/

Fixed that.

> > +      invalid_p = libc_fetestexcept (FE_INVALID);
> > +      inexact_p = libc_fetestexcept (FE_INEXACT);
> multiple flags can be tested/raised in a single call.

Good point.  I changed this to one call and saved the flags in an
integer variable for checking later.

> > +      libc_fesetenv (&env);
> > +
> > +      if (invalid_p)
> > +	feraiseexcept (FE_INVALID);
> > +      else if (inexact_p)
> > +	feraiseexcept (FE_INEXACT);
> > +
> i think correct trapping is not guaranteed by glibc,
> only correct status flags when the function returns,
> so spurious inexact is not a problem if it is already
> raised, and then i expect better code gen for the
> inexact clearing approach:
> 
> if (fabs (x) > INT32_MAX && fetestexcept (FE_INEXACT) == 0)
>   {
>     asm (...);
>     if (fetestexcept (FE_INVALID|FE_INEXACT) ==
> (FE_INVALID|FE_INEXACT))
>       feclearexcept (FE_INEXACT);
>   }
> else
>   asm (...);

As you mentioned in your followup email, we have to worry about
FE_INVALID being set on entry too.  I have attached an updated
version of my patch.

Steve Ellcey
sellcey@cavium.com


2017-08-08  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h, stdint.h):
	New includes.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Handle exceptions correctly on large values that may
	set FE_INVALID.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.

Patch hide | download patch | download mbox

diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@ 
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@ 
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@ 
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@ 
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..ed0135c 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
+#include <stdint.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +27,37 @@ 
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -44,6 +66,32 @@  __CONCATX(__,FUNC) (ITYPE x)
 {
   OTYPE result;
   ITYPE temp;
+
+#if IREG_SIZE == 64 && OREG_SIZE == 32
+  if (__builtin_fabs (x) > INT32_MAX)
+    {
+      /* Converting large values to a 32 bit int may cause the frintx/fcvtzs
+	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this we
+	 save and restore the FP environment and raise only one of them.  */
+
+      fenv_t env;
+      int feflags;
+
+      libc_feholdexcept (&env);
+      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+	    : "=r" (result), "=w" (temp) : "w" (x) );
+      feflags = libc_fetestexcept (FE_INVALID | FE_INEXACT);
+      libc_fesetenv (&env);
+
+      if (feflags & FE_INVALID)
+	feraiseexcept (FE_INVALID);
+      else if (feflags & FE_INEXACT)
+	feraiseexcept (FE_INEXACT);
+
+      return result;
+  }
+#endif
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@ 
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@ 
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@ 
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>