diff mbox series

[2/2,v2,powerpc] Use faster means to access FPSCR when possible in some cases

Message ID 1560452241-11638-3-git-send-email-pc@us.ibm.com
State New
Headers show
Series utilize faster method to get FPSCR | expand

Commit Message

Paul A. Clarke June 13, 2019, 6:57 p.m. UTC
From: "Paul A. Clarke" <pc@us.ibm.com>

Using 'mffs' instruction to read the Floating Point Status Control Register
(FPSCR) can force a processor flush in some cases, with undesirable
performance impact.  If the values of the bits in the FPSCR which force the
flush are not needed, an instruction that is new to POWER9 (ISA version 3.0),
'mffsl' can be used instead.

Cases included:  get_rounding_mode, fegetround, fegetmode, fegetexcept.

2019-06-13  Paul A. Clarke  <pc@us.ibm.com>

	* sysdeps/powerpc/bits/fenvinline.h (__fegetround): Use 'mffsl' when possible.
	* sysdeps/powerpc/fpu_control.h (IS_ISA300): New.
	(_FPU_MFFS): Move implementation...
	(_FPU_GETCW): Here.
	(_FPU_MFFSL): Move implementation....
	(_FPU_GET_RC_FAST): Here. New.
	(_FPU_GET_RC): Change to use _FPU_GET_RC_FAST or _FPU_GETCW.
	* sysdeps/powerpc/fpu/fenv_libc.h (IS_ISA300): New.
	(fegetenv_status): New.
	* sysdeps/powerpc/fpu/fegetmode.c (fegetmode): Use fegetenv_status()
	instead of fegetenv_register().
	* sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Likewise.

v2: This incorporates the suggestion from Adhemerval in the earlier version
    of the changes posted with subject "[powerpc] fegetmode: utilize faster
    method to get rounding mode".
    Fixed some serious issues with a default build.
    Added '.machine "power9"' where 'mffsl' can be generated when not compiled
    for power9.
---
 sysdeps/powerpc/bits/fenvinline.h | 22 ++++++++++++++------
 sysdeps/powerpc/fpu/fegetexcept.c |  2 +-
 sysdeps/powerpc/fpu/fegetmode.c   |  2 +-
 sysdeps/powerpc/fpu/fenv_libc.h   | 19 +++++++++++++++++
 sysdeps/powerpc/fpu_control.h     | 43 +++++++++++++++++++++------------------
 5 files changed, 60 insertions(+), 28 deletions(-)

Comments

Tulio Magno Quites Machado Filho June 18, 2019, 9:03 p.m. UTC | #1
"Paul A. Clarke" <pc@us.ibm.com> writes:

> diff --git a/sysdeps/powerpc/bits/fenvinline.h b/sysdeps/powerpc/bits/fenvinline.h
> index 7079d1a..72cd263 100644
> --- a/sysdeps/powerpc/bits/fenvinline.h
> +++ b/sysdeps/powerpc/bits/fenvinline.h
> @@ -19,12 +19,22 @@
>  #if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
>  
>  /* Inline definition for fegetround.  */
> -# define __fegetround() \
> -  (__extension__  ({ int __fegetround_result;				      \
> -		     __asm__ __volatile__				      \
> -		       ("mcrfs 7,7 ; mfcr %0"				      \
> -			: "=r"(__fegetround_result) : : "cr7");		      \
> -		     __fegetround_result & 3; }))
> +#ifdef _ARCH_PWR9
> +# define __fegetround()							\
> +  __extension__  ({							\
> +    union { double __d; unsigned long long __ll; } __u;			\
> +    __asm__ ("mffsl %0" : "=f" (__u.__d));				\
> +    __u.__ll & 0x0000000000000003LL;					\
> +  })
> +#else
> +# define __fegetround()							\
> +  __extension__  ({							\
> +        int __fegetround_result;					\
> +        __asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0"			\
> +                              : "=r"(__fegetround_result) : : "cr7");	\
> +        __fegetround_result & 3;					\
> +  })

This is removing a parenthesis around "__extension__ ...".
Indentation needs to be reviewed too.

> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index f66bf24..79e70df 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -33,6 +33,25 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>  /* Equivalent to fegetenv, but returns a fenv_t instead of taking a
>     pointer.  */
>  #define fegetenv_register() __builtin_mffs()
> +

Trailing whitespace here.

> +#ifdef _ARCH_PWR9
> +#define IS_ISA300() 1
> +#else
> +#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")
> +#endif
> +
> +/* Equivalent to fegetenv_register, but only returns bits for
> +   status, exception enables, and mode.  */
> +#define fegetenv_status()						\
> +  ({register double __fr;						\
> +    if (__glibc_likely(IS_ISA300())) 					\
> +      __asm__ __volatile__ (						\
> +        ".machine push; .machine \"power9\"; mffsl %0; .machine pop"	\
> +        : "=f" (__fr));							\
> +    else								\
> +      __fr = __builtin_mffs();						\
> +    __fr;								\
> +  })
>  
>  /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
>  #define fesetenv_register(env) \
> diff --git a/sysdeps/powerpc/fpu_control.h b/sysdeps/powerpc/fpu_control.h
> index 0ab9331..4c151e9 100644
> --- a/sysdeps/powerpc/fpu_control.h
> +++ b/sysdeps/powerpc/fpu_control.h
> @@ -23,6 +23,12 @@
>  # error "SPE/e500 is no longer supported"
>  #endif
>  
> +#ifdef _ARCH_PWR9
> +#define IS_ISA300() 1

Indentation missing.
This is an installed header.  The macro should start with __ in order to avoid
a namespace conflict.

> +#else
> +#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")

Likewise.

> @@ -65,34 +71,31 @@ extern fpu_control_t __fpu_control;
>  typedef unsigned int fpu_control_t;
>  
>  /* Macros for accessing the hardware control word.  */
> -# define __FPU_MFFS()						\
> -  ({register double __fr;					\
> -    __asm__ __volatile__("mffs %0" : "=f" (__fr));				\
> -    __fr;							\
> -  })
> -

You've just added it, so no problem if you remove before the release.

>  # define _FPU_GETCW(cw)						\
>    ({union { double __d; unsigned long long __ll; } __u;		\
> -    __u.__d = __FPU_MFFS();					\
> +    register double __fr;					\
> +    __asm__ __volatile__("mffs %0" : "=f" (__fr));		\
> +    __u.__d = __fr;						\
>      (cw) = (fpu_control_t) __u.__ll;				\
>      (fpu_control_t) __u.__ll;					\
>    })
>  
> -#ifdef _ARCH_PWR9
> -# define __FPU_MFFSL()						\
> -  ({register double __fr;					\
> -    __asm__ __volatile__("mffsl %0" : "=f" (__fr));				\
> -    __fr;							\
> +# define _FPU_GET_RC_FAST()						\
> +  ({union { double __d; unsigned long long __ll; } __u;			\
> +    register double __fr;						\
> +    __asm__ __volatile__(						\
> +      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" 	\
> +      : "=f" (__fr));							\
> +    __u.__d = __fr;							\
> +    __u.__ll &= _FPU_MASK_RC;						\
> +    (fpu_control_t) __u.__ll;						\
>    })
> -#else
> -# define __FPU_MFFSL() __FPU_MFFS()
> -#endif
> -    
> +
>  # define _FPU_GET_RC()						\
> -  ({union { double __d; unsigned long long __ll; } __u;		\
> -    __u.__d = __FPU_MFFSL();					\
> -    __u.__ll &= _FPU_MASK_RC;					\
> -    (fpu_control_t) __u.__ll;					\
> +  ({fpu_control_t rc = __glibc_likely(IS_ISA300())		\
> +    ? _FPU_GET_RC_FAST()					\
> +    : _FPU_GETCW(rc);						\

This is missing '& _FPU_MASK_RC'.
s/rc/__rc/ too
diff mbox series

Patch

diff --git a/sysdeps/powerpc/bits/fenvinline.h b/sysdeps/powerpc/bits/fenvinline.h
index 7079d1a..72cd263 100644
--- a/sysdeps/powerpc/bits/fenvinline.h
+++ b/sysdeps/powerpc/bits/fenvinline.h
@@ -19,12 +19,22 @@ 
 #if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
 
 /* Inline definition for fegetround.  */
-# define __fegetround() \
-  (__extension__  ({ int __fegetround_result;				      \
-		     __asm__ __volatile__				      \
-		       ("mcrfs 7,7 ; mfcr %0"				      \
-			: "=r"(__fegetround_result) : : "cr7");		      \
-		     __fegetround_result & 3; }))
+#ifdef _ARCH_PWR9
+# define __fegetround()							\
+  __extension__  ({							\
+    union { double __d; unsigned long long __ll; } __u;			\
+    __asm__ ("mffsl %0" : "=f" (__u.__d));				\
+    __u.__ll & 0x0000000000000003LL;					\
+  })
+#else
+# define __fegetround()							\
+  __extension__  ({							\
+        int __fegetround_result;					\
+        __asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0"			\
+                              : "=r"(__fegetround_result) : : "cr7");	\
+        __fegetround_result & 3;					\
+  })
+#endif
 # define fegetround() __fegetround ()
 
 # ifndef __NO_MATH_INLINES
diff --git a/sysdeps/powerpc/fpu/fegetexcept.c b/sysdeps/powerpc/fpu/fegetexcept.c
index 2173d77..10a37f0 100644
--- a/sysdeps/powerpc/fpu/fegetexcept.c
+++ b/sysdeps/powerpc/fpu/fegetexcept.c
@@ -24,7 +24,7 @@  __fegetexcept (void)
 {
   fenv_union_t fe;
 
-  fe.fenv = fegetenv_register ();
+  fe.fenv = fegetenv_status ();
 
   return fenv_reg_to_exceptions (fe.l);
 }
diff --git a/sysdeps/powerpc/fpu/fegetmode.c b/sysdeps/powerpc/fpu/fegetmode.c
index f43ab60..466f5b7 100644
--- a/sysdeps/powerpc/fpu/fegetmode.c
+++ b/sysdeps/powerpc/fpu/fegetmode.c
@@ -21,6 +21,6 @@ 
 int
 fegetmode (femode_t *modep)
 {
-  *modep = fegetenv_register ();
+  *modep = fegetenv_status ();
   return 0;
 }
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index f66bf24..79e70df 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -33,6 +33,25 @@  extern const fenv_t *__fe_mask_env (void) attribute_hidden;
 /* Equivalent to fegetenv, but returns a fenv_t instead of taking a
    pointer.  */
 #define fegetenv_register() __builtin_mffs()
+ 
+#ifdef _ARCH_PWR9
+#define IS_ISA300() 1
+#else
+#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")
+#endif
+
+/* Equivalent to fegetenv_register, but only returns bits for
+   status, exception enables, and mode.  */
+#define fegetenv_status()						\
+  ({register double __fr;						\
+    if (__glibc_likely(IS_ISA300())) 					\
+      __asm__ __volatile__ (						\
+        ".machine push; .machine \"power9\"; mffsl %0; .machine pop"	\
+        : "=f" (__fr));							\
+    else								\
+      __fr = __builtin_mffs();						\
+    __fr;								\
+  })
 
 /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
 #define fesetenv_register(env) \
diff --git a/sysdeps/powerpc/fpu_control.h b/sysdeps/powerpc/fpu_control.h
index 0ab9331..4c151e9 100644
--- a/sysdeps/powerpc/fpu_control.h
+++ b/sysdeps/powerpc/fpu_control.h
@@ -23,6 +23,12 @@ 
 # error "SPE/e500 is no longer supported"
 #endif
 
+#ifdef _ARCH_PWR9
+#define IS_ISA300() 1
+#else
+#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")
+#endif
+
 #ifdef _SOFT_FLOAT
 
 # define _FPU_RESERVED 0xffffffff
@@ -65,34 +71,31 @@  extern fpu_control_t __fpu_control;
 typedef unsigned int fpu_control_t;
 
 /* Macros for accessing the hardware control word.  */
-# define __FPU_MFFS()						\
-  ({register double __fr;					\
-    __asm__ __volatile__("mffs %0" : "=f" (__fr));				\
-    __fr;							\
-  })
-
 # define _FPU_GETCW(cw)						\
   ({union { double __d; unsigned long long __ll; } __u;		\
-    __u.__d = __FPU_MFFS();					\
+    register double __fr;					\
+    __asm__ __volatile__("mffs %0" : "=f" (__fr));		\
+    __u.__d = __fr;						\
     (cw) = (fpu_control_t) __u.__ll;				\
     (fpu_control_t) __u.__ll;					\
   })
 
-#ifdef _ARCH_PWR9
-# define __FPU_MFFSL()						\
-  ({register double __fr;					\
-    __asm__ __volatile__("mffsl %0" : "=f" (__fr));				\
-    __fr;							\
+# define _FPU_GET_RC_FAST()						\
+  ({union { double __d; unsigned long long __ll; } __u;			\
+    register double __fr;						\
+    __asm__ __volatile__(						\
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" 	\
+      : "=f" (__fr));							\
+    __u.__d = __fr;							\
+    __u.__ll &= _FPU_MASK_RC;						\
+    (fpu_control_t) __u.__ll;						\
   })
-#else
-# define __FPU_MFFSL() __FPU_MFFS()
-#endif
-    
+
 # define _FPU_GET_RC()						\
-  ({union { double __d; unsigned long long __ll; } __u;		\
-    __u.__d = __FPU_MFFSL();					\
-    __u.__ll &= _FPU_MASK_RC;					\
-    (fpu_control_t) __u.__ll;					\
+  ({fpu_control_t rc = __glibc_likely(IS_ISA300())		\
+    ? _FPU_GET_RC_FAST()					\
+    : _FPU_GETCW(rc);						\
+    rc;								\
   })
 
 # define _FPU_SETCW(cw)						\