diff mbox series

[powerpc] libc_feupdateenv_test: optimize FPSCR access

Message ID 1567967373-27052-1-git-send-email-pc@us.ibm.com
State New
Headers show
Series [powerpc] libc_feupdateenv_test: optimize FPSCR access | expand

Commit Message

Paul A. Clarke Sept. 8, 2019, 6:29 p.m. UTC
From: "Paul A. Clarke" <pc@us.ibm.com>

ROUND_TO_ODD and a couple of other places use libc_feupdateenv_test to
restore the rounding mode and exception enables, preserve exception flags,
and test whether given exception(s) were generated.

If the exception flags haven't changed, then it is sufficient and a bit
more efficient to just restore the rounding mode and enables, rather than
writing the full Floating-Point Status and Control Register (FPSCR).

2019-09-08  Paul A. Clarke  <pc@us.ibm.com>

	* sysdeps/powerpc/fpu/fenv_libc.h (FPSCR_EXCEPTIONS_MASK):  New.
	* sysdeps/powerpc/fpu/fenv_private.h (__libc_femergeenv_ppc):  Optimize
	to write FPSCR control only, if exceptions have not changed.
---
 sysdeps/powerpc/fpu/fenv_libc.h    |  4 ++++
 sysdeps/powerpc/fpu/fenv_private.h | 16 ++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

Comments

Paul E Murphy Sept. 11, 2019, 11 p.m. UTC | #1
On 9/8/19 1:29 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
> 
> ROUND_TO_ODD and a couple of other places use libc_feupdateenv_test to
> restore the rounding mode and exception enables, preserve exception flags,
> and test whether given exception(s) were generated.
> 
> If the exception flags haven't changed, then it is sufficient and a bit
> more efficient to just restore the rounding mode and enables, rather than
> writing the full Floating-Point Status and Control Register (FPSCR).
> 
> 2019-09-08  Paul A. Clarke  <pc@us.ibm.com>
> 
> 	* sysdeps/powerpc/fpu/fenv_libc.h (FPSCR_EXCEPTIONS_MASK):  New.
> 	* sysdeps/powerpc/fpu/fenv_private.h (__libc_femergeenv_ppc):  Optimize
> 	to write FPSCR control only, if exceptions have not changed.
> ---
>   sysdeps/powerpc/fpu/fenv_libc.h    |  4 ++++
>   sysdeps/powerpc/fpu/fenv_private.h | 16 ++++++++++++++--
>   2 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index b703c8d..0aad897 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -204,6 +204,10 @@ enum {
>     (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
>   #define FPSCR_BASIC_EXCEPTIONS_MASK \
>     (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
> +#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
> +  FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
> +  FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
> +  FPSCR_VXCVI_MASK)
OK

>   #define FPSCR_FPRF_MASK \
>     (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
>      FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
> index 5ebe6cd..af72560 100644
> --- a/sysdeps/powerpc/fpu/fenv_private.h
> +++ b/sysdeps/powerpc/fpu/fenv_private.h
> @@ -116,8 +116,20 @@ __libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask,
>     if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0)
>       (void) __fe_mask_env ();
>   
> -  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
> -  fesetenv_register (new.fenv);
> +  /* If requesting to keep status, replace control, and merge exceptions,
> +     and exceptions haven't changed, we can just set new control instead
> +     of the whole FPSCR.  */
> +  if ((old_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
> +      == (FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK) &&
> +      (new_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
> +      == (FPSCR_CONTROL_MASK|FPSCR_EXCEPTIONS_MASK) &&
I think the _FPU_MASK_* macros should be rewritten in a similar way to the
first part of this patch; I had a hard time digesting this condition. I think it is OK.

> +      (old.l & FPSCR_EXCEPTIONS_MASK) == (new.l & FPSCR_EXCEPTIONS_MASK))
> +  {
> +    fesetenv_mode (new.fenv);
> +  }
> +  else
> +    /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
> +    fesetenv_register (new.fenv);
OK.

>   
>     return old.l;
>   }
>

LGTM. Thanks.
diff mbox series

Patch

diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index b703c8d..0aad897 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -204,6 +204,10 @@  enum {
   (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
 #define FPSCR_BASIC_EXCEPTIONS_MASK \
   (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
+#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
+  FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
+  FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
+  FPSCR_VXCVI_MASK)
 #define FPSCR_FPRF_MASK \
   (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
    FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
index 5ebe6cd..af72560 100644
--- a/sysdeps/powerpc/fpu/fenv_private.h
+++ b/sysdeps/powerpc/fpu/fenv_private.h
@@ -116,8 +116,20 @@  __libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask,
   if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0)
     (void) __fe_mask_env ();
 
-  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
-  fesetenv_register (new.fenv);
+  /* If requesting to keep status, replace control, and merge exceptions,
+     and exceptions haven't changed, we can just set new control instead
+     of the whole FPSCR.  */
+  if ((old_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
+      == (FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK) &&
+      (new_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
+      == (FPSCR_CONTROL_MASK|FPSCR_EXCEPTIONS_MASK) &&
+      (old.l & FPSCR_EXCEPTIONS_MASK) == (new.l & FPSCR_EXCEPTIONS_MASK))
+  {
+    fesetenv_mode (new.fenv);
+  }
+  else
+    /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
+    fesetenv_register (new.fenv);
 
   return old.l;
 }