AArch64: Add libc_feholdsetround_noex_aarch64_ctx
diff mbox

Message ID 004301cf8585$c6248360$526d8a20$@com
State New
Headers show

Commit Message

Wilco June 11, 2014, 2:59 p.m. UTC
Hi,

This patch adds a new function, libc_feholdsetround_noex_aarch64_ctx, enabling
further optimization: libc_feholdsetround_aarch64_ctx now only needs to
read the FPCR in the typical case, avoiding a redundant FPSR read.
Performance results show a good improvement (5-10% on sin()) on cores with
expensive FPCR/FPSR instructions.

OK for commit?

Wilco

ChangeLog:
2014-06-11  Wilco  <wdijkstr@arm.com>

	* sysdeps/aarch64/fpu/math_private.h
	(libc_feholdsetround_noex_aarch64_ctx): New function.
---
 sysdeps/aarch64/fpu/math_private.h | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

Comments

Will Newton July 23, 2014, 3:54 p.m. UTC | #1
On 11 June 2014 15:59, Wilco <wdijkstr@arm.com> wrote:
> Hi,
>
> This patch adds new function libc_feholdsetround_noex_aarch64_ctx, enabling
> further optimization. libc_feholdsetround_aarch64_ctx now only needs to
> read the FPCR in the typical case, avoiding a redundant FPSR read.
> Performance results show a good improvement (5-10% on sin()) on cores with
> expensive FPCR/FPSR instructions.
>
> OK for commit?

This looks ok to me.

> Wilco
>
> ChangeLog:
> 2014-06-11  Wilco  <wdijkstr@arm.com>
>
>         * sysdeps/aarch64/fpu/math_private.h
>         (libc_feholdsetround_noex_aarch64_ctx): New function.

Patch
diff mbox

diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index 023c9d0..b13c030 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -228,12 +228,9 @@  static __always_inline void
 libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
 {
   fpu_control_t fpcr;
-  fpu_fpsr_t fpsr;
   int round;
 
   _FPU_GETCW (fpcr);
-  _FPU_GETFPSR (fpsr);
-  ctx->env.__fpsr = fpsr;
 
   /* Check whether rounding modes are different.  */
   round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
@@ -264,6 +261,33 @@  libc_feresetround_aarch64_ctx (struct rm_ctx *ctx)
 #define libc_feresetroundl_ctx		libc_feresetround_aarch64_ctx
 
 /* Read the control word (FPCR) and the FPSR, store the FPSR value in
    CTX->env.__fpsr, and make the _FPU_FPCR_RM_MASK bits of R the current
    rounding mode.  CTX->updated_status records whether those bits differed
    from the FPCR; only then is the old FPCR saved in CTX->env.__fpcr and
    the control word rewritten.  */
 static __always_inline void
+libc_feholdsetround_noex_aarch64_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  ctx->env.__fpsr = fpsr;
+
+  /* Check whether rounding modes are different: ROUND is the set of
+     rounding-mode bits in which FPCR and R disagree, zero if none.  */
+  round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
+  ctx->updated_status = round != 0;
+
+  /* Set the rounding mode if changed.  XORing FPCR with ROUND flips only
+     the differing rounding-mode bits, leaving all other FPCR bits as
+     they were.  */
+  if (__glibc_unlikely (round != 0))
+    {
+      ctx->env.__fpcr = fpcr;
+      _FPU_SETCW (fpcr ^ round);
+    }
+}
+
+#define libc_feholdsetround_noex_ctx	libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexf_ctx	libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexl_ctx	libc_feholdsetround_noex_aarch64_ctx
+
+static __always_inline void
 libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
 {
   /* Restore the rounding mode if updated.  */