From patchwork Wed Apr 23 23:51:46 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ian Bolton X-Patchwork-Id: 342042 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 9A9DF14011B for ; Thu, 24 Apr 2014 09:51:59 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:date:from:to:subject:message-id:reply-to :mime-version:content-type; q=dns; s=default; b=ZklaMKsxwJLxC+Ac 01oJUzcbsOC6dapFo6FiC2qO1oGXx9NKfHGm9xmktOWDhb9PkBbovldaw8XJZz3y gwn7J137Z78WlxNvWdwsYGFcKIfZEvOaaDDjCwUU1HKeYIaUcWZ2ZIaxSWniCKBB JiT08M0DUvEG4At9dUPGxLE6dQ4= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:date:from:to:subject:message-id:reply-to :mime-version:content-type; s=default; bh=OU+Jo9HwJie1CPoRPMgAXr 5VpC4=; b=r6t2cccVeYovjyhz2fXFGKWZk5j0NB3Pu2ma4XK+5cYdHCaACAz4sT TBSma+VvvNcPUHLrU/y8JgRJeEjs65kOAVuhirwWIw9MFYSjnLAxqFKdfxNwD4va Q06wkeXwDpwev55ecX8fgzeG8RNmgE6kAPiVpjVbndlMVerFu4SOQ= Received: (qmail 9468 invoked by alias); 23 Apr 2014 23:51:52 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 9457 invoked by uid 89); 23 Apr 2014 23:51:51 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.4 required=5.0 tests=AWL, BAYES_00, NO_DNS_FOR_FROM, RCVD_IN_DNSWL_LOW, RP_MATCHES_RCVD, UNSUBSCRIBE_BODY autolearn=no version=3.3.2 X-HELO: cam-smtp0.cambridge.arm.com Date: Thu, 24 Apr 2014 00:51:46 +0100 From: Ian Bolton To: libc-alpha@sourceware.org Subject: [PATCH] [AArch64] Suppress unnecessary FPSR and FPCR writes Message-ID: <20140423235146.GA20028@e104535-lin.arm.com> Reply-To: ian.bolton@arm.com Mime-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.4.2.2i Writes to fpsr and fpcr are expensive. This patch adds checks before each write to ensure we only write new values to them (as opposed to rewriting what's there). Tested with make check and make bench, with no regressions. OK for commit? Cheers, Ian 2014-04-24 Ian Bolton * sysdeps/aarch64/fpu/fclrexcpt.c (feclearexcept): Don't write to fpsr if value didn't change. * sysdeps/aarch64/fpu/fedisblxcpt.c (fedisableexcept): Don't write to fpcr if value didn't change. * sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept): Likewise. * sysdeps/aarch64/fpu/feholdexcpt.c (feholdexcept): Don't write to fpsr or fpcr if value didn't change. * sysdeps/aarch64/fpu/fesetenv.c (fesetenv): Likewise. * sysdeps/aarch64/fpu/fesetround.c (fesetround): Don't write to fpcr if value didn't change. * sysdeps/aarch64/fpu/fsetexcptflg.c (fesetexceptflag): Don't write to fpsr if value didn't change. diff --git a/sysdeps/aarch64/fpu/fclrexcpt.c b/sysdeps/aarch64/fpu/fclrexcpt.c index 531269f..1f05259 100644 --- a/sysdeps/aarch64/fpu/fclrexcpt.c +++ b/sysdeps/aarch64/fpu/fclrexcpt.c @@ -22,14 +22,15 @@ int feclearexcept (int excepts) { - fpu_fpsr_t fpsr; + fpu_fpsr_t fpsr, fpsr_new; excepts &= FE_ALL_EXCEPT; _FPU_GETFPSR (fpsr); - fpsr = (fpsr & ~FE_ALL_EXCEPT) | (fpsr & FE_ALL_EXCEPT & ~excepts); + fpsr_new = (fpsr & ~FE_ALL_EXCEPT) | (fpsr & FE_ALL_EXCEPT & ~excepts); - _FPU_SETFPSR (fpsr); + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); return 0; } diff --git a/sysdeps/aarch64/fpu/fedisblxcpt.c b/sysdeps/aarch64/fpu/fedisblxcpt.c index 719d52f..67e0aea 100644 --- a/sysdeps/aarch64/fpu/fedisblxcpt.c +++ b/sysdeps/aarch64/fpu/fedisblxcpt.c @@ -22,7 +22,7 @@ int fedisableexcept (int excepts) { - fpu_control_t fpcr; + fpu_control_t fpcr, fpcr_new; int original_excepts; _FPU_GETCW (fpcr); @@ -31,9 +31,10 @@ fedisableexcept (int excepts) excepts &= FE_ALL_EXCEPT; - fpcr &= ~(excepts << FE_EXCEPT_SHIFT); + fpcr_new = fpcr & ~(excepts << FE_EXCEPT_SHIFT); - _FPU_SETCW (fpcr); + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); return original_excepts; } diff --git a/sysdeps/aarch64/fpu/feenablxcpt.c b/sysdeps/aarch64/fpu/feenablxcpt.c index 07a4bbb..58288be 100644 --- a/sysdeps/aarch64/fpu/feenablxcpt.c +++ b/sysdeps/aarch64/fpu/feenablxcpt.c @@ -22,7 +22,7 @@ int feenableexcept (int excepts) { - fpu_control_t fpcr; + fpu_control_t fpcr, fpcr_new; int original_excepts; _FPU_GETCW (fpcr); @@ -31,9 +31,10 @@ feenableexcept (int excepts) excepts &= FE_ALL_EXCEPT; - fpcr |= (excepts << FE_EXCEPT_SHIFT); + fpcr_new = fpcr | (excepts << FE_EXCEPT_SHIFT); - _FPU_SETCW (fpcr); + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); /* Trapping exceptions are optional in AArch64 the relevant enable bits in FPCR are RES0 hence the absence of support can be diff --git a/sysdeps/aarch64/fpu/feholdexcpt.c b/sysdeps/aarch64/fpu/feholdexcpt.c index 0514ac1..639b61e 100644 --- a/sysdeps/aarch64/fpu/feholdexcpt.c +++ b/sysdeps/aarch64/fpu/feholdexcpt.c @@ -22,8 +22,8 @@ int feholdexcept (fenv_t *envp) { - fpu_fpsr_t fpsr; - fpu_control_t fpcr; + fpu_fpsr_t fpsr, fpsr_new; + fpu_control_t fpcr, fpcr_new; _FPU_GETCW (fpcr); envp->__fpcr = fpcr; @@ -32,14 +32,16 @@ feholdexcept (fenv_t *envp) envp->__fpsr = fpsr; /* Now set all exceptions to non-stop. */ - fpcr &= ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT); + fpcr_new = fpcr & ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT); /* And clear all exception flags. */ - fpsr &= ~FE_ALL_EXCEPT; + fpsr_new = fpsr & ~FE_ALL_EXCEPT; - _FPU_SETFPSR (fpsr); + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); - _FPU_SETCW (fpcr); + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); return 0; } diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c index a2434e3..8650666 100644 --- a/sysdeps/aarch64/fpu/fesetenv.c +++ b/sysdeps/aarch64/fpu/fesetenv.c @@ -22,35 +22,37 @@ int fesetenv (const fenv_t *envp) { - fpu_control_t fpcr; - fpu_fpsr_t fpsr; + fpu_control_t fpcr, fpcr_new; + fpu_fpsr_t fpsr, fpsr_new; fpu_control_t updated_fpcr; _FPU_GETCW (fpcr); _FPU_GETFPSR (fpsr); - fpcr &= _FPU_RESERVED; - fpsr &= _FPU_FPSR_RESERVED; + fpcr_new = fpcr & _FPU_RESERVED; + fpsr_new = fpsr & _FPU_FPSR_RESERVED; if (envp == FE_DFL_ENV) { - fpcr |= _FPU_DEFAULT; - fpsr |= _FPU_FPSR_DEFAULT; + fpcr_new |= _FPU_DEFAULT; + fpsr_new |= _FPU_FPSR_DEFAULT; } else if (envp == FE_NOMASK_ENV) { - fpcr |= _FPU_FPCR_IEEE; - fpsr |= _FPU_FPSR_IEEE; + fpcr_new |= _FPU_FPCR_IEEE; + fpsr_new |= _FPU_FPSR_IEEE; } else { - fpcr |= envp->__fpcr & ~_FPU_RESERVED; - fpsr |= envp->__fpsr & ~_FPU_FPSR_RESERVED; + fpcr_new |= envp->__fpcr & ~_FPU_RESERVED; + fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED; } - _FPU_SETFPSR (fpsr); + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); - _FPU_SETCW (fpcr); + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); /* Trapping exceptions are optional in AArch64 the relevant enable bits in FPCR are RES0 hence the absence of support can be @@ -58,7 +60,7 @@ fesetenv (const fenv_t *envp) value. */ _FPU_GETCW (updated_fpcr); - if ((updated_fpcr & fpcr) != fpcr) + if ((updated_fpcr & fpcr_new) != fpcr_new) return 1; return 0; diff --git a/sysdeps/aarch64/fpu/fesetround.c b/sysdeps/aarch64/fpu/fesetround.c index 40a05f6..ccfb8f4 100644 --- a/sysdeps/aarch64/fpu/fesetround.c +++ b/sysdeps/aarch64/fpu/fesetround.c @@ -22,7 +22,7 @@ int fesetround (int round) { - fpu_control_t fpcr; + fpu_control_t fpcr, fpcr_new; switch (round) { @@ -31,9 +31,10 @@ fesetround (int round) case FE_DOWNWARD: case FE_TOWARDZERO: _FPU_GETCW (fpcr); - fpcr = (fpcr & ~FE_TOWARDZERO) | round; + fpcr_new = (fpcr & ~FE_TOWARDZERO) | round; - _FPU_SETCW (fpcr); + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); return 0; default: diff --git a/sysdeps/aarch64/fpu/fsetexcptflg.c b/sysdeps/aarch64/fpu/fsetexcptflg.c index 49cd1e4..3e9a700 100644 --- a/sysdeps/aarch64/fpu/fsetexcptflg.c +++ b/sysdeps/aarch64/fpu/fsetexcptflg.c @@ -23,17 +23,18 @@ int fesetexceptflag (const fexcept_t *flagp, int excepts) { - fpu_fpsr_t fpsr; + fpu_fpsr_t fpsr, fpsr_new; /* Get the current environment. */ _FPU_GETFPSR (fpsr); /* Set the desired exception mask. */ - fpsr &= ~(excepts & FE_ALL_EXCEPT); - fpsr |= (*flagp & excepts & FE_ALL_EXCEPT); + fpsr_new = fpsr & ~(excepts & FE_ALL_EXCEPT); + fpsr_new |= (*flagp & excepts & FE_ALL_EXCEPT); /* Save state back to the FPU. */ - _FPU_SETFPSR (fpsr); + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); return 0; }