From patchwork Tue Oct 10 14:21:13 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Philipp Tomsich X-Patchwork-Id: 823889 X-Patchwork-Delegate: philipp.tomsich@theobroma-systems.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=lists.denx.de (client-ip=81.169.180.215; helo=lists.denx.de; envelope-from=u-boot-bounces@lists.denx.de; receiver=) Received: from lists.denx.de (dione.denx.de [81.169.180.215]) by ozlabs.org (Postfix) with ESMTP id 3yBKLK4h05z9tYT for ; Wed, 11 Oct 2017 01:31:37 +1100 (AEDT) Received: by lists.denx.de (Postfix, from userid 105) id 05E29C21E0C; Tue, 10 Oct 2017 14:27:16 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on lists.denx.de X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=unavailable autolearn_force=no version=3.4.0 Received: from lists.denx.de (localhost [IPv6:::1]) by lists.denx.de (Postfix) with ESMTP id D171DC21E0C; Tue, 10 Oct 2017 14:27:08 +0000 (UTC) Received: by lists.denx.de (Postfix, from userid 105) id BCEE0C21D09; Tue, 10 Oct 2017 14:23:25 +0000 (UTC) Received: from mail.theobroma-systems.com (vegas.theobroma-systems.com [144.76.126.164]) by lists.denx.de (Postfix) with ESMTPS id 6C55EC21D09 for ; Tue, 10 Oct 2017 14:23:21 +0000 (UTC) Received: from [86.59.122.178] (port=58485 helo=android.lan) by mail.theobroma-systems.com with esmtpsa (TLS1.2:RSA_AES_128_CBC_SHA256:128) (Exim 4.80) (envelope-from ) id 1e1vPy-0005js-H9; Tue, 10 Oct 2017 16:21:58 +0200 From: Philipp Tomsich To: u-boot@lists.denx.de Date: Tue, 10 Oct 2017 16:21:13 +0200 Message-Id: <1507645279-25188-14-git-send-email-philipp.tomsich@theobroma-systems.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1507645279-25188-1-git-send-email-philipp.tomsich@theobroma-systems.com> References: 
<1507645279-25188-1-git-send-email-philipp.tomsich@theobroma-systems.com> Cc: Andre Przywara , Klaus Goger , Andy Yan , Jagan Teki , Heiko Stuebner Subject: [U-Boot] [PATCH v5 13/18] arm: provide a PCS-compliant setjmp implementation X-BeenThere: u-boot@lists.denx.de X-Mailman-Version: 2.1.18 Precedence: list List-Id: U-Boot discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: u-boot-bounces@lists.denx.de Sender: "U-Boot" The previous setjmp-implementation (as a static inline function that contained an 'asm volatile' sequence) was extremely fragile: (some versions of) GCC optimised the set of registers. One critical example was the removal of 'r9' from the clobber list, if -ffixed-r9 was supplied. To increase robustness and ensure PCS-compliant behaviour, the setjmp and longjmp implementations are now in assembly and closely match what one would expect to find in a libc implementation. Signed-off-by: Philipp Tomsich Tested-by: Andy Yan --- Changes in v5: None Changes in v4: None Changes in v3: - converted setjmp/longjmp from inline-assembly to separate .S files to improve predictability of emitted code Changes in v2: None arch/arm/include/asm/setjmp.h | 94 ++++++------------------------------------- arch/arm/lib/Makefile | 6 +++ arch/arm/lib/setjmp.S | 37 +++++++++++++++++ arch/arm/lib/setjmp_aarch64.S | 42 +++++++++++++++++++ 4 files changed, 98 insertions(+), 81 deletions(-) create mode 100644 arch/arm/lib/setjmp.S create mode 100644 arch/arm/lib/setjmp_aarch64.S diff --git a/arch/arm/include/asm/setjmp.h b/arch/arm/include/asm/setjmp.h index c3399a7..517beeb 100644 --- a/arch/arm/include/asm/setjmp.h +++ b/arch/arm/include/asm/setjmp.h @@ -1,6 +1,6 @@ /* - * (C) Copyright 2016 - * Alexander Graf + * (C) Copyright 2017 Theobroma Systems Design und Consulting GmbH + * (C) Copyright 2016 Alexander Graf * * SPDX-License-Identifier: GPL-2.0+ */ @@ -8,89 +8,21 @@ #ifndef _SETJMP_H_ #define _SETJMP_H_ 1 +/* 
+ * This really should be opaque, but the EFI implementation wrongly + * assumes that a 'struct jmp_buf_data' is defined. + */ struct jmp_buf_data { - ulong target; - ulong regs[5]; - int ret; -}; - -typedef struct jmp_buf_data jmp_buf[1]; - -static inline int setjmp(jmp_buf jmp) -{ - jmp->ret = 0; - -#ifdef CONFIG_ARM64 - asm volatile( - "adr x1, jmp_target\n" - "str x1, %0\n" - "stp x26, x27, %1\n" - "stp x28, x29, %2\n" - "mov x1, sp\n" - "str x1, %3\n" - "jmp_target: " - : "=m" (jmp->target), "=m" (jmp->regs[0]), - "=m" (jmp->regs[2]), "=m" (jmp->regs[4]) - : - : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", - "x23", "x24", "x25", /* x26, x27, x28, x29, sp */ - "x30", "cc", "memory"); -#else - asm volatile( -#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) - ".align 2\n" - "adr r0, jmp_target\n" - "add r0, r0, $1\n" +#if defined(__aarch64__) + u64 regs[13]; #else - "adr r0, jmp_target\n" -#endif - "mov r1, %0\n" - "mov r2, sp\n" - "stm r1!, {r0, r2, r4, r5, r6, r7}\n" - ".align 2\n" - "jmp_target: \n" - : - : "l" (&jmp->target) - : "r0", "r1", "r2", "r3", /* "r4", "r5", "r6", "r7", */ - "r8", "r9", "r10", "r11", /* sp, */ "ip", "lr", - "cc", "memory"); -#endif - - return jmp->ret; -} - -static inline __noreturn void longjmp(jmp_buf jmp, int ret) -{ - jmp->ret = ret; - -#ifdef CONFIG_ARM64 - asm volatile( - "ldr x0, %0\n" - "ldr x1, %3\n" - "mov sp, x1\n" - "ldp x26, x27, %1\n" - "ldp x28, x25, %2\n" - "mov x29, x25\n" - "br x0\n" - : - : "m" (jmp->target), "m" (jmp->regs[0]), "m" (jmp->regs[2]), - "m" (jmp->regs[4]) - : "x0", "x1", "x25", "x26", "x27", "x28"); -#else - asm volatile( - "mov r1, %0\n" - "ldm r1!, {r0, r2, r4, r5, r6, r7}\n" - "mov sp, r2\n" - "bx r0\n" - : - : "l" (&jmp->target) - : "r1"); + u32 regs[10]; /* r4-r9, sl, fp, sp, lr */ #endif +}; - while (1) { } -} +typedef struct jmp_buf_data jmp_buf[1]; +int setjmp(jmp_buf jmp); +void longjmp(jmp_buf 
jmp, int ret); #endif /* _SETJMP_H_ */ diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 6e1c436..abffa10 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -17,6 +17,12 @@ else obj-y += vectors.o crt0.o endif +ifdef CONFIG_ARM64 +obj-y += setjmp_aarch64.o +else +obj-y += setjmp.o +endif + ifndef CONFIG_SPL_BUILD ifdef CONFIG_ARM64 obj-y += relocate_64.o diff --git a/arch/arm/lib/setjmp.S b/arch/arm/lib/setjmp.S new file mode 100644 index 0000000..6746e5e --- /dev/null +++ b/arch/arm/lib/setjmp.S @@ -0,0 +1,37 @@ +/* + * (C) 2017 Theobroma Systems Design und Consulting GmbH + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include +#include +#include + +.pushsection .text.setjmp, "ax" +ENTRY(setjmp) + /* + * A subroutine must preserve the contents of the registers + * r4-r8, r10, r11 (v1-v5, v7 and v8) and SP (and r9 in PCS + * variants that designate r9 as v6). + */ + mov ip, sp + stm a1, {v1-v8, ip, lr} + mov a1, #0 + bx lr +ENDPROC(setjmp) +.popsection + +.pushsection .text.longjmp, "ax" +ENTRY(longjmp) + ldm a1, {v1-v8, ip, lr} + mov sp, ip + mov a1, a2 + /* If we were passed a return value of zero, return one instead */ + cmp a1, #0 + bne 1f + mov a1, #1 +1: + bx lr +ENDPROC(longjmp) +.popsection diff --git a/arch/arm/lib/setjmp_aarch64.S b/arch/arm/lib/setjmp_aarch64.S new file mode 100644 index 0000000..b68edb8 --- /dev/null +++ b/arch/arm/lib/setjmp_aarch64.S @@ -0,0 +1,42 @@ +/* + * (C) 2017 Theobroma Systems Design und Consulting GmbH + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include +#include +#include + +.pushsection .text.setjmp, "ax" +ENTRY(setjmp) + /* Preserve all callee-saved registers and the SP */ + stp x19, x20, [x0,#0] + stp x21, x22, [x0,#16] + stp x23, x24, [x0,#32] + stp x25, x26, [x0,#48] + stp x27, x28, [x0,#64] + stp x29, x30, [x0,#80] + mov x2, sp + str x2, [x0, #96] + mov x0, #0 + ret +ENDPROC(setjmp) +.popsection + +.pushsection .text.longjmp, "ax" +ENTRY(longjmp) + ldp x19, x20, [x0,#0] + ldp 
x21, x22, [x0,#16] + ldp x23, x24, [x0,#32] + ldp x25, x26, [x0,#48] + ldp x27, x28, [x0,#64] + ldp x29, x30, [x0,#80] + ldr x2, [x0,#96] + mov sp, x2 + /* Move the return value in place, but return 1 if passed 0. */ + adds x0, xzr, x1 + csinc x0, x0, xzr, ne + ret +ENDPROC(longjmp) +.popsection