Message ID | 1466485631-3532-25-git-send-email-ynorov@caviumnetworks.com |
---|---|
State | New |
Headers | show |
Yury Norov <ynorov@caviumnetworks.com> writes: > diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S > index fe8a17d..718dddf 100644 > --- a/sysdeps/aarch64/dl-tlsdesc.S > +++ b/sysdeps/aarch64/dl-tlsdesc.S > @@ -74,6 +74,7 @@ > cfi_startproc > .align 2 > _dl_tlsdesc_return: > + DELOUSE(0) > ldr PTR_REG (0), [x0, #PTR_SIZE] > RET > cfi_endproc > @@ -126,6 +127,7 @@ _dl_tlsdesc_undefweak: > td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load > from [x0,#8] here happens after the initialization of td->arg. */ > ldar xzr, [x0] > + DELOUSE(0) That needs to be moved before the previous insn, doesn't it? Also, again _dl_tlsdesc_return_lazy is missing. Andreas.
On Tue, 21 Jun 2016, Yury Norov wrote:
> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
You're missing a patch description. What does "delouse" even mean? What
is the ABI peculiarity that means there are ABI-conforming arguments to
these functions that need such a manipulation?
On Tuesday, June 21, 2016 10:36:53 AM CEST Joseph Myers wrote: > On Tue, 21 Jun 2016, Yury Norov wrote: > > > Signed-off-by: Yury Norov <ynorov@caviumnetworks.com> > > You're missing a patch description. What does "delouse" even mean? What > is the ABI peculiarity that means there are ABI-conforming arguments to > these functions that need such a manipulation? > This is the term the kernel uses for making sure that no system call from user space passes data in the upper bits of the argument registers, which could end up being used in an exploit when the calling conventions between functions assume that the upper bits contain zeroes. I don't think there is any point in doing this in glibc though: we can safely assume that any application calling into glibc follows the documented calling conventions (it would otherwise be a bug), but the kernel still has to zero those registers because malicious user space code would simply execute the system call instruction directly instead of calling into glibc... Arnd
On Tue, Jun 21, 2016 at 8:42 AM, Arnd Bergmann <arnd@arndb.de> wrote: > On Tuesday, June 21, 2016 10:36:53 AM CEST Joseph Myers wrote: >> On Tue, 21 Jun 2016, Yury Norov wrote: >> >> > Signed-off-by: Yury Norov <ynorov@caviumnetworks.com> >> >> You're missing a patch description. What does "delouse" even mean? What >> is the ABI peculiarity that means there are ABI-conforming arguments to >> these functions that need such a manipulation? >> > > This is the term the kernel uses for making sure that no system call > from user space passes data in the upper bits of the argument registers, > which could end up being used in an exploit when the calling conventions > between functions assume that the upper bits contain zeroes. > > I don't think there is any point in doing this in glibc though: we > can safely assume that any application calling into glibc follows > the documented calling conventions (it would otherwise be a bug), > but the kernel still has to zero those registers because malicious > user space code would simply execute the system call instruction > directly instead of calling into glibc... The documented ABI leaves the top 32 bits undefined, which is why the zeroing is needed in these cases. Maybe a different name is needed here. It is the same reason why it is needed for vDSO assembly code. If these functions were written in C, the compiler would be emitting the zero-extension for you. Thanks, Andrew > > Arnd
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S index 58332be..0377715 100644 --- a/sysdeps/aarch64/__longjmp.S +++ b/sysdeps/aarch64/__longjmp.S @@ -46,6 +46,8 @@ ENTRY (__longjmp) cfi_offset(d14, JB_D14<<3) cfi_offset(d15, JB_D15<<3) + DELOUSE(0) + ldp x19, x20, [x0, #JB_X19<<3] ldp x21, x22, [x0, #JB_X21<<3] ldp x23, x24, [x0, #JB_X23<<3] diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S index fe8a17d..718dddf 100644 --- a/sysdeps/aarch64/dl-tlsdesc.S +++ b/sysdeps/aarch64/dl-tlsdesc.S @@ -74,6 +74,7 @@ cfi_startproc .align 2 _dl_tlsdesc_return: + DELOUSE(0) ldr PTR_REG (0), [x0, #PTR_SIZE] RET cfi_endproc @@ -126,6 +127,7 @@ _dl_tlsdesc_undefweak: td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load from [x0,#8] here happens after the initialization of td->arg. */ ldar xzr, [x0] + DELOUSE(0) ldr PTR_REG (0), [x0, #PTR_SIZE] mrs x1, tpidr_el0 sub PTR_REG (0), PTR_REG (0), PTR_REG (1) @@ -174,6 +176,7 @@ _dl_tlsdesc_dynamic: stp x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]! cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) mov x29, sp + DELOUSE(0) /* Save just enough registers to support fast path, if we fall into slow path we will save additional registers. 
*/ @@ -279,12 +282,14 @@ _dl_tlsdesc_resolve_rela: SAVE_Q_REGISTERS + DELOUSE(3) ldr PTR_REG (1), [x3, #PTR_SIZE] bl _dl_tlsdesc_resolve_rela_fixup RESTORE_Q_REGISTERS ldr x0, [sp, #32+16*8] + DELOUSE(0) ldr PTR_REG (1), [x0] blr x1 @@ -346,6 +351,7 @@ _dl_tlsdesc_resolve_hold: RESTORE_Q_REGISTERS ldr x0, [sp, #32+16*9] + DELOUSE(0) ldr PTR_REG (1), [x0] blr x1 diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S index ae2d997..982aa02 100644 --- a/sysdeps/aarch64/memcmp.S +++ b/sysdeps/aarch64/memcmp.S @@ -47,6 +47,9 @@ #define mask x13 ENTRY_ALIGN (memcmp, 6) + DELOUSE(0) + DELOUSE(1) + DELOUSE(2) cbz limit, L(ret0) eor tmp1, src1, src2 tst tmp1, #7 diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S index 442f390..e0bbbf8 100644 --- a/sysdeps/aarch64/memcpy.S +++ b/sysdeps/aarch64/memcpy.S @@ -46,7 +46,9 @@ #include <sysdep.h> ENTRY_ALIGN (memcpy, 6) - + DELOUSE(0) + DELOUSE(1) + DELOUSE(2) mov dst, dstin cmp count, #64 b.ge L(cpy_not_short) diff --git a/sysdeps/aarch64/memmove.S b/sysdeps/aarch64/memmove.S index dd91db0..3f72dea 100644 --- a/sysdeps/aarch64/memmove.S +++ b/sysdeps/aarch64/memmove.S @@ -46,6 +46,9 @@ #define D_h x14 ENTRY_ALIGN (memmove, 6) + DELOUSE(0) + DELOUSE(1) + DELOUSE(2) cmp dstin, src b.lo L(downwards) diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index e49f4d6..e8eed9e 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -52,7 +52,8 @@ #define tmp3w w9 ENTRY_ALIGN (__memset, 6) - + DELOUSE(0) + DELOUSE(2) mov dst, dstin /* Preserve return value. 
*/ ands A_lw, val, #255 #ifndef DONT_USE_DC diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S index da83f19..d608660 100644 --- a/sysdeps/aarch64/setjmp.S +++ b/sysdeps/aarch64/setjmp.S @@ -33,6 +33,7 @@ END (_setjmp) libc_hidden_def (_setjmp) ENTRY (__sigsetjmp) + DELOUSE(0) 1: stp x19, x20, [x0, #JB_X19<<3] diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S index 5e3aecf..838384c 100644 --- a/sysdeps/aarch64/strchr.S +++ b/sysdeps/aarch64/strchr.S @@ -62,6 +62,7 @@ /* Locals and temporaries. */ ENTRY (strchr) + DELOUSE(0) mov wtmp2, #0x0401 movk wtmp2, #0x4010, lsl #16 dup vrepchr.16b, chrin diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S index a624c8d..b60df26 100644 --- a/sysdeps/aarch64/strchrnul.S +++ b/sysdeps/aarch64/strchrnul.S @@ -60,6 +60,7 @@ identify exactly which byte is causing the termination. */ ENTRY (__strchrnul) + DELOUSE(0) /* Magic constant 0x40100401 to allow us to identify which lane matches the termination condition. */ mov wtmp2, #0x0401 diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S index ba0ccb4..ccfe281 100644 --- a/sysdeps/aarch64/strcmp.S +++ b/sysdeps/aarch64/strcmp.S @@ -49,6 +49,8 @@ /* Start of performance-critical section -- one 64B cache line. */ ENTRY_ALIGN(strcmp, 6) + DELOUSE(0) + DELOUSE(1) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S index 0694199..2a281b9 100644 --- a/sysdeps/aarch64/strcpy.S +++ b/sysdeps/aarch64/strcpy.S @@ -91,6 +91,8 @@ #define MIN_PAGE_SIZE (1 << MIN_PAGE_P2) ENTRY_ALIGN (STRCPY, 6) + DELOUSE(0) + DELOUSE(1) /* For moderately short strings, the fastest way to do the copy is to calculate the length of the string in the same way as strlen, then essentially do a memcpy of the result. 
This avoids the need for diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S index 9b4d1da..4734cfb 100644 --- a/sysdeps/aarch64/strlen.S +++ b/sysdeps/aarch64/strlen.S @@ -85,6 +85,8 @@ boundary. */ ENTRY_ALIGN (strlen, 6) + DELOUSE(0) + DELOUSE(1) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S index f6a17fd..a372654 100644 --- a/sysdeps/aarch64/strncmp.S +++ b/sysdeps/aarch64/strncmp.S @@ -51,6 +51,9 @@ #define endloop x15 ENTRY_ALIGN_AND_PAD (strncmp, 6, 7) + DELOUSE(0) + DELOUSE(1) + DELOUSE(2) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S index 4cce45f..6f67221 100644 --- a/sysdeps/aarch64/strnlen.S +++ b/sysdeps/aarch64/strnlen.S @@ -50,6 +50,9 @@ #define REP8_80 0x8080808080808080 ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9) + DELOUSE(0) + DELOUSE(1) + DELOUSE(2) cbz limit, L(hit_limit) mov zeroones, #REP8_01 bic src, srcin, #15 diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S index 44c1917..bb85a60 100644 --- a/sysdeps/aarch64/strrchr.S +++ b/sysdeps/aarch64/strrchr.S @@ -68,6 +68,7 @@ identify exactly which byte is causing the termination, and why. */ ENTRY(strrchr) + DELOUSE(0) cbz x1, L(null_search) /* Magic constant 0x40100401 to allow us to identify which lane matches the requested byte. 
Magic constant 0x80200802 used diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h index f2ea821..c4ff5e7 100644 --- a/sysdeps/aarch64/sysdep.h +++ b/sysdeps/aarch64/sysdep.h @@ -25,10 +25,12 @@ #define AARCH64_R(NAME) R_AARCH64_ ## NAME #define PTR_REG(n) x##n #define PTR_LOG_SIZE 3 -#else +#define DELOUSE(n) +#else /* __ILP32__ */ #define AARCH64_R(NAME) R_AARCH64_P32_ ## NAME #define PTR_REG(n) w##n #define PTR_LOG_SIZE 2 +#define DELOUSE(n) mov w##n, w##n #endif #define PTR_SIZE (1<<PTR_LOG_SIZE) diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S index 596fb9c..91a1d4c 100644 --- a/sysdeps/unix/sysv/linux/aarch64/clone.S +++ b/sysdeps/unix/sysv/linux/aarch64/clone.S @@ -39,6 +39,13 @@ */ .text ENTRY(__clone) + DELOUSE(0) + DELOUSE(1) + DELOUSE(2) + DELOUSE(3) + DELOUSE(4) + DELOUSE(5) + DELOUSE(6) /* Save args for the child. */ mov x10, x0 mov x11, x2 diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S index 71e526c..35ff326 100644 --- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S +++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S @@ -30,6 +30,7 @@ .text ENTRY(__getcontext) + DELOUSE(0) /* The saved context will return to the getcontext() call point with a return value of 0 */ str xzr, [x0, oX0 + 0 * SZREG] diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S index d17f8c8..7d854bd 100644 --- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S +++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S @@ -34,6 +34,7 @@ .text ENTRY (__setcontext) + DELOUSE(0) /* Save a copy of UCP. 
*/ mov x9, x0 diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S index c1a16f3..764fedc 100644 --- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S +++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S @@ -27,6 +27,7 @@ .text ENTRY(__swapcontext) + DELOUSE(0) /* Set the value returned when swapcontext() returns in this context. */ str xzr, [x0, oX0 + 0 * SZREG]
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com> --- sysdeps/aarch64/__longjmp.S | 2 ++ sysdeps/aarch64/dl-tlsdesc.S | 6 ++++++ sysdeps/aarch64/memcmp.S | 3 +++ sysdeps/aarch64/memcpy.S | 4 +++- sysdeps/aarch64/memmove.S | 3 +++ sysdeps/aarch64/memset.S | 3 ++- sysdeps/aarch64/setjmp.S | 1 + sysdeps/aarch64/strchr.S | 1 + sysdeps/aarch64/strchrnul.S | 1 + sysdeps/aarch64/strcmp.S | 2 ++ sysdeps/aarch64/strcpy.S | 2 ++ sysdeps/aarch64/strlen.S | 2 ++ sysdeps/aarch64/strncmp.S | 3 +++ sysdeps/aarch64/strnlen.S | 3 +++ sysdeps/aarch64/strrchr.S | 1 + sysdeps/aarch64/sysdep.h | 4 +++- sysdeps/unix/sysv/linux/aarch64/clone.S | 7 +++++++ sysdeps/unix/sysv/linux/aarch64/getcontext.S | 1 + sysdeps/unix/sysv/linux/aarch64/setcontext.S | 1 + sysdeps/unix/sysv/linux/aarch64/swapcontext.S | 1 + 20 files changed, 48 insertions(+), 3 deletions(-)