Message ID | 20210515123442.1432385-5-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | Add an internal wrapper for clone, clone2 and clone3 | expand |
* H. J. Lu: > extern int clone3 (struct clone_args *__cl_args, > int (*__func) (void *__arg), void *__arg); > --- > sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++ > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 2 + > 2 files changed, 94 insertions(+) > create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S > > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S > new file mode 100644 > index 0000000000..f7d4036a6a > + .text > +ENTRY (__clone3) > + /* Sanity check arguments. */ > + movq $-EINVAL, %rax > + testq %rdi, %rdi /* No NULL cl_args pointer. */ > + jz SYSCALL_ERROR_LABEL > + testq %rsi, %rsi /* No NULL function pointer. */ > + jz SYSCALL_ERROR_LABEL I think some of these registers aren't x32-compatible. Isn't the upper half undefined? > + /* Save the function pointer in R8 which is preserved by the > + syscall. */ > + movq %rsi, %r8 > + > + /* Put sizeof (struct clone_args) in ESI. */ > + movl $CLONE_ARGS_SIZE , %esi If this is in preparation for the public wrapper, this should actually be an argument. Sorry, I didn't realize this was the direction. > +L(thread_start): > + cfi_startproc > + /* Clearing frame pointer is insufficient, use CFI. */ > + cfi_undefined (rip) > + /* Clear the frame pointer. The ABI suggests this be done, to mark > + the outermost frame obviously. */ > + xorl %ebp, %ebp > + > + /* Set up arguments for the function call. */ > + movq %rdx, %rdi /* Argument. */ > + call *%r8 /* Call function. */ > + /* Call exit with return value from function call. */ > + movq %rax, %rdi > + movl $SYS_ify(exit), %eax > + syscall > + cfi_endproc > + > + cfi_startproc > +PSEUDO_END (__clone3) If this is a public wrapper, should it round up %rsp to 16 bytes at the point of the caller, to follow the x86-64 calling convention? Thanks, Florian
On Sat, May 15, 2021 at 9:23 AM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > extern int clone3 (struct clone_args *__cl_args, > int (*__func) (void *__arg), void *__arg); > --- > sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++ > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 2 + > 2 files changed, 94 insertions(+) > create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S > > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S > new file mode 100644 > index 0000000000..f7d4036a6a > --- /dev/null > +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S > @@ -0,0 +1,92 @@ > +/* The clone3 syscall wrapper. Linux/x86-64 version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* clone3() is even more special than fork() as it mucks with stacks > + and invokes a function in the right context after its all over. 
*/ > + > +#include <sysdep.h> > +#include <clone-offsets.h> > + > +/* The userland implementation is: > + int clone3 (struct clone_args *cl_args, int (*func)(void *arg), > + void *arg); > + the kernel entry is: > + int clone3 (struct clone_args *cl_args, size_t size); > + > + The parameters are passed in registers from userland: > + rdi: cl_args > + rsi: func > + rdx: arg > + > + The kernel expects: > + rax: system call number > + rdi: cl_args > + rsi: size */ > + > + .text > +ENTRY (__clone3) > + /* Sanity check arguments. */ > + movq $-EINVAL, %rax Can this be movl? > + testq %rdi, %rdi /* No NULL cl_args pointer. */ > + jz SYSCALL_ERROR_LABEL > + testq %rsi, %rsi /* No NULL function pointer. */ > + jz SYSCALL_ERROR_LABEL > + > + /* Save the function pointer in R8 which is preserved by the > + syscall. */ > + movq %rsi, %r8 > + > + /* Put sizeof (struct clone_args) in ESI. */ > + movl $CLONE_ARGS_SIZE , %esi > + > + /* Do the system call. */ > + movl $SYS_ify(clone3), %eax > + > + /* End FDE now, because in the child the unwind info will be > + wrong. */ > + cfi_endproc > + syscall > + > + test %RAX_LP, %RAX_LP > + jl SYSCALL_ERROR_LABEL > + jz L(thread_start) > + Is the expectation to go to L(thread_start)? If so, I think jnz L(ret) and fallthrough is probably better. > + ret > + > +L(thread_start): > + cfi_startproc > + /* Clearing frame pointer is insufficient, use CFI. */ > + cfi_undefined (rip) > + /* Clear the frame pointer. The ABI suggests this be done, to mark > + the outermost frame obviously. */ > + xorl %ebp, %ebp > + > + /* Set up arguments for the function call. */ > + movq %rdx, %rdi /* Argument. */ > + call *%r8 /* Call function. */ > + /* Call exit with return value from function call. 
*/ > + movq %rax, %rdi > + movl $SYS_ify(exit), %eax > + syscall > + cfi_endproc > + > + cfi_startproc > +PSEUDO_END (__clone3) > + > +libc_hidden_def (__clone3) > +weak_alias (__clone3, clone3) > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h > index dbad2c788a..f26ffc68ae 100644 > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h > @@ -377,6 +377,8 @@ > # define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" > # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" > > +# define HAVE_CLONE3_WAPPER 1 > + > # define SINGLE_THREAD_BY_GLOBAL 1 > > #endif /* __ASSEMBLER__ */ > -- > 2.31.1 >
On Thu, May 20, 2021 at 2:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Sat, May 15, 2021 at 9:23 AM H.J. Lu via Libc-alpha > <libc-alpha@sourceware.org> wrote: > > > > extern int clone3 (struct clone_args *__cl_args, > > int (*__func) (void *__arg), void *__arg); > > --- > > sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++ > > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 2 + > > 2 files changed, 94 insertions(+) > > create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S > > > > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S > > new file mode 100644 > > index 0000000000..f7d4036a6a > > --- /dev/null > > +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S > > @@ -0,0 +1,92 @@ > > +/* The clone3 syscall wrapper. Linux/x86-64 version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +/* clone3() is even more special than fork() as it mucks with stacks > > + and invokes a function in the right context after its all over. 
*/ > > + > > +#include <sysdep.h> > > +#include <clone-offsets.h> > > + > > +/* The userland implementation is: > > + int clone3 (struct clone_args *cl_args, int (*func)(void *arg), > > + void *arg); > > + the kernel entry is: > > + int clone3 (struct clone_args *cl_args, size_t size); > > + > > + The parameters are passed in registers from userland: > > + rdi: cl_args > > + rsi: func > > + rdx: arg > > + > > + The kernel expects: > > + rax: system call number > > + rdi: cl_args > > + rsi: size */ > > + > > + .text > > +ENTRY (__clone3) > > + /* Sanity check arguments. */ > > + movq $-EINVAL, %rax > > Can this be movl? > > > + testq %rdi, %rdi /* No NULL cl_args pointer. */ > > + jz SYSCALL_ERROR_LABEL > > + testq %rsi, %rsi /* No NULL function pointer. */ > > + jz SYSCALL_ERROR_LABEL > > + > > + /* Save the function pointer in R8 which is preserved by the > > + syscall. */ > > + movq %rsi, %r8 > > + > > + /* Put sizeof (struct clone_args) in ESI. */ > > + movl $CLONE_ARGS_SIZE , %esi > > + > > + /* Do the system call. */ > > + movl $SYS_ify(clone3), %eax > > + > > + /* End FDE now, because in the child the unwind info will be > > + wrong. */ > > + cfi_endproc > > + syscall > > + > > + test %RAX_LP, %RAX_LP > > + jl SYSCALL_ERROR_LABEL > > + jz L(thread_start) > > + > > Is expectation to go to L(thread_start)? If so > think jnz L(ret) and fallthrough is probably > better. Or better take the error check branch off the critical path with jnz L(error_or_ret) then jl in L(error_or_ret) > > > + ret > > + > > +L(thread_start): > > + cfi_startproc > > + /* Clearing frame pointer is insufficient, use CFI. */ > > + cfi_undefined (rip) > > + /* Clear the frame pointer. The ABI suggests this be done, to mark > > + the outermost frame obviously. */ > > + xorl %ebp, %ebp > > + > > + /* Set up arguments for the function call. */ > > + movq %rdx, %rdi /* Argument. */ > > + call *%r8 /* Call function. */ > > + /* Call exit with return value from function call. 
*/ > > + movq %rax, %rdi > > + movl $SYS_ify(exit), %eax > > + syscall > > + cfi_endproc > > + > > + cfi_startproc > > +PSEUDO_END (__clone3) > > + > > +libc_hidden_def (__clone3) > > +weak_alias (__clone3, clone3) > > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h > > index dbad2c788a..f26ffc68ae 100644 > > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h > > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h > > @@ -377,6 +377,8 @@ > > # define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" > > # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" > > > > +# define HAVE_CLONE3_WAPPER 1 > > + > > # define SINGLE_THREAD_BY_GLOBAL 1 > > > > #endif /* __ASSEMBLER__ */ > > -- > > 2.31.1 > >
On Thu, May 20, 2021 at 7:53 AM Florian Weimer <fweimer@redhat.com> wrote: > > * H. J. Lu: > > > extern int clone3 (struct clone_args *__cl_args, > > int (*__func) (void *__arg), void *__arg); > > --- > > sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++ > > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 2 + > > 2 files changed, 94 insertions(+) > > create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S > > > > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S > > new file mode 100644 > > index 0000000000..f7d4036a6a > > > + .text > > +ENTRY (__clone3) > > + /* Sanity check arguments. */ > > + movq $-EINVAL, %rax > > + testq %rdi, %rdi /* No NULL cl_args pointer. */ > > + jz SYSCALL_ERROR_LABEL > > + testq %rsi, %rsi /* No NULL function pointer. */ > > + jz SYSCALL_ERROR_LABEL > > I think some of these register aren't x32-compatible. Isn't the upper > half undefined? All pointers passed in registers are zero-extended to 64 bits. I changed it to use REG_LP macros to avoid the REX prefix. > > + /* Save the function pointer in R8 which is preserved by the > > + syscall. */ > > + movq %rsi, %r8 > > + > > + /* Put sizeof (struct clone_args) in ESI. */ > > + movl $CLONE_ARGS_SIZE , %esi > > If this is in preparation of the public wrapper, this should actually be > an argument. Sorry didn't realize this was the direction. Fixed. > > +L(thread_start): > > + cfi_startproc > > + /* Clearing frame pointer is insufficient, use CFI. */ > > + cfi_undefined (rip) > > + /* Clear the frame pointer. The ABI suggests this be done, to mark > > + the outermost frame obviously. */ > > + xorl %ebp, %ebp > > + > > + /* Set up arguments for the function call. */ > > + movq %rdx, %rdi /* Argument. */ > > + call *%r8 /* Call function. */ > > + /* Call exit with return value from function call. 
*/ > > + movq %rax, %rdi > > + movl $SYS_ify(exit), %eax > > + syscall > > + cfi_endproc > > + > > + cfi_startproc > > +PSEUDO_END (__clone3) > > If this is a public wrapper, should it round up %rsp to 16 bytes Fixed. > at the point of the caller, to follow the x86-64 calling convention? > > Thanks, > Florian > Thanks.
On Thu, May 20, 2021 at 11:39 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Thu, May 20, 2021 at 2:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Sat, May 15, 2021 at 9:23 AM H.J. Lu via Libc-alpha > > <libc-alpha@sourceware.org> wrote: > > > > > > extern int clone3 (struct clone_args *__cl_args, > > > int (*__func) (void *__arg), void *__arg); > > > --- > > > sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++ > > > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 2 + > > > 2 files changed, 94 insertions(+) > > > create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S > > > > > > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S > > > new file mode 100644 > > > index 0000000000..f7d4036a6a > > > --- /dev/null > > > +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S > > > @@ -0,0 +1,92 @@ > > > +/* The clone3 syscall wrapper. Linux/x86-64 version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +/* clone3() is even more special than fork() as it mucks with stacks > > > + and invokes a function in the right context after its all over. 
*/ > > > + > > > +#include <sysdep.h> > > > +#include <clone-offsets.h> > > > + > > > +/* The userland implementation is: > > > + int clone3 (struct clone_args *cl_args, int (*func)(void *arg), > > > + void *arg); > > > + the kernel entry is: > > > + int clone3 (struct clone_args *cl_args, size_t size); > > > + > > > + The parameters are passed in registers from userland: > > > + rdi: cl_args > > > + rsi: func > > > + rdx: arg > > > + > > > + The kernel expects: > > > + rax: system call number > > > + rdi: cl_args > > > + rsi: size */ > > > + > > > + .text > > > +ENTRY (__clone3) > > > + /* Sanity check arguments. */ > > > + movq $-EINVAL, %rax > > > > Can this be movl? Yes. Fixed. > > > + testq %rdi, %rdi /* No NULL cl_args pointer. */ > > > + jz SYSCALL_ERROR_LABEL > > > + testq %rsi, %rsi /* No NULL function pointer. */ > > > + jz SYSCALL_ERROR_LABEL > > > + > > > + /* Save the function pointer in R8 which is preserved by the > > > + syscall. */ > > > + movq %rsi, %r8 > > > + > > > + /* Put sizeof (struct clone_args) in ESI. */ > > > + movl $CLONE_ARGS_SIZE , %esi > > > + > > > + /* Do the system call. */ > > > + movl $SYS_ify(clone3), %eax > > > + > > > + /* End FDE now, because in the child the unwind info will be > > > + wrong. */ > > > + cfi_endproc > > > + syscall > > > + > > > + test %RAX_LP, %RAX_LP > > > + jl SYSCALL_ERROR_LABEL > > > + jz L(thread_start) > > > + > > > > Is expectation to go to L(thread_start)? If so > > think jnz L(ret) and fallthrough is probably > > better. > > Or better take the error check branch off > the critical path with jnz L(error_or_ret) then jl > in L(error_or_ret) I don't think the clone wrapper is on the critical path. Since the same code is executed by both child and parent. I check the error return first. > > > > > + ret > > > + > > > +L(thread_start): > > > + cfi_startproc > > > + /* Clearing frame pointer is insufficient, use CFI. */ > > > + cfi_undefined (rip) > > > + /* Clear the frame pointer. 
The ABI suggests this be done, to mark > > > + the outermost frame obviously. */ > > > + xorl %ebp, %ebp > > > + > > > + /* Set up arguments for the function call. */ > > > + movq %rdx, %rdi /* Argument. */ > > > + call *%r8 /* Call function. */ > > > + /* Call exit with return value from function call. */ > > > + movq %rax, %rdi > > > + movl $SYS_ify(exit), %eax > > > + syscall > > > + cfi_endproc > > > + > > > + cfi_startproc > > > +PSEUDO_END (__clone3) > > > + > > > +libc_hidden_def (__clone3) > > > +weak_alias (__clone3, clone3) > > > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h > > > index dbad2c788a..f26ffc68ae 100644 > > > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h > > > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h > > > @@ -377,6 +377,8 @@ > > > # define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" > > > # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" > > > > > > +# define HAVE_CLONE3_WAPPER 1 > > > + > > > # define SINGLE_THREAD_BY_GLOBAL 1 > > > > > > #endif /* __ASSEMBLER__ */ > > > -- > > > 2.31.1 > > > Thanks.
diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S new file mode 100644 index 0000000000..f7d4036a6a --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S @@ -0,0 +1,92 @@ +/* The clone3 syscall wrapper. Linux/x86-64 version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* clone3() is even more special than fork() as it mucks with stacks + and invokes a function in the right context after its all over. */ + +#include <sysdep.h> +#include <clone-offsets.h> + +/* The userland implementation is: + int clone3 (struct clone_args *cl_args, int (*func)(void *arg), + void *arg); + the kernel entry is: + int clone3 (struct clone_args *cl_args, size_t size); + + The parameters are passed in registers from userland: + rdi: cl_args + rsi: func + rdx: arg + + The kernel expects: + rax: system call number + rdi: cl_args + rsi: size */ + + .text +ENTRY (__clone3) + /* Sanity check arguments. */ + movq $-EINVAL, %rax + testq %rdi, %rdi /* No NULL cl_args pointer. */ + jz SYSCALL_ERROR_LABEL + testq %rsi, %rsi /* No NULL function pointer. */ + jz SYSCALL_ERROR_LABEL + + /* Save the function pointer in R8 which is preserved by the + syscall. */ + movq %rsi, %r8 + + /* Put sizeof (struct clone_args) in ESI. 
*/ + movl $CLONE_ARGS_SIZE , %esi + + /* Do the system call. */ + movl $SYS_ify(clone3), %eax + + /* End FDE now, because in the child the unwind info will be + wrong. */ + cfi_endproc + syscall + + test %RAX_LP, %RAX_LP + jl SYSCALL_ERROR_LABEL + jz L(thread_start) + + ret + +L(thread_start): + cfi_startproc + /* Clearing frame pointer is insufficient, use CFI. */ + cfi_undefined (rip) + /* Clear the frame pointer. The ABI suggests this be done, to mark + the outermost frame obviously. */ + xorl %ebp, %ebp + + /* Set up arguments for the function call. */ + movq %rdx, %rdi /* Argument. */ + call *%r8 /* Call function. */ + /* Call exit with return value from function call. */ + movq %rax, %rdi + movl $SYS_ify(exit), %eax + syscall + cfi_endproc + + cfi_startproc +PSEUDO_END (__clone3) + +libc_hidden_def (__clone3) +weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h index dbad2c788a..f26ffc68ae 100644 --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h @@ -377,6 +377,8 @@ # define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" +# define HAVE_CLONE3_WAPPER 1 + # define SINGLE_THREAD_BY_GLOBAL 1 #endif /* __ASSEMBLER__ */