Message ID | 20210714133327.3102313-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | i386: Add the clone3 wrapper | expand |
On Wed, Jul 14, 2021 at 9:33 AM H.J. Lu via Libc-alpha < libc-alpha@sourceware.org> wrote: > extern int clone3 (struct clone_args *__cl_args, size_t __size, > int (*__func) (void *__arg), void *__arg); > --- > sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++ > sysdeps/unix/sysv/linux/i386/sysdep.h | 2 + > 2 files changed, 125 insertions(+) > create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S > > diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S > b/sysdeps/unix/sysv/linux/i386/clone3.S > new file mode 100644 > index 0000000000..bef3ce0455 > --- /dev/null > +++ b/sysdeps/unix/sysv/linux/i386/clone3.S > @@ -0,0 +1,123 @@ > +/* The clone3 syscall wrapper. Linux/i386 version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* clone3() is even more special than fork() as it mucks with stacks > + and invokes a function in the right context after its all over. 
*/ > + > +#include <sysdep.h> > + > +/* The userland implementation is: > + int clone3 (struct clone_args *cl_args, size_t size, > + int (*func)(void *arg), void *arg); > + the kernel entry is: > + int clone3 (struct clone_args *cl_args, size_t size); > + > + The parameters are passed on stack from userland: > + 16(%esp) arg > + 12(%esp) func > + 8(%esp) size > + 4(%esp) cl_args > + (%esp) Return address > + > + The kernel expects: > + eax: system call number > + ebx: cl_args > + ecx: size > + */ > + > +#define CL_ARGS 4 > +#define SIZE 8 > +#define FUNC 12 > +#define ARG 16 > + > + .text > +ENTRY (__clone3) > + /* Sanity check arguments. */ > + movl $-EINVAL, %eax > + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */ > + testl %ecx, %ecx > + jz SYSCALL_ERROR_LABEL > + /* Save the function pointer in EDX which is preserved by the > + system call. */ > + movl FUNC(%esp), %edx /* No NULL function pointer. */ > + testl %edx, %edx > + jz SYSCALL_ERROR_LABEL > + > + /* Save EBX and ESI. */ > + pushl %ebx > + cfi_adjust_cfa_offset (4) > + pushl %esi > + cfi_adjust_cfa_offset (4) > + > + /* Save the function argument in ESI which is preserved by the > + system call. */ > + movl (ARG + 8)(%esp), %esi > + > + /* Put cl_args in EBX. */ > + movl %ecx, %ebx > + > + /* Put size in ECX. */ > + movl (SIZE + 8)(%esp), %ecx > + > + /* Do the system call. */ > + movl $SYS_ify(clone3), %eax > + > + /* End FDE now, because in the child the unwind info will be > + wrong. */ > + cfi_endproc > + > + int $0x80 > + test %eax, %eax > + /* No need to restore EBX and ESI in child. */ > does esp need to be adjusted? + jz L(thread_start) > + > + /* Restore EBX and ESI in parent. */ > + pop %esi > + pop %ebx > + jl SYSCALL_ERROR_LABEL > + > + ret > + > +L(thread_start): > + cfi_startproc > + /* Clearing frame pointer is insufficient, use CFI. */ > + cfi_undefined (eip) > + xorl %ebp, %ebp /* Terminate the stack frame. */ > + > + /* Align stack to 16 bytes per the i386 psABI. 
*/ > + andl $-16, %esp > + > + /* The PUSH below will decrement stack pointer by 4 bytes. */ > + subl $12, %esp > + > + /* Set up the argument for the function call. */ > + pushl %esi /* Argument. */ > Can you pushl then align and drop the subl? Or does esp need to be aligned before the pushl? > + cfi_adjust_cfa_offset (4) > + call *%edx /* Call function. */ > + > + /* Call exit with return value from function call. */ > + movl %eax, %ebx > + movl $SYS_ify(exit), %eax > + ENTER_KERNEL > + cfi_endproc > + > + cfi_startproc > +PSEUDO_END (__clone3) > + > +libc_hidden_def (__clone3) > +weak_alias (__clone3, clone3) > diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h > index 8680b49bf7..3927a1a6e0 100644 > --- a/sysdeps/unix/sysv/linux/i386/sysdep.h > +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h > @@ -291,6 +291,8 @@ struct libc_do_syscall_args > # define HAVE_TIME_VSYSCALL "__vdso_time" > # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres" > > +# define HAVE_CLONE3_WAPPER 1 > + > # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL > # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1 > > -- > 2.31.1 > >
On Wed, Jul 14, 2021 at 11:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Wed, Jul 14, 2021 at 9:33 AM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: >> >> extern int clone3 (struct clone_args *__cl_args, size_t __size, >> int (*__func) (void *__arg), void *__arg); >> --- >> sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++ >> sysdeps/unix/sysv/linux/i386/sysdep.h | 2 + >> 2 files changed, 125 insertions(+) >> create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S >> >> diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S b/sysdeps/unix/sysv/linux/i386/clone3.S >> new file mode 100644 >> index 0000000000..bef3ce0455 >> --- /dev/null >> +++ b/sysdeps/unix/sysv/linux/i386/clone3.S >> @@ -0,0 +1,123 @@ >> +/* The clone3 syscall wrapper. Linux/i386 version. >> + Copyright (C) 2021 Free Software Foundation, Inc. >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +/* clone3() is even more special than fork() as it mucks with stacks >> + and invokes a function in the right context after its all over. 
*/ >> + >> +#include <sysdep.h> >> + >> +/* The userland implementation is: >> + int clone3 (struct clone_args *cl_args, size_t size, >> + int (*func)(void *arg), void *arg); >> + the kernel entry is: >> + int clone3 (struct clone_args *cl_args, size_t size); >> + >> + The parameters are passed on stack from userland: >> + 16(%esp) arg >> + 12(%esp) func >> + 8(%esp) size >> + 4(%esp) cl_args >> + (%esp) Return address >> + >> + The kernel expects: >> + eax: system call number >> + ebx: cl_args >> + ecx: size >> + */ >> + >> +#define CL_ARGS 4 >> +#define SIZE 8 >> +#define FUNC 12 >> +#define ARG 16 >> + >> + .text >> +ENTRY (__clone3) >> + /* Sanity check arguments. */ >> + movl $-EINVAL, %eax >> + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */ >> + testl %ecx, %ecx >> + jz SYSCALL_ERROR_LABEL >> + /* Save the function pointer in EDX which is preserved by the >> + system call. */ >> + movl FUNC(%esp), %edx /* No NULL function pointer. */ >> + testl %edx, %edx >> + jz SYSCALL_ERROR_LABEL >> + >> + /* Save EBX and ESI. */ >> + pushl %ebx >> + cfi_adjust_cfa_offset (4) >> + pushl %esi >> + cfi_adjust_cfa_offset (4) >> + >> + /* Save the function argument in ESI which is preserved by the >> + system call. */ >> + movl (ARG + 8)(%esp), %esi >> + >> + /* Put cl_args in EBX. */ >> + movl %ecx, %ebx >> + >> + /* Put size in ECX. */ >> + movl (SIZE + 8)(%esp), %ecx >> + >> + /* Do the system call. */ >> + movl $SYS_ify(clone3), %eax >> + >> + /* End FDE now, because in the child the unwind info will be >> + wrong. */ >> + cfi_endproc >> + >> + int $0x80 >> + test %eax, %eax >> + /* No need to restore EBX and ESI in child. */ > > does esp need to be adjusted? We don't need to adjust ESP in parent and we will set up the child stack a few lines below. >> + jz L(thread_start) >> + >> + /* Restore EBX and ESI in parent. 
*/ >> + pop %esi >> + pop %ebx >> + jl SYSCALL_ERROR_LABEL >> + >> + ret >> + >> +L(thread_start): >> + cfi_startproc >> + /* Clearing frame pointer is insufficient, use CFI. */ >> + cfi_undefined (eip) >> + xorl %ebp, %ebp /* Terminate the stack frame. */ >> + >> + /* Align stack to 16 bytes per the i386 psABI. */ >> + andl $-16, %esp >> + >> + /* The PUSH below will decrement stack pointer by 4 bytes. */ >> + subl $12, %esp >> + >> + /* Set up the argument for the function call. */ >> + pushl %esi /* Argument. */ > > Can you pushl then align and drop the subl? Or does esp need to be aligned before the pushl? We need to align the child stack to 16 bytes first and then push the argument onto the stack for the child function. >> >> + cfi_adjust_cfa_offset (4) >> + call *%edx /* Call function. */ >> + >> + /* Call exit with return value from function call. */ >> + movl %eax, %ebx >> + movl $SYS_ify(exit), %eax >> + ENTER_KERNEL >> + cfi_endproc >> + >> + cfi_startproc >> +PSEUDO_END (__clone3) >> + >> +libc_hidden_def (__clone3) >> +weak_alias (__clone3, clone3) >> diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h >> index 8680b49bf7..3927a1a6e0 100644 >> --- a/sysdeps/unix/sysv/linux/i386/sysdep.h >> +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h >> @@ -291,6 +291,8 @@ struct libc_do_syscall_args >> # define HAVE_TIME_VSYSCALL "__vdso_time" >> # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres" >> >> +# define HAVE_CLONE3_WAPPER 1 >> + >> # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL >> # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1 >> >> -- >> 2.31.1 >>
On 7/14/21 9:33 AM, H.J. Lu via Libc-alpha wrote: > extern int clone3 (struct clone_args *__cl_args, size_t __size, > int (*__func) (void *__arg), void *__arg); OK for glibc 2.34. I think this is important for x86 overall. I'd like to see this in the release. Reviewed-by: Carlos O'Donell <carlos@redhat.com> > --- > sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++ > sysdeps/unix/sysv/linux/i386/sysdep.h | 2 + > 2 files changed, 125 insertions(+) > create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S > > diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S b/sysdeps/unix/sysv/linux/i386/clone3.S > new file mode 100644 > index 0000000000..bef3ce0455 > --- /dev/null > +++ b/sysdeps/unix/sysv/linux/i386/clone3.S > @@ -0,0 +1,123 @@ > +/* The clone3 syscall wrapper. Linux/i386 version. OK. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* clone3() is even more special than fork() as it mucks with stacks > + and invokes a function in the right context after its all over. */ OK. 
> + > +#include <sysdep.h> > + > +/* The userland implementation is: > + int clone3 (struct clone_args *cl_args, size_t size, > + int (*func)(void *arg), void *arg); > + the kernel entry is: > + int clone3 (struct clone_args *cl_args, size_t size); > + > + The parameters are passed on stack from userland: > + 16(%esp) arg > + 12(%esp) func > + 8(%esp) size > + 4(%esp) cl_args > + (%esp) Return address > + > + The kernel expects: > + eax: system call number > + ebx: cl_args > + ecx: size > + */ OK. Nice comment. > + > +#define CL_ARGS 4 > +#define SIZE 8 > +#define FUNC 12 > +#define ARG 16 > + > + .text > +ENTRY (__clone3) > + /* Sanity check arguments. */ > + movl $-EINVAL, %eax > + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */ > + testl %ecx, %ecx > + jz SYSCALL_ERROR_LABEL > + /* Save the function pointer in EDX which is preserved by the > + system call. */ > + movl FUNC(%esp), %edx /* No NULL function pointer. */ > + testl %edx, %edx > + jz SYSCALL_ERROR_LABEL > + > + /* Save EBX and ESI. */ > + pushl %ebx > + cfi_adjust_cfa_offset (4) > + pushl %esi > + cfi_adjust_cfa_offset (4) > + > + /* Save the function argument in ESI which is preserved by the > + system call. */ > + movl (ARG + 8)(%esp), %esi > + > + /* Put cl_args in EBX. */ > + movl %ecx, %ebx > + > + /* Put size in ECX. */ > + movl (SIZE + 8)(%esp), %ecx > + > + /* Do the system call. */ > + movl $SYS_ify(clone3), %eax OK. > + > + /* End FDE now, because in the child the unwind info will be > + wrong. */ > + cfi_endproc > + > + int $0x80 > + test %eax, %eax > + /* No need to restore EBX and ESI in child. */ > + jz L(thread_start) > + > + /* Restore EBX and ESI in parent. */ > + pop %esi > + pop %ebx > + jl SYSCALL_ERROR_LABEL > + > + ret > + > +L(thread_start): > + cfi_startproc > + /* Clearing frame pointer is insufficient, use CFI. */ > + cfi_undefined (eip) > + xorl %ebp, %ebp /* Terminate the stack frame. */ > + > + /* Align stack to 16 bytes per the i386 psABI. 
*/ > + andl $-16, %esp > + > + /* The PUSH below will decrement stack pointer by 4 bytes. */ > + subl $12, %esp > + > + /* Set up the argument for the function call. */ > + pushl %esi /* Argument. */ > + cfi_adjust_cfa_offset (4) > + call *%edx /* Call function. */ > + > + /* Call exit with return value from function call. */ > + movl %eax, %ebx > + movl $SYS_ify(exit), %eax > + ENTER_KERNEL > + cfi_endproc > + > + cfi_startproc > +PSEUDO_END (__clone3) > + > +libc_hidden_def (__clone3) > +weak_alias (__clone3, clone3) > diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h > index 8680b49bf7..3927a1a6e0 100644 > --- a/sysdeps/unix/sysv/linux/i386/sysdep.h > +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h > @@ -291,6 +291,8 @@ struct libc_do_syscall_args > # define HAVE_TIME_VSYSCALL "__vdso_time" > # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres" > > +# define HAVE_CLONE3_WAPPER 1 OK. > + > # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL > # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1 > >
diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S b/sysdeps/unix/sysv/linux/i386/clone3.S new file mode 100644 index 0000000000..bef3ce0455 --- /dev/null +++ b/sysdeps/unix/sysv/linux/i386/clone3.S @@ -0,0 +1,123 @@ +/* The clone3 syscall wrapper. Linux/i386 version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* clone3() is even more special than fork() as it mucks with stacks + and invokes a function in the right context after its all over. */ + +#include <sysdep.h> + +/* The userland implementation is: + int clone3 (struct clone_args *cl_args, size_t size, + int (*func)(void *arg), void *arg); + the kernel entry is: + int clone3 (struct clone_args *cl_args, size_t size); + + The parameters are passed on stack from userland: + 16(%esp) arg + 12(%esp) func + 8(%esp) size + 4(%esp) cl_args + (%esp) Return address + + The kernel expects: + eax: system call number + ebx: cl_args + ecx: size + */ + +#define CL_ARGS 4 +#define SIZE 8 +#define FUNC 12 +#define ARG 16 + + .text +ENTRY (__clone3) + /* Sanity check arguments. */ + movl $-EINVAL, %eax + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */ + testl %ecx, %ecx + jz SYSCALL_ERROR_LABEL + /* Save the function pointer in EDX which is preserved by the + system call. 
*/ + movl FUNC(%esp), %edx /* No NULL function pointer. */ + testl %edx, %edx + jz SYSCALL_ERROR_LABEL + + /* Save EBX and ESI. */ + pushl %ebx + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + + /* Save the function argument in ESI which is preserved by the + system call. */ + movl (ARG + 8)(%esp), %esi + + /* Put cl_args in EBX. */ + movl %ecx, %ebx + + /* Put size in ECX. */ + movl (SIZE + 8)(%esp), %ecx + + /* Do the system call. */ + movl $SYS_ify(clone3), %eax + + /* End FDE now, because in the child the unwind info will be + wrong. */ + cfi_endproc + + int $0x80 + test %eax, %eax + /* No need to restore EBX and ESI in child. */ + jz L(thread_start) + + /* Restore EBX and ESI in parent. */ + pop %esi + pop %ebx + jl SYSCALL_ERROR_LABEL + + ret + +L(thread_start): + cfi_startproc + /* Clearing frame pointer is insufficient, use CFI. */ + cfi_undefined (eip) + xorl %ebp, %ebp /* Terminate the stack frame. */ + + /* Align stack to 16 bytes per the i386 psABI. */ + andl $-16, %esp + + /* The PUSH below will decrement stack pointer by 4 bytes. */ + subl $12, %esp + + /* Set up the argument for the function call. */ + pushl %esi /* Argument. */ + cfi_adjust_cfa_offset (4) + call *%edx /* Call function. */ + + /* Call exit with return value from function call. */ + movl %eax, %ebx + movl $SYS_ify(exit), %eax + ENTER_KERNEL + cfi_endproc + + cfi_startproc +PSEUDO_END (__clone3) + +libc_hidden_def (__clone3) +weak_alias (__clone3, clone3) diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h index 8680b49bf7..3927a1a6e0 100644 --- a/sysdeps/unix/sysv/linux/i386/sysdep.h +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h @@ -291,6 +291,8 @@ struct libc_do_syscall_args # define HAVE_TIME_VSYSCALL "__vdso_time" # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres" +# define HAVE_CLONE3_WAPPER 1 + # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1