diff mbox series

[v2,4/9] aarch64: Add the clone3 wrapper

Message ID 20220930192613.3491147-5-adhemerval.zanella@linaro.org
State New
Headers show
Series Optimize posix_spawn signal setup with clone3 | expand

Commit Message

Adhemerval Zanella Netto Sept. 30, 2022, 7:26 p.m. UTC
It follow the internal signature:

  extern int clone3 (struct clone_args *__cl_args, size_t __size,
 int (*__func) (void *__arg), void *__arg);

And x86_64 semantics to return EINVAL if either cl_args or func
is NULL.  The stack is 16-byte aligned prior executing func.

Checked on aarch64-linux-gnu.
---
 sysdeps/unix/sysv/linux/aarch64/clone3.S | 90 ++++++++++++++++++++++++
 sysdeps/unix/sysv/linux/aarch64/sysdep.h |  2 +
 2 files changed, 92 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/aarch64/clone3.S

Comments

Szabolcs Nagy Nov. 2, 2022, 12:12 p.m. UTC | #1
The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
> It follow the internal signature:
> 
>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>  int (*__func) (void *__arg), void *__arg);
> 
> And x86_64 semantics to return EINVAL if either cl_args or func
> is NULL.  The stack is 16-byte aligned prior executing func.

"x86_64 semantics" sounds wrong: maybe this should be documented?
i'd expect 0 cl_args/func to be UB like in most posix apis.

and aligning sp in the child fails if signals are allowed there
(pthreads does not allow signals now, direct callers might).
i dont know if that's a concert (or if unaligned stack is
something we should fix up in clone3).

> 
> Checked on aarch64-linux-gnu.
> ---
>  sysdeps/unix/sysv/linux/aarch64/clone3.S | 90 ++++++++++++++++++++++++
>  sysdeps/unix/sysv/linux/aarch64/sysdep.h |  2 +
>  2 files changed, 92 insertions(+)
>  create mode 100644 sysdeps/unix/sysv/linux/aarch64/clone3.S
> 
> diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S
> new file mode 100644
> index 0000000000..dba93430eb
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S
> @@ -0,0 +1,90 @@
> +/* The clone3 syscall wrapper.  Linux/aarch64 version.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#define _ERRNO_H        1
> +#include <bits/errno.h>
> +
> +/* The userland implementation is:
> +   int clone3 (struct clone_args *cl_args, size_t size,
> +               int (*func)(void *arg), void *arg);
> +
> +   the kernel entry is:
> +   int clone3 (struct clone_args *cl_args, size_t size);
> +
> +   The parameters are passed in registers from userland:
> +   x0: cl_args
> +   x1: size
> +   x2: func
> +   x3: arg  */
> +
> +        .text
> +ENTRY(__clone3)
> +	PTR_ARG (0)
> +	PTR_ARG (1)
> +	PTR_ARG (3)
> +	PTR_ARG (4)
> +	/* Save args for the child.  */
> +	mov	x10, x0		/* cl_args  */
> +	mov	x11, x2		/* func	 */
> +	mov	x12, x3		/* args  */
> +
> +	/* Sanity check args.  */
> +	mov	x0, #-EINVAL
> +	cbz	x10, .Lsyscall_error	/* No NULL cl_args pointer.  */
> +	cbz	x2, .Lsyscall_error	/* No NULL function pointer.  */
> +
> +	/* Do the system call, the kernel expects:
> +	   x8: system call number
> +	   x0: cl_args
> +	   x1: size  */
> +	mov	x0, x10
> +	mov	x8, #SYS_ify(clone3)
> +	svc	0x0
> +
> +	cmp	x0, #0
> +	beq	thread_start
> +	blt	.Lsyscall_error
> +	RET
> +PSEUDO_END (__clone3)
> +
> +	.align 4
> +	.type thread_start, %function
> +thread_start:
> +	cfi_startproc
> +	cfi_undefined (x30)
> +	mov	x29, 0
> +
> +	/* Align sp.  */
> +	mov	x0, sp
> +	and	x0, x0, -16
> +	mov	sp, x0
> +
> +	/* Pick the function arg and execute.  */
> +	mov	x0, x12
> +	blr	x11
> +
> +	/* We are done, pass the return value through x0.  */
> +	mov	x8, #SYS_ify(exit)
> +	svc	0x0
> +	cfi_endproc
> +	.size thread_start, .-thread_start
> +
> +libc_hidden_def (__clone3)
> +weak_alias (__clone3, clone3)
> diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> index f1853e012f..42bb22f5e6 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> @@ -164,6 +164,8 @@
>  # define HAVE_CLOCK_GETTIME64_VSYSCALL	"__kernel_clock_gettime"
>  # define HAVE_GETTIMEOFDAY_VSYSCALL	"__kernel_gettimeofday"
>  
> +# define HAVE_CLONE3_WRAPPER		1
> +
>  # undef INTERNAL_SYSCALL_RAW
>  # define INTERNAL_SYSCALL_RAW(name, nr, args...)		\
>    ({ long _sys_result;						\
> -- 
> 2.34.1
>
Adhemerval Zanella Netto Nov. 3, 2022, 1:15 p.m. UTC | #2
On 02/11/22 09:12, Szabolcs Nagy wrote:
> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>> It follow the internal signature:
>>
>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>  int (*__func) (void *__arg), void *__arg);
>>
>> And x86_64 semantics to return EINVAL if either cl_args or func
>> is NULL.  The stack is 16-byte aligned prior executing func.
> 
> "x86_64 semantics" sounds wrong: maybe this should be documented?
> i'd expect 0 cl_args/func to be UB like in most posix apis.

Right, I think it is worth to document the function semantic
properly at least on its internal header (include/clone_internal.h).
H.J also added a new clone3.h headers, which is not currently installed
that I am inclined to just remove it from now.  We might reinstate 
if/when we decide to provide the clone3 as an ABI.

And returning EINVAL for 0 cl_args/func aligns with our exported clone
interface, where EINVAL is also returned for 0 function argument.

> 
> and aligning sp in the child fails if signals are allowed there
> (pthreads does not allow signals now, direct callers might).
> i dont know if that's a concert (or if unaligned stack is
> something we should fix up in clone3).

It was overlooked on initial x86_64 clone3 implementation as well.  I
think it better to just return EINVAL for unaligned stacks and avoid
to change the stack pointer in the created thread.

> 
>>
>> Checked on aarch64-linux-gnu.
>> ---
>>  sysdeps/unix/sysv/linux/aarch64/clone3.S | 90 ++++++++++++++++++++++++
>>  sysdeps/unix/sysv/linux/aarch64/sysdep.h |  2 +
>>  2 files changed, 92 insertions(+)
>>  create mode 100644 sysdeps/unix/sysv/linux/aarch64/clone3.S
>>
>> diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S
>> new file mode 100644
>> index 0000000000..dba93430eb
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S
>> @@ -0,0 +1,90 @@
>> +/* The clone3 syscall wrapper.  Linux/aarch64 version.
>> +   Copyright (C) 2022 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <sysdep.h>
>> +#define _ERRNO_H        1
>> +#include <bits/errno.h>
>> +
>> +/* The userland implementation is:
>> +   int clone3 (struct clone_args *cl_args, size_t size,
>> +               int (*func)(void *arg), void *arg);
>> +
>> +   the kernel entry is:
>> +   int clone3 (struct clone_args *cl_args, size_t size);
>> +
>> +   The parameters are passed in registers from userland:
>> +   x0: cl_args
>> +   x1: size
>> +   x2: func
>> +   x3: arg  */
>> +
>> +        .text
>> +ENTRY(__clone3)
>> +	PTR_ARG (0)
>> +	PTR_ARG (1)
>> +	PTR_ARG (3)
>> +	PTR_ARG (4)
>> +	/* Save args for the child.  */
>> +	mov	x10, x0		/* cl_args  */
>> +	mov	x11, x2		/* func	 */
>> +	mov	x12, x3		/* args  */
>> +
>> +	/* Sanity check args.  */
>> +	mov	x0, #-EINVAL
>> +	cbz	x10, .Lsyscall_error	/* No NULL cl_args pointer.  */
>> +	cbz	x2, .Lsyscall_error	/* No NULL function pointer.  */
>> +
>> +	/* Do the system call, the kernel expects:
>> +	   x8: system call number
>> +	   x0: cl_args
>> +	   x1: size  */
>> +	mov	x0, x10
>> +	mov	x8, #SYS_ify(clone3)
>> +	svc	0x0
>> +
>> +	cmp	x0, #0
>> +	beq	thread_start
>> +	blt	.Lsyscall_error
>> +	RET
>> +PSEUDO_END (__clone3)
>> +
>> +	.align 4
>> +	.type thread_start, %function
>> +thread_start:
>> +	cfi_startproc
>> +	cfi_undefined (x30)
>> +	mov	x29, 0
>> +
>> +	/* Align sp.  */
>> +	mov	x0, sp
>> +	and	x0, x0, -16
>> +	mov	sp, x0
>> +
>> +	/* Pick the function arg and execute.  */
>> +	mov	x0, x12
>> +	blr	x11
>> +
>> +	/* We are done, pass the return value through x0.  */
>> +	mov	x8, #SYS_ify(exit)
>> +	svc	0x0
>> +	cfi_endproc
>> +	.size thread_start, .-thread_start
>> +
>> +libc_hidden_def (__clone3)
>> +weak_alias (__clone3, clone3)
>> diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
>> index f1853e012f..42bb22f5e6 100644
>> --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
>> +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
>> @@ -164,6 +164,8 @@
>>  # define HAVE_CLOCK_GETTIME64_VSYSCALL	"__kernel_clock_gettime"
>>  # define HAVE_GETTIMEOFDAY_VSYSCALL	"__kernel_gettimeofday"
>>  
>> +# define HAVE_CLONE3_WRAPPER		1
>> +
>>  # undef INTERNAL_SYSCALL_RAW
>>  # define INTERNAL_SYSCALL_RAW(name, nr, args...)		\
>>    ({ long _sys_result;						\
>> -- 
>> 2.34.1
>>
Szabolcs Nagy Nov. 3, 2022, 2:01 p.m. UTC | #3
The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
> On 02/11/22 09:12, Szabolcs Nagy wrote:
> > The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
> >> It follow the internal signature:
> >>
> >>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
> >>  int (*__func) (void *__arg), void *__arg);
> >>
> >> And x86_64 semantics to return EINVAL if either cl_args or func
> >> is NULL.  The stack is 16-byte aligned prior executing func.
> > 
> > "x86_64 semantics" sounds wrong: maybe this should be documented?
> > i'd expect 0 cl_args/func to be UB like in most posix apis.
> 
> Right, I think it is worth to document the function semantic
> properly at least on its internal header (include/clone_internal.h).
> H.J also added a new clone3.h headers, which is not currently installed
> that I am inclined to just remove it from now.  We might reinstate 
> if/when we decide to provide the clone3 as an ABI.
> 
> And returning EINVAL for 0 cl_args/func aligns with our exported clone
> interface, where EINVAL is also returned for 0 function argument.

ok.

> > 
> > and aligning sp in the child fails if signals are allowed there
> > (pthreads does not allow signals now, direct callers might).
> > i dont know if that's a concert (or if unaligned stack is
> > something we should fix up in clone3).
> 
> It was overlooked on initial x86_64 clone3 implementation as well.  I
> think it better to just return EINVAL for unaligned stacks and avoid
> to change the stack pointer in the created thread.

long time ago linux did that on aarch64, but it was removed:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a

i think in clone3 the kernel should have aligned (it knows
the bounds now), doing it in the userspace wrapper is weird
(should we adjust the stack size?). and not doing it at all
makes clone3 hard to use portably (user has to know target
specific pcs requirements).

not sure what's the best way forward.
Adhemerval Zanella Netto Nov. 3, 2022, 4:22 p.m. UTC | #4
On 03/11/22 11:01, Szabolcs Nagy wrote:
> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
>> On 02/11/22 09:12, Szabolcs Nagy wrote:
>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>>>> It follow the internal signature:
>>>>
>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>>>  int (*__func) (void *__arg), void *__arg);
>>>>
>>>> And x86_64 semantics to return EINVAL if either cl_args or func
>>>> is NULL.  The stack is 16-byte aligned prior executing func.
>>>
>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
>>
>> Right, I think it is worth to document the function semantic
>> properly at least on its internal header (include/clone_internal.h).
>> H.J also added a new clone3.h headers, which is not currently installed
>> that I am inclined to just remove it from now.  We might reinstate 
>> if/when we decide to provide the clone3 as an ABI.
>>
>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
>> interface, where EINVAL is also returned for 0 function argument.
> 
> ok.
> 
>>>
>>> and aligning sp in the child fails if signals are allowed there
>>> (pthreads does not allow signals now, direct callers might).
>>> i dont know if that's a concert (or if unaligned stack is
>>> something we should fix up in clone3).
>>
>> It was overlooked on initial x86_64 clone3 implementation as well.  I
>> think it better to just return EINVAL for unaligned stacks and avoid
>> to change the stack pointer in the created thread.
> 
> long time ago linux did that on aarch64, but it was removed:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
> 
> i think in clone3 the kernel should have aligned (it knows
> the bounds now), doing it in the userspace wrapper is weird
> (should we adjust the stack size?). and not doing it at all
> makes clone3 hard to use portably (user has to know target
> specific pcs requirements).
> 
> not sure what's the best way forward.

I think the stack size won't matter much here, at least not from
kernel point of view (the resulting stack size will most likely
be page aligned anyway).  But I think this kernel commit makes a good
point that silently adjusting the stack in userland is not the
correct approach, I think H.J has done to make it consistent with
glibc clone implementation which does it.

IMHO the best approach would to just remove the stack alignment,
since it incurs the signal handling issue.
Szabolcs Nagy Nov. 3, 2022, 4:31 p.m. UTC | #5
The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
> 
> 
> On 03/11/22 11:01, Szabolcs Nagy wrote:
> > The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
> >> On 02/11/22 09:12, Szabolcs Nagy wrote:
> >>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
> >>>> It follow the internal signature:
> >>>>
> >>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
> >>>>  int (*__func) (void *__arg), void *__arg);
> >>>>
> >>>> And x86_64 semantics to return EINVAL if either cl_args or func
> >>>> is NULL.  The stack is 16-byte aligned prior executing func.
> >>>
> >>> "x86_64 semantics" sounds wrong: maybe this should be documented?
> >>> i'd expect 0 cl_args/func to be UB like in most posix apis.
> >>
> >> Right, I think it is worth to document the function semantic
> >> properly at least on its internal header (include/clone_internal.h).
> >> H.J also added a new clone3.h headers, which is not currently installed
> >> that I am inclined to just remove it from now.  We might reinstate 
> >> if/when we decide to provide the clone3 as an ABI.
> >>
> >> And returning EINVAL for 0 cl_args/func aligns with our exported clone
> >> interface, where EINVAL is also returned for 0 function argument.
> > 
> > ok.
> > 
> >>>
> >>> and aligning sp in the child fails if signals are allowed there
> >>> (pthreads does not allow signals now, direct callers might).
> >>> i dont know if that's a concert (or if unaligned stack is
> >>> something we should fix up in clone3).
> >>
> >> It was overlooked on initial x86_64 clone3 implementation as well.  I
> >> think it better to just return EINVAL for unaligned stacks and avoid
> >> to change the stack pointer in the created thread.
> > 
> > long time ago linux did that on aarch64, but it was removed:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
> > 
> > i think in clone3 the kernel should have aligned (it knows
> > the bounds now), doing it in the userspace wrapper is weird
> > (should we adjust the stack size?). and not doing it at all
> > makes clone3 hard to use portably (user has to know target
> > specific pcs requirements).
> > 
> > not sure what's the best way forward.
> 
> I think the stack size won't matter much here, at least not from
> kernel point of view (the resulting stack size will most likely
> be page aligned anyway).  But I think this kernel commit makes a good
> point that silently adjusting the stack in userland is not the
> correct approach, I think H.J has done to make it consistent with
> glibc clone implementation which does it.
> 
> IMHO the best approach would to just remove the stack alignment,
> since it incurs the signal handling issue.

current generic clone callers dont align the stack and
e.g. unaligned pthread custom stack should work.

so we have to do arch specific stack alignment somewhere,
maybe in pthread_create?
Adhemerval Zanella Netto Nov. 3, 2022, 4:39 p.m. UTC | #6
On 03/11/22 13:31, Szabolcs Nagy wrote:
> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
>>
>>
>> On 03/11/22 11:01, Szabolcs Nagy wrote:
>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>>>>>> It follow the internal signature:
>>>>>>
>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>>>>>  int (*__func) (void *__arg), void *__arg);
>>>>>>
>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
>>>>>
>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
>>>>
>>>> Right, I think it is worth to document the function semantic
>>>> properly at least on its internal header (include/clone_internal.h).
>>>> H.J also added a new clone3.h headers, which is not currently installed
>>>> that I am inclined to just remove it from now.  We might reinstate 
>>>> if/when we decide to provide the clone3 as an ABI.
>>>>
>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
>>>> interface, where EINVAL is also returned for 0 function argument.
>>>
>>> ok.
>>>
>>>>>
>>>>> and aligning sp in the child fails if signals are allowed there
>>>>> (pthreads does not allow signals now, direct callers might).
>>>>> i dont know if that's a concert (or if unaligned stack is
>>>>> something we should fix up in clone3).
>>>>
>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
>>>> think it better to just return EINVAL for unaligned stacks and avoid
>>>> to change the stack pointer in the created thread.
>>>
>>> long time ago linux did that on aarch64, but it was removed:
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
>>>
>>> i think in clone3 the kernel should have aligned (it knows
>>> the bounds now), doing it in the userspace wrapper is weird
>>> (should we adjust the stack size?). and not doing it at all
>>> makes clone3 hard to use portably (user has to know target
>>> specific pcs requirements).
>>>
>>> not sure what's the best way forward.
>>
>> I think the stack size won't matter much here, at least not from
>> kernel point of view (the resulting stack size will most likely
>> be page aligned anyway).  But I think this kernel commit makes a good
>> point that silently adjusting the stack in userland is not the
>> correct approach, I think H.J has done to make it consistent with
>> glibc clone implementation which does it.
>>
>> IMHO the best approach would to just remove the stack alignment,
>> since it incurs the signal handling issue.
> 
> current generic clone callers dont align the stack and
> e.g. unaligned pthread custom stack should work.
> 
> so we have to do arch specific stack alignment somewhere,
> maybe in pthread_create?

I am thinking on __clone_internal, where if an unaligned stack is
used it creates a new clone_args struct with adjust arguments.  It
can adjust the struct in place (not sure which is better).
Szabolcs Nagy Nov. 3, 2022, 4:52 p.m. UTC | #7
The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
> 
> 
> On 03/11/22 13:31, Szabolcs Nagy wrote:
> > The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
> >>
> >>
> >> On 03/11/22 11:01, Szabolcs Nagy wrote:
> >>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
> >>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
> >>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
> >>>>>> It follow the internal signature:
> >>>>>>
> >>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
> >>>>>>  int (*__func) (void *__arg), void *__arg);
> >>>>>>
> >>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
> >>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
> >>>>>
> >>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
> >>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
> >>>>
> >>>> Right, I think it is worth to document the function semantic
> >>>> properly at least on its internal header (include/clone_internal.h).
> >>>> H.J also added a new clone3.h headers, which is not currently installed
> >>>> that I am inclined to just remove it from now.  We might reinstate 
> >>>> if/when we decide to provide the clone3 as an ABI.
> >>>>
> >>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
> >>>> interface, where EINVAL is also returned for 0 function argument.
> >>>
> >>> ok.
> >>>
> >>>>>
> >>>>> and aligning sp in the child fails if signals are allowed there
> >>>>> (pthreads does not allow signals now, direct callers might).
> >>>>> i dont know if that's a concert (or if unaligned stack is
> >>>>> something we should fix up in clone3).
> >>>>
> >>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
> >>>> think it better to just return EINVAL for unaligned stacks and avoid
> >>>> to change the stack pointer in the created thread.
> >>>
> >>> long time ago linux did that on aarch64, but it was removed:
> >>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
> >>>
> >>> i think in clone3 the kernel should have aligned (it knows
> >>> the bounds now), doing it in the userspace wrapper is weird
> >>> (should we adjust the stack size?). and not doing it at all
> >>> makes clone3 hard to use portably (user has to know target
> >>> specific pcs requirements).
> >>>
> >>> not sure what's the best way forward.
> >>
> >> I think the stack size won't matter much here, at least not from
> >> kernel point of view (the resulting stack size will most likely
> >> be page aligned anyway).  But I think this kernel commit makes a good
> >> point that silently adjusting the stack in userland is not the
> >> correct approach, I think H.J has done to make it consistent with
> >> glibc clone implementation which does it.
> >>
> >> IMHO the best approach would to just remove the stack alignment,
> >> since it incurs the signal handling issue.
> > 
> > current generic clone callers dont align the stack and
> > e.g. unaligned pthread custom stack should work.
> > 
> > so we have to do arch specific stack alignment somewhere,
> > maybe in pthread_create?
> 
> I am thinking on __clone_internal, where if an unaligned stack is
> used it creates a new clone_args struct with adjust arguments.  It
> can adjust the struct in place (not sure which is better).

if the api is not exposed, then i think the arg can be modified
in place. (if clone3 api is exposed to users then we should not
modify user structs unless the clone3 api contract explicitly
allows this.)

either aligning in pthread_create or __clone_internal works for me,
the target specific clone3 syscall should not in case that gets
exposed to users.
Adhemerval Zanella Netto Nov. 3, 2022, 4:55 p.m. UTC | #8
On 03/11/22 13:52, Szabolcs Nagy wrote:
> The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
>>
>>
>> On 03/11/22 13:31, Szabolcs Nagy wrote:
>>> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
>>>>
>>>>
>>>> On 03/11/22 11:01, Szabolcs Nagy wrote:
>>>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
>>>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
>>>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>>>>>>>> It follow the internal signature:
>>>>>>>>
>>>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>>>>>>>  int (*__func) (void *__arg), void *__arg);
>>>>>>>>
>>>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
>>>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
>>>>>>>
>>>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
>>>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
>>>>>>
>>>>>> Right, I think it is worth to document the function semantic
>>>>>> properly at least on its internal header (include/clone_internal.h).
>>>>>> H.J also added a new clone3.h headers, which is not currently installed
>>>>>> that I am inclined to just remove it from now.  We might reinstate 
>>>>>> if/when we decide to provide the clone3 as an ABI.
>>>>>>
>>>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
>>>>>> interface, where EINVAL is also returned for 0 function argument.
>>>>>
>>>>> ok.
>>>>>
>>>>>>>
>>>>>>> and aligning sp in the child fails if signals are allowed there
>>>>>>> (pthreads does not allow signals now, direct callers might).
>>>>>>> i dont know if that's a concert (or if unaligned stack is
>>>>>>> something we should fix up in clone3).
>>>>>>
>>>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
>>>>>> think it better to just return EINVAL for unaligned stacks and avoid
>>>>>> to change the stack pointer in the created thread.
>>>>>
>>>>> long time ago linux did that on aarch64, but it was removed:
>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
>>>>>
>>>>> i think in clone3 the kernel should have aligned (it knows
>>>>> the bounds now), doing it in the userspace wrapper is weird
>>>>> (should we adjust the stack size?). and not doing it at all
>>>>> makes clone3 hard to use portably (user has to know target
>>>>> specific pcs requirements).
>>>>>
>>>>> not sure what's the best way forward.
>>>>
>>>> I think the stack size won't matter much here, at least not from
>>>> kernel point of view (the resulting stack size will most likely
>>>> be page aligned anyway).  But I think this kernel commit makes a good
>>>> point that silently adjusting the stack in userland is not the
>>>> correct approach, I think H.J has done to make it consistent with
>>>> glibc clone implementation which does it.
>>>>
>>>> IMHO the best approach would to just remove the stack alignment,
>>>> since it incurs the signal handling issue.
>>>
>>> current generic clone callers dont align the stack and
>>> e.g. unaligned pthread custom stack should work.
>>>
>>> so we have to do arch specific stack alignment somewhere,
>>> maybe in pthread_create?
>>
>> I am thinking on __clone_internal, where if an unaligned stack is
>> used it creates a new clone_args struct with adjust arguments.  It
>> can adjust the struct in place (not sure which is better).
> 
> if the api is not exposed, then i think the arg can be modified
> in place. (if clone3 api is exposed to users then we should not
> modify user structs unless the clone3 api contract explicitly
> allows this.)
> 
> either aligning in pthread_create or __clone_internal works for me,
> the target specific clone3 syscall should not in case that gets
> exposed to users.
> 

The arg modification would be done only internally by __clone_internal,
if we ever export __clone3 it will not mess with stack alignment (my
idea is to remove it from x86_64 as well).
H.J. Lu Nov. 3, 2022, 8:55 p.m. UTC | #9
On Thu, Nov 3, 2022 at 9:55 AM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 03/11/22 13:52, Szabolcs Nagy wrote:
> > The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
> >>
> >>
> >> On 03/11/22 13:31, Szabolcs Nagy wrote:
> >>> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
> >>>>
> >>>>
> >>>> On 03/11/22 11:01, Szabolcs Nagy wrote:
> >>>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
> >>>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
> >>>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
> >>>>>>>> It follow the internal signature:
> >>>>>>>>
> >>>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
> >>>>>>>>  int (*__func) (void *__arg), void *__arg);
> >>>>>>>>
> >>>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
> >>>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
> >>>>>>>
> >>>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
> >>>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
> >>>>>>
> >>>>>> Right, I think it is worth to document the function semantic
> >>>>>> properly at least on its internal header (include/clone_internal.h).
> >>>>>> H.J also added a new clone3.h headers, which is not currently installed
> >>>>>> that I am inclined to just remove it from now.  We might reinstate
> >>>>>> if/when we decide to provide the clone3 as an ABI.
> >>>>>>
> >>>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
> >>>>>> interface, where EINVAL is also returned for 0 function argument.
> >>>>>
> >>>>> ok.
> >>>>>
> >>>>>>>
> >>>>>>> and aligning sp in the child fails if signals are allowed there
> >>>>>>> (pthreads does not allow signals now, direct callers might).
> >>>>>>> i dont know if that's a concert (or if unaligned stack is
> >>>>>>> something we should fix up in clone3).
> >>>>>>
> >>>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
> >>>>>> think it better to just return EINVAL for unaligned stacks and avoid
> >>>>>> to change the stack pointer in the created thread.
> >>>>>
> >>>>> long time ago linux did that on aarch64, but it was removed:
> >>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
> >>>>>
> >>>>> i think in clone3 the kernel should have aligned (it knows
> >>>>> the bounds now), doing it in the userspace wrapper is weird
> >>>>> (should we adjust the stack size?). and not doing it at all
> >>>>> makes clone3 hard to use portably (user has to know target
> >>>>> specific pcs requirements).
> >>>>>
> >>>>> not sure what's the best way forward.
> >>>>
> >>>> I think the stack size won't matter much here, at least not from
> >>>> kernel point of view (the resulting stack size will most likely
> >>>> be page aligned anyway).  But I think this kernel commit makes a good
> >>>> point that silently adjusting the stack in userland is not the
> >>>> correct approach, I think H.J has done to make it consistent with
> >>>> glibc clone implementation which does it.
> >>>>
> >>>> IMHO the best approach would to just remove the stack alignment,
> >>>> since it incurs the signal handling issue.
> >>>
> >>> current generic clone callers dont align the stack and
> >>> e.g. unaligned pthread custom stack should work.
> >>>
> >>> so we have to do arch specific stack alignment somewhere,
> >>> maybe in pthread_create?
> >>
> >> I am thinking on __clone_internal, where if an unaligned stack is
> >> used it creates a new clone_args struct with adjust arguments.  It
> >> can adjust the struct in place (not sure which is better).
> >
> > if the api is not exposed, then i think the arg can be modified
> > in place. (if clone3 api is exposed to users then we should not
> > modify user structs unless the clone3 api contract explicitly
> > allows this.)
> >
> > either aligning in pthread_create or __clone_internal works for me,
> > the target specific clone3 syscall should not in case that gets
> > exposed to users.
> >
>
> The arg modification would be done only internally by __clone_internal,
> if we ever export __clone3 it will not mess with stack alignment (my
> idea is to remove it from x86_64 as well).

Is there a bug for the signal handling issue?
Adhemerval Zanella Netto Nov. 3, 2022, 9:22 p.m. UTC | #10
On 03/11/22 13:55, Adhemerval Zanella Netto wrote:
> 
> 
> On 03/11/22 13:52, Szabolcs Nagy wrote:
>> The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
>>>
>>>
>>> On 03/11/22 13:31, Szabolcs Nagy wrote:
>>>> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
>>>>>
>>>>>
>>>>> On 03/11/22 11:01, Szabolcs Nagy wrote:
>>>>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
>>>>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
>>>>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>>>>>>>>> It follow the internal signature:
>>>>>>>>>
>>>>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>>>>>>>>  int (*__func) (void *__arg), void *__arg);
>>>>>>>>>
>>>>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
>>>>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
>>>>>>>>
>>>>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
>>>>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
>>>>>>>
>>>>>>> Right, I think it is worth to document the function semantic
>>>>>>> properly at least on its internal header (include/clone_internal.h).
>>>>>>> H.J also added a new clone3.h headers, which is not currently installed
>>>>>>> that I am inclined to just remove it from now.  We might reinstate 
>>>>>>> if/when we decide to provide the clone3 as an ABI.
>>>>>>>
>>>>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
>>>>>>> interface, where EINVAL is also returned for 0 function argument.
>>>>>>
>>>>>> ok.
>>>>>>
>>>>>>>>
>>>>>>>> and aligning sp in the child fails if signals are allowed there
>>>>>>>> (pthreads does not allow signals now, direct callers might).
>>>>>>>> i dont know if that's a concert (or if unaligned stack is
>>>>>>>> something we should fix up in clone3).
>>>>>>>
>>>>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
>>>>>>> think it better to just return EINVAL for unaligned stacks and avoid
>>>>>>> to change the stack pointer in the created thread.
>>>>>>
>>>>>> long time ago linux did that on aarch64, but it was removed:
>>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
>>>>>>
>>>>>> i think in clone3 the kernel should have aligned (it knows
>>>>>> the bounds now), doing it in the userspace wrapper is weird
>>>>>> (should we adjust the stack size?). and not doing it at all
>>>>>> makes clone3 hard to use portably (user has to know target
>>>>>> specific pcs requirements).
>>>>>>
>>>>>> not sure what's the best way forward.
>>>>>
>>>>> I think the stack size won't matter much here, at least not from
>>>>> kernel point of view (the resulting stack size will most likely
>>>>> be page aligned anyway).  But I think this kernel commit makes a good
>>>>> point that silently adjusting the stack in userland is not the
>>>>> correct approach, I think H.J has done to make it consistent with
>>>>> glibc clone implementation which does it.
>>>>>
>>>>> IMHO the best approach would to just remove the stack alignment,
>>>>> since it incurs the signal handling issue.
>>>>
>>>> current generic clone callers dont align the stack and
>>>> e.g. unaligned pthread custom stack should work.
>>>>
>>>> so we have to do arch specific stack alignment somewhere,
>>>> maybe in pthread_create?
>>>
>>> I am thinking on __clone_internal, where if an unaligned stack is
>>> used it creates a new clone_args struct with adjust arguments.  It
>>> can adjust the struct in place (not sure which is better).
>>
>> if the api is not exposed, then i think the arg can be modified
>> in place. (if clone3 api is exposed to users then we should not
>> modify user structs unless the clone3 api contract explicitly
>> allows this.)
>>
>> either aligning in pthread_create or __clone_internal works for me,
>> the target specific clone3 syscall should not in case that gets
>> exposed to users.
>>
> 
> The arg modification would be done only internally by __clone_internal,
> if we ever export __clone3 it will not mess with stack alignment (my
> idea is to remove it from x86_64 as well).

All the internal usage of __clone_internal are done with all signal masked,
so aligning the stack is currently safe.  However, I still think moving out
the stack alignment of __clone3 is still a net gain: it remove an
implementation detail (block/unblock signals) and simplifies the arch-specific
code.

However it makes a possible libc wrap clunky, the caller will need to know
the ABI stack alignment prior to the call since kernel does not automatically
align the stack.
Adhemerval Zanella Netto Nov. 3, 2022, 9:28 p.m. UTC | #11
On 03/11/22 17:55, H.J. Lu wrote:
> On Thu, Nov 3, 2022 at 9:55 AM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 03/11/22 13:52, Szabolcs Nagy wrote:
>>> The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
>>>>
>>>>
>>>> On 03/11/22 13:31, Szabolcs Nagy wrote:
>>>>> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
>>>>>>
>>>>>>
>>>>>> On 03/11/22 11:01, Szabolcs Nagy wrote:
>>>>>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
>>>>>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
>>>>>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>>>>>>>>>> It follow the internal signature:
>>>>>>>>>>
>>>>>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>>>>>>>>>  int (*__func) (void *__arg), void *__arg);
>>>>>>>>>>
>>>>>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
>>>>>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
>>>>>>>>>
>>>>>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
>>>>>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
>>>>>>>>
>>>>>>>> Right, I think it is worth to document the function semantic
>>>>>>>> properly at least on its internal header (include/clone_internal.h).
>>>>>>>> H.J also added a new clone3.h headers, which is not currently installed
>>>>>>>> that I am inclined to just remove it from now.  We might reinstate
>>>>>>>> if/when we decide to provide the clone3 as an ABI.
>>>>>>>>
>>>>>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
>>>>>>>> interface, where EINVAL is also returned for 0 function argument.
>>>>>>>
>>>>>>> ok.
>>>>>>>
>>>>>>>>>
>>>>>>>>> and aligning sp in the child fails if signals are allowed there
>>>>>>>>> (pthreads does not allow signals now, direct callers might).
>>>>>>>>> i dont know if that's a concert (or if unaligned stack is
>>>>>>>>> something we should fix up in clone3).
>>>>>>>>
>>>>>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
>>>>>>>> think it better to just return EINVAL for unaligned stacks and avoid
>>>>>>>> to change the stack pointer in the created thread.
>>>>>>>
>>>>>>> long time ago linux did that on aarch64, but it was removed:
>>>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
>>>>>>>
>>>>>>> i think in clone3 the kernel should have aligned (it knows
>>>>>>> the bounds now), doing it in the userspace wrapper is weird
>>>>>>> (should we adjust the stack size?). and not doing it at all
>>>>>>> makes clone3 hard to use portably (user has to know target
>>>>>>> specific pcs requirements).
>>>>>>>
>>>>>>> not sure what's the best way forward.
>>>>>>
>>>>>> I think the stack size won't matter much here, at least not from
>>>>>> kernel point of view (the resulting stack size will most likely
>>>>>> be page aligned anyway).  But I think this kernel commit makes a good
>>>>>> point that silently adjusting the stack in userland is not the
>>>>>> correct approach, I think H.J has done to make it consistent with
>>>>>> glibc clone implementation which does it.
>>>>>>
>>>>>> IMHO the best approach would to just remove the stack alignment,
>>>>>> since it incurs the signal handling issue.
>>>>>
>>>>> current generic clone callers dont align the stack and
>>>>> e.g. unaligned pthread custom stack should work.
>>>>>
>>>>> so we have to do arch specific stack alignment somewhere,
>>>>> maybe in pthread_create?
>>>>
>>>> I am thinking on __clone_internal, where if an unaligned stack is
>>>> used it creates a new clone_args struct with adjust arguments.  It
>>>> can adjust the struct in place (not sure which is better).
>>>
>>> if the api is not exposed, then i think the arg can be modified
>>> in place. (if clone3 api is exposed to users then we should not
>>> modify user structs unless the clone3 api contract explicitly
>>> allows this.)
>>>
>>> either aligning in pthread_create or __clone_internal works for me,
>>> the target specific clone3 syscall should not in case that gets
>>> exposed to users.
>>>
>>
>> The arg modification would be done only internally by __clone_internal,
>> if we ever export __clone3 it will not mess with stack alignment (my
>> idea is to remove it from x86_64 as well).
> 
> Is there a bug for the signal handling issue?

No with current usage, on both pthread_create and posix_spawn glibc mask
all internal signals (including internal ones).
H.J. Lu Nov. 3, 2022, 9:58 p.m. UTC | #12
On Thu, Nov 3, 2022 at 2:22 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 03/11/22 13:55, Adhemerval Zanella Netto wrote:
> >
> >
> > On 03/11/22 13:52, Szabolcs Nagy wrote:
> >> The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
> >>>
> >>>
> >>> On 03/11/22 13:31, Szabolcs Nagy wrote:
> >>>> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
> >>>>>
> >>>>>
> >>>>> On 03/11/22 11:01, Szabolcs Nagy wrote:
> >>>>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
> >>>>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
> >>>>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
> >>>>>>>>> It follow the internal signature:
> >>>>>>>>>
> >>>>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
> >>>>>>>>>  int (*__func) (void *__arg), void *__arg);
> >>>>>>>>>
> >>>>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
> >>>>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
> >>>>>>>>
> >>>>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
> >>>>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
> >>>>>>>
> >>>>>>> Right, I think it is worth to document the function semantic
> >>>>>>> properly at least on its internal header (include/clone_internal.h).
> >>>>>>> H.J also added a new clone3.h headers, which is not currently installed
> >>>>>>> that I am inclined to just remove it from now.  We might reinstate
> >>>>>>> if/when we decide to provide the clone3 as an ABI.
> >>>>>>>
> >>>>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
> >>>>>>> interface, where EINVAL is also returned for 0 function argument.
> >>>>>>
> >>>>>> ok.
> >>>>>>
> >>>>>>>>
> >>>>>>>> and aligning sp in the child fails if signals are allowed there
> >>>>>>>> (pthreads does not allow signals now, direct callers might).
> >>>>>>>> i dont know if that's a concert (or if unaligned stack is
> >>>>>>>> something we should fix up in clone3).
> >>>>>>>
> >>>>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
> >>>>>>> think it better to just return EINVAL for unaligned stacks and avoid
> >>>>>>> to change the stack pointer in the created thread.
> >>>>>>
> >>>>>> long time ago linux did that on aarch64, but it was removed:
> >>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
> >>>>>>
> >>>>>> i think in clone3 the kernel should have aligned (it knows
> >>>>>> the bounds now), doing it in the userspace wrapper is weird
> >>>>>> (should we adjust the stack size?). and not doing it at all
> >>>>>> makes clone3 hard to use portably (user has to know target
> >>>>>> specific pcs requirements).
> >>>>>>
> >>>>>> not sure what's the best way forward.
> >>>>>
> >>>>> I think the stack size won't matter much here, at least not from
> >>>>> kernel point of view (the resulting stack size will most likely
> >>>>> be page aligned anyway).  But I think this kernel commit makes a good
> >>>>> point that silently adjusting the stack in userland is not the
> >>>>> correct approach, I think H.J has done to make it consistent with
> >>>>> glibc clone implementation which does it.
> >>>>>
> >>>>> IMHO the best approach would to just remove the stack alignment,
> >>>>> since it incurs the signal handling issue.
> >>>>
> >>>> current generic clone callers dont align the stack and
> >>>> e.g. unaligned pthread custom stack should work.
> >>>>
> >>>> so we have to do arch specific stack alignment somewhere,
> >>>> maybe in pthread_create?
> >>>
> >>> I am thinking on __clone_internal, where if an unaligned stack is
> >>> used it creates a new clone_args struct with adjust arguments.  It
> >>> can adjust the struct in place (not sure which is better).
> >>
> >> if the api is not exposed, then i think the arg can be modified
> >> in place. (if clone3 api is exposed to users then we should not
> >> modify user structs unless the clone3 api contract explicitly
> >> allows this.)
> >>
> >> either aligning in pthread_create or __clone_internal works for me,
> >> the target specific clone3 syscall should not in case that gets
> >> exposed to users.
> >>
> >
> > The arg modification would be done only internally by __clone_internal,
> > if we ever export __clone3 it will not mess with stack alignment (my
> > idea is to remove it from x86_64 as well).
>
> All the internal usage of __clone_internal are done with all signal masked,
> so aligning the stack is currently safe.  However, I still think moving out
> the stack alignment of __clone3 is still a net gain: it remove an
> implementation detail (block/unblock signals) and simplifies the arch-specific
> code.
>
> However it makes a possible libc wrap clunky, the caller will need to know
> the ABI stack alignment prior to the call since kernel does not automatically
> align the stack.

For the internal clone3, we can drop stack alignment adjustment.  The
internal users are responsible for correct stack alignment.  If there is
a public clone3 wrapper, it should adjust stack alignment.
Adhemerval Zanella Netto Nov. 4, 2022, 12:32 p.m. UTC | #13
On 03/11/22 18:58, H.J. Lu wrote:
> On Thu, Nov 3, 2022 at 2:22 PM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 03/11/22 13:55, Adhemerval Zanella Netto wrote:
>>>
>>>
>>> On 03/11/22 13:52, Szabolcs Nagy wrote:
>>>> The 11/03/2022 13:39, Adhemerval Zanella Netto wrote:
>>>>>
>>>>>
>>>>> On 03/11/22 13:31, Szabolcs Nagy wrote:
>>>>>> The 11/03/2022 13:22, Adhemerval Zanella Netto wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 03/11/22 11:01, Szabolcs Nagy wrote:
>>>>>>>> The 11/03/2022 10:15, Adhemerval Zanella Netto wrote:
>>>>>>>>> On 02/11/22 09:12, Szabolcs Nagy wrote:
>>>>>>>>>> The 09/30/2022 16:26, Adhemerval Zanella via Libc-alpha wrote:
>>>>>>>>>>> It follow the internal signature:
>>>>>>>>>>>
>>>>>>>>>>>   extern int clone3 (struct clone_args *__cl_args, size_t __size,
>>>>>>>>>>>  int (*__func) (void *__arg), void *__arg);
>>>>>>>>>>>
>>>>>>>>>>> And x86_64 semantics to return EINVAL if either cl_args or func
>>>>>>>>>>> is NULL.  The stack is 16-byte aligned prior executing func.
>>>>>>>>>>
>>>>>>>>>> "x86_64 semantics" sounds wrong: maybe this should be documented?
>>>>>>>>>> i'd expect 0 cl_args/func to be UB like in most posix apis.
>>>>>>>>>
>>>>>>>>> Right, I think it is worth to document the function semantic
>>>>>>>>> properly at least on its internal header (include/clone_internal.h).
>>>>>>>>> H.J also added a new clone3.h headers, which is not currently installed
>>>>>>>>> that I am inclined to just remove it from now.  We might reinstate
>>>>>>>>> if/when we decide to provide the clone3 as an ABI.
>>>>>>>>>
>>>>>>>>> And returning EINVAL for 0 cl_args/func aligns with our exported clone
>>>>>>>>> interface, where EINVAL is also returned for 0 function argument.
>>>>>>>>
>>>>>>>> ok.
>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> and aligning sp in the child fails if signals are allowed there
>>>>>>>>>> (pthreads does not allow signals now, direct callers might).
>>>>>>>>>> i dont know if that's a concert (or if unaligned stack is
>>>>>>>>>> something we should fix up in clone3).
>>>>>>>>>
>>>>>>>>> It was overlooked on initial x86_64 clone3 implementation as well.  I
>>>>>>>>> think it better to just return EINVAL for unaligned stacks and avoid
>>>>>>>>> to change the stack pointer in the created thread.
>>>>>>>>
>>>>>>>> long time ago linux did that on aarch64, but it was removed:
>>>>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e6d9a52543338603e25e71e0e4942f05dae0dd8a
>>>>>>>>
>>>>>>>> i think in clone3 the kernel should have aligned (it knows
>>>>>>>> the bounds now), doing it in the userspace wrapper is weird
>>>>>>>> (should we adjust the stack size?). and not doing it at all
>>>>>>>> makes clone3 hard to use portably (user has to know target
>>>>>>>> specific pcs requirements).
>>>>>>>>
>>>>>>>> not sure what's the best way forward.
>>>>>>>
>>>>>>> I think the stack size won't matter much here, at least not from
>>>>>>> kernel point of view (the resulting stack size will most likely
>>>>>>> be page aligned anyway).  But I think this kernel commit makes a good
>>>>>>> point that silently adjusting the stack in userland is not the
>>>>>>> correct approach, I think H.J has done to make it consistent with
>>>>>>> glibc clone implementation which does it.
>>>>>>>
>>>>>>> IMHO the best approach would to just remove the stack alignment,
>>>>>>> since it incurs the signal handling issue.
>>>>>>
>>>>>> current generic clone callers dont align the stack and
>>>>>> e.g. unaligned pthread custom stack should work.
>>>>>>
>>>>>> so we have to do arch specific stack alignment somewhere,
>>>>>> maybe in pthread_create?
>>>>>
>>>>> I am thinking on __clone_internal, where if an unaligned stack is
>>>>> used it creates a new clone_args struct with adjust arguments.  It
>>>>> can adjust the struct in place (not sure which is better).
>>>>
>>>> if the api is not exposed, then i think the arg can be modified
>>>> in place. (if clone3 api is exposed to users then we should not
>>>> modify user structs unless the clone3 api contract explicitly
>>>> allows this.)
>>>>
>>>> either aligning in pthread_create or __clone_internal works for me,
>>>> the target specific clone3 syscall should not in case that gets
>>>> exposed to users.
>>>>
>>>
>>> The arg modification would be done only internally by __clone_internal,
>>> if we ever export __clone3 it will not mess with stack alignment (my
>>> idea is to remove it from x86_64 as well).
>>
>> All the internal usage of __clone_internal are done with all signal masked,
>> so aligning the stack is currently safe.  However, I still think moving out
>> the stack alignment of __clone3 is still a net gain: it remove an
>> implementation detail (block/unblock signals) and simplifies the arch-specific
>> code.
>>
>> However it makes a possible libc wrap clunky, the caller will need to know
>> the ABI stack alignment prior to the call since kernel does not automatically
>> align the stack.
> 
> For the internal clone3, we can drop stack alignment adjustment.  The
> internal users are responsible for correct stack alignment.  If there is
> a public clone3 wrapper, it should adjust stack alignment.
>

Ok, I will a documentation that __clone3 does not align the stack.
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S
new file mode 100644
index 0000000000..dba93430eb
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S
@@ -0,0 +1,90 @@ 
+/* The clone3 syscall wrapper.  Linux/aarch64 version.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#define _ERRNO_H        1
+#include <bits/errno.h>
+
+/* The userland implementation is:
+   int clone3 (struct clone_args *cl_args, size_t size,
+               int (*func)(void *arg), void *arg);
+
+   the kernel entry is:
+   int clone3 (struct clone_args *cl_args, size_t size);
+
+   The parameters are passed in registers from userland:
+   x0: cl_args
+   x1: size
+   x2: func
+   x3: arg  */
+
+        .text
+ENTRY(__clone3)
+	PTR_ARG (0)
+	PTR_ARG (1)
+	PTR_ARG (3)
+	PTR_ARG (4)
+	/* Save args for the child.  */
+	mov	x10, x0		/* cl_args  */
+	mov	x11, x2		/* func	 */
+	mov	x12, x3		/* args  */
+
+	/* Sanity check args.  */
+	mov	x0, #-EINVAL
+	cbz	x10, .Lsyscall_error	/* No NULL cl_args pointer.  */
+	cbz	x2, .Lsyscall_error	/* No NULL function pointer.  */
+
+	/* Do the system call, the kernel expects:
+	   x8: system call number
+	   x0: cl_args
+	   x1: size  */
+	mov	x0, x10
+	mov	x8, #SYS_ify(clone3)
+	svc	0x0
+
+	cmp	x0, #0
+	beq	thread_start
+	blt	.Lsyscall_error
+	RET
+PSEUDO_END (__clone3)
+
+	.align 4
+	.type thread_start, %function
+thread_start:
+	cfi_startproc
+	cfi_undefined (x30)
+	mov	x29, 0
+
+	/* Align sp.  */
+	mov	x0, sp
+	and	x0, x0, -16
+	mov	sp, x0
+
+	/* Pick the function arg and execute.  */
+	mov	x0, x12
+	blr	x11
+
+	/* We are done, pass the return value through x0.  */
+	mov	x8, #SYS_ify(exit)
+	svc	0x0
+	cfi_endproc
+	.size thread_start, .-thread_start
+
+libc_hidden_def (__clone3)
+weak_alias (__clone3, clone3)
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index f1853e012f..42bb22f5e6 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -164,6 +164,8 @@ 
 # define HAVE_CLOCK_GETTIME64_VSYSCALL	"__kernel_clock_gettime"
 # define HAVE_GETTIMEOFDAY_VSYSCALL	"__kernel_gettimeofday"
 
+# define HAVE_CLONE3_WRAPPER		1
+
 # undef INTERNAL_SYSCALL_RAW
 # define INTERNAL_SYSCALL_RAW(name, nr, args...)		\
   ({ long _sys_result;						\