new file mode 100644
@@ -0,0 +1,16 @@
+#ifndef _CLONE3_H
+#include_next <clone3.h>
+
+extern __typeof (clone3) __clone3; /* Same prototype as clone3. */
+
+/* The internal wrapper of clone/clone2 and clone3. If __clone3 returns -1
+ with ENOSYS, fall back to clone (clone2 on ia64) with mapped arguments. */
+extern int __clone_internal (struct clone_args *__cl_args,
+ int (*__func) (void *__arg), void *__arg);
+
+#ifndef _ISOMAC
+libc_hidden_proto (__clone3)
+libc_hidden_proto (__clone_internal)
+#endif /* !_ISOMAC */
+
+#endif /* !_CLONE3_H */
@@ -64,7 +64,7 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \
time64-support pselect32 \
xstat fxstat lxstat xstat64 fxstat64 lxstat64 \
fxstatat fxstatat64 \
- xmknod xmknodat
+ xmknod xmknodat clone3 clone-internal
CFLAGS-gethostid.c = -fexceptions
CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables
new file mode 100644
@@ -0,0 +1,91 @@
+/* The internal wrapper of clone and clone3.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sched.h>
+#include <clone_internal.h>
+#include <libc-pointer-arith.h> /* For cast_to_pointer. */
+#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */
+
+#define CLONE_ARGS_SIZE_VER0 64 /* Size up to tls: first published struct. */
+#define CLONE_ARGS_SIZE_VER1 80 /* Size up to set_tid_size: second struct. */
+#define CLONE_ARGS_SIZE_VER2 88 /* Size up to cgroup: third struct. */
+
+#define sizeof_field(TYPE, MEMBER) sizeof ((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof (TYPE, MEMBER) + sizeof_field (TYPE, MEMBER))
+
+_Static_assert (__alignof (struct clone_args) == 8,
+ "__alignof (struct clone_args) != 8");
+_Static_assert (offsetofend (struct clone_args, tls) == CLONE_ARGS_SIZE_VER0,
+ "offsetofend (struct clone_args, tls) != CLONE_ARGS_SIZE_VER0");
+_Static_assert (offsetofend (struct clone_args, set_tid_size) == CLONE_ARGS_SIZE_VER1,
+ "offsetofend (struct clone_args, set_tid_size) != CLONE_ARGS_SIZE_VER1");
+_Static_assert (offsetofend (struct clone_args, cgroup) == CLONE_ARGS_SIZE_VER2,
+ "offsetofend (struct clone_args, cgroup) != CLONE_ARGS_SIZE_VER2");
+_Static_assert (sizeof (struct clone_args) == CLONE_ARGS_SIZE_VER2,
+ "sizeof (struct clone_args) != CLONE_ARGS_SIZE_VER2");
+
+int
+__clone_internal (struct clone_args *cl_args,
+ int (*func) (void *arg), void *arg)
+{
+ int ret;
+#ifdef HAVE_CLONE3_WAPPER
+ /* Try clone3 first. NOTE(review): confirm the WAPPER spelling. */
+ int saved_errno = errno;
+ ret = __clone3 (cl_args, sizeof (*cl_args), func, arg);
+ if (ret != -1 || errno != ENOSYS)
+ return ret;
+
+ /* NB: clone3 failed with ENOSYS. Restore the errno saved on entry before
+ falling back, since errno may be checked against non-zero return value. */
+ __set_errno (saved_errno);
+#endif /* HAVE_CLONE3_WAPPER */
+
+ /* Map clone3 arguments to clone arguments: exit_signal is folded into
+ flags; pidfd, set_tid, set_tid_size and cgroup are not passed on. NB: No
+ need to check invalid clone3 bits in flags nor exit_signal (internal use). */
+ int flags = cl_args->flags | cl_args->exit_signal;
+ void *stack = cast_to_pointer (cl_args->stack);
+
+#ifdef __ia64__
+ ret = __clone2 (func, stack, cl_args->stack_size,
+ flags, arg,
+ cast_to_pointer (cl_args->parent_tid),
+ cast_to_pointer (cl_args->tls),
+ cast_to_pointer (cl_args->child_tid));
+#else /* !__ia64__ */
+# if !_STACK_GROWS_DOWN && !_STACK_GROWS_UP
+# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
+# endif
+
+# if _STACK_GROWS_DOWN
+ stack += cl_args->stack_size; /* Lowest address -> stack top. */
+# endif
+ ret = __clone (func, stack, flags, arg,
+ cast_to_pointer (cl_args->parent_tid),
+ cast_to_pointer (cl_args->tls),
+ cast_to_pointer (cl_args->child_tid));
+#endif /* !__ia64__ */
+ return ret;
+}
+
+libc_hidden_def (__clone_internal)
new file mode 100644
@@ -0,0 +1 @@
+/* An empty placeholder. */
new file mode 100644
@@ -0,0 +1,60 @@
+/* The wrapper of clone3.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _CLONE3_H
+#define _CLONE3_H 1
+
+#include <features.h>
+#include <stdint.h>
+#include <stddef.h>
+
+__BEGIN_DECLS
+
+/* This struct should only be used in an argument to the clone3 system
+ call (along with its size argument). It may be extended with new
+ fields in the future. */
+
+struct clone_args
+{
+ uint64_t flags; /* Flags bit mask. */
+ uint64_t pidfd; /* Where to store PID file descriptor
+ (int *). */
+ uint64_t child_tid; /* Where to store child TID, in child's memory
+ (pid_t *). */
+ uint64_t parent_tid; /* Where to store child TID, in parent's memory
+ (pid_t *). */
+ uint64_t exit_signal; /* Signal to deliver to parent on child
+ termination. */
+ uint64_t stack; /* The lowest address of stack. */
+ uint64_t stack_size; /* Size of stack, in bytes. */
+ uint64_t tls; /* Location of new TLS. */
+ uint64_t set_tid; /* Pointer to a pid_t array
+ (since Linux 5.5). */
+ uint64_t set_tid_size; /* Number of elements in set_tid
+ (since Linux 5.5). */
+ uint64_t cgroup; /* File descriptor for target cgroup
+ of child (since Linux 5.7). */
+} __attribute__ ((aligned (8)));
+
+/* The wrapper of clone3; __SIZE is the size in bytes of *__CL_ARGS. */
+extern int clone3 (struct clone_args *__cl_args, size_t __size,
+ int (*__func) (void *__arg), void *__arg);
+
+__END_DECLS
+
+#endif /* clone3.h */
@@ -25,15 +25,10 @@
#include <ldsodefs.h>
#include <tls.h>
#include <stdint.h>
+#include <clone_internal.h>
#include <arch-fork.h>
-#ifdef __NR_clone2
-# define ARCH_CLONE __clone2
-#else
-# define ARCH_CLONE __clone
-#endif
-
/* See the comments in pthread_create.c for the requirements for these
two macros and the create_thread function. */
@@ -47,7 +42,8 @@ static int start_thread (void *arg) __attribute__ ((noreturn));
static int
create_thread (struct pthread *pd, const struct pthread_attr *attr,
- bool *stopped_start, STACK_VARIABLES_PARMS, bool *thread_ran)
+ bool *stopped_start, void *stackaddr, size_t stacksize,
+ bool *thread_ran)
{
/* Determine whether the newly created threads has to be started
stopped since we have to set the scheduling parameters or set the
@@ -100,9 +96,18 @@ create_thread (struct pthread *pd, const struct pthread_attr *attr,
TLS_DEFINE_INIT_TP (tp, pd);
- if (__glibc_unlikely (ARCH_CLONE (&start_thread, STACK_VARIABLES_ARGS,
- clone_flags, pd, &pd->tid, tp, &pd->tid)
- == -1))
+ struct clone_args args =
+ {
+ .flags = clone_flags,
+ .pidfd = (uintptr_t) &pd->tid,
+ .parent_tid = (uintptr_t) &pd->tid,
+ .child_tid = (uintptr_t) &pd->tid,
+ .stack = (uintptr_t) stackaddr,
+ .stack_size = stacksize,
+ .tls = (uintptr_t) tp,
+ };
+ int ret = __clone_internal (&args, &start_thread, pd);
+ if (__glibc_unlikely (ret == -1))
return errno;
/* It's started now, so if we fail below, we'll have to cancel it
@@ -31,6 +31,7 @@
#include <dl-sysdep.h>
#include <libc-pointer-arith.h>
#include <ldsodefs.h>
+#include <clone_internal.h>
#include "spawn_int.h"
/* The Linux implementation of posix_spawn{p} uses the clone syscall directly
@@ -59,21 +60,6 @@
normal program exit with the exit code 127. */
#define SPAWN_ERROR 127
-#ifdef __ia64__
-# define CLONE(__fn, __stackbase, __stacksize, __flags, __args) \
- __clone2 (__fn, __stackbase, __stacksize, __flags, __args, 0, 0, 0)
-#else
-# define CLONE(__fn, __stack, __stacksize, __flags, __args) \
- __clone (__fn, __stack, __flags, __args)
-#endif
-
-/* Since ia64 wants the stackbase w/clone2, re-use the grows-up macro. */
-#if _STACK_GROWS_UP || defined (__ia64__)
-# define STACK(__stack, __stack_size) (__stack)
-#elif _STACK_GROWS_DOWN
-# define STACK(__stack, __stack_size) (__stack + __stack_size)
-#endif
-
struct posix_spawn_args
{
@@ -378,8 +364,14 @@ __spawnix (pid_t * pid, const char *file,
need for CLONE_SETTLS. Although parent and child share the same TLS
namespace, there will be no concurrent access for TLS variables (errno
for instance). */
- new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size,
- CLONE_VM | CLONE_VFORK | SIGCHLD, &args);
+ struct clone_args clone_args =
+ {
+ .flags = CLONE_VM | CLONE_VFORK,
+ .exit_signal = SIGCHLD,
+ .stack = (uintptr_t) stack,
+ .stack_size = stack_size,
+ };
+ new_pid = __clone_internal (&clone_args, __spawni_child, &args);
/* It needs to collect the case where the auxiliary process was created
but failed to execute the file (due either any preparation step or