diff mbox series

[4/5] API: Add tst_clone

Message ID 20210211110317.31942-5-rpalethorpe@suse.com
State Changes Requested
Headers show
Series Add close_range01, SAFE_DUP2 and SAFE_CLONE | expand

Commit Message

Richard Palethorpe Feb. 11, 2021, 11:03 a.m. UTC
The raw clone system call and clone3 have relatively simple interfaces
if the stack pointer is set to NULL. The libc wrapper complicates
things hugely. So introduce an interface similar to clone3, but that
falls back to clone.

Not all features of clone3 are implemented in clone, but we could
either return TCONF or implement them in user land.

Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
---
 include/tst_clone.h | 28 +++++++++++++++++++++++++++
 include/tst_test.h  |  2 +-
 lib/tst_clone.c     | 46 +++++++++++++++++++++++++++++++++++++++++++++
 lib/tst_test.c      | 25 ++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 lib/tst_clone.c

Comments

Cyril Hrubis Feb. 11, 2021, 12:51 p.m. UTC | #1
Hi!
> +	int flags;
> +	pid_t pid = -1;
> +
> +	tst_flush();
> +
> +	errno = ENOSYS;
> +	if (__NR_clone3 != __LTP__NR_INVALID_SYSCALL)
> +		pid = syscall(__NR_clone3, &args, sizeof(args));
> +
> +	if (pid == -1 && errno != ENOSYS)
> +		return -1;

As far as I can tell when kernel is too old we would get EINVAL because
the syscall number is not allocated. ENOSYS happens mostly when syscall
number is allocated and kernel does not implement the functionality,
e.g. it's disabled in .config.

I wonder if it's even menaningful to handle ENOSYS here, I doubt that
clone3() can be disabled, or do I miss something?

> +	if (pid != -1)
> +		return pid;
> +
> +	flags = args.exit_signal | args.flags;
> +
> +#ifdef __s390x__
> +	pid = syscall(__NR_clone, NULL, flags);
> +#else
> +	pid = syscall(__NR_clone, flags, NULL);
> +#endif
> +
> +	if (pid == -1)
> +		return -2;
> +
> +	return pid;
> +}
> diff --git a/lib/tst_test.c b/lib/tst_test.c
> index 0714f0a0e..6bbee030b 100644
> --- a/lib/tst_test.c
> +++ b/lib/tst_test.c
> @@ -424,6 +424,31 @@ pid_t safe_fork(const char *filename, unsigned int lineno)
>  	return pid;
>  }
>  
> +pid_t safe_clone(const char *file, const int lineno,
> +		 const struct tst_clone_args *args)
> +{
> +	pid_t pid;
> +
> +	if (!tst_test->forks_child)
> +		tst_brk(TBROK, "test.forks_child must be set!");
> +
> +	pid = tst_clone(args);
> +
> +	switch (pid) {
> +	case -1:
> +		tst_brk_(file, lineno, TBROK | TERRNO, "clone3 failed");
> +		break;
> +	case -2:
> +		tst_brk_(file, lineno, TBROK | TERRNO, "clone failed");
> +		return -1;
> +	}
> +
> +	if (!pid)
> +		atexit(tst_free_all);
> +
> +	return pid;
> +}
> +
>  static struct option {
>  	char *optstr;
>  	char *help;
> -- 
> 2.30.0
> 
> 
> -- 
> Mailing list info: https://lists.linux.it/listinfo/ltp
Richard Palethorpe Feb. 11, 2021, 2:24 p.m. UTC | #2
Hello,

Cyril Hrubis <chrubis@suse.cz> writes:

> Hi!
>> +	int flags;
>> +	pid_t pid = -1;
>> +
>> +	tst_flush();
>> +
>> +	errno = ENOSYS;
>> +	if (__NR_clone3 != __LTP__NR_INVALID_SYSCALL)
>> +		pid = syscall(__NR_clone3, &args, sizeof(args));
>> +
>> +	if (pid == -1 && errno != ENOSYS)
>> +		return -1;
>
> As far as I can tell when kernel is too old we would get EINVAL because
> the syscall number is not allocated. ENOSYS happens mostly when syscall
> number is allocated and kernel does not implement the functionality,
> e.g. it's disabled in .config.
>
> I wonder if it's even menaningful to handle ENOSYS here, I doubt that
> clone3() can be disabled, or do I miss something?

AFAICT it should return ENOSYS if the syscall number is greater than the
current maximum. This is certainly true for riscv and also apears to be
true for arm64 and x86. It is also written in a kernel book I have from
2010 :-p
Cyril Hrubis Feb. 11, 2021, 2:35 p.m. UTC | #3
Hi!
> >> +	int flags;
> >> +	pid_t pid = -1;
> >> +
> >> +	tst_flush();
> >> +
> >> +	errno = ENOSYS;
> >> +	if (__NR_clone3 != __LTP__NR_INVALID_SYSCALL)
> >> +		pid = syscall(__NR_clone3, &args, sizeof(args));
> >> +
> >> +	if (pid == -1 && errno != ENOSYS)
> >> +		return -1;
> >
> > As far as I can tell when kernel is too old we would get EINVAL because
> > the syscall number is not allocated. ENOSYS happens mostly when syscall
> > number is allocated and kernel does not implement the functionality,
> > e.g. it's disabled in .config.
> >
> > I wonder if it's even menaningful to handle ENOSYS here, I doubt that
> > clone3() can be disabled, or do I miss something?
> 
> AFAICT it should return ENOSYS if the syscall number is greater than the
> current maximum. This is certainly true for riscv and also apears to be
> true for arm64 and x86. It is also written in a kernel book I have from
> 2010 :-p

Sounds sane, so we get EINVAL if the syscall number is out of the
syscall table. So I guess that we have to handle both.
Richard Palethorpe Feb. 11, 2021, 3:07 p.m. UTC | #4
Hello,

Cyril Hrubis <chrubis@suse.cz> writes:

> Hi!
>> >> +	int flags;
>> >> +	pid_t pid = -1;
>> >> +
>> >> +	tst_flush();
>> >> +
>> >> +	errno = ENOSYS;
>> >> +	if (__NR_clone3 != __LTP__NR_INVALID_SYSCALL)
>> >> +		pid = syscall(__NR_clone3, &args, sizeof(args));
>> >> +
>> >> +	if (pid == -1 && errno != ENOSYS)
>> >> +		return -1;
>> >
>> > As far as I can tell when kernel is too old we would get EINVAL because
>> > the syscall number is not allocated. ENOSYS happens mostly when syscall
>> > number is allocated and kernel does not implement the functionality,
>> > e.g. it's disabled in .config.
>> >
>> > I wonder if it's even menaningful to handle ENOSYS here, I doubt that
>> > clone3() can be disabled, or do I miss something?
>> 
>> AFAICT it should return ENOSYS if the syscall number is greater than the
>> current maximum. This is certainly true for riscv and also apears to be
>> true for arm64 and x86. It is also written in a kernel book I have from
>> 2010 :-p
>
> Sounds sane, so we get EINVAL if the syscall number is out of the
> syscall table. So I guess that we have to handle both.

I don't know where you are getting EINVAL from?

Try the following

#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(int argc, const char* argv[])
{
	syscall(~0ULL);

	return errno;
}

It returns ENOSYS and strace shows this is not due to any sanity
checking by glibc.

I guess it would actually be a bug if it returned EINVAL although I am
not sure this is specified anywhere.
Cyril Hrubis Feb. 11, 2021, 3:30 p.m. UTC | #5
Hi!
> I don't know where you are getting EINVAL from?
> 
> Try the following
> 
> #include <errno.h>
> #include <unistd.h>
> #include <sys/syscall.h>
> 
> int main(int argc, const char* argv[])
> {
> 	syscall(~0ULL);
> 
> 	return errno;
> }
> 
> It returns ENOSYS and strace shows this is not due to any sanity
> checking by glibc.
> 
> I guess it would actually be a bug if it returned EINVAL although I am
> not sure this is specified anywhere.

And I guess that I got confused again. You get EINVAL when you pass
unsupported flags to a syscall, not unsupported syscall number.
diff mbox series

Patch

diff --git a/include/tst_clone.h b/include/tst_clone.h
index 88188525d..9ffdc68d1 100644
--- a/include/tst_clone.h
+++ b/include/tst_clone.h
@@ -5,6 +5,34 @@ 
 #ifndef TST_CLONE_H__
 #define TST_CLONE_H__
 
+#ifdef TST_TEST_H__
+
+/* The parts of clone3's clone_args we support */
+struct tst_clone_args {
+	uint64_t flags;
+	uint64_t exit_signal;
+};
+
+/* clone3 with fallbacks to clone when possible. Be aware that it
+ * returns -1 if clone3 fails (except ENOSYS), but -2 if clone fails.
+ *
+ * Without CLONE_VM this acts like fork so you may want to set
+ * tst_test.forks_child (safe_clone requires this).
+ *
+ * You should set exit_signal to SIGCHLD for
+ * tst_reap_children. Otherwise you must call wait with the
+ * appropriate parameters.
+ */
+pid_t tst_clone(const struct tst_clone_args *args);
+
+pid_t safe_clone(const char *file, const int lineno,
+		 const struct tst_clone_args *args);
+
+/* "Safe" version of tst_clone */
+#define SAFE_CLONE(args) safe_clone(__FILE__, __LINE__, args)
+
+#endif	/* TST_TEST_H__ */
+
 /* Functions from lib/cloner.c */
 int ltp_clone(unsigned long flags, int (*fn)(void *arg), void *arg,
 		size_t stack_size, void *stack);
diff --git a/include/tst_test.h b/include/tst_test.h
index c87251870..7dab5f761 100644
--- a/include/tst_test.h
+++ b/include/tst_test.h
@@ -29,7 +29,6 @@ 
 #include "tst_process_state.h"
 #include "tst_atomic.h"
 #include "tst_kvercmp.h"
-#include "tst_clone.h"
 #include "tst_kernel.h"
 #include "tst_minmax.h"
 #include "tst_get_bad_addr.h"
@@ -94,6 +93,7 @@  pid_t safe_fork(const char *filename, unsigned int lineno);
 #include "tst_safe_macros.h"
 #include "tst_safe_file_ops.h"
 #include "tst_safe_net.h"
+#include "tst_clone.h"
 
 /*
  * Wait for all children and exit with TBROK if
diff --git a/lib/tst_clone.c b/lib/tst_clone.c
new file mode 100644
index 000000000..07e7f0767
--- /dev/null
+++ b/lib/tst_clone.c
@@ -0,0 +1,46 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2021 SUSE LLC
+ * Richard Palethorpe <rpalethorpe@suse.com>
+ */
+
+#define TST_NO_DEFAULT_MAIN
+
+#include <stddef.h>
+
+#include "tst_test.h"
+#include "lapi/clone.h"
+
+pid_t tst_clone(const struct tst_clone_args *tst_args)
+{
+	struct clone_args args = {
+		.flags = tst_args->flags,
+		.exit_signal = tst_args->exit_signal,
+	};
+	int flags;
+	pid_t pid = -1;
+
+	tst_flush();
+
+	errno = ENOSYS;
+	if (__NR_clone3 != __LTP__NR_INVALID_SYSCALL)
+		pid = syscall(__NR_clone3, &args, sizeof(args));
+
+	if (pid == -1 && errno != ENOSYS)
+		return -1;
+
+	if (pid != -1)
+		return pid;
+
+	flags = args.exit_signal | args.flags;
+
+#ifdef __s390x__
+	pid = syscall(__NR_clone, NULL, flags);
+#else
+	pid = syscall(__NR_clone, flags, NULL);
+#endif
+
+	if (pid == -1)
+		return -2;
+
+	return pid;
+}
diff --git a/lib/tst_test.c b/lib/tst_test.c
index 0714f0a0e..6bbee030b 100644
--- a/lib/tst_test.c
+++ b/lib/tst_test.c
@@ -424,6 +424,31 @@  pid_t safe_fork(const char *filename, unsigned int lineno)
 	return pid;
 }
 
+pid_t safe_clone(const char *file, const int lineno,
+		 const struct tst_clone_args *args)
+{
+	pid_t pid;
+
+	if (!tst_test->forks_child)
+		tst_brk(TBROK, "test.forks_child must be set!");
+
+	pid = tst_clone(args);
+
+	switch (pid) {
+	case -1:
+		tst_brk_(file, lineno, TBROK | TERRNO, "clone3 failed");
+		break;
+	case -2:
+		tst_brk_(file, lineno, TBROK | TERRNO, "clone failed");
+		return -1;
+	}
+
+	if (!pid)
+		atexit(tst_free_all);
+
+	return pid;
+}
+
 static struct option {
 	char *optstr;
 	char *help;