diff mbox series

[2/2] syscalls/msgstress01: Fix timeouts

Message ID 20240523133132.13978-3-chrubis@suse.cz
State Accepted
Headers show
Series Fix msgstress01 timeouts | expand

Commit Message

Cyril Hrubis May 23, 2024, 1:31 p.m. UTC
Make the test exit if runtime has been exhausted before we finished the
requested amount of iterations.

For that to happen we let the main test process to loop while checking
the runtime and set the stop flag if runtime was exhausted. We also need
to separte the stop and fail flag and add counter for finished children.

Also if we exhaust our runtime during the initial fork phase we print a
warning, since we hardly did a meaningful testing in that case.

The changes can be tested with -I parameter, e.g. -I 5 should trigger
the TWARN message and you should be able to get the test to stop in the
message sending phase with larger -I value.

Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
---
 .../syscalls/ipc/msgstress/msgstress01.c      | 53 ++++++++++++++++---
 1 file changed, 47 insertions(+), 6 deletions(-)

Comments

Martin Doucha May 23, 2024, 2:37 p.m. UTC | #1
Hi,
for both patches:
Reviewed-by: Martin Doucha <mdoucha@suse.cz>

On 23. 05. 24 15:31, Cyril Hrubis wrote:
> Make the test exit if runtime has been exhausted before we finished the
> requested amount of iterations.
> 
> For that to happen we let the main test process to loop while checking
> the runtime and set the stop flag if runtime was exhausted. We also need
> to separte the stop and fail flag and add counter for finished children.
> 
> Also if we exhaust our runtime during the initial fork phase we print a
> warning, since we hardly did a meaningful testing in that case.
> 
> The changes can be tested with -I parameter, e.g. -I 5 should trigger
> the TWARN message and you should be able to get the test to stop in the
> message sending phase with larger -I value.
> 
> Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
> ---
>   .../syscalls/ipc/msgstress/msgstress01.c      | 53 ++++++++++++++++---
>   1 file changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
> index 62ffcf63b..fb1d4263d 100644
> --- a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
> +++ b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
> @@ -50,6 +50,9 @@ static char *str_num_iterations;
>   static int num_messages = 1000;
>   static int num_iterations = MAXNREPS;
>   static volatile int *stop;
> +static volatile int *fail;
> +static int *finished;
> +static int *flags;
>   
>   static int get_used_sysvipc(void)
>   {
> @@ -77,6 +80,10 @@ static void reset_messages(void)
>   
>   	for (int i = 0; i < num_messages; i++)
>   		ipc_data[i].id = -1;
> +
> +	*stop = 0;
> +	*fail = 0;
> +	*finished = 0;
>   }
>   
>   static int create_message(const int id)
> @@ -112,6 +119,8 @@ static void writer(const int id, const int pos)
>   			tst_brk(TBROK | TERRNO, "msgsnd() failed");
>   		}
>   	}
> +
> +	tst_atomic_inc(finished);
>   }
>   
>   static void reader(const int id, const int pos)
> @@ -138,6 +147,7 @@ static void reader(const int id, const int pos)
>   			tst_res(TFAIL, "Received the wrong message type");
>   
>   			*stop = 1;
> +			*fail = 1;
>   			return;
>   		}
>   
> @@ -145,6 +155,7 @@ static void reader(const int id, const int pos)
>   			tst_res(TFAIL, "Received the wrong message data length");
>   
>   			*stop = 1;
> +			*fail = 1;
>   			return;
>   		}
>   
> @@ -155,6 +166,7 @@ static void reader(const int id, const int pos)
>   					buff->msg.data.pbytes[i]);
>   
>   				*stop = 1;
> +				*fail = 1;
>   				return;
>   			}
>   		}
> @@ -163,6 +175,8 @@ static void reader(const int id, const int pos)
>   		tst_res(TDEBUG, "msg_recv.type = %ld", msg_recv.type);
>   		tst_res(TDEBUG, "msg_recv.data.len = %d", msg_recv.data.len);
>   	}
> +
> +	tst_atomic_inc(finished);
>   }
>   
>   static void remove_queues(void)
> @@ -196,12 +210,37 @@ static void run(void)
>   
>   		if (*stop)
>   			break;
> +
> +		if (!tst_remaining_runtime()) {
> +			tst_res(TWARN, "Out of runtime during forking...");
> +			*stop = 1;
> +			break;
> +		}
> +	}
> +
> +	if (!(*stop))
> +		tst_res(TINFO, "All processes running.");
> +
> +	for (;;) {
> +		if (tst_atomic_load(finished) == 2 * num_messages)
> +			break;
> +
> +		if (*stop)
> +			break;
> +
> +		if (!tst_remaining_runtime()) {
> +			tst_res(TINFO, "Out of runtime, stopping processes...");
> +			*stop = 1;
> +			break;
> +		}
> +
> +		sleep(1);
>   	}
>   
>   	tst_reap_children();
>   	remove_queues();
>   
> -	if (!(*stop))
> +	if (!(*fail))
>   		tst_res(TPASS, "Test passed. All messages have been received");
>   }
>   
> @@ -242,14 +281,16 @@ static void setup(void)
>   		MAP_SHARED | MAP_ANONYMOUS,
>   		-1, 0);
>   
> -	stop = SAFE_MMAP(
> +	flags = SAFE_MMAP(
>   		NULL,
> -		sizeof(int),
> +		sizeof(int) * 3,
>   		PROT_READ | PROT_WRITE,
>   		MAP_SHARED | MAP_ANONYMOUS,
>   		-1, 0);
>   
> -	reset_messages();
> +	stop = &flags[0];
> +	fail = &flags[1];
> +	finished = &flags[2];
>   }
>   
>   static void cleanup(void)
> @@ -260,7 +301,7 @@ static void cleanup(void)
>   	remove_queues();
>   
>   	SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages);
> -	SAFE_MUNMAP((void *)stop, sizeof(int));
> +	SAFE_MUNMAP(flags, sizeof(int) * 3);
>   }
>   
>   static struct tst_test test = {
> @@ -271,7 +312,7 @@ static struct tst_test test = {
>   	.max_runtime = 180,
>   	.options = (struct tst_option[]) {
>   		{"n:", &str_num_messages, "Number of messages to send (default: 1000)"},
> -		{"l:", &str_num_iterations, "Number iterations per message (default: 10000)"},
> +		{"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"},
>   		{},
>   	},
>   };
Petr Vorel May 23, 2024, 3:20 p.m. UTC | #2
Hi Cyril,

> Make the test exit if runtime has been exhausted before we finished the
> requested amount of iterations.

> For that to happen we let the main test process to loop while checking
> the runtime and set the stop flag if runtime was exhausted. We also need
> to separte the stop and fail flag and add counter for finished children.
nit: s/separte/separate/

...
>  static void remove_queues(void)
> @@ -196,12 +210,37 @@ static void run(void)

>  		if (*stop)
>  			break;
> +
> +		if (!tst_remaining_runtime()) {
> +			tst_res(TWARN, "Out of runtime during forking...");

I tested the patchset on various VMs (various kernels), with both 1 or 2 CPU.
Indeed it fixes the problem. IMHO it can quite easily get KVM host overloaded
enough to get the TWARN out of runtime during forking, but we can't do anything
about it.

msgstress01.c:215: TWARN: Out of runtime during forking...
msgstress01.c:244: TPASS: Test passed. All messages have been received

> +			*stop = 1;
> +			break;
> +		}
> +	}
> +
> +	if (!(*stop))
> +		tst_res(TINFO, "All processes running.");
very nit: I'd remove dot at the end.

> +
> +	for (;;) {
> +		if (tst_atomic_load(finished) == 2 * num_messages)
> +			break;
> +
> +		if (*stop)
> +			break;
> +
> +		if (!tst_remaining_runtime()) {
> +			tst_res(TINFO, "Out of runtime, stopping processes...");
> +			*stop = 1;
> +			break;
> +		}
> +
> +		sleep(1);
>  	}

>  	tst_reap_children();
>  	remove_queues();

> -	if (!(*stop))
> +	if (!(*fail))
>  		tst_res(TPASS, "Test passed. All messages have been received");
>  }

> @@ -242,14 +281,16 @@ static void setup(void)
>  		MAP_SHARED | MAP_ANONYMOUS,
>  		-1, 0);

> -	stop = SAFE_MMAP(
> +	flags = SAFE_MMAP(
>  		NULL,
> -		sizeof(int),
> +		sizeof(int) * 3,
>  		PROT_READ | PROT_WRITE,
>  		MAP_SHARED | MAP_ANONYMOUS,
>  		-1, 0);

> -	reset_messages();
> +	stop = &flags[0];
> +	fail = &flags[1];
> +	finished = &flags[2];
>  }

>  static void cleanup(void)
> @@ -260,7 +301,7 @@ static void cleanup(void)
>  	remove_queues();

>  	SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages);
> -	SAFE_MUNMAP((void *)stop, sizeof(int));
> +	SAFE_MUNMAP(flags, sizeof(int) * 3);
>  }

>  static struct tst_test test = {
> @@ -271,7 +312,7 @@ static struct tst_test test = {
>  	.max_runtime = 180,
>  	.options = (struct tst_option[]) {
>  		{"n:", &str_num_messages, "Number of messages to send (default: 1000)"},
> -		{"l:", &str_num_iterations, "Number iterations per message (default: 10000)"},
> +		{"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"},
>  		{},
very nit: too long line

Reviewed-by: Petr Vorel <pvorel@suse.cz>

Kind regards,
Petr
Cyril Hrubis May 23, 2024, 3:33 p.m. UTC | #3
Hi!
> > Make the test exit if runtime has been exhausted before we finished the
> > requested amount of iterations.
> 
> > For that to happen we let the main test process to loop while checking
> > the runtime and set the stop flag if runtime was exhausted. We also need
> > to separte the stop and fail flag and add counter for finished children.
> nit: s/separte/separate/
> 
> ...
> >  static void remove_queues(void)
> > @@ -196,12 +210,37 @@ static void run(void)
> 
> >  		if (*stop)
> >  			break;
> > +
> > +		if (!tst_remaining_runtime()) {
> > +			tst_res(TWARN, "Out of runtime during forking...");
> 
> I tested the patchset on various VMs (various kernels), with both 1 or 2 CPU.
> Indeed it fixes the problem. IMHO it can quite easily get KVM host overloaded
> enough to get the TWARN out of runtime during forking, but we can't do anything
> about it.

And I think that it's fair to signal to the user that the machine is too
overloaded to run a meaningful test in this case.

> msgstress01.c:215: TWARN: Out of runtime during forking...
> msgstress01.c:244: TPASS: Test passed. All messages have been received
> 
> > +			*stop = 1;
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (!(*stop))
> > +		tst_res(TINFO, "All processes running.");
> very nit: I'd remove dot at the end.
> 
> > +
> > +	for (;;) {
> > +		if (tst_atomic_load(finished) == 2 * num_messages)
> > +			break;
> > +
> > +		if (*stop)
> > +			break;
> > +
> > +		if (!tst_remaining_runtime()) {
> > +			tst_res(TINFO, "Out of runtime, stopping processes...");
> > +			*stop = 1;
> > +			break;
> > +		}
> > +
> > +		sleep(1);
> >  	}
> 
> >  	tst_reap_children();
> >  	remove_queues();
> 
> > -	if (!(*stop))
> > +	if (!(*fail))
> >  		tst_res(TPASS, "Test passed. All messages have been received");
> >  }
> 
> > @@ -242,14 +281,16 @@ static void setup(void)
> >  		MAP_SHARED | MAP_ANONYMOUS,
> >  		-1, 0);
> 
> > -	stop = SAFE_MMAP(
> > +	flags = SAFE_MMAP(
> >  		NULL,
> > -		sizeof(int),
> > +		sizeof(int) * 3,
> >  		PROT_READ | PROT_WRITE,
> >  		MAP_SHARED | MAP_ANONYMOUS,
> >  		-1, 0);
> 
> > -	reset_messages();
> > +	stop = &flags[0];
> > +	fail = &flags[1];
> > +	finished = &flags[2];
> >  }
> 
> >  static void cleanup(void)
> > @@ -260,7 +301,7 @@ static void cleanup(void)
> >  	remove_queues();
> 
> >  	SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages);
> > -	SAFE_MUNMAP((void *)stop, sizeof(int));
> > +	SAFE_MUNMAP(flags, sizeof(int) * 3);
> >  }
> 
> >  static struct tst_test test = {
> > @@ -271,7 +312,7 @@ static struct tst_test test = {
> >  	.max_runtime = 180,
> >  	.options = (struct tst_option[]) {
> >  		{"n:", &str_num_messages, "Number of messages to send (default: 1000)"},
> > -		{"l:", &str_num_iterations, "Number iterations per message (default: 10000)"},
> > +		{"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"},
> >  		{},
> very nit: too long line

Feel free to fix the minor nits and push the patch.
Petr Vorel May 23, 2024, 3:41 p.m. UTC | #4
Hi,

BTW I have found problem on Alpine:

tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/user.slice/user-1000.slice/pids.max'
tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/pids/user.slice/user-1000.slice/pids.max'
msgstress01.c:269: TINFO: Available messages slots: 5530
msgstress01.c:164: TFAIL: Received wrong data at index 99: ffffffa3 != 19

It looks to me it's not related to using busybox init system (e.g. IMHO not
related to the Cannot read session user limits, but I can be wrong).
Anyway, this is for sure not a blocker for the release (and probably wait for
LTP musl users).

Kind regards,
Petr
Petr Vorel May 23, 2024, 3:43 p.m. UTC | #5
> Hi,

> BTW I have found problem on Alpine:

OK, it's not Alpine related, but 32 bit related:

$ ./msgstress01
tst_test.c:1733: TINFO: LTP version: 20240129-290-g37fd02a1c
tst_test.c:1619: TINFO: Timeout per run is 0h 03m 30s
tst_pid.c:94: TINFO: Found limit of processes 40844 (from /sys/fs/cgroup/user.slice/user-1000.slice/pids.max)
msgstress01.c:269: TINFO: Available messages slots: 19045
msgstress01.c:166: TFAIL: Received wrong data at index 99: 6c != 1

Maybe worth of fixing. Regression from 90f80322a.

Kind regards,
Petr
Petr Vorel May 23, 2024, 3:55 p.m. UTC | #6
Hi Cyril,

patchset merged, thanks!

Your quick fix for this test on 32bit will be the last commit before release
tomorrow.

Kind regards,
Petr
Cyril Hrubis May 23, 2024, 3:55 p.m. UTC | #7
Hi!
> BTW I have found problem on Alpine:
> 
> tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/user.slice/user-1000.slice/pids.max'
> tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/pids/user.slice/user-1000.slice/pids.max'
> msgstress01.c:269: TINFO: Available messages slots: 5530
> msgstress01.c:164: TFAIL: Received wrong data at index 99: ffffffa3 != 19
> 
> It looks to me it's not related to using busybox init system (e.g. IMHO not
> related to the Cannot read session user limits, but I can be wrong).
> Anyway, this is for sure not a blocker for the release (and probably wait for
> LTP musl users).

So looks like we are using wrong message size for the message buffer.

We are using size returned from the msgrcv() that returns the size of
the payload but the payload looks like:

struct {
	char len;
	char pbytes[99];
} data;

This means that the size returned from msgrcv() is one more byte longer
than the pbytes array.

Quick fix would be:

diff --git a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
index 5c84957b3..b0d945a11 100644
--- a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
+++ b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
@@ -131,7 +131,7 @@ static void reader(const int id, const int pos)
                        return;
                }

-               for (int i = 0; i < size; i++) {
+               for (int i = 0; i < msg_recv.data.len; i++) {
                        if (msg_recv.data.pbytes[i] != buff->msg.data.pbytes[i]) {
                                tst_res(TFAIL, "Received wrong data at index %d: %x != %x", i,
                                        msg_recv.data.pbytes[i],


Better fix would remove the len from the sysv_msg structure and use the
size only, but I would go for the easy fix before the release and
do cleanup later on. I will send a proper patch.
diff mbox series

Patch

diff --git a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
index 62ffcf63b..fb1d4263d 100644
--- a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
+++ b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c
@@ -50,6 +50,9 @@  static char *str_num_iterations;
 static int num_messages = 1000;
 static int num_iterations = MAXNREPS;
 static volatile int *stop;
+static volatile int *fail;
+static int *finished;
+static int *flags;
 
 static int get_used_sysvipc(void)
 {
@@ -77,6 +80,10 @@  static void reset_messages(void)
 
 	for (int i = 0; i < num_messages; i++)
 		ipc_data[i].id = -1;
+
+	*stop = 0;
+	*fail = 0;
+	*finished = 0;
 }
 
 static int create_message(const int id)
@@ -112,6 +119,8 @@  static void writer(const int id, const int pos)
 			tst_brk(TBROK | TERRNO, "msgsnd() failed");
 		}
 	}
+
+	tst_atomic_inc(finished);
 }
 
 static void reader(const int id, const int pos)
@@ -138,6 +147,7 @@  static void reader(const int id, const int pos)
 			tst_res(TFAIL, "Received the wrong message type");
 
 			*stop = 1;
+			*fail = 1;
 			return;
 		}
 
@@ -145,6 +155,7 @@  static void reader(const int id, const int pos)
 			tst_res(TFAIL, "Received the wrong message data length");
 
 			*stop = 1;
+			*fail = 1;
 			return;
 		}
 
@@ -155,6 +166,7 @@  static void reader(const int id, const int pos)
 					buff->msg.data.pbytes[i]);
 
 				*stop = 1;
+				*fail = 1;
 				return;
 			}
 		}
@@ -163,6 +175,8 @@  static void reader(const int id, const int pos)
 		tst_res(TDEBUG, "msg_recv.type = %ld", msg_recv.type);
 		tst_res(TDEBUG, "msg_recv.data.len = %d", msg_recv.data.len);
 	}
+
+	tst_atomic_inc(finished);
 }
 
 static void remove_queues(void)
@@ -196,12 +210,37 @@  static void run(void)
 
 		if (*stop)
 			break;
+
+		if (!tst_remaining_runtime()) {
+			tst_res(TWARN, "Out of runtime during forking...");
+			*stop = 1;
+			break;
+		}
+	}
+
+	if (!(*stop))
+		tst_res(TINFO, "All processes running.");
+
+	for (;;) {
+		if (tst_atomic_load(finished) == 2 * num_messages)
+			break;
+
+		if (*stop)
+			break;
+
+		if (!tst_remaining_runtime()) {
+			tst_res(TINFO, "Out of runtime, stopping processes...");
+			*stop = 1;
+			break;
+		}
+
+		sleep(1);
 	}
 
 	tst_reap_children();
 	remove_queues();
 
-	if (!(*stop))
+	if (!(*fail))
 		tst_res(TPASS, "Test passed. All messages have been received");
 }
 
@@ -242,14 +281,16 @@  static void setup(void)
 		MAP_SHARED | MAP_ANONYMOUS,
 		-1, 0);
 
-	stop = SAFE_MMAP(
+	flags = SAFE_MMAP(
 		NULL,
-		sizeof(int),
+		sizeof(int) * 3,
 		PROT_READ | PROT_WRITE,
 		MAP_SHARED | MAP_ANONYMOUS,
 		-1, 0);
 
-	reset_messages();
+	stop = &flags[0];
+	fail = &flags[1];
+	finished = &flags[2];
 }
 
 static void cleanup(void)
@@ -260,7 +301,7 @@  static void cleanup(void)
 	remove_queues();
 
 	SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages);
-	SAFE_MUNMAP((void *)stop, sizeof(int));
+	SAFE_MUNMAP(flags, sizeof(int) * 3);
 }
 
 static struct tst_test test = {
@@ -271,7 +312,7 @@  static struct tst_test test = {
 	.max_runtime = 180,
 	.options = (struct tst_option[]) {
 		{"n:", &str_num_messages, "Number of messages to send (default: 1000)"},
-		{"l:", &str_num_iterations, "Number iterations per message (default: 10000)"},
+		{"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"},
 		{},
 	},
 };