[PATCH/RFC] tst_process_state_wait: wait for schedstats to settle when state == S

Message ID 48e9d0f8ed25dd69dc97fe31c4446a30cd990b06.1572954996.git.jstancek@redhat.com
State Rejected

Commit Message

Jan Stancek Nov. 5, 2019, 11:57 a.m. UTC
There can be a gap between the task state update and the process being
scheduled off the runqueue. For example, futex_wait_queue_me() updates
the task state before queueing itself on the futex:

  static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				  struct hrtimer_sleeper *timeout)
  {
	  /*
	   * The task state is guaranteed to be set before another task can
	   * wake it. set_current_state() is implemented using smp_store_mb() and
	   * queue_me() calls spin_unlock() upon completion, both serializing
	   * access to the hash list and forcing another memory barrier.
	   */
	  set_current_state(TASK_INTERRUPTIBLE);
	  queue_me(q, hb);

This can lead to the test assuming that the child process is already
queued on the futex and trying to requeue futexes too early (the example
below is modified to use 2 children: 1 to wake, 1 to requeue):
  futex_cmp_requeue01.c:107: FAIL: futex_cmp_requeue() requeues 0 waiters, expected 1

In addition to tst_process_state_wait() waiting on the 'S' state, also
wait for /proc/pid/schedstat's sum_exec_runtime and run_delay to settle
(see the sketch after this list):
  1) time spent on the CPU
  2) time spent waiting on a runqueue
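
A minimal sketch of reading those two fields outside the test library
(the path and field order match what the patch parses; on current
kernels both values are in nanoseconds, and the third field, the number
of timeslices, is ignored here):

  #include <stdio.h>
  #include <sys/types.h>

  /*
   * Read /proc/<pid>/schedstat: time spent on the CPU and time spent
   * waiting on a runqueue. Returns 0 on success, -1 on failure.
   */
  static int read_schedstat(pid_t pid, unsigned long long *sum_exec,
                            unsigned long long *run_delay)
  {
          char path[128];
          FILE *f;
          int ret;

          snprintf(path, sizeof(path), "/proc/%i/schedstat", pid);
          f = fopen(path, "r");
          if (!f)
                  return -1;

          ret = fscanf(f, "%llu %llu", sum_exec, run_delay);
          fclose(f);

          return ret == 2 ? 0 : -1;
  }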

Signed-off-by: Jan Stancek <jstancek@redhat.com>
---
 lib/tst_process_state.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 2 deletions(-)

Comments

Jan Stancek Nov. 6, 2019, 9:59 a.m. UTC | #1
----- Original Message -----
> There can be a gap between task state update and process being scheduled
> off from runqueue. For example futex_wait_queue_me() updates task state
> before it queues the futex:
> 
>   static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct
>   futex_q *q,
> 				  struct hrtimer_sleeper *timeout)
>   {
> 	  /*
> 	   * The task state is guaranteed to be set before another task can
> 	   * wake it. set_current_state() is implemented using smp_store_mb() and
> 	   * queue_me() calls spin_unlock() upon completion, both serializing
> 	   * access to the hash list and forcing another memory barrier.
> 	   */
> 	  set_current_state(TASK_INTERRUPTIBLE);
> 	  queue_me(q, hb);

hb->lock is locked at this point, and requeue takes it too, so I'm not
sure what makes it fail. I've seen the testcase fail in at least
two different ways now. Here's the other one:

tst_test.c:1118: INFO: Timeout per run is 0h 05m 00s
futex_cmp_requeue01.c:106: PASS: futex_cmp_requeue() woke up 3 waiters and requeued 7 waiters
futex_cmp_requeue01.c:106: PASS: futex_cmp_requeue() woke up 0 waiters and requeued 10 waiters
futex_cmp_requeue01.c:106: PASS: futex_cmp_requeue() woke up 2 waiters and requeued 6 waiters
futex_cmp_requeue01.c:106: PASS: futex_cmp_requeue() woke up 50 waiters and requeued 50 waiters
futex_cmp_requeue01.c:106: PASS: futex_cmp_requeue() woke up 0 waiters and requeued 70 waiters
futex_cmp_requeue01.c:46: INFO: process 7923 wasn't woken up: ETIMEDOUT (110)
...
futex_cmp_requeue01.c:71: FAIL: futex_cmp_requeue() returned 776, expected 1000
futex_cmp_requeue01.c:85: FAIL: 224 waiters were not woken up normally
futex_cmp_requeue01.c:106: PASS: futex_cmp_requeue() woke up 300 waiters and requeued 500 waiters
Cyril Hrubis Nov. 7, 2019, 11:39 a.m. UTC | #2
Hi!
> hb->lock is locked at this point, and requeue takes it too, so I'm not
> sure what makes it fail. I've seen testcase fail in at least
> 2 different ways now. Here's the other one:

Looking at the test, shouldn't we wait for the processes to be sleeping
after the futex_cmp_requeue() syscall?

i.e.

diff --git a/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c b/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c
index f5264c2ba..41f5e88a6 100644
--- a/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c
+++ b/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c
@@ -71,6 +71,9 @@ static void verify_futex_cmp_requeue(unsigned int n)
                        TST_RET, exp_ret);
        }

+       for (i = 0; i < tc->num_waiters; i++)
+               TST_PROCESS_STATE_WAIT(pid[i], 'S');
+
        num_requeues = futex_wake(&futexes[1], tc->num_waiters, 0);
        num_waits = futex_wake(&futexes[0], tc->num_waiters, 0);
Cyril Hrubis Nov. 7, 2019, 12:03 p.m. UTC | #3
Hi!
> > hb->lock is locked at this point, and requeue takes it too, so I'm not
> > sure what makes it fail. I've seen testcase fail in at least
> > 2 different ways now. Here's the other one:
> 
> Looking at the test shouldn't we wait for the processes to be sleeping
> after the futex_cmp_requeue() syscall?
> 
> i.e.
> 
> diff --git a/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c b/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c
> index f5264c2ba..41f5e88a6 100644
> --- a/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c
> +++ b/testcases/kernel/syscalls/futex/futex_cmp_requeue01.c
> @@ -71,6 +71,9 @@ static void verify_futex_cmp_requeue(unsigned int n)
>                         TST_RET, exp_ret);
>         }
> 
> +       for (i = 0; i < tc->num_waiters; i++)
> +               TST_PROCESS_STATE_WAIT(pid[i], 'S');
> +
>         num_requeues = futex_wake(&futexes[1], tc->num_waiters, 0);
>         num_waits = futex_wake(&futexes[0], tc->num_waiters, 0);

And this obviously wouldn't work since we are waking up some of them in
a subset of the testcases.
Cyril Hrubis Nov. 7, 2019, 12:15 p.m. UTC | #4
Hi!
> hb->lock is locked at this point, and requeue takes it too, so I'm not
> sure what makes it fail. I've seen testcase fail in at least
> 2 different ways now. Here's the other one:

Here is another theory: some of the processes may be sleeping in a
different place in the kernel, somewhere between the fork() and the
futex(), and hence we think that they have been suspended on the futex
when in fact they haven't.

I guess what we can do is put a counter in a piece of shared memory,
increment it from each child just before the futex_wait() call, and
wait in the parent until the counter reaches num_waiters.
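
A rough sketch of that idea, assuming the LTP helpers SAFE_MMAP(),
tst_atomic_inc()/tst_atomic_load() and the futextest.h futex_wait()
wrapper (the variable and function names below are illustrative, not
taken from the test):

  #include <unistd.h>
  #include <sys/mman.h>
  #include "tst_test.h"
  #include "tst_atomic.h"
  #include "futextest.h"

  static int *waiters_ready;
  static futex_t *futexes;

  static void setup_shared(void)
  {
          /* counter and futexes visible to the parent and all children */
          waiters_ready = SAFE_MMAP(NULL, getpagesize(),
                                    PROT_READ | PROT_WRITE,
                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
          futexes = (futex_t *)(waiters_ready + 1);
  }

  static void child_fn(void)
  {
          /* announce "about to block on the futex" ... */
          tst_atomic_inc(waiters_ready);
          futex_wait(&futexes[0], futexes[0], NULL, 0);
  }

  static void wait_for_waiters(int num_waiters)
  {
          /* ... and spin in the parent until every child got that far */
          while (tst_atomic_load(waiters_ready) < num_waiters)
                  usleep(100);
  }

Note that this only guarantees each child is about to call futex_wait(),
not that it is already queued on the futex, which is exactly the window
the schedstat check in this patch tries to close.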
Jan Stancek Nov. 7, 2019, 12:31 p.m. UTC | #5
----- Original Message -----
> Hi!
> > hb->lock is locked at this point, and requeue takes it too, so I'm not
> > sure what makes it fail. I've seen testcase fail in at least
> > 2 different ways now. Here's the other one:
> 
> Here is another theory, some of the processes may be sleeping in a
> different place in the kernel, somewhere between the fork() and the
> futex(), and hence we think that they have been suspended on the futex
> but aren't.
> 
> I guess that what we can do is to put a counter in a piece of shared
> memory and increment it from each child just before the futex_wait()
> call and wait in the parent until the counter reached num_waiters.

It does look related to spurious wakeups and the fact that the test
doesn't change the futex value. I raised it on lkml; here's the
important part:

"If there is an actual use case for keeping the uaddr1 value the same across
a requeue then this needs to be described properly and also needs to be
handled differently and consistently in all cases not just for a spurious
wakeup."

https://lore.kernel.org/lkml/alpine.DEB.2.21.1911070009040.1869@nanos.tec.linutronix.de/T/#m5662b71d7e0d14b6d74137c1da81d774e5035f9a
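
For reference, this is the comparison the quoted mail is about: the raw
FUTEX_CMP_REQUEUE operation only requeues if *uaddr1 still equals the
expected value (val3), and the test keeps that value unchanged the whole
time. A hedged illustration using the raw syscall (this is not the LTP
futextest.h helper, whose signature differs):

  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/futex.h>

  /*
   * Wake up to nr_wake waiters on uaddr1 and requeue up to nr_requeue of
   * the remaining ones onto uaddr2, but only if *uaddr1 == val3
   * (otherwise the call fails with EAGAIN).
   */
  static long cmp_requeue(int *uaddr1, int *uaddr2, int nr_wake,
                          int nr_requeue, int val3)
  {
          return syscall(SYS_futex, uaddr1, FUTEX_CMP_REQUEUE, nr_wake,
                         nr_requeue, uaddr2, val3);
  }

My reading of the linked thread: because the test never changes *uaddr1,
a spuriously woken waiter re-checks the value, finds it unchanged and
goes back to waiting on uaddr1, so the per-iteration wake/requeue
accounting the test does can come out wrong.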

Patch

diff --git a/lib/tst_process_state.c b/lib/tst_process_state.c
index 7a7824959d72..a24b49092ef2 100644
--- a/lib/tst_process_state.c
+++ b/lib/tst_process_state.c
@@ -28,6 +28,51 @@ 
 #include "test.h"
 #include "tst_process_state.h"
 
+#define SETTLE_SLEEP_US 20000 
+#define MAX_SETTLE_SLEEP_US (5 * SETTLE_SLEEP_US)
+
+/*
+ * When schedstats settle, we have more confidence that a process in
+ * state 'S' has been scheduled off the runqueue.
+ */
+static void schedstat_settle(const char *file, const int lineno, pid_t pid)
+{
+	char ss_path[128];
+	int sleep = 0;
+
+	snprintf(ss_path, sizeof(ss_path), "/proc/%i/schedstat", pid);
+	if (access(ss_path, F_OK)) {
+		/* CONFIG_SCHED_INFO is likely disabled */
+		usleep(MAX_SETTLE_SLEEP_US);
+		return;
+	}
+
+	for (;;) {
+		unsigned long long sum_exec, run_delay, sum_exec2, run_delay2;
+		int ret = 0;
+
+		ret |= file_scanf(file, lineno, ss_path, "%llu %llu",
+			&sum_exec, &run_delay);
+		usleep(SETTLE_SLEEP_US);
+		sleep += SETTLE_SLEEP_US;
+		ret |= file_scanf(file, lineno, ss_path, "%llu %llu",
+			&sum_exec2, &run_delay2);
+
+		if (ret) {
+			tst_resm(TWARN, "Error parsing %s", ss_path);
+			return;
+		}
+
+		if (sum_exec == sum_exec2 && run_delay == run_delay2)
+			return;
+
+		if (sleep >= MAX_SETTLE_SLEEP_US) {
+			tst_resm(TWARN, "MAX_SETTLE_SLEEP_US reached");
+			return;
+		}
+	}
+}
+
 void tst_process_state_wait(const char *file, const int lineno,
                             void (*cleanup_fn)(void),
                             pid_t pid, const char state)
@@ -40,8 +85,11 @@  void tst_process_state_wait(const char *file, const int lineno,
 		safe_file_scanf(file, lineno, cleanup_fn, proc_path,
 		                "%*i %*s %c", &cur_state);
 
-		if (state == cur_state)
+		if (state == cur_state) {
+			if (state == 'S')
+				schedstat_settle(file, lineno, pid);
 			return;
+		}
 
 		usleep(10000);
 	}
@@ -69,8 +117,11 @@  int tst_process_state_wait2(pid_t pid, const char state)
 		}
 		fclose(f);
 
-		if (state == cur_state)
+		if (state == cur_state) {
+			if (state == 'S')
+				schedstat_settle(__FILE__, __LINE__, pid);
 			return 0;
+		}
 
 		usleep(10000);
 	}