Message ID | 20220512123816.24399-23-chrubis@suse.cz |
---|---|
State | Accepted |
Headers | show |
Series | Introduce runtime and conver tests | expand |
Cyril Hrubis <chrubis@suse.cz> wrote: > > diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h > index 4f09ed416..bef424002 100644 > --- a/include/tst_fuzzy_sync.h > +++ b/include/tst_fuzzy_sync.h > @@ -160,15 +160,6 @@ struct tst_fzsync_pair { > int b_cntr; > /** Internal; Used by tst_fzsync_pair_exit() and > fzsync_pair_wait() */ > int exit; > - /** > - * The maximum desired execution time as a proportion of the > timeout > - * > - * A value x so that 0 < x < 1 which decides how long the test > should > - * be run for (assuming the loop limit is not exceeded first). > - * > - * Defaults to 0.5 (~150 seconds with default timeout). > - */ > - float exec_time_p; > /** Internal; The test time remaining on tst_fzsync_pair_reset() */ > float exec_time_start; > /** > @@ -214,7 +205,6 @@ static inline void tst_fzsync_pair_init(struct > tst_fzsync_pair *pair) > CHK(avg_alpha, 0, 1, 0.25); > CHK(min_samples, 20, INT_MAX, 1024); > CHK(max_dev_ratio, 0, 1, 0.1); > - CHK(exec_time_p, 0, 1, 0.5); > CHK(exec_loops, 20, INT_MAX, 3000000); > > if (tst_ncpus_available() <= 1) > @@ -291,7 +281,7 @@ static inline void tst_fzsync_pair_reset(struct > tst_fzsync_pair *pair, > if (run_b) > SAFE_PTHREAD_CREATE(&pair->thread_b, 0, run_b, 0); > > - pair->exec_time_start = (float)tst_timeout_remaining(); > + pair->exec_time_start = (float)tst_remaining_runtime(); > } > > /** > @@ -644,10 +634,9 @@ static inline void tst_fzsync_wait_b(struct > tst_fzsync_pair *pair) > */ > static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) > { > - float rem_p = 1 - tst_timeout_remaining() / pair->exec_time_start; > + float rem_p = 1 - tst_remaining_runtime() / pair->exec_time_start; > > - if ((pair->exec_time_p * SAMPLING_SLICE < rem_p) > - && (pair->sampling > 0)) { > + if ((SAMPLING_SLICE < rem_p) && (pair->sampling > 0)) { > tst_res(TINFO, "Stopped sampling at %d (out of %d) > samples, " > "sampling time reached 50%% of the total time > limit", > pair->exec_loop, pair->min_samples); > @@ -655,7 +644,7 @@ static inline int tst_fzsync_run_a(struct > tst_fzsync_pair *pair) > tst_fzsync_pair_info(pair); > } > > - if (pair->exec_time_p < rem_p) { > + if (rem_p >= 1) { > I hit a new problem while testing new pty03, that seems here will fall into an infinite loop and test timed out finally. The printf shows rem_p will be overflow I haven't figured out why. But with comparing with 0.9, it always gets passed on to the same system. --- a/include/tst_fuzzy_sync.h +++ b/include/tst_fuzzy_sync.h @@ -644,7 +644,7 @@ static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) tst_fzsync_pair_info(pair); } - if (rem_p >= 1) { + if (rem_p >= 0.9) { tst_res(TINFO, "Exceeded execution time, requesting exit"); tst_atomic_store(1, &pair->exit); ---------------------- # ./pty03 ... ../../../include/tst_fuzzy_sync.h:295: TINFO: start_a - start_b: { avg = -90ns, avg_dev = 8ns, dev_ratio = 0.09 } ../../../include/tst_fuzzy_sync.h:295: TINFO: end_a - start_a : { avg = 2606ns, avg_dev = 127ns, dev_ratio = 0.05 } ../../../include/tst_fuzzy_sync.h:295: TINFO: end_b - start_b : { avg = 8785ns, avg_dev = 600ns, dev_ratio = 0.07 } ../../../include/tst_fuzzy_sync.h:295: TINFO: end_a - end_b : { avg = -6269ns, avg_dev = 566ns, dev_ratio = 0.09 } ../../../include/tst_fuzzy_sync.h:295: TINFO: spins : { avg = 3269 , avg_dev = 200 , dev_ratio = 0.06 } Test timeouted, sending SIGKILL! tst_test.c:1573: TINFO: If you are running on slow machine, try exporting LTP_TIMEOUT_MUL > 1 tst_test.c:1575: TBROK: Test killed! (timeout?)
Hi! > > diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h > > index 4f09ed416..bef424002 100644 > > --- a/include/tst_fuzzy_sync.h > > +++ b/include/tst_fuzzy_sync.h > > @@ -160,15 +160,6 @@ struct tst_fzsync_pair { > > int b_cntr; > > /** Internal; Used by tst_fzsync_pair_exit() and > > fzsync_pair_wait() */ > > int exit; > > - /** > > - * The maximum desired execution time as a proportion of the > > timeout > > - * > > - * A value x so that 0 < x < 1 which decides how long the test > > should > > - * be run for (assuming the loop limit is not exceeded first). > > - * > > - * Defaults to 0.5 (~150 seconds with default timeout). > > - */ > > - float exec_time_p; > > /** Internal; The test time remaining on tst_fzsync_pair_reset() */ > > float exec_time_start; > > /** > > @@ -214,7 +205,6 @@ static inline void tst_fzsync_pair_init(struct > > tst_fzsync_pair *pair) > > CHK(avg_alpha, 0, 1, 0.25); > > CHK(min_samples, 20, INT_MAX, 1024); > > CHK(max_dev_ratio, 0, 1, 0.1); > > - CHK(exec_time_p, 0, 1, 0.5); > > CHK(exec_loops, 20, INT_MAX, 3000000); > > > > if (tst_ncpus_available() <= 1) > > @@ -291,7 +281,7 @@ static inline void tst_fzsync_pair_reset(struct > > tst_fzsync_pair *pair, > > if (run_b) > > SAFE_PTHREAD_CREATE(&pair->thread_b, 0, run_b, 0); > > > > - pair->exec_time_start = (float)tst_timeout_remaining(); > > + pair->exec_time_start = (float)tst_remaining_runtime(); > > } > > > > /** > > @@ -644,10 +634,9 @@ static inline void tst_fzsync_wait_b(struct > > tst_fzsync_pair *pair) > > */ > > static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) > > { > > - float rem_p = 1 - tst_timeout_remaining() / pair->exec_time_start; > > + float rem_p = 1 - tst_remaining_runtime() / pair->exec_time_start; > > > > - if ((pair->exec_time_p * SAMPLING_SLICE < rem_p) > > - && (pair->sampling > 0)) { > > + if ((SAMPLING_SLICE < rem_p) && (pair->sampling > 0)) { > > tst_res(TINFO, "Stopped sampling at %d (out of %d) > > samples, " > > "sampling time reached 50%% of the total time > > limit", > > pair->exec_loop, pair->min_samples); > > @@ -655,7 +644,7 @@ static inline int tst_fzsync_run_a(struct > > tst_fzsync_pair *pair) > > tst_fzsync_pair_info(pair); > > } > > > > - if (pair->exec_time_p < rem_p) { > > + if (rem_p >= 1) { > > > > I hit a new problem while testing new pty03, that seems here > will fall into an infinite loop and test timed out finally. The printf > shows rem_p will be overflow I haven't figured out why. > > But with comparing with 0.9, it always gets passed on to the same system. That is strange, since we do: rem_p = 1 - tst_remaining_runtime()/pair->time_exec_start; And the tst_remaining_runtime() should return 0 once the time is up so the end result should be that rem_p will end up 1 sooner or later. Anyways we can as well use the value from tst_remainig_runtime() directly instead as: diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h index bef424002..db5bec7a4 100644 --- a/include/tst_fuzzy_sync.h +++ b/include/tst_fuzzy_sync.h @@ -634,7 +634,8 @@ static inline void tst_fzsync_wait_b(struct tst_fzsync_pair *pair) */ static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) { - float rem_p = 1 - tst_remaining_runtime() / pair->exec_time_start; + int remaining_runtime = tst_remaining_runtime(); + float rem_p = 1 - remaining_runtime / pair->exec_time_start; if ((SAMPLING_SLICE < rem_p) && (pair->sampling > 0)) { tst_res(TINFO, "Stopped sampling at %d (out of %d) samples, " @@ -644,7 +645,7 @@ static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) tst_fzsync_pair_info(pair); } - if (rem_p >= 1) { + if (!remaining_runtime) { tst_res(TINFO, "Exceeded execution time, requesting exit"); tst_atomic_store(1, &pair->exit); Does that fix your problem?
Cyril Hrubis <chrubis@suse.cz> wrote: > > > - if (pair->exec_time_p < rem_p) { > > > + if (rem_p >= 1) { > > > > > > > I hit a new problem while testing new pty03, that seems here > > will fall into an infinite loop and test timed out finally. The printf > > shows rem_p will be overflow I haven't figured out why. > > > > But with comparing with 0.9, it always gets passed on to the same system. > > That is strange, since we do: > > rem_p = 1 - tst_remaining_runtime()/pair->time_exec_start; > I guess the root cause is that 'pair->time_exec_start' has a possibility to reach zero. in pty03 it has ".tcnt = 9" which made the tst_fzsync_pair_reset() to be re-run many times, but in that function 'pair->time_exec_start' will be set only based on the original .max_runtime, with time elapsed the remaining time tends to be zero. debug log -------------- ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is 0.000000 ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is 0.000000 ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is 1.000000 ../../../include/tst_fuzzy_sync.h:654: TINFO: Exceeded execution time, requesting exit pty03.c:132: TPASS: Did not crash with PPS TTY discipline pty03.c:106: TINFO: Creating PTY with CAIF line discipline ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is -nan pty03.c:91: TCONF: You don't appear to have the CAIF TTY line discipline: EINVAL (22) pty03.c:106: TINFO: Creating PTY with GSM line discipline ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is -nan ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is -nan ../../../include/tst_fuzzy_sync.h:651: TINFO: rem_p is -nan > > > And the tst_remaining_runtime() should return 0 once the time is up so > the end result should be that rem_p will end up 1 sooner or later. > Yes, the pair->time_exec_start will be reset to 0 as well then becomes the denominator. That makes rem_p get an overflowed negative number, then the test falls into an infinite loop from this moment. > > Anyways we can as well use the value from tst_remainig_runtime() > directly instead as: > diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h > index bef424002..db5bec7a4 100644 > --- a/include/tst_fuzzy_sync.h > +++ b/include/tst_fuzzy_sync.h > @@ -634,7 +634,8 @@ static inline void tst_fzsync_wait_b(struct > tst_fzsync_pair *pair) > */ > static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) > { > - float rem_p = 1 - tst_remaining_runtime() / pair->exec_time_start; > + int remaining_runtime = tst_remaining_runtime(); > + float rem_p = 1 - remaining_runtime / pair->exec_time_start; > > if ((SAMPLING_SLICE < rem_p) && (pair->sampling > 0)) { > tst_res(TINFO, "Stopped sampling at %d (out of %d) > samples, " > @@ -644,7 +645,7 @@ static inline int tst_fzsync_run_a(struct > tst_fzsync_pair *pair) > tst_fzsync_pair_info(pair); > } > > - if (rem_p >= 1) { > + if (!remaining_runtime) { > tst_res(TINFO, > "Exceeded execution time, requesting exit"); > tst_atomic_store(1, &pair->exit); > > > Does that fix your problem? > This amendment is more readable but does not solve the original problem.
Hi! > > > I hit a new problem while testing new pty03, that seems here > > > will fall into an infinite loop and test timed out finally. The printf > > > shows rem_p will be overflow I haven't figured out why. > > > > > > But with comparing with 0.9, it always gets passed on to the same system. > > > > That is strange, since we do: > > > > rem_p = 1 - tst_remaining_runtime()/pair->time_exec_start; > > > > I guess the root cause is that 'pair->time_exec_start' has a possibility > to reach zero. in pty03 it has ".tcnt = 9" which made the > tst_fzsync_pair_reset() > to be re-run many times, but in that function 'pair->time_exec_start' will > be set only based on the original .max_runtime, with time elapsed the > remaining time tends to be zero. I guess that that the interaction of tcnt and runtime is not optimal here. You are right that as long as we call tst_fzsync_pair_reset() on each invocation of the run() function we may eventually get to state where the runtime is exhausted, especially on slower hardware we end up with division by zero and overflow. The cleanest solution would be to rewrite the test to use .test_variants = 9 and setting the .max_runtime to a smaller value. That way we would have precisely defined runtime for each iteration. What do you think?
On Fri, May 13, 2022 at 10:20 PM Cyril Hrubis <chrubis@suse.cz> wrote: > Hi! > > > > I hit a new problem while testing new pty03, that seems here > > > > will fall into an infinite loop and test timed out finally. The > printf > > > > shows rem_p will be overflow I haven't figured out why. > > > > > > > > But with comparing with 0.9, it always gets passed on to the same > system. > > > > > > That is strange, since we do: > > > > > > rem_p = 1 - tst_remaining_runtime()/pair->time_exec_start; > > > > > > > I guess the root cause is that 'pair->time_exec_start' has a possibility > > to reach zero. in pty03 it has ".tcnt = 9" which made the > > tst_fzsync_pair_reset() > > to be re-run many times, but in that function 'pair->time_exec_start' > will > > be set only based on the original .max_runtime, with time elapsed the > > remaining time tends to be zero. > > I guess that that the interaction of tcnt and runtime is not optimal > here. You are right that as long as we call tst_fzsync_pair_reset() on > each invocation of the run() function we may eventually get to state > where the runtime is exhausted, especially on slower hardware we end up > with division by zero and overflow. > > The cleanest solution would be to rewrite the test to use .test_variants = > 9 > and setting the .max_runtime to a smaller value. That way we would have > precisely defined runtime for each iteration. What do you think? > Yes, that should work, but people have to painstakingly remember that to avoid this interaction of tcnt and runtime in test writing. Another simple choice I can think of is to use tst_set_max_runtime() to reset both 'timeout' and 'runtime' at the beginning of each invocation of the run(). And maybe we should invoke it in the tst_fzsync_pair_reset directly? I'm not very sure about fixing this in fzsync-lib or out. --- a/testcases/kernel/pty/pty03.c +++ b/testcases/kernel/pty/pty03.c @@ -105,6 +105,7 @@ static void do_test(unsigned int n) tst_res(TINFO, "Creating PTY with %s line discipline", ldisc->name); + tst_set_max_runtime(test.runtime); tst_fzsync_pair_reset(&fzp, hangup); while (tst_fzsync_run_a(&fzp)) { ptmx = SAFE_OPEN("/dev/ptmx", O_RDONLY);
Hello, Cyril Hrubis <chrubis@suse.cz> writes: > Hi! >> > > I hit a new problem while testing new pty03, that seems here >> > > will fall into an infinite loop and test timed out finally. The printf >> > > shows rem_p will be overflow I haven't figured out why. >> > > >> > > But with comparing with 0.9, it always gets passed on to the same system. >> > >> > That is strange, since we do: >> > >> > rem_p = 1 - tst_remaining_runtime()/pair->time_exec_start; >> > >> >> I guess the root cause is that 'pair->time_exec_start' has a possibility >> to reach zero. in pty03 it has ".tcnt = 9" which made the >> tst_fzsync_pair_reset() >> to be re-run many times, but in that function 'pair->time_exec_start' will >> be set only based on the original .max_runtime, with time elapsed the >> remaining time tends to be zero. > > I guess that that the interaction of tcnt and runtime is not optimal > here. You are right that as long as we call tst_fzsync_pair_reset() on > each invocation of the run() function we may eventually get to state > where the runtime is exhausted, especially on slower hardware we end up > with division by zero and overflow. > > The cleanest solution would be to rewrite the test to use .test_variants = 9 > and setting the .max_runtime to a smaller value. That way we would have > precisely defined runtime for each iteration. What do you think? Or each test case (defined by tcnt) could be given an equal share of the runtime?
Hi! > >> I guess the root cause is that 'pair->time_exec_start' has a possibility > >> to reach zero. in pty03 it has ".tcnt = 9" which made the > >> tst_fzsync_pair_reset() > >> to be re-run many times, but in that function 'pair->time_exec_start' will > >> be set only based on the original .max_runtime, with time elapsed the > >> remaining time tends to be zero. > > > > I guess that that the interaction of tcnt and runtime is not optimal > > here. You are right that as long as we call tst_fzsync_pair_reset() on > > each invocation of the run() function we may eventually get to state > > where the runtime is exhausted, especially on slower hardware we end up > > with division by zero and overflow. > > > > The cleanest solution would be to rewrite the test to use .test_variants = 9 > > and setting the .max_runtime to a smaller value. That way we would have > > precisely defined runtime for each iteration. What do you think? > > Or each test case (defined by tcnt) could be given an equal share of the > runtime? That would be solution as well. If everyone agrees on this I will go with this version, it should be as easy as: diff --git a/lib/tst_test.c b/lib/tst_test.c index dad8aad92..69d7799c3 100644 --- a/lib/tst_test.c +++ b/lib/tst_test.c @@ -1349,6 +1349,8 @@ static void run_tests(void) if (results_equal(&saved_results, results)) tst_brk(TBROK, "Test %i haven't reported results!", i); + + kill(getppid(), SIGUSR1); } } With this we do reset the test timeout after each test iteration.
Hi all, > Hi! > > >> I guess the root cause is that 'pair->time_exec_start' has a possibility > > >> to reach zero. in pty03 it has ".tcnt = 9" which made the > > >> tst_fzsync_pair_reset() > > >> to be re-run many times, but in that function 'pair->time_exec_start' will > > >> be set only based on the original .max_runtime, with time elapsed the > > >> remaining time tends to be zero. > > > I guess that that the interaction of tcnt and runtime is not optimal > > > here. You are right that as long as we call tst_fzsync_pair_reset() on > > > each invocation of the run() function we may eventually get to state > > > where the runtime is exhausted, especially on slower hardware we end up > > > with division by zero and overflow. > > > The cleanest solution would be to rewrite the test to use .test_variants = 9 > > > and setting the .max_runtime to a smaller value. That way we would have > > > precisely defined runtime for each iteration. What do you think? > > Or each test case (defined by tcnt) could be given an equal share of the > > runtime? LGTM > That would be solution as well. If everyone agrees on this I will go > with this version, it should be as easy as: > diff --git a/lib/tst_test.c b/lib/tst_test.c > index dad8aad92..69d7799c3 100644 > --- a/lib/tst_test.c > +++ b/lib/tst_test.c > @@ -1349,6 +1349,8 @@ static void run_tests(void) > if (results_equal(&saved_results, results)) > tst_brk(TBROK, "Test %i haven't reported results!", i); > + > + kill(getppid(), SIGUSR1); > } > } > With this we do reset the test timeout after each test iteration. +1. Kind regards, Petr
Cyril Hrubis <chrubis@suse.cz> wrote: > > > Or each test case (defined by tcnt) could be given an equal share of the > > runtime? > > That would be solution as well. If everyone agrees on this I will go > with this version, it should be as easy as: > > diff --git a/lib/tst_test.c b/lib/tst_test.c > index dad8aad92..69d7799c3 100644 > --- a/lib/tst_test.c > +++ b/lib/tst_test.c > @@ -1349,6 +1349,8 @@ static void run_tests(void) > > if (results_equal(&saved_results, results)) > tst_brk(TBROK, "Test %i haven't reported > results!", i); > + > + kill(getppid(), SIGUSR1); > I have to say NO here. This is only reset timeout for each test, the problem we're encountered is the max_runtime exhausting, we have to reset 'results->max_runtime' but not .timeout.
Li Wang <liwang@redhat.com> wrote: > Cyril Hrubis <chrubis@suse.cz> wrote: > > >> >> > Or each test case (defined by tcnt) could be given an equal share of the >> > runtime? >> >> That would be solution as well. If everyone agrees on this I will go >> with this version, it should be as easy as: >> >> diff --git a/lib/tst_test.c b/lib/tst_test.c >> index dad8aad92..69d7799c3 100644 >> --- a/lib/tst_test.c >> +++ b/lib/tst_test.c >> @@ -1349,6 +1349,8 @@ static void run_tests(void) >> >> if (results_equal(&saved_results, results)) >> tst_brk(TBROK, "Test %i haven't reported >> results!", i); >> + >> + kill(getppid(), SIGUSR1); >> > > I have to say NO here. > > This is only reset timeout for each test, the problem we're encountered > is the max_runtime exhausting, we have to reset 'results->max_runtime' > but not .timeout. > Okay, my perspective is shortsighted as well. This solution is correct in the direction but overlooks the significant global value 'tst_start_time'. If that value reflush within tcnt loop we don't need to reset max_runtime again, actually the real work in my previous patch is to invoke heartbeat() which touches tst_start_time. So I would suggest using heartbeat() instead of only sending SIGUSR1 to lib_pid. Or, do simply revision like: --- a/lib/tst_test.c +++ b/lib/tst_test.c @@ -1349,6 +1349,10 @@ static void run_tests(void) if (results_equal(&saved_results, results)) tst_brk(TBROK, "Test %i haven't reported results!", i); + + if (tst_clock_gettime(CLOCK_MONOTONIC, &tst_start_time)) + tst_res(TWARN | TERRNO, "tst_clock_gettime() failed"); + kill(getppid(), SIGUSR1); } }
Hi! > Okay, my perspective is shortsighted as well. > > This solution is correct in the direction but overlooks the significant > global value 'tst_start_time'. If that value reflush within tcnt loop we > don't need to reset max_runtime again, actually the real work in my > previous patch is to invoke heartbeat() which touches tst_start_time. > > So I would suggest using heartbeat() instead of only sending SIGUSR1 > to lib_pid. Or, do simply revision like: Ah, right, I guess that we should just call heartbeat() before each iteration of the test then. So basically: diff --git a/lib/tst_test.c b/lib/tst_test.c index dad8aad92..f3090217b 100644 --- a/lib/tst_test.c +++ b/lib/tst_test.c @@ -1317,6 +1317,24 @@ static void do_cleanup(void) cleanup_ipc(); } +static void heartbeat(void) +{ + if (tst_clock_gettime(CLOCK_MONOTONIC, &tst_start_time)) + tst_res(TWARN | TERRNO, "tst_clock_gettime() failed"); + + if (getppid() == 1) { + tst_res(TFAIL, "Main test process might have exit!"); + /* + * We need kill the task group immediately since the + * main process has exit. + */ + kill(0, SIGKILL); + exit(TBROK); + } + + kill(getppid(), SIGUSR1); +} + static void run_tests(void) { unsigned int i; @@ -1324,6 +1342,7 @@ static void run_tests(void) if (!tst_test->test) { saved_results = *results; + heartbeat(); tst_test->test_all(); if (getpid() != main_pid) { @@ -1339,6 +1358,7 @@ static void run_tests(void) for (i = 0; i < tst_test->tcnt; i++) { saved_results = *results; + heartbeat(); tst_test->test(i); if (getpid() != main_pid) { @@ -1349,6 +1369,8 @@ static void run_tests(void) if (results_equal(&saved_results, results)) tst_brk(TBROK, "Test %i haven't reported results!", i); + + kill(getppid(), SIGUSR1); } } @@ -1379,24 +1401,6 @@ static void add_paths(void) free(new_path); } -static void heartbeat(void) -{ - if (tst_clock_gettime(CLOCK_MONOTONIC, &tst_start_time)) - tst_res(TWARN | TERRNO, "tst_clock_gettime() failed"); - - if (getppid() == 1) { - tst_res(TFAIL, "Main test process might have exit!"); - /* - * We need kill the task group immediately since the - * main process has exit. - */ - kill(0, SIGKILL); - exit(TBROK); - } - - kill(getppid(), SIGUSR1); -} - static void testrun(void) { unsigned int i = 0; @@ -1425,7 +1429,6 @@ static void testrun(void) break; run_tests(); - heartbeat(); } do_test_cleanup(); I guess that this should go in a separate patch on the top of the "Introduce concept of max runtime".
On Thu, May 19, 2022 at 5:03 PM Cyril Hrubis <chrubis@suse.cz> wrote: > Hi! > > Okay, my perspective is shortsighted as well. > > > > This solution is correct in the direction but overlooks the significant > > global value 'tst_start_time'. If that value reflush within tcnt loop we > > don't need to reset max_runtime again, actually the real work in my > > previous patch is to invoke heartbeat() which touches tst_start_time. > > > > So I would suggest using heartbeat() instead of only sending SIGUSR1 > > to lib_pid. Or, do simply revision like: > > Ah, right, I guess that we should just call heartbeat() before each > iteration of the test then. > > So basically: > > diff --git a/lib/tst_test.c b/lib/tst_test.c > index dad8aad92..f3090217b 100644 > --- a/lib/tst_test.c > +++ b/lib/tst_test.c > @@ -1317,6 +1317,24 @@ static void do_cleanup(void) > cleanup_ipc(); > } > > +static void heartbeat(void) > +{ > + if (tst_clock_gettime(CLOCK_MONOTONIC, &tst_start_time)) > + tst_res(TWARN | TERRNO, "tst_clock_gettime() failed"); > + > + if (getppid() == 1) { > + tst_res(TFAIL, "Main test process might have exit!"); > + /* > + * We need kill the task group immediately since the > + * main process has exit. > + */ > + kill(0, SIGKILL); > + exit(TBROK); > + } > + > + kill(getppid(), SIGUSR1); > +} > + > static void run_tests(void) > { > unsigned int i; > @@ -1324,6 +1342,7 @@ static void run_tests(void) > > if (!tst_test->test) { > saved_results = *results; > + heartbeat(); > tst_test->test_all(); > > if (getpid() != main_pid) { > @@ -1339,6 +1358,7 @@ static void run_tests(void) > > for (i = 0; i < tst_test->tcnt; i++) { > saved_results = *results; > + heartbeat(); > tst_test->test(i); > > if (getpid() != main_pid) { > @@ -1349,6 +1369,8 @@ static void run_tests(void) > > if (results_equal(&saved_results, results)) > tst_brk(TBROK, "Test %i haven't reported > results!", i); > + > + kill(getppid(), SIGUSR1); > As we already invoke heartbeat() at the beginning of each tcnt loop, why are we still sending SIGUSR1 here? Otherwise this version looks good to me. > } > } > > @@ -1379,24 +1401,6 @@ static void add_paths(void) > free(new_path); > } > > -static void heartbeat(void) > -{ > - if (tst_clock_gettime(CLOCK_MONOTONIC, &tst_start_time)) > - tst_res(TWARN | TERRNO, "tst_clock_gettime() failed"); > - > - if (getppid() == 1) { > - tst_res(TFAIL, "Main test process might have exit!"); > - /* > - * We need kill the task group immediately since the > - * main process has exit. > - */ > - kill(0, SIGKILL); > - exit(TBROK); > - } > - > - kill(getppid(), SIGUSR1); > -} > - > static void testrun(void) > { > unsigned int i = 0; > @@ -1425,7 +1429,6 @@ static void testrun(void) > break; > > run_tests(); > - heartbeat(); > } > > do_test_cleanup(); > > > I guess that this should go in a separate patch on the top of the > "Introduce concept of max runtime". > Agree.
Hi! > > + > > + kill(getppid(), SIGUSR1); > > > > As we already invoke heartbeat() at the beginning of each tcnt loop, > why are we still sending SIGUSR1 here? My bad, I did forget to remove it. > > I guess that this should go in a separate patch on the top of the > > "Introduce concept of max runtime". > > > > Agree. Should I send a V4 with this patch included?
Cyril Hrubis <chrubis@suse.cz> wrote: > > Should I send a V4 with this patch included? > No need. You can push with this new patch adding, then I will kick start release testing for the whole LTP from our side.
Hi! > No need. You can push with this new patch adding, > then I will kick start release testing for the whole LTP > from our side. Great, please do so. Andrea is going to send small fix to the aiodio tests that should be merged as well, otherwise these may fail rarely.
Hello, Cyril Hrubis <chrubis@suse.cz> writes: > Hi! >> No need. You can push with this new patch adding, >> then I will kick start release testing for the whole LTP >> from our side. > > Great, please do so. > > Andrea is going to send small fix to the aiodio tests that should be > merged as well, otherwise these may fail rarely. With these changes please add: Reviewed-by: Richard Palethorpe <rpalethorpe@suse.com>
diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h index 4f09ed416..bef424002 100644 --- a/include/tst_fuzzy_sync.h +++ b/include/tst_fuzzy_sync.h @@ -160,15 +160,6 @@ struct tst_fzsync_pair { int b_cntr; /** Internal; Used by tst_fzsync_pair_exit() and fzsync_pair_wait() */ int exit; - /** - * The maximum desired execution time as a proportion of the timeout - * - * A value x so that 0 < x < 1 which decides how long the test should - * be run for (assuming the loop limit is not exceeded first). - * - * Defaults to 0.5 (~150 seconds with default timeout). - */ - float exec_time_p; /** Internal; The test time remaining on tst_fzsync_pair_reset() */ float exec_time_start; /** @@ -214,7 +205,6 @@ static inline void tst_fzsync_pair_init(struct tst_fzsync_pair *pair) CHK(avg_alpha, 0, 1, 0.25); CHK(min_samples, 20, INT_MAX, 1024); CHK(max_dev_ratio, 0, 1, 0.1); - CHK(exec_time_p, 0, 1, 0.5); CHK(exec_loops, 20, INT_MAX, 3000000); if (tst_ncpus_available() <= 1) @@ -291,7 +281,7 @@ static inline void tst_fzsync_pair_reset(struct tst_fzsync_pair *pair, if (run_b) SAFE_PTHREAD_CREATE(&pair->thread_b, 0, run_b, 0); - pair->exec_time_start = (float)tst_timeout_remaining(); + pair->exec_time_start = (float)tst_remaining_runtime(); } /** @@ -644,10 +634,9 @@ static inline void tst_fzsync_wait_b(struct tst_fzsync_pair *pair) */ static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) { - float rem_p = 1 - tst_timeout_remaining() / pair->exec_time_start; + float rem_p = 1 - tst_remaining_runtime() / pair->exec_time_start; - if ((pair->exec_time_p * SAMPLING_SLICE < rem_p) - && (pair->sampling > 0)) { + if ((SAMPLING_SLICE < rem_p) && (pair->sampling > 0)) { tst_res(TINFO, "Stopped sampling at %d (out of %d) samples, " "sampling time reached 50%% of the total time limit", pair->exec_loop, pair->min_samples); @@ -655,7 +644,7 @@ static inline int tst_fzsync_run_a(struct tst_fzsync_pair *pair) tst_fzsync_pair_info(pair); } - if (pair->exec_time_p < rem_p) { + if (rem_p >= 1) { tst_res(TINFO, "Exceeded execution time, requesting exit"); tst_atomic_store(1, &pair->exit); diff --git a/lib/newlib_tests/tst_fuzzy_sync01.c b/lib/newlib_tests/tst_fuzzy_sync01.c index 5f23a085b..d0748958c 100644 --- a/lib/newlib_tests/tst_fuzzy_sync01.c +++ b/lib/newlib_tests/tst_fuzzy_sync01.c @@ -227,4 +227,5 @@ static struct tst_test test = { .test = run, .setup = setup, .cleanup = cleanup, + .max_runtime = 150, }; diff --git a/lib/newlib_tests/tst_fuzzy_sync02.c b/lib/newlib_tests/tst_fuzzy_sync02.c index c1c2a5327..afe4973b5 100644 --- a/lib/newlib_tests/tst_fuzzy_sync02.c +++ b/lib/newlib_tests/tst_fuzzy_sync02.c @@ -169,4 +169,5 @@ static struct tst_test test = { .test = run, .setup = setup, .cleanup = cleanup, + .max_runtime = 150, }; diff --git a/lib/newlib_tests/tst_fuzzy_sync03.c b/lib/newlib_tests/tst_fuzzy_sync03.c index 0d74e1eae..47ce7675a 100644 --- a/lib/newlib_tests/tst_fuzzy_sync03.c +++ b/lib/newlib_tests/tst_fuzzy_sync03.c @@ -99,4 +99,5 @@ static struct tst_test test = { .setup = setup, .cleanup = cleanup, .test_all = run, + .max_runtime = 150, }; diff --git a/testcases/kernel/crypto/af_alg07.c b/testcases/kernel/crypto/af_alg07.c index ef13ad764..9c2516632 100644 --- a/testcases/kernel/crypto/af_alg07.c +++ b/testcases/kernel/crypto/af_alg07.c @@ -125,6 +125,7 @@ static struct tst_test test = { .cleanup = cleanup, .min_kver = "4.10.0", .min_cpus = 2, + .max_runtime = 150, .taint_check = TST_TAINT_W | TST_TAINT_D, .tags = (const struct tst_tag[]) { {"linux-git", "ff7b11aa481f"}, diff --git a/testcases/kernel/mem/thp/thp04.c b/testcases/kernel/mem/thp/thp04.c index 985394dc3..c93da54e6 100644 --- a/testcases/kernel/mem/thp/thp04.c +++ b/testcases/kernel/mem/thp/thp04.c @@ -163,6 +163,7 @@ static struct tst_test test = { .test_all = run, .setup = setup, .cleanup = cleanup, + .max_runtime = 150, .tags = (const struct tst_tag[]) { {"linux-git", "a8f97366452e"}, {"linux-git", "8310d48b125d"}, diff --git a/testcases/kernel/pty/pty03.c b/testcases/kernel/pty/pty03.c index 71bcb2eb6..7354fdb21 100644 --- a/testcases/kernel/pty/pty03.c +++ b/testcases/kernel/pty/pty03.c @@ -135,7 +135,6 @@ static void do_test(unsigned int n) static void setup(void) { fzp.min_samples = 20; - fzp.exec_time_p = 0.1; tst_fzsync_pair_init(&fzp); } @@ -151,6 +150,7 @@ static struct tst_test test = { .setup = setup, .cleanup = cleanup, .needs_root = 1, + .max_runtime = 30, .tags = (const struct tst_tag[]) { {"linux-git", "0ace17d568241"}, {"CVE", "2020-14416"}, diff --git a/testcases/kernel/pty/pty05.c b/testcases/kernel/pty/pty05.c index afef051c8..35ecce922 100644 --- a/testcases/kernel/pty/pty05.c +++ b/testcases/kernel/pty/pty05.c @@ -97,6 +97,7 @@ static struct tst_test test = { .setup = setup, .cleanup = cleanup, .taint_check = TST_TAINT_W | TST_TAINT_D, + .max_runtime = 150, .tags = (const struct tst_tag[]) { {"linux-git", "82f2341c94d27"}, {"CVE", "2017-2636"}, diff --git a/testcases/kernel/pty/pty06.c b/testcases/kernel/pty/pty06.c index cc8eb1f85..7e0f4233c 100644 --- a/testcases/kernel/pty/pty06.c +++ b/testcases/kernel/pty/pty06.c @@ -97,6 +97,7 @@ static struct tst_test test = { .cleanup = cleanup, .needs_root = 1, .taint_check = TST_TAINT_W | TST_TAINT_D, + .max_runtime = 150, .tags = (const struct tst_tag[]) { { "linux-git", "ca4463bf8438"}, {} diff --git a/testcases/kernel/pty/pty07.c b/testcases/kernel/pty/pty07.c index c63b71c89..68b3e470c 100644 --- a/testcases/kernel/pty/pty07.c +++ b/testcases/kernel/pty/pty07.c @@ -110,6 +110,7 @@ static struct tst_test test = { .cleanup = cleanup, .needs_root = 1, .taint_check = TST_TAINT_W | TST_TAINT_D, + .max_runtime = 150, .tags = (const struct tst_tag[]) { { "linux-git", "6cd1ed50efd8"}, {} diff --git a/testcases/kernel/sound/snd_seq01.c b/testcases/kernel/sound/snd_seq01.c index c56752230..31038b937 100644 --- a/testcases/kernel/sound/snd_seq01.c +++ b/testcases/kernel/sound/snd_seq01.c @@ -123,7 +123,7 @@ static struct tst_test test = { .tcnt = ARRAY_SIZE(testfunc_list), .setup = setup, .cleanup = cleanup, - .timeout = 120, + .max_runtime = 60, .taint_check = TST_TAINT_W | TST_TAINT_D, .tags = (const struct tst_tag[]) { {"linux-git", "d15d662e89fc"}, diff --git a/testcases/kernel/sound/snd_timer01.c b/testcases/kernel/sound/snd_timer01.c index 51591c18e..3d8e715e3 100644 --- a/testcases/kernel/sound/snd_timer01.c +++ b/testcases/kernel/sound/snd_timer01.c @@ -139,6 +139,7 @@ static struct tst_test test = { .setup = setup, .cleanup = cleanup, .taint_check = TST_TAINT_W | TST_TAINT_D, + .max_runtime = 150, .tags = (const struct tst_tag[]) { {"linux-git", "d11662f4f798"}, {"linux-git", "ba3021b2c79b"}, diff --git a/testcases/kernel/syscalls/bind/bind06.c b/testcases/kernel/syscalls/bind/bind06.c index 1dfc534a7..618cfce46 100644 --- a/testcases/kernel/syscalls/bind/bind06.c +++ b/testcases/kernel/syscalls/bind/bind06.c @@ -102,7 +102,7 @@ static struct tst_test test = { .test_all = run, .setup = setup, .cleanup = cleanup, - .timeout = 600, + .max_runtime = 300, .taint_check = TST_TAINT_W | TST_TAINT_D, .needs_kconfigs = (const char *[]) { "CONFIG_USER_NS=y", diff --git a/testcases/kernel/syscalls/inotify/inotify09.c b/testcases/kernel/syscalls/inotify/inotify09.c index fdfc9c078..a18750794 100644 --- a/testcases/kernel/syscalls/inotify/inotify09.c +++ b/testcases/kernel/syscalls/inotify/inotify09.c @@ -94,6 +94,7 @@ static struct tst_test test = { .setup = setup, .cleanup = cleanup, .test_all = verify_inotify, + .max_runtime = 150, .tags = (const struct tst_tag[]) { {"linux-git", "d90a10e2444b"}, {} diff --git a/testcases/kernel/syscalls/ipc/shmctl/shmctl05.c b/testcases/kernel/syscalls/ipc/shmctl/shmctl05.c index 228174c9b..ca668aaf7 100644 --- a/testcases/kernel/syscalls/ipc/shmctl/shmctl05.c +++ b/testcases/kernel/syscalls/ipc/shmctl/shmctl05.c @@ -106,7 +106,7 @@ static void cleanup(void) } static struct tst_test test = { - .timeout = 20, + .max_runtime = 10, .setup = setup, .test_all = do_test, .cleanup = cleanup, diff --git a/testcases/kernel/syscalls/sendmsg/sendmsg03.c b/testcases/kernel/syscalls/sendmsg/sendmsg03.c index c7d72f686..505a6dd24 100644 --- a/testcases/kernel/syscalls/sendmsg/sendmsg03.c +++ b/testcases/kernel/syscalls/sendmsg/sendmsg03.c @@ -105,6 +105,7 @@ static struct tst_test test = { .setup = setup, .cleanup = cleanup, .taint_check = TST_TAINT_W | TST_TAINT_D, + .max_runtime = 150, .tags = (const struct tst_tag[]) { {"linux-git", "8f659a03a0ba"}, {"CVE", "2017-17712"}, diff --git a/testcases/kernel/syscalls/setsockopt/setsockopt06.c b/testcases/kernel/syscalls/setsockopt/setsockopt06.c index c6fc2fba9..9c818646b 100644 --- a/testcases/kernel/syscalls/setsockopt/setsockopt06.c +++ b/testcases/kernel/syscalls/setsockopt/setsockopt06.c @@ -46,7 +46,6 @@ static void setup(void) SAFE_FILE_PRINTF("/proc/self/gid_map", "0 %d 1", real_gid); fzsync_pair.exec_loops = 100000; - fzsync_pair.exec_time_p = 0.9; tst_fzsync_pair_init(&fzsync_pair); } @@ -123,6 +122,7 @@ static struct tst_test test = { .test_all = run, .setup = setup, .cleanup = cleanup, + .max_runtime = 270, .taint_check = TST_TAINT_W | TST_TAINT_D, .needs_kconfigs = (const char *[]) { "CONFIG_USER_NS=y", diff --git a/testcases/kernel/syscalls/setsockopt/setsockopt07.c b/testcases/kernel/syscalls/setsockopt/setsockopt07.c index 147ae3899..616159a90 100644 --- a/testcases/kernel/syscalls/setsockopt/setsockopt07.c +++ b/testcases/kernel/syscalls/setsockopt/setsockopt07.c @@ -136,6 +136,7 @@ static struct tst_test test = { .test_all = run, .setup = setup, .cleanup = cleanup, + .max_runtime = 150, .needs_kconfigs = (const char *[]) { "CONFIG_USER_NS=y", "CONFIG_NET_NS=y", diff --git a/testcases/kernel/syscalls/timerfd/timerfd_settime02.c b/testcases/kernel/syscalls/timerfd/timerfd_settime02.c index bd92ee964..84ce95538 100644 --- a/testcases/kernel/syscalls/timerfd/timerfd_settime02.c +++ b/testcases/kernel/syscalls/timerfd/timerfd_settime02.c @@ -112,6 +112,7 @@ static struct tst_test test = { .cleanup = cleanup, .min_kver = "2.6.25", .taint_check = TST_TAINT_W | TST_TAINT_D, + .max_runtime = 150, .tags = (const struct tst_tag[]) { {"linux-git", "1e38da300e1e"}, {"CVE", "2017-10661"}, diff --git a/testcases/kernel/syscalls/writev/writev03.c b/testcases/kernel/syscalls/writev/writev03.c index d0c64743b..5b496203a 100644 --- a/testcases/kernel/syscalls/writev/writev03.c +++ b/testcases/kernel/syscalls/writev/writev03.c @@ -49,7 +49,6 @@ static void setup(void) mapfd = SAFE_OPEN(MAPFILE, O_CREAT|O_RDWR|O_TRUNC, 0644); SAFE_WRITE(1, mapfd, buf, BUF_SIZE); - fzsync_pair.exec_time_p = 0.25; tst_fzsync_pair_init(&fzsync_pair); } @@ -146,6 +145,7 @@ static struct tst_test test = { .min_cpus = 2, .setup = setup, .cleanup = cleanup, + .max_runtime = 75, .tags = (const struct tst_tag[]) { {"linux-git", "d4690f1e1cda"}, {}
The runtime for fuzzy sync tests are computed as timeout * exec_time_p. The default timeout was 300 and the default exec_time_p was 0.5. CC: Richard Palethorpe <rpalethorpe@suse.com> Signed-off-by: Cyril Hrubis <chrubis@suse.cz> --- include/tst_fuzzy_sync.h | 19 ++++--------------- lib/newlib_tests/tst_fuzzy_sync01.c | 1 + lib/newlib_tests/tst_fuzzy_sync02.c | 1 + lib/newlib_tests/tst_fuzzy_sync03.c | 1 + testcases/kernel/crypto/af_alg07.c | 1 + testcases/kernel/mem/thp/thp04.c | 1 + testcases/kernel/pty/pty03.c | 2 +- testcases/kernel/pty/pty05.c | 1 + testcases/kernel/pty/pty06.c | 1 + testcases/kernel/pty/pty07.c | 1 + testcases/kernel/sound/snd_seq01.c | 2 +- testcases/kernel/sound/snd_timer01.c | 1 + testcases/kernel/syscalls/bind/bind06.c | 2 +- testcases/kernel/syscalls/inotify/inotify09.c | 1 + .../kernel/syscalls/ipc/shmctl/shmctl05.c | 2 +- testcases/kernel/syscalls/sendmsg/sendmsg03.c | 1 + .../kernel/syscalls/setsockopt/setsockopt06.c | 2 +- .../kernel/syscalls/setsockopt/setsockopt07.c | 1 + .../syscalls/timerfd/timerfd_settime02.c | 1 + testcases/kernel/syscalls/writev/writev03.c | 2 +- 20 files changed, 23 insertions(+), 21 deletions(-)