diff mbox series

syscalls/clock_gettime: Add test to check bug during successive readings

Message ID e15907ed42f0276f09c4120b9255db5764d4e9b6.1591343286.git.viresh.kumar@linaro.org
State Changes Requested
Headers show
Series syscalls/clock_gettime: Add test to check bug during successive readings | expand

Commit Message

Viresh Kumar June 5, 2020, 7:48 a.m. UTC
An issue was reported recently where a bug was found during successive
reading of 64 bit time on arm32 platforms. Add a test for that.

https://github.com/richfelker/musl-cross-make/issues/96

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 runtest/syscalls                              |  1 +
 .../kernel/syscalls/clock_gettime/.gitignore  |  1 +
 .../syscalls/clock_gettime/clock_gettime04.c  | 96 +++++++++++++++++++
 3 files changed, 98 insertions(+)
 create mode 100644 testcases/kernel/syscalls/clock_gettime/clock_gettime04.c

Comments

Arnd Bergmann June 5, 2020, 12:19 p.m. UTC | #1
On Fri, Jun 5, 2020 at 9:48 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:

> +static struct test_variants {
> +       int (*gettime)(clockid_t clk_id, void *ts);
> +       enum tst_ts_type type;
> +       char *desc;
> +} variants[] = {
> +       { .gettime = libc_clock_gettime, .type = TST_LIBC_TIMESPEC, .desc = "vDSO or syscall with libc spec"},
> +
> +#if (__NR_clock_gettime != __LTP__NR_INVALID_SYSCALL)
> +       { .gettime = sys_clock_gettime, .type = TST_KERN_OLD_TIMESPEC, .desc = "syscall with old kernel spec"},
> +#endif
> +
> +#if (__NR_clock_gettime64 != __LTP__NR_INVALID_SYSCALL)
> +       { .gettime = sys_clock_gettime64, .type = TST_KERN_TIMESPEC, .desc = "syscall time64 with kernel spec"},
> +#endif
> +};

Would it be possible to add a direct call to the two vdso
implementations here as well?
This test would not catch the bug that we had because only the
clock_gettime64 vdso
implementation was affected, but on most systems the libc clock_gettime calls
the clock_gettime vdso functions, not clock_gettime64.

> +static void run(unsigned int i)
> +{
> +       struct tst_ts ts_start, ts_end;
> +       long long start, end, diff;
> +       struct test_variants *tv;
> +       int count = 10000;
> +       unsigned int j;
> +
> +       while (--count) {
> +               /* Store reference time in start */
> +               if (clks[i] == CLOCK_REALTIME) {
> +                       struct timeval tval;
> +
> +                       /* Compare CLOCK_REALTIME with gettimeofday() as well */
> +                       if (gettimeofday(&tval, NULL) < 0)
> +                               tst_brk(TBROK | TERRNO, "gettimeofday() failed");
> +
> +                       start = tst_timeval_to_ms(tval);
> +               } else {
> +                       tv = &variants[0];
> +                       ts_start.type = tv->type;
> +                       tv->gettime(clks[i], tst_ts_get(&ts_start));
> +                       start = tst_ts_to_ms(ts_start);
> +               }
> +
> +               for (j = 0; j < ARRAY_SIZE(variants); j++) {
> +                       tv = &variants[j];
> +                       ts_end.type = tv->type;
> +
> +                       tv->gettime(clks[i], tst_ts_get(&ts_end));
> +                       end = tst_ts_to_ms(ts_end);
> +
> +                       diff = end - start;
> +                       if (diff < 0) {
> +                               tst_res(TFAIL, "%s: Time travelled backwards: %lld",
> +                                               tst_clock_name(clks[i]), diff);
> +                               return;
> +                       }

I think this check should be done on the nanoseconds, not after the
conversion to milliseconds -- otherwise you miss when time goes
backwards by less than a millisecond.

> +
> +                       if (diff >= 5) {
> +                               tst_res(TFAIL, "%s: Difference between successive readings greater than 5 ms: %lld",
> +                                               tst_clock_name(clks[i]), diff);
> +                               return;
> +                       }
> +
> +                       /* Refresh time in start */
> +                       start = end;

resetting start here seems like the right choice in order to check for
backward jumps between loop iterations, but I see that the start of
the loop invalidates that by overwriting it again.

One way to solve this would be by having the gettimeofday() call
as part of the loop but skip it for !CLOCK_REALTIME, and adding
a special case for the lower resolution.

     Arnd
Viresh Kumar June 8, 2020, 10:09 a.m. UTC | #2
On 05-06-20, 14:19, Arnd Bergmann wrote:
> On Fri, Jun 5, 2020 at 9:48 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> 
> > +static struct test_variants {
> > +       int (*gettime)(clockid_t clk_id, void *ts);
> > +       enum tst_ts_type type;
> > +       char *desc;
> > +} variants[] = {
> > +       { .gettime = libc_clock_gettime, .type = TST_LIBC_TIMESPEC, .desc = "vDSO or syscall with libc spec"},
> > +
> > +#if (__NR_clock_gettime != __LTP__NR_INVALID_SYSCALL)
> > +       { .gettime = sys_clock_gettime, .type = TST_KERN_OLD_TIMESPEC, .desc = "syscall with old kernel spec"},
> > +#endif
> > +
> > +#if (__NR_clock_gettime64 != __LTP__NR_INVALID_SYSCALL)
> > +       { .gettime = sys_clock_gettime64, .type = TST_KERN_TIMESPEC, .desc = "syscall time64 with kernel spec"},
> > +#endif
> > +};
> 
> Would it be possible to add a direct call to the two vdso
> implementations here as well?

Which two vdso calls are you talking about here ?

> > +static void run(unsigned int i)
> > +{
> > +       struct tst_ts ts_start, ts_end;
> > +       long long start, end, diff;
> > +       struct test_variants *tv;
> > +       int count = 10000;
> > +       unsigned int j;
> > +
> > +       while (--count) {
> > +               /* Store reference time in start */
> > +               if (clks[i] == CLOCK_REALTIME) {
> > +                       struct timeval tval;
> > +
> > +                       /* Compare CLOCK_REALTIME with gettimeofday() as well */
> > +                       if (gettimeofday(&tval, NULL) < 0)
> > +                               tst_brk(TBROK | TERRNO, "gettimeofday() failed");
> > +
> > +                       start = tst_timeval_to_ms(tval);
> > +               } else {
> > +                       tv = &variants[0];
> > +                       ts_start.type = tv->type;
> > +                       tv->gettime(clks[i], tst_ts_get(&ts_start));
> > +                       start = tst_ts_to_ms(ts_start);
> > +               }
> > +
> > +               for (j = 0; j < ARRAY_SIZE(variants); j++) {
> > +                       tv = &variants[j];
> > +                       ts_end.type = tv->type;
> > +
> > +                       tv->gettime(clks[i], tst_ts_get(&ts_end));
> > +                       end = tst_ts_to_ms(ts_end);
> > +
> > +                       diff = end - start;
> > +                       if (diff < 0) {
> > +                               tst_res(TFAIL, "%s: Time travelled backwards: %lld",
> > +                                               tst_clock_name(clks[i]), diff);
> > +                               return;
> > +                       }
> 
> I think this check should be done on the nanoseconds, not after the
> conversion to milliseconds -- otherwise you miss when time goes
> backwards by less than a millisecond.
> 
> > +
> > +                       if (diff >= 5) {
> > +                               tst_res(TFAIL, "%s: Difference between successive readings greater than 5 ms: %lld",
> > +                                               tst_clock_name(clks[i]), diff);
> > +                               return;
> > +                       }
> > +
> > +                       /* Refresh time in start */
> > +                       start = end;
> 
> resetting start here seems like the right choice in order to check for
> backward jumps between loop iterations, but I see that the start of
> the loop invalidates that by overwriting it again.
> 
> One way to solve this would be by having the gettimeofday() call
> as part of the loop but skip it for !CLOCK_REALTIME, and adding
> a special case for the lower resolution.

This is what I have done now:

 static void run(unsigned int i)
 {
-       struct tst_ts ts_start, ts_end;
+       struct tst_ts ts;
        long long start, end, diff;
        struct test_variants *tv;
        int count = 10000;
        unsigned int j;
 
-       while (--count) {
-               /* Store reference time in start */
-               if (clks[i] == CLOCK_REALTIME) {
-                       struct timeval tval;
-
-                       /* Compare CLOCK_REALTIME with gettimeofday() as well */
-                       if (gettimeofday(&tval, NULL) < 0)
-                               tst_brk(TBROK | TERRNO, "gettimeofday() failed");
-
-                       start = tst_timeval_to_ms(tval);
-               } else {
-                       tv = &variants[0];
-                       ts_start.type = tv->type;
-                       tv->gettime(clks[i], tst_ts_get(&ts_start));
-                       start = tst_ts_to_ms(ts_start);
-               }
+       /* Store reference time in start */
+       if (clks[i] == CLOCK_REALTIME) {
+               struct timeval tval;
 
+               /* Compare CLOCK_REALTIME with gettimeofday() as well */
+               if (gettimeofday(&tval, NULL) < 0)
+                       tst_brk(TBROK | TERRNO, "gettimeofday() failed");
+
+               start = tst_timeval_to_us(tval) * 1000;
+       } else {
+               tv = &variants[0];
+               ts.type = tv->type;
+               tv->gettime(clks[i], tst_ts_get(&ts));
+               start = tst_ts_to_ns(ts);
+       }
+
+       while (--count) {
                for (j = 0; j < ARRAY_SIZE(variants); j++) {
                        tv = &variants[j];
-                       ts_end.type = tv->type;
+                       ts.type = tv->type;
 
-                       tv->gettime(clks[i], tst_ts_get(&ts_end));
-                       end = tst_ts_to_ms(ts_end);
+                       tv->gettime(clks[i], tst_ts_get(&ts));
+                       end = tst_ts_to_ns(ts);
 
                        diff = end - start;
                        if (diff < 0) {
-                               tst_res(TFAIL, "%s: Time travelled backwards: %lld",
-                                               tst_clock_name(clks[i]), diff);
+                               tst_res(TFAIL, "%s: Time travelled backwards: %lld ns",
+                                       tst_clock_name(clks[i]), diff);
                                return;
                        }
 
+                       diff /= 1000000;
+
Arnd Bergmann June 8, 2020, 10:21 a.m. UTC | #3
On Mon, Jun 8, 2020 at 12:09 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 05-06-20, 14:19, Arnd Bergmann wrote:
> > On Fri, Jun 5, 2020 at 9:48 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > > +static struct test_variants {
> > > +       int (*gettime)(clockid_t clk_id, void *ts);
> > > +       enum tst_ts_type type;
> > > +       char *desc;
> > > +} variants[] = {
> > > +       { .gettime = libc_clock_gettime, .type = TST_LIBC_TIMESPEC, .desc = "vDSO or syscall with libc spec"},
> > > +
> > > +#if (__NR_clock_gettime != __LTP__NR_INVALID_SYSCALL)
> > > +       { .gettime = sys_clock_gettime, .type = TST_KERN_OLD_TIMESPEC, .desc = "syscall with old kernel spec"},
> > > +#endif
> > > +
> > > +#if (__NR_clock_gettime64 != __LTP__NR_INVALID_SYSCALL)
> > > +       { .gettime = sys_clock_gettime64, .type = TST_KERN_TIMESPEC, .desc = "syscall time64 with kernel spec"},
> > > +#endif
> > > +};
> >
> > Would it be possible to add a direct call to the two vdso
> > implementations here as well?
>
> Which two vdso calls are you talking about here ?

I mean clock_gettime() and clock_gettime() as implemented in the vdso,
see https://github.com/nlynch-mentor/vdsotest/tree/master/src for how
to call them.

>  static void run(unsigned int i)
>  {
> -       struct tst_ts ts_start, ts_end;
> +       struct tst_ts ts;
>         long long start, end, diff;
>         struct test_variants *tv;
>         int count = 10000;
>         unsigned int j;
>
> -       while (--count) {
> -               /* Store reference time in start */
> -               if (clks[i] == CLOCK_REALTIME) {
> -                       struct timeval tval;
> -
> -                       /* Compare CLOCK_REALTIME with gettimeofday() as well */
> -                       if (gettimeofday(&tval, NULL) < 0)
> -                               tst_brk(TBROK | TERRNO, "gettimeofday() failed");
> -
> -                       start = tst_timeval_to_ms(tval);
> -               } else {
> -                       tv = &variants[0];
> -                       ts_start.type = tv->type;
> -                       tv->gettime(clks[i], tst_ts_get(&ts_start));
> -                       start = tst_ts_to_ms(ts_start);
> -               }
> +       /* Store reference time in start */
> +       if (clks[i] == CLOCK_REALTIME) {
> +               struct timeval tval;
>
> +               /* Compare CLOCK_REALTIME with gettimeofday() as well */
> +               if (gettimeofday(&tval, NULL) < 0)
> +                       tst_brk(TBROK | TERRNO, "gettimeofday() failed");
> +
> +               start = tst_timeval_to_us(tval) * 1000;
> +       } else {
> +               tv = &variants[0];
> +               ts.type = tv->type;
> +               tv->gettime(clks[i], tst_ts_get(&ts));
> +               start = tst_ts_to_ns(ts);
> +       }

So now you only call gettimeofday() once instead of in each loop, right?
This won't test for spurious failures any more, but I suppose it would catch
any case where gettimeofday() and clock_gettime() are out of sync by
a lot. The only case you don't catch is where clock_gettime() sometimes
returns a value that is slightly earlier than gettimeofday().

> +       while (--count) {
>                 for (j = 0; j < ARRAY_SIZE(variants); j++) {
>                         tv = &variants[j];
> -                       ts_end.type = tv->type;
> +                       ts.type = tv->type;
>
> -                       tv->gettime(clks[i], tst_ts_get(&ts_end));
> -                       end = tst_ts_to_ms(ts_end);
> +                       tv->gettime(clks[i], tst_ts_get(&ts));
> +                       end = tst_ts_to_ns(ts);
>
>                         diff = end - start;
>                         if (diff < 0) {
> -                               tst_res(TFAIL, "%s: Time travelled backwards: %lld",
> -                                               tst_clock_name(clks[i]), diff);
> +                               tst_res(TFAIL, "%s: Time travelled backwards: %lld ns",
> +                                       tst_clock_name(clks[i]), diff);
>                                 return;
>                         }
>
> +                       diff /= 1000000;
> +

Right, this seems reasonable. I noticed that tst_ts_to_ns() might overflow
for bad timestamp values, but those would also likely cause jumps, so
you don't have to add an extra check.

     Arnd
Viresh Kumar June 8, 2020, 11:20 a.m. UTC | #4
On 08-06-20, 12:21, Arnd Bergmann wrote:
> So now you only call gettimeofday() once instead of in each loop, right?
> This won't test for spurious failures any more, but I suppose it would catch
> any case where gettimeofday() and clock_gettime() are out of sync by
> a lot. The only case you don't catch is where clock_gettime() sometimes
> returns a value that is slightly earlier than gettimeofday().

Okay, I missed the fact that we need to call gettimeofday() for every iteration
and here is the new diff which has tried to simplify the overall code. But this
has a problem now as it always reports this error:

clock_gettime04.c:88: FAIL: CLOCK_REALTIME: Time travelled backwards (2): -148 ns

I guess the problem is that gettimeofday() gets the value in usec resolution and
clock_gettime() gets it in nsec and so some nsec always get lost with
gettimeofday() and so the errors ? How should we take care of this ? Take diff
in usec instead of nsec ?

+++ b/testcases/kernel/syscalls/clock_gettime/clock_gettime04.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020 Linaro Limited. All rights reserved.
+ * Author: Viresh Kumar<viresh.kumar@linaro.org>
+ *
+ * Check time difference between successive readings and report a bug if
+ * difference found to be over 5 ms.
+ */
+
+#include "config.h"
+#include "tst_timer.h"
+#include "tst_safe_clocks.h"
+
+clockid_t clks[] = {
+	CLOCK_REALTIME,
+	CLOCK_REALTIME_COARSE,
+	CLOCK_MONOTONIC,
+	CLOCK_MONOTONIC_COARSE,
+	CLOCK_MONOTONIC_RAW,
+	CLOCK_BOOTTIME,
+};
+
+static inline int my_gettimeofday(clockid_t clk_id, void *ts)
+{
+	struct timeval tval;
+
+	if (clk_id != CLOCK_REALTIME)
+		tst_brk(TBROK, "%s: Invalid clk_id, exiting", tst_clock_name(clk_id));
+
+	if (gettimeofday(&tval, NULL) < 0)
+		tst_brk(TBROK | TERRNO, "gettimeofday() failed");
+
+	/*
+	 * The array defines the type to TST_LIBC_TIMESPEC and so we can cast
+	 * this into struct timespec.
+	 */
+	*((struct timespec *)ts) = tst_timespec_from_us(tst_timeval_to_us(tval));
+	return 0;
+}
+
+static struct test_variants {
+	int (*gettime)(clockid_t clk_id, void *ts);
+	enum tst_ts_type type;
+	char *desc;
+} variants[] = {
+	{ .gettime = libc_clock_gettime, .type = TST_LIBC_TIMESPEC, .desc = "vDSO or syscall with libc spec"},
+
+#if (__NR_clock_gettime != __LTP__NR_INVALID_SYSCALL)
+	{ .gettime = sys_clock_gettime, .type = TST_KERN_OLD_TIMESPEC, .desc = "syscall with old kernel spec"},
+#endif
+
+#if (__NR_clock_gettime64 != __LTP__NR_INVALID_SYSCALL)
+	{ .gettime = sys_clock_gettime64, .type = TST_KERN_TIMESPEC, .desc = "syscall time64 with kernel spec"},
+#endif
+	{ .gettime = my_gettimeofday, .type = TST_LIBC_TIMESPEC, .desc = "gettimeofday"},
+};
+
+static void run(unsigned int i)
+{
+	struct tst_ts ts;
+	long long start, end = 0, diff;
+	struct test_variants *tv;
+	int count = 10000;
+	unsigned int j;
+
+	do {
+		for (j = 0; j < ARRAY_SIZE(variants); j++) {
+			/* Refresh time in start */
+			start = end;
+
+			tv = &variants[j];
+			ts.type = tv->type;
+
+			/* Do gettimeofday() test only for CLOCK_REALTIME */
+			if (tv->gettime == my_gettimeofday && clks[i] != CLOCK_REALTIME)
+				continue;
+
+			tv->gettime(clks[i], tst_ts_get(&ts));
+			end = tst_ts_to_ns(ts);
+
+			/* Skip comparison on first traversal */
+			if (count == 10000 && !j)
+				continue;
+
+			diff = end - start;
+			if (diff < 0) {
+				tst_res(TFAIL, "%s: Time travelled backwards (%d): %lld ns",
+					tst_clock_name(clks[i]), j, diff);
+				return;
+			}
+
+			diff /= 1000000;
+
+			if (diff >= 5) {
+				tst_res(TFAIL, "%s: Difference between successive readings greater than 5 ms (%d): %lld",
+					tst_clock_name(clks[i]), j, diff);
+				return;
+			}
+		}
+	} while (--count);
+
+	tst_res(TPASS, "%s: Difference between successive readings is reasonable",
+		tst_clock_name(clks[i]));
+}
+
+static struct tst_test test = {
+	.test = run,
+	.tcnt = ARRAY_SIZE(clks),
+};
Arnd Bergmann June 8, 2020, 2:51 p.m. UTC | #5
On Mon, Jun 8, 2020 at 1:20 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 08-06-20, 12:21, Arnd Bergmann wrote:
> > So now you only call gettimeofday() once instead of in each loop, right?
> > This won't test for spurious failures any more, but I suppose it would catch
> > any case where gettimeofday() and clock_gettime() are out of sync by
> > a lot. The only case you don't catch is where clock_gettime() sometimes
> > returns a value that is slightly earlier than gettimeofday().
>
> Okay, I missed the fact that we need to call gettimeofday() for every iteration
> and here is the new diff which has tried to simplify the overall code. But this
> has a problem now as it always reports this error:
>
> clock_gettime04.c:88: FAIL: CLOCK_REALTIME: Time travelled backwards (2): -148 ns
>
> I guess the problem is that gettimeofday() gets the value in usec resolution and
> clock_gettime() gets it in nsec and so some nsec always get lost with
> gettimeofday() and so the errors ? How should we take care of this ? Take diff
> in usec instead of nsec ?

You already have a special case for gettimeofday(), so just round down
the 'start' value to the previous microsecond when you are in that
case. For all other cases, I think we want to check the exact nanoseconds,
in particular to ensure that the calculation is done the same way in the
vdso vs the kernel.

> +static void run(unsigned int i)
> +{
> +       struct tst_ts ts;
> +       long long start, end = 0, diff;
> +       struct test_variants *tv;
> +       int count = 10000;
> +       unsigned int j;
> +
> +       do {
> +               for (j = 0; j < ARRAY_SIZE(variants); j++) {
> +                       /* Refresh time in start */
> +                       start = end;
> +
> +                       tv = &variants[j];
> +                       ts.type = tv->type;
> +
> +                       /* Do gettimeofday() test only for CLOCK_REALTIME */
> +                       if (tv->gettime == my_gettimeofday && clks[i] != CLOCK_REALTIME)
> +                               continue;
> +
> +                       tv->gettime(clks[i], tst_ts_get(&ts));
> +                       end = tst_ts_to_ns(ts);
> +
> +                       /* Skip comparison on first traversal */
> +                       if (count == 10000 && !j)
> +                               continue;
> +
> +                       diff = end - start;

I think on the first iteration, 'start' is wrong here as well, and needs to
be initialized once, as you did in the earlier versions.

       Arnd
Viresh Kumar June 9, 2020, 7:05 a.m. UTC | #6
On 08-06-20, 16:51, Arnd Bergmann wrote:
> You already have a special case for gettimeofday(), so just round down
> the 'start' value to the previous microsecond when you are in that
> case.

Since this will also affect the next reading as well (as we will loose
values < 1 us), I tried to do it a bit differently.

@@ -78,6 +78,13 @@ static void run(unsigned int i)
                        tv->gettime(clks[i], tst_ts_get(&ts));
                        end = tst_ts_to_ns(ts);
 
+                       /*
+                        * gettimeofday() doesn't value less than 1 us, copy
+                        * that from start.
+                        */
+                       if (tv->gettime == my_gettimeofday && end < start)
+                               end += start % 1000;
+
                        /* Skip comparison on first traversal */
                        if (count == 10000 && !j)
                                continue;

> > +static void run(unsigned int i)
> > +{
> > +       struct tst_ts ts;
> > +       long long start, end = 0, diff;
> > +       struct test_variants *tv;
> > +       int count = 10000;
> > +       unsigned int j;
> > +
> > +       do {
> > +               for (j = 0; j < ARRAY_SIZE(variants); j++) {
> > +                       /* Refresh time in start */
> > +                       start = end;
> > +
> > +                       tv = &variants[j];
> > +                       ts.type = tv->type;
> > +
> > +                       /* Do gettimeofday() test only for CLOCK_REALTIME */
> > +                       if (tv->gettime == my_gettimeofday && clks[i] != CLOCK_REALTIME)
> > +                               continue;
> > +
> > +                       tv->gettime(clks[i], tst_ts_get(&ts));
> > +                       end = tst_ts_to_ns(ts);
> > +
> > +                       /* Skip comparison on first traversal */
> > +                       if (count == 10000 && !j)
> > +                               continue;

I am skipping diff for the first iteration..

> > +
> > +                       diff = end - start;
> 
> I think on the first iteration, 'start' is wrong here as well, and needs to
> be initialized once, as you did in the earlier versions.

And so this won't be a problem.
Arnd Bergmann June 9, 2020, 7:43 a.m. UTC | #7
On Tue, Jun 9, 2020 at 9:05 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 08-06-20, 16:51, Arnd Bergmann wrote:
> > You already have a special case for gettimeofday(), so just round down
> > the 'start' value to the previous microsecond when you are in that
> > case.
>
> Since this will also affect the next reading as well (as we will loose
> values < 1 us), I tried to do it a bit differently.
>
> @@ -78,6 +78,13 @@ static void run(unsigned int i)
>                         tv->gettime(clks[i], tst_ts_get(&ts));
>                         end = tst_ts_to_ns(ts);
>
> +                       /*
> +                        * gettimeofday() doesn't value less than 1 us, copy
> +                        * that from start.
> +                        */
> +                       if (tv->gettime == my_gettimeofday && end < start)
> +                               end += start % 1000;

This looks like it should work correctly, but it feels a bit more fragile than

                       if (tv->gettime == my_gettimeofday)
                               start -= start % 1000;

which would bring the start and end values to the same resolution
rather than making up values that were never read.

Your approach however has the advantage of adding less overhead
as the % operator on a 64-bit integer is going to turn into a very
expensive calculation on most 32-bit processors that itself can
skew the timing.

> > > +                       /* Skip comparison on first traversal */
> > > +                       if (count == 10000 && !j)
> > > +                               continue;
>
> I am skipping diff for the first iteration..
>
> > > +
> > > +                       diff = end - start;
> >
> > I think on the first iteration, 'start' is wrong here as well, and needs to
> > be initialized once, as you did in the earlier versions.
>
> And so this won't be a problem.

Ok.

        Arnd
Viresh Kumar June 9, 2020, 8:32 a.m. UTC | #8
On 09-06-20, 09:43, Arnd Bergmann wrote:
> On Tue, Jun 9, 2020 at 9:05 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 08-06-20, 16:51, Arnd Bergmann wrote:
> > > You already have a special case for gettimeofday(), so just round down
> > > the 'start' value to the previous microsecond when you are in that
> > > case.
> >
> > Since this will also affect the next reading as well (as we will loose
> > values < 1 us), I tried to do it a bit differently.
> >
> > @@ -78,6 +78,13 @@ static void run(unsigned int i)
> >                         tv->gettime(clks[i], tst_ts_get(&ts));
> >                         end = tst_ts_to_ns(ts);
> >
> > +                       /*
> > +                        * gettimeofday() doesn't value less than 1 us, copy
> > +                        * that from start.
> > +                        */
> > +                       if (tv->gettime == my_gettimeofday && end < start)
> > +                               end += start % 1000;
> 
> This looks like it should work correctly, but it feels a bit more fragile than
> 
>                        if (tv->gettime == my_gettimeofday)
>                                start -= start % 1000;
> 
> which would bring the start and end values to the same resolution
> rather than making up values that were never read.

This sounds better, will pick it up.

> Your approach however has the advantage of adding less overhead
> as the % operator on a 64-bit integer is going to turn into a very
> expensive calculation on most 32-bit processors that itself can
> skew the timing.

I am not sure of what you meant here, both the approaches are using
the % operation.
Viresh Kumar June 9, 2020, 8:33 a.m. UTC | #9
On 09-06-20, 14:02, Viresh Kumar wrote:
> On 09-06-20, 09:43, Arnd Bergmann wrote:
> > On Tue, Jun 9, 2020 at 9:05 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > >
> > > On 08-06-20, 16:51, Arnd Bergmann wrote:
> > > > You already have a special case for gettimeofday(), so just round down
> > > > the 'start' value to the previous microsecond when you are in that
> > > > case.
> > >
> > > Since this will also affect the next reading as well (as we will loose
> > > values < 1 us), I tried to do it a bit differently.
> > >
> > > @@ -78,6 +78,13 @@ static void run(unsigned int i)
> > >                         tv->gettime(clks[i], tst_ts_get(&ts));
> > >                         end = tst_ts_to_ns(ts);
> > >
> > > +                       /*
> > > +                        * gettimeofday() doesn't value less than 1 us, copy
> > > +                        * that from start.
> > > +                        */
> > > +                       if (tv->gettime == my_gettimeofday && end < start)
> > > +                               end += start % 1000;
> > 
> > This looks like it should work correctly, but it feels a bit more fragile than
> > 
> >                        if (tv->gettime == my_gettimeofday)
> >                                start -= start % 1000;

And this can be
        start /= 1000;
Arnd Bergmann June 9, 2020, 8:51 a.m. UTC | #10
On Tue, Jun 9, 2020 at 10:32 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 09-06-20, 09:43, Arnd Bergmann wrote:
> > On Tue, Jun 9, 2020 at 9:05 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > >
> > > On 08-06-20, 16:51, Arnd Bergmann wrote:
> > > > You already have a special case for gettimeofday(), so just round down
> > > > the 'start' value to the previous microsecond when you are in that
> > > > case.
> > >
> > > Since this will also affect the next reading as well (as we will loose
> > > values < 1 us), I tried to do it a bit differently.
> > >
> > > @@ -78,6 +78,13 @@ static void run(unsigned int i)
> > >                         tv->gettime(clks[i], tst_ts_get(&ts));
> > >                         end = tst_ts_to_ns(ts);
> > >
> > > +                       /*
> > > +                        * gettimeofday() doesn't value less than 1 us, copy
> > > +                        * that from start.
> > > +                        */
> > > +                       if (tv->gettime == my_gettimeofday && end < start)
> > > +                               end += start % 1000;
> >
> > This looks like it should work correctly, but it feels a bit more fragile than
> >
> >                        if (tv->gettime == my_gettimeofday)
> >                                start -= start % 1000;
> >
> > which would bring the start and end values to the same resolution
> > rather than making up values that were never read.
>
> This sounds better, will pick it up.
>
> > Your approach however has the advantage of adding less overhead
> > as the % operator on a 64-bit integer is going to turn into a very
> > expensive calculation on most 32-bit processors that itself can
> > skew the timing.
>
> I am not sure of what you meant here, both the approaches are using
> the % operation.

Yours only needs it when the nanoseconds are actually lower, which
is only the case if no the actual time has not crossed into the next
microsecond. If the gettimeofday() call itself takes over a microsecond
to complete, then it usually will have wrapped, but this is highly
hardware specific as the amount of time it takes to read the current
clock registers can be anywhere from 'almost free' to 'several
microseconds'.

Doing a division would be the same as the modulo operator.

Using a 32-bit modulo or division on the tv_nsec portion instead
of doing it on the 64-bit nanoseconds is much faster but also
a little more complex.

Another option would be to allow up to 999 nanoseconds of
time going backwards before printing an error for the case
of gettimeofday() after clock_gettime(), like

if ((tv->gettime == my_gettimeofday)
    slack = 999;
else
    slack = 0;

if (start + slack < end)
      failure();

This is much faster as it avoids the division but does not catch
the corner case of gettimeofday() returning a value that is
slightly before the previous clock_gettime() but that is actually
in the previous microsecond interval.

     Arnd
Viresh Kumar June 10, 2020, 6:17 a.m. UTC | #11
On 09-06-20, 10:51, Arnd Bergmann wrote:
> Yours only needs it when the nanoseconds are actually lower, which
> is only the case if no the actual time has not crossed into the next
> microsecond.

That is a very small percentage of cases I believe, and so I won't
count my version to be really better here :)

> If the gettimeofday() call itself takes over a microsecond
> to complete, then it usually will have wrapped, but this is highly
> hardware specific as the amount of time it takes to read the current
> clock registers can be anywhere from 'almost free' to 'several
> microseconds'.
> 
> Doing a division would be the same as the modulo operator.

Sure.

> Using a 32-bit modulo or division on the tv_nsec portion instead
> of doing it on the 64-bit nanoseconds is much faster but also
> a little more complex.

Yeah, keeping separate values is a mess. Another thing I was thinking
about, does the time it takes to do a division operation has the
tendency to affect the output of the program? i.e. not catching bugs
sometimes ? Otherwise who cares about the time spent doing division
during testing of the kernel :)

> Another option would be to allow up to 999 nanoseconds of
> time going backwards before printing an error for the case
> of gettimeofday() after clock_gettime(), like
> 
> if ((tv->gettime == my_gettimeofday)
>     slack = 999;
> else
>     slack = 0;
> 
> if (start + slack < end)
>       failure();

I like it, but I think you did it a bit incorrectly according to the
current code:

if (end + slack < start)
      failure();

> This is much faster as it avoids the division but does not catch
> the corner case of gettimeofday() returning a value that is
> slightly before the previous clock_gettime() but that is actually
> in the previous microsecond interval.

Hmm, I am not sure I understand it. The only bug that can happen with
gettimeofday() is when it returns a value with a different microsecond
value than clock_gettime(), as we can't capture values lower than 1
us.

So, clock_gettime() returns 4001 ns and gettimeofday() returns 3000
(or 2000 or 1000) ns. This is the only bug possible here. And adding
999 to gettimeofday() output will never make it cross the microsecond
boundary (as it has 000 at the end) and so we should always catch the
bug.

Where am I wrong here now ? Of course I am :)
diff mbox series

Patch

diff --git a/runtest/syscalls b/runtest/syscalls
index f9a6397560fa..d7c3cbed611a 100644
--- a/runtest/syscalls
+++ b/runtest/syscalls
@@ -96,6 +96,7 @@  clock_nanosleep04 clock_nanosleep04
 clock_gettime01 clock_gettime01
 clock_gettime02 clock_gettime02
 clock_gettime03 clock_gettime03
+clock_gettime04 clock_gettime04
 leapsec01 leapsec01
 
 clock_settime01 clock_settime01
diff --git a/testcases/kernel/syscalls/clock_gettime/.gitignore b/testcases/kernel/syscalls/clock_gettime/.gitignore
index 9d06613b6f41..304eedab68c6 100644
--- a/testcases/kernel/syscalls/clock_gettime/.gitignore
+++ b/testcases/kernel/syscalls/clock_gettime/.gitignore
@@ -1,4 +1,5 @@ 
 clock_gettime01
 clock_gettime02
 clock_gettime03
+clock_gettime04
 leapsec01
diff --git a/testcases/kernel/syscalls/clock_gettime/clock_gettime04.c b/testcases/kernel/syscalls/clock_gettime/clock_gettime04.c
new file mode 100644
index 000000000000..139883033dce
--- /dev/null
+++ b/testcases/kernel/syscalls/clock_gettime/clock_gettime04.c
@@ -0,0 +1,96 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020 Linaro Limited. All rights reserved.
+ * Author: Viresh Kumar<viresh.kumar@linaro.org>
+ *
+ * Check time difference between successive readings and report a bug if
+ * difference found to be over 5 ms.
+ */
+
+#include "config.h"
+#include "tst_timer.h"
+#include "tst_safe_clocks.h"
+
+clockid_t clks[] = {
+	CLOCK_REALTIME,
+	CLOCK_REALTIME_COARSE,
+	CLOCK_MONOTONIC,
+	CLOCK_MONOTONIC_COARSE,
+	CLOCK_MONOTONIC_RAW,
+	CLOCK_BOOTTIME,
+};
+
+static struct test_variants {
+	int (*gettime)(clockid_t clk_id, void *ts);
+	enum tst_ts_type type;
+	char *desc;
+} variants[] = {
+	{ .gettime = libc_clock_gettime, .type = TST_LIBC_TIMESPEC, .desc = "vDSO or syscall with libc spec"},
+
+#if (__NR_clock_gettime != __LTP__NR_INVALID_SYSCALL)
+	{ .gettime = sys_clock_gettime, .type = TST_KERN_OLD_TIMESPEC, .desc = "syscall with old kernel spec"},
+#endif
+
+#if (__NR_clock_gettime64 != __LTP__NR_INVALID_SYSCALL)
+	{ .gettime = sys_clock_gettime64, .type = TST_KERN_TIMESPEC, .desc = "syscall time64 with kernel spec"},
+#endif
+};
+
+static void run(unsigned int i)
+{
+	struct tst_ts ts_start, ts_end;
+	long long start, end, diff;
+	struct test_variants *tv;
+	int count = 10000;
+	unsigned int j;
+
+	while (--count) {
+		/* Store reference time in start */
+		if (clks[i] == CLOCK_REALTIME) {
+			struct timeval tval;
+
+			/* Compare CLOCK_REALTIME with gettimeofday() as well */
+			if (gettimeofday(&tval, NULL) < 0)
+				tst_brk(TBROK | TERRNO, "gettimeofday() failed");
+
+			start = tst_timeval_to_ms(tval);
+		} else {
+			tv = &variants[0];
+			ts_start.type = tv->type;
+			tv->gettime(clks[i], tst_ts_get(&ts_start));
+			start = tst_ts_to_ms(ts_start);
+		}
+
+		for (j = 0; j < ARRAY_SIZE(variants); j++) {
+			tv = &variants[j];
+			ts_end.type = tv->type;
+
+			tv->gettime(clks[i], tst_ts_get(&ts_end));
+			end = tst_ts_to_ms(ts_end);
+
+			diff = end - start;
+			if (diff < 0) {
+				tst_res(TFAIL, "%s: Time travelled backwards: %lld",
+						tst_clock_name(clks[i]), diff);
+				return;
+			}
+
+			if (diff >= 5) {
+				tst_res(TFAIL, "%s: Difference between successive readings greater than 5 ms: %lld",
+						tst_clock_name(clks[i]), diff);
+				return;
+			}
+
+			/* Refresh time in start */
+			start = end;
+		}
+	}
+
+	tst_res(TPASS, "%s: Difference between successive readings is reasonable",
+		tst_clock_name(clks[i]));
+}
+
+static struct tst_test test = {
+	.test = run,
+	.tcnt = ARRAY_SIZE(clks),
+};