diff mbox

[NET-NEXT,02/10] time sync: generic infrastructure to map between time stamps generated by a time counter and system time

Message ID 1233752517-30010-3-git-send-email-patrick.ohly@intel.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Patrick Ohly Feb. 4, 2009, 1:01 p.m. UTC
Mapping from time counter to system time is implemented. This is sufficient to use
this code in a network device driver which wants to support hardware time stamping
and transformation of hardware time stamps to system time.

The interface could have been made more versatile by not depending on a time counter,
but this wasn't done to avoid writing glue code elsewhere.

The method implemented here is the one used and analyzed under the name
"assisted PTP" in the LCI PTP paper:
http://www.linuxclustersinstitute.org/conferences/archive/2008/PDF/Ohly_92221.pdf
---
 include/linux/clocksync.h |  102 +++++++++++++++++++++++
 kernel/time/Makefile      |    2 +-
 kernel/time/clocksync.c   |  197 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 300 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/clocksync.h
 create mode 100644 kernel/time/clocksync.c

Comments

john stultz Feb. 4, 2009, 7:44 p.m. UTC | #1
On Wed, 2009-02-04 at 14:01 +0100, Patrick Ohly wrote:
> Mapping from time counter to system time is implemented. This is sufficient to use
> this code in a network device driver which wants to support hardware time stamping
> and transformation of hardware time stamps to system time.
> 
> The interface could have been made more versatile by not depending on a time counter,
> but this wasn't done to avoid writing glue code elsewhere.
> 
> The method implemented here is the one used and analyzed under the name
> "assisted PTP" in the LCI PTP paper:
> http://www.linuxclustersinstitute.org/conferences/archive/2008/PDF/Ohly_92221.pdf
> ---
>  include/linux/clocksync.h |  102 +++++++++++++++++++++++
>  kernel/time/Makefile      |    2 +-
>  kernel/time/clocksync.c   |  197 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 300 insertions(+), 1 deletions(-)
>  create mode 100644 include/linux/clocksync.h
>  create mode 100644 kernel/time/clocksync.c

I think my main critique of this is somewhat trivial, but still important,
as confusion is common in this area.

I sort of object to the name clocksync, as you're not really doing
anything to sync clocks in the code. One, "clock" is a way overloaded
term in the kernel. Two, you really seem to be just providing deltas
and skew rates between notions of time. I want to avoid someone thinking
"Oh, NTP must use this code". 

So maybe something like timecompare.c? 

If this code is really PTP purposed, maybe ptp should be in the name?

> diff --git a/include/linux/clocksync.h b/include/linux/clocksync.h
> new file mode 100644
> index 0000000..07c0cc1
> --- /dev/null
> +++ b/include/linux/clocksync.h
> @@ -0,0 +1,102 @@
> +/*
> + * Utility code which helps transforming between hardware time stamps
> + * generated by a clocksource and system time. The clocksource is
> + * assumed to return monotonically increasing time (but this code does
> + * its best to compensate if that is not the case) whereas system time
> + * may jump.

You're not using clocksources here anymore, right? Probably needs an
update.


> + *
> + * Copyright(c) 2009 Intel Corporation.
> + * Author: Patrick Ohly <patrick.ohly@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +#ifndef _LINUX_CLOCKSYNC_H
> +#define _LINUX_CLOCKSYNC_H
> +
> +#include <linux/clocksource.h>
> +#include <linux/ktime.h>
> +
> +/**
> + * struct clocksync - stores state and configuration for the two clocks
> + *
> + * Initialize to zero, then set clock, systime, num_samples.
> + *
> + * Transformation between HW time and system time is done with:

So hw time is overloaded as well. It tends to be thought of as the
CMOS/RTC clock.  Would PTP or NIC time be ok? (It avoids network-time
which also has ntp connotations) Or are there other uses for this code
other than the PTP code?


> + * HW time transformed = HW time + offset +
> + *                       (HW time - last_update) * skew /
> + *                       CLOCKSYNC_SKEW_RESOLUTION
> + *
> + * @clock:           the source for HW time stamps (%clocksource_read_time)

nix clocksource.

> + * @systime:         function returning current system time (ktime_get
> + *                   for monotonic time, or ktime_get_real for wall clock)

So, are non-CLOCK_REALTIME clockids actually used?

> + * @num_samples:     number of times that HW time and system time are to
> + *                   be compared when determining their offset
> + * @offset:          (system time - HW time) at the time of the last update
> + * @skew:            average (system time - HW time) / delta HW time *
> + *                   CLOCKSYNC_SKEW_RESOLUTION
> + * @last_update:     last HW time stamp when clock offset was measured
> + */
> +struct clocksync {

struct time_comparator { ?

> +	struct timecounter *clock;
> +	ktime_t (*systime)(void);
> +	int num_samples;
> +
> +	s64 offset;
> +	s64 skew;
> +	u64 last_update;
> +};
> +
> +/**
> + * clocksync_hw2sys - transform HW time stamp into corresponding system time
> + * @sync:             context for clock sync
> + * @hwtstamp:         the result of timecounter_read() or
> + *                    timecounter_cyc2time()
> + */
> +extern ktime_t clocksync_hw2sys(struct clocksync *sync,
> +				u64 hwtstamp);

Ugh. hw2sys again is overloaded for reading the cmos/RTC persistent
clock and setting the system time.


So overall I don't have any objections with the code itself. It's fairly
isolated and doesn't interact with the timekeeping code itself.

Sorry for taking so long to get feedback to you, I had started looking
at this right before the holiday and lost context after the break. 

thanks
-john

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick Ohly Feb. 5, 2009, 10:21 a.m. UTC | #2
On Wed, 2009-02-04 at 21:44 +0200, john stultz wrote:
> On Wed, 2009-02-04 at 14:01 +0100, Patrick Ohly wrote:
> > Mapping from time counter to system time is implemented. This is sufficient to use
> > this code in a network device driver which wants to support hardware time stamping
> > and transformation of hardware time stamps to system time.
> > 
> > The interface could have been made more versatile by not depending on a time counter,
> > but this wasn't done to avoid writing glue code elsewhere.
> > 
> > The method implemented here is the one used and analyzed under the name
> > "assisted PTP" in the LCI PTP paper:
> > http://www.linuxclustersinstitute.org/conferences/archive/2008/PDF/Ohly_92221.pdf
> > ---
> >  include/linux/clocksync.h |  102 +++++++++++++++++++++++
> >  kernel/time/Makefile      |    2 +-
> >  kernel/time/clocksync.c   |  197 +++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 300 insertions(+), 1 deletions(-)
> >  create mode 100644 include/linux/clocksync.h
> >  create mode 100644 kernel/time/clocksync.c
> 
> I think my main critique of this is somewhat trivial, but still important,
> as confusion is common in this area.

Agreed, good names are important - choosing them is often harder than
getting the code to work ;-)

> I sort of object to the name clocksync, as you're not really doing
> anything to sync clocks in the code. One, "clock" is a way overloaded
> term in the kernel. Two, you really seem to be just providing deltas
> and skew rates between notions of time. I want to avoid someone thinking
> "Oh, NTP must use this code". 
> 
> So maybe something like timecompare.c? 

Fine with me.

> If this code is really PTP purposed, maybe ptp should be in the name?

It's not PTP specific. I'm not sure whether there are other uses for it,
but if something comes up, then having PTP in the name would be wrong.
So I prefer timecompare.

> > diff --git a/include/linux/clocksync.h b/include/linux/clocksync.h
> > new file mode 100644
> > index 0000000..07c0cc1
> > --- /dev/null
> > +++ b/include/linux/clocksync.h
> > @@ -0,0 +1,102 @@
> > +/*
> > + * Utility code which helps transforming between hardware time stamps
> > + * generated by a clocksource and system time. The clocksource is
> > + * assumed to return monotonically increasing time (but this code does
> > + * its best to compensate if that is not the case) whereas system time
> > + * may jump.
> 
> You're not using clocksources here anymore, right? Probably needs an
> update.

Right.

> > +/**
> > + * struct clocksync - stores state and configuration for the two clocks
> > + *
> > + * Initialize to zero, then set clock, systime, num_samples.
> > + *
> > + * Transformation between HW time and system time is done with:
> 
> So hw time is overloaded as well. It tends to be thought of as the
> CMOS/RTC clock.  Would PTP or NIC time be ok? (It avoids network-time
> which also has ntp connotations) Or are there other uses for this code
> other than the PTP code?

As said above, there might be. I should better avoid all references to
HW and system and just speak of "source" and "target" time, with just
one motivating example given that refers to NIC and system time.

> > + * @systime:         function returning current system time (ktime_get
> > + *                   for monotonic time, or ktime_get_real for wall clock)
> 
> So, are non-CLOCK_REALTIME clockids actually used?

Not at the moment, but I can imagine that this might be useful at some
point.

> > + * @num_samples:     number of times that HW time and system time are to
> > + *                   be compared when determining their offset
> > + * @offset:          (system time - HW time) at the time of the last update
> > + * @skew:            average (system time - HW time) / delta HW time *
> > + *                   CLOCKSYNC_SKEW_RESOLUTION
> > + * @last_update:     last HW time stamp when clock offset was measured
> > + */
> > +struct clocksync {
> 
> struct time_comparator { ?

Why not simply "timecompare"? It's the central data structure in this
module, similar to "clocksource" in "clocksource.[ch]". Apart from that
I don't mind using time_comparator.

> > +	struct timecounter *clock;
> > +	ktime_t (*systime)(void);
> > +	int num_samples;
> > +
> > +	s64 offset;
> > +	s64 skew;
> > +	u64 last_update;
> > +};
> > +
> > +/**
> > + * clocksync_hw2sys - transform HW time stamp into corresponding system time
> > + * @sync:             context for clock sync
> > + * @hwtstamp:         the result of timecounter_read() or
> > + *                    timecounter_cyc2time()
> > + */
> > +extern ktime_t clocksync_hw2sys(struct clocksync *sync,
> > +				u64 hwtstamp);
> 
> Ugh. hw2sys again is overloaded for reading the cmos/RTC persistent
> clock and setting the system time.

timecompare_transform()?
diff mbox

Patch

diff --git a/include/linux/clocksync.h b/include/linux/clocksync.h
new file mode 100644
index 0000000..07c0cc1
--- /dev/null
+++ b/include/linux/clocksync.h
@@ -0,0 +1,102 @@ 
+/*
+ * Utility code which helps transforming between hardware time stamps
+ * generated by a clocksource and system time. The clocksource is
+ * assumed to return monotonically increasing time (but this code does
+ * its best to compensate if that is not the case) whereas system time
+ * may jump.
+ *
+ * Copyright(c) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _LINUX_CLOCKSYNC_H
+#define _LINUX_CLOCKSYNC_H
+
+#include <linux/clocksource.h>
+#include <linux/ktime.h>
+
+/**
+ * struct clocksync - stores state and configuration for the two clocks
+ *
+ * Initialize to zero, then set clock, systime, num_samples.
+ *
+ * Transformation between HW time and system time is done with:
+ * HW time transformed = HW time + offset +
+ *                       (HW time - last_update) * skew /
+ *                       CLOCKSYNC_SKEW_RESOLUTION
+ *
+ * @clock:           the source for HW time stamps (%clocksource_read_time)
+ * @systime:         function returning current system time (ktime_get
+ *                   for monotonic time, or ktime_get_real for wall clock)
+ * @num_samples:     number of times that HW time and system time are to
+ *                   be compared when determining their offset
+ * @offset:          (system time - HW time) at the time of the last update
+ * @skew:            average (system time - HW time) / delta HW time *
+ *                   CLOCKSYNC_SKEW_RESOLUTION
+ * @last_update:     last HW time stamp when clock offset was measured
+ */
+struct clocksync {
+	struct timecounter *clock;
+	ktime_t (*systime)(void);
+	int num_samples;
+
+	s64 offset;
+	s64 skew;
+	u64 last_update;
+};
+
+/**
+ * clocksync_hw2sys - transform HW time stamp into corresponding system time
+ * @sync:             context for clock sync
+ * @hwtstamp:         the result of timecounter_read() or
+ *                    timecounter_cyc2time()
+ */
+extern ktime_t clocksync_hw2sys(struct clocksync *sync,
+				u64 hwtstamp);
+
+/**
+ * clocksync_offset - measure current (system time - HW time) offset
+ * @sync:             context for clock sync
+ * @offset:           average offset during sample period returned here
+ * @hwtstamp:         average HW time during sample period returned here
+ *
+ * Returns number of samples used. Might be zero (= no result) in the
+ * unlikely case that system time was monotonically decreasing for all
+ * samples (= broken).
+ */
+extern int clocksync_offset(struct clocksync *sync,
+			    s64 *offset,
+			    u64 *hwtstamp);
+
+extern void __clocksync_update(struct clocksync *sync,
+			       u64 hwtstamp);
+
+/**
+ * clocksync_update - update offset and skew by measuring current offset
+ * @sync:             context for clock sync
+ * @hwtstamp:         the result of timecounter_read() or
+ *                    timecounter_cyc2time(), pass zero to force update
+ *
+ * Updates are only done at most once per second.
+ */
+static inline void clocksync_update(struct clocksync *sync,
+				    u64 hwtstamp)
+{
+	if (!hwtstamp ||
+	    (s64)(hwtstamp - sync->last_update) >= NSEC_PER_SEC)
+		__clocksync_update(sync, hwtstamp);
+}
+
+#endif /* _LINUX_CLOCKSYNC_H */
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 905b0b5..6279fb0 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,4 @@ 
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o clocksync.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
diff --git a/kernel/time/clocksync.c b/kernel/time/clocksync.c
new file mode 100644
index 0000000..c10857d
--- /dev/null
+++ b/kernel/time/clocksync.c
@@ -0,0 +1,197 @@ 
+/*
+ * Utility code which helps transforming between hardware time stamps
+ * generated by a timecounter and system time.
+ *
+ * Copyright (C) 2009 Intel.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/clocksync.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+
+/*
+ * fixed point arithmetic scale factor for skew
+ *
+ * Usually one would measure skew in ppb (parts per billion, 1e9), but
+ * using a power of 2 as the factor simplifies the math.
+ */
+#define CLOCKSYNC_SKEW_RESOLUTION (((s64)1)<<30)
+
+ktime_t clocksync_hw2sys(struct clocksync *sync,
+			 u64 hwtstamp)
+{
+	u64 nsec;
+
+	nsec = hwtstamp + sync->offset;
+	nsec += (s64)(hwtstamp - sync->last_update) * sync->skew /
+		CLOCKSYNC_SKEW_RESOLUTION;
+
+	return ns_to_ktime(nsec);
+}
+EXPORT_SYMBOL(clocksync_hw2sys);
+
+int clocksync_offset(struct clocksync *sync,
+		     s64 *offset,
+		     u64 *hwtstamp)
+{
+	u64 starthw = 0, endhw = 0;
+	struct {
+		s64 offset;
+		s64 duration_sys;
+	} buffer[10], sample, *samples;
+	int counter = 0, i;
+	int used;
+	int index;
+	int num_samples = sync->num_samples;
+
+	if (num_samples > sizeof(buffer)/sizeof(buffer[0])) {
+		samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
+		if (!samples) {
+			samples = buffer;
+			num_samples = sizeof(buffer)/sizeof(buffer[0]);
+		}
+	} else {
+		samples = buffer;
+	}
+
+	/* run until we have enough valid samples, but do not try forever */
+	i = 0;
+	counter = 0;
+	while (1) {
+		u64 ts;
+		ktime_t start, end;
+
+		start = sync->systime();
+		ts = timecounter_read(sync->clock);
+		end = sync->systime();
+
+		if (!i)
+			starthw = ts;
+
+		/* ignore negative durations */
+		sample.duration_sys = ktime_to_ns(ktime_sub(end, start));
+		if (sample.duration_sys >= 0) {
+			/*
+			 * assume symmetric delay to and from HW:
+			 * average system time corresponds to measured
+			 * HW time
+			 */
+			sample.offset =
+				ktime_to_ns(ktime_add(end, start)) / 2 -
+				ts;
+
+			/* simple insertion sort based on duration */
+			index = counter - 1;
+			while (index >= 0) {
+				if (samples[index].duration_sys <
+				    sample.duration_sys)
+					break;
+				samples[index + 1] = samples[index];
+				index--;
+			}
+			samples[index + 1] = sample;
+			counter++;
+		}
+
+		i++;
+		if (counter >= num_samples || i >= 100000) {
+			endhw = ts;
+			break;
+		}
+	}
+
+	*hwtstamp = (endhw + starthw) / 2;
+
+	/* remove outliers by only using 75% of the samples */
+	used = counter * 3 / 4;
+	if (!used)
+		used = counter;
+	if (used) {
+		/* calculate average */
+		s64 off = 0;
+		for (index = 0; index < used; index++)
+			off += samples[index].offset;
+		*offset = div_s64(off, used);
+	}
+
+	if (samples && samples != buffer)
+		kfree(samples);
+
+	return used;
+}
+EXPORT_SYMBOL(clocksync_offset);
+
+void __clocksync_update(struct clocksync *sync,
+			u64 hwtstamp)
+{
+	s64 offset;
+	u64 average_time;
+
+	if (!clocksync_offset(sync, &offset, &average_time))
+		return;
+
+	printk(KERN_DEBUG
+		"average offset: %lld\n", offset);
+
+	if (!sync->last_update) {
+		sync->last_update = average_time;
+		sync->offset = offset;
+		sync->skew = 0;
+	} else {
+		s64 delta_nsec = average_time - sync->last_update;
+
+		/* avoid division by negative or small deltas */
+		if (delta_nsec >= 10000) {
+			s64 delta_offset_nsec = offset - sync->offset;
+			s64 skew; /* delta_offset_nsec *
+				     CLOCKSYNC_SKEW_RESOLUTION /
+				     delta_nsec */
+			u64 divisor;
+
+			/* div_s64() is limited to 32 bit divisor */
+			skew = delta_offset_nsec * CLOCKSYNC_SKEW_RESOLUTION;
+			divisor = delta_nsec;
+			while (unlikely(divisor >= ((s64)1) << 32)) {
+				/* divide both by 2; beware, right shift
+				   of negative value has undefined
+				   behavior and can only be used for
+				   the positive divisor */
+				skew = div_s64(skew, 2);
+				divisor >>= 1;
+			}
+			skew = div_s64(skew, divisor);
+
+			/*
+			 * Calculate new overall skew as 4/16 the
+			 * old value and 12/16 the new one. This is
+			 * a rather arbitrary tradeoff between
+			 * only using the latest measurement (0/16 and
+			 * 16/16) and even more weight on past measurements.
+			 */
+#define CLOCKSYNC_NEW_SKEW_PER_16 12
+			sync->skew =
+				div_s64((16 - CLOCKSYNC_NEW_SKEW_PER_16) *
+					sync->skew +
+					CLOCKSYNC_NEW_SKEW_PER_16 * skew,
+					16);
+			sync->last_update = average_time;
+			sync->offset = offset;
+		}
+	}
+}
+EXPORT_SYMBOL(__clocksync_update);