diff mbox

[net-next,v2,1/8] net-timestamp: explicit SO_TIMESTAMPING ancillary data struct

Message ID 1404416380-3545-2-git-send-email-willemb@google.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Willem de Bruijn July 3, 2014, 7:39 p.m. UTC
Applications that request kernel transmit timestamps with
SO_TIMESTAMPING read timestamps using recvmsg() ancillary data.
This patch explicitly defines the struct that applications read, and
the semantics of its fields.

The code is backward compatible with legacy applications that treat
the ancillary data as an anonymous array 'struct timespec data[3]'.

Beyond documenting existing use, it adds support for new timestamping
points in the kernel tx datapath. On tx, the scm_timestamping struct
is always accompanied with an error message of type sock_exterr_skb.

This patch defines the previously unused field ee_info in that struct
as an explicit type field, communicating the type of each timestamp
in the three struct timespec in scm_timestamping. This does not change
legacy expectations, but allows returning multiple timestamp types to
the calling process.

This patch does not modify the reception path. On rx, SO_TIMESTAMPING
csmgs are not accompanied by a struct similar to sock_exterr_skb. If
at some point rx timestamp points are added, a similar extension in a
new cmsg can be defined.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/skbuff.h        |  2 ++
 include/uapi/linux/errqueue.h | 19 +++++++++++++++++++
 net/core/skbuff.c             |  1 +
 net/socket.c                  | 33 +++++++++++++++++++--------------
 4 files changed, 41 insertions(+), 14 deletions(-)

Comments

Richard Cochran July 5, 2014, 8:10 p.m. UTC | #1
On Thu, Jul 03, 2014 at 03:39:33PM -0400, Willem de Bruijn wrote:

> +/**
> + *	struct scm_timestamping - timestamps exposed through cmsg
> + *
> + *	The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_*
> + *	communicate network timestamps by passing this struct in a cmsg with
> + *	recvmsg(). See Documentation/networking/timestamping.txt for details.
> + */
> +struct scm_timestamping {
> +	struct timespec ts[3];
> +};
> +
> +#define SCM_TSTAMP_SND		0x1	/* driver passed skb to NIC */
> +#define SCM_TSTAMP_ACK		0x2	/* transport layer saw ACK */
> +#define SCM_TSTAMP_ENQ		0x4	/* stack passed skb to TC layer */
> +#define SCM_TSTAMP_RCV		0x8	/* stack received skb */
> +#define SCM_TSTAMP_HWSYS	0x10	/* NIC tstamp in system format */
> +#define SCM_TSTAMP_HWRAW	0x20	/* NIC tstamp in native format */
> +
> +#define SCM_TSTAMP_OFF(n, ts)	(ts << (10 * n))

Hm ...
  
>  #endif /* _UAPI_LINUX_ERRQUEUE_H */
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index c1a3303..1bcd05d 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3521,6 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
>  	memset(serr, 0, sizeof(*serr));
>  	serr->ee.ee_errno = ENOMSG;
>  	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
> +	serr->ee.ee_info = hwtstamps ? 0 : SCM_TSTAMP_SND;
>  
>  	err = sock_queue_err_skb(sk, skb);
>  
> diff --git a/net/socket.c b/net/socket.c
> index abf56b2..a31138d 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -106,6 +106,7 @@
>  #include <linux/sockios.h>
>  #include <linux/atalk.h>
>  #include <net/busy_poll.h>
> +#include <linux/errqueue.h>
>  
>  #ifdef CONFIG_NET_RX_BUSY_POLL
>  unsigned int sysctl_net_busy_read __read_mostly;
> @@ -696,9 +697,10 @@ EXPORT_SYMBOL(kernel_sendmsg);
>  void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
>  	struct sk_buff *skb)
>  {
> +	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
>  	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
> -	struct timespec ts[3];
> -	int empty = 1;
> +	struct scm_timestamping tss;
> +	int tstype = 0, is_tx = skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP;
>  	struct skb_shared_hwtstamps *shhwtstamps =
>  		skb_hwtstamps(skb);
>  
> @@ -714,28 +716,31 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
>  			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
>  				 sizeof(tv), &tv);
>  		} else {
> -			skb_get_timestampns(skb, &ts[0]);
> +			struct timespec ts;
> +			skb_get_timestampns(skb, &ts);
>  			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
> -				 sizeof(ts[0]), &ts[0]);
> +				 sizeof(ts), &ts);
>  		}
>  	}
>  
> -
> -	memset(ts, 0, sizeof(ts));
> +	memset(&tss, 0, sizeof(tss));
>  	if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
> -	    ktime_to_timespec_cond(skb->tstamp, ts + 0))
> -		empty = 0;
> +	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
> +		tstype |= is_tx ? serr->ee.ee_info : SCM_TSTAMP_RCV;
>  	if (shhwtstamps) {
>  		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
> -		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
> -			empty = 0;
> +		    ktime_to_timespec_cond(shhwtstamps->syststamp, tss.ts + 1))
> +			tstype |= SCM_TSTAMP_OFF(1, SCM_TSTAMP_HWSYS);
>  		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
> -		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
> -			empty = 0;
> +		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
> +			tstype |= SCM_TSTAMP_OFF(2, SCM_TSTAMP_HWRAW);

So you want ee_info to be a bit field like this?

   | 10 bits    | 10 bits    | 10 bits    | 2 bits |
   |------------+------------+------------+--------|
   | ts[0] type | ts[1] type | ts[2] type | rsv    |

Why not simplify this into two fields:

1. the index ts[] that contains a time stamp
2. the type of the time stamp

The kernel never provides more than one value in ts[], and it is hard
to imagine that we will ever do this. The original so_timestamping
interface and documentation seem to suggest that multiple values are
possible, but there was never, ever any code that did this. As an end
user, I found that very confusing.

I would prefer making the extended interface simpler, rather than
giving the impression that multiple time stamps are possible when they
really are not.

Thanks,
Richard

>  	}
> -	if (!empty)
> +	if (tstype) {
> +		if (is_tx)
> +			serr->ee.ee_info = tstype;
>  		put_cmsg(msg, SOL_SOCKET,
> -			 SCM_TIMESTAMPING, sizeof(ts), &ts);
> +			 SCM_TIMESTAMPING, sizeof(tss), &tss);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
>  
> -- 
> 2.0.0.526.g5318336
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 5, 2014, 8:18 p.m. UTC | #2
On Thu, Jul 03, 2014 at 03:39:33PM -0400, Willem de Bruijn wrote:

> +#define SCM_TSTAMP_SND		0x1	/* driver passed skb to NIC */
> +#define SCM_TSTAMP_ACK		0x2	/* transport layer saw ACK */
> +#define SCM_TSTAMP_ENQ		0x4	/* stack passed skb to TC layer */
> +#define SCM_TSTAMP_RCV		0x8	/* stack received skb */
> +#define SCM_TSTAMP_HWSYS	0x10	/* NIC tstamp in system format */

We don't need this flag. Better to omit it, because we are not going
to allow this kind of time stamp ever again.

> +#define SCM_TSTAMP_HWRAW	0x20	/* NIC tstamp in native format */

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn July 7, 2014, 3:34 p.m. UTC | #3
On Sat, Jul 5, 2014 at 4:18 PM, Richard Cochran
<richardcochran@gmail.com> wrote:
> On Thu, Jul 03, 2014 at 03:39:33PM -0400, Willem de Bruijn wrote:
>
>> +#define SCM_TSTAMP_SND               0x1     /* driver passed skb to NIC */
>> +#define SCM_TSTAMP_ACK               0x2     /* transport layer saw ACK */
>> +#define SCM_TSTAMP_ENQ               0x4     /* stack passed skb to TC layer */
>> +#define SCM_TSTAMP_RCV               0x8     /* stack received skb */
>> +#define SCM_TSTAMP_HWSYS     0x10    /* NIC tstamp in system format */
>
> We don't need this flag. Better to omit it, because we are not going
> to allow this kind of time stamp ever again.

The hardware timestamp converted to system time is deprecated? I did
not know that. Because it is largely unused, or for a more fundamental
reason?

If so, the documentation could indeed use an explicit comment. The
definition of skb_shared_hwtstamps.syststamp too. I can write a small
patch independent of this patchset.

Better, that field in could be removed completely if there are no
users.That would be self documenting, would simplify plumbing
boilerplate and shrink sbk_shared_info.

Unfortunately, a cursory inspection shows one, octeon. While that user
exists and generates such timestamps, I think that the above new flag
should be passed, as well, for API consistency.

>> +#define SCM_TSTAMP_HWRAW     0x20    /* NIC tstamp in native format */
>
> Thanks,
> Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 7, 2014, 6:47 p.m. UTC | #4
On Mon, Jul 07, 2014 at 11:34:05AM -0400, Willem de Bruijn wrote:
> 
> The hardware timestamp converted to system time is deprecated? I did
> not know that. Because it is largely unused, or for a more fundamental
> reason?

It is for the fundamental reason that the idea behind it is just plain
wrong. MAC drivers are not the place to put clock servos.

> If so, the documentation could indeed use an explicit comment. The
> definition of skb_shared_hwtstamps.syststamp too. I can write a small
> patch independent of this patchset.

Please do.
 
> Unfortunately, a cursory inspection shows one, octeon. While that user
> exists and generates such timestamps, I think that the above new flag
> should be passed, as well, for API consistency.

Ugh, how the heck did that turd get in? Its not like they bothered to
include the maintainer on CC. That code must go.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn July 7, 2014, 7:14 p.m. UTC | #5
On Mon, Jul 7, 2014 at 2:47 PM, Richard Cochran
<richardcochran@gmail.com> wrote:
> On Mon, Jul 07, 2014 at 11:34:05AM -0400, Willem de Bruijn wrote:
>>
>> The hardware timestamp converted to system time is deprecated? I did
>> not know that. Because it is largely unused, or for a more fundamental
>> reason?
>
> It is for the fundamental reason that the idea behind it is just plain
> wrong. MAC drivers are not the place to put clock servos.
>
>> If so, the documentation could indeed use an explicit comment. The
>> definition of skb_shared_hwtstamps.syststamp too. I can write a small
>> patch independent of this patchset.
>
> Please do.

Ok. See below.

>
>> Unfortunately, a cursory inspection shows one, octeon. While that user
>> exists and generates such timestamps, I think that the above new flag
>> should be passed, as well, for API consistency.
>
> Ugh, how the heck did that turd get in? Its not like they bothered to
> include the maintainer on CC. That code must go.

I'm happy to unwind the syststamp part of 3d305850 ("netdev:
octeon_mgmt: Add hardware timestamp support"). Chad, let me know if
you object to that. Else, I'll send a patch to you both for review.

Once that is in, I'll follow up with a driver-independent stack patch
that removes the field in internal struct skb_shared_hwtstamps and
documents the legacy nature of the field in scm_timestamping. In
that case, we don't have to document the internal field in the interim.

>
> Thanks,
> Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chad Reese July 7, 2014, 7:44 p.m. UTC | #6
On 07/07/2014 12:14 PM, Willem de Bruijn wrote:
> On Mon, Jul 7, 2014 at 2:47 PM, Richard Cochran
> <richardcochran@gmail.com> wrote:
>> On Mon, Jul 07, 2014 at 11:34:05AM -0400, Willem de Bruijn wrote:
>>>
>>> The hardware timestamp converted to system time is deprecated? I did
>>> not know that. Because it is largely unused, or for a more fundamental
>>> reason?
>>
>> It is for the fundamental reason that the idea behind it is just plain
>> wrong. MAC drivers are not the place to put clock servos.
>>
>>> If so, the documentation could indeed use an explicit comment. The
>>> definition of skb_shared_hwtstamps.syststamp too. I can write a small
>>> patch independent of this patchset.
>>
>> Please do.
>
> Ok. See below.
>
>>
>>> Unfortunately, a cursory inspection shows one, octeon. While that user
>>> exists and generates such timestamps, I think that the above new flag
>>> should be passed, as well, for API consistency.
>>
>> Ugh, how the heck did that turd get in? Its not like they bothered to
>> include the maintainer on CC. That code must go.
>
> I'm happy to unwind the syststamp part of 3d305850 ("netdev:
> octeon_mgmt: Add hardware timestamp support"). Chad, let me know if
> you object to that. Else, I'll send a patch to you both for review.

A hardware timer used for ethernet timestamps is completely independent 
from the kernel's software view of time. Since the hardware timestamps 
are only exposed in the driver, how can they be correlated with system 
time? If the driver doesn't do it, then nobody else knows how.

For Octeon, you can optionally use the hardware timestamp as the system 
clock reference. Most people don't, but it is the only way to get the 
system time to be accurate. 1588 can synchronize two Octeon boards to 
less than 1ns for the hardware timer. The Linux software timers is 
always farther off.

Chad
Richard Cochran July 7, 2014, 8:11 p.m. UTC | #7
On Mon, Jul 07, 2014 at 12:44:44PM -0700, Chad Reese wrote:
> 
> A hardware timer used for ethernet timestamps is completely
> independent from the kernel's software view of time. Since the
> hardware timestamps are only exposed in the driver, how can they be
> correlated with system time? If the driver doesn't do it, then
> nobody else knows how.

Um, implement a PTP Hardware Clock device?

Don't reimplement clock servos in your driver. Instead, leave that to
the PTP stack (like using linuxptp's phc2sys).
 
> For Octeon, you can optionally use the hardware timestamp as the
> system clock reference. Most people don't, but it is the only way to
> get the system time to be accurate. 1588 can synchronize two Octeon
> boards to less than 1ns for the hardware timer. The Linux software
> timers is always farther off.

1588 cannot synchronize boards unless you expose the clock to the
userland PTP stack. Why don't you do that?

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 7, 2014, 8:18 p.m. UTC | #8
On Mon, Jul 07, 2014 at 12:44:44PM -0700, Chad Reese wrote:
> 
> For Octeon, you can optionally use the hardware timestamp as the
> system clock reference. Most people don't, but it is the only way to
> get the system time to be accurate. 1588 can synchronize two Octeon
> boards to less than 1ns for the hardware timer. The Linux software
> timers is always farther off.

Did you mean 1us ?

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chad Reese July 7, 2014, 9:03 p.m. UTC | #9
On 07/07/2014 01:11 PM, Richard Cochran wrote:
> On Mon, Jul 07, 2014 at 12:44:44PM -0700, Chad Reese wrote:
>>
>> A hardware timer used for ethernet timestamps is completely
>> independent from the kernel's software view of time. Since the
>> hardware timestamps are only exposed in the driver, how can they be
>> correlated with system time? If the driver doesn't do it, then
>> nobody else knows how.
>
> Um, implement a PTP Hardware Clock device?

Octeon does support using the 1588 clock as the kernel's clock source. 
Unfortunately most people don't seem to want to use it. I have no idea why.

> Don't reimplement clock servos in your driver. Instead, leave that to
> the PTP stack (like using linuxptp's phc2sys).

I obviously did it wrong. The one line comment in 
Documentation/networking/timestamping.txt was not enough for me to 
figure out the proper usage of syststamp.

>> For Octeon, you can optionally use the hardware timestamp as the
>> system clock reference. Most people don't, but it is the only way to
>> get the system time to be accurate. 1588 can synchronize two Octeon
>> boards to less than 1ns for the hardware timer. The Linux software
>> timers is always farther off.
>
> 1588 cannot synchronize boards unless you expose the clock to the
> userland PTP stack. Why don't you do that?

I was trying to stick with standard linux userspace APIs. People have no 
interest in the PTP clock at all. All they want is for the standard 
system time to be correct.

Chad
Chad Reese July 7, 2014, 9:08 p.m. UTC | #10
On 07/07/2014 01:18 PM, Richard Cochran wrote:
> On Mon, Jul 07, 2014 at 12:44:44PM -0700, Chad Reese wrote:
>>
>> For Octeon, you can optionally use the hardware timestamp as the
>> system clock reference. Most people don't, but it is the only way to
>> get the system time to be accurate. 1588 can synchronize two Octeon
>> boards to less than 1ns for the hardware timer. The Linux software
>> timers is always farther off.
>
> Did you mean 1us ?

No, it was 1ns. Who would go through all this effort if all you wanted 
was 1us?  Our requirement was within 20ns, but me managed to get 1ns 
with a lab setup. We had to be careful that the scope probes used in the 
measurement were the same length.

Chad
Richard Cochran July 8, 2014, 5:49 a.m. UTC | #11
On Mon, Jul 07, 2014 at 02:08:57PM -0700, Chad Reese wrote:
> 
> No, it was 1ns. Who would go through all this effort if all you
> wanted was 1us?  Our requirement was within 20ns, but me managed to
> get 1ns with a lab setup. We had to be careful that the scope probes
> used in the measurement were the same length.

Well, in a lab, carefully compensating for phy delays, etc, etc,
achieving 1ns is one thing. As a general statement, telling the
public, "our PTP hardware synchronizes to within one nanosecond",
is quite another thing.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 8, 2014, 6:04 a.m. UTC | #12
On Mon, Jul 07, 2014 at 02:03:38PM -0700, Chad Reese wrote:
> On 07/07/2014 01:11 PM, Richard Cochran wrote:
> >Don't reimplement clock servos in your driver. Instead, leave that to
> >the PTP stack (like using linuxptp's phc2sys).
> 
> I obviously did it wrong. The one line comment in
> Documentation/networking/timestamping.txt was not enough for me to
> figure out the proper usage of syststamp.

Sorry about not having clearly deprecated syststamp. We'll do that now.
Next time, putting the PTP maintainer on CC will help catch such things.
 
> I was trying to stick with standard linux userspace APIs.

There is a standard PTP hardware clock subsystem and API, as explained
in Documentation/ptp/ptp.txt.  

> People
> have no interest in the PTP clock at all. All they want is for the
> standard system time to be correct.

You can't have the time correct unless you synchronize it to
something. That is what NTP and PTP are all about. When using a PTP
hardware clock, it is necessary to synchronize the Linux system time
to it. The right way to accomplish this is using a userland PTP
stack, and the wrong way is to implement a servo in every last MAC
driver.

What I don't understand is, how does your device get the PTP time
without running the PTP protocol?

How can your device work without implementing a PTP clock? What am I
missing?

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 8, 2014, 6:08 a.m. UTC | #13
On Tue, Jul 08, 2014 at 07:49:19AM +0200, Richard Cochran wrote:
> On Mon, Jul 07, 2014 at 02:08:57PM -0700, Chad Reese wrote:
> > 
> > No, it was 1ns. Who would go through all this effort if all you
> > wanted was 1us?  Our requirement was within 20ns, but me managed to
> > get 1ns with a lab setup. We had to be careful that the scope probes
> > used in the measurement were the same length.
> 
> Well, in a lab, carefully compensating for phy delays, etc, etc,
> achieving 1ns is one thing. As a general statement, telling the
> public, "our PTP hardware synchronizes to within one nanosecond",
> is quite another thing.

And you are using synchronous Ethernet, too, I suppose?

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chad Reese July 8, 2014, 7:42 a.m. UTC | #14
On 07/07/2014 11:04 PM, Richard Cochran wrote:
> On Mon, Jul 07, 2014 at 02:03:38PM -0700, Chad Reese wrote:
>> On 07/07/2014 01:11 PM, Richard Cochran wrote:
>>> Don't reimplement clock servos in your driver. Instead, leave that to
>>> the PTP stack (like using linuxptp's phc2sys).
>>
>> I obviously did it wrong. The one line comment in
>> Documentation/networking/timestamping.txt was not enough for me to
>> figure out the proper usage of syststamp.
>
> Sorry about not having clearly deprecated syststamp. We'll do that now.
> Next time, putting the PTP maintainer on CC will help catch such things.
>
>> I was trying to stick with standard linux userspace APIs.
>
> There is a standard PTP hardware clock subsystem and API, as explained
> in Documentation/ptp/ptp.txt.
>
>> People
>> have no interest in the PTP clock at all. All they want is for the
>> standard system time to be correct.
>
> You can't have the time correct unless you synchronize it to
> something. That is what NTP and PTP are all about. When using a PTP
> hardware clock, it is necessary to synchronize the Linux system time
> to it. The right way to accomplish this is using a userland PTP
> stack, and the wrong way is to implement a servo in every last MAC
> driver.

You keep saying I implemented a clock servo in the MAC driver. I didn't, 
as is apparent in the code. The only kludge was a conversion from the 
PTP clock to system time with a simple offset function. I will fully 
admit it wasn't great, but it was good enough.

> What I don't understand is, how does your device get the PTP time
> without running the PTP protocol?

I wrote a PTPv2 daemon for Cavium that ran in userspace. It implemented 
all the v2 spec with support for both IP and 802.3 transport. This was 
done from scratch as there wasn't an opensource PTPv2 daemon at the 
time. All this work actually started before there was any PTP timestamp 
support in the kernel. I originally started writing PTP to run on bare 
metal.

> How can your device work without implementing a PTP clock? What am I
> missing?

I'm guessing the details of our PTP implementation are not available to 
the opensource community. It was packaged as a product by Cavium's 
marketing along with other software to support base stations.

Chad
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 8, 2014, 9:41 a.m. UTC | #15
On Tue, Jul 08, 2014 at 12:42:46AM -0700, Chad Reese wrote:
> 
> You keep saying I implemented a clock servo in the MAC driver. I
> didn't, as is apparent in the code. The only kludge was a conversion
> from the PTP clock to system time with a simple offset function. I
> will fully admit it wasn't great, but it was good enough.

I assume that the point of providing both raw and sys time stamps was
to discipline the Linux system clock to the raw clock. (Otherwise, what
good are the sys values?)

In that case, the sample rate is a result of the packet activity in
the driver, and the rate is a part of the servo, at least to my
understanding.
 
> I wrote a PTPv2 daemon for Cavium that ran in userspace. It
> implemented all the v2 spec with support for both IP and 802.3
> transport. This was done from scratch as there wasn't an opensource
> PTPv2 daemon at the time. All this work actually started before
> there was any PTP timestamp support in the kernel. I originally
> started writing PTP to run on bare metal.

Okay, that is now clear. There is nothing wrong with that, although it
is too bad you cannot use the newer interface.

So, the question becomes, will removing the sys time stamp from the
octeon driver spoil your application?

[ The octeon time stamping was merged in August 2012, well after the
  PTP support which appeared in April 2011. So I guess that your
  custom application doesn't need the dual time stamps. ]

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn July 10, 2014, 3:36 p.m. UTC | #16
> So, the question becomes, will removing the sys time stamp from the
> octeon driver spoil your application?

Chad? If it cannot be removed easily, I will prepare the temporary
documentation patch to mark the field as deprecated.

On the topic of this original patch: I'll remove SCM_TSTAMP_HWSYS
and will simplify ee_info to only hold a type. No need for an explicit
index field if we simply assume that the ts[] index is always 0, except
for SCM_TSTAMP_HW, which is 2. We can even copy that one into
both fields 0 and 2 and consider the latter as a legacy workaround.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn July 18, 2014, 3:54 p.m. UTC | #17
On Sat, Jul 5, 2014 at 4:10 PM, Richard Cochran
<richardcochran@gmail.com> wrote:
> On Thu, Jul 03, 2014 at 03:39:33PM -0400, Willem de Bruijn wrote:
>
>> +struct scm_timestamping {
>> +     struct timespec ts[3];
>> +};
>> +
>> +#define SCM_TSTAMP_SND               0x1     /* driver passed skb to NIC */
>> +#define SCM_TSTAMP_ACK               0x2     /* transport layer saw ACK */
>> +#define SCM_TSTAMP_ENQ               0x4     /* stack passed skb to TC layer */
>> +#define SCM_TSTAMP_RCV               0x8     /* stack received skb */
>> +#define SCM_TSTAMP_HWSYS     0x10    /* NIC tstamp in system format */
>> +#define SCM_TSTAMP_HWRAW     0x20    /* NIC tstamp in native format */
>> +
>> +#define SCM_TSTAMP_OFF(n, ts)        (ts << (10 * n))
>
> Hm ...
>

> So you want ee_info to be a bit field like this?
>
>    | 10 bits    | 10 bits    | 10 bits    | 2 bits |
>    |------------+------------+------------+--------|
>    | ts[0] type | ts[1] type | ts[2] type | rsv    |
>
> Why not simplify this into two fields:
>
> 1. the index ts[] that contains a time stamp
> 2. the type of the time stamp
>
> The kernel never provides more than one value in ts[], and it is hard
> to imagine that we will ever do this. The original so_timestamping
> interface and documentation seem to suggest that multiple values are
> possible, but there was never, ever any code that did this. As an end
> user, I found that very confusing.
>
> I would prefer making the extended interface simpler, rather than
> giving the impression that multiple time stamps are possible when they
> really are not.

Agreed. I have a simpler patchset ready where ee_info only
defines contents of ts[0] and the SCM fields are just an enum.

Before I send the entire patchset again, I plan to simplify two
other parts if no one objects.

1. only one MSG_TSTAMP send() flag.

The socket flag space is limited, so defining a flag for
each type of timestamp is not practical. Instead, define one flag
MSG_TSTAMP that generates all timestamps by default. The
default can be overridden at the socket level with timestamping
option fields such as SOF_TIMESTAMPING_ENQ_SOFTWARE.

2. correlate tstamps with data

If multiple datagrams are in flight, it is ambiguous to which
datagram a timestamp refers. The payload is looped with the
timestamp, but content-based correlation can be expensive,
even impossible.

My previous patch addressed this for TCP only, by passing
a seqno in ee_data. That has issues of its own. For one, with
write_seq at send time not know to the process, it is still hard
to correlate. I am going to remove that.

Returning a counter in ee_data to distinguish tstamps from
successive send(.., MSG_TSTAMP) calls allows correlation.
Better is if it works for all protocols. An explicit per-sk counter
+ per-skb id field would require scarce skb space. Instead.
I plan to just loop skb->mark and leave it to the application to
choose marks wisely.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ec89301..c74aab5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -260,6 +260,8 @@  enum {
 	SKBTX_SHARED_FRAG = 1 << 5,
 };
 
+#define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_SW_TSTAMP)
+
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
  * lower device, the skb last reference should be 0 when calling this.
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index aacd4fb..5d93d0b 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -22,5 +22,24 @@  struct sock_extended_err {
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
+/**
+ *	struct scm_timestamping - timestamps exposed through cmsg
+ *
+ *	The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_*
+ *	communicate network timestamps by passing this struct in a cmsg with
+ *	recvmsg(). See Documentation/networking/timestamping.txt for details.
+ */
+struct scm_timestamping {
+	struct timespec ts[3];
+};
+
+#define SCM_TSTAMP_SND		0x1	/* driver passed skb to NIC */
+#define SCM_TSTAMP_ACK		0x2	/* transport layer saw ACK */
+#define SCM_TSTAMP_ENQ		0x4	/* stack passed skb to TC layer */
+#define SCM_TSTAMP_RCV		0x8	/* stack received skb */
+#define SCM_TSTAMP_HWSYS	0x10	/* NIC tstamp in system format */
+#define SCM_TSTAMP_HWRAW	0x20	/* NIC tstamp in native format */
+
+#define SCM_TSTAMP_OFF(n, ts)	(ts << (10 * n))
 
 #endif /* _UAPI_LINUX_ERRQUEUE_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a3303..1bcd05d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3521,6 +3521,7 @@  void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	serr->ee.ee_info = hwtstamps ? 0 : SCM_TSTAMP_SND;
 
 	err = sock_queue_err_skb(sk, skb);
 
diff --git a/net/socket.c b/net/socket.c
index abf56b2..a31138d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@ 
 #include <linux/sockios.h>
 #include <linux/atalk.h>
 #include <net/busy_poll.h>
+#include <linux/errqueue.h>
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 unsigned int sysctl_net_busy_read __read_mostly;
@@ -696,9 +697,10 @@  EXPORT_SYMBOL(kernel_sendmsg);
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb)
 {
+	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
-	struct timespec ts[3];
-	int empty = 1;
+	struct scm_timestamping tss;
+	int tstype = 0, is_tx = skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
 
@@ -714,28 +716,31 @@  void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
 				 sizeof(tv), &tv);
 		} else {
-			skb_get_timestampns(skb, &ts[0]);
+			struct timespec ts;
+			skb_get_timestampns(skb, &ts);
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
-				 sizeof(ts[0]), &ts[0]);
+				 sizeof(ts), &ts);
 		}
 	}
 
-
-	memset(ts, 0, sizeof(ts));
+	memset(&tss, 0, sizeof(tss));
 	if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
-	    ktime_to_timespec_cond(skb->tstamp, ts + 0))
-		empty = 0;
+	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
+		tstype |= is_tx ? serr->ee.ee_info : SCM_TSTAMP_RCV;
 	if (shhwtstamps) {
 		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
-		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
-			empty = 0;
+		    ktime_to_timespec_cond(shhwtstamps->syststamp, tss.ts + 1))
+			tstype |= SCM_TSTAMP_OFF(1, SCM_TSTAMP_HWSYS);
 		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
-		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
-			empty = 0;
+		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
+			tstype |= SCM_TSTAMP_OFF(2, SCM_TSTAMP_HWRAW);
 	}
-	if (!empty)
+	if (tstype) {
+		if (is_tx)
+			serr->ee.ee_info = tstype;
 		put_cmsg(msg, SOL_SOCKET,
-			 SCM_TIMESTAMPING, sizeof(ts), &ts);
+			 SCM_TIMESTAMPING, sizeof(tss), &tss);
+	}
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);