diff mbox

[net-next,06/12] ixgbe: Hardware Timestamping + PTP Hardware Clock (PHC)

Message ID 1336632413-19135-7-git-send-email-jeffrey.t.kirsher@intel.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Kirsher, Jeffrey T May 10, 2012, 6:46 a.m. UTC
From: Jacob Keller <jacob.e.keller@intel.com>

This patch enables hardware timestamping for use with PTP software by
extracting a ns counter from an arbitrary fixed point cycles counter.
The hardware generates SYSTIME registers using the DMA tick which
changes based on the current link speed. These SYSTIME registers are
converted to ns using the cyclecounter and timecounter structures
provided by the kernel. Using the SO_TIMESTAMPING API, software can
enable and access timestamps for PTP packets.

The SO_TIMESTAMPING API has space for 3 different kinds of timestamps,
SYS, RAW, and SOF. SYS hardware timestamps are hardware ns values that
are then scaled to the software clock. RAW hardware timestamps are the
direct raw value of the ns counter. SOF software timestamps are the
software timestamp calculated as close as possible to the software
transmit, but are not offloaded to the hardware. This patch only
supports the RAW hardware timestamps due to inefficiency of the SYS
design.

This patch also enables the PHC subsystem features for atomically
adjusting the cycle register, and adjusting the clock frequency in
parts per billion. This frequency adjustment works by slightly
adjusting the value added to the cycle registers each DMA tick. This
causes the hardware registers to overflow rapidly (approximately once
every 34 seconds, when at 10gig link). To solve this, the timecounter
structure is used, along with a timer set for every 25 seconds. This
allows for detecting register overflow and converting the cycle
counter registers into ns values needed for providing useful
timestamps to the network stack.

Only the basic required clock functions are supported at this time,
although the hardware supports some ancillary features and these could
easily be enabled in the future.

Note that use of this hardware timestamping requires modifying daemon
software to use the SO_TIMESTAMPING API for timestamps, and the
ptp_clock PHC framework for accessing the clock. The timestamps have
no relation to the system time at all, so software must use the posix
clock generated by the PHC framework instead.

Signed-off-by: Jacob E Keller <jacob.e.keller@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/Kconfig            |   11 +
 drivers/net/ethernet/intel/ixgbe/Makefile     |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |   32 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   53 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c  |  732 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |   32 ++
 6 files changed, 861 insertions(+), 1 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c

Comments

Richard Cochran May 10, 2012, 2:11 p.m. UTC | #1
Mostly, this looks very good. I do have one concern and a nit, though.

On Wed, May 09, 2012 at 11:46:47PM -0700, Jeff Kirsher wrote:
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index 1693ec3..9a83c40 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -789,6 +789,13 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
>  		total_bytes += tx_buffer->bytecount;
>  		total_packets += tx_buffer->gso_segs;
>  
> +#ifdef CONFIG_IXGBE_PTP
> +		if (unlikely(tx_buffer->tx_flags &
> +			     IXGBE_TX_FLAGS_TSTAMP))
> +			ixgbe_ptp_tx_hwtstamp(q_vector,
> +					      tx_buffer->skb);

This looks strangely wrapped.

> +
> +#endif
>  		/* free the skb */
>  		dev_kfree_skb_any(tx_buffer->skb);
>  

...

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> new file mode 100644
> index 0000000..0b6553e
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c

...

> +/**
> + * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp
> + * @q_vector: structure containing interrupt and ring information
> + * @skb: particular skb to send timestamp with
> + *
> + * if the timestamp is valid, we convert it into the timecounter ns
> + * value, then store that result into the shhwtstamps structure which
> + * is passed up the network stack
> + */
> +void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector,
> +			   struct sk_buff *skb)
> +{
> +	struct ixgbe_adapter *adapter;
> +	struct ixgbe_hw *hw;
> +	struct skb_shared_hwtstamps *shhwtstamps;
> +	u64 regval = 0, ns;
> +	u32 tsyncrxctl;
> +	unsigned long flags;
> +
> +	/* we cannot process timestamps on a ring without a q_vector */
> +	if (!q_vector || !q_vector->adapter)
> +		return;
> +
> +	adapter = q_vector->adapter;
> +	hw = &adapter->hw;
> +
> +	tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
> +	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
> +	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32;
> +
> +	/*
> +	 * If this bit is set, then the RX registers contain the time stamp. No
> +	 * other packet will be time stamped until we read these registers, so
> +	 * read the registers to make them available again. Because only one
> +	 * packet can be time stamped at a time, we know that the register
> +	 * values must belong to this one here and therefore we don't need to
> +	 * compare any of the additional attributes stored for it.

I suspect that this assumption is wrong. What happens if the time
stamping logic locks a value but the packet is lost because the ring
is full?

BTW, the IGB driver also has this defect.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Keller May 10, 2012, 9:53 p.m. UTC | #2
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Thursday, May 10, 2012 7:11 AM
> To: Kirsher, Jeffrey T
> Cc: davem@davemloft.net; Keller, Jacob E; netdev@vger.kernel.org;
> gospo@redhat.com; sassmann@redhat.com
> Subject: Re: [net-next 06/12] ixgbe: Hardware Timestamping + PTP Hardware
> Clock (PHC)
> 
> Mostly, this looks very good. I do have one concern and a nit, though.
> 
> On Wed, May 09, 2012 at 11:46:47PM -0700, Jeff Kirsher wrote:
> > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > index 1693ec3..9a83c40 100644
> > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > @@ -789,6 +789,13 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector
> *q_vector,
> >  		total_bytes += tx_buffer->bytecount;
> >  		total_packets += tx_buffer->gso_segs;
> >
> > +#ifdef CONFIG_IXGBE_PTP
> > +		if (unlikely(tx_buffer->tx_flags &
> > +			     IXGBE_TX_FLAGS_TSTAMP))
> > +			ixgbe_ptp_tx_hwtstamp(q_vector,
> > +					      tx_buffer->skb);
> 
> This looks strangely wrapped.
> 
> > +
> > +#endif
> >  		/* free the skb */
> >  		dev_kfree_skb_any(tx_buffer->skb);
> >
> 
> ...
> 
> > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> > b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> > new file mode 100644
> > index 0000000..0b6553e
> > --- /dev/null
> > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> 
> ...
> 
> > +/**
> > + * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time
> > +stamp
> > + * @q_vector: structure containing interrupt and ring information
> > + * @skb: particular skb to send timestamp with
> > + *
> > + * if the timestamp is valid, we convert it into the timecounter ns
> > + * value, then store that result into the shhwtstamps structure which
> > + * is passed up the network stack
> > + */
> > +void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector,
> > +			   struct sk_buff *skb)
> > +{
> > +	struct ixgbe_adapter *adapter;
> > +	struct ixgbe_hw *hw;
> > +	struct skb_shared_hwtstamps *shhwtstamps;
> > +	u64 regval = 0, ns;
> > +	u32 tsyncrxctl;
> > +	unsigned long flags;
> > +
> > +	/* we cannot process timestamps on a ring without a q_vector */
> > +	if (!q_vector || !q_vector->adapter)
> > +		return;
> > +
> > +	adapter = q_vector->adapter;
> > +	hw = &adapter->hw;
> > +
> > +	tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
> > +	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
> > +	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32;
> > +
> > +	/*
> > +	 * If this bit is set, then the RX registers contain the time stamp. No
> > +	 * other packet will be time stamped until we read these registers, so
> > +	 * read the registers to make them available again. Because only one
> > +	 * packet can be time stamped at a time, we know that the register
> > +	 * values must belong to this one here and therefore we don't need to
> > +	 * compare any of the additional attributes stored for it.
> 
> I suspect that this assumption is wrong. What happens if the time stamping
> logic locks a value but the packet is lost because the ring is full?
> 
> BTW, the IGB driver also has this defect.
>

Note how I read the rx registers first? So it will always clear the value.
That should unlock the value for the next rx stamp packet.
 
> Thanks,
> Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran May 11, 2012, 5:15 a.m. UTC | #3
On Thu, May 10, 2012 at 09:53:18PM +0000, Keller, Jacob E wrote:
> > > +	/*
> > > +	 * If this bit is set, then the RX registers contain the time stamp. No
> > > +	 * other packet will be time stamped until we read these registers, so
> > > +	 * read the registers to make them available again. Because only one
> > > +	 * packet can be time stamped at a time, we know that the register
> > > +	 * values must belong to this one here and therefore we don't need to
> > > +	 * compare any of the additional attributes stored for it.
> > 
> > I suspect that this assumption is wrong. What happens if the time stamping
> > logic locks a value but the packet is lost because the ring is full?
> > 
> > BTW, the IGB driver also has this defect.
> >
> 
> Note how I read the rx registers first? So it will always clear the value.
> That should unlock the value for the next rx stamp packet.

1. Hw recognizes ptp event packet, locks time stamp
2. Hw drops packet because queue is full
3. No more time stamps are ever generated

Can this happen? The docs seem to say it can.

Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Keller May 11, 2012, 6:05 p.m. UTC | #4
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Thursday, May 10, 2012 10:15 PM
> To: Keller, Jacob E
> Cc: Kirsher, Jeffrey T; davem@davemloft.net; netdev@vger.kernel.org;
> gospo@redhat.com; sassmann@redhat.com
> Subject: Re: [net-next 06/12] ixgbe: Hardware Timestamping + PTP Hardware
> Clock (PHC)
> 
> On Thu, May 10, 2012 at 09:53:18PM +0000, Keller, Jacob E wrote:
> > > > +	/*
> > > > +	 * If this bit is set, then the RX registers contain the time
> stamp. No
> > > > +	 * other packet will be time stamped until we read these
> registers, so
> > > > +	 * read the registers to make them available again. Because only
> one
> > > > +	 * packet can be time stamped at a time, we know that the
> register
> > > > +	 * values must belong to this one here and therefore we don't
> need to
> > > > +	 * compare any of the additional attributes stored for it.
> > >
> > > I suspect that this assumption is wrong. What happens if the time
> > > stamping logic locks a value but the packet is lost because the ring is
> full?
> > >
> > > BTW, the IGB driver also has this defect.
> > >
> >
> > Note how I read the rx registers first? So it will always clear the value.
> > That should unlock the value for the next rx stamp packet.
> 
> 1. Hw recognizes ptp event packet, locks time stamp 2. Hw drops packet because
> queue is full 3. No more time stamps are ever generated
> 
> Can this happen? The docs seems to say it can.
> 
> Richard

It might be possible. I'm curious what the best method to solve this is. I don't
think this generation of hardware can get an interrupt when the RX timestamp is
locked, and I don't know how easy it would be to process the sequence number within
the driver. It might be the case that the next packet would still be marked in the
descriptor and therefore the previous timestamp would apply to the next packet. I am
not sure, but I know we haven't seen issues where timestamps drop completely. I am
not sure what the best solution is here.

- Jake
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Keller May 11, 2012, 7:23 p.m. UTC | #5
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Thursday, May 10, 2012 10:15 PM
> To: Keller, Jacob E
> Cc: Kirsher, Jeffrey T; davem@davemloft.net; netdev@vger.kernel.org;
> gospo@redhat.com; sassmann@redhat.com
> Subject: Re: [net-next 06/12] ixgbe: Hardware Timestamping + PTP Hardware
> Clock (PHC)
> 
> On Thu, May 10, 2012 at 09:53:18PM +0000, Keller, Jacob E wrote:
> > > > +	/*
> > > > +	 * If this bit is set, then the RX registers contain the time
> stamp. No
> > > > +	 * other packet will be time stamped until we read these
> registers, so
> > > > +	 * read the registers to make them available again. Because only
> one
> > > > +	 * packet can be time stamped at a time, we know that the
> register
> > > > +	 * values must belong to this one here and therefore we don't
> need to
> > > > +	 * compare any of the additional attributes stored for it.
> > >
> > > I suspect that this assumption is wrong. What happens if the time
> > > stamping logic locks a value but the packet is lost because the ring is
> full?
> > >
> > > BTW, the IGB driver also has this defect.
> > >
> >
> > Note how I read the rx registers first? So it will always clear the value.
> > That should unlock the value for the next rx stamp packet.
> 
> 1. Hw recognizes ptp event packet, locks time stamp 2. Hw drops packet because
> queue is full 3. No more time stamps are ever generated
> 
> Can this happen? The docs seems to say it can.
> 
> Richard

I believe this very rare case might be possible, but I don't think that checking the ptp seqid will fix anything. In normal cases, hardware latches Rx packet timestamp, then the ptp packet goes into the queue and we process it shortly after. Before we process that packet there will never be another packet in the queue that needs a timestamp. We know this because the hardware stops timestamping until we unlatch the RX registers. This should mean we don't need to check the sequence ID, and spending time doing it would never fix the issue you are talking about.

The issue is for when a packet is timestamped and then never reaches the queue. Then the rx stamp registers are locked for good, because we never clear them, and hardware would never timestamp another receive packet. I don't know a good solution to this, except to clear the registers periodically. Do you have any suggestions?

- Jake
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Keller May 11, 2012, 7:34 p.m. UTC | #6
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Thursday, May 10, 2012 10:15 PM
> To: Keller, Jacob E
> Cc: Kirsher, Jeffrey T; davem@davemloft.net; netdev@vger.kernel.org;
> gospo@redhat.com; sassmann@redhat.com
> Subject: Re: [net-next 06/12] ixgbe: Hardware Timestamping + PTP Hardware
> Clock (PHC)
> 
> On Thu, May 10, 2012 at 09:53:18PM +0000, Keller, Jacob E wrote:
> > > > +	/*
> > > > +	 * If this bit is set, then the RX registers contain the time
> stamp. No
> > > > +	 * other packet will be time stamped until we read these
> registers, so
> > > > +	 * read the registers to make them available again. Because only
> one
> > > > +	 * packet can be time stamped at a time, we know that the
> register
> > > > +	 * values must belong to this one here and therefore we don't
> need to
> > > > +	 * compare any of the additional attributes stored for it.
> > >
> > > I suspect that this assumption is wrong. What happens if the time
> > > stamping logic locks a value but the packet is lost because the ring is
> full?
> > >
> > > BTW, the IGB driver also has this defect.
> > >
> >
> > Note how I read the rx registers first? So it will always clear the value.
> > That should unlock the value for the next rx stamp packet.
> 
> 1. Hw recognizes ptp event packet, locks time stamp 2. Hw drops packet because
> queue is full 3. No more time stamps are ever generated
> 
> Can this happen? The docs seems to say it can.
> 
> Richard

Sorry for the spam here, but I looked at the ixgbe code, and found a solution. When ptp4l discovers a missing rx timestamp, it faults and then waits for 15 seconds until the fault is cleared. After this, it reopens the socket, and reruns the hwtstamp ioctl. This function actually does clear the tx/rx timestamps (just in case). So after a fault the values should end up being reset. Is this good enough?

- Jake
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran May 12, 2012, 5:24 a.m. UTC | #7
On Fri, May 11, 2012 at 07:34:12PM +0000, Keller, Jacob E wrote:
> 
> Sorry for the spam here, but I looked at the ixgbe code, and found a
> solution. When ptp4l discovers a missing rx timestamp, it faults and
> then waits for 15 seconds until the fault is cleared. After this, it
> reopens the socket, and reruns the hwtstamp ioctl. This function
> actually does clear the tx/rx timestamps (just in case) So after a
> fault the values should end up being reset. Is this good enough?

Yes, for ptp4l, that will work, but this program is acting extremely
defensively in that situation. Not every user space program will
necessarily behave in the same way. The classic ptpd just ignores
packets with missing time stamps, IIRC.

Richard

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran May 12, 2012, 5:34 a.m. UTC | #8
On Fri, May 11, 2012 at 07:23:44PM +0000, Keller, Jacob E wrote:
> 
>
> I believe this very rare case might be possible, but I don't think
> that checking the ptp seqid will fix anything. In normal cases,
> hardware latches Rx packet timestamp, then the ptp packet goes into
> the queue and we process it shortly after. Before we process that
> packet there will never be another packet in the queue that needs a
> timestamp. We know this because the hardware stops timestamping
> until we unlatch the RX registers. This should mean we don't need to
> check the sequence ID, and spending time doing it would never fix
> the issue you are talking about.
>
> The issue is for when a packet is timestamped and then never reaches
> the queue. Then the rx stamp registers are locked for good, because
> we never clear them, and hardware would never timestamp another
> receive packet. I don't know a good solution to this, except to
> clear the registers periodically. Do you have any suggestions?

Well, one solution would be to check every received packet with the
BPF in ptp_classify.h (whenever Rx time stamping is enabled).

When the driver finds an event packet in the Rx queue, and
TSYNCRXCTL[RXTT] is set, it reads out the time stamp along with
RXSATRL/H. If the fields match, then add the time stamp to the skb.

[ Or perhaps instead of using RXSATRL/H, just use the descriptor bit.
  If *not* set, then the time stamp does not belong to this packet. ]

HTH,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Keller May 14, 2012, 5:28 p.m. UTC | #9
On 05/11/2012 10:34 PM, Richard Cochran wrote:
> On Fri, May 11, 2012 at 07:23:44PM +0000, Keller, Jacob E wrote:
>>
>>
>> I believe this very rare case might be possible, but I don't think
>> that checking the ptp seqid will fix anything. In normal cases,
>> hardware latches Rx packet timestamp, then the ptp packet goes into
>> the queue and we process it shortly after. Before we process that
>> packet there will never be another packet in the queue that needs a
>> timestamp. We know this because the hardware stops timestamping
>> until we unlatch the RX registers. This should mean we don't need to
>> check the sequence ID, and spending time doing it would never fix
>> the issue you are talking about.
>>
>> The issue is for when a packet is timestamped and then never reaches
>> the queue. Then the rx stamp registers are locked for good, because
>> we never clear them, and hardware would never timestamp another
>> receive packet. I don't know a good solution to this, except to
>> clear the registers periodically. Do you have any suggestions?
>
> Well, one solution would be to check every received packet with the
> BPF in ptp_classify.h (whenever Rx time stamping is enabled).
>
> When the driver finds an event packet in the Rx queue, and
> TSYNCRXCTL[RXTT] is set, it reads out the time stamp along with
> RXSATRL/H. If the fields match, then add the time stamp to the skb.
>
> [ Or perhaps instead of using RXSATRL/H, just use the descriptor bit.
>    If *not* set, then the time stamp does not belong to this packet. ]
>
> HTH,
> Richard

Ok, this sounds like a good plan. The device already doesn't allow
timestamping of other types of packets, so the check doesn't need to be
general purpose.

Am I correct in thinking all I need to do is check the type and if it 
matches the currently configured rx timestamp mode, then double check 
the bit for whether a timestamp is available, and whether the descriptor 
had a timestamp bit enabled?

Thanks

- Jake
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Keller May 14, 2012, 10:42 p.m. UTC | #10
On 05/11/2012 10:34 PM, Richard Cochran wrote:
> On Fri, May 11, 2012 at 07:23:44PM +0000, Keller, Jacob E wrote:
>>
>>
>> I believe this very rare case might be possible, but I don't think
>> that checking the ptp seqid will fix anything. In normal cases,
>> hardware latches Rx packet timestamp, then the ptp packet goes into
>> the queue and we process it shortly after. Before we process that
>> packet there will never be another packet in the queue that needs a
>> timestamp. We know this because the hardware stops timestamping
>> until we unlatch the RX registers. This should mean we don't need to
>> check the sequence ID, and spending time doing it would never fix
>> the issue you are talking about.
>>
>> The issue is for when a packet is timestamped and then never reaches
>> the queue. Then the rx stamp registers are locked for good, because
>> we never clear them, and hardware would never timestamp another
>> receive packet. I don't know a good solution to this, except to
>> clear the registers periodically. Do you have any suggestions?
>
> Well, one solution would be to check every received packet with the
> BPF in ptp_classify.h (whenever Rx time stamping is enabled).
>
> When the driver finds an event packet in the Rx queue, and
> TSYNCRXCTL[RXTT] is set, it reads out the time stamp along with
> RXSATRL/H. If the fields match, then add the time stamp to the skb.
>
> [ Or perhaps instead of using RXSATRL/H, just use the descriptor bit.
>    If *not* set, then the time stamp does not belong to this packet. ]
>
> HTH,
> Richard

sk_run_filter is always returning PTP_CLASS_NONE (0), instead of the 
correct type. Do you know what I am doing wrong? I can send you the 
patch I am working with...

- Jake
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran May 15, 2012, 5:32 a.m. UTC | #11
On Mon, May 14, 2012 at 03:42:03PM -0700, Jacob Keller wrote:
> 
> sk_run_filter is always returning PTP_CLASS_NONE (0), instead of the
> correct type. Do you know what I am doing wrong? I can send you the
> patch I am working with...

The filter needs to run with a pointer to the DST MAC. Perhaps you are
passing it the address of padding or Layer 3?

I will take a look at the patch...

Thanks,
Richard



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 546efe3..79b07ec 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -220,6 +220,17 @@  config IXGBE_DCB
 
 	  If unsure, say N.
 
+config IXGBE_PTP
+	bool "PTP Clock Support"
+	default n
+	depends on IXGBE && PTP_1588_CLOCK
+	---help---
+	  Say Y here if you want support for 1588 Timestamping with a
+	  PHC device, using the PTP 1588 Clock support. This is
+	  required to enable timestamping support for the device.
+
+	  If unsure, say N.
+
 config IXGBEVF
 	tristate "Intel(R) 82599 Virtual Function Ethernet support"
 	depends on PCI_MSI
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 0708d7e..0bdf06b 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -39,4 +39,6 @@  ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
 ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
                               ixgbe_dcb_82599.o ixgbe_dcb_nl.o
 
+ixgbe-$(CONFIG_IXGBE_PTP) += ixgbe_ptp.o
+
 ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 2262bf7..c90fbd2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -36,6 +36,12 @@ 
 #include <linux/aer.h>
 #include <linux/if_vlan.h>
 
+#ifdef CONFIG_IXGBE_PTP
+#include <linux/clocksource.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#endif /* CONFIG_IXGBE_PTP */
+
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
 #include "ixgbe_dcb.h"
@@ -96,6 +102,7 @@ 
 #define IXGBE_TX_FLAGS_FCOE		(u32)(1 << 5)
 #define IXGBE_TX_FLAGS_FSO		(u32)(1 << 6)
 #define IXGBE_TX_FLAGS_TXSW		(u32)(1 << 7)
+#define IXGBE_TX_FLAGS_TSTAMP		(u32)(1 << 8)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT  29
@@ -458,6 +465,7 @@  struct ixgbe_adapter {
 #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT        (u32)(1 << 7)
 #define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP		(u32)(1 << 8)
 #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP		(u32)(1 << 9)
+#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED	(u32)(1 << 10)
 
 	/* Tx fast path data */
 	int num_tx_queues;
@@ -545,6 +553,17 @@  struct ixgbe_adapter {
 	u32 interrupt_event;
 	u32 led_reg;
 
+#ifdef CONFIG_IXGBE_PTP
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	unsigned long last_overflow_check;
+	spinlock_t tmreg_lock;
+	struct cyclecounter cc;
+	struct timecounter tc;
+	u32 base_incval;
+	u32 cycle_speed;
+#endif /* CONFIG_IXGBE_PTP */
+
 	/* SR-IOV */
 	DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
 	unsigned int num_vfs;
@@ -689,4 +708,17 @@  static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring)
 	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
 }
 
+#ifdef CONFIG_IXGBE_PTP
+extern void ixgbe_ptp_init(struct ixgbe_adapter *adapter);
+extern void ixgbe_ptp_stop(struct ixgbe_adapter *adapter);
+extern void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter);
+extern void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector,
+				  struct sk_buff *skb);
+extern void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector,
+				  struct sk_buff *skb);
+extern int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter,
+				    struct ifreq *ifr, int cmd);
+extern void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter);
+#endif /* CONFIG_IXGBE_PTP */
+
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1693ec3..9a83c40 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -789,6 +789,13 @@  static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
 
+#ifdef CONFIG_IXGBE_PTP
+		if (unlikely(tx_buffer->tx_flags &
+			     IXGBE_TX_FLAGS_TSTAMP))
+			ixgbe_ptp_tx_hwtstamp(q_vector,
+					      tx_buffer->skb);
+
+#endif
 		/* free the skb */
 		dev_kfree_skb_any(tx_buffer->skb);
 
@@ -1389,6 +1396,11 @@  static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
 
 	ixgbe_rx_checksum(rx_ring, rx_desc, skb);
 
+#ifdef CONFIG_IXGBE_PTP
+	if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))
+		ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, skb);
+#endif
+
 	if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
 		u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
 		__vlan_hwaccel_put_tag(skb, vid);
@@ -5387,6 +5399,11 @@  static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
 		flow_rx = false;
 		break;
 	}
+
+#ifdef CONFIG_IXGBE_PTP
+	ixgbe_ptp_start_cyclecounter(adapter);
+#endif
+
 	e_info(drv, "NIC Link is Up %s, Flow Control: %s\n",
 	       (link_speed == IXGBE_LINK_SPEED_10GB_FULL ?
 	       "10 Gbps" :
@@ -5424,6 +5441,10 @@  static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *adapter)
 	if (ixgbe_is_sfp(hw) && hw->mac.type == ixgbe_mac_82598EB)
 		adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP;
 
+#ifdef CONFIG_IXGBE_PTP
+	ixgbe_ptp_start_cyclecounter(adapter);
+#endif
+
 	e_info(drv, "NIC Link is Down\n");
 	netif_carrier_off(netdev);
 }
@@ -5723,6 +5744,9 @@  static void ixgbe_service_task(struct work_struct *work)
 	ixgbe_watchdog_subtask(adapter);
 	ixgbe_fdir_reinit_subtask(adapter);
 	ixgbe_check_hang_subtask(adapter);
+#ifdef CONFIG_IXGBE_PTP
+	ixgbe_ptp_overflow_check(adapter);
+#endif
 
 	ixgbe_service_event_complete(adapter);
 }
@@ -5873,6 +5897,11 @@  static __le32 ixgbe_tx_cmd_type(u32 tx_flags)
 	if (tx_flags & IXGBE_TX_FLAGS_HW_VLAN)
 		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_VLE);
 
+#ifdef CONFIG_IXGBE_PTP
+	if (tx_flags & IXGBE_TX_FLAGS_TSTAMP)
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_MAC_TSTAMP);
+#endif
+
 	/* set segmentation enable bits for TSO/FSO */
 #ifdef IXGBE_FCOE
 	if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_FSO))
@@ -6263,6 +6292,13 @@  netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 		tx_flags |= IXGBE_TX_FLAGS_SW_VLAN;
 	}
 
+#ifdef CONFIG_IXGBE_PTP
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
+	}
+#endif
+
 #ifdef CONFIG_PCI_IOV
 	/*
 	 * Use the l2switch_enable flag - would be false if the DMA
@@ -6415,7 +6451,14 @@  static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
-	return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd);
+	switch (cmd) {
+#ifdef CONFIG_IXGBE_PTP
+	case SIOCSHWTSTAMP:
+		return ixgbe_ptp_hwtstamp_ioctl(adapter, req, cmd);
+#endif
+	default:
+		return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd);
+	}
 }
 
 /**
@@ -7202,6 +7245,10 @@  static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
 
+#ifdef CONFIG_IXGBE_PTP
+	ixgbe_ptp_init(adapter);
+#endif /* CONFIG_IXGBE_PTP*/
+
 	/* save off EEPROM version number */
 	hw->eeprom.ops.read(hw, 0x2e, &adapter->eeprom_verh);
 	hw->eeprom.ops.read(hw, 0x2d, &adapter->eeprom_verl);
@@ -7330,6 +7377,10 @@  static void __devexit ixgbe_remove(struct pci_dev *pdev)
 	set_bit(__IXGBE_DOWN, &adapter->state);
 	cancel_work_sync(&adapter->service_task);
 
+#ifdef CONFIG_IXGBE_PTP
+	ixgbe_ptp_stop(adapter);
+#endif
+
 #ifdef CONFIG_IXGBE_DCA
 	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
 		adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
new file mode 100644
index 0000000..0b6553e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -0,0 +1,732 @@ 
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+#include "ixgbe.h"
+#include <linux/export.h>
+
+/*
+ * The 82599 and the X540 do not have true 64bit nanosecond scale
+ * counter registers. Instead, SYSTIME is defined by a fixed point
+ * system which allows the user to define the scale counter increment
+ * value at every level change of the oscillator driving the SYSTIME
+ * value. For both devices the TIMINCA:IV field defines this
+ * increment. On the X540 device, 31 bits are provided. However, the
+ * 82599 only provides 24 bits. The time unit is determined by the
+ * clock frequency of the oscillator in combination with the TIMINCA
+ * register. When these devices link at 10Gb the oscillator has a
+ * period of 6.4ns. In order to convert the scale counter into
+ * nanoseconds the cyclecounter and timecounter structures are
+ * used. The SYSTIME registers need to be converted to ns values by use
+ * of only a right shift (division by power of 2). The following math
+ * determines the largest incvalue that will fit into the available
+ * bits in the TIMINCA register.
+ *
+ * PeriodWidth: Number of bits to store the clock period
+ * MaxWidth: The maximum width value of the TIMINCA register
+ * Period: The clock period for the oscillator
+ * round(): discard the fractional portion of the calculation
+ *
+ * Period * [ 2 ^ ( MaxWidth - PeriodWidth ) ]
+ *
+ * For the X540, MaxWidth is 31 bits, and the base period is 6.4 ns
+ * For the 82599, MaxWidth is 24 bits, and the base period is 6.4 ns
+ *
+ * The period also changes based on the link speed:
+ * At 10Gb link or no link, the period remains the same.
+ * At 1Gb link, the period is multiplied by 10. (64ns)
+ * At 100Mb link, the period is multiplied by 100. (640ns)
+ *
+ * The calculated value allows us to right shift the SYSTIME register
+ * value in order to quickly convert it into a nanosecond clock,
+ * while allowing for the maximum possible adjustment value.
+ *
+ * These diagrams are only for the 10Gb link period
+ *
+ *           SYSTIMEH            SYSTIMEL
+ *       +--------------+  +--------------+
+ * X540  |      32      |  | 1 | 3 |  28  |
+ *       *--------------+  +--------------+
+ *        \________ 36 bits ______/  fract
+ *
+ *       +--------------+  +--------------+
+ * 82599 |      32      |  | 8 | 3 |  21  |
+ *       *--------------+  +--------------+
+ *        \________ 43 bits ______/  fract
+ *
+ * The 36 bit X540 SYSTIME overflows every
+ *   2^36 * 10^-9 / 60 = 1.14 minutes or 69 seconds
+ *
+ * The 43 bit 82599 SYSTIME overflows every
+ *   2^43 * 10^-9 / 3600 = 2.4 hours
+ */
+#define IXGBE_INCVAL_10GB 0x66666666 /* 6.4 ns * 2^28, fraction dropped */
+#define IXGBE_INCVAL_1GB  0x40000000 /* 64 ns * 2^24 */
+#define IXGBE_INCVAL_100  0x50000000 /* 640 ns * 2^21 */
+
+#define IXGBE_INCVAL_SHIFT_10GB  28 /* cc.shift paired with INCVAL_10GB */
+#define IXGBE_INCVAL_SHIFT_1GB   24 /* cc.shift paired with INCVAL_1GB */
+#define IXGBE_INCVAL_SHIFT_100   21 /* cc.shift paired with INCVAL_100 */
+
+#define IXGBE_INCVAL_SHIFT_82599 7 /* 82599: incval and shift reduced by 7 */
+#define IXGBE_INCPER_SHIFT_82599 24 /* 82599: bit position of the 1 in TIMINCA */
+#define IXGBE_MAX_TIMEADJ_VALUE  0x7FFFFFFFFFFFFFFFULL
+
+#define IXGBE_OVERFLOW_PERIOD    (HZ * 30) /* jiffies between overflow checks */
+
+/**
+ * ixgbe_ptp_read - fetch the raw 64 bit SYSTIME cycle value
+ * @cc: cyclecounter embedded in the adapter structure
+ *
+ * Read callback for the cyclecounter: SYSTIML is read first, then
+ * SYSTIMH, and the two halves are combined into one value that the
+ * timecounter later converts into nanoseconds.
+ */
+static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(cc, struct ixgbe_adapter, cc);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 lo, hi;
+
+	lo = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+	hi = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+
+	return ((u64)hi << 32) | lo;
+}
+
+/**
+ * ixgbe_ptp_adjfreq - tune the SYSTIME increment value
+ * @ptp: the ptp clock structure embedded in the adapter
+ * @ppb: parts per billion adjustment from base
+ *
+ * Scales adapter->base_incval by @ppb parts per billion and writes the
+ * result to TIMINCA. Always returns 0.
+ */
+static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool slow_down = ppb < 0;
+	u32 base, delta, timinca;
+
+	/* work with the magnitude; the sign is kept in slow_down */
+	if (slow_down)
+		ppb = -ppb;
+
+	/* base_incval is stored by ixgbe_ptp_start_cyclecounter() */
+	smp_mb();
+	base = ACCESS_ONCE(adapter->base_incval);
+
+	/* delta = base * ppb / 10^9, computed in 64 bits */
+	delta = div_u64((u64)base * ppb, 1000000000ULL);
+
+	if (slow_down)
+		timinca = base - delta;
+	else
+		timinca = base + delta;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, timinca);
+		break;
+	case ixgbe_mac_82599EB:
+		timinca |= 1 << IXGBE_INCPER_SHIFT_82599;
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, timinca);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_adjtime - shift the clock by a signed offset
+ * @ptp: the ptp clock structure embedded in the adapter
+ * @delta: offset to adjust the cycle counter by
+ *
+ * Re-initializes the timecounter at (current time + @delta).
+ */
+static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct timecounter *tc = &adapter->tc;
+	unsigned long irq_flags;
+	u64 shifted;
+
+	spin_lock_irqsave(&adapter->tmreg_lock, irq_flags);
+
+	/* current timecounter value, moved by the requested offset */
+	shifted = timecounter_read(tc) + delta;
+
+	/* restart the timecounter from the shifted value */
+	timecounter_init(tc, &adapter->cc, shifted);
+
+	spin_unlock_irqrestore(&adapter->tmreg_lock, irq_flags);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_gettime - report the current clock value
+ * @ptp: the ptp clock structure embedded in the adapter
+ * @ts: timespec structure to hold the current time value
+ *
+ * Reads the timecounter under tmreg_lock and splits the nanosecond
+ * value into seconds and nanoseconds in @ts. Always returns 0.
+ */
+static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	unsigned long irq_flags;
+	u32 nsec;
+	u64 now;
+
+	spin_lock_irqsave(&adapter->tmreg_lock, irq_flags);
+	now = timecounter_read(&adapter->tc);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, irq_flags);
+
+	ts->tv_sec = div_u64_rem(now, 1000000000ULL, &nsec);
+	ts->tv_nsec = nsec;
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_settime - set the clock to an absolute time
+ * @ptp: the ptp clock structure embedded in the adapter
+ * @ts: the timespec containing the new time for the cycle counter
+ *
+ * Converts @ts to nanoseconds and re-initializes the timecounter with
+ * that value while holding tmreg_lock. Always returns 0.
+ */
+static int ixgbe_ptp_settime(struct ptp_clock_info *ptp,
+			     const struct timespec *ts)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	unsigned long irq_flags;
+	u64 start_ns;
+
+	/* flatten the timespec into a single nanosecond count */
+	start_ns = ts->tv_sec * 1000000000ULL + ts->tv_nsec;
+
+	/* restart the timecounter from the requested time */
+	spin_lock_irqsave(&adapter->tmreg_lock, irq_flags);
+	timecounter_init(&adapter->tc, &adapter->cc, start_ns);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, irq_flags);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_enable - ancillary feature control (unsupported)
+ * @ptp: the ptp clock structure
+ * @rq: the requested feature to change
+ * @on: whether to enable or disable the feature
+ *
+ * No ancillary PHC features are implemented by this driver, so every
+ * request is rejected with -EOPNOTSUPP, which userspace can decode.
+ */
+static int ixgbe_ptp_enable(struct ptp_clock_info *ptp,
+			    struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ixgbe_ptp_overflow_check - periodic read to catch SYSTIME wrap
+ * @adapter: pointer to adapter struct
+ *
+ * Called from the service task. Once IXGBE_OVERFLOW_PERIOD jiffies have
+ * passed since the last check, the timecounter is read so a wrap of
+ * the system time registers is not missed. Unsigned jiffies math
+ * (now - last) keeps the elapsed time correct across jiffies wrap.
+ */
+void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter)
+{
+	unsigned long elapsed_jiffies = jiffies - adapter->last_overflow_check;
+	struct timespec ts;
+
+	if ((adapter->flags2 & IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED) &&
+	    (elapsed_jiffies >= IXGBE_OVERFLOW_PERIOD)) {
+		ixgbe_ptp_gettime(&adapter->ptp_caps, &ts);
+		adapter->last_overflow_check = jiffies;
+	}
+}
+
+/**
+ * ixgbe_ptp_tx_hwtstamp - hand a valid TX time stamp to the stack
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: particular skb to send timestamp with
+ *
+ * When TSYNCTXCTL reports a valid stamp, the TXSTMPL/TXSTMPH value is
+ * converted to nanoseconds through the timecounter and delivered for
+ * @skb with skb_tstamp_tx(). Otherwise nothing is reported.
+ */
+void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector,
+			   struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps tx_stamp;
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_hw *hw;
+	unsigned long irq_flags;
+	u32 ctl, stamp_lo, stamp_hi;
+	u64 cycles, ns;
+
+	/* a ring without a q_vector or adapter cannot be time stamped */
+	if (!q_vector || !q_vector->adapter)
+		return;
+
+	adapter = q_vector->adapter;
+	hw = &adapter->hw;
+
+	/* control register first, then the low and high stamp halves */
+	ctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
+	stamp_lo = IXGBE_READ_REG(hw, IXGBE_TXSTMPL);
+	stamp_hi = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
+
+	/* the stamp registers have been read even if no stamp is valid */
+	if (!(ctl & IXGBE_TSYNCTXCTL_VALID))
+		return;
+
+	cycles = ((u64)stamp_hi << 32) | stamp_lo;
+
+	spin_lock_irqsave(&adapter->tmreg_lock, irq_flags);
+	ns = timecounter_cyc2time(&adapter->tc, cycles);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, irq_flags);
+
+	memset(&tx_stamp, 0, sizeof(tx_stamp));
+	tx_stamp.hwtstamp = ns_to_ktime(ns);
+	skb_tstamp_tx(skb, &tx_stamp);
+}
+
+/**
+ * ixgbe_ptp_rx_hwtstamp - attach a valid RX time stamp to an skb
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: particular skb to send timestamp with
+ *
+ * When TSYNCRXCTL reports a valid stamp, the RXSTMPL/RXSTMPH value is
+ * converted to nanoseconds through the timecounter and stored in the
+ * skb's shared hwtstamps. Otherwise the skb is left untouched.
+ */
+void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector,
+			   struct sk_buff *skb)
+{
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_hw *hw;
+	unsigned long irq_flags;
+	u32 ctl, stamp_lo, stamp_hi;
+	u64 cycles, ns;
+
+	/* a ring without a q_vector or adapter cannot be time stamped */
+	if (!q_vector || !q_vector->adapter)
+		return;
+
+	adapter = q_vector->adapter;
+	hw = &adapter->hw;
+
+	/* control register first, then the low and high stamp halves */
+	ctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	stamp_lo = IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
+	stamp_hi = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+
+	/*
+	 * If the valid bit is set, the RX registers hold the time stamp.
+	 * No other packet will be time stamped until these registers
+	 * have been read, which is why they are read unconditionally
+	 * above. Because only one packet can be time stamped at a time,
+	 * the register values must belong to this skb, so none of the
+	 * additional attributes stored for it need to be compared.
+	 */
+	if (!(ctl & IXGBE_TSYNCRXCTL_VALID))
+		return;
+
+	/* combine the two 32 bit halves into one cycle value */
+	cycles = ((u64)stamp_hi << 32) | stamp_lo;
+
+	spin_lock_irqsave(&adapter->tmreg_lock, irq_flags);
+	ns = timecounter_cyc2time(&adapter->tc, cycles);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, irq_flags);
+
+	/* record the converted stamp in the skb's shared hwtstamps */
+	skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);
+}
+
+/**
+ * ixgbe_ptp_hwtstamp_ioctl - control hardware time stamping
+ * @adapter: pointer to adapter struct
+ * @ifr: ioctl data
+ * @cmd: particular ioctl requested
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ */
+int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter,
+			     struct ifreq *ifr, int cmd)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hwtstamp_config config;
+	u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED;
+	u32 tsync_rx_mtrl = 0;
+	bool is_l4 = false;
+	bool is_l2 = false;
+	u32 regval;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0; /* fall through */
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V1_SYNC_MSG;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V2_SYNC_MSG;
+		is_l2 = true;
+		is_l4 = true;
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V2_DELAY_REQ_MSG;
+		is_l2 = true;
+		is_l4 = true;
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
+		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_ALL:
+	default:
+		/*
+		 * register RXMTRL must be set, therefore it is not
+		 * possible to time stamp both V1 Sync and Delay_Req messages
+		 * and hardware does not support timestamping all packets
+		 * => return error
+		 */
+		return -ERANGE;
+	}
+
+	if (hw->mac.type == ixgbe_mac_82598EB) { /* only all-off succeeds */
+		if (tsync_rx_ctl | tsync_tx_ctl)
+			return -ERANGE;
+		return 0;
+	}
+
+	/* define ethertype filter for timestamped packets */
+	if (is_l2)
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(3),
+				(IXGBE_ETQF_FILTER_EN | /* enable filter */
+				 IXGBE_ETQF_1588 | /* enable timestamping */
+				 ETH_P_1588));     /* 1588 eth protocol type */
+	else
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(3), 0);
+
+#define PTP_PORT 319
+	/* L4 Queue Filter[3]: filter by destination port and protocol */
+	if (is_l4) {
+		u32 ftqf = (IXGBE_FTQF_PROTOCOL_UDP /* UDP */
+			    | IXGBE_FTQF_POOL_MASK_EN /* Pool not compared */
+			    | IXGBE_FTQF_QUEUE_ENABLE);
+
+		ftqf |= ((IXGBE_FTQF_PROTOCOL_COMP_MASK /* protocol check */
+			  & IXGBE_FTQF_DEST_PORT_MASK /* dest check */
+			  & IXGBE_FTQF_SOURCE_PORT_MASK) /* source check */
+			 << IXGBE_FTQF_5TUPLE_MASK_SHIFT);
+
+		IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(3),
+				(3 << IXGBE_IMIR_RX_QUEUE_SHIFT_82599 |
+				 IXGBE_IMIR_SIZE_BP_82599));
+
+		/* enable port check */
+		IXGBE_WRITE_REG(hw, IXGBE_SDPQF(3),
+				(htons(PTP_PORT) |
+				 htons(PTP_PORT) << 16));
+
+		IXGBE_WRITE_REG(hw, IXGBE_FTQF(3), ftqf);
+
+		tsync_rx_mtrl |= PTP_PORT << 16;
+	} else {
+		IXGBE_WRITE_REG(hw, IXGBE_FTQF(3), 0);
+	}
+
+	/* enable/disable TX */
+	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
+	regval &= ~IXGBE_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	IXGBE_WRITE_REG(hw, IXGBE_TSYNCTXCTL, regval);
+
+	/* enable/disable RX */
+	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	regval &= ~(IXGBE_TSYNCRXCTL_ENABLED | IXGBE_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCTL, regval);
+
+	/* define which PTP packets are time stamped */
+	IXGBE_WRITE_REG(hw, IXGBE_RXMTRL, tsync_rx_mtrl);
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* clear TX/RX time stamp registers, just to be sure */
+	regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
+	regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw
+ * @adapter: pointer to the adapter structure
+ *
+ * this function initializes the timecounter and cyclecounter
+ * structures for use in generating a ns counter from the arbitrary
+ * fixed point cycles registers in the hardware.
+ *
+ * A change in link speed impacts the frequency of the DMA clock on
+ * the device, which is used to generate the cycle counter
+ * registers. Therefore this function is called whenever the link speed
+ * changes.
+ */
+void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 incval = 0;
+	u32 shift = 0;
+	u32 cycle_speed;
+	unsigned long flags;
+
+	/*
+	 * Determine what speed we need to set the cyclecounter
+	 * for. It should be different for 100Mb, 1Gb, and 10Gb. Treat
+	 * unknown speeds as 10Gb. (Hence why we can't just copy the
+	 * link_speed.)
+	 */
+	switch (adapter->link_speed) {
+	case IXGBE_LINK_SPEED_100_FULL:
+	case IXGBE_LINK_SPEED_1GB_FULL:
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		cycle_speed = adapter->link_speed;
+		break;
+	default:
+		/* cycle speed should be 10Gb when there is no link */
+		cycle_speed = IXGBE_LINK_SPEED_10GB_FULL;
+		break;
+	}
+
+	/* Bail if the cycle speed didn't change */
+	if (adapter->cycle_speed == cycle_speed)
+		return;
+
+	/*
+	 * Scale the NIC cycle counter by a large factor so that
+	 * relatively small corrections to the frequency can be added
+	 * or subtracted. The drawbacks of a large factor include
+	 * (a) the clock register overflows more quickly, (b) the cycle
+	 * counter structure must be able to convert the systime value
+	 * to nanoseconds using only a multiplier and a right-shift,
+	 * and (c) the value must fit within the timinca register space
+	 * => math based on internal DMA clock rate and available bits
+	 */
+	switch (cycle_speed) {
+	case IXGBE_LINK_SPEED_100_FULL:
+		incval = IXGBE_INCVAL_100;
+		shift = IXGBE_INCVAL_SHIFT_100;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		incval = IXGBE_INCVAL_1GB;
+		shift = IXGBE_INCVAL_SHIFT_1GB;
+		break;
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		incval = IXGBE_INCVAL_10GB;
+		shift = IXGBE_INCVAL_SHIFT_10GB;
+		break;
+	}
+
+	/*
+	 * Modify the calculated values to fit within the correct
+	 * number of bits specified by the hardware. The 82599 doesn't
+	 * have the same space as the X540, so bitshift the calculated
+	 * values to fit.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval);
+		break;
+	case ixgbe_mac_82599EB:
+		incval >>= IXGBE_INCVAL_SHIFT_82599;
+		shift -= IXGBE_INCVAL_SHIFT_82599;
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
+				(1 << IXGBE_INCPER_SHIFT_82599) |
+				incval);
+		break;
+	default:
+		/* other devices aren't supported */
+		return;
+	}
+
+	/* reset the system time registers */
+	IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000);
+	IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* store the new cycle speed */
+	adapter->cycle_speed = cycle_speed;
+
+	ACCESS_ONCE(adapter->base_incval) = incval; /* read by adjfreq */
+	smp_mb();
+
+	/* grab the ptp lock */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+	memset(&adapter->cc, 0, sizeof(adapter->cc));
+	adapter->cc.read = ixgbe_ptp_read;
+	adapter->cc.mask = CLOCKSOURCE_MASK(64);
+	adapter->cc.shift = shift;
+	adapter->cc.mult = 1;
+
+	/* reset the ns time counter */
+	timecounter_init(&adapter->tc, &adapter->cc,
+			 ktime_to_ns(ktime_get_real()));
+
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+}
+
+/**
+ * ixgbe_ptp_init - set up the PHC for supported MACs
+ * @adapter: the ixgbe private adapter structure
+ *
+ * On X540 and 82599 this fills in ptp_caps, starts the cyclecounter,
+ * enables the overflow check and registers the PTP clock. On any other
+ * MAC, or if registration fails, adapter->ptp_clock is left NULL.
+ */
+void ixgbe_ptp_init(struct ixgbe_adapter *adapter)
+{
+	struct ptp_clock_info *caps = &adapter->ptp_caps;
+	struct net_device *netdev = adapter->netdev;
+
+	/* only the X540 and the 82599 get a PHC */
+	if (adapter->hw.mac.type != ixgbe_mac_X540 &&
+	    adapter->hw.mac.type != ixgbe_mac_82599EB) {
+		adapter->ptp_clock = NULL;
+		return;
+	}
+
+	/* describe the clock: frequency/time ops only, no ancillaries */
+	snprintf(caps->name, 16, "%pm", netdev->dev_addr);
+	caps->owner = THIS_MODULE;
+	caps->max_adj = 250000000;
+	caps->n_alarm = 0;
+	caps->n_ext_ts = 0;
+	caps->n_per_out = 0;
+	caps->pps = 0;
+	caps->adjfreq = ixgbe_ptp_adjfreq;
+	caps->adjtime = ixgbe_ptp_adjtime;
+	caps->gettime = ixgbe_ptp_gettime;
+	caps->settime = ixgbe_ptp_settime;
+	caps->enable = ixgbe_ptp_enable;
+
+	spin_lock_init(&adapter->tmreg_lock);
+	ixgbe_ptp_start_cyclecounter(adapter);
+
+	/* (Re)start the overflow check */
+	adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED;
+
+	adapter->ptp_clock = ptp_clock_register(caps);
+	if (IS_ERR(adapter->ptp_clock)) {
+		adapter->ptp_clock = NULL;
+		e_dev_err("ptp_clock_register failed\n");
+		return;
+	}
+
+	e_dev_info("registered PHC device on %s\n", netdev->name);
+}
+
+/**
+ * ixgbe_ptp_stop - disable ptp device and stop the overflow check
+ * @adapter: pointer to adapter struct
+ *
+ * Clears the overflow check flag and unregisters the PHC, if any.
+ */
+void ixgbe_ptp_stop(struct ixgbe_adapter *adapter)
+{
+	/* keep the service task from running further overflow checks */
+	adapter->flags2 &= ~IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED;
+
+	if (!adapter->ptp_clock)
+		return;
+
+	ptp_clock_unregister(adapter->ptp_clock);
+	adapter->ptp_clock = NULL;
+	e_dev_info("removed PHC on %s\n", adapter->netdev->name);
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 9559c03..87d54ca 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1879,6 +1879,36 @@  enum {
 #define IXGBE_RXDCTL_RLPML_EN   0x00008000
 #define IXGBE_RXDCTL_VME        0x40000000  /* VLAN mode enable */
 
+#define IXGBE_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
+#define IXGBE_TSYNCTXCTL_ENABLED	0x00000010 /* Tx timestamping enabled */
+
+#define IXGBE_TSYNCRXCTL_VALID		0x00000001 /* Rx timestamp valid */
+#define IXGBE_TSYNCRXCTL_TYPE_MASK	0x0000000E /* Rx type mask */
+#define IXGBE_TSYNCRXCTL_TYPE_L2_V2	0x00
+#define IXGBE_TSYNCRXCTL_TYPE_L4_V1	0x02
+#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
+#define IXGBE_TSYNCRXCTL_ENABLED	0x00000010 /* Rx Timestamping enabled */
+
+#define IXGBE_RXMTRL_V1_CTRLT_MASK	0x000000FF
+#define IXGBE_RXMTRL_V1_SYNC_MSG	0x00
+#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG	0x01
+#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG	0x02
+#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG	0x03
+#define IXGBE_RXMTRL_V1_MGMT_MSG	0x04
+
+#define IXGBE_RXMTRL_V2_MSGID_MASK		0x0000FF00
+#define IXGBE_RXMTRL_V2_SYNC_MSG		0x0000
+#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG		0x0100
+#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG		0x0200
+#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG		0x0300
+#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG		0x0800
+#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG		0x0900
+#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG	0x0A00
+#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG		0x0B00
+#define IXGBE_RXMTRL_V2_SIGNALING_MSG		0x0C00
+#define IXGBE_RXMTRL_V2_MGMT_MSG		0x0D00
+
 #define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */
 #define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/
 #define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */
@@ -2008,6 +2038,7 @@  enum {
 #define IXGBE_RXDADV_STAT_FCSTAT_NODDP  0x00000010 /* 01: Ctxt w/o DDP */
 #define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
 #define IXGBE_RXDADV_STAT_FCSTAT_DDP    0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_TS		0x00010000 /* IEEE 1588 Time Stamp */
 
 /* PSRTYPE bit definitions */
 #define IXGBE_PSRTYPE_TCPHDR    0x00000010
@@ -2285,6 +2316,7 @@  struct ixgbe_adv_tx_context_desc {
 /* Adv Transmit Descriptor Config Masks */
 #define IXGBE_ADVTXD_DTALEN_MASK      0x0000FFFF /* Data buf length(bytes) */
 #define IXGBE_ADVTXD_MAC_LINKSEC      0x00040000 /* Insert LinkSec */
+#define IXGBE_ADVTXD_MAC_TSTAMP	      0x00080000 /* IEEE 1588 Time Stamp */
 #define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK   0x000003FF /* IPSec SA index */
 #define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK    0x000001FF /* IPSec ESP length */
 #define IXGBE_ADVTXD_DTYP_MASK  0x00F00000 /* DTYP mask */