diff mbox

[net-next] dp83640: Fix receive timestamp race condition

Message ID 1405603079-23572-1-git-send-email-stefan.sorensen@spectralink.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Sørensen, Stefan July 17, 2014, 1:17 p.m. UTC
When timestamping received packets, rx_timestamp_work may be scheduled before
the timestamps is received from the hardware resulting in the packet beeing
delivered without the timestamp.

This is fixed by changing the receive timestamp path: On receiving a packet
that need timestamping, it is added to a queue. When a timestamp arrives, the
queue is traversed and if a matching packet is found, it is deliverd with the
timestamp. In case the hardware drops a timestamp, a workqueue regularly
checks the queue for old packets and delivers them without a timestamp.

Signed-off-by: Stefan Sørensen <stefan.sorensen@spectralink.com>
---
 drivers/net/phy/dp83640.c | 197 +++++++++++++++++++---------------------------
 1 file changed, 82 insertions(+), 115 deletions(-)

Comments

Richard Cochran July 17, 2014, 6:12 p.m. UTC | #1
On Thu, Jul 17, 2014 at 03:17:59PM +0200, Stefan Sørensen wrote:
> When timestamping received packets, rx_timestamp_work may be scheduled before
> the timestamps is received from the hardware resulting in the packet beeing
> delivered without the timestamp.
> 
> This is fixed by changing the receive timestamp path: On receiving a packet
> that need timestamping, it is added to a queue. When a timestamp arrives, the
> queue is traversed and if a matching packet is found, it is deliverd with the
> timestamp. In case the hardware drops a timestamp, a workqueue regularly
> checks the queue for old packets and delivers them without a timestamp.

And what happens when the timestamp arrives *before* the PTP packet?

The original code was desgined to handle this possibility. Although it
makes logical sense for the PHY to deliver the status frame after the
regular frame, there is nothing in the documentation that confirms
this behavior, and hardware engineering decisions are not always
logical. If some newer silicon revision were to buffer the incoming
packet, waiting for the time stamp logic, then the driver should still
work.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Cochran July 17, 2014, 7:24 p.m. UTC | #2
On Thu, Jul 17, 2014 at 08:12:22PM +0200, Richard Cochran wrote:
> 
> And what happens when the timestamp arrives *before* the PTP packet?

It could also happen that the MAC driver re-orders the packets. Think
about multiple hardware queues putting multicast UDP to one side, etc.
Sure, maybe not a typical combination with the phyter, but still the
driver should handle this.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 255c21f..89eea31 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -74,7 +74,10 @@ 
 #define ENDIAN_FLAG	PSF_ENDIAN
 #endif
 
-#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+struct dp83640_skb_info {
+	int ptp_type;
+	unsigned long tmo;
+};
 
 struct phy_rxts {
 	u16 ns_lo;   /* ns[15:0] */
@@ -94,7 +97,6 @@  struct phy_txts {
 
 struct rxts {
 	struct list_head list;
-	unsigned long tmo;
 	u64 ns;
 	u16 seqid;
 	u8  msgtype;
@@ -116,12 +118,6 @@  struct dp83640_private {
 	int cfg0;
 	/* remember the last event time stamp */
 	struct phy_txts edata;
-	/* list of rx timestamps */
-	struct list_head rxts;
-	struct list_head rxpool;
-	struct rxts rx_pool_data[MAX_RXTS];
-	/* protects above three fields from concurrent access */
-	spinlock_t rx_lock;
 	/* queues of incoming and outgoing packets */
 	struct sk_buff_head rx_queue;
 	struct sk_buff_head tx_queue;
@@ -281,7 +277,6 @@  static void phy2rxts(struct phy_rxts *p, struct rxts *rxts)
 	rxts->seqid = p->seqid;
 	rxts->msgtype = (p->msgtype >> 12) & 0xf;
 	rxts->hash = p->msgtype & 0x0fff;
-	rxts->tmo = jiffies + 2;
 }
 
 static u64 phy2txts(struct phy_txts *p)
@@ -563,26 +558,6 @@  static bool is_status_frame(struct sk_buff *skb, int type)
 		return false;
 }
 
-static int expired(struct rxts *rxts)
-{
-	return time_after(jiffies, rxts->tmo);
-}
-
-/* Caller must hold rx_lock. */
-static void prune_rx_ts(struct dp83640_private *dp83640)
-{
-	struct list_head *this, *next;
-	struct rxts *rxts;
-
-	list_for_each_safe(this, next, &dp83640->rxts) {
-		rxts = list_entry(this, struct rxts, list);
-		if (expired(rxts)) {
-			list_del_init(&rxts->list);
-			list_add(&rxts->list, &dp83640->rxpool);
-		}
-	}
-}
-
 /* synchronize the phyters so they act as one clock */
 
 static void enable_broadcast(struct phy_device *phydev, int init_page, int on)
@@ -768,26 +743,70 @@  static int decode_evnt(struct dp83640_private *dp83640,
 	return parsed * sizeof(u16);
 }
 
+static int match(struct sk_buff *skb, unsigned int type, struct rxts *rxts)
+{
+	u16 *seqid;
+	unsigned int offset = 0;
+	u8 *msgtype, *data = skb_mac_header(skb);
+
+	/* check sequenceID, messageType, 12 bit hash of offset 20-29 */
+
+	if (type & PTP_CLASS_VLAN)
+		offset += VLAN_HLEN;
+
+	switch (type & PTP_CLASS_PMASK) {
+	case PTP_CLASS_IPV4:
+		offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN;
+		break;
+	case PTP_CLASS_IPV6:
+		offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+		break;
+	case PTP_CLASS_L2:
+		offset += ETH_HLEN;
+		break;
+	default:
+		return 0;
+	}
+
+	if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+		return 0;
+
+	if (unlikely(type & PTP_CLASS_V1))
+		msgtype = data + offset + OFF_PTP_CONTROL;
+	else
+		msgtype = data + offset;
+
+	seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+
+	return rxts->msgtype == (*msgtype & 0xf) &&
+		rxts->seqid   == ntohs(*seqid);
+}
+
 static void decode_rxts(struct dp83640_private *dp83640,
 			struct phy_rxts *phy_rxts)
 {
-	struct rxts *rxts;
+	struct rxts rxts;
+	struct skb_shared_hwtstamps *shhwtstamps;
+	struct sk_buff *skb;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dp83640->rx_lock, flags);
+	phy2rxts(phy_rxts, &rxts);
 
-	prune_rx_ts(dp83640);
+	spin_lock_irqsave(&dp83640->rx_queue.lock, flags);
+	skb_queue_walk(&dp83640->rx_queue, skb) {
+		struct dp83640_skb_info *skb_info;
 
-	if (list_empty(&dp83640->rxpool)) {
-		pr_debug("rx timestamp pool is empty\n");
-		goto out;
+		skb_info = (struct dp83640_skb_info *)skb->cb;
+		if (match(skb, skb_info->ptp_type, &rxts)) {
+			__skb_unlink(skb, &dp83640->rx_queue);
+			shhwtstamps = skb_hwtstamps(skb);
+			memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+			shhwtstamps->hwtstamp = ns_to_ktime(rxts.ns);
+			netif_rx_ni(skb);
+			break;
+		}
 	}
-	rxts = list_first_entry(&dp83640->rxpool, struct rxts, list);
-	list_del_init(&rxts->list);
-	phy2rxts(phy_rxts, rxts);
-	list_add_tail(&rxts->list, &dp83640->rxts);
-out:
-	spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+	spin_unlock_irqrestore(&dp83640->rx_queue.lock, flags);
 }
 
 static void decode_txts(struct dp83640_private *dp83640,
@@ -887,45 +906,6 @@  static int is_sync(struct sk_buff *skb, int type)
 	return (*msgtype & 0xf) == 0;
 }
 
-static int match(struct sk_buff *skb, unsigned int type, struct rxts *rxts)
-{
-	u16 *seqid;
-	unsigned int offset = 0;
-	u8 *msgtype, *data = skb_mac_header(skb);
-
-	/* check sequenceID, messageType, 12 bit hash of offset 20-29 */
-
-	if (type & PTP_CLASS_VLAN)
-		offset += VLAN_HLEN;
-
-	switch (type & PTP_CLASS_PMASK) {
-	case PTP_CLASS_IPV4:
-		offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN;
-		break;
-	case PTP_CLASS_IPV6:
-		offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
-		break;
-	case PTP_CLASS_L2:
-		offset += ETH_HLEN;
-		break;
-	default:
-		return 0;
-	}
-
-	if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
-		return 0;
-
-	if (unlikely(type & PTP_CLASS_V1))
-		msgtype = data + offset + OFF_PTP_CONTROL;
-	else
-		msgtype = data + offset;
-
-	seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
-
-	return rxts->msgtype == (*msgtype & 0xf) &&
-		rxts->seqid   == ntohs(*seqid);
-}
-
 static void dp83640_free_clocks(void)
 {
 	struct dp83640_clock *clock;
@@ -1049,7 +1029,7 @@  static int dp83640_probe(struct phy_device *phydev)
 {
 	struct dp83640_clock *clock;
 	struct dp83640_private *dp83640;
-	int err = -ENOMEM, i;
+	int err = -ENOMEM;
 
 	if (phydev->addr == BROADCAST_ADDR)
 		return 0;
@@ -1065,14 +1045,8 @@  static int dp83640_probe(struct phy_device *phydev)
 	dp83640->phydev = phydev;
 	INIT_WORK(&dp83640->ts_work, rx_timestamp_work);
 
-	INIT_LIST_HEAD(&dp83640->rxts);
-	INIT_LIST_HEAD(&dp83640->rxpool);
-	for (i = 0; i < MAX_RXTS; i++)
-		list_add(&dp83640->rx_pool_data[i].list, &dp83640->rxpool);
-
 	phydev->priv = dp83640;
 
-	spin_lock_init(&dp83640->rx_lock);
 	skb_queue_head_init(&dp83640->rx_queue);
 	skb_queue_head_init(&dp83640->tx_queue);
 
@@ -1267,6 +1241,13 @@  static int dp83640_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
 		return -ERANGE;
 	}
 
+	if (!dp83640->hwts_rx_en) {
+		struct sk_buff *skb;
+
+		while ((skb = skb_dequeue(&dp83640->rx_queue)))
+			netif_rx_ni(skb);
+	}
+
 	txcfg0 = (dp83640->version & TX_PTP_VER_MASK) << TX_PTP_VER_SHIFT;
 	rxcfg0 = (dp83640->version & TX_PTP_VER_MASK) << TX_PTP_VER_SHIFT;
 
@@ -1302,44 +1283,30 @@  static void rx_timestamp_work(struct work_struct *work)
 {
 	struct dp83640_private *dp83640 =
 		container_of(work, struct dp83640_private, ts_work);
-	struct list_head *this, *next;
-	struct rxts *rxts;
-	struct skb_shared_hwtstamps *shhwtstamps;
 	struct sk_buff *skb;
-	unsigned int type;
-	unsigned long flags;
 
-	/* Deliver each deferred packet, with or without a time stamp. */
-
-	while ((skb = skb_dequeue(&dp83640->rx_queue)) != NULL) {
-		type = SKB_PTP_TYPE(skb);
-		spin_lock_irqsave(&dp83640->rx_lock, flags);
-		list_for_each_safe(this, next, &dp83640->rxts) {
-			rxts = list_entry(this, struct rxts, list);
-			if (match(skb, type, rxts)) {
-				shhwtstamps = skb_hwtstamps(skb);
-				memset(shhwtstamps, 0, sizeof(*shhwtstamps));
-				shhwtstamps->hwtstamp = ns_to_ktime(rxts->ns);
-				list_del_init(&rxts->list);
-				list_add(&rxts->list, &dp83640->rxpool);
-				break;
-			}
+	/* Deliver expired packets. */
+	while ((skb = skb_dequeue(&dp83640->rx_queue))) {
+		struct dp83640_skb_info *skb_info;
+
+		skb_info = (struct dp83640_skb_info *)skb->cb;
+		if (!time_after(jiffies, skb_info->tmo)) {
+			skb_queue_head(&dp83640->rx_queue, skb);
+			break;
 		}
-		spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+
 		netif_rx_ni(skb);
 	}
 
-	/* Clear out expired time stamps. */
-
-	spin_lock_irqsave(&dp83640->rx_lock, flags);
-	prune_rx_ts(dp83640);
-	spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+	if (!skb_queue_empty(&dp83640->rx_queue))
+		schedule_work(&dp83640->ts_work);
 }
 
 static bool dp83640_rxtstamp(struct phy_device *phydev,
 			     struct sk_buff *skb, int type)
 {
 	struct dp83640_private *dp83640 = phydev->priv;
+	struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb;
 
 	if (is_status_frame(skb, type)) {
 		decode_status_frame(dp83640, skb);
@@ -1350,7 +1317,8 @@  static bool dp83640_rxtstamp(struct phy_device *phydev,
 	if (!dp83640->hwts_rx_en)
 		return false;
 
-	SKB_PTP_TYPE(skb) = type;
+	skb_info->ptp_type = type;
+	skb_info->tmo = jiffies + 2;
 	skb_queue_tail(&dp83640->rx_queue, skb);
 	schedule_work(&dp83640->ts_work);
 
@@ -1373,7 +1341,6 @@  static void dp83640_txtstamp(struct phy_device *phydev,
 	case HWTSTAMP_TX_ON:
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		skb_queue_tail(&dp83640->tx_queue, skb);
-		schedule_work(&dp83640->ts_work);
 		break;
 
 	case HWTSTAMP_TX_OFF: