diff mbox

sky2: safer transmit ring cleaning (v4)

Message ID 20100113194148.139091a3@nehalam
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

stephen hemminger Jan. 14, 2010, 3:41 a.m. UTC
Subject: sky2: safer transmit cleanup

This code makes transmit path and transmit reset safer by:
  * adding memory barrier before checking available ring slots
  * reseting state of tx ring elements after free
  * seperate cleanup function from ring done function
  * removing mostly unused tx_next element
  * ignoring transmit completion if device is offline

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
This patch is against the current net-next-2.6 tree.
This version handles the case of dual port shared transmit status
and other cases where it is possible for tx_done to be called when
device is being changed.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jarek Poplawski Jan. 14, 2010, 10:14 a.m. UTC | #1
On Wed, Jan 13, 2010 at 07:41:48PM -0800, Stephen Hemminger wrote:
> Subject: sky2: safer transmit cleanup
> 
> This code makes transmit path and transmit reset safer by:
>   * adding memory barrier before checking available ring slots
>   * reseting state of tx ring elements after free
>   * seperate cleanup function from ring done function
>   * removing mostly unused tx_next element
>   * ignoring transmit completion if device is offline
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> ---
> This patch is against the current net-next-2.6 tree.
> This version handles the case of dual port shared transmit status
> and other cases where it is possible for tx_done to be called when
> device is being changed.
> 
> --- a/drivers/net/sky2.c	2010-01-13 08:32:51.360161158 -0800
> +++ b/drivers/net/sky2.c	2010-01-13 08:35:37.685531490 -0800
> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct 
>  /* Number of list elements available for next tx */
>  static inline int tx_avail(const struct sky2_port *sky2)
>  {
> +	/* Makes sure update of tx_prod from start_xmit and
> +	   tx_cons from tx_done are seen. */
> +	smp_mb();
>  	return sky2->tx_pending - tx_inuse(sky2);
>  }
>  
> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>  	return count;
>  }
>  
> -static void sky2_tx_unmap(struct pci_dev *pdev,
> -			  const struct tx_ring_info *re)
> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
>  {
>  	if (re->flags & TX_MAP_SINGLE)
>  		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>  		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>  			       pci_unmap_len(re, maplen),
>  			       PCI_DMA_TODEVICE);
> +	re->flags = 0;
>  }
>  
>  /*
> @@ -1804,7 +1807,8 @@ mapping_error:
>  }
>  
>  /*
> - * Free ring elements from starting at tx_cons until "done"
> + * Transmit complete processing
> + * Free ring elements from starting at tx_cons until done index
>   *
>   * NB:
>   *  1. The hardware will tell us about partial completion of multi-part
> @@ -1813,11 +1817,14 @@ mapping_error:
>   *     looks at the tail of the queue of FIFO (tx_cons), not
>   *     the head (tx_prod)
>   */
> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
> +static void sky2_tx_done(struct net_device *dev, u16 done)
>  {
> -	struct net_device *dev = sky2->netdev;
> +	struct sky2_port *sky2 = netdev_priv(dev);
>  	unsigned idx;
>  
> +	if (!(netif_running(dev) & netif_device_present(dev)))

This makes it safe, but it still resembles the "short term fix"
according do David's opinion.

This change seems to affect dev->stats too. Since they are not
updated in sky2_tx_clean(). Btw, I hope "&" is some optimization
because it's less readable than "&&".

Jarek P.

> +		return;
> +
>  	BUG_ON(done >= sky2->tx_ring_size);
>  
>  	for (idx = sky2->tx_cons; idx != done;
> @@ -1828,6 +1835,8 @@ static void sky2_tx_complete(struct sky2
>  		sky2_tx_unmap(sky2->hw->pdev, re);
>  
>  		if (skb) {
> +			re->skb = NULL;
> +
>  			if (unlikely(netif_msg_tx_done(sky2)))
>  				printk(KERN_DEBUG "%s: tx done %u\n",
>  				       dev->name, idx);
> @@ -1836,16 +1845,12 @@ static void sky2_tx_complete(struct sky2
>  			dev->stats.tx_bytes += skb->len;
>  
>  			dev_kfree_skb_any(skb);
> -
> -			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>  		}
>  	}
>  
>  	sky2->tx_cons = idx;
> -	smp_mb();
>  
> -	/* Wake unless it's detached, and called e.g. from sky2_down() */
> -	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4 && netif_device_present(dev))
> +	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4)
>  		netif_wake_queue(dev);
>  }
>  
> @@ -1871,6 +1876,21 @@ static void sky2_tx_reset(struct sky2_hw
>  	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>  }
>  
> +static void sky2_tx_clean(struct sky2_port *sky2)
> +{
> +	u16 idx;
> +
> +	for (idx = 0; idx < sky2->tx_ring_size; idx++) {
> +		struct tx_ring_info *re = sky2->tx_ring + idx;
> +
> +		sky2_tx_unmap(sky2->hw->pdev, re);
> +		if (re->skb) {
> +			dev_kfree_skb_any(re->skb);
> +			re->skb = NULL;
> +		}
> +	}
> +}
> +
>  /* Network shutdown */
>  static int sky2_down(struct net_device *dev)
>  {
> @@ -1934,8 +1954,7 @@ static int sky2_down(struct net_device *
>  	sky2_tx_reset(hw, port);
>  
>  	/* Free any pending frames stuck in HW queue */
> -	sky2_tx_complete(sky2, sky2->tx_prod);
> -
> +	sky2_tx_clean(sky2);
>  	sky2_rx_clean(sky2);
>  
>  	sky2_free_buffers(sky2);
> @@ -2412,15 +2431,6 @@ error:
>  	goto resubmit;
>  }
>  
> -/* Transmit complete */
> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
> -{
> -	struct sky2_port *sky2 = netdev_priv(dev);
> -
> -	if (netif_running(dev))
> -		sky2_tx_complete(sky2, last);
> -}
> -
>  static inline void sky2_skb_rx(const struct sky2_port *sky2,
>  			       u32 status, struct sk_buff *skb)
>  {
> @@ -3177,9 +3187,9 @@ static void sky2_reset(struct sky2_hw *h
>  static void sky2_detach(struct net_device *dev)
>  {
>  	if (netif_running(dev)) {
> -		netif_tx_lock(dev);
> +		netif_tx_lock_bh(dev);
>  		netif_device_detach(dev);	/* stop txq */
> -		netif_tx_unlock(dev);
> +		netif_tx_unlock_bh(dev);
>  		sky2_down(dev);
>  	}
>  }
> @@ -4202,7 +4212,7 @@ static int sky2_debug_show(struct seq_fi
>  
>  	/* Dump contents of tx ring */
>  	sop = 1;
> -	for (idx = sky2->tx_next; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
> +	for (idx = sky2->tx_cons; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
>  	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>  		const struct sky2_tx_le *le = sky2->tx_le + idx;
>  		u32 a = le32_to_cpu(le->addr);
> --- a/drivers/net/sky2.h	2010-01-13 08:32:27.919849429 -0800
> +++ b/drivers/net/sky2.h	2010-01-13 08:33:03.410162026 -0800
> @@ -2187,7 +2187,6 @@ struct sky2_port {
>  	u16		     tx_ring_size;
>  	u16		     tx_cons;		/* next le to check */
>  	u16		     tx_prod;		/* next le to use */
> -	u16		     tx_next;		/* debug only */
>  
>  	u16		     tx_pending;
>  	u16		     tx_last_mss;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Jan. 14, 2010, 11:16 a.m. UTC | #2
On Thu, Jan 14, 2010 at 10:14:45AM +0000, Jarek Poplawski wrote:
> On Wed, Jan 13, 2010 at 07:41:48PM -0800, Stephen Hemminger wrote:
> > Subject: sky2: safer transmit cleanup
...
> > +	if (!(netif_running(dev) & netif_device_present(dev)))
> 
> This makes it safe, but it still resembles the "short term fix"
> according do David's opinion.

Hmm... Actually, it's not safe, but still safer again. It looks like
David was right (this time ;-). Since netif_device_present() isn't
protected by a lock here, this is still racy against napi, since the
flag can be set between the test and the wakeup. Btw, there is even no
barrier, and in your patch this dangerous distance is made much wider.

So, now I really ;-) agree with David: this needs a proper fix.
 
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Jan. 14, 2010, 11:20 a.m. UTC | #3
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 14 Jan 2010 11:16:36 +0000

> So, now I really ;-) agree with David: this needs a proper fix.

Now Jarek, do you now see my dirty little secret?

When people disagree with me, I just silently sit around waiting for
them to eventually change their mind.

Isn't it brilliant? -)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Jan. 14, 2010, 11:26 a.m. UTC | #4
On Thu, Jan 14, 2010 at 03:20:09AM -0800, David Miller wrote:
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Thu, 14 Jan 2010 11:16:36 +0000
> 
> > So, now I really ;-) agree with David: this needs a proper fix.
> 
> Now Jarek, do you now see my dirty little secret?
> 
> When people disagree with me, I just silently sit around waiting for
> them to eventually change their mind.
> 
> Isn't it brilliant? -)

If it were that easy... (it never works for me :-()

Probably, there is something more... ;-)

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer Jan. 14, 2010, 3:46 p.m. UTC | #5
On 1/13/2010 10:41 PM, Stephen Hemminger wrote:
> Subject: sky2: safer transmit cleanup
>
> This code makes transmit path and transmit reset safer by:
>    * adding memory barrier before checking available ring slots
>    * reseting state of tx ring elements after free
>    * seperate cleanup function from ring done function
>    * removing mostly unused tx_next element
>    * ignoring transmit completion if device is offline
>
> Signed-off-by: Stephen Hemminger<shemminger@vyatta.com>
>
> ---
> This patch is against the current net-next-2.6 tree.
> This version handles the case of dual port shared transmit status
> and other cases where it is possible for tx_done to be called when
> device is being changed.
>
> --- a/drivers/net/sky2.c	2010-01-13 08:32:51.360161158 -0800
> +++ b/drivers/net/sky2.c	2010-01-13 08:35:37.685531490 -0800
> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct
>   /* Number of list elements available for next tx */
>   static inline int tx_avail(const struct sky2_port *sky2)
>   {
> +	/* Makes sure update of tx_prod from start_xmit and
> +	   tx_cons from tx_done are seen. */
> +	smp_mb();
>   	return sky2->tx_pending - tx_inuse(sky2);
>   }
>
> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>   	return count;
>   }
>
> -static void sky2_tx_unmap(struct pci_dev *pdev,
> -			  const struct tx_ring_info *re)
> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
>   {
>   	if (re->flags&  TX_MAP_SINGLE)
>   		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>   		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>   			       pci_unmap_len(re, maplen),
>   			       PCI_DMA_TODEVICE);
> +	re->flags = 0;
>   }
>
>   /*
> @@ -1804,7 +1807,8 @@ mapping_error:
>   }
>
>   /*
> - * Free ring elements from starting at tx_cons until "done"
> + * Transmit complete processing
> + * Free ring elements from starting at tx_cons until done index
>    *
>    * NB:
>    *  1. The hardware will tell us about partial completion of multi-part
> @@ -1813,11 +1817,14 @@ mapping_error:
>    *     looks at the tail of the queue of FIFO (tx_cons), not
>    *     the head (tx_prod)
>    */
> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
> +static void sky2_tx_done(struct net_device *dev, u16 done)
>   {
> -	struct net_device *dev = sky2->netdev;
> +	struct sky2_port *sky2 = netdev_priv(dev);
>   	unsigned idx;
>
> +	if (!(netif_running(dev)&  netif_device_present(dev)))
> +		return;
> +
>   	BUG_ON(done>= sky2->tx_ring_size);
>
>   	for (idx = sky2->tx_cons; idx != done;
> @@ -1828,6 +1835,8 @@ static void sky2_tx_complete(struct sky2
>   		sky2_tx_unmap(sky2->hw->pdev, re);
>
>   		if (skb) {
> +			re->skb = NULL;
> +
>   			if (unlikely(netif_msg_tx_done(sky2)))
>   				printk(KERN_DEBUG "%s: tx done %u\n",
>   				       dev->name, idx);
> @@ -1836,16 +1845,12 @@ static void sky2_tx_complete(struct sky2
>   			dev->stats.tx_bytes += skb->len;
>
>   			dev_kfree_skb_any(skb);
> -
> -			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>   		}
>   	}
>
>   	sky2->tx_cons = idx;
> -	smp_mb();
>
> -	/* Wake unless it's detached, and called e.g. from sky2_down() */
> -	if (tx_avail(sky2)>  MAX_SKB_TX_LE + 4&&  netif_device_present(dev))
> +	if (tx_avail(sky2)>  MAX_SKB_TX_LE + 4)
>   		netif_wake_queue(dev);
>   }
>
> @@ -1871,6 +1876,21 @@ static void sky2_tx_reset(struct sky2_hw
>   	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>   }
>
> +static void sky2_tx_clean(struct sky2_port *sky2)
> +{
> +	u16 idx;
> +
> +	for (idx = 0; idx<  sky2->tx_ring_size; idx++) {
> +		struct tx_ring_info *re = sky2->tx_ring + idx;
> +
> +		sky2_tx_unmap(sky2->hw->pdev, re);
> +		if (re->skb) {
> +			dev_kfree_skb_any(re->skb);
> +			re->skb = NULL;
> +		}
> +	}
> +}
> +
>   /* Network shutdown */
>   static int sky2_down(struct net_device *dev)
>   {
> @@ -1934,8 +1954,7 @@ static int sky2_down(struct net_device *
>   	sky2_tx_reset(hw, port);
>
>   	/* Free any pending frames stuck in HW queue */
> -	sky2_tx_complete(sky2, sky2->tx_prod);
> -
> +	sky2_tx_clean(sky2);
>   	sky2_rx_clean(sky2);
>
>   	sky2_free_buffers(sky2);
> @@ -2412,15 +2431,6 @@ error:
>   	goto resubmit;
>   }
>
> -/* Transmit complete */
> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
> -{
> -	struct sky2_port *sky2 = netdev_priv(dev);
> -
> -	if (netif_running(dev))
> -		sky2_tx_complete(sky2, last);
> -}
> -
>   static inline void sky2_skb_rx(const struct sky2_port *sky2,
>   			       u32 status, struct sk_buff *skb)
>   {
> @@ -3177,9 +3187,9 @@ static void sky2_reset(struct sky2_hw *h
>   static void sky2_detach(struct net_device *dev)
>   {
>   	if (netif_running(dev)) {
> -		netif_tx_lock(dev);
> +		netif_tx_lock_bh(dev);
>   		netif_device_detach(dev);	/* stop txq */
> -		netif_tx_unlock(dev);
> +		netif_tx_unlock_bh(dev);
>   		sky2_down(dev);
>   	}
>   }
> @@ -4202,7 +4212,7 @@ static int sky2_debug_show(struct seq_fi
>
>   	/* Dump contents of tx ring */
>   	sop = 1;
> -	for (idx = sky2->tx_next; idx != sky2->tx_prod&&  idx<  sky2->tx_ring_size;
> +	for (idx = sky2->tx_cons; idx != sky2->tx_prod&&  idx<  sky2->tx_ring_size;
>   	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>   		const struct sky2_tx_le *le = sky2->tx_le + idx;
>   		u32 a = le32_to_cpu(le->addr);
> --- a/drivers/net/sky2.h	2010-01-13 08:32:27.919849429 -0800
> +++ b/drivers/net/sky2.h	2010-01-13 08:33:03.410162026 -0800
> @@ -2187,7 +2187,6 @@ struct sky2_port {
>   	u16		     tx_ring_size;
>   	u16		     tx_cons;		/* next le to check */
>   	u16		     tx_prod;		/* next le to use */
> -	u16		     tx_next;		/* debug only */
>
>   	u16		     tx_pending;
>   	u16		     tx_last_mss;
>    
Tested this a bit (w/o Mike's patch from this morning). Overall, works 
(vs. v3), but still something flaky going on WRT dhcp traffic:

Under load (and only under load) I'm seeing mutliple failed dhcp client 
requests - 4-6 discover/offer sequences before I see request/ack. I 
don't see errors, dropped packets, etc., at the time this is happening. 
I'd blow it off to load, but I don't see this with the earlier 
(pskb_may_pull) patch.

Going to rebuild with the inclusion of Mike's patch and see what happens.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger Jan. 14, 2010, 5:52 p.m. UTC | #6
On Thu, 14 Jan 2010 10:14:45 +0000
Jarek Poplawski <jarkao2@gmail.com> wrote:

> This makes it safe, but it still resembles the "short term fix"
> according do David's opinion.
> 
> This change seems to affect dev->stats too. Since they are not
> updated in sky2_tx_clean(). Btw, I hope "&" is some optimization
> because it's less readable than "&&".

Stats don't matter for packets flushed during device reset.

The & is because in the most common case device is up,
and we don't want the additional conditional branch.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer Jan. 14, 2010, 11:51 p.m. UTC | #7
On 1/14/2010 12:52 PM, Stephen Hemminger wrote:
> On Thu, 14 Jan 2010 10:14:45 +0000
> Jarek Poplawski<jarkao2@gmail.com>  wrote:
>
>    
>> This makes it safe, but it still resembles the "short term fix"
>> according do David's opinion.
>>
>> This change seems to affect dev->stats too. Since they are not
>> updated in sky2_tx_clean(). Btw, I hope "&" is some optimization
>> because it's less readable than "&&".
>>      
> Stats don't matter for packets flushed during device reset.
>
> The&  is because in the most common case device is up,
> and we don't want the additional conditional branch.
>    
I've been looking at what might explain the dhcp stuff - as well as the 
dropped packets only when there's an extra hop. I came across one path 
that seems suspect - although I'm really not familiar with the network 
stack code... that said, I'm wondering about neigh_compat_output (and 
eth_rebuild_header and arp_find). If I'm following things correctly (or 
perhaps mostly correctly), the only time anything goes this route (pun 
intentional) is when the packet was routed to this box. I'm guessing 
that bridging makes this more likely. So my dhcp stuff would all be 
going through here, as would the smb stuff that seemed flaky. The race 
I'm seeing (maybe) is that when the arp table is being rebuilt, there's 
a possibility that arp_find frees the skb. There's some other locking 
and stuff going on that seems maybe races with sky2.c in places on both 
the rx and tx path. I *think* it's right from looking at it, but test 
results suggest otherwise. Aside from the potential race, I think 
there's also a corner case where neigh_compat_output can return either 
with or without freeing the skb depending on the return from 
dev_hard_header... this may also be part of the race.

Maybe I've missed something... but as far as I can see, this is just 
about the only difference in code path taken between stuff that is 
working and stuff that is occasionally not.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer Jan. 16, 2010, 6:35 p.m. UTC | #8
On 1/14/2010 6:51 PM, Michael Breuer wrote:
> On 1/14/2010 12:52 PM, Stephen Hemminger wrote:
>> On Thu, 14 Jan 2010 10:14:45 +0000
>> Jarek Poplawski<jarkao2@gmail.com>  wrote:
>>
>>> This makes it safe, but it still resembles the "short term fix"
>>> according do David's opinion.
>>>
>>> This change seems to affect dev->stats too. Since they are not
>>> updated in sky2_tx_clean(). Btw, I hope "&" is some optimization
>>> because it's less readable than "&&".
>> Stats don't matter for packets flushed during device reset.
>>
>> The&  is because in the most common case device is up,
>> and we don't want the additional conditional branch.
> I've been looking at what might explain the dhcp stuff - as well as 
> the dropped packets only when there's an extra hop. I came across one 
> path that seems suspect - although I'm really not familiar with the 
> network stack code... that said, I'm wondering about 
> neigh_compat_output (and eth_rebuild_header and arp_find). If I'm 
> following things correctly (or perhaps mostly correctly), the only 
> time anything goes this route (pun intentional) is when the packet was 
> routed to this box. I'm guessing that bridging makes this more likely. 
> So my dhcp stuff would all be going through here, as would the smb 
> stuff that seemed flaky. The race I'm seeing (maybe) is that when the 
> arp table is being rebuilt, there's a possibility that arp_find frees 
> the skb. There's some other locking and stuff going on that seems 
> maybe races with sky2.c in places on both the rx and tx path. I 
> *think* it's right from looking at it, but test results suggest 
> otherwise. Aside from the potential race, I think there's also a 
> corner case where neigh_compat_output can return either with or 
> without freeing the skb depending on the return from 
> dev_hard_header... this may also be part of the race.
>
> Maybe I've missed something... but as far as I can see, this is just 
> about the only difference in code path taken between stuff that is 
> working and stuff that is occasionally not.
Ok - brief update. I've confirmed that under load, outgoing DHCPOFFER 
packets are being silently dropped. I don't know yet where.

Test scenario, what I do know, etc.:

Scenario:

Two systems; one Gb switch; one wifi router; one Blackberry client.

System A: Linux host; Asus P6T Deluxe V2/Sky2. eth1-> internet eth0-> 
internal (10.0.0.1/24).
Switch: HP Procurve unmanaged - one port connected to System A; another 
to the wifi router; another to System B.
System B: Win7; Asus M2N Deluxe SLI/ Nforce 5 (10.0.0.11)
Router: Wrt54g-tm (dd-wrt) Connected to switch & various wifi clients 
including a Blackberry. WEP enabled. (10.0.0.60)
Blackberry Curve 8320 (wifi enabled). (10.0.0.56 via dhcp lease)

Test that causes packet loss:

1. Turn off BB wifi.
2. Start copy of large files (4GB) from System B to an CIFS share on 
System A.
3. Start nethogs on system A.
4. Start tcpdump on the wifi router (interface br0 - wired 10.0.0.60 
connection)
5. Start wireshark on System A
6. tail system A /var/log/messages - watching for DHCP activity
7. When smb traffic load (incoming) exceeds 40,000KBPS (nethogs) - 
enable wifi on the blackberry.
8. Stop test after multiple DHCPDISCOVER/OFFER observed without REQUEST/ACK.

Results:

1. It seems that the problem occurs intermittently below 40,000KBPS, and 
consistently over that number as reported by nethogs. Lots of 
fluctuation, so figure that the 40k is approximate.
2. wireshark (system A) shows DHCPOFFER traffic outgoing.
3. tcpdump (wifi - wired incoming interface) does NOT show DHCPOFFER 
traffic when this problem occurs.
4. Both traces show arp activity during the DISCOVER/OFFER sequence.
5. There is no evidence of tx errors or packet drops, in any statistics 
I can find.

Thoughts:

I still think there's a race happening between the arp neighbor update 
and sky2. Might be higher up, but as I'm seeing the outgoing packets 
when sniffing eth0, can't be too much higher up. This problem seems to 
be exacerbated by the more recent patches, however I believe that this 
is a result of the higher throughput achievable with these patches. With 
the older set, I saw this problem less frequently, but found it much 
harder to get over the 40K RX number.

I am also seeing (as previously reported - but haven't sniffed yet) SMB 
ACK dropped packets, but only when traversing the wifi router. Not sure 
if this is related, but hey, you never know.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/drivers/net/sky2.c	2010-01-13 08:32:51.360161158 -0800
+++ b/drivers/net/sky2.c	2010-01-13 08:35:37.685531490 -0800
@@ -1596,6 +1596,9 @@  static inline int tx_inuse(const struct 
 /* Number of list elements available for next tx */
 static inline int tx_avail(const struct sky2_port *sky2)
 {
+	/* Makes sure update of tx_prod from start_xmit and
+	   tx_cons from tx_done are seen. */
+	smp_mb();
 	return sky2->tx_pending - tx_inuse(sky2);
 }
 
@@ -1618,8 +1621,7 @@  static unsigned tx_le_req(const struct s
 	return count;
 }
 
-static void sky2_tx_unmap(struct pci_dev *pdev,
-			  const struct tx_ring_info *re)
+static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
 {
 	if (re->flags & TX_MAP_SINGLE)
 		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
@@ -1629,6 +1631,7 @@  static void sky2_tx_unmap(struct pci_dev
 		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
 			       pci_unmap_len(re, maplen),
 			       PCI_DMA_TODEVICE);
+	re->flags = 0;
 }
 
 /*
@@ -1804,7 +1807,8 @@  mapping_error:
 }
 
 /*
- * Free ring elements from starting at tx_cons until "done"
+ * Transmit complete processing
+ * Free ring elements from starting at tx_cons until done index
  *
  * NB:
  *  1. The hardware will tell us about partial completion of multi-part
@@ -1813,11 +1817,14 @@  mapping_error:
  *     looks at the tail of the queue of FIFO (tx_cons), not
  *     the head (tx_prod)
  */
-static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
+static void sky2_tx_done(struct net_device *dev, u16 done)
 {
-	struct net_device *dev = sky2->netdev;
+	struct sky2_port *sky2 = netdev_priv(dev);
 	unsigned idx;
 
+	if (!(netif_running(dev) & netif_device_present(dev)))
+		return;
+
 	BUG_ON(done >= sky2->tx_ring_size);
 
 	for (idx = sky2->tx_cons; idx != done;
@@ -1828,6 +1835,8 @@  static void sky2_tx_complete(struct sky2
 		sky2_tx_unmap(sky2->hw->pdev, re);
 
 		if (skb) {
+			re->skb = NULL;
+
 			if (unlikely(netif_msg_tx_done(sky2)))
 				printk(KERN_DEBUG "%s: tx done %u\n",
 				       dev->name, idx);
@@ -1836,16 +1845,12 @@  static void sky2_tx_complete(struct sky2
 			dev->stats.tx_bytes += skb->len;
 
 			dev_kfree_skb_any(skb);
-
-			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
 		}
 	}
 
 	sky2->tx_cons = idx;
-	smp_mb();
 
-	/* Wake unless it's detached, and called e.g. from sky2_down() */
-	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4 && netif_device_present(dev))
+	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4)
 		netif_wake_queue(dev);
 }
 
@@ -1871,6 +1876,21 @@  static void sky2_tx_reset(struct sky2_hw
 	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
 }
 
+static void sky2_tx_clean(struct sky2_port *sky2)
+{
+	u16 idx;
+
+	for (idx = 0; idx < sky2->tx_ring_size; idx++) {
+		struct tx_ring_info *re = sky2->tx_ring + idx;
+
+		sky2_tx_unmap(sky2->hw->pdev, re);
+		if (re->skb) {
+			dev_kfree_skb_any(re->skb);
+			re->skb = NULL;
+		}
+	}
+}
+
 /* Network shutdown */
 static int sky2_down(struct net_device *dev)
 {
@@ -1934,8 +1954,7 @@  static int sky2_down(struct net_device *
 	sky2_tx_reset(hw, port);
 
 	/* Free any pending frames stuck in HW queue */
-	sky2_tx_complete(sky2, sky2->tx_prod);
-
+	sky2_tx_clean(sky2);
 	sky2_rx_clean(sky2);
 
 	sky2_free_buffers(sky2);
@@ -2412,15 +2431,6 @@  error:
 	goto resubmit;
 }
 
-/* Transmit complete */
-static inline void sky2_tx_done(struct net_device *dev, u16 last)
-{
-	struct sky2_port *sky2 = netdev_priv(dev);
-
-	if (netif_running(dev))
-		sky2_tx_complete(sky2, last);
-}
-
 static inline void sky2_skb_rx(const struct sky2_port *sky2,
 			       u32 status, struct sk_buff *skb)
 {
@@ -3177,9 +3187,9 @@  static void sky2_reset(struct sky2_hw *h
 static void sky2_detach(struct net_device *dev)
 {
 	if (netif_running(dev)) {
-		netif_tx_lock(dev);
+		netif_tx_lock_bh(dev);
 		netif_device_detach(dev);	/* stop txq */
-		netif_tx_unlock(dev);
+		netif_tx_unlock_bh(dev);
 		sky2_down(dev);
 	}
 }
@@ -4202,7 +4212,7 @@  static int sky2_debug_show(struct seq_fi
 
 	/* Dump contents of tx ring */
 	sop = 1;
-	for (idx = sky2->tx_next; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
+	for (idx = sky2->tx_cons; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
 	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
 		const struct sky2_tx_le *le = sky2->tx_le + idx;
 		u32 a = le32_to_cpu(le->addr);
--- a/drivers/net/sky2.h	2010-01-13 08:32:27.919849429 -0800
+++ b/drivers/net/sky2.h	2010-01-13 08:33:03.410162026 -0800
@@ -2187,7 +2187,6 @@  struct sky2_port {
 	u16		     tx_ring_size;
 	u16		     tx_cons;		/* next le to check */
 	u16		     tx_prod;		/* next le to use */
-	u16		     tx_next;		/* debug only */
 
 	u16		     tx_pending;
 	u16		     tx_last_mss;