Patchwork sky2: safer transmit ring cleaning

login
register
mail settings
Submitter stephen hemminger
Date Jan. 12, 2010, 4:15 p.m.
Message ID <20100112081513.0175d579@nehalam>
Download mbox | patch
Permalink /patch/42734/
State RFC
Delegated to: David Miller
Headers show

Comments

stephen hemminger - Jan. 12, 2010, 4:15 p.m.
This code makes transmit path and transmit reset safer by:
  * adding memory barrier before checking available ring slots
  * reseting state of tx ring elements after free
  * seperate cleanup function from ring done function
  * removing mostly unused tx_next element

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
Please apply this instead of the various bits and pieces flying
around labeled as sky2 panic under load


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer - Jan. 12, 2010, 4:32 p.m.
On 01/12/2010 11:15 AM, Stephen Hemminger wrote:
> This code makes transmit path and transmit reset safer by:
>    * adding memory barrier before checking available ring slots
>    * reseting state of tx ring elements after free
>    * seperate cleanup function from ring done function
>    * removing mostly unused tx_next element
>
>
>    
Stephen,

Just want to confirm that this supersedes both of the patches I'm 
currently running with:

1. skb_may_pull
2. Nash patch
--
Mike



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger - Jan. 12, 2010, 5:02 p.m.
On Tue, 12 Jan 2010 11:32:45 -0500
Michael Breuer <mbreuer@majjas.com> wrote:

> On 01/12/2010 11:15 AM, Stephen Hemminger wrote:
> > This code makes transmit path and transmit reset safer by:
> >    * adding memory barrier before checking available ring slots
> >    * reseting state of tx ring elements after free
> >    * seperate cleanup function from ring done function
> >    * removing mostly unused tx_next element
> >
> >
> >    
> Stephen,
> 
> Just want to confirm that this supersedes both of the patches I'm 
> currently running with:
> 
> 1. skb_may_pull
> 2. Nash patch

You want:
  1. AF_PACKET patch that makes sure skb is not modified after send
  2. This patch.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski - Jan. 12, 2010, 6:04 p.m.
On Tue, Jan 12, 2010 at 08:15:13AM -0800, Stephen Hemminger wrote:
> This code makes transmit path and transmit reset safer by:
>   * adding memory barrier before checking available ring slots
>   * reseting state of tx ring elements after free
>   * seperate cleanup function from ring done function
>   * removing mostly unused tx_next element

Does this patch prevent re-enabling tx after netif_device_detach(),
e.g. when sky2_detach() and sky2_tx_done() run at the same time on
different cpus?

Jarek P.

> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> ---
> Please apply this instead of the various bits and pieces flying
> around labeled as sky2 panic under load
> 
> 
> --- a/drivers/net/sky2.c	2010-01-11 10:49:50.907113126 -0800
> +++ b/drivers/net/sky2.c	2010-01-11 17:36:22.027429875 -0800
> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct 
>  /* Number of list elements available for next tx */
>  static inline int tx_avail(const struct sky2_port *sky2)
>  {
> +	/* Makes sure update of tx_prod from start_xmit and
> +	   tx_cons from tx_done are seen. */
> +	smp_mb();
>  	return sky2->tx_pending - tx_inuse(sky2);
>  }
>  
> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>  	return count;
>  }
>  
> -static void sky2_tx_unmap(struct pci_dev *pdev,
> -			  const struct tx_ring_info *re)
> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
>  {
>  	if (re->flags & TX_MAP_SINGLE)
>  		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>  		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>  			       pci_unmap_len(re, maplen),
>  			       PCI_DMA_TODEVICE);
> +	re->flags = 0;
>  }
>  
>  /*
> @@ -1804,7 +1807,8 @@ mapping_error:
>  }
>  
>  /*
> - * Free ring elements from starting at tx_cons until "done"
> + * Transmit complete processing
> + * Free ring elements from starting at tx_cons until done index
>   *
>   * NB:
>   *  1. The hardware will tell us about partial completion of multi-part
> @@ -1813,9 +1817,9 @@ mapping_error:
>   *     looks at the tail of the queue of FIFO (tx_cons), not
>   *     the head (tx_prod)
>   */
> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
> +static void sky2_tx_done(struct net_device *dev, u16 done)
>  {
> -	struct net_device *dev = sky2->netdev;
> +	struct sky2_port *sky2 = netdev_priv(dev);
>  	unsigned idx;
>  
>  	BUG_ON(done >= sky2->tx_ring_size);
> @@ -1828,6 +1832,8 @@ static void sky2_tx_complete(struct sky2
>  		sky2_tx_unmap(sky2->hw->pdev, re);
>  
>  		if (skb) {
> +			re->skb = NULL;
> +
>  			if (unlikely(netif_msg_tx_done(sky2)))
>  				printk(KERN_DEBUG "%s: tx done %u\n",
>  				       dev->name, idx);
> @@ -1836,13 +1842,10 @@ static void sky2_tx_complete(struct sky2
>  			dev->stats.tx_bytes += skb->len;
>  
>  			dev_kfree_skb_any(skb);
> -
> -			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>  		}
>  	}
>  
>  	sky2->tx_cons = idx;
> -	smp_mb();
>  
>  	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4)
>  		netif_wake_queue(dev);
> @@ -1870,6 +1873,21 @@ static void sky2_tx_reset(struct sky2_hw
>  	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>  }
>  
> +static void sky2_tx_clean(struct sky2_port *sky2)
> +{
> +	u16 idx;
> +
> +	for (idx = 0; idx < sky2->tx_ring_size; idx++) {
> +		struct tx_ring_info *re = sky2->tx_ring + idx;
> +
> +		sky2_tx_unmap(sky2->hw->pdev, re);
> +		if (re->skb) {
> +			dev_kfree_skb_any(re->skb);
> +			re->skb = NULL;
> +		}
> +	}
> +}
> +
>  /* Network shutdown */
>  static int sky2_down(struct net_device *dev)
>  {
> @@ -1933,8 +1951,7 @@ static int sky2_down(struct net_device *
>  	sky2_tx_reset(hw, port);
>  
>  	/* Free any pending frames stuck in HW queue */
> -	sky2_tx_complete(sky2, sky2->tx_prod);
> -
> +	sky2_tx_clean(sky2);
>  	sky2_rx_clean(sky2);
>  
>  	sky2_free_buffers(sky2);
> @@ -2411,15 +2428,6 @@ error:
>  	goto resubmit;
>  }
>  
> -/* Transmit complete */
> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
> -{
> -	struct sky2_port *sky2 = netdev_priv(dev);
> -
> -	if (netif_running(dev))
> -		sky2_tx_complete(sky2, last);
> -}
> -
>  static inline void sky2_skb_rx(const struct sky2_port *sky2,
>  			       u32 status, struct sk_buff *skb)
>  {
> @@ -4201,7 +4209,7 @@ static int sky2_debug_show(struct seq_fi
>  
>  	/* Dump contents of tx ring */
>  	sop = 1;
> -	for (idx = sky2->tx_next; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
> +	for (idx = sky2->tx_cons; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
>  	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>  		const struct sky2_tx_le *le = sky2->tx_le + idx;
>  		u32 a = le32_to_cpu(le->addr);
> --- a/drivers/net/sky2.h	2010-01-11 17:29:22.817088617 -0800
> +++ b/drivers/net/sky2.h	2010-01-11 17:29:28.197120484 -0800
> @@ -2187,7 +2187,6 @@ struct sky2_port {
>  	u16		     tx_ring_size;
>  	u16		     tx_cons;		/* next le to check */
>  	u16		     tx_prod;		/* next le to use */
> -	u16		     tx_next;		/* debug only */
>  
>  	u16		     tx_pending;
>  	u16		     tx_last_mss;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger - Jan. 12, 2010, 6:13 p.m.
On Tue, 12 Jan 2010 19:04:30 +0100
Jarek Poplawski <jarkao2@gmail.com> wrote:

> On Tue, Jan 12, 2010 at 08:15:13AM -0800, Stephen Hemminger wrote:
> > This code makes transmit path and transmit reset safer by:
> >   * adding memory barrier before checking available ring slots
> >   * reseting state of tx ring elements after free
> >   * seperate cleanup function from ring done function
> >   * removing mostly unused tx_next element  
> 
> Does this patch prevent re-enabling tx after netif_device_detach(),
> e.g. when sky2_detach() and sky2_tx_done() run at the same time on
> different cpus?
> 

Yes.
The napi is disabled during the detach so transmit completion can
not be done during that period.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski - Jan. 12, 2010, 6:24 p.m.
On Tue, Jan 12, 2010 at 10:13:06AM -0800, Stephen Hemminger wrote:
> On Tue, 12 Jan 2010 19:04:30 +0100
> Jarek Poplawski <jarkao2@gmail.com> wrote:
> 
> > On Tue, Jan 12, 2010 at 08:15:13AM -0800, Stephen Hemminger wrote:
> > > This code makes transmit path and transmit reset safer by:
> > >   * adding memory barrier before checking available ring slots
> > >   * reseting state of tx ring elements after free
> > >   * seperate cleanup function from ring done function
> > >   * removing mostly unused tx_next element  
> > 
> > Does this patch prevent re-enabling tx after netif_device_detach(),
> > e.g. when sky2_detach() and sky2_tx_done() run at the same time on
> > different cpus?
> > 
> 
> Yes.
> The napi is disabled during the detach so transmit completion can
> not be done during that period.

Could you point me where exactly the napi is disabled, probably I
missed this?

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer - Jan. 12, 2010, 6:35 p.m.
On 1/12/2010 11:15 AM, Stephen Hemminger wrote:
> This code makes transmit path and transmit reset safer by:
>    * adding memory barrier before checking available ring slots
>    * reseting state of tx ring elements after free
>    * seperate cleanup function from ring done function
>    * removing mostly unused tx_next element
>
> Signed-off-by: Stephen Hemminger<shemminger@vyatta.com>
>
> ---
> Please apply this instead of the various bits and pieces flying
> around labeled as sky2 panic under load
>
>
> --- a/drivers/net/sky2.c	2010-01-11 10:49:50.907113126 -0800
> +++ b/drivers/net/sky2.c	2010-01-11 17:36:22.027429875 -0800
> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct
>   /* Number of list elements available for next tx */
>   static inline int tx_avail(const struct sky2_port *sky2)
>   {
> +	/* Makes sure update of tx_prod from start_xmit and
> +	   tx_cons from tx_done are seen. */
> +	smp_mb();
>   	return sky2->tx_pending - tx_inuse(sky2);
>   }
>
> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>   	return count;
>   }
>
> -static void sky2_tx_unmap(struct pci_dev *pdev,
> -			  const struct tx_ring_info *re)
> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
>   {
>   	if (re->flags&  TX_MAP_SINGLE)
>   		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>   		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>   			       pci_unmap_len(re, maplen),
>   			       PCI_DMA_TODEVICE);
> +	re->flags = 0;
>   }
>
>   /*
> @@ -1804,7 +1807,8 @@ mapping_error:
>   }
>
>   /*
> - * Free ring elements from starting at tx_cons until "done"
> + * Transmit complete processing
> + * Free ring elements from starting at tx_cons until done index
>    *
>    * NB:
>    *  1. The hardware will tell us about partial completion of multi-part
> @@ -1813,9 +1817,9 @@ mapping_error:
>    *     looks at the tail of the queue of FIFO (tx_cons), not
>    *     the head (tx_prod)
>    */
> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
> +static void sky2_tx_done(struct net_device *dev, u16 done)
>   {
> -	struct net_device *dev = sky2->netdev;
> +	struct sky2_port *sky2 = netdev_priv(dev);
>   	unsigned idx;
>
>   	BUG_ON(done>= sky2->tx_ring_size);
> @@ -1828,6 +1832,8 @@ static void sky2_tx_complete(struct sky2
>   		sky2_tx_unmap(sky2->hw->pdev, re);
>
>   		if (skb) {
> +			re->skb = NULL;
> +
>   			if (unlikely(netif_msg_tx_done(sky2)))
>   				printk(KERN_DEBUG "%s: tx done %u\n",
>   				       dev->name, idx);
> @@ -1836,13 +1842,10 @@ static void sky2_tx_complete(struct sky2
>   			dev->stats.tx_bytes += skb->len;
>
>   			dev_kfree_skb_any(skb);
> -
> -			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>   		}
>   	}
>
>   	sky2->tx_cons = idx;
> -	smp_mb();
>
>   	if (tx_avail(sky2)>  MAX_SKB_TX_LE + 4)
>   		netif_wake_queue(dev);
> @@ -1870,6 +1873,21 @@ static void sky2_tx_reset(struct sky2_hw
>   	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>   }
>
> +static void sky2_tx_clean(struct sky2_port *sky2)
> +{
> +	u16 idx;
> +
> +	for (idx = 0; idx<  sky2->tx_ring_size; idx++) {
> +		struct tx_ring_info *re = sky2->tx_ring + idx;
> +
> +		sky2_tx_unmap(sky2->hw->pdev, re);
> +		if (re->skb) {
> +			dev_kfree_skb_any(re->skb);
> +			re->skb = NULL;
> +		}
> +	}
> +}
> +
>   /* Network shutdown */
>   static int sky2_down(struct net_device *dev)
>   {
> @@ -1933,8 +1951,7 @@ static int sky2_down(struct net_device *
>   	sky2_tx_reset(hw, port);
>
>   	/* Free any pending frames stuck in HW queue */
> -	sky2_tx_complete(sky2, sky2->tx_prod);
> -
> +	sky2_tx_clean(sky2);
>   	sky2_rx_clean(sky2);
>
>   	sky2_free_buffers(sky2);
> @@ -2411,15 +2428,6 @@ error:
>   	goto resubmit;
>   }
>
> -/* Transmit complete */
> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
> -{
> -	struct sky2_port *sky2 = netdev_priv(dev);
> -
> -	if (netif_running(dev))
> -		sky2_tx_complete(sky2, last);
> -}
> -
>   static inline void sky2_skb_rx(const struct sky2_port *sky2,
>   			       u32 status, struct sk_buff *skb)
>   {
> @@ -4201,7 +4209,7 @@ static int sky2_debug_show(struct seq_fi
>
>   	/* Dump contents of tx ring */
>   	sop = 1;
> -	for (idx = sky2->tx_next; idx != sky2->tx_prod&&  idx<  sky2->tx_ring_size;
> +	for (idx = sky2->tx_cons; idx != sky2->tx_prod&&  idx<  sky2->tx_ring_size;
>   	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>   		const struct sky2_tx_le *le = sky2->tx_le + idx;
>   		u32 a = le32_to_cpu(le->addr);
> --- a/drivers/net/sky2.h	2010-01-11 17:29:22.817088617 -0800
> +++ b/drivers/net/sky2.h	2010-01-11 17:29:28.197120484 -0800
> @@ -2187,7 +2187,6 @@ struct sky2_port {
>   	u16		     tx_ring_size;
>   	u16		     tx_cons;		/* next le to check */
>   	u16		     tx_prod;		/* next le to use */
> -	u16		     tx_next;		/* debug only */
>
>   	u16		     tx_pending;
>   	u16		     tx_last_mss;
>    
Test observations:

1. DHCP request/response sequence having some issues... can't confirm 
that it's a result of this patch, but I don't see this with the prior 
patch. Prior to this patch, if I connect a new device (Blackberry in 
this case) I see DHCPDISCOVER;DHCPOFFER;DHCPREQUEST;DHCPACK (just the 
four messages). With this patch I'm seeing repeated attempts - i.e., 
DISCOVER/OFFER 6 times before the REQUEST/ACK. This is repeatable and  
happening whether or not under load. As my original problem started with 
DHCP packets, this seems interesting. I don't see any errors logged 
(dmesg, messages, etc.).
2. Probably not related to this patch, but perhaps to the driver - I've 
finally tracked the source of my dropped RX packets. It's happening when 
a sending data to a CIFS client on a MacOS system. The RX drop rate 
seems proportional to the TX rate for SMB to that client - at a tx rate 
of about 200Kb/s I see about 20 dropped RX packets/sec - at 400 I see 
about 40.  I'm thinking therefore it's ACKs being dropped on RX. Why? no 
idea (yet). Nothing reported by ethtool or netstat -s remotely 
correlates to the number of dropped RX packets.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer - Jan. 12, 2010, 6:42 p.m.
On 1/12/2010 1:35 PM, Michael Breuer wrote:
> On 1/12/2010 11:15 AM, Stephen Hemminger wrote:
>> This code makes transmit path and transmit reset safer by:
>>    * adding memory barrier before checking available ring slots
>>    * reseting state of tx ring elements after free
>>    * seperate cleanup function from ring done function
>>    * removing mostly unused tx_next element
>>
>> Signed-off-by: Stephen Hemminger<shemminger@vyatta.com>
>>
>> ---
>> Please apply this instead of the various bits and pieces flying
>> around labeled as sky2 panic under load
>>
>>
>> --- a/drivers/net/sky2.c    2010-01-11 10:49:50.907113126 -0800
>> +++ b/drivers/net/sky2.c    2010-01-11 17:36:22.027429875 -0800
>> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct
>>   /* Number of list elements available for next tx */
>>   static inline int tx_avail(const struct sky2_port *sky2)
>>   {
>> +    /* Makes sure update of tx_prod from start_xmit and
>> +       tx_cons from tx_done are seen. */
>> +    smp_mb();
>>       return sky2->tx_pending - tx_inuse(sky2);
>>   }
>>
>> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>>       return count;
>>   }
>>
>> -static void sky2_tx_unmap(struct pci_dev *pdev,
>> -              const struct tx_ring_info *re)
>> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info 
>> *re)
>>   {
>>       if (re->flags&  TX_MAP_SINGLE)
>>           pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
>> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>>           pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>>                      pci_unmap_len(re, maplen),
>>                      PCI_DMA_TODEVICE);
>> +    re->flags = 0;
>>   }
>>
>>   /*
>> @@ -1804,7 +1807,8 @@ mapping_error:
>>   }
>>
>>   /*
>> - * Free ring elements from starting at tx_cons until "done"
>> + * Transmit complete processing
>> + * Free ring elements from starting at tx_cons until done index
>>    *
>>    * NB:
>>    *  1. The hardware will tell us about partial completion of 
>> multi-part
>> @@ -1813,9 +1817,9 @@ mapping_error:
>>    *     looks at the tail of the queue of FIFO (tx_cons), not
>>    *     the head (tx_prod)
>>    */
>> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
>> +static void sky2_tx_done(struct net_device *dev, u16 done)
>>   {
>> -    struct net_device *dev = sky2->netdev;
>> +    struct sky2_port *sky2 = netdev_priv(dev);
>>       unsigned idx;
>>
>>       BUG_ON(done>= sky2->tx_ring_size);
>> @@ -1828,6 +1832,8 @@ static void sky2_tx_complete(struct sky2
>>           sky2_tx_unmap(sky2->hw->pdev, re);
>>
>>           if (skb) {
>> +            re->skb = NULL;
>> +
>>               if (unlikely(netif_msg_tx_done(sky2)))
>>                   printk(KERN_DEBUG "%s: tx done %u\n",
>>                          dev->name, idx);
>> @@ -1836,13 +1842,10 @@ static void sky2_tx_complete(struct sky2
>>               dev->stats.tx_bytes += skb->len;
>>
>>               dev_kfree_skb_any(skb);
>> -
>> -            sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>>           }
>>       }
>>
>>       sky2->tx_cons = idx;
>> -    smp_mb();
>>
>>       if (tx_avail(sky2)>  MAX_SKB_TX_LE + 4)
>>           netif_wake_queue(dev);
>> @@ -1870,6 +1873,21 @@ static void sky2_tx_reset(struct sky2_hw
>>       sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>>   }
>>
>> +static void sky2_tx_clean(struct sky2_port *sky2)
>> +{
>> +    u16 idx;
>> +
>> +    for (idx = 0; idx<  sky2->tx_ring_size; idx++) {
>> +        struct tx_ring_info *re = sky2->tx_ring + idx;
>> +
>> +        sky2_tx_unmap(sky2->hw->pdev, re);
>> +        if (re->skb) {
>> +            dev_kfree_skb_any(re->skb);
>> +            re->skb = NULL;
>> +        }
>> +    }
>> +}
>> +
>>   /* Network shutdown */
>>   static int sky2_down(struct net_device *dev)
>>   {
>> @@ -1933,8 +1951,7 @@ static int sky2_down(struct net_device *
>>       sky2_tx_reset(hw, port);
>>
>>       /* Free any pending frames stuck in HW queue */
>> -    sky2_tx_complete(sky2, sky2->tx_prod);
>> -
>> +    sky2_tx_clean(sky2);
>>       sky2_rx_clean(sky2);
>>
>>       sky2_free_buffers(sky2);
>> @@ -2411,15 +2428,6 @@ error:
>>       goto resubmit;
>>   }
>>
>> -/* Transmit complete */
>> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
>> -{
>> -    struct sky2_port *sky2 = netdev_priv(dev);
>> -
>> -    if (netif_running(dev))
>> -        sky2_tx_complete(sky2, last);
>> -}
>> -
>>   static inline void sky2_skb_rx(const struct sky2_port *sky2,
>>                      u32 status, struct sk_buff *skb)
>>   {
>> @@ -4201,7 +4209,7 @@ static int sky2_debug_show(struct seq_fi
>>
>>       /* Dump contents of tx ring */
>>       sop = 1;
>> -    for (idx = sky2->tx_next; idx != sky2->tx_prod&&  idx<  
>> sky2->tx_ring_size;
>> +    for (idx = sky2->tx_cons; idx != sky2->tx_prod&&  idx<  
>> sky2->tx_ring_size;
>>            idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>>           const struct sky2_tx_le *le = sky2->tx_le + idx;
>>           u32 a = le32_to_cpu(le->addr);
>> --- a/drivers/net/sky2.h    2010-01-11 17:29:22.817088617 -0800
>> +++ b/drivers/net/sky2.h    2010-01-11 17:29:28.197120484 -0800
>> @@ -2187,7 +2187,6 @@ struct sky2_port {
>>       u16             tx_ring_size;
>>       u16             tx_cons;        /* next le to check */
>>       u16             tx_prod;        /* next le to use */
>> -    u16             tx_next;        /* debug only */
>>
>>       u16             tx_pending;
>>       u16             tx_last_mss;
> Test observations:
>
> 1. DHCP request/response sequence having some issues... can't confirm 
> that it's a result of this patch, but I don't see this with the prior 
> patch. Prior to this patch, if I connect a new device (Blackberry in 
> this case) I see DHCPDISCOVER;DHCPOFFER;DHCPREQUEST;DHCPACK (just the 
> four messages). With this patch I'm seeing repeated attempts - i.e., 
> DISCOVER/OFFER 6 times before the REQUEST/ACK. This is repeatable and  
> happening whether or not under load. As my original problem started 
> with DHCP packets, this seems interesting. I don't see any errors 
> logged (dmesg, messages, etc.).
> 2. Probably not related to this patch, but perhaps to the driver - 
> I've finally tracked the source of my dropped RX packets. It's 
> happening when a sending data to a CIFS client on a MacOS system. The 
> RX drop rate seems proportional to the TX rate for SMB to that client 
> - at a tx rate of about 200Kb/s I see about 20 dropped RX packets/sec 
> - at 400 I see about 40.  I'm thinking therefore it's ACKs being 
> dropped on RX. Why? no idea (yet). Nothing reported by ethtool or 
> netstat -s remotely correlates to the number of dropped RX packets.
>
>
Let me add: the CIFS client from which the packets are dropped is 
connected via a dd-wrt router (on wifi) connected to the sky2 1G port. A 
Windows client connected directly to the 1G port does not exhibit the 
same symptoms (. I'll try later the Mac directly connected & a Wintel 
box over wifi if possible. The DD-WRT router (linksys wrt54g-tm) is 
bridged (wifi clients on same subnet as wired & serviced by DHCPD 
running on the linux box). This is also the source of the aforementioned 
perhaps-flaky DHCP connections.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Breuer - Jan. 12, 2010, 8:31 p.m.
On 1/12/2010 1:42 PM, Michael Breuer wrote:
> On 1/12/2010 1:35 PM, Michael Breuer wrote:
>> On 1/12/2010 11:15 AM, Stephen Hemminger wrote:
>>> This code makes transmit path and transmit reset safer by:
>>>    * adding memory barrier before checking available ring slots
>>>    * reseting state of tx ring elements after free
>>>    * seperate cleanup function from ring done function
>>>    * removing mostly unused tx_next element
>>>
>>> Signed-off-by: Stephen Hemminger<shemminger@vyatta.com>
>>>
>>> ---
>>> Please apply this instead of the various bits and pieces flying
>>> around labeled as sky2 panic under load
>>>
>>>
>>> --- a/drivers/net/sky2.c    2010-01-11 10:49:50.907113126 -0800
>>> +++ b/drivers/net/sky2.c    2010-01-11 17:36:22.027429875 -0800
>>> @@ -1596,6 +1596,9 @@ static inline int tx_inuse(const struct
>>>   /* Number of list elements available for next tx */
>>>   static inline int tx_avail(const struct sky2_port *sky2)
>>>   {
>>> +    /* Makes sure update of tx_prod from start_xmit and
>>> +       tx_cons from tx_done are seen. */
>>> +    smp_mb();
>>>       return sky2->tx_pending - tx_inuse(sky2);
>>>   }
>>>
>>> @@ -1618,8 +1621,7 @@ static unsigned tx_le_req(const struct s
>>>       return count;
>>>   }
>>>
>>> -static void sky2_tx_unmap(struct pci_dev *pdev,
>>> -              const struct tx_ring_info *re)
>>> +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info 
>>> *re)
>>>   {
>>>       if (re->flags&  TX_MAP_SINGLE)
>>>           pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
>>> @@ -1629,6 +1631,7 @@ static void sky2_tx_unmap(struct pci_dev
>>>           pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
>>>                      pci_unmap_len(re, maplen),
>>>                      PCI_DMA_TODEVICE);
>>> +    re->flags = 0;
>>>   }
>>>
>>>   /*
>>> @@ -1804,7 +1807,8 @@ mapping_error:
>>>   }
>>>
>>>   /*
>>> - * Free ring elements from starting at tx_cons until "done"
>>> + * Transmit complete processing
>>> + * Free ring elements from starting at tx_cons until done index
>>>    *
>>>    * NB:
>>>    *  1. The hardware will tell us about partial completion of 
>>> multi-part
>>> @@ -1813,9 +1817,9 @@ mapping_error:
>>>    *     looks at the tail of the queue of FIFO (tx_cons), not
>>>    *     the head (tx_prod)
>>>    */
>>> -static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
>>> +static void sky2_tx_done(struct net_device *dev, u16 done)
>>>   {
>>> -    struct net_device *dev = sky2->netdev;
>>> +    struct sky2_port *sky2 = netdev_priv(dev);
>>>       unsigned idx;
>>>
>>>       BUG_ON(done>= sky2->tx_ring_size);
>>> @@ -1828,6 +1832,8 @@ static void sky2_tx_complete(struct sky2
>>>           sky2_tx_unmap(sky2->hw->pdev, re);
>>>
>>>           if (skb) {
>>> +            re->skb = NULL;
>>> +
>>>               if (unlikely(netif_msg_tx_done(sky2)))
>>>                   printk(KERN_DEBUG "%s: tx done %u\n",
>>>                          dev->name, idx);
>>> @@ -1836,13 +1842,10 @@ static void sky2_tx_complete(struct sky2
>>>               dev->stats.tx_bytes += skb->len;
>>>
>>>               dev_kfree_skb_any(skb);
>>> -
>>> -            sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
>>>           }
>>>       }
>>>
>>>       sky2->tx_cons = idx;
>>> -    smp_mb();
>>>
>>>       if (tx_avail(sky2)>  MAX_SKB_TX_LE + 4)
>>>           netif_wake_queue(dev);
>>> @@ -1870,6 +1873,21 @@ static void sky2_tx_reset(struct sky2_hw
>>>       sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
>>>   }
>>>
>>> +static void sky2_tx_clean(struct sky2_port *sky2)
>>> +{
>>> +    u16 idx;
>>> +
>>> +    for (idx = 0; idx<  sky2->tx_ring_size; idx++) {
>>> +        struct tx_ring_info *re = sky2->tx_ring + idx;
>>> +
>>> +        sky2_tx_unmap(sky2->hw->pdev, re);
>>> +        if (re->skb) {
>>> +            dev_kfree_skb_any(re->skb);
>>> +            re->skb = NULL;
>>> +        }
>>> +    }
>>> +}
>>> +
>>>   /* Network shutdown */
>>>   static int sky2_down(struct net_device *dev)
>>>   {
>>> @@ -1933,8 +1951,7 @@ static int sky2_down(struct net_device *
>>>       sky2_tx_reset(hw, port);
>>>
>>>       /* Free any pending frames stuck in HW queue */
>>> -    sky2_tx_complete(sky2, sky2->tx_prod);
>>> -
>>> +    sky2_tx_clean(sky2);
>>>       sky2_rx_clean(sky2);
>>>
>>>       sky2_free_buffers(sky2);
>>> @@ -2411,15 +2428,6 @@ error:
>>>       goto resubmit;
>>>   }
>>>
>>> -/* Transmit complete */
>>> -static inline void sky2_tx_done(struct net_device *dev, u16 last)
>>> -{
>>> -    struct sky2_port *sky2 = netdev_priv(dev);
>>> -
>>> -    if (netif_running(dev))
>>> -        sky2_tx_complete(sky2, last);
>>> -}
>>> -
>>>   static inline void sky2_skb_rx(const struct sky2_port *sky2,
>>>                      u32 status, struct sk_buff *skb)
>>>   {
>>> @@ -4201,7 +4209,7 @@ static int sky2_debug_show(struct seq_fi
>>>
>>>       /* Dump contents of tx ring */
>>>       sop = 1;
>>> -    for (idx = sky2->tx_next; idx != sky2->tx_prod&&  idx<  
>>> sky2->tx_ring_size;
>>> +    for (idx = sky2->tx_cons; idx != sky2->tx_prod&&  idx<  
>>> sky2->tx_ring_size;
>>>            idx = RING_NEXT(idx, sky2->tx_ring_size)) {
>>>           const struct sky2_tx_le *le = sky2->tx_le + idx;
>>>           u32 a = le32_to_cpu(le->addr);
>>> --- a/drivers/net/sky2.h    2010-01-11 17:29:22.817088617 -0800
>>> +++ b/drivers/net/sky2.h    2010-01-11 17:29:28.197120484 -0800
>>> @@ -2187,7 +2187,6 @@ struct sky2_port {
>>>       u16             tx_ring_size;
>>>       u16             tx_cons;        /* next le to check */
>>>       u16             tx_prod;        /* next le to use */
>>> -    u16             tx_next;        /* debug only */
>>>
>>>       u16             tx_pending;
>>>       u16             tx_last_mss;
>> Test observations:
>>
>> 1. DHCP request/response sequence having some issues... can't confirm 
>> that it's a result of this patch, but I don't see this with the prior 
>> patch. Prior to this patch, if I connect a new device (Blackberry in 
>> this case) I see DHCPDISCOVER;DHCPOFFER;DHCPREQUEST;DHCPACK (just the 
>> four messages). With this patch I'm seeing repeated attempts - i.e., 
>> DISCOVER/OFFER 6 times before the REQUEST/ACK. This is repeatable 
>> and  happening whether or not under load. As my original problem 
>> started with DHCP packets, this seems interesting. I don't see any 
>> errors logged (dmesg, messages, etc.).
>> 2. Probably not related to this patch, but perhaps to the driver - 
>> I've finally tracked the source of my dropped RX packets. It's 
>> happening when a sending data to a CIFS client on a MacOS system. The 
>> RX drop rate seems proportional to the TX rate for SMB to that client 
>> - at a tx rate of about 200Kb/s I see about 20 dropped RX packets/sec 
>> - at 400 I see about 40.  I'm thinking therefore it's ACKs being 
>> dropped on RX. Why? no idea (yet). Nothing reported by ethtool or 
>> netstat -s remotely correlates to the number of dropped RX packets.
>>
>>
> Let me add: the CIFS client from which the packets are dropped is 
> connected via a dd-wrt router (on wifi) connected to the sky2 1G port. 
> A Windows client connected directly to the 1G port does not exhibit 
> the same symptoms (. I'll try later the Mac directly connected & a 
> Wintel box over wifi if possible. The DD-WRT router (linksys 
> wrt54g-tm) is bridged (wifi clients on same subnet as wired & serviced 
> by DHCPD running on the linux box). This is also the source of the 
> aforementioned perhaps-flaky DHCP connections.
Ok - a little more on the dropped packets - only happens when connected 
through the wifi router - but happens using a wired connection as well 
(via the router's ethernet port).

 From sniffer traces: I see large frames outbound (even though 
MTU=1500). I see the fragmented result arriving on the remote side. I 
see ACK's for each of the fragmented frames outbound from the Mac, but 
not received on the Linux side.

I also don't see any retransmissions or duplicate ACKS on either sniffer 
trace. I'm wondering whether there is fragmentation offload to the 
Yukon2, and whether the individual fragment acks are what's showing up 
dropped. If so, I don't understand why it would only happen with the 
wifi router vs. switch in the middle. Maybe the router is doing 
something to the packets which is causing the Yukon to forward the acks 
up differently? The router MTU is also 1500 on all ports, and does not 
show dropped packets or errors corresponding to what I'm seeing on the 
sky2 adapter.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

--- a/drivers/net/sky2.c	2010-01-11 10:49:50.907113126 -0800
+++ b/drivers/net/sky2.c	2010-01-11 17:36:22.027429875 -0800
@@ -1596,6 +1596,9 @@  static inline int tx_inuse(const struct 
 /* Number of list elements available for next tx */
 static inline int tx_avail(const struct sky2_port *sky2)
 {
+	/* Makes sure update of tx_prod from start_xmit and
+	   tx_cons from tx_done are seen. */
+	smp_mb();
 	return sky2->tx_pending - tx_inuse(sky2);
 }
 
@@ -1618,8 +1621,7 @@  static unsigned tx_le_req(const struct s
 	return count;
 }
 
-static void sky2_tx_unmap(struct pci_dev *pdev,
-			  const struct tx_ring_info *re)
+static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
 {
 	if (re->flags & TX_MAP_SINGLE)
 		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
@@ -1629,6 +1631,7 @@  static void sky2_tx_unmap(struct pci_dev
 		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
 			       pci_unmap_len(re, maplen),
 			       PCI_DMA_TODEVICE);
+	re->flags = 0;
 }
 
 /*
@@ -1804,7 +1807,8 @@  mapping_error:
 }
 
 /*
- * Free ring elements from starting at tx_cons until "done"
+ * Transmit complete processing
+ * Free ring elements from starting at tx_cons until done index
  *
  * NB:
  *  1. The hardware will tell us about partial completion of multi-part
@@ -1813,9 +1817,9 @@  mapping_error:
  *     looks at the tail of the queue of FIFO (tx_cons), not
  *     the head (tx_prod)
  */
-static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
+static void sky2_tx_done(struct net_device *dev, u16 done)
 {
-	struct net_device *dev = sky2->netdev;
+	struct sky2_port *sky2 = netdev_priv(dev);
 	unsigned idx;
 
 	BUG_ON(done >= sky2->tx_ring_size);
@@ -1828,6 +1832,8 @@  static void sky2_tx_complete(struct sky2
 		sky2_tx_unmap(sky2->hw->pdev, re);
 
 		if (skb) {
+			re->skb = NULL;
+
 			if (unlikely(netif_msg_tx_done(sky2)))
 				printk(KERN_DEBUG "%s: tx done %u\n",
 				       dev->name, idx);
@@ -1836,13 +1842,10 @@  static void sky2_tx_complete(struct sky2
 			dev->stats.tx_bytes += skb->len;
 
 			dev_kfree_skb_any(skb);
-
-			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);
 		}
 	}
 
 	sky2->tx_cons = idx;
-	smp_mb();
 
 	if (tx_avail(sky2) > MAX_SKB_TX_LE + 4)
 		netif_wake_queue(dev);
@@ -1870,6 +1873,21 @@  static void sky2_tx_reset(struct sky2_hw
 	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
 }
 
+static void sky2_tx_clean(struct sky2_port *sky2)
+{
+	u16 idx;
+
+	for (idx = 0; idx < sky2->tx_ring_size; idx++) {
+		struct tx_ring_info *re = sky2->tx_ring + idx;
+
+		sky2_tx_unmap(sky2->hw->pdev, re);
+		if (re->skb) {
+			dev_kfree_skb_any(re->skb);
+			re->skb = NULL;
+		}
+	}
+}
+
 /* Network shutdown */
 static int sky2_down(struct net_device *dev)
 {
@@ -1933,8 +1951,7 @@  static int sky2_down(struct net_device *
 	sky2_tx_reset(hw, port);
 
 	/* Free any pending frames stuck in HW queue */
-	sky2_tx_complete(sky2, sky2->tx_prod);
-
+	sky2_tx_clean(sky2);
 	sky2_rx_clean(sky2);
 
 	sky2_free_buffers(sky2);
@@ -2411,15 +2428,6 @@  error:
 	goto resubmit;
 }
 
-/* Transmit complete */
-static inline void sky2_tx_done(struct net_device *dev, u16 last)
-{
-	struct sky2_port *sky2 = netdev_priv(dev);
-
-	if (netif_running(dev))
-		sky2_tx_complete(sky2, last);
-}
-
 static inline void sky2_skb_rx(const struct sky2_port *sky2,
 			       u32 status, struct sk_buff *skb)
 {
@@ -4201,7 +4209,7 @@  static int sky2_debug_show(struct seq_fi
 
 	/* Dump contents of tx ring */
 	sop = 1;
-	for (idx = sky2->tx_next; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
+	for (idx = sky2->tx_cons; idx != sky2->tx_prod && idx < sky2->tx_ring_size;
 	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
 		const struct sky2_tx_le *le = sky2->tx_le + idx;
 		u32 a = le32_to_cpu(le->addr);
--- a/drivers/net/sky2.h	2010-01-11 17:29:22.817088617 -0800
+++ b/drivers/net/sky2.h	2010-01-11 17:29:28.197120484 -0800
@@ -2187,7 +2187,6 @@  struct sky2_port {
 	u16		     tx_ring_size;
 	u16		     tx_cons;		/* next le to check */
 	u16		     tx_prod;		/* next le to use */
-	u16		     tx_next;		/* debug only */
 
 	u16		     tx_pending;
 	u16		     tx_last_mss;