
[Linux-ATM-General] atm/br2684: netif_stop_queue() when atm device busy and netif_wake_queue() when we can send packets again.

Message ID 4AA95838.4010007@redfish-solutions.com
State Not Applicable, archived
Delegated to: David Miller

Commit Message

Philip Prindeville Sept. 10, 2009, 7:49 p.m. UTC
I had noticed that with my Solos card, and using Qwest 7062/896kb/s
service that I was typically only getting ~400kb/s upstream, so I
thought that delayed transmitter restarts might be the culprit and
decided to try out this patch.

I'm running 2.6.27.26, and I modified the patch as below.

Has anyone confirmed this patch (Karl's) against 2.6.27?

Thanks,

-Philip


Karl Hiramoto wrote:
> This patch removes the call to dev_kfree_skb() when the atm device is busy.
> Calling dev_kfree_skb() causes heavy packet loss when the device is under
> heavy load; the more correct behavior is to stop the upper layers, then
> wake them again once the lower device can queue packets.
>
> Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
> ---
>  net/atm/br2684.c |   37 +++++++++++++++++++++++++++----------
>  1 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/net/atm/br2684.c b/net/atm/br2684.c
> index 2912665..5c42225 100644
> --- a/net/atm/br2684.c
> +++ b/net/atm/br2684.c
> @@ -69,7 +69,7 @@ struct br2684_vcc {
>  	struct net_device *device;
>  	/* keep old push, pop functions for chaining */
>  	void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
> -	/* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
> +	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
>  	enum br2684_encaps encaps;
>  	struct list_head brvccs;
>  #ifdef CONFIG_ATM_BR2684_IPFILTER
> @@ -142,6 +142,22 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
>  	return NULL;
>  }
>  
> +/* chained vcc->pop function.  Check if we should wake the netif_queue */
> +static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
> +{
> +	struct br2684_vcc *brvcc = BR2684_VCC(vcc);
> +	struct net_device *net_dev = skb->dev;
> +
> +	pr_debug("br2684_pop(vcc %p ; net_dev %p )\n", vcc, net_dev);
> +	brvcc->old_pop(vcc, skb);
> +
> +	if (!net_dev)
> +		return;
> +
> +	if (atm_may_send(vcc, 0))
> +		netif_wake_queue(net_dev);
> +
> +}
>  /*
>   * Send a packet out a particular vcc.  Not to useful right now, but paves
>   * the way for multiple vcc's per itf.  Returns true if we can send,
> @@ -200,20 +216,19 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
>  
>  	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
>  	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
> -	if (!atm_may_send(atmvcc, skb->truesize)) {
> -		/*
> -		 * We free this here for now, because we cannot know in a higher
> -		 * layer whether the skb pointer it supplied wasn't freed yet.
> -		 * Now, it always is.
> -		 */
> -		dev_kfree_skb(skb);
> -		return 0;
> -	}
>  	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
>  	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
>  	dev->stats.tx_packets++;
>  	dev->stats.tx_bytes += skb->len;
>  	atmvcc->send(atmvcc, skb);
> +
> +	if (!atm_may_send(atmvcc, 0)) {
> +		netif_stop_queue(brvcc->device);
> +		/*check for race with br2684_pop*/
> +		if (atm_may_send(atmvcc, 0))
> +			netif_start_queue(brvcc->device);
> +	}
> +
>  	return 1;
>  }
>  
> @@ -503,8 +518,10 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
>  	atmvcc->user_back = brvcc;
>  	brvcc->encaps = (enum br2684_encaps)be.encaps;
>  	brvcc->old_push = atmvcc->push;
> +	brvcc->old_pop = atmvcc->pop;
>  	barrier();
>  	atmvcc->push = br2684_push;
> +	atmvcc->pop = br2684_pop;
>  
>  	__skb_queue_head_init(&queue);
>  	rq = &sk_atm(atmvcc)->sk_receive_queue;
>
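
For readers less familiar with netdev flow control, the change boils down to
the standard stop / re-check / wake pattern.  A minimal sketch of that pattern
follows; hw_enqueue(), hw_has_room() and my_tx_complete() are made-up
placeholders, not kernel API, and br2684 itself uses atm_may_send(),
atmvcc->send() and the chained vcc->pop callback instead:

/*
 * Sketch only: the generic flow-control pattern the patch applies to br2684.
 * hw_enqueue()/hw_has_room()/my_tx_complete() are hypothetical placeholders.
 */
static int my_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	hw_enqueue(dev, skb);			/* hand the skb to the device */

	if (!hw_has_room(dev)) {		/* no room for another packet */
		netif_stop_queue(dev);		/* tell the core to stop feeding us */
		/* a completion may have freed room in between, so re-check */
		if (hw_has_room(dev))
			netif_start_queue(dev);
	}
	return 0;
}

/* runs from the TX-completion path, mirroring br2684_pop() */
static void my_tx_complete(struct net_device *dev)
{
	if (hw_has_room(dev))
		netif_wake_queue(dev);		/* let the core send again */
}

The re-check after netif_stop_queue() is what the "check for race with
br2684_pop" comment in the patch is about: a pop that runs between send() and
the stop would wake a queue that is not yet stopped, and without the re-check
the queue could then stay stopped with nothing left in flight to wake it.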

Comments

Karl Hiramoto Sept. 10, 2009, 9:30 p.m. UTC | #1
Philip A. Prindeville wrote:
> I had noticed that with my Solos card, and using Qwest 7062/896kb/s
> service that I was typically only getting ~400kb/s upstream, so I
> thought that delayed transmitter restarts might be the culprit and
> decided to try out this patch.
>
> I'm running 2.6.27.26, and I modified the patch as below.
>
> Has anyone confirmed this patch (Karl's) against 2.6.27?
>
> Thanks,
>
> -Philip
I'd be interested in hearing comparisons with/without the patch.  What
I see is no upstream packet loss with this patch, but slightly lower
total throughput; I think that is because the upper networking layers
take time to restart the packet flow.  I'm not really sure if, or how
many, packets the upper layers buffer.  I haven't had time to debug it
further.

I don't have a Solos card, but you may have to tweak the Solos driver's
sk_sndbuf value.

--
Karl
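
For context on why sk_sndbuf matters here: atm_may_send(), which the patch
uses to decide when to stop and wake the queue, is essentially a comparison
of the bytes already charged to the ATM socket against its send buffer.  A
paraphrase from memory of the 2.6.x include/linux/atmdev.h (a sketch, not the
exact source):

/*
 * Paraphrase of atm_may_send(): the VCC may accept another skb only while
 * the bytes already in flight on the ATM socket stay below its send buffer.
 */
static inline int atm_may_send_sketch(struct atm_vcc *vcc, unsigned int size)
{
	return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) <
	       sk_atm(vcc)->sk_sndbuf;
}

So the queue stops once sk_wmem_alloc reaches sk_sndbuf, and the chained pop
wakes it as soon as a freed skb opens up any room; a larger sk_sndbuf simply
keeps more data queued toward the card across that stop/wake cycle.
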
David Miller Sept. 11, 2009, 6:48 p.m. UTC | #2
From: Karl Hiramoto <karl@hiramoto.org>
Date: Thu, 10 Sep 2009 23:30:44 +0200

> I'm not really sure if or how many packets to upper layers buffer.

This is determined by ->tx_queue_len, so whatever value is being
set for ATM network devices is what the core will use for backlog
limiting while the device's TX queue is stopped.
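
For anyone wanting to experiment with this, tx_queue_len can be read and
changed from user space with the SIOCGIFTXQLEN/SIOCSIFTXQLEN ioctls; a
minimal sketch (the interface name "nas0" and the value 1000 are only
example choices):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/sockios.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);	/* any socket fd works for these ioctls */

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "nas0", IFNAMSIZ - 1);

	if (ioctl(fd, SIOCGIFTXQLEN, &ifr) == 0)	/* read the current length */
		printf("tx_queue_len = %d\n", ifr.ifr_qlen);

	ifr.ifr_qlen = 1000;				/* new length to try */
	if (ioctl(fd, SIOCSIFTXQLEN, &ifr) < 0)		/* needs CAP_NET_ADMIN */
		perror("SIOCSIFTXQLEN");

	close(fd);
	return 0;
}

The same thing can be done with "ifconfig <dev> txqueuelen <n>" or
"ip link set <dev> txqueuelen <n>".
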
Philip Prindeville Sept. 11, 2009, 7:56 p.m. UTC | #3
Karl Hiramoto wrote:
> Philip A. Prindeville wrote:
>> I had noticed that with my Solos card, and using Qwest 7062/896kb/s
>> service that I was typically only getting ~400kb/s upstream, so I
>> thought that delayed transmitter restarts might be the culprit and
>> decided to try out this patch.
>>
>> I'm running 2.6.27.26, and I modified the patch as below.
>>
>> Has anyone confirmed this patch (Karl's) against 2.6.27?
>>
>> Thanks,
>>
>> -Philip
> I'd be interested in hearing comparisons with/without the patch.    What
> i have is no upstream packet loss with this patch, however slightly
> lower total throughput.  I think because the upper networking layers
> take time to restart the packet flow.   I'm not really sure if or how
> many packets to upper layers buffer.  I haven't had time to debug it
> further.
> 
> I don't have a solos card, but you may have to tweak the solos driver
> sk_sndbuf  value.
> 
> -- 
> Karl

I'm running with the patch right now...  Can't tell if latency has been affected.

Seems to be working a bit better, but I've not done any formal testing.

-Philip

Karl Hiramoto Sept. 15, 2009, 1:44 p.m. UTC | #4
David Miller wrote:
> From: Karl Hiramoto <karl@hiramoto.org>
> Date: Thu, 10 Sep 2009 23:30:44 +0200
>
>   
>> I'm not really sure if or how many packets to upper layers buffer.
>>     
>
> This is determined by ->tx_queue_len, so whatever value is being
> set for ATM network devices is what the core will use for backlog
> limiting while the device's TX queue is stopped.
I tried varying tx_queue_len by 10x, 100x, and 1000x, but it didn't seem
to help much.  Whenever the ATM device called netif_wake_queue(), it seems
like the driver still starves for packets and still takes time to get
going again.

It seems like when the driver calls netif_wake_queue(), its TX hardware
queue is nearly full but it still has space to accept new packets.  The TX
hardware queue then has time to empty, the device starves for packets (goes
idle), and finally a packet comes in from the upper networking layers.  I'm
not really sure at the moment what is causing my maximum throughput to drop.

I did try changing sk_sndbuf to 256K, but that didn't seem to help either.

--
Karl
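
A purely hypothetical variation on the wake-up timing discussed above (not
part of the posted patch): wake the queue only once a chunk of the send
buffer has drained, so each wake-up hands the stack room for a burst of
packets instead of a single one.  The 1/2 watermark below is an arbitrary
illustration, reusing the same fields the patch touches:

/*
 * Hypothetical variant of br2684_pop(): wake only after at least half of
 * the VCC's send buffer has drained.
 */
static void br2684_pop_watermark(struct atm_vcc *vcc, struct sk_buff *skb)
{
	struct br2684_vcc *brvcc = BR2684_VCC(vcc);
	struct net_device *net_dev = skb->dev;

	brvcc->old_pop(vcc, skb);

	if (!net_dev)
		return;

	/* wake once less than half of sk_sndbuf is still in flight */
	if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) <
	    sk_atm(vcc)->sk_sndbuf / 2)
		netif_wake_queue(net_dev);
}

Whether that helps or hurts the idle gap described above would need
measuring; it trades fewer wake-ups for a later restart of the flow.
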
Karl Hiramoto Sept. 15, 2009, 2:57 p.m. UTC | #5
Karl Hiramoto wrote:
> David Miller wrote:
>   
>> From: Karl Hiramoto <karl@hiramoto.org>
>> Date: Thu, 10 Sep 2009 23:30:44 +0200
>>
>>   
>>     
>>> I'm not really sure if or how many packets to upper layers buffer.
>>>     
>>>       
>> This is determined by ->tx_queue_len, so whatever value is being
>> set for ATM network devices is what the core will use for backlog
>> limiting while the device's TX queue is stopped.
>>     
> I tried varying tx_queue_len by 10, 100,  and 1000x, but it didn't seem 
> to help much.  Whenever the atm dev called netif_wake_queue() it seems 
> like the driver still starves for packets  and still takes time to get 
> going again.
>
>
> It seem like when the driver calls netif_wake_queue() it's TX hardware 
> queue is nearly full, but it has space to accept new packets.  The TX 
> hardware queue has time to empty, devices starves for packets(goes 
> idle), then finally a packet comes in from the upper networking 
> layers.   I'm not really sure at the moment where the problem lies to my 
> maximum throughput dropping.
>
> I did try changing sk_sndbuf to 256K but that didn't seem to help either.
>
> --
Actually, I think I spoke too soon: after tuning TCP parameters and
txqueuelen on all the machines (server, router, and client), it seems my
performance came back.

--
Karl

Philip Prindeville Sept. 16, 2009, 6:04 p.m. UTC | #6
On 09/15/2009 07:57 AM, Karl Hiramoto wrote:
> Karl Hiramoto wrote:
>   
>> David Miller wrote:
>>   
>>     
>>> From: Karl Hiramoto <karl@hiramoto.org>
>>> Date: Thu, 10 Sep 2009 23:30:44 +0200
>>>
>>>   
>>>     
>>>       
>>>> I'm not really sure if or how many packets to upper layers buffer.
>>>>     
>>>>       
>>>>         
>>> This is determined by ->tx_queue_len, so whatever value is being
>>> set for ATM network devices is what the core will use for backlog
>>> limiting while the device's TX queue is stopped.
>>>     
>>>       
>> I tried varying tx_queue_len by 10, 100,  and 1000x, but it didn't seem 
>> to help much.  Whenever the atm dev called netif_wake_queue() it seems 
>> like the driver still starves for packets  and still takes time to get 
>> going again.
>>
>>
>> It seem like when the driver calls netif_wake_queue() it's TX hardware 
>> queue is nearly full, but it has space to accept new packets.  The TX 
>> hardware queue has time to empty, devices starves for packets(goes 
>> idle), then finally a packet comes in from the upper networking 
>> layers.   I'm not really sure at the moment where the problem lies to my 
>> maximum throughput dropping.
>>
>> I did try changing sk_sndbuf to 256K but that didn't seem to help either.
>>
>> --
>>     
> Actually i think i spoke too soon,  tuning TCP parameters, txqueuelen on 
> all machines the server, router and client it seems my performance came 
> back.
>
> --
> Karl
>   

So what size are you currently using?

An out-of-the-box 2.6.27.29 build seems to set it to 1000.

-Philip


Patch

--- linux-2.6.27.29-astlinux/net/atm/br2684.c.orig	2009-07-30 16:06:41.000000000 -0700
+++ linux-2.6.27.29-astlinux/net/atm/br2684.c	2009-09-10 12:42:50.000000000 -0700
@@ -69,7 +69,7 @@  struct br2684_vcc {
 	struct net_device *device;
 	/* keep old push, pop functions for chaining */
 	void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
-	/* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
+	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
 	enum br2684_encaps encaps;
 	struct list_head brvccs;
 #ifdef CONFIG_ATM_BR2684_IPFILTER
@@ -143,6 +143,22 @@  static struct net_device *br2684_find_de
 	return NULL;
 }
 
+/* chained vcc->pop function.  Check if we should wake the netif_queue */
+static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+	struct br2684_vcc *brvcc = BR2684_VCC(vcc);
+	struct net_device *net_dev = skb->dev;
+
+	pr_debug("br2684_pop(vcc %p ; net_dev %p )\n", vcc, net_dev);
+	brvcc->old_pop(vcc, skb);
+
+	if (!net_dev)
+		return;
+
+	if (atm_may_send(vcc, 0))
+		netif_wake_queue(net_dev);
+
+}
 /*
  * Send a packet out a particular vcc.  Not to useful right now, but paves
  * the way for multiple vcc's per itf.  Returns true if we can send,
@@ -200,20 +216,19 @@  static int br2684_xmit_vcc(struct sk_buf
 
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-	if (!atm_may_send(atmvcc, skb->truesize)) {
-		/*
-		 * We free this here for now, because we cannot know in a higher
-		 * layer whether the skb pointer it supplied wasn't freed yet.
-		 * Now, it always is.
-		 */
-		dev_kfree_skb(skb);
-		return 0;
-	}
 	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
 	brdev->stats.tx_packets++;
 	brdev->stats.tx_bytes += skb->len;
 	atmvcc->send(atmvcc, skb);
+
+	if (!atm_may_send(atmvcc, 0)) {
+		netif_stop_queue(brvcc->device);
+		/*check for race with br2684_pop*/
+		if (atm_may_send(atmvcc, 0))
+			netif_start_queue(brvcc->device);
+	}
+
 	return 1;
 }
 
@@ -509,8 +524,10 @@  static int br2684_regvcc(struct atm_vcc 
 	atmvcc->user_back = brvcc;
 	brvcc->encaps = (enum br2684_encaps)be.encaps;
 	brvcc->old_push = atmvcc->push;
+	brvcc->old_pop = atmvcc->pop;
 	barrier();
 	atmvcc->push = br2684_push;
+	atmvcc->pop = br2684_pop;
 
 	rq = &sk_atm(atmvcc)->sk_receive_queue;