diff mbox series

net: convert gro_count to bitmask

Message ID 1531300553-21413-1-git-send-email-lirongqing@baidu.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series net: convert gro_count to bitmask | expand

Commit Message

Li RongQing July 11, 2018, 9:15 a.m. UTC
gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
flows, gro_hash may be not fully used, so it is unnecessary to iterate
all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.

convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
represents a element of gro_hash, only flush a gro_hash element if the
related bit is set, to speed up napi_gro_flush().

and update gro_bitmask only if it will be changed, to reduce cache
update

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 35 +++++++++++++++++++++++------------
 2 files changed, 24 insertions(+), 13 deletions(-)

Comments

Stefano Brivio July 11, 2018, 10:51 a.m. UTC | #1
On Wed, 11 Jul 2018 17:15:53 +0800
Li RongQing <lirongqing@baidu.com> wrote:

> @@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  	if (grow > 0)
>  		gro_pull_from_frag0(skb, grow);
>  ok:
> +	if (napi->gro_hash[hash].count)
> +		if (!test_bit(hash, &napi->gro_bitmask))
> +			set_bit(hash, &napi->gro_bitmask);
> +	else if (test_bit(hash, &napi->gro_bitmask))
> +		clear_bit(hash, &napi->gro_bitmask);

This might not do what you want.
Eric Dumazet July 11, 2018, 11:32 a.m. UTC | #2
On 07/11/2018 02:15 AM, Li RongQing wrote:
> gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
> flows, gro_hash may be not fully used, so it is unnecessary to iterate
> all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.
> 
> convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
> represents a element of gro_hash, only flush a gro_hash element if the
> related bit is set, to speed up napi_gro_flush().
> 
> and update gro_bitmask only if it will be changed, to reduce cache
> update
> 
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  include/linux/netdevice.h |  2 +-
>  net/core/dev.c            | 35 +++++++++++++++++++++++------------
>  2 files changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index b683971e500d..df49b36ef378 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -322,7 +322,7 @@ struct napi_struct {
>  
>  	unsigned long		state;
>  	int			weight;
> -	unsigned int		gro_count;
> +	unsigned long		gro_bitmask;
>  	int			(*poll)(struct napi_struct *, int);
>  #ifdef CONFIG_NETPOLL
>  	int			poll_owner;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d13cddcac41f..a08dbdd217a6 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5171,9 +5171,11 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
>  			return;
>  		list_del_init(&skb->list);
>  		napi_gro_complete(skb);
> -		napi->gro_count--;
>  		napi->gro_hash[index].count--;
>  	}
> +
> +	if (!napi->gro_hash[index].count)
> +		clear_bit(index, &napi->gro_bitmask);

I suggest you not add an atomic operation here.

Current cpu owns this NAPI after all.

Same remark for the whole patch.

->  __clear_bit(), __set_bit() and similar operators

Ideally you should provide TCP_RR number with busy polling enabled, to eventually catch regressions.

Thanks.
Li RongQing July 12, 2018, 2:31 a.m. UTC | #3
> -----邮件原件-----
> 发件人: Stefano Brivio [mailto:sbrivio@redhat.com]
> 发送时间: 2018年7月11日 18:52
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: netdev@vger.kernel.org; Eric Dumazet <edumazet@google.com>
> 主题: Re: [PATCH] net: convert gro_count to bitmask
> 
> On Wed, 11 Jul 2018 17:15:53 +0800
> Li RongQing <lirongqing@baidu.com> wrote:
> 
> > @@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct
> napi_struct *napi, struct sk_buff
> >  	if (grow > 0)
> >  		gro_pull_from_frag0(skb, grow);
> >  ok:
> > +	if (napi->gro_hash[hash].count)
> > +		if (!test_bit(hash, &napi->gro_bitmask))
> > +			set_bit(hash, &napi->gro_bitmask);
> > +	else if (test_bit(hash, &napi->gro_bitmask))
> > +		clear_bit(hash, &napi->gro_bitmask);
> 
> This might not do what you want.
> 
> --

could you show detail ?

-RongQing

> Stefano
Li RongQing July 12, 2018, 2:31 a.m. UTC | #4
> -----邮件原件-----
> 发件人: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> 发送时间: 2018年7月11日 19:32
> 收件人: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
> 主题: Re: [PATCH] net: convert gro_count to bitmask
> 
> 
> 
> On 07/11/2018 02:15 AM, Li RongQing wrote:
> > gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
> > flows, gro_hash may be not fully used, so it is unnecessary to iterate
> > all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.
> >
> > convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
> > represents a element of gro_hash, only flush a gro_hash element if the
> > related bit is set, to speed up napi_gro_flush().
> >
> > and update gro_bitmask only if it will be changed, to reduce cache
> > update
> >
> > Suggested-by: Eric Dumazet <edumazet@google.com>
> > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > ---
> >  include/linux/netdevice.h |  2 +-
> >  net/core/dev.c            | 35 +++++++++++++++++++++++------------
> >  2 files changed, 24 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index b683971e500d..df49b36ef378 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -322,7 +322,7 @@ struct napi_struct {
> >
> >  	unsigned long		state;
> >  	int			weight;
> > -	unsigned int		gro_count;
> > +	unsigned long		gro_bitmask;
> >  	int			(*poll)(struct napi_struct *, int);
> >  #ifdef CONFIG_NETPOLL
> >  	int			poll_owner;
> > diff --git a/net/core/dev.c b/net/core/dev.c index
> > d13cddcac41f..a08dbdd217a6 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -5171,9 +5171,11 @@ static void __napi_gro_flush_chain(struct
> napi_struct *napi, u32 index,
> >  			return;
> >  		list_del_init(&skb->list);
> >  		napi_gro_complete(skb);
> > -		napi->gro_count--;
> >  		napi->gro_hash[index].count--;
> >  	}
> > +
> > +	if (!napi->gro_hash[index].count)
> > +		clear_bit(index, &napi->gro_bitmask);
> 
> I suggest you not add an atomic operation here.
> 
> Current cpu owns this NAPI after all.
> 
> Same remark for the whole patch.
> 
> ->  __clear_bit(), __set_bit() and similar operators
> 
> Ideally you should provide TCP_RR number with busy polling enabled, to
> eventually catch regressions.
> 

I will change and do the test
Thank you.

-RongQing

> Thanks.
David Miller July 12, 2018, 2:49 a.m. UTC | #5
From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 11 Jul 2018 17:15:53 +0800

> +		clear_bit(index, &napi->gro_bitmask);

Please don't use atomics here, at least use __clear_bit().

This is why I did the operations by hand in my version of the patch.
Also, if you are going to preempt my patch, at least retain the
comment I added around the GRO_HASH_BUCKETS definitions which warns
the reader about the limit.

Thanks.
Li RongQing July 12, 2018, 3:03 a.m. UTC | #6
> -----邮件原件-----
> 发件人: David Miller [mailto:davem@davemloft.net]
> 发送时间: 2018年7月12日 10:49
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: netdev@vger.kernel.org
> 主题: Re: [PATCH] net: convert gro_count to bitmask
> 
> From: Li RongQing <lirongqing@baidu.com>
> Date: Wed, 11 Jul 2018 17:15:53 +0800
> 
> > +		clear_bit(index, &napi->gro_bitmask);
> 
> Please don't use atomics here, at least use __clear_bit().
> 

Thanks, this is same as Eric's suggestion.


> This is why I did the operations by hand in my version of the patch.
> Also, if you are going to preempt my patch, at least retain the comment I
> added around the GRO_HASH_BUCKETS definitions which warns the reader
> about the limit.
> 

I add BUILD_BUG_ON in netdev_init, so I think we need not to add comment

@@ -9151,6 +9159,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */  static int __net_init netdev_init(struct net *net)  {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+			FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+


-RongQing

> Thanks.
David Miller July 12, 2018, 3:51 a.m. UTC | #7
From: "Li,Rongqing" <lirongqing@baidu.com>
Date: Thu, 12 Jul 2018 03:03:51 +0000

> 
> 
>> -----邮件原件-----
>> 发件人: David Miller [mailto:davem@davemloft.net]
>> 发送时间: 2018年7月12日 10:49
>> 收件人: Li,Rongqing <lirongqing@baidu.com>
>> 抄送: netdev@vger.kernel.org
>> 主题: Re: [PATCH] net: convert gro_count to bitmask
>> 
>> From: Li RongQing <lirongqing@baidu.com>
>> Date: Wed, 11 Jul 2018 17:15:53 +0800
>> 
>> > +		clear_bit(index, &napi->gro_bitmask);
>> 
>> Please don't use atomics here, at least use __clear_bit().
>> 
> 
> Thanks, this is same as Eric's suggestion.
> 
> 
>> This is why I did the operations by hand in my version of the patch.
>> Also, if you are going to preempt my patch, at least retain the comment I
>> added around the GRO_HASH_BUCKETS definitions which warns the reader
>> about the limit.
>> 
> 
> I add BUILD_BUG_ON in netdev_init, so I think we need not to add comment

That's a good compile time check, but the person thinking about editing
the definition doesn't see the limit in the header file nor know why
the limit exists in the first place.
Stefano Brivio July 12, 2018, 9:48 a.m. UTC | #8
On Thu, 12 Jul 2018 02:31:10 +0000
"Li,Rongqing" <lirongqing@baidu.com> wrote:

> > -----邮件原件-----
> > 发件人: Stefano Brivio [mailto:sbrivio@redhat.com]
> > 发送时间: 2018年7月11日 18:52
> > 收件人: Li,Rongqing <lirongqing@baidu.com>
> > 抄送: netdev@vger.kernel.org; Eric Dumazet <edumazet@google.com>
> > 主题: Re: [PATCH] net: convert gro_count to bitmask
> > 
> > On Wed, 11 Jul 2018 17:15:53 +0800
> > Li RongQing <lirongqing@baidu.com> wrote:
> >   
> > > @@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct  
> > napi_struct *napi, struct sk_buff  
> > >  	if (grow > 0)
> > >  		gro_pull_from_frag0(skb, grow);
> > >  ok:
> > > +	if (napi->gro_hash[hash].count)
> > > +		if (!test_bit(hash, &napi->gro_bitmask))
> > > +			set_bit(hash, &napi->gro_bitmask);
> > > +	else if (test_bit(hash, &napi->gro_bitmask))
> > > +		clear_bit(hash, &napi->gro_bitmask);  
> > 
> > This might not do what you want.
> > 
> > --  
> 
> could you show detail ?

$ cat if1.c; gcc -o if1 if1.c
#include <stdio.h>

int main()
{
	if (1)
		if (0)
			;
	else if (2)
		printf("whoops\n");

	return 0;
}
$ ./if1
whoops

$ cat if2.c; gcc -o if2 if2.c
#include <stdio.h>

int main()
{
	if (1) {
		if (0)
			;
	} else if (2) {
		printf("whoops\n");
	}

	return 0;
}
$ ./if2
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..df49b36ef378 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -322,7 +322,7 @@  struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
-	unsigned int		gro_count;
+	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
 	int			poll_owner;
diff --git a/net/core/dev.c b/net/core/dev.c
index d13cddcac41f..a08dbdd217a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5171,9 +5171,11 @@  static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 			return;
 		list_del_init(&skb->list);
 		napi_gro_complete(skb);
-		napi->gro_count--;
 		napi->gro_hash[index].count--;
 	}
+
+	if (!napi->gro_hash[index].count)
+		clear_bit(index, &napi->gro_bitmask);
 }
 
 /* napi->gro_hash[].list contains packets ordered by age.
@@ -5184,8 +5186,10 @@  void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
 	u32 i;
 
-	for (i = 0; i < GRO_HASH_BUCKETS; i++)
-		__napi_gro_flush_chain(napi, i, flush_old);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		if (test_bit(i, &napi->gro_bitmask))
+			__napi_gro_flush_chain(napi, i, flush_old);
+	}
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
@@ -5277,8 +5281,8 @@  static void gro_flush_oldest(struct list_head *head)
 	if (WARN_ON_ONCE(!oldest))
 		return;
 
-	/* Do not adjust napi->gro_count, caller is adding a new SKB to
-	 * the chain.
+	/* Do not adjust napi->gro_hash[].count, caller is adding a new
+	 * SKB to the chain.
 	 */
 	list_del(&oldest->list);
 	napi_gro_complete(oldest);
@@ -5352,7 +5356,6 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (pp) {
 		list_del_init(&pp->list);
 		napi_gro_complete(pp);
-		napi->gro_count--;
 		napi->gro_hash[hash].count--;
 	}
 
@@ -5365,7 +5368,6 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
 		gro_flush_oldest(gro_head);
 	} else {
-		napi->gro_count++;
 		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
@@ -5380,6 +5382,12 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
+	if (napi->gro_hash[hash].count)
+		if (!test_bit(hash, &napi->gro_bitmask))
+			set_bit(hash, &napi->gro_bitmask);
+	else if (test_bit(hash, &napi->gro_bitmask))
+		clear_bit(hash, &napi->gro_bitmask);
+
 	return ret;
 
 normal:
@@ -5778,7 +5786,7 @@  bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5987,7 +5995,7 @@  static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_count && !napi_disable_pending(napi) &&
+	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -6002,7 +6010,7 @@  void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 		INIT_LIST_HEAD(&napi->gro_hash[i].list);
 		napi->gro_hash[i].count = 0;
@@ -6062,7 +6070,7 @@  void netif_napi_del(struct napi_struct *napi)
 	napi_free_frags(napi);
 
 	flush_gro_hash(napi);
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -6104,7 +6112,7 @@  static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
@@ -9151,6 +9159,9 @@  static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+			FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+
 	if (net != &init_net)
 		INIT_LIST_HEAD(&net->dev_base_head);