[RFC] udp: cache sock to avoid searching it twice

Message ID 1541744479-12810-1-git-send-email-lirongqing@baidu.com
State RFC, archived
Delegated to: David Miller
Series [RFC] udp: cache sock to avoid searching it twice

Commit Message

Li RongQing Nov. 9, 2018, 6:21 a.m. UTC
UDP GRO needs to look up the socket twice, first in GRO receive and
again in GRO complete. Caching the socket in the skb avoids the second
lookup and gives a small performance boost.

netperf -t UDP_RR -l 10

Before:
	Rate per sec: 28746.01
After:
	Rate per sec: 29401.67

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 net/ipv4/udp_offload.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

Comments

Paolo Abeni Nov. 9, 2018, 1:41 p.m. UTC | #1
Hi,

Adding Willem, I think he may be interested.

On Fri, 2018-11-09 at 14:21 +0800, Li RongQing wrote:
> UDP GRO needs to look up the socket twice, first in GRO receive and
> again in GRO complete. Caching the socket in the skb avoids the second
> lookup and gives a small performance boost.
> 
> netperf -t UDP_RR -l 10
> 
> Before:
> 	Rate per sec: 28746.01
> After:
> 	Rate per sec: 29401.67
> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  net/ipv4/udp_offload.c | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 0646d61f4fa8..429570112a33 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -408,6 +408,11 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>  
>  	if (udp_sk(sk)->gro_enabled) {
>  		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> +
> +		if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> +			sock_hold(sk);
> +			pp->sk = sk;
> +		}
>  		rcu_read_unlock();
>  		return pp;
>  	}

What if 'pp' is NULL?
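For context, call_gro_receive() returns NULL when the skb is merged or
held for further aggregation, so the check would also need an explicit
NULL test; a minimal sketch, not part of the posted patch:

	if (pp && !IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
		sock_hold(sk);
		pp->sk = sk;
	}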

Aside from that, this replaces a lookup with 2 atomic ops, and the cost
is only amortized when the lookup is spread over multiple aggregated
packets: I'm unsure it's worthwhile, and I don't understand how it
improves RR tests (where the socket can't see multiple, consecutive
skbs, AFAIK).

Cheers,

Paolo
Eric Dumazet Nov. 9, 2018, 5:25 p.m. UTC | #2
On 11/08/2018 10:21 PM, Li RongQing wrote:
> UDP GRO needs to look up the socket twice, first in GRO receive and
> again in GRO complete. Caching the socket in the skb avoids the second
> lookup and gives a small performance boost.
> 
> netperf -t UDP_RR -l 10
> 
> Before:
> 	Rate per sec: 28746.01
> After:
> 	Rate per sec: 29401.67
> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  net/ipv4/udp_offload.c | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 0646d61f4fa8..429570112a33 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -408,6 +408,11 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>  
>  	if (udp_sk(sk)->gro_enabled) {
>  		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> +
> +		if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> +			sock_hold(sk);
> +			pp->sk = sk;


You also have to set pp->destructor to sock_edemux

flush_gro_hash -> kfree_skb()

If there is no destructor, the reference on pp->sk will never be released.
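
A minimal sketch of what that could look like (assuming sock_edemux is
a suitable destructor here):

	if (pp && !IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
		sock_hold(sk);
		pp->sk = sk;
		/* drop the reference if the skb is freed without reaching
		 * udp_gro_complete(), e.g. flush_gro_hash() -> kfree_skb()
		 */
		pp->destructor = sock_edemux;
	}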




> +		}
>  		rcu_read_unlock();
>  		return pp;
>  	}
> @@ -444,6 +449,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>  	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>  	pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
>  
> +	if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> +		sock_hold(sk);
> +		pp->sk = sk;
> +	}
>  out_unlock:
>  	rcu_read_unlock();
>  	skb_gro_flush_final(skb, pp, flush);
> @@ -502,7 +511,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
>  	uh->len = newlen;
>  
>  	rcu_read_lock();
> -	sk = (*lookup)(skb, uh->source, uh->dest);
> +	sk = skb->sk;
> +	if (!sk)
> +		sk = (*lookup)(skb, uh->source, uh->dest);
>  	if (sk && udp_sk(sk)->gro_enabled) {
>  		err = udp_gro_complete_segment(skb);
>  	} else if (sk && udp_sk(sk)->gro_complete) {
> @@ -516,6 +527,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
>  		err = udp_sk(sk)->gro_complete(sk, skb,
>  				nhoff + sizeof(struct udphdr));
>  	}
> +
> +	if (skb->sk) {
> +		sock_put(skb->sk);
> +		skb->sk = NULL;
> +	}
>  	rcu_read_unlock();
>  
>  	if (skb->remcsum_offload)
>
Li RongQing Nov. 12, 2018, 5:39 a.m. UTC | #3
On Sat, Nov 10, 2018 at 1:29 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 11/08/2018 10:21 PM, Li RongQing wrote:
> > UDP GRO needs to look up the socket twice, first in GRO receive and
> > again in GRO complete. Caching the socket in the skb avoids the second
> > lookup and gives a small performance boost.
> >
> > netperf -t UDP_RR -l 10
> >
> > Before:
> >       Rate per sec: 28746.01
> > After:
> >       Rate per sec: 29401.67
> >
> > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > ---
> >  net/ipv4/udp_offload.c | 18 +++++++++++++++++-
> >  1 file changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 0646d61f4fa8..429570112a33 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -408,6 +408,11 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> >
> >       if (udp_sk(sk)->gro_enabled) {
> >               pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> > +
> > +             if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> > +                     sock_hold(sk);
> > +                     pp->sk = sk;
>
>
> You also have to set pp->destructor to sock_edemux
>
> flush_gro_hash -> kfree_skb()
>
> If there is no destructor, the reference on pp->sk will never be released.
>
>

Ok, thanks.

Does it need to reset sk in udp_gro_complete? IP early demux will look up
the UDP socket again; if we keep the cached socket, we can avoid that
second lookup as well.
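
Just to illustrate the idea (a rough sketch only, assuming the reference
taken in udp_gro_receive() stays balanced via the sock_edemux destructor):

	/* simplified view of the receive side: if udp_gro_complete() leaves
	 * skb->sk set (with skb->destructor == sock_edemux), the normal UDP
	 * receive path can take the cached socket via skb_steal_sock() and
	 * skip its own lookup; the reference is dropped with sock_put() once
	 * the packet has been delivered.
	 */
	sk = skb_steal_sock(skb);
	if (!sk)
		sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);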


-RongQing
>
>
> > +             }
> >               rcu_read_unlock();
> >               return pp;
> >       }
> > @@ -444,6 +449,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> >       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> >       pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
> >
> > +     if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> > +             sock_hold(sk);
> > +             pp->sk = sk;
> > +     }
> >  out_unlock:
> >       rcu_read_unlock();
> >       skb_gro_flush_final(skb, pp, flush);
> > @@ -502,7 +511,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
> >       uh->len = newlen;
> >
> >       rcu_read_lock();
> > -     sk = (*lookup)(skb, uh->source, uh->dest);
> > +     sk = skb->sk;
> > +     if (!sk)
> > +             sk = (*lookup)(skb, uh->source, uh->dest);
> >       if (sk && udp_sk(sk)->gro_enabled) {
> >               err = udp_gro_complete_segment(skb);
> >       } else if (sk && udp_sk(sk)->gro_complete) {
> > @@ -516,6 +527,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
> >               err = udp_sk(sk)->gro_complete(sk, skb,
> >                               nhoff + sizeof(struct udphdr));
> >       }
> > +
> > +     if (skb->sk) {
> > +             sock_put(skb->sk);
> > +             skb->sk = NULL;
> > +     }
> >       rcu_read_unlock();
> >
> >       if (skb->remcsum_offload)
> >
Li RongQing Nov. 12, 2018, 5:46 a.m. UTC | #4
> >               return pp;
> >       }
>
> What if 'pp' is NULL?
>
> Aside from that, this replaces a lookup with 2 atomic ops, and the cost
> is only amortized when the lookup is spread over multiple aggregated
> packets: I'm unsure it's worthwhile, and I don't understand how it
> improves RR tests (where the socket can't see multiple, consecutive
> skbs, AFAIK).
>
> Cheers,
>
> Paolo
>

If we do not release the socket in udp_gro_complete, we can avoid another
UDP socket lookup when IP early demux runs later; that may make it more
worthwhile.

I tested UDP_STREAM and found no difference, both before and after reach
the NIC's limit of 10G, so I tested RR instead. I will do more tests.

-RongQing

Patch

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0646d61f4fa8..429570112a33 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -408,6 +408,11 @@  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 	if (udp_sk(sk)->gro_enabled) {
 		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+
+		if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
+			sock_hold(sk);
+			pp->sk = sk;
+		}
 		rcu_read_unlock();
 		return pp;
 	}
@@ -444,6 +449,10 @@  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
 	pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
 
+	if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
+		sock_hold(sk);
+		pp->sk = sk;
+	}
 out_unlock:
 	rcu_read_unlock();
 	skb_gro_flush_final(skb, pp, flush);
@@ -502,7 +511,9 @@  int udp_gro_complete(struct sk_buff *skb, int nhoff,
 	uh->len = newlen;
 
 	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
+	sk = skb->sk;
+	if (!sk)
+		sk = (*lookup)(skb, uh->source, uh->dest);
 	if (sk && udp_sk(sk)->gro_enabled) {
 		err = udp_gro_complete_segment(skb);
 	} else if (sk && udp_sk(sk)->gro_complete) {
@@ -516,6 +527,11 @@  int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
 	}
+
+	if (skb->sk) {
+		sock_put(skb->sk);
+		skb->sk = NULL;
+	}
 	rcu_read_unlock();
 
 	if (skb->remcsum_offload)