diff mbox series

[net-next,RFC,7/8] udp: gro behind static key

Message ID 20180914175941.213950-8-willemdebruijn.kernel@gmail.com
State RFC, archived
Headers show
Series udp and configurable gro | expand

Commit Message

Willem de Bruijn Sept. 14, 2018, 5:59 p.m. UTC
From: Willem de Bruijn <willemb@google.com>

Avoid the socket lookup cost in udp_gro_receive if no socket has a
gro callback configured.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/net/udp.h      | 2 ++
 net/ipv4/udp.c         | 2 +-
 net/ipv4/udp_offload.c | 2 +-
 net/ipv6/udp.c         | 2 +-
 net/ipv6/udp_offload.c | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

Comments

Subash Abhinov Kasiviswanathan Sept. 15, 2018, 3:37 a.m. UTC | #1
On 2018-09-14 11:59, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Avoid the socket lookup cost in udp_gro_receive if no socket has a
> gro callback configured.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 4f6aa95a9b12..f44fe328aa0f 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct
> list_head *head,
>  {
>  	struct udphdr *uh = udp_gro_udphdr(skb);
> 
> -	if (unlikely(!uh))
> +	if (unlikely(!uh) ||
> !static_branch_unlikely(&udp_encap_needed_key))
>  		goto flush;
> 

Hi Willem

Does this need to be

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6dd3f0a..fcd5589 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -407,7 +407,7 @@ static struct sk_buff *udp4_gro_receive(struct 
list_head *head,
  {
         struct udphdr *uh = udp_gro_udphdr(skb);

-       if (unlikely(!uh) || 
!static_branch_unlikely(&udp_encap_needed_key))
+       if (unlikely(!uh) || 
static_branch_unlikely(&udp_encap_needed_key))
                 goto flush;

         /* Don't bother verifying checksum if we're going to flush 
anyway. */

I tried setting UDP_GRO socket option and I had to make this change to
exercise the udp_gro_receive_cb code path.
Willem de Bruijn Sept. 16, 2018, 6:10 p.m. UTC | #2
On Fri, Sep 14, 2018 at 11:37 PM Subash Abhinov Kasiviswanathan
<subashab@codeaurora.org> wrote:
>
> On 2018-09-14 11:59, Willem de Bruijn wrote:
> > From: Willem de Bruijn <willemb@google.com>
> >
> > Avoid the socket lookup cost in udp_gro_receive if no socket has a
> > gro callback configured.
> >
> > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 4f6aa95a9b12..f44fe328aa0f 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct
> > list_head *head,
> >  {
> >       struct udphdr *uh = udp_gro_udphdr(skb);
> >
> > -     if (unlikely(!uh))
> > +     if (unlikely(!uh) ||
> > !static_branch_unlikely(&udp_encap_needed_key))
> >               goto flush;
> >
>
> Hi Willem
>
> Does this need to be
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 6dd3f0a..fcd5589 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -407,7 +407,7 @@ static struct sk_buff *udp4_gro_receive(struct
> list_head *head,
>   {
>          struct udphdr *uh = udp_gro_udphdr(skb);
>
> -       if (unlikely(!uh) ||
> !static_branch_unlikely(&udp_encap_needed_key))
> +       if (unlikely(!uh) ||
> static_branch_unlikely(&udp_encap_needed_key))
>                  goto flush;
>
>          /* Don't bother verifying checksum if we're going to flush
> anyway. */
>
> I tried setting UDP_GRO socket option and I had to make this change to
> exercise the udp_gro_receive_cb code path.

Thanks for trying it out, Subash.

The static_branch logic is correct as is. It skips the gro logic if
the static key is not enabled.

But there is indeed a bug. The UDP_GRO setsockopt has to enable
the static key. A full patch is more complete, but this should fix it for
now:

--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2506,6 +2506,7 @@ int udp_lib_setsockopt(struct sock *sk, int
level, int optname,
                        if (!udp_sk(sk)->gro_receive) {
                                udp_sk(sk)->gro_complete = udp_gro_complete_cb;
                                udp_sk(sk)->gro_receive = udp_gro_receive_cb;
+                               udp_encap_enable();

sorry about that. I had tested the two patches independently, but
apparently not on top of each other.

Independent of udp gro, I tested the static key by running udp_rr
with perf record -a -g and looking for __udp4_lib_lookup. With the
patch, all calls are from __udp4_lib_rcv else udp_gro_receive also
shows up. I had to stress the porttable by opening ~32K ports.

Also, the simple patch to udpgso_bench_rx.c that I used to test
gro:

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c
b/tools/testing/selftests/net/udpgso_bench_rx.c
index 727cf67a3f75..35ba8567fc56 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -31,6 +31,11 @@
 #include <sys/wait.h>
 #include <unistd.h>

+#ifndef UDP_GRO
+#define UDP_GRO                104
+#endif
+
+static bool cfg_do_gro;
 static int  cfg_port           = 8000;
 static bool cfg_tcp;
 static bool cfg_verify;
@@ -89,6 +94,11 @@ static int do_socket(bool do_tcp)
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
                error(1, errno, "setsockopt reuseport");

+       if (cfg_do_gro) {
+               if (setsockopt(fd, SOL_UDP, UDP_GRO, &val, sizeof(val)))
+                       error(1, errno, "setsockopt gro");
+       }
+
        addr.sin6_family =      PF_INET6;
        addr.sin6_port =        htons(cfg_port);
        addr.sin6_addr =        in6addr_any;
@@ -199,8 +209,11 @@ static void parse_opts(int argc, char **argv)
 {
        int c;

-       while ((c = getopt(argc, argv, "ptv")) != -1) {
+       while ((c = getopt(argc, argv, "gptv")) != -1) {
                switch (c) {
+               case 'g':
+                       cfg_do_gro = true;
+                       break;
                case 'p':
                        cfg_port = htons(strtoul(optarg, NULL, 0));
                        break;
Steffen Klassert Sept. 17, 2018, 9:03 a.m. UTC | #3
On Fri, Sep 14, 2018 at 01:59:40PM -0400, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Avoid the socket lookup cost in udp_gro_receive if no socket has a
> gro callback configured.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>

...

> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 4f6aa95a9b12..f44fe328aa0f 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
>  {
>  	struct udphdr *uh = udp_gro_udphdr(skb);
>  
> -	if (unlikely(!uh))
> +	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
>  		goto flush;

If you use udp_encap_needed_key to enable UDP GRO, then a UDP
encapsulation socket will enable it too. Not sure if this is
intentional.

That said, enabling UDP GRO on a UDP encapsulation socket
(ESP in UDP etc.) will fail badly as then encrypted ESP
packets might be merged together. So we somehow should
make sure that this does not happen.

Anyway, this reminds me that we can support GRO for
UDP encapsulation. It just requires separate GRO
callbacks for the different encapsulation types.
Paolo Abeni Sept. 17, 2018, 10:24 a.m. UTC | #4
On Fri, 2018-09-14 at 13:59 -0400, Willem de Bruijn wrote:
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 4f6aa95a9b12..f44fe328aa0f 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
>  {
>  	struct udphdr *uh = udp_gro_udphdr(skb);
>  
> -	if (unlikely(!uh))
> +	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
>  		goto flush;
>  
>  	/* Don't bother verifying checksum if we're going to flush anyway. */

If I read this correctly, once udp_encap_needed_key is enabled, it will
never be turned off, because the tunnel and encap socket shut down does
not cope with udp_encap_needed_key.

Perhaps we should take care of that, too.

Cheers,

Paolo
Steffen Klassert Sept. 17, 2018, 10:37 a.m. UTC | #5
On Fri, Sep 14, 2018 at 01:59:40PM -0400, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Avoid the socket lookup cost in udp_gro_receive if no socket has a
> gro callback configured.

It would be nice if we could do GRO not just for GRO configured
sockets, but also for flows that are going to be IPsec transformed
or directly forwarded.

Maybe in case that forwarding is enabled on the receiving device,
inet_gro_receive() could do a route lookup and allow GRO if the
route lookup returned a forwarding route.

For flows that are likely software segmented after that, it
would be worth to build packet chains instead of merging the
payload. Packets of the same flow could travel together, but
it would save the cost of the packet merging and segmenting.
This could be done similar to what I proposed for the list
receive case:

https://www.spinics.net/lists/netdev/msg522706.html

How GRO should be done could be even configured
by replacing the net_offload pointer similar
to what Paolo proposed in his patchset with
the inet_update_offload() function.
Willem de Bruijn Sept. 17, 2018, 2:10 p.m. UTC | #6
On Mon, Sep 17, 2018 at 5:03 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> On Fri, Sep 14, 2018 at 01:59:40PM -0400, Willem de Bruijn wrote:
> > From: Willem de Bruijn <willemb@google.com>
> >
> > Avoid the socket lookup cost in udp_gro_receive if no socket has a
> > gro callback configured.
> >
> > Signed-off-by: Willem de Bruijn <willemb@google.com>
>
> ...
>
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 4f6aa95a9b12..f44fe328aa0f 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
> >  {
> >       struct udphdr *uh = udp_gro_udphdr(skb);
> >
> > -     if (unlikely(!uh))
> > +     if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
> >               goto flush;
>
> If you use udp_encap_needed_key to enable UDP GRO, then a UDP
> encapsulation socket will enable it too. Not sure if this is
> intentional.

Yes. That is already the case to a certain point. The function was
introduced with tunnels and is enabled by tunnels, but so far only
compiles out the encap_rcv() branch in udp_queue_rcv_skb.

With patch 7/8 it also toggles the GRO path. Critically, both are
enabled as soon as a tunnel is registered.

>
> That said, enabling UDP GRO on a UDP encapsulation socket
> (ESP in UDP etc.) will fail badly as then encrypted ESP
> packets might be merged together. So we somehow should
> make sure that this does not happen.

Absolutely. This initial implementation probably breaks UDP tunnels
badly. That needs to be addressed.

>
> Anyway, this reminds me that we can support GRO for
> UDP encapsulation. It just requires separate GRO
> callbacks for the different encapsulation types.
Willem de Bruijn Sept. 17, 2018, 2:12 p.m. UTC | #7
On Mon, Sep 17, 2018 at 6:24 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Fri, 2018-09-14 at 13:59 -0400, Willem de Bruijn wrote:
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 4f6aa95a9b12..f44fe328aa0f 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
> >  {
> >       struct udphdr *uh = udp_gro_udphdr(skb);
> >
> > -     if (unlikely(!uh))
> > +     if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
> >               goto flush;
> >
> >       /* Don't bother verifying checksum if we're going to flush anyway. */
>
> If I read this correctly, once udp_encap_needed_key is enabled, it will
> never be turned off, because the tunnel and encap socket shut down does
> not cope with udp_encap_needed_key.
>
> Perhaps we should take care of that, too.

Agreed. For now I reused what's already there, but I can extend
that with refcounting using static_branch_inc/static_branch_dec.
Willem de Bruijn Sept. 17, 2018, 2:19 p.m. UTC | #8
On Mon, Sep 17, 2018 at 6:37 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> On Fri, Sep 14, 2018 at 01:59:40PM -0400, Willem de Bruijn wrote:
> > From: Willem de Bruijn <willemb@google.com>
> >
> > Avoid the socket lookup cost in udp_gro_receive if no socket has a
> > gro callback configured.
>
> It would be nice if we could do GRO not just for GRO configured
> sockets, but also for flows that are going to be IPsec transformed
> or directly forwarded.

I thought about that, as we have GSO. An egregious hack enables
GRO for all registered local sockets that support it and for any flow
for which no local socket is registered:

@@ -365,11 +369,13 @@ struct sk_buff *udp_gro_receive(struct list_head
*head, struct sk_buff *skb,
        rcu_read_lock();
        sk = (*lookup)(skb, uh->source, uh->dest);

-       if (sk && udp_sk(sk)->gro_receive)
-               goto unflush;
-       goto out_unlock;
+       if (!sk)
+               gro_receive_cb = udp_gro_receive_cb;
+       else if (!udp_sk(sk)->gro_receive)
+               goto out_unlock;
+       else
+               gro_receive_cb = udp_sk(sk)->gro_receive;

@@ -392,7 +398,7 @@ struct sk_buff *udp_gro_receive(struct list_head
*head, struct sk_buff *skb,

        skb_gro_pull(skb, sizeof(struct udphdr)); /* pull
encapsulating udp header */
        skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
-       pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
+       pp = call_gro_receive_sk(gro_receive_cb, sk, head, skb);


But not having a local socket does not imply forwarding path, of course.

>
> Maybe in case that forwarding is enabled on the receiving device,
> inet_gro_receive() could do a route lookup and allow GRO if the
> route lookup returned at forwarding route.

That's a better solution, if the cost is acceptable. We do have to
be careful against increasing per packet cycle cost in this path
given that it's a possible vector for DoS attempts.

> For flows that are likely software segmented after that, it
> would be worth to build packet chains insted of merging the
> payload. Packets of the same flow could travel together, but
> it would save the cost of the packet merging and segmenting.

With software GSO that is faster, as it would have to allocate
all the separate segment skbs in skb_segment later. Though
there is some complexity if MTUs differ.

With hardware UDP GSO, having a single skb will be cheaper in
the forwarding path. Using napi_gro_frags, device drivers really
do only end up allocating one skb for the GSO packet.

> This could be done similar to what I proposed for the list
> receive case:
>
> https://www.spinics.net/lists/netdev/msg522706.html
>
> How GRO should be done could be even configured
> by replacing the net_offload pointer similar
to what Paolo proposed in his patchset with
> the inet_update_offload() function.

Right. The above hack also already has to use two distinct
callback assignments.
Steffen Klassert Sept. 18, 2018, 10:59 a.m. UTC | #9
On Mon, Sep 17, 2018 at 10:19:22AM -0400, Willem de Bruijn wrote:
> On Mon, Sep 17, 2018 at 6:37 AM Steffen Klassert
> <steffen.klassert@secunet.com> wrote:
> >
> > Maybe in case that forwarding is enabled on the receiving device,
> > inet_gro_receive() could do a route lookup and allow GRO if the
> > route lookup returned at forwarding route.
> 
> That's a better solution, if the cost is acceptable. We do have to
> be careful against increasing per packet cycle cost in this path
> given that it's a possible vector for DoS attempts.

I played with this already and I have not seen any significant
overhead when doing a route lookup in GRO. We have to do a route
lookup anyway, so it should not matter too much if we do it here
or later in the IP layer.

> 
> > For flows that are likely software segmented after that, it
> > would be worth to build packet chains insted of merging the
> > payload. Packets of the same flow could travel together, but
> > it would save the cost of the packet merging and segmenting.
> 
> With software GSO that is faster, as it would have to allocate
> all the separate segment skbs in skb_segment later. Though
> there is some complexity if MTUs differ.

This can be handled the same way as it is done for TCP GSO
packets. If the size of the original packets is bigger than
the outgoing MTU, we either signal ICMP_FRAG_NEEDED to the
sender, or split the chain back into single packets and do
fragmentation on them.

> 
> With hardware UDP GSO, having a single skb will be cheaper in
> the forwarding path. Using napi_gro_frags, device drivers really
> do only end up allocating one skb for the GSO packet.

Right, this is why it would be nice to have this
configurable.
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..9e82cb391dea 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -443,8 +443,10 @@  int udpv4_offload_init(void);
 
 void udp_init(void);
 
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void);
 #if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
 void udpv6_encap_enable(void);
 #endif
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..bd873a5b8a86 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1889,7 +1889,7 @@  static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void)
 {
 	static_branch_enable(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4f6aa95a9b12..f44fe328aa0f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -405,7 +405,7 @@  static struct sk_buff *udp4_gro_receive(struct list_head *head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-	if (unlikely(!uh))
+	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 83f4c77c79d8..d84672959f10 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -548,7 +548,7 @@  static __inline__ void udpv6_err(struct sk_buff *skb,
 	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
 
-static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
 void udpv6_encap_enable(void)
 {
 	static_branch_enable(&udpv6_encap_needed_key);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 2a41da0dd33f..e00f19c4a939 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -119,7 +119,7 @@  static struct sk_buff *udp6_gro_receive(struct list_head *head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-	if (unlikely(!uh))
+	if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */