diff mbox series

[net-next,v2,4/4] udp: Support UDP fraglist GRO/GSO.

Message ID 20200124082218.2572-5-steffen.klassert@secunet.com
State Changes Requested
Delegated to: David Miller
Headers show
Series Support fraglist GRO/GSO | expand

Commit Message

Steffen Klassert Jan. 24, 2020, 8:22 a.m. UTC
This patch extends UDP GRO to support fraglist GRO/GSO
by using the previously introduced infrastructure.
If the feature is enabled, all UDP packets are going to
fraglist GRO (local input and forward).

After validating the csum, we mark ip_summed as
CHECKSUM_UNNECESSARY for fraglist GRO packets to
make sure that the csum is not touched.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/udp.h      |   2 +-
 net/ipv4/udp_offload.c | 104 ++++++++++++++++++++++++++++++++---------
 net/ipv6/udp_offload.c |  22 ++++++++-
 3 files changed, 102 insertions(+), 26 deletions(-)

Comments

Willem de Bruijn Jan. 24, 2020, 9:13 p.m. UTC | #1
On Fri, Jan 24, 2020 at 3:24 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> This patch extends UDP GRO to support fraglist GRO/GSO
> by using the previously introduced infrastructure.
> If the feature is enabled, all UDP packets are going to
> fraglist GRO (local input and forward).
>
> After validating the csum, we mark ip_summed as
> CHECKSUM_UNNECESSARY for fraglist GRO packets to
> make sure that the csum is not touched.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> ---
>  include/net/udp.h      |   2 +-
>  net/ipv4/udp_offload.c | 104 ++++++++++++++++++++++++++++++++---------
>  net/ipv6/udp_offload.c |  22 ++++++++-
>  3 files changed, 102 insertions(+), 26 deletions(-)
>
> diff --git a/include/net/udp.h b/include/net/udp.h
> index bad74f780831..44e0e52b585c 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -167,7 +167,7 @@ typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
>                                      __be16 dport);
>
>  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> -                               struct udphdr *uh, udp_lookup_t lookup);
> +                               struct udphdr *uh, struct sock *sk);
>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
>
>  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index b25e42100ceb..1a98583a79f4 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -184,6 +184,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(skb_udp_tunnel_segment);
>
> +static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> +                                             netdev_features_t features)
> +{
> +       unsigned int mss = skb_shinfo(skb)->gso_size;
> +
> +       skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> +       if (IS_ERR(skb))
> +               return skb;
> +
> +       udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> +
> +       return skb;
> +}
> +
>  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
>                                   netdev_features_t features)
>  {
> @@ -196,6 +210,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
>         __sum16 check;
>         __be16 newlen;
>
> +       if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
> +               return __udp_gso_segment_list(gso_skb, features);
> +
>         mss = skb_shinfo(gso_skb)->gso_size;
>         if (gso_skb->len <= sizeof(*uh) + mss)
>                 return ERR_PTR(-EINVAL);
> @@ -354,6 +371,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>         struct udphdr *uh2;
>         struct sk_buff *p;
>         unsigned int ulen;
> +       int ret = 0;
>
>         /* requires non zero csum, for symmetry with GSO */
>         if (!uh->check) {
> @@ -369,7 +387,6 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>         }
>         /* pull encapsulating udp header */
>         skb_gro_pull(skb, sizeof(struct udphdr));
> -       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>
>         list_for_each_entry(p, head, list) {
>                 if (!NAPI_GRO_CB(p)->same_flow)
> @@ -383,14 +400,40 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>                         continue;
>                 }
>
> +               if (NAPI_GRO_CB(skb)->is_flist != NAPI_GRO_CB(p)->is_flist) {
> +                       NAPI_GRO_CB(skb)->flush = 1;
> +                       return p;
> +               }
> +
>                 /* Terminate the flow on len mismatch or if it grow "too much".
>                  * Under small packet flood GRO count could elsewhere grow a lot
>                  * leading to excessive truesize values.
>                  * On len mismatch merge the first packet shorter than gso_size,
>                  * otherwise complete the GRO packet.
>                  */
> -               if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
> -                   ulen != ntohs(uh2->len) ||
> +               if (ulen > ntohs(uh2->len)) {
> +                       pp = p;
> +               } else {
> +                       if (NAPI_GRO_CB(skb)->is_flist) {
> +                               if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
> +                                       NAPI_GRO_CB(skb)->flush = 1;
> +                                       return NULL;
> +                               }
> +                               if ((skb->ip_summed != p->ip_summed) ||
> +                                   (skb->csum_level != p->csum_level)) {
> +                                       NAPI_GRO_CB(skb)->flush = 1;
> +                                       return NULL;
> +                               }
> +                               ret = skb_gro_receive_list(p, skb);
> +                       } else {
> +                               skb_gro_postpull_rcsum(skb, uh,
> +                                                      sizeof(struct udphdr));
> +
> +                               ret = skb_gro_receive(p, skb);
> +                       }
> +               }
> +
> +               if (ret || ulen != ntohs(uh2->len) ||
>                     NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
>                         pp = p;
>
> @@ -401,36 +444,29 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>         return NULL;
>  }
>
> -INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
> -                                                  __be16 sport, __be16 dport));
>  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> -                               struct udphdr *uh, udp_lookup_t lookup)
> +                               struct udphdr *uh, struct sock *sk)
>  {
>         struct sk_buff *pp = NULL;
>         struct sk_buff *p;
>         struct udphdr *uh2;
>         unsigned int off = skb_gro_offset(skb);
>         int flush = 1;
> -       struct sock *sk;
>
> -       rcu_read_lock();
> -       sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
> -                               udp4_lib_lookup_skb, skb, uh->source, uh->dest);
> -       if (!sk)
> -               goto out_unlock;
> +       if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
> +               NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
>
> -       if (udp_sk(sk)->gro_enabled) {
> +       if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
>                 pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> -               rcu_read_unlock();
>                 return pp;
>         }
>
> -       if (NAPI_GRO_CB(skb)->encap_mark ||
> +       if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
>             (skb->ip_summed != CHECKSUM_PARTIAL &&
>              NAPI_GRO_CB(skb)->csum_cnt == 0 &&
>              !NAPI_GRO_CB(skb)->csum_valid) ||
>             !udp_sk(sk)->gro_receive)
> -               goto out_unlock;
> +               goto out;
>
>         /* mark that this skb passed once through the tunnel gro layer */
>         NAPI_GRO_CB(skb)->encap_mark = 1;
> @@ -457,8 +493,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>         skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>         pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
>
> -out_unlock:
> -       rcu_read_unlock();
> +out:
>         skb_gro_flush_final(skb, pp, flush);
>         return pp;
>  }
> @@ -468,8 +503,10 @@ INDIRECT_CALLABLE_SCOPE
>  struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
>  {
>         struct udphdr *uh = udp_gro_udphdr(skb);
> +       struct sk_buff *pp;
> +       struct sock *sk;
>
> -       if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
> +       if (unlikely(!uh))
>                 goto flush;
>
>         /* Don't bother verifying checksum if we're going to flush anyway. */
> @@ -484,7 +521,11 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
>                                              inet_gro_compute_pseudo);
>  skip:
>         NAPI_GRO_CB(skb)->is_ipv6 = 0;
> -       return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
> +       rcu_read_lock();
> +       sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
> +       pp = udp_gro_receive(head, skb, uh, sk);
> +       rcu_read_unlock();
> +       return pp;
>
>  flush:
>         NAPI_GRO_CB(skb)->flush = 1;
> @@ -517,9 +558,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
>         rcu_read_lock();
>         sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
>                                 udp4_lib_lookup_skb, skb, uh->source, uh->dest);
> -       if (sk && udp_sk(sk)->gro_enabled) {
> -               err = udp_gro_complete_segment(skb);
> -       } else if (sk && udp_sk(sk)->gro_complete) {
> +       if (sk && udp_sk(sk)->gro_complete) {
>                 skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
>                                         : SKB_GSO_UDP_TUNNEL;
>
> @@ -529,6 +568,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
>                 skb->encapsulation = 1;
>                 err = udp_sk(sk)->gro_complete(sk, skb,
>                                 nhoff + sizeof(struct udphdr));
> +       } else {
> +               err = udp_gro_complete_segment(skb);
>         }
>         rcu_read_unlock();
>
> @@ -544,6 +585,23 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
>         const struct iphdr *iph = ip_hdr(skb);
>         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               uh->len = htons(skb->len - nhoff);
> +
> +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
> +                       if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
> +                               skb->csum_level++;
> +               } else {
> +                       skb->ip_summed = CHECKSUM_UNNECESSARY;
> +                       skb->csum_level = 0;
> +               }
> +
> +               return 0;
> +       }
> +
>         if (uh->check)
>                 uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
>                                           iph->daddr, 0);
> diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
> index f0d5fc27d0b5..4c55b0efe0cb 100644
> --- a/net/ipv6/udp_offload.c
> +++ b/net/ipv6/udp_offload.c
> @@ -115,8 +115,10 @@ INDIRECT_CALLABLE_SCOPE
>  struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
>  {
>         struct udphdr *uh = udp_gro_udphdr(skb);
> +       struct sk_buff *pp;
> +       struct sock *sk;
>
> -       if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
> +       if (unlikely(!uh))
>                 goto flush;
>
>         /* Don't bother verifying checksum if we're going to flush anyway. */
> @@ -132,7 +134,11 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
>
>  skip:
>         NAPI_GRO_CB(skb)->is_ipv6 = 1;
> -       return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
> +       rcu_read_lock();
> +       sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
> +       pp = udp_gro_receive(head, skb, uh, sk);
> +       rcu_read_unlock();
> +       return pp;
>
>  flush:
>         NAPI_GRO_CB(skb)->flush = 1;
> @@ -144,6 +150,18 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
>         const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
>         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               uh->len = htons(skb->len - nhoff);
> +
> +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> +               skb->csum_level = ~0;

This probably needs to be the same change as in udp4_gro_complete.

Otherwise patch set looks great to me based on a git range-diff to v1.
Steffen Klassert Jan. 24, 2020, 9:30 p.m. UTC | #2
On Fri, Jan 24, 2020 at 04:13:17PM -0500, Willem de Bruijn wrote:
> On Fri, Jan 24, 2020 at 3:24 AM Steffen Klassert
> <steffen.klassert@secunet.com> wrote:
> >         NAPI_GRO_CB(skb)->flush = 1;
> > @@ -144,6 +150,18 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
> >         const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> >         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
> >
> > +       if (NAPI_GRO_CB(skb)->is_flist) {
> > +               uh->len = htons(skb->len - nhoff);
> > +
> > +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> > +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> > +
> > +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> > +               skb->csum_level = ~0;
> 
> This probably needs to be the same change as in udp4_gro_complete.
> 
> Otherwise patch set looks great to me based on a git range-diff to v1.

Uhm, yes absolutely.

I'll do a v3 tomorrow.

Thanks for your review Willem!
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index bad74f780831..44e0e52b585c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,7 +167,7 @@  typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
 				     __be16 dport);
 
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
-				struct udphdr *uh, udp_lookup_t lookup);
+				struct udphdr *uh, struct sock *sk);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b25e42100ceb..1a98583a79f4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -184,6 +184,20 @@  struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+
+	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	if (IS_ERR(skb))
+		return skb;
+
+	udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+
+	return skb;
+}
+
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 				  netdev_features_t features)
 {
@@ -196,6 +210,9 @@  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 	__sum16 check;
 	__be16 newlen;
 
+	if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+		return __udp_gso_segment_list(gso_skb, features);
+
 	mss = skb_shinfo(gso_skb)->gso_size;
 	if (gso_skb->len <= sizeof(*uh) + mss)
 		return ERR_PTR(-EINVAL);
@@ -354,6 +371,7 @@  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 	struct udphdr *uh2;
 	struct sk_buff *p;
 	unsigned int ulen;
+	int ret = 0;
 
 	/* requires non zero csum, for symmetry with GSO */
 	if (!uh->check) {
@@ -369,7 +387,6 @@  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 	}
 	/* pull encapsulating udp header */
 	skb_gro_pull(skb, sizeof(struct udphdr));
-	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
 
 	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
@@ -383,14 +400,40 @@  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 			continue;
 		}
 
+		if (NAPI_GRO_CB(skb)->is_flist != NAPI_GRO_CB(p)->is_flist) {
+			NAPI_GRO_CB(skb)->flush = 1;
+			return p;
+		}
+
 		/* Terminate the flow on len mismatch or if it grow "too much".
 		 * Under small packet flood GRO count could elsewhere grow a lot
 		 * leading to excessive truesize values.
 		 * On len mismatch merge the first packet shorter than gso_size,
 		 * otherwise complete the GRO packet.
 		 */
-		if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
-		    ulen != ntohs(uh2->len) ||
+		if (ulen > ntohs(uh2->len)) {
+			pp = p;
+		} else {
+			if (NAPI_GRO_CB(skb)->is_flist) {
+				if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
+					NAPI_GRO_CB(skb)->flush = 1;
+					return NULL;
+				}
+				if ((skb->ip_summed != p->ip_summed) ||
+				    (skb->csum_level != p->csum_level)) {
+					NAPI_GRO_CB(skb)->flush = 1;
+					return NULL;
+				}
+				ret = skb_gro_receive_list(p, skb);
+			} else {
+				skb_gro_postpull_rcsum(skb, uh,
+						       sizeof(struct udphdr));
+
+				ret = skb_gro_receive(p, skb);
+			}
+		}
+
+		if (ret || ulen != ntohs(uh2->len) ||
 		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
 			pp = p;
 
@@ -401,36 +444,29 @@  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 	return NULL;
 }
 
-INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
-						   __be16 sport, __be16 dport));
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
-				struct udphdr *uh, udp_lookup_t lookup)
+				struct udphdr *uh, struct sock *sk)
 {
 	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct udphdr *uh2;
 	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
-	struct sock *sk;
 
-	rcu_read_lock();
-	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
-				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
-	if (!sk)
-		goto out_unlock;
+	if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+		NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
 
-	if (udp_sk(sk)->gro_enabled) {
+	if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
 		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
-		rcu_read_unlock();
 		return pp;
 	}
 
-	if (NAPI_GRO_CB(skb)->encap_mark ||
+	if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 	     !NAPI_GRO_CB(skb)->csum_valid) ||
 	    !udp_sk(sk)->gro_receive)
-		goto out_unlock;
+		goto out;
 
 	/* mark that this skb passed once through the tunnel gro layer */
 	NAPI_GRO_CB(skb)->encap_mark = 1;
@@ -457,8 +493,7 @@  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
 	pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
 
-out_unlock:
-	rcu_read_unlock();
+out:
 	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
@@ -468,8 +503,10 @@  INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
+	struct sk_buff *pp;
+	struct sock *sk;
 
-	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+	if (unlikely(!uh))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -484,7 +521,11 @@  struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 					     inet_gro_compute_pseudo);
 skip:
 	NAPI_GRO_CB(skb)->is_ipv6 = 0;
-	return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
+	rcu_read_lock();
+	sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+	pp = udp_gro_receive(head, skb, uh, sk);
+	rcu_read_unlock();
+	return pp;
 
 flush:
 	NAPI_GRO_CB(skb)->flush = 1;
@@ -517,9 +558,7 @@  int udp_gro_complete(struct sk_buff *skb, int nhoff,
 	rcu_read_lock();
 	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
 				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_enabled) {
-		err = udp_gro_complete_segment(skb);
-	} else if (sk && udp_sk(sk)->gro_complete) {
+	if (sk && udp_sk(sk)->gro_complete) {
 		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
 					: SKB_GSO_UDP_TUNNEL;
 
@@ -529,6 +568,8 @@  int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		skb->encapsulation = 1;
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
+	} else {
+		err = udp_gro_complete_segment(skb);
 	}
 	rcu_read_unlock();
 
@@ -544,6 +585,23 @@  INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		uh->len = htons(skb->len - nhoff);
+
+		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+			if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+				skb->csum_level++;
+		} else {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			skb->csum_level = 0;
+		}
+
+		return 0;
+	}
+
 	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index f0d5fc27d0b5..4c55b0efe0cb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -115,8 +115,10 @@  INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
+	struct sk_buff *pp;
+	struct sock *sk;
 
-	if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
+	if (unlikely(!uh))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -132,7 +134,11 @@  struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 skip:
 	NAPI_GRO_CB(skb)->is_ipv6 = 1;
-	return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
+	rcu_read_lock();
+	sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+	pp = udp_gro_receive(head, skb, uh, sk);
+	rcu_read_unlock();
+	return pp;
 
 flush:
 	NAPI_GRO_CB(skb)->flush = 1;
@@ -144,6 +150,18 @@  INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		uh->len = htons(skb->len - nhoff);
+
+		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->csum_level = ~0;
+
+		return 0;
+	}
+
 	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);