
[RFC,v2,3/3] udp: Support UDP fraglist GRO/GSO.

Message ID: 20190128085025.14532-4-steffen.klassert@secunet.com
State: RFC
Delegated to: David Miller
Series: Support fraglist GRO/GSO

Commit Message

Steffen Klassert Jan. 28, 2019, 8:50 a.m. UTC
This patch extends UDP GRO to support fraglist GRO/GSO
by using the previously introduced infrastructure.
All UDP packets that are not targeted at a GRO capable
UDP socket now go through fraglist GRO (local input
and forwarding).
---
 net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
 net/ipv6/udp_offload.c |  9 +++++++++
 2 files changed, 50 insertions(+), 4 deletions(-)
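
For context: fraglist GRO keeps each received segment intact and chains
it off the head skb's frag_list, instead of merging the payloads into
the head's paged frags as regular GRO does. A minimal sketch of walking
such a chain (process_segment() is a hypothetical consumer, not an API
from this series):

	struct sk_buff *seg;

	for (seg = skb_shinfo(head)->frag_list; seg; seg = seg->next)
		process_segment(seg);

Because the original skbs survive aggregation, skb_segment_list() can
later split the super-skb back into them without copying payload data.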

Comments

Paolo Abeni Jan. 28, 2019, 4:37 p.m. UTC | #1
Hi,

On Mon, 2019-01-28 at 09:50 +0100, Steffen Klassert wrote:
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 584635db9231..c0be33216750 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
[...]
> @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>  		 * Under small packet flood GRO count could elsewhere grow a lot
>  		 * leading to execessive truesize values
>  		 */
> -		if (!skb_gro_receive(p, skb) &&
> -		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> +		if (NAPI_GRO_CB(skb)->is_flist) {
> +			if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> +				return NULL;
> +			ret = skb_gro_receive_list(p, skb);
> +		} else {
> +			skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> +
> +			ret = skb_gro_receive(p, skb);
> +		}
> +
> +		if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
                                                  ^^
Minor nitpick: here we may want to preserve the '>=' operator,
otherwise one extra segment is merged before the flow is flushed.

Note: I've not finished looking at the patches yet. I'll try to provide
some benchmarking, which will take some time.

Cheers,

Paolo
Willem de Bruijn Jan. 28, 2019, 8:49 p.m. UTC | #2
On Mon, Jan 28, 2019 at 2:51 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> This patch extends UDP GRO to support fraglist GRO/GSO
> by using the previously introduced infrastructure.
> All UDP packets that are not targeted at a GRO capable
> UDP socket now go through fraglist GRO (local input
> and forwarding).
> ---
>  net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
>  net/ipv6/udp_offload.c |  9 +++++++++
>  2 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 584635db9231..c0be33216750 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -188,6 +188,22 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(skb_udp_tunnel_segment);
>
> +static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> +                                             netdev_features_t features)
> +{
> +       unsigned int mss = skb_shinfo(skb)->gso_size;
> +
> +       skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> +       if (IS_ERR(skb))
> +               return skb;
> +
> +       udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> +       skb->ip_summed = CHECKSUM_NONE;
> +       skb->csum_valid = 1;

csum_valid is only used on ingress.

Hardcoding CHECKSUM_NONE is probably fine as long as this function is
only used for forwarding, assuming we don't care about verifying
checksums in the forwarding case.

But this is fragile if we ever add local list segment output. Should
we convert the checksum field in skb_forward_csum, instead of at the
GSO layer, just as for forwarding of non-list skbs? Though that would
require traversing the list yet another time. The other option is to
do this conversion already when building the list in GRO.

The comment also applies to the same logic in skb_segment_list. As a
matter of fact, even if this belongs in GSO instead of core forwarding
or GRO, then probably both the list head and the frag_list skbs should
be set in the same function, i.e. skb_segment_list.
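
For reference, the helper in question is tiny; quoted from
include/linux/skbuff.h in mainline at the time:

	static inline void skb_forward_csum(struct sk_buff *skb)
	{
		/* Unfortunately we don't support this one.  Any brave souls? */
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	}

Converting the frag_list members there would turn this trivial inline
into a list walk on every forwarded skb, which is the cost being
weighed above.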

> +
> +       return skb;
> +}
> +
>  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
>                                   netdev_features_t features)
>  {
> @@ -200,6 +216,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
>         __sum16 check;
>         __be16 newlen;
>
> +       if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
> +               return __udp_gso_segment_list(gso_skb, features);
> +
>         mss = skb_shinfo(gso_skb)->gso_size;
>         if (gso_skb->len <= sizeof(*uh) + mss)
>                 return ERR_PTR(-EINVAL);
> @@ -352,16 +371,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>         struct sk_buff *pp = NULL;
>         struct udphdr *uh2;
>         struct sk_buff *p;
> +       int ret;
>
>         /* requires non zero csum, for symmetry with GSO */
>         if (!uh->check) {
>                 NAPI_GRO_CB(skb)->flush = 1;
>                 return NULL;
>         }
> -

Accidental whitespace removal?

>         /* pull encapsulating udp header */
>         skb_gro_pull(skb, sizeof(struct udphdr));
> -       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>
>         list_for_each_entry(p, head, list) {
>                 if (!NAPI_GRO_CB(p)->same_flow)
> @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>                  * Under small packet flood GRO count could elsewhere grow a lot
>                  * leading to execessive truesize values
>                  */
> -               if (!skb_gro_receive(p, skb) &&
> -                   NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> +               if (NAPI_GRO_CB(skb)->is_flist) {
> +                       if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> +                               return NULL;
> +                       ret = skb_gro_receive_list(p, skb);
> +               } else {
> +                       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> +
> +                       ret = skb_gro_receive(p, skb);
> +               }
> +
> +               if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
>                         pp = p;
>                 else if (uh->len != uh2->len)
>                         pp = p;
> @@ -402,6 +429,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>         int flush = 1;
>
>         if (!sk || !udp_sk(sk)->gro_receive) {
> +               NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;

This updates the choice of whether to use a list on each received skb,
which is problematic as a socket can call the setsockopt in between
packets.

Actually, there no longer is a need for a socket lookup for each skb at
all. We always apply GRO, which was the previous reason for the
lookup. And if a matching flow is found in the GRO table, the choice
to use a list is already stored.
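
To make the race concrete in userspace terms, a hypothetical sequence
using the existing UDP_GRO socket option (from
include/uapi/linux/udp.h):

	int on = 1;

	/* packet 1 of a flow arrives while UDP_GRO is still off:
	 * is_flist is computed as 1 */
	setsockopt(fd, SOL_UDP, UDP_GRO, &on, sizeof(on));
	/* packet 2 of the same flow arrives: is_flist is now 0,
	 * so two skbs of one GRO flow disagree */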


>                 pp = call_gro_receive(udp_gro_receive_segment, head, skb);
>                 return pp;
>         }
> @@ -530,6 +558,15 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
>         const struct iphdr *iph = ip_hdr(skb);
>         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               uh->len = htons(skb->len - nhoff);
> +
> +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               return 0;
> +       }
> +
>         if (uh->check)
>                 uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
>                                           iph->daddr, 0);
> diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
> index 5f7937a4f71a..7c3f28310baa 100644
> --- a/net/ipv6/udp_offload.c
> +++ b/net/ipv6/udp_offload.c
> @@ -154,6 +154,15 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
>         const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
>         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               uh->len = htons(skb->len - nhoff);
> +
> +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               return 0;
> +       }
> +
>         if (uh->check)
>                 uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
>                                           &ipv6h->daddr, 0);
> --
> 2.17.1
>
Steffen Klassert Feb. 13, 2019, 11:48 a.m. UTC | #3
On Mon, Jan 28, 2019 at 02:49:32PM -0600, Willem de Bruijn wrote:
> On Mon, Jan 28, 2019 at 2:51 AM Steffen Klassert
> <steffen.klassert@secunet.com> wrote:
> >
> > This patch extends UDP GRO to support fraglist GRO/GSO
> > by using the previously introduced infrastructure.
> > All UDP packets that are not targeted at a GRO capable
> > UDP socket now go through fraglist GRO (local input
> > and forwarding).
> > ---
> >  net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
> >  net/ipv6/udp_offload.c |  9 +++++++++
> >  2 files changed, 50 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 584635db9231..c0be33216750 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -188,6 +188,22 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> >  }
> >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >
> > +static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> > +                                             netdev_features_t features)
> > +{
> > +       unsigned int mss = skb_shinfo(skb)->gso_size;
> > +
> > +       skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> > +       if (IS_ERR(skb))
> > +               return skb;
> > +
> > +       udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> > +       skb->ip_summed = CHECKSUM_NONE;
> > +       skb->csum_valid = 1;
> 
> csum_valid is only used on ingress.
> 
> Hardcoding CHECKSUM_NONE is probably fine as long as this function is
> only used for forwarding, assuming we don't care about verifying
> checksums in the forwarding case.
> 
> But this is fragile if we ever add local list segment output. Should
> we convert the checksum field in skb_forward_csum, instead of at the
> GSO layer, just as for forwarding of non-list skbs? Though that would
> require traversing the list yet another time. The other option is to
> do this conversion already when building the list in GRO.
> 
> The comment also applies to the same logic in skb_segment_list. As a
> matter of fact, even if this belongs in GSO instead of core forwarding
> or GRO, then probably both the list head and the frag_list skbs should
> be set in the same function, i.e. skb_segment_list.

I had a deeper look into this now; it seems that the head skb already
has the correct conversion (either for local input or forwarding).
So we probably just need to adjust the frag_list skbs to match the
head skb's checksum conversion.
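
Something along these lines, as a rough sketch (the helper name is made
up, not part of this series):

	/* Propagate the head skb's checksum state to all frag_list
	 * members after GRO, so GSO can leave them untouched.
	 */
	static void skb_flist_propagate_csum(struct sk_buff *head)
	{
		struct sk_buff *p;

		skb_walk_frags(head, p) {
			p->ip_summed = head->ip_summed;
			p->csum_valid = head->csum_valid;
		}
	}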

> >         /* pull encapsulating udp header */
> >         skb_gro_pull(skb, sizeof(struct udphdr));
> > -       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> >
> >         list_for_each_entry(p, head, list) {
> >                 if (!NAPI_GRO_CB(p)->same_flow)
> > @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> >                  * Under small packet flood GRO count could elsewhere grow a lot
> >                  * leading to execessive truesize values
> >                  */
> > -               if (!skb_gro_receive(p, skb) &&
> > -                   NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> > +               if (NAPI_GRO_CB(skb)->is_flist) {
> > +                       if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> > +                               return NULL;
> > +                       ret = skb_gro_receive_list(p, skb);
> > +               } else {
> > +                       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> > +
> > +                       ret = skb_gro_receive(p, skb);
> > +               }
> > +
> > +               if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
> >                         pp = p;
> >                 else if (uh->len != uh2->len)
> >                         pp = p;
> > @@ -402,6 +429,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> >         int flush = 1;
> >
> >         if (!sk || !udp_sk(sk)->gro_receive) {
> > +               NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
> 
> This updates the choice of whether to use a list on each received skb,
> which is problematic as a socket can call the setsockopt in between
> packets.
> 
> Actually, there no longer is a need for a socket lookup for each skb at
> all. We always apply GRO, which was the previous reason for the
> lookup. And if a matching flow is found in the GRO table, the choice
> to use a list is already stored.

Yes, that's true. I'll change that.

Patch

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 584635db9231..c0be33216750 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -188,6 +188,22 @@  struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+
+	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	if (IS_ERR(skb))
+		return skb;
+
+	udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_valid = 1;
+
+	return skb;
+}
+
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 				  netdev_features_t features)
 {
@@ -200,6 +216,9 @@  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 	__sum16 check;
 	__be16 newlen;
 
+	if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+		return __udp_gso_segment_list(gso_skb, features);
+
 	mss = skb_shinfo(gso_skb)->gso_size;
 	if (gso_skb->len <= sizeof(*uh) + mss)
 		return ERR_PTR(-EINVAL);
@@ -352,16 +371,15 @@  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 	struct sk_buff *pp = NULL;
 	struct udphdr *uh2;
 	struct sk_buff *p;
+	int ret;
 
 	/* requires non zero csum, for symmetry with GSO */
 	if (!uh->check) {
 		NAPI_GRO_CB(skb)->flush = 1;
 		return NULL;
 	}
-
 	/* pull encapsulating udp header */
 	skb_gro_pull(skb, sizeof(struct udphdr));
-	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
 
 	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
@@ -379,8 +397,17 @@  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 		 * Under small packet flood GRO count could elsewhere grow a lot
 		 * leading to execessive truesize values
 		 */
-		if (!skb_gro_receive(p, skb) &&
-		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
+		if (NAPI_GRO_CB(skb)->is_flist) {
+			if (!pskb_may_pull(skb, skb_gro_offset(skb)))
+				return NULL;
+			ret = skb_gro_receive_list(p, skb);
+		} else {
+			skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+			ret = skb_gro_receive(p, skb);
+		}
+
+		if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
 			pp = p;
 		else if (uh->len != uh2->len)
 			pp = p;
@@ -402,6 +429,7 @@  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	int flush = 1;
 
 	if (!sk || !udp_sk(sk)->gro_receive) {
+		NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
 		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
 		return pp;
 	}
@@ -530,6 +558,15 @@  INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		uh->len = htons(skb->len - nhoff);
+
+		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		return 0;
+	}
+
 	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 5f7937a4f71a..7c3f28310baa 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -154,6 +154,15 @@  INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		uh->len = htons(skb->len - nhoff);
+
+		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		return 0;
+	}
+
 	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);