diff mbox series

[RFC,1/3] net: Prepare GSO return values for fraglist GSO.

Message ID 20181221075334.9000-2-steffen.klassert@secunet.com
State RFC, archived
Delegated to: David Miller
Headers show
Series Support fraglist GRO/GSO | expand

Commit Message

Steffen Klassert Dec. 21, 2018, 7:53 a.m. UTC
On fraglist GSO, we don't need to clone the original
skb, so the returned pointer can be the original skb itself
and there is nothing separate for the caller to free.
Prepare the GSO callers to free the original skb only
if the returned pointer differs from it. Fraglist
GSO frees the original skb itself on error and
returns -EREMOTE in this case.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/udp.h      |  8 ++++++--
 net/core/dev.c         | 11 +++++++----
 net/ipv4/ip_output.c   |  3 ++-
 net/xfrm/xfrm_output.c |  3 ++-
 4 files changed, 17 insertions(+), 8 deletions(-)

Comments

Paolo Abeni Jan. 8, 2019, 1:53 p.m. UTC | #1
On Fri, 2018-12-21 at 08:53 +0100, Steffen Klassert wrote:
> On fraglist GSO, we don't need to clone the original
> skb. So we don't have anything to return to free.
> Prepare GSO that it frees the original skb only
> if the return pointer really changed. Fraglist
> GSO frees the original skb itself on error and
> returns -EREMOTE in this case.

I think it would be nicer to preserve the same semantics with GRO list, so
that we don't have to add this special handling.

e.g. calling skb_get(skb) in skb_segment_list() when successful, would
avoid the special handling for the no error case (at the cost of 2
atomic ops per gso_list packet)

> diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
> index 4ae87c5ce2e3..1941dc2a80a0 100644
> --- a/net/xfrm/xfrm_output.c
> +++ b/net/xfrm/xfrm_output.c
> @@ -183,7 +183,8 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
>  	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
>  	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
>  	segs = skb_gso_segment(skb, 0);
> -	kfree_skb(skb);
> +	if (segs != skb)
> +		kfree_skb(skb);
>  	if (IS_ERR(segs))

What if PTR_ERR(segs) == -EREMOTE here? Then segs != skb holds, so
kfree_skb(skb) runs on an skb that fraglist GSO has already freed.

>  		return PTR_ERR(segs);
>  	if (segs == NULL)
Steffen Klassert Jan. 14, 2019, 12:53 p.m. UTC | #2
On Tue, Jan 08, 2019 at 02:53:52PM +0100, Paolo Abeni wrote:
> On Fri, 2018-12-21 at 08:53 +0100, Steffen Klassert wrote:
> > On fraglist GSO, we don't need to clone the original
> > skb. So we don't have anything to return to free.
> > Prepare GSO that it frees the original skb only
> > if the return pointer really changed. Fraglist
> > GSO frees the original skb itself on error and
> > returns -EREMOTE in this case.
> 
> I think it would be nicer to preserve the same semantics with GRO list, so
> that we don't have to add this special handling.
> 
> e.g. calling skb_get(skb) in skb_segment_list() when successful, would
> avoid the special handling for the no error case (at the cost of 2
> atomic ops per gso_list packet)

That's a good idea, I'll do this in the next version of the patchset.
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index fd6d948755c8..f89b95c3f91e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -482,11 +482,15 @@  static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 
 		atomic_add(segs_nr, &sk->sk_drops);
 		SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr);
-		kfree_skb(skb);
+
+		if (PTR_ERR(segs) != -EREMOTE)
+			kfree_skb(skb);
 		return NULL;
 	}
 
-	consume_skb(skb);
+	if (segs != skb)
+		consume_skb(skb);
+
 	return segs;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 754284873355..53df5ac7c9b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3030,7 +3030,8 @@  struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	}
 	rcu_read_unlock();
 
-	__skb_push(skb, skb->data - skb_mac_header(skb));
+	if (segs != skb)
+		__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	return segs;
 }
@@ -3099,7 +3100,7 @@  struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 
 	segs = skb_mac_gso_segment(skb, features);
 
-	if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+	if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
 		skb_warn_bad_offload(skb);
 
 	return segs;
@@ -3345,8 +3346,10 @@  static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 
 		segs = skb_gso_segment(skb, features);
 		if (IS_ERR(segs)) {
-			goto out_kfree_skb;
-		} else if (segs) {
+			if (PTR_ERR(segs) != -EREMOTE)
+				goto out_kfree_skb;
+			goto out_null;
+		} else if (segs && segs != skb) {
 			consume_skb(skb);
 			skb = segs;
 		}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ab6618036afe..f4cecda6c1e8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -272,7 +272,8 @@  static int ip_finish_output_gso(struct net *net, struct sock *sk,
 		return -ENOMEM;
 	}
 
-	consume_skb(skb);
+	if (segs != skb)
+		consume_skb(skb);
 
 	do {
 		struct sk_buff *nskb = segs->next;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 4ae87c5ce2e3..1941dc2a80a0 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -183,7 +183,8 @@  static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
 	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
 	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
 	segs = skb_gso_segment(skb, 0);
-	kfree_skb(skb);
+	if (segs != skb)
+		kfree_skb(skb);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 	if (segs == NULL)