diff mbox

[1/8] net: Add frag_list support to skb_segment

Message ID E1LB0cv-0000qk-Ec@gondolin.me.apana.org.au
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Herbert Xu Dec. 12, 2008, 5:31 a.m. UTC
net: Add frag_list support to skb_segment

This patch adds limited support for handling frag_list packets in
skb_segment.  The intention is to support GRO (Generic Receive Offload)
packets which will be constructed by chaining normal packets using
frag_list.

As such we require all frag_list members terminate on exact MSS
boundaries.  This is checked using BUG_ON.

As there should only be one producer in the kernel of such packets,
namely GRO, this requirement should not be difficult to maintain.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 net/core/skbuff.c |   70 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 14 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Evgeniy Polyakov Dec. 12, 2008, 7:46 p.m. UTC | #1
Hi.

Couple of comments below.

On Fri, Dec 12, 2008 at 04:31:49PM +1100, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> @@ -2317,9 +2317,34 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
>  		if (hsize > len || !sg)
>  			hsize = len;
>  
> -		nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
> -		if (unlikely(!nskb))
> -			goto err;
> +		if (!hsize && i >= nfrags) {
> +			BUG_ON(fskb->len != len);
> +
> +			pos += len;
> +			nskb = skb_clone(fskb, GFP_ATOMIC);
> +			fskb = fskb->next;
> +
> +			if (unlikely(!nskb))
> +				goto err;
> +
> +			hsize = skb_end_pointer(nskb) - nskb->head;
> +			if (skb_cow_head(nskb, doffset + headroom))
> +				goto err;

This should free nskb first.

> +			nskb->truesize += skb_end_pointer(nskb) - nskb->head -
> +					  hsize;
> +			skb_release_head_state(nskb);
> +			__skb_push(nskb, doffset);
> +		} else {
> +			nskb = alloc_skb(hsize + doffset + headroom,
> +					 GFP_ATOMIC);
> +
> +			if (unlikely(!nskb))
> +				goto err;
> +

The same.

> +			skb_reserve(nskb, headroom);
> +			__skb_put(nskb, doffset);
> +		}
>  
>  		if (segs)
>  			tail->next = nskb;
> @@ -2330,13 +2355,15 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
>  		__copy_skb_header(nskb, skb);
>  		nskb->mac_len = skb->mac_len;
>  
> -		skb_reserve(nskb, headroom);
>  		skb_reset_mac_header(nskb);
>  		skb_set_network_header(nskb, skb->mac_len);
>  		nskb->transport_header = (nskb->network_header +
>  					  skb_network_header_len(skb));
> -		skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
> -					  doffset);
> +		skb_copy_from_linear_data(skb, nskb->data, doffset);
> +
> +		if (pos >= offset + len)
> +			continue;
> +
>  		if (!sg) {
>  			nskb->ip_summed = CHECKSUM_NONE;
>  			nskb->csum = skb_copy_and_csum_bits(skb, offset,
> @@ -2346,14 +2373,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
>  		}
>  
>  		frag = skb_shinfo(nskb)->frags;
> -		k = 0;
>  
>  		skb_copy_from_linear_data_offset(skb, offset,
>  						 skb_put(nskb, hsize), hsize);
>  
> -		while (pos < offset + len) {
> -			BUG_ON(i >= nfrags);
> -
> +		while (pos < offset + len && i < nfrags) {
>  			*frag = skb_shinfo(skb)->frags[i];
>  			get_page(frag->page);
>  			size = frag->size;
> @@ -2363,20 +2387,38 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
>  				frag->size -= offset - pos;
>  			}
>  
> -			k++;
> +			skb_shinfo(nskb)->nr_frags++;
>  
>  			if (pos + size <= offset + len) {
>  				i++;
>  				pos += size;
>  			} else {
>  				frag->size -= pos + size - (offset + len);
> -				break;
> +				goto skip_fraglist;
>  			}
>  
>  			frag++;
>  		}
>  
> -		skb_shinfo(nskb)->nr_frags = k;
> +		if (pos < offset + len) {
> +			struct sk_buff *fskb2 = fskb;
> +
> +			BUG_ON(pos + fskb->len != offset + len);
> +
> +			pos += fskb->len;
> +			fskb = fskb->next;
> +
> +			if (fskb2->next) {
> +				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
> +				if (!fskb2)
> +					goto err;
> +			} else
> +				skb_get(fskb2);
> +
> +			skb_shinfo(nskb)->frag_list = fskb2;

Is it guaranteed that nskb does not have frags here?
Herbert Xu Dec. 12, 2008, 9:41 p.m. UTC | #2
On Fri, Dec 12, 2008 at 10:46:18PM +0300, Evgeniy Polyakov wrote:
> 
> On Fri, Dec 12, 2008 at 04:31:49PM +1100, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> > @@ -2317,9 +2317,34 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
> >  		if (hsize > len || !sg)
> >  			hsize = len;
> >  
> > -		nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
> > -		if (unlikely(!nskb))
> > -			goto err;
> > +		if (!hsize && i >= nfrags) {
> > +			BUG_ON(fskb->len != len);
> > +
> > +			pos += len;
> > +			nskb = skb_clone(fskb, GFP_ATOMIC);
> > +			fskb = fskb->next;
> > +
> > +			if (unlikely(!nskb))
> > +				goto err;
> > +
> > +			hsize = skb_end_pointer(nskb) - nskb->head;
> > +			if (skb_cow_head(nskb, doffset + headroom))
> > +				goto err;
> 
> This should free nskb first.

Good catch! I'll get this fixed.

> > +			nskb->truesize += skb_end_pointer(nskb) - nskb->head -
> > +					  hsize;
> > +			skb_release_head_state(nskb);
> > +			__skb_push(nskb, doffset);
> > +		} else {
> > +			nskb = alloc_skb(hsize + doffset + headroom,
> > +					 GFP_ATOMIC);
> > +
> > +			if (unlikely(!nskb))
> > +				goto err;
> > +
> 
> The same.

nskb is NULL here.

> > +		if (pos < offset + len) {
> > +			struct sk_buff *fskb2 = fskb;
> > +
> > +			BUG_ON(pos + fskb->len != offset + len);
> > +
> > +			pos += fskb->len;
> > +			fskb = fskb->next;
> > +
> > +			if (fskb2->next) {
> > +				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
> > +				if (!fskb2)
> > +					goto err;
> > +			} else
> > +				skb_get(fskb2);
> > +
> > +			skb_shinfo(nskb)->frag_list = fskb2;
> 
> Is it guaranteed that nskb does not have frags here?

This bit only gets executed the first time we hit the frag_list
in the original skb.  Therefore we've just allocated nskb for
the very first time.  I'll add a BUG_ON for you :)

Cheers,
Evgeniy Polyakov Dec. 13, 2008, 2:38 a.m. UTC | #3
On Sat, Dec 13, 2008 at 08:41:25AM +1100, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> > > +			nskb->truesize += skb_end_pointer(nskb) - nskb->head -
> > > +					  hsize;
> > > +			skb_release_head_state(nskb);
> > > +			__skb_push(nskb, doffset);
> > > +		} else {
> > > +			nskb = alloc_skb(hsize + doffset + headroom,
> > > +					 GFP_ATOMIC);
> > > +
> > > +			if (unlikely(!nskb))
> > > +				goto err;
> > > +
> > 
> > The same.
> 
> nskb is NULL here.

Yup, I meant below 'goto err' when we failed to clone skb.

> > > +		if (pos < offset + len) {
> > > +			struct sk_buff *fskb2 = fskb;
> > > +
> > > +			BUG_ON(pos + fskb->len != offset + len);
> > > +
> > > +			pos += fskb->len;
> > > +			fskb = fskb->next;
> > > +
> > > +			if (fskb2->next) {
> > > +				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
> > > +				if (!fskb2)
> > > +					goto err;
> > > +			} else
> > > +				skb_get(fskb2);
> > > +
> > > +			skb_shinfo(nskb)->frag_list = fskb2;
Herbert Xu Dec. 13, 2008, 2:43 a.m. UTC | #4
On Sat, Dec 13, 2008 at 05:38:55AM +0300, Evgeniy Polyakov wrote:
>
> Yup, I meant below 'goto err' when we failed to clone skb.
> 
> > > > +		if (pos < offset + len) {
> > > > +			struct sk_buff *fskb2 = fskb;
> > > > +
> > > > +			BUG_ON(pos + fskb->len != offset + len);
> > > > +
> > > > +			pos += fskb->len;
> > > > +			fskb = fskb->next;
> > > > +
> > > > +			if (fskb2->next) {
> > > > +				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
> > > > +				if (!fskb2)
> > > > +					goto err;
> > > > +			} else
> > > > +				skb_get(fskb2);
> > > > +
> > > > +			skb_shinfo(nskb)->frag_list = fskb2;

This isn't a problem.  fskb2 is still on the original frag_list so
we don't need to free it if skb_clone fails.

Cheers,
Evgeniy Polyakov Dec. 13, 2008, 3:11 a.m. UTC | #5
On Sat, Dec 13, 2008 at 01:43:05PM +1100, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> > > > > +		if (pos < offset + len) {
> > > > > +			struct sk_buff *fskb2 = fskb;
> > > > > +
> > > > > +			BUG_ON(pos + fskb->len != offset + len);
> > > > > +
> > > > > +			pos += fskb->len;
> > > > > +			fskb = fskb->next;
> > > > > +
> > > > > +			if (fskb2->next) {
> > > > > +				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
> > > > > +				if (!fskb2)
> > > > > +					goto err;
> > > > > +			} else
> > > > > +				skb_get(fskb2);
> > > > > +
> > > > > +			skb_shinfo(nskb)->frag_list = fskb2;
> 
> This isn't a problem.  fskb2 is still on the original frag_list so
> we don't need to free it if skb_clone fails.

But nskb was not added into the list or it was?
Herbert Xu Dec. 13, 2008, 3:20 a.m. UTC | #6
On Sat, Dec 13, 2008 at 06:11:10AM +0300, Evgeniy Polyakov wrote:
>
> > This isn't a problem.  fskb2 is still on the original frag_list so
> > we don't need to free it if skb_clone fails.
> 
> But nskb was not added into the list or it was?

We've added it to the segs list which is freed at err:.

Cheers,
diff mbox

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d49ef83..d24c181 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2285,6 +2285,7 @@  struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
+	struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
@@ -2304,7 +2305,6 @@  struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		struct sk_buff *nskb;
 		skb_frag_t *frag;
 		int hsize;
-		int k;
 		int size;
 
 		len = skb->len - offset;
@@ -2317,9 +2317,34 @@  struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		if (hsize > len || !sg)
 			hsize = len;
 
-		nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
-		if (unlikely(!nskb))
-			goto err;
+		if (!hsize && i >= nfrags) {
+			BUG_ON(fskb->len != len);
+
+			pos += len;
+			nskb = skb_clone(fskb, GFP_ATOMIC);
+			fskb = fskb->next;
+
+			if (unlikely(!nskb))
+				goto err;
+
+			hsize = skb_end_pointer(nskb) - nskb->head;
+			if (skb_cow_head(nskb, doffset + headroom))
+				goto err;
+
+			nskb->truesize += skb_end_pointer(nskb) - nskb->head -
+					  hsize;
+			skb_release_head_state(nskb);
+			__skb_push(nskb, doffset);
+		} else {
+			nskb = alloc_skb(hsize + doffset + headroom,
+					 GFP_ATOMIC);
+
+			if (unlikely(!nskb))
+				goto err;
+
+			skb_reserve(nskb, headroom);
+			__skb_put(nskb, doffset);
+		}
 
 		if (segs)
 			tail->next = nskb;
@@ -2330,13 +2355,15 @@  struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		__copy_skb_header(nskb, skb);
 		nskb->mac_len = skb->mac_len;
 
-		skb_reserve(nskb, headroom);
 		skb_reset_mac_header(nskb);
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
-		skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
-					  doffset);
+		skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+		if (pos >= offset + len)
+			continue;
+
 		if (!sg) {
 			nskb->ip_summed = CHECKSUM_NONE;
 			nskb->csum = skb_copy_and_csum_bits(skb, offset,
@@ -2346,14 +2373,11 @@  struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		}
 
 		frag = skb_shinfo(nskb)->frags;
-		k = 0;
 
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
-		while (pos < offset + len) {
-			BUG_ON(i >= nfrags);
-
+		while (pos < offset + len && i < nfrags) {
 			*frag = skb_shinfo(skb)->frags[i];
 			get_page(frag->page);
 			size = frag->size;
@@ -2363,20 +2387,38 @@  struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 				frag->size -= offset - pos;
 			}
 
-			k++;
+			skb_shinfo(nskb)->nr_frags++;
 
 			if (pos + size <= offset + len) {
 				i++;
 				pos += size;
 			} else {
 				frag->size -= pos + size - (offset + len);
-				break;
+				goto skip_fraglist;
 			}
 
 			frag++;
 		}
 
-		skb_shinfo(nskb)->nr_frags = k;
+		if (pos < offset + len) {
+			struct sk_buff *fskb2 = fskb;
+
+			BUG_ON(pos + fskb->len != offset + len);
+
+			pos += fskb->len;
+			fskb = fskb->next;
+
+			if (fskb2->next) {
+				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
+				if (!fskb2)
+					goto err;
+			} else
+				skb_get(fskb2);
+
+			skb_shinfo(nskb)->frag_list = fskb2;
+		}
+
+skip_fraglist:
 		nskb->data_len = len - hsize;
 		nskb->len += nskb->data_len;
 		nskb->truesize += nskb->data_len;