diff mbox

gso: Handle new frag_list of frags GRO packets

Message ID 20131113142615.GA1851@gondor.apana.org.au
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Herbert Xu Nov. 13, 2013, 2:26 p.m. UTC
On Tue, Nov 12, 2013 at 06:45:04PM -0800, Eric Dumazet wrote:
> On Wed, 2013-11-13 at 10:25 +0800, Herbert Xu wrote:
> 
> > So a better test for the time being would be to test with TSO
> > disabled in both cases.
> > 
> 
> GRO on, TSO off, little difference between two cases :
> 
> (Note some small things run in background, so these numbers are not
> ultra precise)

OK looks pretty sane.
 
> > In the mean time I'm cooking up a patch to generate TSO packets.
> 
> That would be very nice !

It's failing on my machine but I'm not certain whether it's a bug
in my patch or a bug in my NIC's TSO code.  So I'd appreciate it
if you can give this a spin on your mlx4.


Thanks!

Comments

Eric Dumazet Nov. 13, 2013, 3:06 p.m. UTC | #1
On Wed, 2013-11-13 at 22:26 +0800, Herbert Xu wrote:
> On Tue, Nov 12, 2013 at 06:45:04PM -0800, Eric Dumazet wrote:
> > On Wed, 2013-11-13 at 10:25 +0800, Herbert Xu wrote:
> > 
> > > So a better test for the time being would be to test with TSO
> > > disabled in both cases.
> > > 
> > 
> > GRO on, TSO off, little difference between two cases :
> > 
> > (Note some small things run in background, so these numbers are not
> > ultra precise)
> 
> OK looks pretty sane.
>  
> > > In the mean time I'm cooking up a patch to generate TSO packets.
> > 
> > That would be very nice !
> 
> It's failing on my machine but I'm not certain whether it's a bug
> in my patch or a bug in my NIC's TSO code.  So I'd appreciate it
> if you can give this a spin on your mlx4.
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 557e1a5..1302515 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c


Well, I wont try this patch, as it can not possibly work :(

The problem is not only skb_segment() but all its callers / users, as I
mentioned yesterday.

Specifically, you'll have to change inet_gso_segment(),
gre_gso_segment(), tcp_gso_segment(), ipv6_gso_segment(),
udp6_ufo_fragment() ...

Yes, we have to propagate correct IP id identifiers, and correct tcp
sequence numbers, tcp checksums.





--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 557e1a5..1302515 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2786,6 +2786,8 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 	__be16 proto;
 	bool csum;
 	int sg = !!(features & NETIF_F_SG);
+	int gso_type = 0;
+	int gso_size = 0;
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
@@ -2795,6 +2797,11 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);
 
+	if (net_gso_ok(gso_type, features)) {
+		gso_type = skb_shinfo(skb)->gso_type & ~SKB_GSO_DODGY;
+		gso_size = mss;
+	}
+
 	csum = !!can_checksum_protocol(features, proto);
 	__skb_push(skb, doffset);
 	headroom = skb_headroom(skb);
@@ -2807,7 +2814,7 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		int size;
 
 		len = skb->len - offset;
-		if (len > mss)
+		if (!gso_size && len > mss)
 			len = mss;
 
 		hsize = skb_headlen(skb) - offset;
@@ -2819,6 +2826,26 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		if (!hsize && i >= nfrags && skb_headlen(fskb) &&
 		    (skb_headlen(fskb) == len || sg)) {
 			BUG_ON(skb_headlen(fskb) > len);
+			SKB_FRAG_ASSERT(fskb);
+
+			nskb = skb_clone(fskb, GFP_ATOMIC);
+			if (unlikely(!nskb))
+				goto err;
+
+			if (gso_size) {
+				len = nskb->len;
+				pos += len;
+
+				skb_shinfo(nskb)->gso_segs = len / mss;
+
+				/*
+				 * Original GRO packet boundaries must
+				 * have been preserved.
+				 */
+				BUG_ON(fskb->next && len % mss);
+
+				goto skip_trim;
+			}
 
 			i = 0;
 			nfrags = skb_shinfo(fskb)->nr_frags;
@@ -2837,17 +2864,14 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 				skb_frag++;
 			}
 
-			nskb = skb_clone(fskb, GFP_ATOMIC);
-			fskb = fskb->next;
-
-			if (unlikely(!nskb))
-				goto err;
-
 			if (unlikely(pskb_trim(nskb, len))) {
 				kfree_skb(nskb);
 				goto err;
 			}
 
+skip_trim:
+			fskb = fskb->next;
+
 			hsize = skb_end_offset(nskb);
 			if (skb_cow_head(nskb, doffset + headroom)) {
 				kfree_skb(nskb);
@@ -2880,6 +2904,9 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 
 		skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
 
+		skb_shinfo(nskb)->gso_size = gso_size;
+		skb_shinfo(nskb)->gso_type = gso_type;
+
 		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
 						 nskb->data - tnl_hlen,
 						 doffset + tnl_hlen);
@@ -2902,6 +2929,41 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 
 		skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
 
+		/*
+		 * virtio-net misery:
+		 *
+		 * Do a trial run for hardware GSO to get the proper length.
+		 */
+		if (pos < offset + len && gso_size) {
+			int j;
+
+			len = hsize - (offset - pos);
+
+			for (j = i; j < nfrags; j++)
+				len += skb_frag_size(skb_frag + j);
+
+			if (fskb && !skb_headlen(fskb)) {
+				j = min_t(int,
+					  skb_shinfo(fskb)->nr_frags,
+					  MAX_SKB_FRAGS - nfrags + i);
+
+				while (--j >= 0)
+					len += skb_frag_size(
+						skb_shinfo(fskb)->frags + j);
+			}
+
+			if (len < mss && offset + len < skb->len)
+				goto too_many_frags;
+
+			skb_shinfo(nskb)->gso_segs = len / mss;
+			if (len % mss) {
+				if (offset + len >= skb->len)
+					skb_shinfo(nskb)->gso_segs++;
+				else
+					len -= len % mss;
+			}
+		}
+
 		while (pos < offset + len) {
 			if (i >= nfrags) {
 				BUG_ON(skb_headlen(fskb));
@@ -2917,6 +2979,7 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 
 			if (unlikely(skb_shinfo(nskb)->nr_frags >=
 				     MAX_SKB_FRAGS)) {
+too_many_frags:
 				net_warn_ratelimited(
 					"skb_segment: too many frags: %u %u\n",
 					pos, mss);