diff mbox

[net-next,02/13] sock: skb_copy_ubufs support for compound pages

Message ID 20170618224414.59012-3-willemdebruijn.kernel@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Willem de Bruijn June 18, 2017, 10:44 p.m. UTC
From: Willem de Bruijn <willemb@google.com>

Refine skb_copy_ubufs to support compound pages. With upcoming TCP
and UDP zerocopy sendmsg, such fragments may appear.

The existing code replaces each page one for one. Splitting each
compound page into an independent number of regular pages can result
in exceeding limit MAX_SKB_FRAGS.

Instead, fill all destination pages but the last to PAGE_SIZE.
Split the existing alloc + copy loop into separate stages. Compute
the bytelength and allocate the minimum number of pages needed to
hold this. Revise the copy loop to fill each destination page.

It is not safe to modify skb frags when the skbuff is shared. No
existing codepath should hit this case.

Eventually, this fragile function can perhaps be replaced with calls
to skb_linearize -- when converted to not always require GFP_ATOMIC.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/skbuff.h |  9 +++++++--
 net/core/skbuff.c      | 50 ++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 14 deletions(-)

Comments

kernel test robot June 19, 2017, 1:23 a.m. UTC | #1
Hi Willem,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Willem-de-Bruijn/sock-allocate-skbs-from-optmem/20170619-072924
config: i386-randconfig-a0-06162344 (attached as .config)
compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

Note: the linux-review/Willem-de-Bruijn/sock-allocate-skbs-from-optmem/20170619-072924 HEAD 967736700e28113ea867e0f2358aca7dcced7407 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   net//core/skbuff.c: In function 'skb_copy_ubufs':
>> net//core/skbuff.c:1016:2: error: 'uarg' undeclared (first use in this function)
     uarg->callback(uarg, false);
     ^
   net//core/skbuff.c:1016:2: note: each undeclared identifier is reported only once for each function it appears in

vim +/uarg +1016 net//core/skbuff.c

396206af Willem de Bruijn   2017-06-18  1000  				d_off = 0;
396206af Willem de Bruijn   2017-06-18  1001  				page = (struct page *)page_private(page);
396206af Willem de Bruijn   2017-06-18  1002  			}
396206af Willem de Bruijn   2017-06-18  1003  			copy = min_t(u32, PAGE_SIZE - d_off, f_size);
396206af Willem de Bruijn   2017-06-18  1004  			memcpy(page_address(page) + d_off, vaddr + f_off, copy);
396206af Willem de Bruijn   2017-06-18  1005  			f_size -= copy;
396206af Willem de Bruijn   2017-06-18  1006  			d_off += copy;
396206af Willem de Bruijn   2017-06-18  1007  			f_off += copy;
396206af Willem de Bruijn   2017-06-18  1008  		}
396206af Willem de Bruijn   2017-06-18  1009  		kunmap_atomic(vaddr);
396206af Willem de Bruijn   2017-06-18  1010  	}
396206af Willem de Bruijn   2017-06-18  1011  
a6686f2f Shirley Ma         2011-07-06  1012  	/* skb frags release userspace buffers */
02756ed4 Krishna Kumar      2012-07-17  1013  	for (i = 0; i < num_frags; i++)
a8605c60 Ian Campbell       2011-10-19  1014  		skb_frag_unref(skb, i);
a6686f2f Shirley Ma         2011-07-06  1015  
e19d6763 Michael S. Tsirkin 2012-11-01 @1016  	uarg->callback(uarg, false);
a6686f2f Shirley Ma         2011-07-06  1017  
a6686f2f Shirley Ma         2011-07-06  1018  	/* skb frags point to kernel buffers */
396206af Willem de Bruijn   2017-06-18  1019  	for (i = 0; i < new_frags - 1; i++) {
396206af Willem de Bruijn   2017-06-18  1020  		__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
40dadff2 Sunghan Suh        2013-07-12  1021  		head = (struct page *)page_private(head);
a6686f2f Shirley Ma         2011-07-06  1022  	}
396206af Willem de Bruijn   2017-06-18  1023  	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
396206af Willem de Bruijn   2017-06-18  1024  	skb_shinfo(skb)->nr_frags = new_frags;

:::::: The code at line 1016 was first introduced by commit
:::::: e19d6763cc300fcb706bd291b24ac06be71e1ce6 skb: report completion status for zero copy skbs

:::::: TO: Michael S. Tsirkin <mst@redhat.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Willem de Bruijn June 19, 2017, 2:21 a.m. UTC | #2
> [auto build test ERROR on net-next/master]
>>    net//core/skbuff.c: In function 'skb_copy_ubufs':
>>> net//core/skbuff.c:1016:2: error: 'uarg' undeclared (first use in this function)
>      uarg->callback(uarg, false);
>      ^
>    net//core/skbuff.c:1016:2: note: each undeclared identifier is reported only once for each function it appears in

I made a mistake during rebase. Will fix.

That callback is removed in patch 5/13. uarg must remain until then.
diff mbox

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 852feacf4bbf..4f520cc9b914 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1783,13 +1783,18 @@  static inline unsigned int skb_headlen(const struct sk_buff *skb)
 	return skb->len - skb->data_len;
 }
 
-static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
 {
 	unsigned int i, len = 0;
 
 	for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
 		len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
-	return len + skb_headlen(skb);
+	return len;
+}
+
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+{
+	return skb_headlen(skb) + __skb_pagelen(skb);
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f75897a33fa4..c417b619bec8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -958,15 +958,19 @@  EXPORT_SYMBOL_GPL(skb_morph);
  */
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	int i;
 	int num_frags = skb_shinfo(skb)->nr_frags;
 	struct page *page, *head = NULL;
-	struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+	int i, new_frags;
+	u32 d_off;
 
-	for (i = 0; i < num_frags; i++) {
-		u8 *vaddr;
-		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+	if (!num_frags)
+		return 0;
 
+	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
+		return -EINVAL;
+
+	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < new_frags; i++) {
 		page = alloc_page(gfp_mask);
 		if (!page) {
 			while (head) {
@@ -976,14 +980,35 @@  int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 			}
 			return -ENOMEM;
 		}
-		vaddr = kmap_atomic(skb_frag_page(f));
-		memcpy(page_address(page),
-		       vaddr + f->page_offset, skb_frag_size(f));
-		kunmap_atomic(vaddr);
 		set_page_private(page, (unsigned long)head);
 		head = page;
 	}
 
+	page = head;
+	d_off = 0;
+	for (i = 0; i < num_frags; i++) {
+		u8 *vaddr;
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+		u32 f_off, f_size, copy;
+
+		f_off = f->page_offset;
+		f_size = f->size;
+
+		vaddr = kmap_atomic(skb_frag_page(f));
+		while (f_size) {
+			if (d_off == PAGE_SIZE) {
+				d_off = 0;
+				page = (struct page *)page_private(page);
+			}
+			copy = min_t(u32, PAGE_SIZE - d_off, f_size);
+			memcpy(page_address(page) + d_off, vaddr + f_off, copy);
+			f_size -= copy;
+			d_off += copy;
+			f_off += copy;
+		}
+		kunmap_atomic(vaddr);
+	}
+
 	/* skb frags release userspace buffers */
 	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
@@ -991,11 +1016,12 @@  int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	uarg->callback(uarg, false);
 
 	/* skb frags point to kernel buffers */
-	for (i = num_frags - 1; i >= 0; i--) {
-		__skb_fill_page_desc(skb, i, head, 0,
-				     skb_shinfo(skb)->frags[i].size);
+	for (i = 0; i < new_frags - 1; i++) {
+		__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
 		head = (struct page *)page_private(head);
 	}
+	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+	skb_shinfo(skb)->nr_frags = new_frags;
 
 	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	return 0;