Patchwork [RFC,v7,08/19] Make __alloc_skb() to get external buffer.

login
register
mail settings
Submitter Xin, Xiaohui
Date June 5, 2010, 10:14 a.m.
Message ID <1275732899-5423-8-git-send-email-xiaohui.xin@intel.com>
Download mbox | patch
Permalink /patch/54761/
State RFC
Delegated to: David Miller
Headers show

Comments

Xin, Xiaohui - June 5, 2010, 10:14 a.m.
From: Xin Xiaohui <xiaohui.xin@intel.com>

Add a dev parameter to __alloc_skb(). When an external buffer is
available, point skb->data at it, recompute skb->head, maintain
the shinfo of the external buffer, and record the external buffer
info in the destructor_arg field.

Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Zhao Yu <yzhao81new@gmail.com>
Reviewed-by: Jeff Dike <jdike@linux.intel.com>
---

        __alloc_skb() cleanup by

        Jeff Dike <jdike@linux.intel.com>

 include/linux/skbuff.h |    7 ++++---
 net/core/skbuff.c      |   43 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 9 deletions(-)
Eric Dumazet - June 5, 2010, 2:53 p.m.
Le samedi 05 juin 2010 à 18:14 +0800, xiaohui.xin@intel.com a écrit :
> From: Xin Xiaohui <xiaohui.xin@intel.com>
> 	child->fclone = SKB_FCLONE_UNAVAILABLE;
>  	}
> +	/* Record the external buffer info in this field. It's not so good,
> +	 * but we cannot find another place easily.
> +	 */
> +	shinfo->destructor_arg = ext_page;
> +


Yes, this is a big problem: it's basically using a cache line that was not
touched before.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xin, Xiaohui - June 9, 2010, 7:34 a.m.
>-----Original Message-----
>From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
>Sent: Saturday, June 05, 2010 10:53 PM
>To: Xin, Xiaohui
>Cc: netdev@vger.kernel.org; kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
>mst@redhat.com; mingo@elte.hu; davem@davemloft.net; herbert@gondor.apana.org.au;
>jdike@linux.intel.com
>Subject: Re: [RFC PATCH v7 08/19] Make __alloc_skb() to get external buffer.
>
>Le samedi 05 juin 2010 à 18:14 +0800, xiaohui.xin@intel.com a écrit :
>> From: Xin Xiaohui <xiaohui.xin@intel.com>
>> 	child->fclone = SKB_FCLONE_UNAVAILABLE;
>>  	}
>> +	/* Record the external buffer info in this field. It's not so good,
>> +	 * but we cannot find another place easily.
>> +	 */
>> +	shinfo->destructor_arg = ext_page;
>> +
>
>
>Yes, this is a big problem: it's basically using a cache line that was not
>touched before.
>

Did your patch which moves destructor_arg before frags[] also fix this?

Thanks
Xiaohui
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 281a1c0..5ff8c27 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -442,17 +442,18 @@  extern void kfree_skb(struct sk_buff *skb);
 extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
-				   gfp_t priority, int fclone, int node);
+				   gfp_t priority, int fclone,
+				   int node, struct net_device *dev);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
-	return __alloc_skb(size, priority, 0, -1);
+	return __alloc_skb(size, priority, 0, -1, NULL);
 }
 
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
-	return __alloc_skb(size, priority, 1, -1);
+	return __alloc_skb(size, priority, 1, -1, NULL);
 }
 
 extern int skb_recycle_check(struct sk_buff *skb, int skb_size);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fbdb1f1..38d19d0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -161,7 +161,8 @@  EXPORT_SYMBOL(skb_under_panic);
  *	@fclone: allocate from fclone cache instead of head cache
  *		and allocate a cloned (child) skb
  *	@node: numa node to allocate memory on
- *
+ *	@dev: the device that owns the skb if the skb tries to get an
+ *		external buffer; otherwise NULL.
  *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  *	tail room of size bytes. The object has a reference count of one.
  *	The return is the buffer. On a failure the return is %NULL.
@@ -170,12 +171,13 @@  EXPORT_SYMBOL(skb_under_panic);
  *	%GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int fclone, int node, struct net_device *dev)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
-	u8 *data;
+	u8 *data = NULL;
+	struct skb_external_page *ext_page = NULL;
 
 	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 
@@ -185,8 +187,23 @@  struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		goto out;
 
 	size = SKB_DATA_ALIGN(size);
-	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-			gfp_mask, node);
+
+	/* If the device wants to do mediate passthru (zero-copy),
+	 * the skb may try to get external buffers from outside.
+	 * If that fails, fall back to allocating buffers from the kernel.
+	 */
+	if (dev && dev->mp_port) {
+		ext_page = netdev_alloc_external_page(dev, skb, size);
+		if (ext_page) {
+			data = ext_page->start;
+			size = ext_page->size;
+		}
+	}
+
+	if (!data)
+		data = kmalloc_node_track_caller(
+				size + sizeof(struct skb_shared_info),
+				gfp_mask, node);
 	if (!data)
 		goto nodata;
 
@@ -208,6 +225,15 @@  struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->mac_header = ~0U;
 #endif
 
+	/* If the skb gets external buffers successfully, since the shinfo is
+	 * at the end of the buffer, we may retain the shinfo once we
+	 * need it sometime.
+	 */
+	if (ext_page) {
+		skb->head = skb->data - NET_IP_ALIGN - NET_SKB_PAD;
+		memcpy(ext_page->ushinfo, skb_shinfo(skb),
+		       sizeof(struct skb_shared_info));
+	}
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -231,6 +257,11 @@  struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
+	/* Record the external buffer info in this field. It's not so good,
+	 * but we cannot find another place easily.
+	 */
+	shinfo->destructor_arg = ext_page;
+
 out:
 	return skb;
 nodata:
@@ -259,7 +290,7 @@  struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 	struct sk_buff *skb;
 
-	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node, dev);
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;