diff mbox

[7/7] venet: add scatter-gather/GSO support

Message ID 20090803171807.17268.49836.stgit@dev.haskins.net
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Gregory Haskins Aug. 3, 2009, 5:18 p.m. UTC
SG/GSO significantly enhance the performance of network traffic under
certain circumstances.  We implement this feature as a separate patch
to avoid intially complicating the baseline venet driver.  This will
presumably make the review process slightly easier, since we can
focus on the basic interface first.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 drivers/net/vbus-enet.c |  249 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/venet.h   |   39 +++++++
 2 files changed, 275 insertions(+), 13 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

stephen hemminger Aug. 3, 2009, 6:32 p.m. UTC | #1
On Mon, 03 Aug 2009 13:18:07 -0400
Gregory Haskins <ghaskins@novell.com> wrote:

> SG/GSO significantly enhance the performance of network traffic under
> certain circumstances.  We implement this feature as a separate patch
> to avoid intially complicating the baseline venet driver.  This will
> presumably make the review process slightly easier, since we can
> focus on the basic interface first.
> 
> Signed-off-by: Gregory Haskins <ghaskins@novell.com>

Never mind, previous comment about adding TSO.  But it still would
be good to have ethtool interface for this.
stephen hemminger Aug. 3, 2009, 6:33 p.m. UTC | #2
On Mon, 03 Aug 2009 13:18:07 -0400
Gregory Haskins <ghaskins@novell.com> wrote:

> +	struct {
> +		int                sg:1;
> +		int                tso:1;
> +		int                ufo:1;
> +		int                tso6:1;
> +		int                ecn:1;
> +	} flags;

Why do you have to shadow flags that are already available in net_device?
It is bad design to replicate state in a device driver. The problem
with replicated state is that it has to be updated in both places.
Gregory Haskins Aug. 3, 2009, 7:30 p.m. UTC | #3
Stephen Hemminger wrote:
> On Mon, 03 Aug 2009 13:18:07 -0400
> Gregory Haskins <ghaskins@novell.com> wrote:
> 
>> SG/GSO significantly enhance the performance of network traffic under
>> certain circumstances.  We implement this feature as a separate patch
>> to avoid intially complicating the baseline venet driver.  This will
>> presumably make the review process slightly easier, since we can
>> focus on the basic interface first.
>>
>> Signed-off-by: Gregory Haskins <ghaskins@novell.com>
> 
> Never mind, previous comment about adding TSO.


I should probably fold 6+7 together anyway, so this was my fault.  I
think I was originally keeping them separate because the venet was
serving as the canonical example for the vbus interface.  But Ill just
come up with something simpler to demonstrate the interface and merge
these two patches.

> But it still would be good to have ethtool interface for this.

Ack.  Will do.

Thanks Stephen,
-Greg
Gregory Haskins Aug. 3, 2009, 7:57 p.m. UTC | #4
Stephen Hemminger wrote:
> On Mon, 03 Aug 2009 13:18:07 -0400
> Gregory Haskins <ghaskins@novell.com> wrote:
> 
>> +	struct {
>> +		int                sg:1;
>> +		int                tso:1;
>> +		int                ufo:1;
>> +		int                tso6:1;
>> +		int                ecn:1;
>> +	} flags;
> 
> Why do you have to shadow flags that are already available in net_device?
> It is bad design to replicate state in a device driver. The problem
> with replicated state is that it has to be updated in both places.
> 

Ya, you are right.  I think the rationale was that the flags were "hw"
state, whereas dev->features was software state.  But thinking about it
after you comments, I don't think it makes much difference either way.

I will just have the negcap() function set the features directly in v2.

Kind Regards,
-Greg
diff mbox

Patch

diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c
index 8fcc2d6..5aa56ff 100644
--- a/drivers/net/vbus-enet.c
+++ b/drivers/net/vbus-enet.c
@@ -42,6 +42,8 @@  static int rx_ringlen = 256;
 module_param(rx_ringlen, int, 0444);
 static int tx_ringlen = 256;
 module_param(tx_ringlen, int, 0444);
+static int sg_enabled = 1;
+module_param(sg_enabled, int, 0444);
 
 #define PDEBUG(_dev, fmt, args...) dev_dbg(&(_dev)->dev, fmt, ## args)
 
@@ -58,8 +60,17 @@  struct vbus_enet_priv {
 	struct vbus_enet_queue     rxq;
 	struct vbus_enet_queue     txq;
 	struct tasklet_struct      txtask;
+	struct {
+		int                sg:1;
+		int                tso:1;
+		int                ufo:1;
+		int                tso6:1;
+		int                ecn:1;
+	} flags;
 };
 
+static void vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force);
+
 static struct vbus_enet_priv *
 napi_to_priv(struct napi_struct *napi)
 {
@@ -193,6 +204,93 @@  rx_teardown(struct vbus_enet_priv *priv)
 	}
 }
 
+static int
+tx_setup(struct vbus_enet_priv *priv)
+{
+	struct ioq *ioq = priv->txq.queue;
+	struct ioq_iterator iter;
+	int i;
+	int ret;
+
+	if (!priv->flags.sg)
+		/*
+		 * There is nothing to do for a ring that is not using
+		 * scatter-gather
+		 */
+		return 0;
+
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * Now populate each descriptor with an empty SG descriptor
+	 */
+	for (i = 0; i < tx_ringlen; i++) {
+		struct venet_sg *vsg;
+		size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1);
+		size_t len = sizeof(*vsg) + iovlen;
+
+		vsg = kzalloc(len, GFP_KERNEL);
+		if (!vsg)
+			return -ENOMEM;
+
+		iter.desc->cookie = (u64)vsg;
+		iter.desc->len    = len;
+		iter.desc->ptr    = (u64)__pa(vsg);
+
+		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+		BUG_ON(ret < 0);
+	}
+
+	return 0;
+}
+
+static void
+tx_teardown(struct vbus_enet_priv *priv)
+{
+	struct ioq *ioq = priv->txq.queue;
+	struct ioq_iterator iter;
+	int ret;
+
+	/* forcefully free all outstanding transmissions */
+	vbus_enet_tx_reap(priv, 1);
+
+	if (!priv->flags.sg)
+		/*
+		 * There is nothing else to do for a ring that is not using
+		 * scatter-gather
+		 */
+		return;
+
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	/* seek to position 0 */
+	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * free each valid descriptor
+	 */
+	while (iter.desc->cookie) {
+		struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie;
+
+		iter.desc->valid = 0;
+		wmb();
+
+		iter.desc->ptr = 0;
+		iter.desc->cookie = 0;
+
+		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+		BUG_ON(ret < 0);
+
+		kfree(vsg);
+	}
+}
+
 /*
  * Open and close
  */
@@ -396,14 +494,67 @@  vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev)
 	BUG_ON(ret < 0);
 	BUG_ON(iter.desc->sown);
 
-	/*
-	 * We simply put the skb right onto the ring.  We will get an interrupt
-	 * later when the data has been consumed and we can reap the pointers
-	 * at that time
-	 */
-	iter.desc->cookie = (u64)skb;
-	iter.desc->len = (u64)skb->len;
-	iter.desc->ptr = (u64)__pa(skb->data);
+	if (priv->flags.sg) {
+		struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie;
+		struct scatterlist sgl[MAX_SKB_FRAGS+1];
+		struct scatterlist *sg;
+		int count, maxcount = ARRAY_SIZE(sgl);
+
+		sg_init_table(sgl, maxcount);
+
+		memset(vsg, 0, sizeof(*vsg));
+
+		vsg->cookie = (u64)skb;
+		vsg->len    = skb->len;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			vsg->flags      |= VENET_SG_FLAG_NEEDS_CSUM;
+			vsg->csum.start  = skb->csum_start - skb_headroom(skb);
+			vsg->csum.offset = skb->csum_offset;
+		}
+
+		if (skb_is_gso(skb)) {
+			struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+			vsg->flags |= VENET_SG_FLAG_GSO;
+
+			vsg->gso.hdrlen = skb_transport_header(skb) - skb->data;
+			vsg->gso.size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				vsg->gso.type = VENET_GSO_TYPE_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				vsg->gso.type = VENET_GSO_TYPE_TCPV6;
+			else if (sinfo->gso_type & SKB_GSO_UDP)
+				vsg->gso.type = VENET_GSO_TYPE_UDP;
+			else
+				panic("Virtual-Ethernet: unknown GSO type " \
+				      "0x%x\n", sinfo->gso_type);
+
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				vsg->flags |= VENET_SG_FLAG_ECN;
+		}
+
+		count = skb_to_sgvec(skb, sgl, 0, skb->len);
+
+		BUG_ON(count > maxcount);
+
+		for (sg = &sgl[0]; sg; sg = sg_next(sg)) {
+			struct venet_iov *iov = &vsg->iov[vsg->count++];
+
+			iov->len = sg->length;
+			iov->ptr = (u64)sg_phys(sg);
+		}
+
+	} else {
+		/*
+		 * non scatter-gather mode: simply put the skb right onto the
+		 * ring.
+		 */
+		iter.desc->cookie = (u64)skb;
+		iter.desc->len = (u64)skb->len;
+		iter.desc->ptr = (u64)__pa(skb->data);
+	}
+
 	iter.desc->valid  = 1;
 
 	priv->dev->stats.tx_packets++;
@@ -459,7 +610,17 @@  vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force)
 	 * owned by the south-side
 	 */
 	while (iter.desc->valid && (!iter.desc->sown || force)) {
-		struct sk_buff *skb = (struct sk_buff *)iter.desc->cookie;
+		struct sk_buff *skb;
+
+		if (priv->flags.sg) {
+			struct venet_sg *vsg;
+
+			vsg = (struct venet_sg *)iter.desc->cookie;
+			skb = (struct sk_buff *)vsg->cookie;
+
+		} else {
+			skb = (struct sk_buff *)iter.desc->cookie;
+		}
 
 		PDEBUG(priv->dev, "completed sending %d bytes\n", skb->len);
 
@@ -538,6 +699,47 @@  tx_isr(struct ioq_notifier *notifier)
        tasklet_schedule(&priv->txtask);
 }
 
+static int
+vbus_enet_negcap(struct vbus_enet_priv *priv)
+{
+	int ret;
+	struct venet_capabilities caps;
+
+	memset(&caps, 0, sizeof(caps));
+
+	if (sg_enabled) {
+		caps.gid = VENET_CAP_GROUP_SG;
+		caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6
+			      |VENET_CAP_ECN);
+		/* note: exclude UFO for now due to stack bug */
+	}
+
+	ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
+	if (ret < 0)
+		return ret;
+
+	if (caps.bits & VENET_CAP_SG) {
+		priv->flags.sg = true;
+
+		if (caps.bits & VENET_CAP_TSO4)
+			priv->flags.tso = true;
+		if (caps.bits & VENET_CAP_TSO6)
+			priv->flags.tso6 = true;
+		if (caps.bits & VENET_CAP_UFO)
+			priv->flags.ufo = true;
+		if (caps.bits & VENET_CAP_ECN)
+			priv->flags.ecn = true;
+
+		dev_info(&priv->dev->dev, "Detected GSO features %s%s%s%s\n",
+			 priv->flags.tso  ? "t" : "-",
+			 priv->flags.tso6 ? "T" : "-",
+			 priv->flags.ufo  ? "u" : "-",
+			 priv->flags.ecn  ? "e" : "-");
+	}
+
+	return 0;
+}
+
 static const struct net_device_ops vbus_enet_netdev_ops = {
 	.ndo_open          = vbus_enet_open,
 	.ndo_stop          = vbus_enet_stop,
@@ -574,12 +776,21 @@  vbus_enet_probe(struct vbus_device_proxy *vdev)
 	priv->dev  = dev;
 	priv->vdev = vdev;
 
+	ret = vbus_enet_negcap(priv);
+	if (ret < 0) {
+		printk(KERN_INFO "VENET: Error negotiating capabilities for " \
+		       "%lld\n",
+		       priv->vdev->id);
+		goto out_free;
+	}
+
 	tasklet_init(&priv->txtask, deferred_tx_isr, (unsigned long)priv);
 
 	queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr);
 	queue_init(priv, &priv->txq, VENET_QUEUE_TX, tx_ringlen, tx_isr);
 
 	rx_setup(priv);
+	tx_setup(priv);
 
 	ioq_notify_enable(priv->rxq.queue, 0);  /* enable interrupts */
 	ioq_notify_enable(priv->txq.queue, 0);
@@ -599,6 +810,22 @@  vbus_enet_probe(struct vbus_device_proxy *vdev)
 
 	dev->features |= NETIF_F_HIGHDMA;
 
+	if (priv->flags.sg) {
+		dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM|NETIF_F_FRAGLIST;
+
+		if (priv->flags.tso)
+			dev->features |= NETIF_F_TSO;
+
+		if (priv->flags.ufo)
+			dev->features |= NETIF_F_UFO;
+
+		if (priv->flags.tso6)
+			dev->features |= NETIF_F_TSO6;
+
+		if (priv->flags.ecn)
+			dev->features |= NETIF_F_TSO_ECN;
+	}
+
 	ret = register_netdev(dev);
 	if (ret < 0) {
 		printk(KERN_INFO "VENET: error %i registering device \"%s\"\n",
@@ -626,9 +853,9 @@  vbus_enet_remove(struct vbus_device_proxy *vdev)
 	napi_disable(&priv->napi);
 
 	rx_teardown(priv);
-	vbus_enet_tx_reap(priv, 1);
-
 	ioq_put(priv->rxq.queue);
+
+	tx_teardown(priv);
 	ioq_put(priv->txq.queue);
 
 	dev->ops->close(dev, 0);
diff --git a/include/linux/venet.h b/include/linux/venet.h
index 586be40..47ed37d 100644
--- a/include/linux/venet.h
+++ b/include/linux/venet.h
@@ -37,8 +37,43 @@  struct venet_capabilities {
 	__u32 bits;
 };
 
-/* CAPABILITIES-GROUP 0 */
-/* #define VENET_CAP_FOO    0   (No capabilities defined yet, for now) */
+#define VENET_CAP_GROUP_SG 0
+
+/* CAPABILITIES-GROUP SG */
+#define VENET_CAP_SG     (1 << 0)
+#define VENET_CAP_TSO4   (1 << 1)
+#define VENET_CAP_TSO6   (1 << 2)
+#define VENET_CAP_ECN    (1 << 3)
+#define VENET_CAP_UFO    (1 << 4)
+
+struct venet_iov {
+	__u32 len;
+	__u64 ptr;
+};
+
+#define VENET_SG_FLAG_NEEDS_CSUM (1 << 0)
+#define VENET_SG_FLAG_GSO        (1 << 1)
+#define VENET_SG_FLAG_ECN        (1 << 2)
+
+struct venet_sg {
+	__u64            cookie;
+	__u32            flags;
+	__u32            len;     /* total length of all iovs */
+	struct {
+		__u16    start;	  /* csum starting position */
+		__u16    offset;  /* offset to place csum */
+	} csum;
+	struct {
+#define VENET_GSO_TYPE_TCPV4	0	/* IPv4 TCP (TSO) */
+#define VENET_GSO_TYPE_UDP	1	/* IPv4 UDP (UFO) */
+#define VENET_GSO_TYPE_TCPV6	2	/* IPv6 TCP */
+		__u8     type;
+		__u16    hdrlen;
+		__u16    size;
+	} gso;
+	__u32            count;   /* nr of iovs */
+	struct venet_iov iov[1];
+};
 
 #define VENET_FUNC_LINKUP   0
 #define VENET_FUNC_LINKDOWN 1