diff mbox series

[2/2] net: emac: implement TCP TSO

Message ID 222fc61f88a39393b8830d9bfcc7b88d24fb81d6.1539804852.git.chunkeey@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [1/2] net: emac: implement 802.1Q VLAN TX tagging support | expand

Commit Message

Christian Lamparter Oct. 17, 2018, 7:53 p.m. UTC
This patch enables TSO(v4) hw feature for emac driver.
As atleast the APM82181's TCP/IP acceleration hardware
controller (TAH) provides TCP segmentation support in
the transmit path.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
---
 drivers/net/ethernet/ibm/emac/core.c | 101 ++++++++++++++++++++++++++-
 drivers/net/ethernet/ibm/emac/core.h |   4 ++
 drivers/net/ethernet/ibm/emac/emac.h |   7 ++
 drivers/net/ethernet/ibm/emac/tah.c  |  20 ++++++
 drivers/net/ethernet/ibm/emac/tah.h  |   2 +
 5 files changed, 133 insertions(+), 1 deletion(-)

Comments

Florian Fainelli Oct. 17, 2018, 8:09 p.m. UTC | #1
On 10/17/2018 12:53 PM, Christian Lamparter wrote:
> This patch enables TSO(v4) hw feature for emac driver.
> As atleast the APM82181's TCP/IP acceleration hardware
> controller (TAH) provides TCP segmentation support in
> the transmit path.
> 
> Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
> ---
>  drivers/net/ethernet/ibm/emac/core.c | 101 ++++++++++++++++++++++++++-
>  drivers/net/ethernet/ibm/emac/core.h |   4 ++
>  drivers/net/ethernet/ibm/emac/emac.h |   7 ++
>  drivers/net/ethernet/ibm/emac/tah.c  |  20 ++++++
>  drivers/net/ethernet/ibm/emac/tah.h  |   2 +
>  5 files changed, 133 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
> index be560f9031f4..49ffbd6e1707 100644
> --- a/drivers/net/ethernet/ibm/emac/core.c
> +++ b/drivers/net/ethernet/ibm/emac/core.c
> @@ -38,6 +38,9 @@
>  #include <linux/mii.h>
>  #include <linux/bitops.h>
>  #include <linux/if_vlan.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <linux/tcp.h>
>  #include <linux/workqueue.h>
>  #include <linux/of.h>
>  #include <linux/of_address.h>
> @@ -1410,6 +1413,52 @@ static inline u16 emac_tx_csum(struct emac_instance *dev,
>  	return 0;
>  }
>  
> +const u32 tah_ss[TAH_NO_SSR] = { 9000, 4500, 1500, 1300, 576, 176 };
> +
> +static int emac_tx_tso(struct emac_instance *dev, struct sk_buff *skb,
> +		       u16 *ctrl)
> +{
> +	if (emac_has_feature(dev, EMAC_FTR_TAH_HAS_TSO) &&
> +	    skb_is_gso(skb) && !!(skb_shinfo(skb)->gso_type &
> +				(SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
> +		u32 seg_size = 0, i;
> +
> +		/* Get the MTU */
> +		seg_size = skb_shinfo(skb)->gso_size + tcp_hdrlen(skb)
> +			+ skb_network_header_len(skb);
> +
> +		/* Restriction applied for the segmentation size
> +		 * to use HW segmentation offload feature: the size
> +		 * of the segment must not be less than 168 bytes for
> +		 * DIX formatted segments, or 176 bytes for
> +		 * IEEE formatted segments.
> +		 *
> +		 * I use value 176 to check for the segment size here
> +		 * as it can cover both 2 conditions above.
> +		 */
> +		if (seg_size < 176)
> +			return -ENODEV;
> +
> +		/* Get the best suitable MTU */
> +		for (i = 0; i < ARRAY_SIZE(tah_ss); i++) {
> +			u32 curr_seg = tah_ss[i];
> +
> +			if (curr_seg > dev->ndev->mtu ||
> +			    curr_seg > seg_size)
> +				continue;
> +
> +			*ctrl &= ~EMAC_TX_CTRL_TAH_CSUM;
> +			*ctrl |= EMAC_TX_CTRL_TAH_SSR(i);
> +			return 0;

This is something that you can possibly take out of your hot path and
recalculate when the MTU actually changes?

[snip]

> +static netdev_tx_t emac_sw_tso(struct sk_buff *skb, struct net_device *ndev)
> +{
> +	struct emac_instance *dev = netdev_priv(ndev);
> +	struct sk_buff *segs, *curr;
> +
> +	segs = skb_gso_segment(skb, ndev->features &
> +					~(NETIF_F_TSO | NETIF_F_TSO6));
> +	if (IS_ERR_OR_NULL(segs)) {
> +		goto drop;
> +	} else {
> +		while (segs) {
> +			/* check for overflow */
> +			if (dev->tx_cnt >= NUM_TX_BUFF) {
> +				dev_kfree_skb_any(segs);
> +				goto drop;
> +			}

Would setting dev->max_gso_segs somehow help make sure the stack does
not feed you oversized GSO'd skbs?
Christian Lamparter Oct. 19, 2018, 3:39 p.m. UTC | #2
Hello,

On Wednesday, October 17, 2018 10:09:44 PM CEST Florian Fainelli wrote:
> On 10/17/2018 12:53 PM, Christian Lamparter wrote:
> > This patch enables TSO(v4) hw feature for emac driver.
> > As atleast the APM82181's TCP/IP acceleration hardware
> > controller (TAH) provides TCP segmentation support in
> > the transmit path.
> > 
> > Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
> > ---
> > diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
> > index be560f9031f4..49ffbd6e1707 100644
> > --- a/drivers/net/ethernet/ibm/emac/core.c
> > +++ b/drivers/net/ethernet/ibm/emac/core.c
> > @@ -1410,6 +1413,52 @@ static inline u16 emac_tx_csum(struct emac_instance *dev,
> >  	return 0;
> >  }
> >  
> > +const u32 tah_ss[TAH_NO_SSR] = { 9000, 4500, 1500, 1300, 576, 176 };
> > +
> > +static int emac_tx_tso(struct emac_instance *dev, struct sk_buff *skb,
> > +		       u16 *ctrl)
> > +{
> > +	if (emac_has_feature(dev, EMAC_FTR_TAH_HAS_TSO) &&
> > +	    skb_is_gso(skb) && !!(skb_shinfo(skb)->gso_type &
> > +				(SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
> > +		u32 seg_size = 0, i;
> > +
> > +		/* Get the MTU */
> > +		seg_size = skb_shinfo(skb)->gso_size + tcp_hdrlen(skb)
> > +			+ skb_network_header_len(skb);
> > +
> > +		/* Restriction applied for the segmentation size
> > +		 * to use HW segmentation offload feature: the size
> > +		 * of the segment must not be less than 168 bytes for
> > +		 * DIX formatted segments, or 176 bytes for
> > +		 * IEEE formatted segments.
> > +		 *
> > +		 * I use value 176 to check for the segment size here
> > +		 * as it can cover both 2 conditions above.
> > +		 */
> > +		if (seg_size < 176)
> > +			return -ENODEV;
> > +
> > +		/* Get the best suitable MTU */
> > +		for (i = 0; i < ARRAY_SIZE(tah_ss); i++) {
> > +			u32 curr_seg = tah_ss[i];
> > +
> > +			if (curr_seg > dev->ndev->mtu ||
> > +			    curr_seg > seg_size)
> > +				continue;
> > +
> > +			*ctrl &= ~EMAC_TX_CTRL_TAH_CSUM;
> > +			*ctrl |= EMAC_TX_CTRL_TAH_SSR(i);
> > +			return 0;
> 
> This is something that you can possibly take out of your hot path and
> recalculate when the MTU actually changes?

Do you mean the curr_seg > dev->ndev->mtu check? Because from what I
know, the MSS can be manually set by a socket option (TCP_MAXSEG) on a
"per-socket" base.

(Altough, iperf warns that "Setting MSS is very buggy"... Which it is
as I see more way retries with a iperf run with a MSS less than
768-ish. Ideally, the change_mtu could update the TAH_SS register )

> [snip]
> 
> > +static netdev_tx_t emac_sw_tso(struct sk_buff *skb, struct net_device *ndev)
> > +{
> > +	struct emac_instance *dev = netdev_priv(ndev);
> > +	struct sk_buff *segs, *curr;
> > +
> > +	segs = skb_gso_segment(skb, ndev->features &
> > +					~(NETIF_F_TSO | NETIF_F_TSO6));
> > +	if (IS_ERR_OR_NULL(segs)) {
> > +		goto drop;
> > +	} else {
> > +		while (segs) {
> > +			/* check for overflow */
> > +			if (dev->tx_cnt >= NUM_TX_BUFF) {
> > +				dev_kfree_skb_any(segs);
> > +				goto drop;
> > +			}
> 
> Would setting dev->max_gso_segs somehow help make sure the stack does
> not feed you oversized GSO'd skbs?
Ok, thanks's for that pointer. I'll look at dev->gso_max_segs and
dev->gso_max_size. The hardware documentation doesn't mention any
hard upper limit for how many segments it can do. What it does tell
is that it just needs an extra 512Bytes in the TX FIFO as a buffer
to store the header template and the calculated checksums and what
not.
 
But this should be no problem because that TX FIFO is 10 KiB. so even
the 9000 Jumbo frames should have plenty of "free real estate".

As for the "overflow" case:

There's a check in emac_start_xmit_sg() before emac_tx_tso() call that
does an *estimation* check and goes to "stop_queue" if it doesn't fit:

|        /* Note, this is only an *estimation*, we can still run out of empty
|         * slots because of the additional fragmentation into
|         * MAL_MAX_TX_SIZE-sized chunks
|         */
|        if (unlikely(dev->tx_cnt + nr_frags + mal_tx_chunks(len) > NUM_TX_BUFF))
|                goto stop_queue;
|        [...]
|
|        if (emac_tx_tso(dev, skb, &ctrl))
|               return emac_sw_tso(skb, ndev);
|        [....]
|
|		stop_queue:
|		 netif_stop_queue(ndev);
|		 DBG2(dev, "stopped TX queue" NL);
|		 return NETDEV_TX_BUSY;

emac_start_xmit_sg() can also drop the whole skb later if it turns
out that it doesn't fit. (see the "undo_frame" goto label in emac's
core.c file (not included in the extract above, but it is there).

That said, I could do a better job at making sure that no incomplete
skb gets sent to the network though. 
@Dave: Please ignore this version of the TSO patch for now.

Regards,
Christian
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index be560f9031f4..49ffbd6e1707 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -38,6 +38,9 @@ 
 #include <linux/mii.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
 #include <linux/workqueue.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -1410,6 +1413,52 @@  static inline u16 emac_tx_csum(struct emac_instance *dev,
 	return 0;
 }
 
+const u32 tah_ss[TAH_NO_SSR] = { 9000, 4500, 1500, 1300, 576, 176 };
+
+static int emac_tx_tso(struct emac_instance *dev, struct sk_buff *skb,
+		       u16 *ctrl)
+{
+	if (emac_has_feature(dev, EMAC_FTR_TAH_HAS_TSO) &&
+	    skb_is_gso(skb) && !!(skb_shinfo(skb)->gso_type &
+				(SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+		u32 seg_size = 0, i;
+
+		/* Get the MTU */
+		seg_size = skb_shinfo(skb)->gso_size + tcp_hdrlen(skb)
+			+ skb_network_header_len(skb);
+
+		/* Restriction applied for the segmentation size
+		 * to use HW segmentation offload feature: the size
+		 * of the segment must not be less than 168 bytes for
+		 * DIX formatted segments, or 176 bytes for
+		 * IEEE formatted segments.
+		 *
+		 * I use value 176 to check for the segment size here
+		 * as it can cover both 2 conditions above.
+		 */
+		if (seg_size < 176)
+			return -ENODEV;
+
+		/* Get the best suitable MTU */
+		for (i = 0; i < ARRAY_SIZE(tah_ss); i++) {
+			u32 curr_seg = tah_ss[i];
+
+			if (curr_seg > dev->ndev->mtu ||
+			    curr_seg > seg_size)
+				continue;
+
+			*ctrl &= ~EMAC_TX_CTRL_TAH_CSUM;
+			*ctrl |= EMAC_TX_CTRL_TAH_SSR(i);
+			return 0;
+		}
+
+		/* none found fall back to software */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static inline netdev_tx_t emac_xmit_finish(struct emac_instance *dev, int len)
 {
 	struct emac_regs __iomem *p = dev->emacp;
@@ -1452,8 +1501,46 @@  static inline u16 emac_tx_vlan(struct emac_instance *dev, struct sk_buff *skb)
 	return 0;
 }
 
+static netdev_tx_t
+emac_start_xmit(struct sk_buff *skb, struct net_device *ndev);
+
+static netdev_tx_t emac_sw_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct emac_instance *dev = netdev_priv(ndev);
+	struct sk_buff *segs, *curr;
+
+	segs = skb_gso_segment(skb, ndev->features &
+					~(NETIF_F_TSO | NETIF_F_TSO6));
+	if (IS_ERR_OR_NULL(segs)) {
+		goto drop;
+	} else {
+		while (segs) {
+			/* check for overflow */
+			if (dev->tx_cnt >= NUM_TX_BUFF) {
+				dev_kfree_skb_any(segs);
+				goto drop;
+			}
+
+			curr = segs;
+			segs = curr->next;
+			curr->next = NULL;
+
+			emac_start_xmit(curr, ndev);
+		}
+		dev_consume_skb_any(skb);
+	}
+
+	return NETDEV_TX_OK;
+
+drop:
+	++dev->estats.tx_dropped;
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
 /* Tx lock BH */
-static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t
+emac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct emac_instance *dev = netdev_priv(ndev);
 	unsigned int len = skb->len;
@@ -1462,6 +1549,9 @@  static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	u16 ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY |
 	    MAL_TX_CTRL_LAST | emac_tx_csum(dev, skb) | emac_tx_vlan(dev, skb);
 
+	if (emac_tx_tso(dev, skb, &ctrl))
+		return emac_sw_tso(skb, ndev);
+
 	slot = dev->tx_slot++;
 	if (dev->tx_slot == NUM_TX_BUFF) {
 		dev->tx_slot = 0;
@@ -1536,6 +1626,9 @@  emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev)
 
 	ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY |
 	    emac_tx_csum(dev, skb) | emac_tx_vlan(dev, skb);
+	if (emac_tx_tso(dev, skb, &ctrl))
+		return emac_sw_tso(skb, ndev);
+
 	slot = dev->tx_slot;
 
 	/* skb data */
@@ -2946,6 +3039,9 @@  static int emac_init_config(struct emac_instance *dev)
 	if (dev->tah_ph != 0) {
 #ifdef CONFIG_IBM_EMAC_TAH
 		dev->features |= EMAC_FTR_HAS_TAH;
+
+		if (of_device_is_compatible(np, "ibm,emac-apm821xx"))
+			dev->features |= EMAC_FTR_TAH_HAS_TSO;
 #else
 		printk(KERN_ERR "%pOF: TAH support not enabled !\n", np);
 		return -ENXIO;
@@ -3167,6 +3263,9 @@  static int emac_probe(struct platform_device *ofdev)
 	if (dev->tah_dev) {
 		ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG;
 
+		if (emac_has_feature(dev, EMAC_FTR_TAH_HAS_TSO))
+			ndev->hw_features |= NETIF_F_TSO;
+
 		if (emac_has_feature(dev, EMAC_FTR_HAS_VLAN_CTAG_TX)) {
 			ndev->vlan_features |= ndev->hw_features;
 			ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index 8d84d439168c..e40ccee58775 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -336,6 +336,8 @@  struct emac_instance {
 #define EMAC_FTR_APM821XX_NO_HALF_DUPLEX	0x00001000
 /* EMAC can insert 802.1Q tag */
 #define EMAC_FTR_HAS_VLAN_CTAG_TX		0x00002000
+/* TAH can do TCP segmentation offload */
+#define EMAC_FTR_TAH_HAS_TSO			0x00004000
 
 /* Right now, we don't quite handle the always/possible masks on the
  * most optimal way as we don't have a way to say something like
@@ -352,6 +354,8 @@  enum {
 #endif
 #ifdef CONFIG_IBM_EMAC_TAH
 	    EMAC_FTR_HAS_TAH	|
+	    EMAC_FTR_TAH_HAS_TSO	|
+
 #endif
 #ifdef CONFIG_IBM_EMAC_ZMII
 	    EMAC_FTR_HAS_ZMII	|
diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
index e2f80cca9bed..833967aceb2f 100644
--- a/drivers/net/ethernet/ibm/emac/emac.h
+++ b/drivers/net/ethernet/ibm/emac/emac.h
@@ -266,6 +266,13 @@  struct emac_regs {
 #define EMAC_TX_CTRL_IVT		0x0020
 #define EMAC_TX_CTRL_RVT		0x0010
 #define EMAC_TX_CTRL_TAH_CSUM		0x000e
+#define EMAC_TX_CTRL_TAH_SSR(idx)	(((idx) + 1) << 1)
+#define EMAC_TX_CTRL_TAH_SSR5		0x000c
+#define EMAC_TX_CTRL_TAH_SSR4		0x000a
+#define EMAC_TX_CTRL_TAH_SSR3		0x0008
+#define EMAC_TX_CTRL_TAH_SSR2		0x0006
+#define EMAC_TX_CTRL_TAH_SSR1		0x0004
+#define EMAC_TX_CTRL_TAH_SSR0		0x0002
 
 /* EMAC specific TX descriptor status fields (read access) */
 #define EMAC_TX_ST_BFCS			0x0200
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index 9912456dca48..a0236eb305ae 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -45,6 +45,24 @@  void tah_detach(struct platform_device *ofdev, int channel)
 	mutex_unlock(&dev->lock);
 }
 
+static void tah_set_ssr(struct platform_device *ofdev)
+{
+	struct tah_instance *dev = dev_get_drvdata(&ofdev->dev);
+	struct tah_regs __iomem *p = dev->base;
+	int i;
+
+	mutex_lock(&dev->lock);
+
+	for (i = 0; i < ARRAY_SIZE(tah_ss); i++) {
+		/* Segment size can be up to 16K, but needs
+		 * to be a multiple of 2 bytes
+		 */
+		out_be32(&p->ssr0 + i, (tah_ss[i] & 0x3ffe) << 16);
+	}
+
+	mutex_unlock(&dev->lock);
+}
+
 void tah_reset(struct platform_device *ofdev)
 {
 	struct tah_instance *dev = platform_get_drvdata(ofdev);
@@ -64,6 +82,8 @@  void tah_reset(struct platform_device *ofdev)
 	out_be32(&p->mr,
 		 TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP |
 		 TAH_MR_DIG);
+
+	tah_set_ssr(ofdev);
 }
 
 int tah_get_regs_len(struct platform_device *ofdev)
diff --git a/drivers/net/ethernet/ibm/emac/tah.h b/drivers/net/ethernet/ibm/emac/tah.h
index 4d5f336f07b3..2cb0629f30e2 100644
--- a/drivers/net/ethernet/ibm/emac/tah.h
+++ b/drivers/net/ethernet/ibm/emac/tah.h
@@ -36,6 +36,8 @@  struct tah_regs {
 	u32 tsr;
 };
 
+#define TAH_NO_SSR	6
+extern const u32 tah_ss[TAH_NO_SSR];
 
 /* TAH device */
 struct tah_instance {