diff mbox

[net,/,RFC] net: fec: increase frame size limitation to actually available buffer

Message ID 1480444528-30054-1-git-send-email-nikita.yoush@cogentembedded.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Nikita Yushchenko Nov. 29, 2016, 6:35 p.m. UTC
Fec driver uses Rx buffers of 2k, but programs hardware to limit
incoming frames to 1522 bytes. This raises issues when FEC device
is used with DSA (since DSA tag can make frame larger), and also
disallows manual sending and receiving larger frames.

This patch removes the limitation, allowing Rx size up to entire buffer.
At the same time possible Tx size is increased as well, because hardware
uses the same register field to limit Rx and Tx.

Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
---
 drivers/net/ethernet/freescale/fec_main.c | 33 +++++++++++--------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

Comments

Andy Duan Nov. 30, 2016, 2:37 a.m. UTC | #1
From: Nikita Yushchenko <nikita.yoush@cogentembedded.com> Sent: Wednesday, November 30, 2016 2:35 AM
 >To: David S. Miller <davem@davemloft.net>; Andy Duan
 ><fugang.duan@nxp.com>; Troy Kisky <troy.kisky@boundarydevices.com>;
 >Andrew Lunn <andrew@lunn.ch>; Eric Nelson <eric@nelint.com>; Philippe
 >Reynes <tremyfr@gmail.com>; Johannes Berg <johannes@sipsolutions.net>;
 >netdev@vger.kernel.org
 >Cc: Chris Healy <cphealy@gmail.com>; Fabio Estevam
 ><fabio.estevam@nxp.com>; linux-kernel@vger.kernel.org; Nikita
 >Yushchenko <nikita.yoush@cogentembedded.com>
 >Subject: [patch net / RFC] net: fec: increase frame size limitation to actually
 >available buffer
 >
 >Fec driver uses Rx buffers of 2k, but programs hardware to limit incoming
 >frames to 1522 bytes. This raises issues when FEC device is used with DSA
 >(since DSA tag can make frame larger), and also disallows manual sending and
 >receiving larger frames.
 >
 >This patch removes the limitation, allowing Rx size up to entire buffer.
 >At the same time possible Tx size is increased as well, because hardware uses
 >the same register field to limit Rx and Tx.
 >
 >Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
 >---
 > drivers/net/ethernet/freescale/fec_main.c | 33 +++++++++++----------------
 >----
 > 1 file changed, 12 insertions(+), 21 deletions(-)
 >
 >diff --git a/drivers/net/ethernet/freescale/fec_main.c
 >b/drivers/net/ethernet/freescale/fec_main.c
 >index 73ac35780611..c09789a71024 100644
 >--- a/drivers/net/ethernet/freescale/fec_main.c
 >+++ b/drivers/net/ethernet/freescale/fec_main.c
 >@@ -171,30 +171,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet
 >MAC address");  #endif  #endif /* CONFIG_M5272 */
 >
 >-/* The FEC stores dest/src/type/vlan, data, and checksum for receive
 >packets.
 >- */
 >-#define PKT_MAXBUF_SIZE		1522
 >-#define PKT_MINBUF_SIZE		64
 >-#define PKT_MAXBLR_SIZE		1536
 >-
 > /* FEC receive acceleration */
 > #define FEC_RACC_IPDIS		(1 << 1)
 > #define FEC_RACC_PRODIS		(1 << 2)
 > #define FEC_RACC_SHIFT16	BIT(7)
 > #define FEC_RACC_OPTIONS	(FEC_RACC_IPDIS | FEC_RACC_PRODIS)
 >
 >-/*
 >- * The 5270/5271/5280/5282/532x RX control register also contains maximum
 >frame
 >- * size bits. Other FEC hardware does not, so we need to take that into
 >- * account when setting it.
 >- */
 >-#if defined(CONFIG_M523x) || defined(CONFIG_M527x) ||
 >defined(CONFIG_M528x) || \
 >-    defined(CONFIG_M520x) || defined(CONFIG_M532x) ||
 >defined(CONFIG_ARM)
 >-#define	OPT_FRAME_SIZE	(PKT_MAXBUF_SIZE << 16)
 >-#else
 >-#define	OPT_FRAME_SIZE	0
 >-#endif
 >-
 > /* FEC MII MMFR bits definition */
 > #define FEC_MMFR_ST		(1 << 30)
 > #define FEC_MMFR_OP_READ	(2 << 28)
 >@@ -847,7 +829,8 @@ static void fec_enet_enable_ring(struct net_device
 >*ndev)
 > 	for (i = 0; i < fep->num_rx_queues; i++) {
 > 		rxq = fep->rx_queue[i];
 > 		writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));
 >-		writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
 >+		writel(FEC_ENET_RX_FRSIZE - fep->rx_align,
 >+		       fep->hwp + FEC_R_BUFF_SIZE(i));
 >
 > 		/* enable DMA1/2 */
 > 		if (i)
 >@@ -895,9 +878,17 @@ fec_restart(struct net_device *ndev)
 > 	struct fec_enet_private *fep = netdev_priv(ndev);
 > 	u32 val;
 > 	u32 temp_mac[2];
 >-	u32 rcntl = OPT_FRAME_SIZE | 0x04;
 >+	u32 rcntl = 0x04;
 > 	u32 ecntl = 0x2; /* ETHEREN */
 >
 >+	/* The 5270/5271/5280/5282/532x RX control register also contains
 >+	 * maximum frame * size bits. Other FEC hardware does not.
 >+	 */
 >+#if defined(CONFIG_M523x) || defined(CONFIG_M527x) ||
 >defined(CONFIG_M528x) || \
 >+    defined(CONFIG_M520x) || defined(CONFIG_M532x) ||
 >defined(CONFIG_ARM)
 >+	rcntl |= (FEC_ENET_RX_FRSIZE - fep->rx_align) << 16; #endif
 >+
 > 	/* Whack a reset.  We should wait for this.
 > 	 * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
 > 	 * instead of reset MAC itself.
 >@@ -953,7 +944,7 @@ fec_restart(struct net_device *ndev)
 > 		else
 > 			val &= ~FEC_RACC_OPTIONS;
 > 		writel(val, fep->hwp + FEC_RACC);
 >-		writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL);
 >+		writel(FEC_ENET_RX_FRSIZE - fep->rx_align, fep->hwp +
 >FEC_FTRL);

By the patch itself, it seems fine except MRBRn must be evenly divisible by 64.
But I think it is not necessary since the driver don't support jumbo frame.

 > 	}
 > #endif
 >
 >--
 >2.1.4
Nikita Yushchenko Nov. 30, 2016, 6:36 a.m. UTC | #2
> But I think it is not necessary since the driver don't support jumbo frame.

Hardcoded 1522 raises two separate issues.

(1) When DSA is in use, frames processed by FEC chip contain DSA tag and
thus can be larger than hardcoded limit of 1522. This issue is not
FEC-specific, any driver that hardcodes maximum frame size to 1522 (many
do) will have this issue if used with DSA.

Clean solution for this must take into account that difference between
MTU and max frame size is no longer known at compile time. Actually this
is the case even without DSA, due to VLANs: max frame size is (MTU + 18)
without VLANs, but (MTU + 22) with VLANs. However currently drivers tend
to ignore this and hardcode 22.  With DSA, 22 is not enough, need to add
switch-specific tag size to that.

Not yet sure how to handle this. DSA-specific API to find out tag size
could be added, but generic solution should handle all cases of dynamic
difference between MTU and max frame size, not only DSA.


(2) There is some demand to use larger frames for optimization purposes.

FEC register fields that limit frame size are 14-bit, thus allowing
frames up to (4k-1). I'm about to prepare a larger patch:
- add ndo_change_mtu handler, allowing MTU up to (4k - overhead),
- set MAX_FL / TRUNC_FL based on configured MTU,
- if necessary, do buffer reallocation with larger buffers.

Is this suitable for upstreaming?
Is there any policy related to handling larger frames?
Toshiaki Makita Nov. 30, 2016, 7:25 a.m. UTC | #3
On 2016/11/30 15:36, Nikita Yushchenko wrote:
>> But I think it is not necessary since the driver don't support jumbo frame.
> 
> Hardcoded 1522 raises two separate issues.
> 
> (1) When DSA is in use, frames processed by FEC chip contain DSA tag and
> thus can be larger than hardcoded limit of 1522. This issue is not
> FEC-specific, any driver that hardcodes maximum frame size to 1522 (many
> do) will have this issue if used with DSA.
> 
> Clean solution for this must take into account that difference between
> MTU and max frame size is no longer known at compile time. Actually this
> is the case even without DSA, due to VLANs: max frame size is (MTU + 18)
> without VLANs, but (MTU + 22) with VLANs. However currently drivers tend
> to ignore this and hardcode 22.  With DSA, 22 is not enough, need to add
> switch-specific tag size to that.
> 
> Not yet sure how to handle this. DSA-specific API to find out tag size
> could be added, but generic solution should handle all cases of dynamic
> difference between MTU and max frame size, not only DSA.

BTW I'm trying to introduce envelope frames to solve this kind of problems.
http://marc.info/?t=147496691500005&r=1&w=2
http://marc.info/?t=147496691500003&r=1&w=2
http://marc.info/?t=147496691500002&r=1&w=2
http://marc.info/?t=147496691500004&r=1&w=2
http://marc.info/?t=147496691500001&r=1&w=2

It needs jumbo frame support of NICs though.

Regards,
Toshiaki Makita
Andy Duan Nov. 30, 2016, 7:35 a.m. UTC | #4
From: Nikita Yushchenko <nikita.yoush@cogentembedded.com> Sent: Wednesday, November 30, 2016 2:36 PM
 >To: Andy Duan <fugang.duan@nxp.com>; David S. Miller
 ><davem@davemloft.net>; Troy Kisky <troy.kisky@boundarydevices.com>;
 >Andrew Lunn <andrew@lunn.ch>; Eric Nelson <eric@nelint.com>; Philippe
 >Reynes <tremyfr@gmail.com>; Johannes Berg <johannes@sipsolutions.net>;
 >netdev@vger.kernel.org
 >Cc: Chris Healy <cphealy@gmail.com>; Fabio Estevam
 ><fabio.estevam@nxp.com>; linux-kernel@vger.kernel.org
 >Subject: Re: [patch net / RFC] net: fec: increase frame size limitation to
 >actually available buffer
 >
 >> But I think it is not necessary since the driver don't support jumbo frame.
 >
 >Hardcoded 1522 raises two separate issues.
 >
 >(1) When DSA is in use, frames processed by FEC chip contain DSA tag and
 >thus can be larger than hardcoded limit of 1522. This issue is not FEC-specific,
 >any driver that hardcodes maximum frame size to 1522 (many
 >do) will have this issue if used with DSA.
 >
 >Clean solution for this must take into account that difference between MTU
 >and max frame size is no longer known at compile time. Actually this is the
 >case even without DSA, due to VLANs: max frame size is (MTU + 18) without
 >VLANs, but (MTU + 22) with VLANs. However currently drivers tend to ignore
 >this and hardcode 22.  With DSA, 22 is not enough, need to add switch-
 >specific tag size to that.
 >
 >Not yet sure how to handle this. DSA-specific API to find out tag size could be
 >added, but generic solution should handle all cases of dynamic difference
 >between MTU and max frame size, not only DSA.
 >
 >
 >(2) There is some demand to use larger frames for optimization purposes.
 >
 >FEC register fields that limit frame size are 14-bit, thus allowing frames up to
 >(4k-1). I'm about to prepare a larger patch:
 >- add ndo_change_mtu handler, allowing MTU up to (4k - overhead),
 >- set MAX_FL / TRUNC_FL based on configured MTU,
 >- if necessary, do buffer reallocation with larger buffers.
 >
 >Is this suitable for upstreaming?
 >Is there any policy related to handling larger frames?

Of course, welcome to upstream the jumbo frame patches, but hope to also add the transmit jumbo frame, not only receive path, which is helpful for testing with two boards connection.
And, some notes need you to care:
- the maximum jumbo frame should consider the fifo size. Different chip has different fifo size. Like i.MX53 tx and rx share one fifo, i.mx6q/dl/sl have separate 4k fifo for tx and rx, i.mx6sx/i.mx7x have separate 8k fifo for tx and rx.
- rx fifo watermark to generate pause frame in busy loading system to avoid fifo overrun. In general,  little pause frames bring better performance, mass of pause frames cause worse performance.


Regards,
Andy
Nikita Yushchenko Nov. 30, 2016, 2:58 p.m. UTC | #5
>> (1) When DSA is in use, frames processed by FEC chip contain DSA tag and
>> thus can be larger than hardcoded limit of 1522. This issue is not
>> FEC-specific, any driver that hardcodes maximum frame size to 1522 (many
>> do) will have this issue if used with DSA.
> 
> BTW I'm trying to introduce envelope frames to solve this kind of problems.
> http://marc.info/?t=147496691500005&r=1&w=2
> http://marc.info/?t=147496691500003&r=1&w=2
> http://marc.info/?t=147496691500002&r=1&w=2
> http://marc.info/?t=147496691500004&r=1&w=2
> http://marc.info/?t=147496691500001&r=1&w=2
> 
> It needs jumbo frame support of NICs though.

Thanks for pointing to this.

Indeed frame with DSA tag conceptually is an envelope frame.

ndev->env_hdr_len introduced by your patches, actually is explicitly
handled difference between (MTU + 18) and frame that HW should allow.
If this is known, hardware can be configured to work with DSA. At least
FEC hardware that can send and receive "slightly larger" frames after
simple register configuration.

Furthermore, since DSA configuration is known statically (it comes from
device tree), ndo_set_env_hdr_len method could be automatically called
at init, making setup working by default if driver supports that. And if
not, perhaps can automatically lower MTU.

Looks like a solution :)

What's current status of this work?

What is not really clear - what if several tagging protocols are used
together. AFAIU, things may be more complex that simple appending of
tags, e.g. EDSA tag can carry VLAN id inside.

Nikita
Andrew Lunn Nov. 30, 2016, 3:10 p.m. UTC | #6
> What is not really clear - what if several tagging protocols are used
> together. AFAIU, things may be more complex that simple appending of
> tags, e.g. EDSA tag can carry VLAN id inside.

Hi Nikita

At least for all current tagging protocols, the size of the tag is
constant. And you cannot run different tagging protocols on the same
master interface at the same time.

However, i think Florian tried something funky with the SF2 and B53
driver. He has a b53 hanging off a sf2. So i think he used nested
tagging protocols!

	Andrew
Zefir Kurtisi Nov. 30, 2016, 4:05 p.m. UTC | #7
On 11/29/2016 07:35 PM, Nikita Yushchenko wrote:
> Fec driver uses Rx buffers of 2k, but programs hardware to limit
> incoming frames to 1522 bytes. This raises issues when FEC device
> is used with DSA (since DSA tag can make frame larger), and also
> disallows manual sending and receiving larger frames.
> 
> This patch removes the limitation, allowing Rx size up to entire buffer.
> At the same time possible Tx size is increased as well, because hardware
> uses the same register field to limit Rx and Tx.
> 
> Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
> ---

I recently did similar changes to the freescale/gianfar to prevent fragmentation
of (E)DSA frames [1].

With the fragmentation kicking in, I noticed a bug in the scatter-gather logic
which caused the frame-size of fragmented frames to be reported wrong [2]. Not
sure if the fec driver has SG capabilities and if this issue also applies, you
might want to double-check.


Cheers,
Zefir


[1] http://marc.info/?l=linux-netdev&m=147187438313519&w=2
[2] http://marc.info/?l=linux-netdev&m=147187430213484&w=2
Florian Fainelli Dec. 1, 2016, 3:26 a.m. UTC | #8
On 11/30/2016 07:10 AM, Andrew Lunn wrote:
>> What is not really clear - what if several tagging protocols are used
>> together. AFAIU, things may be more complex that simple appending of
>> tags, e.g. EDSA tag can carry VLAN id inside.
> 
> Hi Nikita
> 
> At least for all current tagging protocols, the size of the tag is
> constant. And you cannot run different tagging protocols on the same
> master interface at the same time.

I am not sure if using envelope frames is entirely appropriate here,
because there are existing switch tagging protocols that:

- don't have a specific Ethernet type allocated (Broadcom tags, DSA)
- could be appended at the end of the frame instead of pre-pended

Alexander Duyck suggested a while ago that we may be able to use the
headers_ops to implement the DSA tag pop/push, as well as get an
appropriate MTU adjustment, can you see if that would work?

> 
> However, i think Florian tried something funky with the SF2 and B53
> driver. He has a b53 hanging off a sf2. So i think he used nested
> tagging protocols!

Well, this actually did not work, because the SF2 and B53 switches
essentially terminate switch tags when they ingress their switch ports,
so the tag inserted by B53 ingressing into the SF2 port does not get
sent all the way to the CPU hanging off the SF2... With the B53 hanging
off the SF2, we essentially have to disable Broadcom tags in the B53
device, because the tag and switches were never designed to be cascaded
in the first place (at least not these specific cores).
Toshiaki Makita Dec. 1, 2016, 12:46 p.m. UTC | #9
On 2016/11/30 23:58, Nikita Yushchenko wrote:
>>> (1) When DSA is in use, frames processed by FEC chip contain DSA tag and
>>> thus can be larger than hardcoded limit of 1522. This issue is not
>>> FEC-specific, any driver that hardcodes maximum frame size to 1522 (many
>>> do) will have this issue if used with DSA.
>>
>> BTW I'm trying to introduce envelope frames to solve this kind of problems.
>> http://marc.info/?t=147496691500005&r=1&w=2
>> http://marc.info/?t=147496691500003&r=1&w=2
>> http://marc.info/?t=147496691500002&r=1&w=2
>> http://marc.info/?t=147496691500004&r=1&w=2
>> http://marc.info/?t=147496691500001&r=1&w=2
>>
>> It needs jumbo frame support of NICs though.
> 
> Thanks for pointing to this.
> 
> Indeed frame with DSA tag conceptually is an envelope frame.
> 
> ndev->env_hdr_len introduced by your patches, actually is explicitly
> handled difference between (MTU + 18) and frame that HW should allow.
> If this is known, hardware can be configured to work with DSA. At least
> FEC hardware that can send and receive "slightly larger" frames after
> simple register configuration.
> 
> Furthermore, since DSA configuration is known statically (it comes from
> device tree), ndo_set_env_hdr_len method could be automatically called
> at init, making setup working by default if driver supports that. And if
> not, perhaps can automatically lower MTU.
> 
> Looks like a solution :)
> 
> What's current status of this work?

Thank you for taking a look.
I'm planning to post v2 soon.

> What is not really clear - what if several tagging protocols are used
> together. AFAIU, things may be more complex that simple appending of
> tags, e.g. EDSA tag can carry VLAN id inside.

If kernel is aware of VLAN configuration, add 4 bytes + DSA tag size.
(I'm not familiar with how dsa knows vlan configuration, but probably
through switchdev_port_obj_add()? If so, dsa should be able to take into
account additional vlan tag size.)

If vlan tag is opaque from kernel, e.g. forwarding vlan tagged frames
without configuring vlan_filtering in bridge, admin needs to set
env_hdr_len manually. This is why I'm proposing manual operation.

Regards,
Toshiaki Makita
diff mbox

Patch

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 73ac35780611..c09789a71024 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -171,30 +171,12 @@  MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #endif
 #endif /* CONFIG_M5272 */
 
-/* The FEC stores dest/src/type/vlan, data, and checksum for receive packets.
- */
-#define PKT_MAXBUF_SIZE		1522
-#define PKT_MINBUF_SIZE		64
-#define PKT_MAXBLR_SIZE		1536
-
 /* FEC receive acceleration */
 #define FEC_RACC_IPDIS		(1 << 1)
 #define FEC_RACC_PRODIS		(1 << 2)
 #define FEC_RACC_SHIFT16	BIT(7)
 #define FEC_RACC_OPTIONS	(FEC_RACC_IPDIS | FEC_RACC_PRODIS)
 
-/*
- * The 5270/5271/5280/5282/532x RX control register also contains maximum frame
- * size bits. Other FEC hardware does not, so we need to take that into
- * account when setting it.
- */
-#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
-#define	OPT_FRAME_SIZE	(PKT_MAXBUF_SIZE << 16)
-#else
-#define	OPT_FRAME_SIZE	0
-#endif
-
 /* FEC MII MMFR bits definition */
 #define FEC_MMFR_ST		(1 << 30)
 #define FEC_MMFR_OP_READ	(2 << 28)
@@ -847,7 +829,8 @@  static void fec_enet_enable_ring(struct net_device *ndev)
 	for (i = 0; i < fep->num_rx_queues; i++) {
 		rxq = fep->rx_queue[i];
 		writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));
-		writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
+		writel(FEC_ENET_RX_FRSIZE - fep->rx_align,
+		       fep->hwp + FEC_R_BUFF_SIZE(i));
 
 		/* enable DMA1/2 */
 		if (i)
@@ -895,9 +878,17 @@  fec_restart(struct net_device *ndev)
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	u32 val;
 	u32 temp_mac[2];
-	u32 rcntl = OPT_FRAME_SIZE | 0x04;
+	u32 rcntl = 0x04;
 	u32 ecntl = 0x2; /* ETHEREN */
 
+	/* The 5270/5271/5280/5282/532x RX control register also contains
+	 * maximum frame * size bits. Other FEC hardware does not.
+	 */
+#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
+    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
+	rcntl |= (FEC_ENET_RX_FRSIZE - fep->rx_align) << 16;
+#endif
+
 	/* Whack a reset.  We should wait for this.
 	 * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
 	 * instead of reset MAC itself.
@@ -953,7 +944,7 @@  fec_restart(struct net_device *ndev)
 		else
 			val &= ~FEC_RACC_OPTIONS;
 		writel(val, fep->hwp + FEC_RACC);
-		writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL);
+		writel(FEC_ENET_RX_FRSIZE - fep->rx_align, fep->hwp + FEC_FTRL);
 	}
 #endif