
[v2] ethernet:arc: Fix racing of TX ring buffer

Message ID 5744EA24.7060806@gmx.de
State RFC, archived
Delegated to: David Miller

Commit Message

Lino Sanfilippo May 24, 2016, 11:56 p.m. UTC
Francois, Shuyu,

this is the patch with the discussed changes.

Shuyu it would be great if you could test this one. If it passes
and there are no further objections I will resend it as a regular patch
(including commit message, etc.) to the mailing list.

Comments

Shuyu Wei May 28, 2016, 6:43 a.m. UTC | #1
On Wed, May 25, 2016 at 01:56:20AM +0200, Lino Sanfilippo wrote:
> Francois, Shuyu,
> 
> this is the patch with the discussed changes.
> 
> Shuyu it would be great if you could test this one. If it passes
> and there are no further objections I will resend it as a regular patch
> (including commit message, etc.) to the mailing list.
> 
> 
> diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
> index a3a9392..ec656b3 100644
> --- a/drivers/net/ethernet/arc/emac_main.c
> +++ b/drivers/net/ethernet/arc/emac_main.c
> @@ -153,18 +153,29 @@ static void arc_emac_tx_clean(struct net_device *ndev)
>  {
>  	struct arc_emac_priv *priv = netdev_priv(ndev);
>  	struct net_device_stats *stats = &ndev->stats;
> +	unsigned int curr = priv->txbd_curr;
>  	unsigned int i;
>  
> +	/* Make sure buffers and txbd_curr are consistent */
> +	smp_rmb();
> +
>  	for (i = 0; i < TX_BD_NUM; i++) {
>  		unsigned int *txbd_dirty = &priv->txbd_dirty;
>  		struct arc_emac_bd *txbd = &priv->txbd[*txbd_dirty];
>  		struct buffer_state *tx_buff = &priv->tx_buff[*txbd_dirty];
> -		struct sk_buff *skb = tx_buff->skb;
> -		unsigned int info = le32_to_cpu(txbd->info);
> +		unsigned int info;
> +		struct sk_buff *skb;
>  
> -		if ((info & FOR_EMAC) || !txbd->data || !skb)
> +		if (*txbd_dirty == curr)
>  			break;
>  
> +		info = le32_to_cpu(txbd->info);
> +
> +		if (info & FOR_EMAC)
> +			break;
> +
> +		skb = tx_buff->skb;
> +
>  		if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
>  			stats->tx_errors++;
>  			stats->tx_dropped++;
> @@ -195,8 +206,8 @@ static void arc_emac_tx_clean(struct net_device *ndev)
>  		*txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
>  	}
>  
> -	/* Ensure that txbd_dirty is visible to tx() before checking
> -	 * for queue stopped.
> +	/* Ensure that txbd_dirty is visible to tx() and we see the most recent
> +	 * value for txbd_curr.
>  	 */
>  	smp_mb();
>  
> @@ -680,27 +691,24 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
>  	dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
>  
>  	priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
> -
> -	/* Make sure pointer to data buffer is set */
> -	wmb();
> +	priv->tx_buff[*txbd_curr].skb = skb;
>  
>  	skb_tx_timestamp(skb);
>  
>  	*info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
>  
> -	/* Make sure info word is set */
> +	/* 1. Make sure that with respect to tx_clean everything is set up
> +	 * properly before we advance txbd_curr.
> +	 * 2. Make sure writes to DMA descriptors are completed before we inform
> +	 * the hardware.
> +	 */
>  	wmb();
>  
> -	priv->tx_buff[*txbd_curr].skb = skb;
> -
>  	/* Increment index to point to the next BD */
>  	*txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
>  
> -	/* Ensure that tx_clean() sees the new txbd_curr before
> -	 * checking the queue status. This prevents an unneeded wake
> -	 * of the queue in tx_clean().
> -	 */
> -	smp_mb();
> +	/* Ensure tx_clean() sees the updated value of txbd_curr */
> +	smp_wmb();
>  
>  	if (!arc_emac_tx_avail(priv)) {
>  		netif_stop_queue(ndev);

After some stress testing, it worked well most of the time.
But there is a chance that it gets stuck when I use two nc processes
to send TCP packets at full speed.  Only a newly arriving rx packet
can trigger it to run again. This happens only once every several
hours. There is no problem in UDP mode.  I'm not sure if it's related
to the tx code in the driver.
Lino Sanfilippo May 30, 2016, 9:41 p.m. UTC | #2
Hi Shuyu,

On 28.05.2016 08:43, Shuyu Wei wrote:
> 
> After some stress testing, it worked well most of the time.
> But there is a chance that it gets stuck when I use two nc processes
> to send TCP packets at full speed.  Only a newly arriving rx packet
> can trigger it to run again. This happens only once every several
> hours. There is no problem in UDP mode.  I'm not sure if it's related
> to the tx code in the driver.
> 

This sounds strange. One reason I could imagine for such an issue is that 
occasionally tx completion interrupts get lost: 
In this case skbs may not be freed (or are freed too late), which may result in a TCP
stream getting stuck, because each skb belongs to a socket and there is a limit on
the allowed number of skbs per socket. TCP only proceeds once the number of pending 
skbs falls below this limit again, but since the tx completion irq is lost, the only thing 
that could trigger the tx completion handler is another irq, e.g. for a received packet.
At least this could explain what you observed.

Did you see the same issues with the patch before (the one that, as you wrote,
survived a whole night of stress testing)?

Lino
Shuyu Wei June 5, 2016, 2:02 p.m. UTC | #3
On Mon, May 30, 2016 at 11:41:22PM +0200, Lino Sanfilippo wrote:
> 
> Did you see the same issues with the patch before (the one that, as you wrote,
> survived a whole night of stress testing)?
> 
> Lino

Hi Lino,
Sorry for my late reply. I retested the previous patch, and it did have
the same issue. However, it seems that the likelihood of it getting stuck
is lower (just my instinct, no evidence).
Lino Sanfilippo June 8, 2016, 7:54 a.m. UTC | #4
Hi Shuyu,

On 05.06.2016 16:02, Shuyu Wei wrote:
> Hi Lino,
> Sorry for my late reply. I retested the previous patch, and it did have
> the same issue. However, it seems that the likelihood of it getting stuck
> is lower (just my instinct, no evidence).
> 

Thank you for the feedback. It is hard to guess what is still going wrong.
But if you - as I assume - don't see this issue with the original code (as it is
in net-next), there is still something wrong with the changes we made. So we
should probably leave the code as it is for now.

Regards,
Lino