Patchwork via-rhine: Problem with lost link after a while

login
register
mail settings
Submitter françois romieu
Date April 10, 2012, 10:55 p.m.
Message ID <20120410225534.GA28480@electric-eye.fr.zoreil.com>
Download mbox | patch
Permalink /patch/151700/
State RFC
Delegated to: David Miller
Headers show

Comments

françois romieu - April 10, 2012, 10:55 p.m.
Bjarke Istrup Pedersen <gurligebis@gentoo.org> :
> 10. apr. 2012 22.42 skrev Francois Romieu <romieu@fr.zoreil.com>:
[...]
> > http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;h=3f8c91a7398b9266fbe7abcbe4bd5dffef907643
[...]
> Great, I'll try a 3.4-rc2 kernel, and see how it runs.
> 
> The thread I was talking about earlier is here:
> http://lists.soekris.com/pipermail/soekris-tech/2012-April/018318.html
> Is there any of the changes he has there, that makes sense in the new
> driver you wrote ?

(I did not write a new driver)

Regarding Svenning's patch:
- the wmb in alloc_rbufs may help rhine_reset_task().
- one should probably add one in rhine_rx() as well.
- rhine_start_tx() is supposed to stop queueing when there is no room left.
  I'm curious to know if the "Tx descriptor busy" test triggered.
- the rmb() in rhine_tx() will not make a difference for a single core but
  it's a good reminder that I should not have forgotten to propagate the
  xmit / Tx completion fix back from the r8169 driver to the via-rhine one
  (sigh)

mmiowb is probably missing. I doubt it hits hard right now.

I have not checked if MMIO flushes are missing. Actually I need some sleep.
Bjarke Istrup Pedersen - April 10, 2012, 11:58 p.m.
11. apr. 2012 00.55 skrev Francois Romieu <romieu@fr.zoreil.com>:
> Bjarke Istrup Pedersen <gurligebis@gentoo.org> :
>> 10. apr. 2012 22.42 skrev Francois Romieu <romieu@fr.zoreil.com>:
> [...]
>> > http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;h=3f8c91a7398b9266fbe7abcbe4bd5dffef907643
> [...]
>> Great, I'll try a 3.4-rc2 kernel, and see how it runs.
>>
>> The thread I was talking about earlier is here:
>> http://lists.soekris.com/pipermail/soekris-tech/2012-April/018318.html
>> Is there any of the changes he has there, that makes sense in the new
>> driver you wrote ?
>
> (I did not write a new driver)

Sorry, bad choice of words, meant fixed some issues with the driver :)

> Regarding Svenning's patch:
> - the wmb in alloc_rbufs may help rhine_reset_task().
> - one should probably add one in rhine_rx() as well.
> - rhine_start_tx() is supposed to stop queueing when there is no room left.
>  I'm curious to know if the "Tx descriptor busy" test triggered.
> - the rmb() in rhine_tx() will not make a difference for a single core but
>  it's a good reminder that I should not have forgotten to propagate the
>  xmit / Tx completion fix back from the r8169 driver to the via-rhine one
>  (sigh)
>
> mmiowb is probably missing. I doubt it hits hard right now.

I'll give it a spin tomorrow, and see how it runs :)

> I have not checked if MMIO flushes are missing. Actually I need some sleep.
>
> diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
> index fcfa01f..dfa9fc0 100644
> --- a/drivers/net/ethernet/via/via-rhine.c
> +++ b/drivers/net/ethernet/via/via-rhine.c
> @@ -1163,6 +1163,7 @@ static void alloc_rbufs(struct net_device *dev)
>                                       PCI_DMA_FROMDEVICE);
>
>                rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]);
> +               wmb();
>                rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
>        }
>        rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
> @@ -1709,8 +1710,13 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
>               ioaddr + ChipCmd1);
>        IOSYNC;
>
> -       if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN)
> +       if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN) {
> +               smp_wmb();
>                netif_stop_queue(dev);
> +               smp_mb();
> +               if (rp->cur_tx != rp->dirty_tx + TX_QUEUE_LEN)
> +                       netif_wake_queue(dev);
> +       }
>
>        netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n",
>                  rp->cur_tx - 1, entry);
> @@ -1759,6 +1765,7 @@ static void rhine_tx(struct net_device *dev)
>        struct rhine_private *rp = netdev_priv(dev);
>        int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
>
> +       smp_rmb();
>        /* find and cleanup dirty tx descriptors */
>        while (rp->dirty_tx != rp->cur_tx) {
>                txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
> @@ -1806,8 +1813,12 @@ static void rhine_tx(struct net_device *dev)
>                rp->tx_skbuff[entry] = NULL;
>                entry = (++rp->dirty_tx) % TX_RING_SIZE;
>        }
> -       if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
> +
> +       smp_mb();
> +       if (netif_queue_stopped(dev) &&
> +           (rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4) {
>                netif_wake_queue(dev);
> +       }
>  }
>
>  /**
> @@ -1947,6 +1958,7 @@ static int rhine_rx(struct net_device *dev, int limit)
>                                               PCI_DMA_FROMDEVICE);
>                        rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]);
>                }
> +               wmb();
>                rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
>        }
>
> --
> Ueimor
>
> Will code drivers for food.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Svenning Sørensen - April 11, 2012, 9:49 a.m.
On 11-04-2012 00:55, Francois Romieu wrote:
> Bjarke Istrup Pedersen<gurligebis@gentoo.org>  :
>
> [...]
>> Great, I'll try a 3.4-rc2 kernel, and see how it runs.
>>
>> The thread I was talking about earlier is here:
>> http://lists.soekris.com/pipermail/soekris-tech/2012-April/018318.html
>> Is there any of the changes he has there, that makes sense in the new
>> driver you wrote ?
> (I did not write a new driver)
>
> Regarding Svenning's patch:
> - the wmb in alloc_rbufs may help rhine_reset_task().
> - one should probably add one in rhine_rx() as well.
> - rhine_start_tx() is supposed to stop queueing when there is no room left.
>    I'm curious to know if the "Tx descriptor busy" test triggered.
> - the rmb() in rhine_tx() will not make a difference for a single core but
>    it's a good reminder that I should not have forgotten to propagate the
>    xmit / Tx completion fix back from the r8169 driver to the via-rhine one
>    (sigh)
>
> mmiowb is probably missing. I doubt it hits hard right now.
>
> I have not checked if MMIO flushes are missing. Actually I need some sleep.
>

Regarding the "Tx descriptor busy" test: no, I didn't see it trigger, I 
just put it there just in case because I suspected there could be a race 
due to the lock-free tx path.
But I'm glad if you're confident that it can't happen :)

Svenning
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
françois romieu - April 11, 2012, 10:21 p.m.
Svenning Sørensen <sss@secomea.com> :
[...]
> Regarding the "Tx descriptor busy" test: no, I didn't see it
> trigger, I just put it there just in case because I suspected there
> could be a race due to the lock-free tx path.
> But I'm glad if you're confident that it can't happen :)

Almost :o)

Without the patch it may happen in mainline. Once the napi Tx completion
function checks if queueing is stopped before enabling it, it should not
happen though. The patch should fix the race where queuing is not enabled
as well.

That being said, I would welcome a pony^W^W some testing on a multi-core
system with lots of Tx and enough irq from a different (non via-rhine)
source to starve the softirq processing.
Bjarke Istrup Pedersen - April 13, 2012, 6:16 a.m.
11. apr. 2012 00.55 skrev Francois Romieu <romieu@fr.zoreil.com>:
> Bjarke Istrup Pedersen <gurligebis@gentoo.org> :
>> 10. apr. 2012 22.42 skrev Francois Romieu <romieu@fr.zoreil.com>:
> [...]
>> > http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;h=3f8c91a7398b9266fbe7abcbe4bd5dffef907643
> [...]
>> Great, I'll try a 3.4-rc2 kernel, and see how it runs.
>>
>> The thread I was talking about earlier is here:
>> http://lists.soekris.com/pipermail/soekris-tech/2012-April/018318.html
>> Is there any of the changes he has there, that makes sense in the new
>> driver you wrote ?
>
> (I did not write a new driver)
>
> Regarding Svenning's patch:
> - the wmb in alloc_rbufs may help rhine_reset_task().
> - one should probably add one in rhine_rx() as well.
> - rhine_start_tx() is supposed to stop queueing when there is no room left.
>  I'm curious to know if the "Tx descriptor busy" test triggered.
> - the rmb() in rhine_tx() will not make a difference for a single core but
>  it's a good reminder that I should not have forgotten to propagate the
>  xmit / Tx completion fix back from the r8169 driver to the via-rhine one
>  (sigh)
>
> mmiowb is probably missing. I doubt it hits hard right now.
>
> I have not checked if MMIO flushes are missing. Actually I need some sleep.

I've been testing v3.4-rc2 with this patch applied for the past few
days, with alot of trafic across, both incoming and outgoing, with VPN
which is normally what can trigger problems, and it seems rock solid
:o)

Do you need me to do any specific testing of this patch?

/Bjarke

> diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
> index fcfa01f..dfa9fc0 100644
> --- a/drivers/net/ethernet/via/via-rhine.c
> +++ b/drivers/net/ethernet/via/via-rhine.c
> @@ -1163,6 +1163,7 @@ static void alloc_rbufs(struct net_device *dev)
>                                       PCI_DMA_FROMDEVICE);
>
>                rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]);
> +               wmb();
>                rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
>        }
>        rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
> @@ -1709,8 +1710,13 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
>               ioaddr + ChipCmd1);
>        IOSYNC;
>
> -       if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN)
> +       if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN) {
> +               smp_wmb();
>                netif_stop_queue(dev);
> +               smp_mb();
> +               if (rp->cur_tx != rp->dirty_tx + TX_QUEUE_LEN)
> +                       netif_wake_queue(dev);
> +       }
>
>        netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n",
>                  rp->cur_tx - 1, entry);
> @@ -1759,6 +1765,7 @@ static void rhine_tx(struct net_device *dev)
>        struct rhine_private *rp = netdev_priv(dev);
>        int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
>
> +       smp_rmb();
>        /* find and cleanup dirty tx descriptors */
>        while (rp->dirty_tx != rp->cur_tx) {
>                txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
> @@ -1806,8 +1813,12 @@ static void rhine_tx(struct net_device *dev)
>                rp->tx_skbuff[entry] = NULL;
>                entry = (++rp->dirty_tx) % TX_RING_SIZE;
>        }
> -       if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
> +
> +       smp_mb();
> +       if (netif_queue_stopped(dev) &&
> +           (rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4) {
>                netif_wake_queue(dev);
> +       }
>  }
>
>  /**
> @@ -1947,6 +1958,7 @@ static int rhine_rx(struct net_device *dev, int limit)
>                                               PCI_DMA_FROMDEVICE);
>                        rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]);
>                }
> +               wmb();
>                rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
>        }
>
> --
> Ueimor
>
> Will code drivers for food.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
françois romieu - April 14, 2012, 10:06 a.m.
Bjarke Istrup Pedersen <gurligebis@gentoo.org> :
[...]
> I've been testing v3.4-rc2 with this patch applied for the past few
> days, with alot of trafic across, both incoming and outgoing, with VPN
> which is normally what can trigger problems, and it seems rock solid
> :o)

Thanks.

> Do you need me to do any specific testing of this patch?

See below. 

Changes of behavior should be reported for this patch over plain 3.4-rc2 or
current 3.3-stable as both include Andreas's fix.

Otherwise it can not be labelled it as a fix : it does not go into 3.4-rc
nor 3.3-stable but proceeds through net-next where it will land into
3.5-rc then -stable.

One can not exercize the Tx xmit vs napi Tx completion race part of the
patch with a single core. Afaiui non-Soekris users are needed here.

Even if you don't experience change of behavior over 3.4-rc or 3.3-stable,
some longterm use of this driver would be welcome. It may end as an effort
for 3.5-rc though.

--
Ueimor

Will code drivers for food.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjarke Istrup Pedersen - April 14, 2012, 3:10 p.m.
14. apr. 2012 12.06 skrev Francois Romieu <romieu@fr.zoreil.com>:
> Bjarke Istrup Pedersen <gurligebis@gentoo.org> :
> [...]
>> I've been testing v3.4-rc2 with this patch applied for the past few
>> days, with alot of trafic across, both incoming and outgoing, with VPN
>> which is normally what can trigger problems, and it seems rock solid
>> :o)
>
> Thanks.
>
>> Do you need me to do any specific testing of this patch?
>
> See below.
>
> Changes of behavior should be reported for this patch over plain 3.4-rc2 or
> current 3.3-stable as both include Andreas's fix.
>
> Otherwise it can not be labelled it as a fix : it does not go into 3.4-rc
> nor 3.3-stable but proceeds through net-next where it will land into
> 3.5-rc then -stable.
>
> One can not exercize the Tx xmit vs napi Tx completion race part of the
> patch with a single core. Afaiui non-Soekris users are needed here.
>
> Even if you don't experience change of behavior over 3.4-rc or 3.3-stable,
> some longterm use of this driver would be welcome. It may end as an effort
> for 3.5-rc though.

Okay, I'll keep staying up to date with 3.4-rcX and report if anything breaks.
Going for v3.5-rc should be fine AFAICS :-)

/Bjarke

> --
> Ueimor
>
> Will code drivers for food.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index fcfa01f..dfa9fc0 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -1163,6 +1163,7 @@  static void alloc_rbufs(struct net_device *dev)
 				       PCI_DMA_FROMDEVICE);
 
 		rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]);
+		wmb();
 		rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
 	}
 	rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
@@ -1709,8 +1710,13 @@  static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
 	       ioaddr + ChipCmd1);
 	IOSYNC;
 
-	if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN)
+	if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN) {
+		smp_wmb();
 		netif_stop_queue(dev);
+		smp_mb();
+		if (rp->cur_tx != rp->dirty_tx + TX_QUEUE_LEN)
+			netif_wake_queue(dev);
+	}
 
 	netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n",
 		  rp->cur_tx - 1, entry);
@@ -1759,6 +1765,7 @@  static void rhine_tx(struct net_device *dev)
 	struct rhine_private *rp = netdev_priv(dev);
 	int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
 
+	smp_rmb();
 	/* find and cleanup dirty tx descriptors */
 	while (rp->dirty_tx != rp->cur_tx) {
 		txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
@@ -1806,8 +1813,12 @@  static void rhine_tx(struct net_device *dev)
 		rp->tx_skbuff[entry] = NULL;
 		entry = (++rp->dirty_tx) % TX_RING_SIZE;
 	}
-	if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
+
+	smp_mb();
+	if (netif_queue_stopped(dev) &&
+	    (rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4) {
 		netif_wake_queue(dev);
+	}
 }
 
 /**
@@ -1947,6 +1958,7 @@  static int rhine_rx(struct net_device *dev, int limit)
 					       PCI_DMA_FROMDEVICE);
 			rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]);
 		}
+		wmb();
 		rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
 	}