[1/2] Revert "e1000e: fix cyclic resets at link up with active tx"
diff mbox series

Message ID 155548879651.3454.13167784936351314661.stgit@buzz
State Accepted
Delegated to: Jeff Kirsher
Headers show
Series
  • [1/2] Revert "e1000e: fix cyclic resets at link up with active tx"
Related show

Commit Message

Konstantin Khlebnikov April 17, 2019, 8:13 a.m. UTC
This reverts commit 0f9e980bf5ee1a97e2e401c846b2af989eb21c61.

That change cased false-positive warning about hardware hang:

e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
e1000e 0000:00:1f.6 eth0: Detected Hardware Unit Hang:
   TDH                  <0>
   TDT                  <1>
   next_to_use          <1>
   next_to_clean        <0>
buffer_info[next_to_clean]:
   time_stamp           <fffba7a7>
   next_to_watch        <0>
   jiffies              <fffbb140>
   next_to_watch.status <0>
MAC Status             <40080080>
PHY Status             <7949>
PHY 1000BASE-T Status  <0>
PHY Extended Status    <3000>
PCI Status             <10>
e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx

Besides warning everything works fine.
Original issue will be fixed property in following patch.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Joseph Yasi <joe.yasi@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203175
---
 drivers/net/ethernet/intel/e1000e/netdev.c |   15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

Comments

Joseph Yasi April 23, 2019, 12:49 a.m. UTC | #1
On Wed, Apr 17, 2019 at 4:13 AM Konstantin Khlebnikov <
khlebnikov@yandex-team.ru> wrote:

> This reverts commit 0f9e980bf5ee1a97e2e401c846b2af989eb21c61.
>
> That change cased false-positive warning about hardware hang:
>
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
> IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> e1000e 0000:00:1f.6 eth0: Detected Hardware Unit Hang:
>    TDH                  <0>
>    TDT                  <1>
>    next_to_use          <1>
>    next_to_clean        <0>
> buffer_info[next_to_clean]:
>    time_stamp           <fffba7a7>
>    next_to_watch        <0>
>    jiffies              <fffbb140>
>    next_to_watch.status <0>
> MAC Status             <40080080>
> PHY Status             <7949>
> PHY 1000BASE-T Status  <0>
> PHY Extended Status    <3000>
> PCI Status             <10>
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
>
> Besides warning everything works fine.
> Original issue will be fixed property in following patch.
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Reported-by: Joseph Yasi <joe.yasi@gmail.com>
>
Tested-by: Joseph Yasi <joe.yasi@gmail.com>

> Link: https://bugzilla.kernel.org/show_bug.cgi?id=203175
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c |   15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c
> b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 7acc61e4f645..ba96e52aa8d1 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -5309,13 +5309,8 @@ static void e1000_watchdog_task(struct work_struct
> *work)
>                         /* 8000ES2LAN requires a Rx packet buffer
> work-around
>                          * on link down event; reset the controller to
> flush
>                          * the Rx packet buffer.
> -                        *
> -                        * If the link is lost the controller stops DMA,
> but
> -                        * if there is queued Tx work it cannot be done.
> So
> -                        * reset the controller to flush the Tx packet
> buffers.
>                          */
> -                       if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
> -                           e1000_desc_unused(tx_ring) + 1 <
> tx_ring->count)
> +                       if (adapter->flags & FLAG_RX_NEEDS_RESTART)
>                                 adapter->flags |= FLAG_RESTART_NOW;
>                         else
>                                 pm_schedule_suspend(netdev->dev.parent,
> @@ -5338,6 +5333,14 @@ static void e1000_watchdog_task(struct work_struct
> *work)
>         adapter->gotc_old = adapter->stats.gotc;
>         spin_unlock(&adapter->stats64_lock);
>
> +       /* If the link is lost the controller stops DMA, but
> +        * if there is queued Tx work it cannot be done.  So
> +        * reset the controller to flush the Tx packet buffers.
> +        */
> +       if (!netif_carrier_ok(netdev) &&
> +           (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
> +               adapter->flags |= FLAG_RESTART_NOW;
> +
>         /* If reset is necessary, do it outside of interrupt context. */
>         if (adapter->flags & FLAG_RESTART_NOW) {
>                 schedule_work(&adapter->reset_task);
>
>
<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Apr 17, 2019 at 4:13 AM Konstantin Khlebnikov &lt;<a href="mailto:khlebnikov@yandex-team.ru">khlebnikov@yandex-team.ru</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">This reverts commit 0f9e980bf5ee1a97e2e401c846b2af989eb21c61.<br>
<br>
That change cased false-positive warning about hardware hang:<br>
<br>
e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx<br>
IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready<br>
e1000e 0000:00:1f.6 eth0: Detected Hardware Unit Hang:<br>
   TDH                  &lt;0&gt;<br>
   TDT                  &lt;1&gt;<br>
   next_to_use          &lt;1&gt;<br>
   next_to_clean        &lt;0&gt;<br>
buffer_info[next_to_clean]:<br>
   time_stamp           &lt;fffba7a7&gt;<br>
   next_to_watch        &lt;0&gt;<br>
   jiffies              &lt;fffbb140&gt;<br>
   next_to_watch.status &lt;0&gt;<br>
MAC Status             &lt;40080080&gt;<br>
PHY Status             &lt;7949&gt;<br>
PHY 1000BASE-T Status  &lt;0&gt;<br>
PHY Extended Status    &lt;3000&gt;<br>
PCI Status             &lt;10&gt;<br>
e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx<br>
<br>
Besides warning everything works fine.<br>
Original issue will be fixed property in following patch.<br>
<br>
Signed-off-by: Konstantin Khlebnikov &lt;<a href="mailto:khlebnikov@yandex-team.ru" target="_blank">khlebnikov@yandex-team.ru</a>&gt;<br>
Reported-by: Joseph Yasi &lt;<a href="mailto:joe.yasi@gmail.com" target="_blank">joe.yasi@gmail.com</a>&gt;<br></blockquote><div>Tested-by: Joseph Yasi &lt;<a href="mailto:joe.yasi@gmail.com" target="_blank">joe.yasi@gmail.com</a>&gt;</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
Link: <a href="https://bugzilla.kernel.org/show_bug.cgi?id=203175" rel="noreferrer" target="_blank">https://bugzilla.kernel.org/show_bug.cgi?id=203175</a><br>
---<br>
 drivers/net/ethernet/intel/e1000e/netdev.c |   15 +++++++++------<br>
 1 file changed, 9 insertions(+), 6 deletions(-)<br>
<br>
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c<br>
index 7acc61e4f645..ba96e52aa8d1 100644<br>
--- a/drivers/net/ethernet/intel/e1000e/netdev.c<br>
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c<br>
@@ -5309,13 +5309,8 @@ static void e1000_watchdog_task(struct work_struct *work)<br>
                        /* 8000ES2LAN requires a Rx packet buffer work-around<br>
                         * on link down event; reset the controller to flush<br>
                         * the Rx packet buffer.<br>
-                        *<br>
-                        * If the link is lost the controller stops DMA, but<br>
-                        * if there is queued Tx work it cannot be done.  So<br>
-                        * reset the controller to flush the Tx packet buffers.<br>
                         */<br>
-                       if ((adapter-&gt;flags &amp; FLAG_RX_NEEDS_RESTART) ||<br>
-                           e1000_desc_unused(tx_ring) + 1 &lt; tx_ring-&gt;count)<br>
+                       if (adapter-&gt;flags &amp; FLAG_RX_NEEDS_RESTART)<br>
                                adapter-&gt;flags |= FLAG_RESTART_NOW;<br>
                        else<br>
                                pm_schedule_suspend(netdev-&gt;dev.parent,<br>
@@ -5338,6 +5333,14 @@ static void e1000_watchdog_task(struct work_struct *work)<br>
        adapter-&gt;gotc_old = adapter-&gt;stats.gotc;<br>
        spin_unlock(&amp;adapter-&gt;stats64_lock);<br>
<br>
+       /* If the link is lost the controller stops DMA, but<br>
+        * if there is queued Tx work it cannot be done.  So<br>
+        * reset the controller to flush the Tx packet buffers.<br>
+        */<br>
+       if (!netif_carrier_ok(netdev) &amp;&amp;<br>
+           (e1000_desc_unused(tx_ring) + 1 &lt; tx_ring-&gt;count))<br>
+               adapter-&gt;flags |= FLAG_RESTART_NOW;<br>
+<br>
        /* If reset is necessary, do it outside of interrupt context. */<br>
        if (adapter-&gt;flags &amp; FLAG_RESTART_NOW) {<br>
                schedule_work(&amp;adapter-&gt;reset_task);<br>
<br>
</blockquote></div></div>
Joseph Yasi April 23, 2019, 12:51 a.m. UTC | #2
On Wed, Apr 17, 2019 at 4:13 AM Konstantin Khlebnikov
<khlebnikov@yandex-team.ru> wrote:
>
> This reverts commit 0f9e980bf5ee1a97e2e401c846b2af989eb21c61.
>
> That change cased false-positive warning about hardware hang:
>
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
> IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> e1000e 0000:00:1f.6 eth0: Detected Hardware Unit Hang:
>    TDH                  <0>
>    TDT                  <1>
>    next_to_use          <1>
>    next_to_clean        <0>
> buffer_info[next_to_clean]:
>    time_stamp           <fffba7a7>
>    next_to_watch        <0>
>    jiffies              <fffbb140>
>    next_to_watch.status <0>
> MAC Status             <40080080>
> PHY Status             <7949>
> PHY 1000BASE-T Status  <0>
> PHY Extended Status    <3000>
> PCI Status             <10>
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
>
> Besides warning everything works fine.
> Original issue will be fixed property in following patch.
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Reported-by: Joseph Yasi <joe.yasi@gmail.com>
Tested-by: Joseph Yasi <joe.yasi@gmail.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=203175
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c |   15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 7acc61e4f645..ba96e52aa8d1 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -5309,13 +5309,8 @@ static void e1000_watchdog_task(struct work_struct *work)
>                         /* 8000ES2LAN requires a Rx packet buffer work-around
>                          * on link down event; reset the controller to flush
>                          * the Rx packet buffer.
> -                        *
> -                        * If the link is lost the controller stops DMA, but
> -                        * if there is queued Tx work it cannot be done.  So
> -                        * reset the controller to flush the Tx packet buffers.
>                          */
> -                       if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
> -                           e1000_desc_unused(tx_ring) + 1 < tx_ring->count)
> +                       if (adapter->flags & FLAG_RX_NEEDS_RESTART)
>                                 adapter->flags |= FLAG_RESTART_NOW;
>                         else
>                                 pm_schedule_suspend(netdev->dev.parent,
> @@ -5338,6 +5333,14 @@ static void e1000_watchdog_task(struct work_struct *work)
>         adapter->gotc_old = adapter->stats.gotc;
>         spin_unlock(&adapter->stats64_lock);
>
> +       /* If the link is lost the controller stops DMA, but
> +        * if there is queued Tx work it cannot be done.  So
> +        * reset the controller to flush the Tx packet buffers.
> +        */
> +       if (!netif_carrier_ok(netdev) &&
> +           (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
> +               adapter->flags |= FLAG_RESTART_NOW;
> +
>         /* If reset is necessary, do it outside of interrupt context. */
>         if (adapter->flags & FLAG_RESTART_NOW) {
>                 schedule_work(&adapter->reset_task);
>
Brown, Aaron F April 26, 2019, midnight UTC | #3
> From: Konstantin Khlebnikov [mailto:khlebnikov@yandex-team.ru]
> Sent: Wednesday, April 17, 2019 1:13 AM
> To: netdev@vger.kernel.org; intel-wired-lan@lists.osuosl.org; linux-
> kernel@vger.kernel.org; Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>
> Cc: Sasha Levin <sashal@kernel.org>; Joseph Yasi <joe.yasi@gmail.com>;
> Brown, Aaron F <aaron.f.brown@intel.com>; Alexander Duyck
> <alexander.duyck@gmail.com>; e1000-devel@lists.sourceforge.net
> Subject: [PATCH 1/2] Revert "e1000e: fix cyclic resets at link up with active tx"
> 
> This reverts commit 0f9e980bf5ee1a97e2e401c846b2af989eb21c61.
> 
> That change cased false-positive warning about hardware hang:
> 
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
> IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> e1000e 0000:00:1f.6 eth0: Detected Hardware Unit Hang:
>    TDH                  <0>
>    TDT                  <1>
>    next_to_use          <1>
>    next_to_clean        <0>
> buffer_info[next_to_clean]:
>    time_stamp           <fffba7a7>
>    next_to_watch        <0>
>    jiffies              <fffbb140>
>    next_to_watch.status <0>
> MAC Status             <40080080>
> PHY Status             <7949>
> PHY 1000BASE-T Status  <0>
> PHY Extended Status    <3000>
> PCI Status             <10>
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
> 
> Besides warning everything works fine.
> Original issue will be fixed property in following patch.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Reported-by: Joseph Yasi <joe.yasi@gmail.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=203175
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c |   15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)

Tested-by: Aaron Brown <aaron.f.brown@intel.com>
This was more of a regression check as I never did manage to replicate the tx hang, even with seemingly the same hardware.
Oleksandr Natalenko April 26, 2019, 2:11 p.m. UTC | #4
On Wed, Apr 17, 2019 at 11:13:16AM +0300, Konstantin Khlebnikov wrote:
> This reverts commit 0f9e980bf5ee1a97e2e401c846b2af989eb21c61.
> 
> That change cased false-positive warning about hardware hang:
> 
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
> IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> e1000e 0000:00:1f.6 eth0: Detected Hardware Unit Hang:
>    TDH                  <0>
>    TDT                  <1>
>    next_to_use          <1>
>    next_to_clean        <0>
> buffer_info[next_to_clean]:
>    time_stamp           <fffba7a7>
>    next_to_watch        <0>
>    jiffies              <fffbb140>
>    next_to_watch.status <0>
> MAC Status             <40080080>
> PHY Status             <7949>
> PHY 1000BASE-T Status  <0>
> PHY Extended Status    <3000>
> PCI Status             <10>
> e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
> 
> Besides warning everything works fine.
> Original issue will be fixed property in following patch.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Reported-by: Joseph Yasi <joe.yasi@gmail.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=203175
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c |   15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 7acc61e4f645..ba96e52aa8d1 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -5309,13 +5309,8 @@ static void e1000_watchdog_task(struct work_struct *work)
>  			/* 8000ES2LAN requires a Rx packet buffer work-around
>  			 * on link down event; reset the controller to flush
>  			 * the Rx packet buffer.
> -			 *
> -			 * If the link is lost the controller stops DMA, but
> -			 * if there is queued Tx work it cannot be done.  So
> -			 * reset the controller to flush the Tx packet buffers.
>  			 */
> -			if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
> -			    e1000_desc_unused(tx_ring) + 1 < tx_ring->count)
> +			if (adapter->flags & FLAG_RX_NEEDS_RESTART)
>  				adapter->flags |= FLAG_RESTART_NOW;
>  			else
>  				pm_schedule_suspend(netdev->dev.parent,
> @@ -5338,6 +5333,14 @@ static void e1000_watchdog_task(struct work_struct *work)
>  	adapter->gotc_old = adapter->stats.gotc;
>  	spin_unlock(&adapter->stats64_lock);
>  
> +	/* If the link is lost the controller stops DMA, but
> +	 * if there is queued Tx work it cannot be done.  So
> +	 * reset the controller to flush the Tx packet buffers.
> +	 */
> +	if (!netif_carrier_ok(netdev) &&
> +	    (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
> +		adapter->flags |= FLAG_RESTART_NOW;
> +
>  	/* If reset is necessary, do it outside of interrupt context. */
>  	if (adapter->flags & FLAG_RESTART_NOW) {
>  		schedule_work(&adapter->reset_task);
> 

Tested-by: Oleksandr Natalenko <oleksandr@redhat.com>

Patch
diff mbox series

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 7acc61e4f645..ba96e52aa8d1 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5309,13 +5309,8 @@  static void e1000_watchdog_task(struct work_struct *work)
 			/* 8000ES2LAN requires a Rx packet buffer work-around
 			 * on link down event; reset the controller to flush
 			 * the Rx packet buffer.
-			 *
-			 * If the link is lost the controller stops DMA, but
-			 * if there is queued Tx work it cannot be done.  So
-			 * reset the controller to flush the Tx packet buffers.
 			 */
-			if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
-			    e1000_desc_unused(tx_ring) + 1 < tx_ring->count)
+			if (adapter->flags & FLAG_RX_NEEDS_RESTART)
 				adapter->flags |= FLAG_RESTART_NOW;
 			else
 				pm_schedule_suspend(netdev->dev.parent,
@@ -5338,6 +5333,14 @@  static void e1000_watchdog_task(struct work_struct *work)
 	adapter->gotc_old = adapter->stats.gotc;
 	spin_unlock(&adapter->stats64_lock);
 
+	/* If the link is lost the controller stops DMA, but
+	 * if there is queued Tx work it cannot be done.  So
+	 * reset the controller to flush the Tx packet buffers.
+	 */
+	if (!netif_carrier_ok(netdev) &&
+	    (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
+		adapter->flags |= FLAG_RESTART_NOW;
+
 	/* If reset is necessary, do it outside of interrupt context. */
 	if (adapter->flags & FLAG_RESTART_NOW) {
 		schedule_work(&adapter->reset_task);