Patchwork [net,5/8] qlcnic: Fix reset recovery after transmit timeout

login
register
mail settings
Submitter Shahed Shaikh
Date May 7, 2013, 7:55 p.m.
Message ID <1367956506-3290-6-git-send-email-shahed.shaikh@qlogic.com>
Download mbox | patch
Permalink /patch/242457/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Shahed Shaikh - May 7, 2013, 7:55 p.m.
From: Sony Chacko <sony.chacko@qlogic.com>

o When transmit timeout happens, recovery attempt should start with
  adapter soft reset. If soft reset fails to resume traffic, firmware
  dump will be collected and driver will perform a hard reset of the
  adapter. Reset recovery on 83xx was failing after a hard reset.
  This patch fixes that issue.

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
Signed-off-by: Shahed Shaikh <shahed.shaikh@qlogic.com>
---
 .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c  |   24 ++++++++++++-------
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |   15 ++++++++----
 2 files changed, 25 insertions(+), 14 deletions(-)
David Miller - May 8, 2013, 7:06 p.m.
From: Shahed Shaikh <shahed.shaikh@qlogic.com>
Date: Tue, 7 May 2013 15:55:03 -0400

> @@ -435,10 +435,7 @@ static void qlcnic_83xx_idc_attach_driver(struct qlcnic_adapter *adapter)
>  	}
>  done:
>  	netif_device_attach(netdev);
> -	if (netif_running(netdev)) {
> -		netif_carrier_on(netdev);
> -		netif_wake_queue(netdev);
> -	}
> +	adapter->netdev->trans_start = jiffies;
>  }
>  
>  static int qlcnic_83xx_idc_enter_failed_state(struct qlcnic_adapter *adapter,

This is not right.

Multiqueue aware drivers should never access netdev->trans_start
directly, and I see several such writes in this driver.

Anything you write here will be totally ignored by the rest of the
kernel, because this value is overwritten by every call to dev_trans_start()
which is the only valid method by which to determine this value.

dev_trans_start() walks all of the transmit queues, recording the most
recent txq->trans_start timestamp in netdev->trans_start.

So if you write netdev->trans_start in your driver, it does nothing,
because even the dev_watchdog() timer is going to inspect the per-queue
txq->trans_start values and ignore the netdev->trans_start value for
devices whose driver supports multiqueue as qlcnic does.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shahed Shaikh - May 9, 2013, 6:21 a.m.
> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Thursday, May 09, 2013 12:37 AM
> To: Shahed Shaikh
> Cc: netdev; Dept-NX Linux NIC Driver; Sony Chacko
> Subject: Re: [PATCH net 5/8] qlcnic: Fix reset recovery after transmit timeout
> 
> From: Shahed Shaikh <shahed.shaikh@qlogic.com>
> Date: Tue, 7 May 2013 15:55:03 -0400
> 
> > @@ -435,10 +435,7 @@ static void qlcnic_83xx_idc_attach_driver(struct
> qlcnic_adapter *adapter)
> >  	}
> >  done:
> >  	netif_device_attach(netdev);
> > -	if (netif_running(netdev)) {
> > -		netif_carrier_on(netdev);
> > -		netif_wake_queue(netdev);
> > -	}
> > +	adapter->netdev->trans_start = jiffies;
> >  }
> >
> >  static int qlcnic_83xx_idc_enter_failed_state(struct qlcnic_adapter
> > *adapter,
> 
> This is not right.
> 
> Multiqueue aware drivers should never access netdev->trans_start directly,
> and I see several such writes in this driver.
> 
> Anything you write here will be totally ignored by the rest of the kernel,
> because this value is overwritten by every call to dev_trans_start() which is
> the only valid method by which to determine this value.
> 
> dev_trans_start() walks all of the transmit queues, recording the most recent
> txq->trans_start timestamp in netdev->trans_start.
> 
> So if you write netdev->trans_start in your driver, it does nothing, because
> even the dev_watchdog() timer is going to inspect the per-queue
> txq->trans_start values and ignore the netdev->trans_start value for
> devices whose driver supports multiqueue as qlcnic does.

Hi David,

We will resubmit the current patch after removing access to netdev->trans_start.
We will fix the same issue in the rest of the driver code in a follow-up patch.

Thanks,
Shahed


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index ab1d8d9..66f355d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -435,10 +435,7 @@  static void qlcnic_83xx_idc_attach_driver(struct qlcnic_adapter *adapter)
 	}
 done:
 	netif_device_attach(netdev);
-	if (netif_running(netdev)) {
-		netif_carrier_on(netdev);
-		netif_wake_queue(netdev);
-	}
+	adapter->netdev->trans_start = jiffies;
 }
 
 static int qlcnic_83xx_idc_enter_failed_state(struct qlcnic_adapter *adapter,
@@ -642,15 +639,22 @@  static int qlcnic_83xx_idc_reattach_driver(struct qlcnic_adapter *adapter)
 
 static void qlcnic_83xx_idc_update_idc_params(struct qlcnic_adapter *adapter)
 {
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
+
 	qlcnic_83xx_idc_update_drv_presence_reg(adapter, 1, 1);
-	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 	set_bit(QLC_83XX_MBX_READY, &adapter->ahw->idc.status);
 	qlcnic_83xx_idc_update_audit_reg(adapter, 0, 1);
 	set_bit(QLC_83XX_MODULE_LOADED, &adapter->ahw->idc.status);
-	adapter->ahw->idc.quiesce_req = 0;
-	adapter->ahw->idc.delay = QLC_83XX_IDC_FW_POLL_DELAY;
-	adapter->ahw->idc.err_code = 0;
-	adapter->ahw->idc.collect_dump = 0;
+
+	ahw->idc.quiesce_req = 0;
+	ahw->idc.delay = QLC_83XX_IDC_FW_POLL_DELAY;
+	ahw->idc.err_code = 0;
+	ahw->idc.collect_dump = 0;
+	ahw->reset_context = 0;
+	adapter->tx_timeo_cnt = 0;
+	adapter->netdev->trans_start = jiffies;
+
+	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 }
 
 /**
@@ -851,6 +855,7 @@  static int qlcnic_83xx_idc_ready_state(struct qlcnic_adapter *adapter)
 	/* Check for soft reset request */
 	if (ahw->reset_context &&
 	    !(val & QLC_83XX_IDC_DISABLE_FW_RESET_RECOVERY)) {
+		adapter->ahw->reset_context = 0;
 		qlcnic_83xx_idc_tx_soft_reset(adapter);
 		return ret;
 	}
@@ -914,6 +919,7 @@  static int qlcnic_83xx_idc_need_quiesce_state(struct qlcnic_adapter *adapter)
 static int qlcnic_83xx_idc_failed_state(struct qlcnic_adapter *adapter)
 {
 	dev_err(&adapter->pdev->dev, "%s: please restart!!\n", __func__);
+	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 	adapter->ahw->idc.err_code = -EIO;
 
 	return 0;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index d97446a..8fb836d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2502,12 +2502,17 @@  static void qlcnic_tx_timeout(struct net_device *netdev)
 	if (test_bit(__QLCNIC_RESETTING, &adapter->state))
 		return;
 
-	dev_err(&netdev->dev, "transmit timeout, resetting.\n");
-
-	if (++adapter->tx_timeo_cnt >= QLCNIC_MAX_TX_TIMEOUTS)
-		adapter->need_fw_reset = 1;
-	else
+	if (++adapter->tx_timeo_cnt >= QLCNIC_MAX_TX_TIMEOUTS) {
+		netdev_info(netdev, "Tx timeout, reset the adapter.\n");
+		if (qlcnic_82xx_check(adapter))
+			adapter->need_fw_reset = 1;
+		else if (qlcnic_83xx_check(adapter))
+			qlcnic_83xx_idc_request_reset(adapter,
+						      QLCNIC_FORCE_FW_DUMP_KEY);
+	} else {
+		netdev_info(netdev, "Tx timeout, reset adapter context.\n");
 		adapter->ahw->reset_context = 1;
+	}
 }
 
 static struct net_device_stats *qlcnic_get_stats(struct net_device *netdev)