diff mbox series

[net] net/ibmvnic: Fix deadlock problem in reset

Message ID 20181119215727.22197.97260.stgit@ltcalpine2-lp22.aus.stglabs.ibm.com (mailing list archive)
State Not Applicable
Headers show
Series [net] net/ibmvnic: Fix deadlock problem in reset | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied
snowpatch_ozlabs/build-ppc64le success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64be success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64e success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-pmac32 success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 1 checks, 151 lines checked

Commit Message

Juliet Kim Nov. 19, 2018, 9:59 p.m. UTC
This patch changes the driver to use only rtnl_lock during a reset, to
avoid a deadlock that could occur when a thread performing a close is
holding rtnl_lock and waiting for reset_lock, which has been acquired by
another thread that is itself waiting for rtnl_lock in order to set the
number of tx/rx queues during a reset.

Also, we now set the number of tx/rx queues during a soft reset
for failover or LPM events.

Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   59 +++++++++++++-----------------------
 drivers/net/ethernet/ibm/ibmvnic.h |    2 +
 2 files changed, 22 insertions(+), 39 deletions(-)

Comments

David Miller Nov. 20, 2018, 2:56 a.m. UTC | #1
From: Juliet Kim <julietk@linux.vnet.ibm.com>
Date: Mon, 19 Nov 2018 15:59:22 -0600

> This patch changes to use rtnl_lock only during a reset to avoid
> deadlock that could occur when a thread operating close is holding
> rtnl_lock and waiting for reset_lock acquired by another thread,
> which is waiting for rtnl_lock in order to set the number of tx/rx
> queues during a reset.
> 
> Also, we now setting the number of tx/rx queues during a soft reset
> for failover or LPM events.
> 
> Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>

Applied.
Abdul Haleem Nov. 21, 2018, 10:40 a.m. UTC | #2
On Mon, 2018-11-19 at 15:59 -0600, Juliet Kim wrote:
> This patch changes to use rtnl_lock only during a reset to avoid
> deadlock that could occur when a thread operating close is holding
> rtnl_lock and waiting for reset_lock acquired by another thread,
> which is waiting for rtnl_lock in order to set the number of tx/rx
> queues during a reset.
> 
> Also, we now setting the number of tx/rx queues during a soft reset
> for failover or LPM events.
> 
> Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>
> ---
>  drivers/net/ethernet/ibm/ibmvnic.c |   59 +++++++++++++-----------------------
>  drivers/net/ethernet/ibm/ibmvnic.h |    2 +
>  2 files changed, 22 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
> index 7893bef..4a5de59 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -1103,20 +1103,15 @@ static int ibmvnic_open(struct net_device *netdev)
>  		return 0;
>  	}
> 
> -	mutex_lock(&adapter->reset_lock);
> -
>  	if (adapter->state != VNIC_CLOSED) {
>  		rc = ibmvnic_login(netdev);
> -		if (rc) {
> -			mutex_unlock(&adapter->reset_lock);
> +		if (rc)
>  			return rc;
> -		}
> 
>  		rc = init_resources(adapter);
>  		if (rc) {
>  			netdev_err(netdev, "failed to initialize resources\n");
>  			release_resources(adapter);
> -			mutex_unlock(&adapter->reset_lock);
>  			return rc;
>  		}
>  	}
> @@ -1124,8 +1119,6 @@ static int ibmvnic_open(struct net_device *netdev)
>  	rc = __ibmvnic_open(netdev);
>  	netif_carrier_on(netdev);
> 
> -	mutex_unlock(&adapter->reset_lock);
> -
>  	return rc;
>  }
> 
> @@ -1269,10 +1262,8 @@ static int ibmvnic_close(struct net_device *netdev)
>  		return 0;
>  	}
> 
> -	mutex_lock(&adapter->reset_lock);
>  	rc = __ibmvnic_close(netdev);
>  	ibmvnic_cleanup(netdev);
> -	mutex_unlock(&adapter->reset_lock);
> 
>  	return rc;
>  }
> @@ -1820,20 +1811,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
>  				return rc;
>  		} else if (adapter->req_rx_queues != old_num_rx_queues ||
>  			   adapter->req_tx_queues != old_num_tx_queues) {
> -			adapter->map_id = 1;
>  			release_rx_pools(adapter);
>  			release_tx_pools(adapter);
> -			rc = init_rx_pools(netdev);
> -			if (rc)
> -				return rc;
> -			rc = init_tx_pools(netdev);
> -			if (rc)
> -				return rc;
> -
>  			release_napi(adapter);
> -			rc = init_napi(adapter);
> +			release_vpd_data(adapter);
> +
> +			rc = init_resources(adapter);
>  			if (rc)
>  				return rc;
> +
>  		} else {
>  			rc = reset_tx_pools(adapter);
>  			if (rc)
> @@ -1917,17 +1903,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
>  		adapter->state = VNIC_PROBED;
>  		return 0;
>  	}
> -	/* netif_set_real_num_xx_queues needs to take rtnl lock here
> -	 * unless wait_for_reset is set, in which case the rtnl lock
> -	 * has already been taken before initializing the reset
> -	 */
> -	if (!adapter->wait_for_reset) {
> -		rtnl_lock();
> -		rc = init_resources(adapter);
> -		rtnl_unlock();
> -	} else {
> -		rc = init_resources(adapter);
> -	}
> +
> +	rc = init_resources(adapter);
>  	if (rc)
>  		return rc;
> 
> @@ -1986,13 +1963,21 @@ static void __ibmvnic_reset(struct work_struct *work)
>  	struct ibmvnic_rwi *rwi;
>  	struct ibmvnic_adapter *adapter;
>  	struct net_device *netdev;
> +	bool we_lock_rtnl = false;
>  	u32 reset_state;
>  	int rc = 0;
> 
>  	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
>  	netdev = adapter->netdev;
> 
> -	mutex_lock(&adapter->reset_lock);
> +	/* netif_set_real_num_xx_queues needs to take rtnl lock here
> +	 * unless wait_for_reset is set, in which case the rtnl lock
> +	 * has already been taken before initializing the reset
> +	 */
> +	if (!adapter->wait_for_reset) {
> +		rtnl_lock();
> +		we_lock_rtnl = true;
> +	}
>  	reset_state = adapter->state;
> 
>  	rwi = get_next_rwi(adapter);
> @@ -2020,12 +2005,11 @@ static void __ibmvnic_reset(struct work_struct *work)
>  	if (rc) {
>  		netdev_dbg(adapter->netdev, "Reset failed\n");
>  		free_all_rwi(adapter);
> -		mutex_unlock(&adapter->reset_lock);
> -		return;
>  	}
> 
>  	adapter->resetting = false;
> -	mutex_unlock(&adapter->reset_lock);
> +	if (we_lock_rtnl)
> +		rtnl_unlock();
>  }
> 
>  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
> @@ -4768,7 +4752,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
> 
>  	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
>  	INIT_LIST_HEAD(&adapter->rwi_list);
> -	mutex_init(&adapter->reset_lock);
>  	mutex_init(&adapter->rwi_lock);
>  	adapter->resetting = false;
> 
> @@ -4840,8 +4823,8 @@ static int ibmvnic_remove(struct vio_dev *dev)
>  	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
> 
>  	adapter->state = VNIC_REMOVING;
> -	unregister_netdev(netdev);
> -	mutex_lock(&adapter->reset_lock);
> +	rtnl_lock();
> +	unregister_netdevice(netdev);
> 
>  	release_resources(adapter);
>  	release_sub_crqs(adapter, 1);
> @@ -4852,7 +4835,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
> 
>  	adapter->state = VNIC_REMOVED;
> 
> -	mutex_unlock(&adapter->reset_lock);
> +	rtnl_unlock();
>  	device_remove_file(&dev->dev, &dev_attr_failover);
>  	free_netdev(netdev);
>  	dev_set_drvdata(&dev->dev, NULL);
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
> index 18103b8..99c4f8d 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.h
> +++ b/drivers/net/ethernet/ibm/ibmvnic.h
> @@ -1075,7 +1075,7 @@ struct ibmvnic_adapter {
>  	struct tasklet_struct tasklet;
>  	enum vnic_state state;
>  	enum ibmvnic_reset_reason reset_reason;
> -	struct mutex reset_lock, rwi_lock;
> +	struct mutex rwi_lock;
>  	struct list_head rwi_list;
>  	struct work_struct ibmvnic_reset;
>  	bool resetting;
> 

Thanks for the fix. Please add Reported-and-tested-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7893bef..4a5de59 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1103,20 +1103,15 @@  static int ibmvnic_open(struct net_device *netdev)
 		return 0;
 	}
 
-	mutex_lock(&adapter->reset_lock);
-
 	if (adapter->state != VNIC_CLOSED) {
 		rc = ibmvnic_login(netdev);
-		if (rc) {
-			mutex_unlock(&adapter->reset_lock);
+		if (rc)
 			return rc;
-		}
 
 		rc = init_resources(adapter);
 		if (rc) {
 			netdev_err(netdev, "failed to initialize resources\n");
 			release_resources(adapter);
-			mutex_unlock(&adapter->reset_lock);
 			return rc;
 		}
 	}
@@ -1124,8 +1119,6 @@  static int ibmvnic_open(struct net_device *netdev)
 	rc = __ibmvnic_open(netdev);
 	netif_carrier_on(netdev);
 
-	mutex_unlock(&adapter->reset_lock);
-
 	return rc;
 }
 
@@ -1269,10 +1262,8 @@  static int ibmvnic_close(struct net_device *netdev)
 		return 0;
 	}
 
-	mutex_lock(&adapter->reset_lock);
 	rc = __ibmvnic_close(netdev);
 	ibmvnic_cleanup(netdev);
-	mutex_unlock(&adapter->reset_lock);
 
 	return rc;
 }
@@ -1820,20 +1811,15 @@  static int do_reset(struct ibmvnic_adapter *adapter,
 				return rc;
 		} else if (adapter->req_rx_queues != old_num_rx_queues ||
 			   adapter->req_tx_queues != old_num_tx_queues) {
-			adapter->map_id = 1;
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
-			rc = init_rx_pools(netdev);
-			if (rc)
-				return rc;
-			rc = init_tx_pools(netdev);
-			if (rc)
-				return rc;
-
 			release_napi(adapter);
-			rc = init_napi(adapter);
+			release_vpd_data(adapter);
+
+			rc = init_resources(adapter);
 			if (rc)
 				return rc;
+
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)
@@ -1917,17 +1903,8 @@  static int do_hard_reset(struct ibmvnic_adapter *adapter,
 		adapter->state = VNIC_PROBED;
 		return 0;
 	}
-	/* netif_set_real_num_xx_queues needs to take rtnl lock here
-	 * unless wait_for_reset is set, in which case the rtnl lock
-	 * has already been taken before initializing the reset
-	 */
-	if (!adapter->wait_for_reset) {
-		rtnl_lock();
-		rc = init_resources(adapter);
-		rtnl_unlock();
-	} else {
-		rc = init_resources(adapter);
-	}
+
+	rc = init_resources(adapter);
 	if (rc)
 		return rc;
 
@@ -1986,13 +1963,21 @@  static void __ibmvnic_reset(struct work_struct *work)
 	struct ibmvnic_rwi *rwi;
 	struct ibmvnic_adapter *adapter;
 	struct net_device *netdev;
+	bool we_lock_rtnl = false;
 	u32 reset_state;
 	int rc = 0;
 
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 	netdev = adapter->netdev;
 
-	mutex_lock(&adapter->reset_lock);
+	/* netif_set_real_num_xx_queues needs to take rtnl lock here
+	 * unless wait_for_reset is set, in which case the rtnl lock
+	 * has already been taken before initializing the reset
+	 */
+	if (!adapter->wait_for_reset) {
+		rtnl_lock();
+		we_lock_rtnl = true;
+	}
 	reset_state = adapter->state;
 
 	rwi = get_next_rwi(adapter);
@@ -2020,12 +2005,11 @@  static void __ibmvnic_reset(struct work_struct *work)
 	if (rc) {
 		netdev_dbg(adapter->netdev, "Reset failed\n");
 		free_all_rwi(adapter);
-		mutex_unlock(&adapter->reset_lock);
-		return;
 	}
 
 	adapter->resetting = false;
-	mutex_unlock(&adapter->reset_lock);
+	if (we_lock_rtnl)
+		rtnl_unlock();
 }
 
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -4768,7 +4752,6 @@  static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
 	INIT_LIST_HEAD(&adapter->rwi_list);
-	mutex_init(&adapter->reset_lock);
 	mutex_init(&adapter->rwi_lock);
 	adapter->resetting = false;
 
@@ -4840,8 +4823,8 @@  static int ibmvnic_remove(struct vio_dev *dev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
 	adapter->state = VNIC_REMOVING;
-	unregister_netdev(netdev);
-	mutex_lock(&adapter->reset_lock);
+	rtnl_lock();
+	unregister_netdevice(netdev);
 
 	release_resources(adapter);
 	release_sub_crqs(adapter, 1);
@@ -4852,7 +4835,7 @@  static int ibmvnic_remove(struct vio_dev *dev)
 
 	adapter->state = VNIC_REMOVED;
 
-	mutex_unlock(&adapter->reset_lock);
+	rtnl_unlock();
 	device_remove_file(&dev->dev, &dev_attr_failover);
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 18103b8..99c4f8d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1075,7 +1075,7 @@  struct ibmvnic_adapter {
 	struct tasklet_struct tasklet;
 	enum vnic_state state;
 	enum ibmvnic_reset_reason reset_reason;
-	struct mutex reset_lock, rwi_lock;
+	struct mutex rwi_lock;
 	struct list_head rwi_list;
 	struct work_struct ibmvnic_reset;
 	bool resetting;