diff mbox series

[net,v4,2/4] iavf: Don't lock rtnl_lock twice in reset

Message ID 20230420130810.214307-3-kamil.maziarz@intel.com
State Changes Requested
Headers show
Series iavf: fix reset task deadlock | expand

Commit Message

Kamil Maziarz April 20, 2023, 1:08 p.m. UTC
From: Marcin Szycik <marcin.szycik@linux.intel.com>

Some ndo/ethtool callbacks are called under rtnl_lock. If such callback
then triggers a reset, the reset task might try to take the rtnl_lock
again, causing a deadlock.

Callbacks that are sensitive to rtnl_lock are scheduled when the drivers
are unable to obtain the rtnl_lock in the reset flow. This ensures that
the reset task does not attempt to double lock rtnl_lock and avoids
the deadlock.

Before this patch, a deadlock could be caused by e.g.:

echo 1 > /sys/class/net/$PF1/device/sriov_numvfs
while :; do
ip l s $VF1 up
ethtool --set-channels $VF1 combined 8
ip l s $VF1 down
ip l s $VF1 up
ethtool --set-channels $VF1 combined 16
ip l s $VF1 down
done

Fixes: aa626da947e9 ("iavf: Detach device during reset task")
Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
Co-developed-by: Dawid Wesierski <dawidx.wesierski@intel.com>
Signed-off-by: Dawid Wesierski <dawidx.wesierski@intel.com>
Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
---
v1->v3: changes were done internally
v4: added space before open parenthesis '(', fixed code indent
---
 drivers/net/ethernet/intel/iavf/iavf.h      |  1 +
 drivers/net/ethernet/intel/iavf/iavf_main.c | 36 ++++++++++++++++++---
 2 files changed, 32 insertions(+), 5 deletions(-)

Comments

Jacob Keller April 21, 2023, 7:38 p.m. UTC | #1
On 4/20/2023 6:08 AM, Kamil Maziarz wrote:
>  
> +/**
> + * iavf_delayed_set_interrupt_capability - schedule the update of the netdev
> + * @work: pointer to work_struct containing our data
> + **/
> +static void iavf_delayed_set_interrupt_capability(struct work_struct *work)
> +{
> +	struct iavf_adapter *adapter = container_of(work, struct iavf_adapter,
> +				       set_interrupt_capability);
> +	int pairs = adapter->num_active_queues;
> +
> +	if (rtnl_trylock()) {
> +		netif_set_real_num_rx_queues(adapter->netdev, pairs);
> +		netif_set_real_num_tx_queues(adapter->netdev, pairs);
> +		rtnl_unlock();
> +	} else {
> +		queue_work(adapter->wq, &adapter->set_interrupt_capability);
> +	}
> +}
> +

This function still requeues itself instead of just rtnl_lock(). I'd at
least like a justification as to why.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 47f777674087..8f48b5354025 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -256,6 +256,7 @@  struct iavf_adapter {
 	struct workqueue_struct *wq;
 	struct work_struct reset_task;
 	struct work_struct adminq_task;
+	struct work_struct set_interrupt_capability;
 	struct delayed_work client_task;
 	wait_queue_head_t down_waitqueue;
 	wait_queue_head_t reset_waitqueue;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 29e0fd2e674a..330fcd7a6c41 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1707,11 +1707,36 @@  static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
 	err = iavf_acquire_msix_vectors(adapter, v_budget);
 
 out:
-	netif_set_real_num_rx_queues(adapter->netdev, pairs);
-	netif_set_real_num_tx_queues(adapter->netdev, pairs);
+	if (rtnl_trylock()) {
+		netif_set_real_num_rx_queues(adapter->netdev, pairs);
+		netif_set_real_num_tx_queues(adapter->netdev, pairs);
+		rtnl_unlock();
+	} else {
+		queue_work(adapter->wq, &adapter->set_interrupt_capability);
+	}
+
 	return err;
 }
 
+/**
+ * iavf_delayed_set_interrupt_capability - schedule the update of the netdev
+ * @work: pointer to work_struct containing our data
+ **/
+static void iavf_delayed_set_interrupt_capability(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work, struct iavf_adapter,
+				       set_interrupt_capability);
+	int pairs = adapter->num_active_queues;
+
+	if (rtnl_trylock()) {
+		netif_set_real_num_rx_queues(adapter->netdev, pairs);
+		netif_set_real_num_tx_queues(adapter->netdev, pairs);
+		rtnl_unlock();
+	} else {
+		queue_work(adapter->wq, &adapter->set_interrupt_capability);
+	}
+}
+
 /**
  * iavf_config_rss_aq - Configure RSS keys and lut by using AQ commands
  * @adapter: board private structure
@@ -1930,10 +1955,8 @@  int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
 			"Unable to allocate memory for queues\n");
 		goto err_alloc_queues;
 	}
-
-	rtnl_lock();
 	err = iavf_set_interrupt_capability(adapter);
-	rtnl_unlock();
+
 	if (err) {
 		dev_err(&adapter->pdev->dev,
 			"Unable to setup interrupt capabilities\n");
@@ -4983,6 +5006,8 @@  static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	INIT_WORK(&adapter->reset_task, iavf_reset_task);
 	INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
+	INIT_WORK(&adapter->set_interrupt_capability,
+		  iavf_delayed_set_interrupt_capability);
 	INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
 	INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
 	queue_delayed_work(adapter->wq, &adapter->watchdog_task,
@@ -5156,6 +5181,7 @@  static void iavf_remove(struct pci_dev *pdev)
 	cancel_work_sync(&adapter->reset_task);
 	cancel_delayed_work_sync(&adapter->watchdog_task);
 	cancel_work_sync(&adapter->adminq_task);
+	cancel_work_sync(&adapter->set_interrupt_capability);
 	cancel_delayed_work_sync(&adapter->client_task);
 
 	adapter->aq_required = 0;