diff mbox series

[iwl-net,v10,2/5] Revert "iavf: Detach device during reset task"

Message ID 20230605145226.1222225-3-mateusz.palczewski@intel.com
State Accepted
Delegated to: Anthony Nguyen
Headers show
Series iavf: fix reset task deadlock | expand

Commit Message

Mateusz Palczewski June 5, 2023, 2:52 p.m. UTC
From: Marcin Szycik <marcin.szycik@linux.intel.com>

This reverts commit aa626da947e9cd30c4cf727493903e1adbb2c0a0.

Detaching the device during reset did not fully fix the rtnl locking issue,
as a callback could already be in progress before the netdev was
detached.

Furthermore, detaching netdevice causes TX timeouts if traffic is running.
To reproduce:

ip netns exec ns1 iperf3 -c $PEER_IP -t 600 --logfile /dev/null &
while :; do
        for i in 200 7000 400 5000 300 3000 ; do
		ip netns exec ns1 ip link set $VF1 mtu $i
                sleep 2
        done
        sleep 10
done

Currently, callbacks such as iavf_change_mtu() wait for the reset.
If the reset fails to acquire the rtnl_lock, they schedule the netdev
update for later while continuing the reset flow. Operations like MTU
changes are performed under the rtnl_lock. Therefore, when the operation
finishes, another callback that uses rtnl_lock can start.

Signed-off-by: Dawid Wesierski <dawidx.wesierski@intel.com>
Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
---
v1->v7: no changes
v8: changed commit msg
v9->v10: no changes
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

Comments

Romanowski, Rafal June 15, 2023, 3:28 p.m. UTC | #1
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of
> Mateusz Palczewski
> Sent: poniedziałek, 5 czerwca 2023 16:52
> To: intel-wired-lan@lists.osuosl.org
> Cc: ivecera <ivecera@redhat.com>
> Subject: [Intel-wired-lan] [PATCH iwl-net v10 2/5] Revert "iavf: Detach device
> during reset task"
> 
> From: Marcin Szycik <marcin.szycik@linux.intel.com>
> 
> This reverts commit aa626da947e9cd30c4cf727493903e1adbb2c0a0.
> 
> Detaching device during reset was not fully fixing the rtnl locking issue, as
> there could be a situation where callback was already in progress before
> detaching netdev.
> 
> Furthermore, detaching netdevice causes TX timeouts if traffic is running.
> To reproduce:
> 
> ip netns exec ns1 iperf3 -c $PEER_IP -t 600 --logfile /dev/null &
> while :; do
>         for i in 200 7000 400 5000 300 3000 ; do
> 		ip netns exec ns1 ip link set $VF1 mtu $i
>                 sleep 2
>         done
>         sleep 10
> done
> 
> Currently, callbacks such as iavf_change_mtu() wait for the reset.
> If the reset fails to acquire the rtnl_lock, they schedule the netdev update
> for later while continuing the reset flow. Operations like MTU changes are
> performed under the rtnl_lock. Therefore, when the operation finishes,
> another callback that uses rtnl_lock can start.
> 
> Signed-off-by: Dawid Wesierski <dawidx.wesierski@intel.com>
> Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
> Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
> ---
> v1->v7: no changes
> v8: changed commit msg
> v9->v10: no changes
> ---
>  drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++-----------
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
> index c815ef87e27d..6945f462c56e 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_main.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
> @@ -2991,11 +2991,6 @@ static void iavf_reset_task(struct work_struct *work)
>  	int i = 0, err;
>  	bool running;
> 
> -	/* Detach interface to avoid subsequent NDO callbacks */
> -	rtnl_lock();
> -	netif_device_detach(netdev);
> -	rtnl_unlock();
> -
>  	/* When device is being removed it doesn't make sense to run the reset
>  	 * task, just return in such a case.
>  	 */
> @@ -3003,7 +2998,7 @@ static void iavf_reset_task(struct work_struct *work)
>  		if (adapter->state != __IAVF_REMOVE)
>  			queue_work(adapter->wq, &adapter->reset_task);
> 
> -		goto reset_finish;
> +		return;
>  	}
> 
>  	while (!mutex_trylock(&adapter->client_lock))
> @@ -3206,7 +3201,7 @@ static void iavf_reset_task(struct work_struct *work)
>  	mutex_unlock(&adapter->client_lock);
>  	mutex_unlock(&adapter->crit_lock);
> 
> -	goto reset_finish;
> +	return;
>  reset_err:
>  	if (running) {
>  		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
> @@ -3227,10 +3222,6 @@ static void iavf_reset_task(struct work_struct *work)
>  	}
> 
>  	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
> -reset_finish:
> -	rtnl_lock();
> -	netif_device_attach(netdev);
> -	rtnl_unlock();
>  }
> 
>  /**
> --
> 2.31.1
> 
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan


Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index c815ef87e27d..6945f462c56e 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2991,11 +2991,6 @@  static void iavf_reset_task(struct work_struct *work)
 	int i = 0, err;
 	bool running;
 
-	/* Detach interface to avoid subsequent NDO callbacks */
-	rtnl_lock();
-	netif_device_detach(netdev);
-	rtnl_unlock();
-
 	/* When device is being removed it doesn't make sense to run the reset
 	 * task, just return in such a case.
 	 */
@@ -3003,7 +2998,7 @@  static void iavf_reset_task(struct work_struct *work)
 		if (adapter->state != __IAVF_REMOVE)
 			queue_work(adapter->wq, &adapter->reset_task);
 
-		goto reset_finish;
+		return;
 	}
 
 	while (!mutex_trylock(&adapter->client_lock))
@@ -3206,7 +3201,7 @@  static void iavf_reset_task(struct work_struct *work)
 	mutex_unlock(&adapter->client_lock);
 	mutex_unlock(&adapter->crit_lock);
 
-	goto reset_finish;
+	return;
 reset_err:
 	if (running) {
 		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3227,10 +3222,6 @@  static void iavf_reset_task(struct work_struct *work)
 	}
 
 	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-reset_finish:
-	rtnl_lock();
-	netif_device_attach(netdev);
-	rtnl_unlock();
 }
 
 /**