diff mbox series

[iwl-net,v7,3/4] Revert "iavf: Detach device during reset task"

Message ID 20230516132331.96017-4-kamil.maziarz@intel.com
State Changes Requested
Headers show
Series iavf: fix reset task deadlock | expand

Commit Message

Kamil Maziarz May 16, 2023, 1:23 p.m. UTC
From: Marcin Szycik <marcin.szycik@linux.intel.com>

This reverts commit aa626da947e9cd30c4cf727493903e1adbb2c0a0.

Detaching the device during reset did not fully fix the rtnl locking issue,
as a callback could already be in progress before the netdev was detached.

Furthermore, detaching the netdevice causes TX timeouts if traffic is
running. To reproduce:

ip netns exec ns1 iperf3 -c $PEER_IP -t 600 --logfile /dev/null &
while :; do
        for i in 200 7000 400 5000 300 3000 ; do
		ip netns exec ns1 ip link set $VF1 mtu $i
                sleep 2
        done
        sleep 10
done

Currently, callbacks such as iavf_change_mtu() wait for the reset to finish
and don't try to take rtnl_lock if they already run under rtnl_lock (flag
IAVF_FLAG_RTNL_LOCK_TAKEN). Therefore, rtnl_lock will be released when the
reset finishes, and only then will another callback which uses rtnl_lock be
able to start.

Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
---
v1->v7: no changes
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

Comments

Ahmed Zaki May 19, 2023, 6:18 p.m. UTC | #1
On 2023-05-16 07:23, Kamil Maziarz wrote:
> From: Marcin Szycik <marcin.szycik@linux.intel.com>
>
> This reverts commit aa626da947e9cd30c4cf727493903e1adbb2c0a0.
>
> Detaching device during reset was not fully fixing the rtnl locking issue,
> as there could be a situation where callback was already in progress before
> detaching netdev.
>
> Furthermore, detaching netdevice causes TX timeouts if traffic is running.
> To reproduce:
>
> ip netns exec ns1 iperf3 -c $PEER_IP -t 600 --logfile /dev/null &
> while :; do
>          for i in 200 7000 400 5000 300 3000 ; do
> 		ip netns exec ns1 ip link set $VF1 mtu $i
>                  sleep 2
>          done
>          sleep 10
> done
>
> Currently callbacks such as iavf_change_mtu() wait for reset and don't try
> to take rtnl_lock if they already run under rtnl_lock (flag
> IAVF_FLAG_RTNL_LOCK_TAKEN), therefore rtnl_lock will be released when reset
> finishes, and only then another callback which uses rtnl_lock will be able
> to start.


Please update the commit message. The flag IAVF_FLAG_RTNL_LOCK_TAKEN was 
dropped in earlier versions.


>
> Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
> Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
> ---
> v1->v7: no changes
> ---
>   drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++-----------
>   1 file changed, 2 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
> index ef7b190ddda1..3d439f9ac97b 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_main.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
> @@ -3015,11 +3015,6 @@ static void iavf_reset_task(struct work_struct *work)
>   	int i = 0, err;
>   	bool running;
>   
> -	/* Detach interface to avoid subsequent NDO callbacks */
> -	rtnl_lock();
> -	netif_device_detach(netdev);
> -	rtnl_unlock();
> -
>   	/* When device is being removed it doesn't make sense to run the reset
>   	 * task, just return in such a case.
>   	 */
> @@ -3027,7 +3022,7 @@ static void iavf_reset_task(struct work_struct *work)
>   		if (adapter->state != __IAVF_REMOVE)
>   			queue_work(adapter->wq, &adapter->reset_task);
>   
> -		goto reset_finish;
> +		return;
>   	}
>   
>   	while (!mutex_trylock(&adapter->client_lock))
> @@ -3230,7 +3225,7 @@ static void iavf_reset_task(struct work_struct *work)
>   	mutex_unlock(&adapter->client_lock);
>   	mutex_unlock(&adapter->crit_lock);
>   
> -	goto reset_finish;
> +	return;
>   reset_err:
>   	if (running) {
>   		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
> @@ -3251,10 +3246,6 @@ static void iavf_reset_task(struct work_struct *work)
>   	}
>   
>   	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
> -reset_finish:
> -	rtnl_lock();
> -	netif_device_attach(netdev);
> -	rtnl_unlock();
>   }
>   
>   /**
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index ef7b190ddda1..3d439f9ac97b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3015,11 +3015,6 @@  static void iavf_reset_task(struct work_struct *work)
 	int i = 0, err;
 	bool running;
 
-	/* Detach interface to avoid subsequent NDO callbacks */
-	rtnl_lock();
-	netif_device_detach(netdev);
-	rtnl_unlock();
-
 	/* When device is being removed it doesn't make sense to run the reset
 	 * task, just return in such a case.
 	 */
@@ -3027,7 +3022,7 @@  static void iavf_reset_task(struct work_struct *work)
 		if (adapter->state != __IAVF_REMOVE)
 			queue_work(adapter->wq, &adapter->reset_task);
 
-		goto reset_finish;
+		return;
 	}
 
 	while (!mutex_trylock(&adapter->client_lock))
@@ -3230,7 +3225,7 @@  static void iavf_reset_task(struct work_struct *work)
 	mutex_unlock(&adapter->client_lock);
 	mutex_unlock(&adapter->crit_lock);
 
-	goto reset_finish;
+	return;
 reset_err:
 	if (running) {
 		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3251,10 +3246,6 @@  static void iavf_reset_task(struct work_struct *work)
 	}
 
 	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-reset_finish:
-	rtnl_lock();
-	netif_device_attach(netdev);
-	rtnl_unlock();
 }
 
 /**