diff mbox series

[net,v3,3/4] Revert "iavf: Detach device during reset task"

Message ID 20230419115006.200409-4-kamil.maziarz@intel.com
State Superseded
Headers show
Series iavf: fix reset task deadlock | expand

Commit Message

Kamil Maziarz April 19, 2023, 11:50 a.m. UTC
From: Marcin Szycik <marcin.szycik@linux.intel.com>

This reverts commit aa626da947e9cd30c4cf727493903e1adbb2c0a0.

Detaching the device during reset did not fully fix the rtnl locking issue,
because a callback could already be in progress before the netdev was
detached.

Furthermore, detaching the netdevice causes TX timeouts if traffic is running.
To reproduce:

ip netns exec ns1 iperf3 -c $PEER_IP -t 600 --logfile /dev/null &
while :; do
        for i in 200 7000 400 5000 300 3000 ; do
                ip netns exec ns1 ip link set $VF1 mtu $i
                sleep 2
        done
        sleep 10
done

Currently, callbacks such as iavf_change_mtu() wait for the reset and don't
try to take rtnl_lock if they already run under rtnl_lock (flag
IAVF_FLAG_RTNL_LOCK_TAKEN). Therefore, rtnl_lock will be released when the
reset finishes, and only then will another callback which uses rtnl_lock be
able to start.

Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
---
v2: no changes
---
v3: no changes
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

Comments

Keller, Jacob E April 19, 2023, 6:41 p.m. UTC | #1
On 4/19/2023 4:50 AM, Kamil Maziarz wrote:
> From: Marcin Szycik <marcin.szycik@linux.intel.com>
> 
> This reverts commit aa626da947e9cd30c4cf727493903e1adbb2c0a0.
> 
> Detaching device during reset was not fully fixing the rtnl locking issue,
> as there could be a situation where callback was already in progress before
> detaching netdev.
> 
> Furthermore, detaching netdevice causes TX timeouts if traffic is running.
> To reproduce:
> 
> ip netns exec ns1 iperf3 -c $PEER_IP -t 600 --logfile /dev/null &
> while :; do
>         for i in 200 7000 400 5000 300 3000 ; do
> 		ip netns exec ns1 ip link set $VF1 mtu $i
>                 sleep 2
>         done
>         sleep 10
> done
> 
> Currently callbacks such as iavf_change_mtu() wait for reset and don't try
> to take rtnl_lock if they already run under rtnl_lock (flag
> IAVF_FLAG_RTNL_LOCK_TAKEN), therefore rtnl_lock will be released when reset
> finishes, and only then another callback which uses rtnl_lock will be able
> to start.

This comment is no longer correct since we stopped using this flag, but
otherwise the patch content is fine.

> 
> Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
> Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
> ---
> v2: no changes
> ---
> v3: no changes
> ---
>  drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++-----------
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
> index 7bcf422c0b5f..8dd488158961 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_main.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
> @@ -3017,11 +3017,6 @@ static void iavf_reset_task(struct work_struct *work)
>  	int i = 0, err;
>  	bool running;
>  
> -	/* Detach interface to avoid subsequent NDO callbacks */
> -	rtnl_lock();
> -	netif_device_detach(netdev);
> -	rtnl_unlock();
> -
>  	/* When device is being removed it doesn't make sense to run the reset
>  	 * task, just return in such a case.
>  	 */
> @@ -3029,7 +3024,7 @@ static void iavf_reset_task(struct work_struct *work)
>  		if (adapter->state != __IAVF_REMOVE)
>  			queue_work(adapter->wq, &adapter->reset_task);
>  
> -		goto reset_finish;
> +		return;
>  	}
>  
>  	while (!mutex_trylock(&adapter->client_lock))
> @@ -3232,7 +3227,7 @@ static void iavf_reset_task(struct work_struct *work)
>  	mutex_unlock(&adapter->client_lock);
>  	mutex_unlock(&adapter->crit_lock);
>  
> -	goto reset_finish;
> +	return;
>  reset_err:
>  	if (running) {
>  		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
> @@ -3253,10 +3248,6 @@ static void iavf_reset_task(struct work_struct *work)
>  	}
>  
>  	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
> -reset_finish:
> -	rtnl_lock();
> -	netif_device_attach(netdev);
> -	rtnl_unlock();
>  }
>  
>  /**
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 7bcf422c0b5f..8dd488158961 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3017,11 +3017,6 @@  static void iavf_reset_task(struct work_struct *work)
 	int i = 0, err;
 	bool running;
 
-	/* Detach interface to avoid subsequent NDO callbacks */
-	rtnl_lock();
-	netif_device_detach(netdev);
-	rtnl_unlock();
-
 	/* When device is being removed it doesn't make sense to run the reset
 	 * task, just return in such a case.
 	 */
@@ -3029,7 +3024,7 @@  static void iavf_reset_task(struct work_struct *work)
 		if (adapter->state != __IAVF_REMOVE)
 			queue_work(adapter->wq, &adapter->reset_task);
 
-		goto reset_finish;
+		return;
 	}
 
 	while (!mutex_trylock(&adapter->client_lock))
@@ -3232,7 +3227,7 @@  static void iavf_reset_task(struct work_struct *work)
 	mutex_unlock(&adapter->client_lock);
 	mutex_unlock(&adapter->crit_lock);
 
-	goto reset_finish;
+	return;
 reset_err:
 	if (running) {
 		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3253,10 +3248,6 @@  static void iavf_reset_task(struct work_struct *work)
 	}
 
 	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-reset_finish:
-	rtnl_lock();
-	netif_device_attach(netdev);
-	rtnl_unlock();
 }
 
 /**