diff mbox series

[net-next,1/3] dpaa2-eth: do not hold rtnl_lock on phylink_create() or _destroy()

Message ID 1574363727-5437-2-git-send-email-ioana.ciornei@nxp.com
State Changes Requested
Delegated to: David Miller
Headers show
Series dpaa2-eth: MAC phylink fixes | expand

Commit Message

Ioana Ciornei Nov. 21, 2019, 7:15 p.m. UTC
The rtnl_lock should not be held when calling phylink_create() or
phylink_destroy() since it leads to the deadlock listed below:

[   18.656576]  rtnl_lock+0x18/0x20
[   18.659798]  sfp_bus_add_upstream+0x28/0x90
[   18.663974]  phylink_create+0x2cc/0x828
[   18.667803]  dpaa2_mac_connect+0x14c/0x2a8
[   18.671890]  dpaa2_eth_connect_mac+0x94/0xd8

Fix this by moving the _lock() and _unlock() calls just outside of
phylink_of_phy_connect() and phylink_disconnect_phy().

Fixes: 719479230893 ("dpaa2-eth: add MAC/PHY support through phylink")
Reported-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 ----
 drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

Comments

Andrew Lunn Nov. 21, 2019, 7:50 p.m. UTC | #1
On Thu, Nov 21, 2019 at 09:15:25PM +0200, Ioana Ciornei wrote:
> The rtnl_lock should not be held when calling phylink_create() or
> phylink_destroy() since it leads to the deadlock listed below:
> 
> [   18.656576]  rtnl_lock+0x18/0x20
> [   18.659798]  sfp_bus_add_upstream+0x28/0x90
> [   18.663974]  phylink_create+0x2cc/0x828
> [   18.667803]  dpaa2_mac_connect+0x14c/0x2a8
> [   18.671890]  dpaa2_eth_connect_mac+0x94/0xd8

Hi Ioana

Have you done any testing with CONFIG_PROVE_LOCKING enabled. It should
find this sort of problem if the code paths get exercised.

     Andrew
Russell King (Oracle) Nov. 21, 2019, 11:48 p.m. UTC | #2
On Thu, Nov 21, 2019 at 09:15:25PM +0200, Ioana Ciornei wrote:
> The rtnl_lock should not be held when calling phylink_create() or
> phylink_destroy() since it leads to the deadlock listed below:
> 
> [   18.656576]  rtnl_lock+0x18/0x20
> [   18.659798]  sfp_bus_add_upstream+0x28/0x90
> [   18.663974]  phylink_create+0x2cc/0x828
> [   18.667803]  dpaa2_mac_connect+0x14c/0x2a8
> [   18.671890]  dpaa2_eth_connect_mac+0x94/0xd8
> 
> Fix this by moving the _lock() and _unlock() calls just outside of
> phylink_of_phy_connect() and phylink_disconnect_phy().
> 
> Fixes: 719479230893 ("dpaa2-eth: add MAC/PHY support through phylink")
> Reported-by: Russell King <linux@armlinux.org.uk>
> Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> ---
>  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 ----
>  drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 4 ++++
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> index 7ff147e89426..40290fea9e36 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> @@ -3431,12 +3431,10 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
>  		set_mac_addr(netdev_priv(net_dev));
>  		update_tx_fqids(priv);
>  
> -		rtnl_lock();
>  		if (priv->mac)
>  			dpaa2_eth_disconnect_mac(priv);
>  		else
>  			dpaa2_eth_connect_mac(priv);
> -		rtnl_unlock();

As I said previously, this will not be safe if net_dev is actively
published to userspace, and I'm very disappointed that you have not
taken on board what I wrote.

	Thread 1				Thread 2
						rtnl_lock()
	dpaa2_eth_disconnect_mac();
	dpaa2_mac_disconnect(priv->mac)
	 phylink_disconnect_phy(mac->phylink);
						dpaa2_eth_get_link_ksettings()
	 phylink_destroy(mac->phylink);
						phylink_ethtool_ksettings_get(priv->mac->phylink, ...)
	  kfree(pl);
	  					phylink_ethtool_ksettings_get()
						now dereferences freed
						memory
	kfree(priv->mac);
	priv->mac = NULL;

Similar can happen with priv->mac.

As long as the netdev is published, paths such as those in thread 2 are
possible while thread 1 is running concurrently.

So, your patch is at best a band-aid around the deadlock issue I pointed
out, but in doing so exposes another problem.

I think there is a fundamental design problem, and merely tweaking some
locks will not fix it.

As I've already stated, the phylink is not designed to be created and
destroyed on a published network device.

>  	}
>  
>  	return IRQ_HANDLED;
> @@ -3675,9 +3673,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
>  #ifdef CONFIG_DEBUG_FS
>  	dpaa2_dbg_remove(priv);
>  #endif
> -	rtnl_lock();
>  	dpaa2_eth_disconnect_mac(priv);
> -	rtnl_unlock();
>  
>  	unregister_netdev(net_dev);
>  
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
> index 84233e467ed1..0200308d1bc7 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
> @@ -277,7 +277,9 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
>  	}
>  	mac->phylink = phylink;
>  
> +	rtnl_lock();
>  	err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0);
> +	rtnl_unlock();
>  	if (err) {
>  		netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err);
>  		goto err_phylink_destroy;
> @@ -301,7 +303,9 @@ void dpaa2_mac_disconnect(struct dpaa2_mac *mac)
>  	if (!mac->phylink)
>  		return;
>  
> +	rtnl_lock();
>  	phylink_disconnect_phy(mac->phylink);
> +	rtnl_unlock();
>  	phylink_destroy(mac->phylink);
>  	dpmac_close(mac->mc_io, 0, mac->mc_dev->mc_handle);
>  }
> -- 
> 1.9.1
> 
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 7ff147e89426..40290fea9e36 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -3431,12 +3431,10 @@  static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
 		set_mac_addr(netdev_priv(net_dev));
 		update_tx_fqids(priv);
 
-		rtnl_lock();
 		if (priv->mac)
 			dpaa2_eth_disconnect_mac(priv);
 		else
 			dpaa2_eth_connect_mac(priv);
-		rtnl_unlock();
 	}
 
 	return IRQ_HANDLED;
@@ -3675,9 +3673,7 @@  static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 #ifdef CONFIG_DEBUG_FS
 	dpaa2_dbg_remove(priv);
 #endif
-	rtnl_lock();
 	dpaa2_eth_disconnect_mac(priv);
-	rtnl_unlock();
 
 	unregister_netdev(net_dev);
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index 84233e467ed1..0200308d1bc7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -277,7 +277,9 @@  int dpaa2_mac_connect(struct dpaa2_mac *mac)
 	}
 	mac->phylink = phylink;
 
+	rtnl_lock();
 	err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0);
+	rtnl_unlock();
 	if (err) {
 		netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err);
 		goto err_phylink_destroy;
@@ -301,7 +303,9 @@  void dpaa2_mac_disconnect(struct dpaa2_mac *mac)
 	if (!mac->phylink)
 		return;
 
+	rtnl_lock();
 	phylink_disconnect_phy(mac->phylink);
+	rtnl_unlock();
 	phylink_destroy(mac->phylink);
 	dpmac_close(mac->mc_io, 0, mac->mc_dev->mc_handle);
 }