diff mbox series

[RFC,v2] e1000e: Use rtnl_lock to prevent race conditions between net and pci/pm

Message ID 20191007172559.11166.29328.stgit@localhost.localdomain
State RFC
Delegated to: Jeff Kirsher
Headers show
Series [RFC,v2] e1000e: Use rtnl_lock to prevent race conditions between net and pci/pm | expand

Commit Message

Alexander Duyck Oct. 7, 2019, 5:27 p.m. UTC
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

This patch is meant to address possible race conditions that can exist
between network configuration and power management. A similar issue was
fixed for igb in commit 9474933caf21 ("igb: close/suspend race in
netif_device_detach").

In addition it consolidates the code so that the PCI error handling code
will essentially perform the power management freeze on the device prior to
attempting a reset, and will thaw the device afterwards if that is what it
is planning to do. Otherwise when we call close on the interface it should
see it is detached and not attempt to call the logic to down the interface
and free the IRQs again.

>From what I can tell the check that was adding the check for __E1000_DOWN
in e1000e_close was added when runtime power management was added. However
it should not be relevant for us as we perform a call to
pm_runtime_get_sync before we call e1000_down/free_irq so it should always
be back up before we call into this anyway.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---

RFC v2: Dropped some unused variables
	Added logic to check for device present before removing to pm_freeze
	Fixed misplaced err_irq to before rtnl_unlock()

 drivers/net/ethernet/intel/e1000e/netdev.c |   40 +++++++++++++++-------------
 1 file changed, 21 insertions(+), 19 deletions(-)

Comments

David Z. Dai Oct. 8, 2019, 8:49 p.m. UTC | #1
On Mon, 2019-10-07 at 10:27 -0700, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> This patch is meant to address possible race conditions that can exist
> between network configuration and power management. A similar issue was
> fixed for igb in commit 9474933caf21 ("igb: close/suspend race in
> netif_device_detach").
> 
> In addition it consolidates the code so that the PCI error handling code
> will essentially perform the power management freeze on the device prior to
> attempting a reset, and will thaw the device afterwards if that is what it
> is planning to do. Otherwise when we call close on the interface it should
> see it is detached and not attempt to call the logic to down the interface
> and free the IRQs again.
> 
> >From what I can tell the check that was adding the check for __E1000_DOWN
> in e1000e_close was added when runtime power management was added. However
> it should not be relevant for us as we perform a call to
> pm_runtime_get_sync before we call e1000_down/free_irq so it should always
> be back up before we call into this anyway.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> ---
> 
> RFC v2: Dropped some unused variables
> 	Added logic to check for device present before removing to pm_freeze
> 	Fixed misplaced err_irq to before rtnl_unlock()
> 
>  drivers/net/ethernet/intel/e1000e/netdev.c |   40 +++++++++++++++-------------
>  1 file changed, 21 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index d7d56e42a6aa..8b4e589aca36 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -4715,12 +4715,12 @@ int e1000e_close(struct net_device *netdev)
> 
>  	pm_runtime_get_sync(&pdev->dev);
> 
> -	if (!test_bit(__E1000_DOWN, &adapter->state)) {
> +	if (netif_device_present(netdev)) {
>  		e1000e_down(adapter, true);
>  		e1000_free_irq(adapter);
> 
>  		/* Link status message must follow this format */
> -		pr_info("%s NIC Link is Down\n", adapter->netdev->name);
> +		pr_info("%s NIC Link is Down\n", netdev->name);
>  	}
> 
>  	napi_disable(&adapter->napi);
> @@ -6298,10 +6298,14 @@ static int e1000e_pm_freeze(struct device *dev)
>  {
>  	struct net_device *netdev = dev_get_drvdata(dev);
>  	struct e1000_adapter *adapter = netdev_priv(netdev);
> +	bool present;
> 
> +	rtnl_lock();
> +
> +	present = netif_device_present(netdev);
>  	netif_device_detach(netdev);
> 
> -	if (netif_running(netdev)) {
> +	if (present && netif_running(netdev)) {
>  		int count = E1000_CHECK_RESET_COUNT;
> 
>  		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
> @@ -6313,6 +6317,8 @@ static int e1000e_pm_freeze(struct device *dev)
>  		e1000e_down(adapter, false);
>  		e1000_free_irq(adapter);
>  	}
> +	rtnl_unlock();
> +
>  	e1000e_reset_interrupt_capability(adapter);
> 
>  	/* Allow time for pending master requests to run */
> @@ -6626,27 +6632,31 @@ static int __e1000_resume(struct pci_dev *pdev)
>  	return 0;
>  }
> 
> -#ifdef CONFIG_PM_SLEEP
>  static int e1000e_pm_thaw(struct device *dev)
>  {
>  	struct net_device *netdev = dev_get_drvdata(dev);
>  	struct e1000_adapter *adapter = netdev_priv(netdev);
> +	int rc = 0;
> 
>  	e1000e_set_interrupt_capability(adapter);
> -	if (netif_running(netdev)) {
> -		u32 err = e1000_request_irq(adapter);
> 
> -		if (err)
> -			return err;
> +	rtnl_lock();
> +	if (netif_running(netdev)) {
> +		rc = e1000_request_irq(adapter);
> +		if (rc)
> +			goto err_irq;
> 
>  		e1000e_up(adapter);
>  	}
> 
>  	netif_device_attach(netdev);
> +err_irq:
> +	rtnl_unlock();
> 
> -	return 0;
> +	return rc;
>  }
> 
> +#ifdef CONFIG_PM_SLEEP
>  static int e1000e_pm_suspend(struct device *dev)
>  {
>  	struct pci_dev *pdev = to_pci_dev(dev);
> @@ -6818,16 +6828,11 @@ static void e1000_netpoll(struct net_device *netdev)
>  static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
>  						pci_channel_state_t state)
>  {
> -	struct net_device *netdev = pci_get_drvdata(pdev);
> -	struct e1000_adapter *adapter = netdev_priv(netdev);
> -
> -	netif_device_detach(netdev);
> +	e1000e_pm_freeze(&pdev->dev);
> 
>  	if (state == pci_channel_io_perm_failure)
>  		return PCI_ERS_RESULT_DISCONNECT;
> 
> -	if (netif_running(netdev))
> -		e1000e_down(adapter, true);
>  	pci_disable_device(pdev);
> 
>  	/* Request a slot slot reset. */
> @@ -6893,10 +6898,7 @@ static void e1000_io_resume(struct pci_dev *pdev)
> 
>  	e1000_init_manageability_pt(adapter);
> 
> -	if (netif_running(netdev))
> -		e1000e_up(adapter);
> -
> -	netif_device_attach(netdev);
> +	e1000e_pm_thaw(&pdev->dev);
> 
>  	/* If the controller has AMT, do not set DRV_LOAD until the interface
>  	 * is up.  For all other cases, let the f/w know that the h/w is now
> 
Tested this v2 version patch. There is no more crash, which is good.
And It also eliminates the double free warning message.
This patch is good to go.

Thanks! - David
Kai-Heng Feng Feb. 25, 2020, 9:42 a.m. UTC | #2
Hi Greg,

> On Oct 8, 2019, at 01:27, Alexander Duyck <alexander.duyck@gmail.com> wrote:
> 
> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> This patch is meant to address possible race conditions that can exist
> between network configuration and power management. A similar issue was
> fixed for igb in commit 9474933caf21 ("igb: close/suspend race in
> netif_device_detach").
> 
> In addition it consolidates the code so that the PCI error handling code
> will essentially perform the power management freeze on the device prior to
> attempting a reset, and will thaw the device afterwards if that is what it
> is planning to do. Otherwise when we call close on the interface it should
> see it is detached and not attempt to call the logic to down the interface
> and free the IRQs again.
> 
>> From what I can tell the check that was adding the check for __E1000_DOWN
> in e1000e_close was added when runtime power management was added. However
> it should not be relevant for us as we perform a call to
> pm_runtime_get_sync before we call e1000_down/free_irq so it should always
> be back up before we call into this anyway.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>

Please merge this commit, a7023819404ac9bd2bb311a4fafd38515cfa71ec to stable v5.14.

`modprobe -r e1000e` triggers a null pointer dereference [1] after the the following two patches are applied to v5.4.y:
d635e7c4b34e6a630c7a1e8f1a8fd52c3e3ceea7 e1000e: Revert "e1000e: Make watchdog use delayed work"
21c6137939723ed6f5e4aec7882cdfc247304c27 e1000e: Drop unnecessary __E1000_DOWN bit twiddling

[1] https://bugs.launchpad.net/bugs/1864303

Kai-Heng

> ---
> 
> RFC v2: Dropped some unused variables
> 	Added logic to check for device present before removing to pm_freeze
> 	Fixed misplaced err_irq to before rtnl_unlock()
> 
> drivers/net/ethernet/intel/e1000e/netdev.c |   40 +++++++++++++++-------------
> 1 file changed, 21 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index d7d56e42a6aa..8b4e589aca36 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -4715,12 +4715,12 @@ int e1000e_close(struct net_device *netdev)
> 
> 	pm_runtime_get_sync(&pdev->dev);
> 
> -	if (!test_bit(__E1000_DOWN, &adapter->state)) {
> +	if (netif_device_present(netdev)) {
> 		e1000e_down(adapter, true);
> 		e1000_free_irq(adapter);
> 
> 		/* Link status message must follow this format */
> -		pr_info("%s NIC Link is Down\n", adapter->netdev->name);
> +		pr_info("%s NIC Link is Down\n", netdev->name);
> 	}
> 
> 	napi_disable(&adapter->napi);
> @@ -6298,10 +6298,14 @@ static int e1000e_pm_freeze(struct device *dev)
> {
> 	struct net_device *netdev = dev_get_drvdata(dev);
> 	struct e1000_adapter *adapter = netdev_priv(netdev);
> +	bool present;
> 
> +	rtnl_lock();
> +
> +	present = netif_device_present(netdev);
> 	netif_device_detach(netdev);
> 
> -	if (netif_running(netdev)) {
> +	if (present && netif_running(netdev)) {
> 		int count = E1000_CHECK_RESET_COUNT;
> 
> 		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
> @@ -6313,6 +6317,8 @@ static int e1000e_pm_freeze(struct device *dev)
> 		e1000e_down(adapter, false);
> 		e1000_free_irq(adapter);
> 	}
> +	rtnl_unlock();
> +
> 	e1000e_reset_interrupt_capability(adapter);
> 
> 	/* Allow time for pending master requests to run */
> @@ -6626,27 +6632,31 @@ static int __e1000_resume(struct pci_dev *pdev)
> 	return 0;
> }
> 
> -#ifdef CONFIG_PM_SLEEP
> static int e1000e_pm_thaw(struct device *dev)
> {
> 	struct net_device *netdev = dev_get_drvdata(dev);
> 	struct e1000_adapter *adapter = netdev_priv(netdev);
> +	int rc = 0;
> 
> 	e1000e_set_interrupt_capability(adapter);
> -	if (netif_running(netdev)) {
> -		u32 err = e1000_request_irq(adapter);
> 
> -		if (err)
> -			return err;
> +	rtnl_lock();
> +	if (netif_running(netdev)) {
> +		rc = e1000_request_irq(adapter);
> +		if (rc)
> +			goto err_irq;
> 
> 		e1000e_up(adapter);
> 	}
> 
> 	netif_device_attach(netdev);
> +err_irq:
> +	rtnl_unlock();
> 
> -	return 0;
> +	return rc;
> }
> 
> +#ifdef CONFIG_PM_SLEEP
> static int e1000e_pm_suspend(struct device *dev)
> {
> 	struct pci_dev *pdev = to_pci_dev(dev);
> @@ -6818,16 +6828,11 @@ static void e1000_netpoll(struct net_device *netdev)
> static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
> 						pci_channel_state_t state)
> {
> -	struct net_device *netdev = pci_get_drvdata(pdev);
> -	struct e1000_adapter *adapter = netdev_priv(netdev);
> -
> -	netif_device_detach(netdev);
> +	e1000e_pm_freeze(&pdev->dev);
> 
> 	if (state == pci_channel_io_perm_failure)
> 		return PCI_ERS_RESULT_DISCONNECT;
> 
> -	if (netif_running(netdev))
> -		e1000e_down(adapter, true);
> 	pci_disable_device(pdev);
> 
> 	/* Request a slot slot reset. */
> @@ -6893,10 +6898,7 @@ static void e1000_io_resume(struct pci_dev *pdev)
> 
> 	e1000_init_manageability_pt(adapter);
> 
> -	if (netif_running(netdev))
> -		e1000e_up(adapter);
> -
> -	netif_device_attach(netdev);
> +	e1000e_pm_thaw(&pdev->dev);
> 
> 	/* If the controller has AMT, do not set DRV_LOAD until the interface
> 	 * is up.  For all other cases, let the f/w know that the h/w is now
>
Greg KH Feb. 25, 2020, 8:46 p.m. UTC | #3
On Tue, Feb 25, 2020 at 05:42:26PM +0800, Kai-Heng Feng wrote:
> Hi Greg,
> 
> > On Oct 8, 2019, at 01:27, Alexander Duyck <alexander.duyck@gmail.com> wrote:
> > 
> > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > 
> > This patch is meant to address possible race conditions that can exist
> > between network configuration and power management. A similar issue was
> > fixed for igb in commit 9474933caf21 ("igb: close/suspend race in
> > netif_device_detach").
> > 
> > In addition it consolidates the code so that the PCI error handling code
> > will essentially perform the power management freeze on the device prior to
> > attempting a reset, and will thaw the device afterwards if that is what it
> > is planning to do. Otherwise when we call close on the interface it should
> > see it is detached and not attempt to call the logic to down the interface
> > and free the IRQs again.
> > 
> >> From what I can tell the check that was adding the check for __E1000_DOWN
> > in e1000e_close was added when runtime power management was added. However
> > it should not be relevant for us as we perform a call to
> > pm_runtime_get_sync before we call e1000_down/free_irq so it should always
> > be back up before we call into this anyway.
> > 
> > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> Please merge this commit, a7023819404ac9bd2bb311a4fafd38515cfa71ec to stable v5.14.
> 
> `modprobe -r e1000e` triggers a null pointer dereference [1] after the the following two patches are applied to v5.4.y:
> d635e7c4b34e6a630c7a1e8f1a8fd52c3e3ceea7 e1000e: Revert "e1000e: Make watchdog use delayed work"
> 21c6137939723ed6f5e4aec7882cdfc247304c27 e1000e: Drop unnecessary __E1000_DOWN bit twiddling

Now queued up, thanks.

greg k-h
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index d7d56e42a6aa..8b4e589aca36 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4715,12 +4715,12 @@  int e1000e_close(struct net_device *netdev)
 
 	pm_runtime_get_sync(&pdev->dev);
 
-	if (!test_bit(__E1000_DOWN, &adapter->state)) {
+	if (netif_device_present(netdev)) {
 		e1000e_down(adapter, true);
 		e1000_free_irq(adapter);
 
 		/* Link status message must follow this format */
-		pr_info("%s NIC Link is Down\n", adapter->netdev->name);
+		pr_info("%s NIC Link is Down\n", netdev->name);
 	}
 
 	napi_disable(&adapter->napi);
@@ -6298,10 +6298,14 @@  static int e1000e_pm_freeze(struct device *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	bool present;
 
+	rtnl_lock();
+
+	present = netif_device_present(netdev);
 	netif_device_detach(netdev);
 
-	if (netif_running(netdev)) {
+	if (present && netif_running(netdev)) {
 		int count = E1000_CHECK_RESET_COUNT;
 
 		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
@@ -6313,6 +6317,8 @@  static int e1000e_pm_freeze(struct device *dev)
 		e1000e_down(adapter, false);
 		e1000_free_irq(adapter);
 	}
+	rtnl_unlock();
+
 	e1000e_reset_interrupt_capability(adapter);
 
 	/* Allow time for pending master requests to run */
@@ -6626,27 +6632,31 @@  static int __e1000_resume(struct pci_dev *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int e1000e_pm_thaw(struct device *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
 
 	e1000e_set_interrupt_capability(adapter);
-	if (netif_running(netdev)) {
-		u32 err = e1000_request_irq(adapter);
 
-		if (err)
-			return err;
+	rtnl_lock();
+	if (netif_running(netdev)) {
+		rc = e1000_request_irq(adapter);
+		if (rc)
+			goto err_irq;
 
 		e1000e_up(adapter);
 	}
 
 	netif_device_attach(netdev);
+err_irq:
+	rtnl_unlock();
 
-	return 0;
+	return rc;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int e1000e_pm_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -6818,16 +6828,11 @@  static void e1000_netpoll(struct net_device *netdev)
 static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 						pci_channel_state_t state)
 {
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct e1000_adapter *adapter = netdev_priv(netdev);
-
-	netif_device_detach(netdev);
+	e1000e_pm_freeze(&pdev->dev);
 
 	if (state == pci_channel_io_perm_failure)
 		return PCI_ERS_RESULT_DISCONNECT;
 
-	if (netif_running(netdev))
-		e1000e_down(adapter, true);
 	pci_disable_device(pdev);
 
 	/* Request a slot slot reset. */
@@ -6893,10 +6898,7 @@  static void e1000_io_resume(struct pci_dev *pdev)
 
 	e1000_init_manageability_pt(adapter);
 
-	if (netif_running(netdev))
-		e1000e_up(adapter);
-
-	netif_device_attach(netdev);
+	e1000e_pm_thaw(&pdev->dev);
 
 	/* If the controller has AMT, do not set DRV_LOAD until the interface
 	 * is up.  For all other cases, let the f/w know that the h/w is now