diff mbox series

igc: fix deadlock caused by taking RTNL in RPM resume path

Message ID 20220811151342.19059-1-vinicius.gomes@intel.com
State Changes Requested
Headers show
Series igc: fix deadlock caused by taking RTNL in RPM resume path | expand

Commit Message

Vinicius Costa Gomes Aug. 11, 2022, 3:13 p.m. UTC
It was reported a RTNL deadlock in the igc driver that was causing
problems during suspend/resume.

The solution is similar to commit ac8c58f5b535 ("igb: fix deadlock
caused by taking RTNL in RPM resume path").

Reported-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
Hi James,

Thanks to your investigation I found commit ac8c58f5b535, and it looks
like it could solve the issue you are seeing.

Could you please see if this patch helps. It's only compile and boot
tested.

Sorry the delay, I am travelling.

Cheers,


 drivers/net/ethernet/intel/igc/igc_main.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

Comments

kernel test robot Aug. 11, 2022, 6:58 p.m. UTC | #1
Hi Vinicius,

I love your patch! Yet something to improve:

[auto build test ERROR on tnguy-next-queue/dev-queue]
[also build test ERROR on linus/master v5.19 next-20220811]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vinicius-Costa-Gomes/igc-fix-deadlock-caused-by-taking-RTNL-in-RPM-resume-path/20220811-232032
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git dev-queue
config: i386-randconfig-a013 (https://download.01.org/0day-ci/archive/20220812/202208120244.a7CKRiFy-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 5f1c7e2cc5a3c07cbc2412e851a7283c1841f520)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/61ed7ed758f23a10549c5d4fdc82ef9356281cbf
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vinicius-Costa-Gomes/igc-fix-deadlock-caused-by-taking-RTNL-in-RPM-resume-path/20220811-232032
        git checkout 61ed7ed758f23a10549c5d4fdc82ef9356281cbf
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/net/ethernet/intel/igc/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/net/ethernet/intel/igc/igc_main.c:6838:26: error: use of undeclared identifier 'igc_suspend'; did you mean '__igc_suspend'?
           SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
                                   ^~~~~~~~~~~
                                   __igc_suspend
   include/linux/pm.h:343:22: note: expanded from macro 'SET_SYSTEM_SLEEP_PM_OPS'
           SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
                               ^
   include/linux/pm.h:313:26: note: expanded from macro 'SYSTEM_SLEEP_PM_OPS'
           .suspend = pm_sleep_ptr(suspend_fn), \
                                   ^
   include/linux/pm.h:439:65: note: expanded from macro 'pm_sleep_ptr'
   #define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr))
                                                                   ^
   include/linux/kernel.h:57:38: note: expanded from macro 'PTR_IF'
   #define PTR_IF(cond, ptr)       ((cond) ? (ptr) : NULL)
                                              ^
   drivers/net/ethernet/intel/igc/igc_main.c:6706:27: note: '__igc_suspend' declared here
   static int __maybe_unused __igc_suspend(struct device *dev)
                             ^
>> drivers/net/ethernet/intel/igc/igc_main.c:6838:26: error: use of undeclared identifier 'igc_suspend'; did you mean '__igc_suspend'?
           SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
                                   ^~~~~~~~~~~
                                   __igc_suspend
   include/linux/pm.h:343:22: note: expanded from macro 'SET_SYSTEM_SLEEP_PM_OPS'
           SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
                               ^
   include/linux/pm.h:315:25: note: expanded from macro 'SYSTEM_SLEEP_PM_OPS'
           .freeze = pm_sleep_ptr(suspend_fn), \
                                  ^
   include/linux/pm.h:439:65: note: expanded from macro 'pm_sleep_ptr'
   #define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr))
                                                                   ^
   include/linux/kernel.h:57:38: note: expanded from macro 'PTR_IF'
   #define PTR_IF(cond, ptr)       ((cond) ? (ptr) : NULL)
                                              ^
   drivers/net/ethernet/intel/igc/igc_main.c:6706:27: note: '__igc_suspend' declared here
   static int __maybe_unused __igc_suspend(struct device *dev)
                             ^
>> drivers/net/ethernet/intel/igc/igc_main.c:6838:26: error: use of undeclared identifier 'igc_suspend'; did you mean '__igc_suspend'?
           SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
                                   ^~~~~~~~~~~
                                   __igc_suspend
   include/linux/pm.h:343:22: note: expanded from macro 'SET_SYSTEM_SLEEP_PM_OPS'
           SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
                               ^
   include/linux/pm.h:317:27: note: expanded from macro 'SYSTEM_SLEEP_PM_OPS'
           .poweroff = pm_sleep_ptr(suspend_fn), \
                                    ^
   include/linux/pm.h:439:65: note: expanded from macro 'pm_sleep_ptr'
   #define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr))
                                                                   ^
   include/linux/kernel.h:57:38: note: expanded from macro 'PTR_IF'
   #define PTR_IF(cond, ptr)       ((cond) ? (ptr) : NULL)
                                              ^
   drivers/net/ethernet/intel/igc/igc_main.c:6706:27: note: '__igc_suspend' declared here
   static int __maybe_unused __igc_suspend(struct device *dev)
                             ^
   3 errors generated.


vim +6838 drivers/net/ethernet/intel/igc/igc_main.c

bc23aa949aeba0 Sasha Neftin 2020-01-29  6835  
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6836  #ifdef CONFIG_PM
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6837  static const struct dev_pm_ops igc_pm_ops = {
9513d2a5dc7f3f Sasha Neftin 2019-11-14 @6838  	SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6839  	SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6840  			   igc_runtime_idle)
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6841  };
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6842  #endif
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6843
kernel test robot Aug. 11, 2022, 7:59 p.m. UTC | #2
Hi Vinicius,

I love your patch! Yet something to improve:

[auto build test ERROR on tnguy-next-queue/dev-queue]
[also build test ERROR on linus/master v5.19 next-20220811]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vinicius-Costa-Gomes/igc-fix-deadlock-caused-by-taking-RTNL-in-RPM-resume-path/20220811-232032
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git dev-queue
config: x86_64-rhel-8.3-kselftests (https://download.01.org/0day-ci/archive/20220812/202208120359.pPxeIJNZ-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/61ed7ed758f23a10549c5d4fdc82ef9356281cbf
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vinicius-Costa-Gomes/igc-fix-deadlock-caused-by-taking-RTNL-in-RPM-resume-path/20220811-232032
        git checkout 61ed7ed758f23a10549c5d4fdc82ef9356281cbf
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/x86/include/asm/percpu.h:27,
                    from arch/x86/include/asm/nospec-branch.h:14,
                    from arch/x86/include/asm/paravirt_types.h:40,
                    from arch/x86/include/asm/ptrace.h:97,
                    from arch/x86/include/asm/math_emu.h:5,
                    from arch/x86/include/asm/processor.h:13,
                    from arch/x86/include/asm/timex.h:5,
                    from include/linux/timex.h:67,
                    from include/linux/time32.h:13,
                    from include/linux/time.h:60,
                    from include/linux/stat.h:19,
                    from include/linux/module.h:13,
                    from drivers/net/ethernet/intel/igc/igc_main.c:4:
>> drivers/net/ethernet/intel/igc/igc_main.c:6838:33: error: 'igc_suspend' undeclared here (not in a function); did you mean 'dpm_suspend'?
    6838 |         SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
         |                                 ^~~~~~~~~~~
   include/linux/kernel.h:57:44: note: in definition of macro 'PTR_IF'
      57 | #define PTR_IF(cond, ptr)       ((cond) ? (ptr) : NULL)
         |                                            ^~~
   include/linux/pm.h:313:20: note: in expansion of macro 'pm_sleep_ptr'
     313 |         .suspend = pm_sleep_ptr(suspend_fn), \
         |                    ^~~~~~~~~~~~
   include/linux/pm.h:343:9: note: in expansion of macro 'SYSTEM_SLEEP_PM_OPS'
     343 |         SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
         |         ^~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/intel/igc/igc_main.c:6838:9: note: in expansion of macro 'SET_SYSTEM_SLEEP_PM_OPS'
    6838 |         SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
         |         ^~~~~~~~~~~~~~~~~~~~~~~


vim +6838 drivers/net/ethernet/intel/igc/igc_main.c

bc23aa949aeba0 Sasha Neftin 2020-01-29  6835  
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6836  #ifdef CONFIG_PM
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6837  static const struct dev_pm_ops igc_pm_ops = {
9513d2a5dc7f3f Sasha Neftin 2019-11-14 @6838  	SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6839  	SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6840  			   igc_runtime_idle)
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6841  };
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6842  #endif
9513d2a5dc7f3f Sasha Neftin 2019-11-14  6843
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ebff0e04045d..5079dc581d8d 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6600,7 +6600,7 @@  static void igc_deliver_wake_packet(struct net_device *netdev)
 	netif_rx(skb);
 }
 
-static int __maybe_unused igc_resume(struct device *dev)
+static int __maybe_unused __igc_resume(struct device *dev, bool rpm)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct net_device *netdev = pci_get_drvdata(pdev);
@@ -6642,23 +6642,30 @@  static int __maybe_unused igc_resume(struct device *dev)
 
 	wr32(IGC_WUS, ~0);
 
-	rtnl_lock();
+	if (!rpm)
+		rtnl_lock();
 	if (!err && netif_running(netdev))
 		err = __igc_open(netdev, true);
 
 	if (!err)
 		netif_device_attach(netdev);
-	rtnl_unlock();
+	if (!rpm)
+		rtnl_unlock();
 
 	return err;
 }
 
 static int __maybe_unused igc_runtime_resume(struct device *dev)
 {
-	return igc_resume(dev);
+	return __igc_resume(dev, true);
 }
 
-static int __maybe_unused igc_suspend(struct device *dev)
+static int __maybe_unused igc_resume(struct device *dev)
+{
+	return __igc_resume(dev, false);
+}
+
+static int __maybe_unused __igc_suspend(struct device *dev)
 {
 	return __igc_shutdown(to_pci_dev(dev), NULL, 0);
 }
@@ -6719,7 +6726,7 @@  static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
  *  @pdev: Pointer to PCI device
  *
  *  Restart the card from scratch, as if from a cold-boot. Implementation
- *  resembles the first-half of the igc_resume routine.
+ *  resembles the first-half of the __igc_resume routine.
  **/
 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
 {
@@ -6758,7 +6765,7 @@  static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
  *
  *  This callback is called when the error recovery driver tells us that
  *  its OK to resume normal operation. Implementation resembles the
- *  second-half of the igc_resume routine.
+ *  second-half of the __igc_resume routine.
  */
 static void igc_io_resume(struct pci_dev *pdev)
 {