diff mbox series

[v2] ata: libata: Defer rescan on suspended device

Message ID 20230425061746.503145-1-kai.heng.feng@canonical.com
State New
Headers show
Series [v2] ata: libata: Defer rescan on suspended device | expand

Commit Message

Kai-Heng Feng April 25, 2023, 6:17 a.m. UTC
During system resume, if an EH is schduled after ATA host is resumed
(i.e. ATA_PFLAG_PM_PENDING cleared), but before the disk device is
fully resumed, the device_lock hold by scsi_rescan_device() is never
released so the dpm_resume() of the disk is blocked forerver.

That's because scsi_attach_vpd() is expecting the disk device is in
operational state, as it doesn't work on suspended device.

To avoid such deadlock, defer rescan if the disk is still suspended so
the resume process of the disk device can proceed. At the end of the
resume process, use the complete() callback to schedule the rescan task.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
v2:
 - Schedule rescan task at the end of system resume phase.
 - Wording.

 drivers/ata/libata-core.c | 11 +++++++++++
 drivers/ata/libata-eh.c   | 11 +++++++++--
 include/linux/libata.h    |  1 +
 3 files changed, 21 insertions(+), 2 deletions(-)

Comments

kernel test robot April 25, 2023, 8:34 a.m. UTC | #1
Hi Kai-Heng,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.3 next-20230424]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Kai-Heng-Feng/ata-libata-Defer-rescan-on-suspended-device/20230425-142001
base:   linus/master
patch link:    https://lore.kernel.org/r/20230425061746.503145-1-kai.heng.feng%40canonical.com
patch subject: [PATCH v2] ata: libata: Defer rescan on suspended device
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230425/202304251654.BnZBfauy-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/bed073367820f7e00147a8f39ed5f0d9d5eb6c67
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Kai-Heng-Feng/ata-libata-Defer-rescan-on-suspended-device/20230425-142001
        git checkout bed073367820f7e00147a8f39ed5f0d9d5eb6c67
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash drivers/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304251654.BnZBfauy-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/ata/libata-eh.c: In function 'ata_eh_revalidate_and_attach':
>> drivers/ata/libata-eh.c:2991:29: error: 'pm_suspend_target_state' undeclared (first use in this function); did you mean 'scsi_target_state'?
    2991 |                         if (pm_suspend_target_state != PM_SUSPEND_ON)
         |                             ^~~~~~~~~~~~~~~~~~~~~~~
         |                             scsi_target_state
   drivers/ata/libata-eh.c:2991:29: note: each undeclared identifier is reported only once for each function it appears in


vim +2991 drivers/ata/libata-eh.c

  2927	
  2928	static int ata_eh_revalidate_and_attach(struct ata_link *link,
  2929						struct ata_device **r_failed_dev)
  2930	{
  2931		struct ata_port *ap = link->ap;
  2932		struct ata_eh_context *ehc = &link->eh_context;
  2933		struct ata_device *dev;
  2934		unsigned int new_mask = 0;
  2935		unsigned long flags;
  2936		int rc = 0;
  2937	
  2938		/* For PATA drive side cable detection to work, IDENTIFY must
  2939		 * be done backwards such that PDIAG- is released by the slave
  2940		 * device before the master device is identified.
  2941		 */
  2942		ata_for_each_dev(dev, link, ALL_REVERSE) {
  2943			unsigned int action = ata_eh_dev_action(dev);
  2944			unsigned int readid_flags = 0;
  2945	
  2946			if (ehc->i.flags & ATA_EHI_DID_RESET)
  2947				readid_flags |= ATA_READID_POSTRESET;
  2948	
  2949			if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
  2950				WARN_ON(dev->class == ATA_DEV_PMP);
  2951	
  2952				/*
  2953				 * The link may be in a deep sleep, wake it up.
  2954				 *
  2955				 * If the link is in deep sleep, ata_phys_link_offline()
  2956				 * will return true, causing the revalidation to fail,
  2957				 * which leads to a (potentially) needless hard reset.
  2958				 *
  2959				 * ata_eh_recover() will later restore the link policy
  2960				 * to ap->target_lpm_policy after revalidation is done.
  2961				 */
  2962				if (link->lpm_policy > ATA_LPM_MAX_POWER) {
  2963					rc = ata_eh_set_lpm(link, ATA_LPM_MAX_POWER,
  2964							    r_failed_dev);
  2965					if (rc)
  2966						goto err;
  2967				}
  2968	
  2969				if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
  2970					rc = -EIO;
  2971					goto err;
  2972				}
  2973	
  2974				ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
  2975				rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
  2976							readid_flags);
  2977				if (rc)
  2978					goto err;
  2979	
  2980				ata_eh_done(link, dev, ATA_EH_REVALIDATE);
  2981	
  2982				/* Configuration may have changed, reconfigure
  2983				 * transfer mode.
  2984				 */
  2985				ehc->i.flags |= ATA_EHI_SETMODE;
  2986	
  2987				/* Schedule the scsi_rescan_device() here.
  2988				 * Defer the rescan if it's in process of
  2989				 * suspending or resuming.
  2990				 */
> 2991				if (pm_suspend_target_state != PM_SUSPEND_ON)
  2992					ap->pflags |= ATA_PFLAG_DEFER_RESCAN;
  2993				else
  2994					schedule_work(&(ap->scsi_rescan_task));
  2995			} else if (dev->class == ATA_DEV_UNKNOWN &&
  2996				   ehc->tries[dev->devno] &&
  2997				   ata_class_enabled(ehc->classes[dev->devno])) {
  2998				/* Temporarily set dev->class, it will be
  2999				 * permanently set once all configurations are
  3000				 * complete.  This is necessary because new
  3001				 * device configuration is done in two
  3002				 * separate loops.
  3003				 */
  3004				dev->class = ehc->classes[dev->devno];
  3005	
  3006				if (dev->class == ATA_DEV_PMP)
  3007					rc = sata_pmp_attach(dev);
  3008				else
  3009					rc = ata_dev_read_id(dev, &dev->class,
  3010							     readid_flags, dev->id);
  3011	
  3012				/* read_id might have changed class, store and reset */
  3013				ehc->classes[dev->devno] = dev->class;
  3014				dev->class = ATA_DEV_UNKNOWN;
  3015	
  3016				switch (rc) {
  3017				case 0:
  3018					/* clear error info accumulated during probe */
  3019					ata_ering_clear(&dev->ering);
  3020					new_mask |= 1 << dev->devno;
  3021					break;
  3022				case -ENOENT:
  3023					/* IDENTIFY was issued to non-existent
  3024					 * device.  No need to reset.  Just
  3025					 * thaw and ignore the device.
  3026					 */
  3027					ata_eh_thaw_port(ap);
  3028					break;
  3029				default:
  3030					goto err;
  3031				}
  3032			}
  3033		}
  3034	
  3035		/* PDIAG- should have been released, ask cable type if post-reset */
  3036		if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
  3037			if (ap->ops->cable_detect)
  3038				ap->cbl = ap->ops->cable_detect(ap);
  3039			ata_force_cbl(ap);
  3040		}
  3041	
  3042		/* Configure new devices forward such that user doesn't see
  3043		 * device detection messages backwards.
  3044		 */
  3045		ata_for_each_dev(dev, link, ALL) {
  3046			if (!(new_mask & (1 << dev->devno)))
  3047				continue;
  3048	
  3049			dev->class = ehc->classes[dev->devno];
  3050	
  3051			if (dev->class == ATA_DEV_PMP)
  3052				continue;
  3053	
  3054			ehc->i.flags |= ATA_EHI_PRINTINFO;
  3055			rc = ata_dev_configure(dev);
  3056			ehc->i.flags &= ~ATA_EHI_PRINTINFO;
  3057			if (rc) {
  3058				dev->class = ATA_DEV_UNKNOWN;
  3059				goto err;
  3060			}
  3061	
  3062			spin_lock_irqsave(ap->lock, flags);
  3063			ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
  3064			spin_unlock_irqrestore(ap->lock, flags);
  3065	
  3066			/* new device discovered, configure xfermode */
  3067			ehc->i.flags |= ATA_EHI_SETMODE;
  3068		}
  3069	
  3070		return 0;
  3071	
  3072	 err:
  3073		*r_failed_dev = dev;
  3074		return rc;
  3075	}
  3076
diff mbox series

Patch

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 14c17c3bda4e..564d72bf1ec6 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5093,6 +5093,16 @@  static int ata_port_pm_poweroff(struct device *dev)
 	return 0;
 }
 
+static void ata_port_pm_complete(struct device *dev)
+{
+	struct ata_port *ap = to_ata_port(dev);
+
+	if (ap->pflags & ATA_PFLAG_DEFER_RESCAN)
+		schedule_work(&(ap->scsi_rescan_task));
+
+	ap->pflags &= ~ATA_PFLAG_DEFER_RESCAN;
+}
+
 static const unsigned int ata_port_resume_ehi = ATA_EHI_NO_AUTOPSY
 						| ATA_EHI_QUIET;
 
@@ -5158,6 +5168,7 @@  static const struct dev_pm_ops ata_port_pm_ops = {
 	.thaw = ata_port_pm_resume,
 	.poweroff = ata_port_pm_poweroff,
 	.restore = ata_port_pm_resume,
+	.complete = ata_port_pm_complete,
 
 	.runtime_suspend = ata_port_runtime_suspend,
 	.runtime_resume = ata_port_runtime_resume,
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index a6c901811802..0881b590fb7e 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -15,6 +15,7 @@ 
 #include <linux/blkdev.h>
 #include <linux/export.h>
 #include <linux/pci.h>
+#include <linux/suspend.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_eh.h>
@@ -2983,8 +2984,14 @@  static int ata_eh_revalidate_and_attach(struct ata_link *link,
 			 */
 			ehc->i.flags |= ATA_EHI_SETMODE;
 
-			/* schedule the scsi_rescan_device() here */
-			schedule_work(&(ap->scsi_rescan_task));
+			/* Schedule the scsi_rescan_device() here.
+			 * Defer the rescan if it's in process of
+			 * suspending or resuming.
+			 */
+			if (pm_suspend_target_state != PM_SUSPEND_ON)
+				ap->pflags |= ATA_PFLAG_DEFER_RESCAN;
+			else
+				schedule_work(&(ap->scsi_rescan_task));
 		} else if (dev->class == ATA_DEV_UNKNOWN &&
 			   ehc->tries[dev->devno] &&
 			   ata_class_enabled(ehc->classes[dev->devno])) {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index a759dfbdcc91..aaa549444abc 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -189,6 +189,7 @@  enum {
 	ATA_PFLAG_UNLOADING	= (1 << 9), /* driver is being unloaded */
 	ATA_PFLAG_UNLOADED	= (1 << 10), /* driver is unloaded */
 
+	ATA_PFLAG_DEFER_RESCAN	= (1 << 16), /* peform deferred rescan on system resume */
 	ATA_PFLAG_SUSPENDED	= (1 << 17), /* port is suspended (power) */
 	ATA_PFLAG_PM_PENDING	= (1 << 18), /* PM operation pending */
 	ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */