diff mbox series

[1/4] libata: only wake a drive once on system resume

Message ID 20240104223940.339290-1-phill@thesusis.net
State New
Headers show
Series [1/4] libata: only wake a drive once on system resume | expand

Commit Message

Phillip Susi Jan. 4, 2024, 10:39 p.m. UTC
In the event that more than one pass of EH is needed during system resume,
only request the drive be started once.
---
 drivers/ata/libata-core.c | 9 +++++----
 drivers/ata/libata-eh.c   | 8 +++-----
 drivers/ata/libata.h      | 2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

Comments

Damien Le Moal Jan. 5, 2024, 12:13 p.m. UTC | #1
On 1/5/24 07:39, Phillip Susi wrote:
> In the event that more than one pass of EH is needed during system resume,
> only request the drive be started once.
> ---
>  drivers/ata/libata-core.c | 9 +++++----
>  drivers/ata/libata-eh.c   | 8 +++-----
>  drivers/ata/libata.h      | 2 +-
>  3 files changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 09ed67772fae..a2d8cc0097a8 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -2080,7 +2080,7 @@ static bool ata_dev_power_is_active(struct ata_device *dev)
>   *	LOCKING:
>   *	Kernel thread context (may sleep).
>   */
> -void ata_dev_power_set_active(struct ata_device *dev)
> +unsigned int ata_dev_power_set_active(struct ata_device *dev)
>  {
>  	struct ata_taskfile tf;
>  	unsigned int err_mask;
> @@ -2090,14 +2090,14 @@ void ata_dev_power_set_active(struct ata_device *dev)
>  	 * if supported by the device.
>  	 */
>  	if (!ata_dev_power_init_tf(dev, &tf, true))
> -		return;
> +		return AC_ERR_OTHER;

Nope. This is wrong. ata_dev_power_init_tf() returns a bool, not an error. The
bool indicates if the drive supports power management.

But beside this, I still do not understand what this fixes... Calling again
ata_dev_power_set_active() will do nothing but issue a check power mode command
if the drive is already active. So I do not see the need for this added complexity.

>  
>  	/*
>  	 * Check the device power state & condition and force a spinup with
>  	 * VERIFY command only if the drive is not already ACTIVE or IDLE.
>  	 */
>  	if (ata_dev_power_is_active(dev))
> -		return;
> +		return AC_ERR_OK;
>  
>  	ata_dev_notice(dev, "Entering active power mode\n");
>  
> @@ -2105,6 +2105,7 @@ void ata_dev_power_set_active(struct ata_device *dev)
>  	if (err_mask)
>  		ata_dev_err(dev, "VERIFY failed (err_mask=0x%x)\n",
>  			    err_mask);
> +	return err_mask;
>  }
>  
>  /**
> @@ -5257,7 +5258,7 @@ static int ata_port_pm_poweroff(struct device *dev)
>  static void ata_port_resume(struct ata_port *ap, pm_message_t mesg,
>  			    bool async)
>  {
> -	ata_port_request_pm(ap, mesg, ATA_EH_RESET,
> +	ata_port_request_pm(ap, mesg, ATA_EH_RESET | ATA_EH_SET_ACTIVE,
>  			    ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET,
>  			    async);
>  }
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index b0d6e69c4a5b..799a1b8bc384 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -710,10 +710,6 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
>  			ehc->saved_xfer_mode[devno] = dev->xfer_mode;
>  			if (ata_ncq_enabled(dev))
>  				ehc->saved_ncq_enabled |= 1 << devno;
> -
> -			/* If we are resuming, wake up the device */
> -			if (ap->pflags & ATA_PFLAG_RESUMING)
> -				ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
>  		}
>  	}
>  
> @@ -3853,7 +3849,9 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
>  		 */
>  		ata_for_each_dev(dev, link, ENABLED) {
>  			if (ehc->i.dev_action[dev->devno] & ATA_EH_SET_ACTIVE) {
> -				ata_dev_power_set_active(dev);
> +				unsigned int err_mask = ata_dev_power_set_active(dev);
> +				if (err_mask)
> +					link->eh_info.dev_action[dev->devno] |= ATA_EH_SET_ACTIVE;
>  				ata_eh_done(link, dev, ATA_EH_SET_ACTIVE);
>  			}
>  		}
> diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
> index 5c685bb1939e..43ad1ef9b63a 100644
> --- a/drivers/ata/libata.h
> +++ b/drivers/ata/libata.h
> @@ -65,7 +65,7 @@ extern int ata_dev_configure(struct ata_device *dev);
>  extern bool ata_dev_power_init_tf(struct ata_device *dev,
>  				  struct ata_taskfile *tf, bool set_active);
>  extern void ata_dev_power_set_standby(struct ata_device *dev);
> -extern void ata_dev_power_set_active(struct ata_device *dev);
> +extern unsigned int ata_dev_power_set_active(struct ata_device *dev);
>  extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit);
>  extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
>  extern unsigned int ata_dev_set_feature(struct ata_device *dev,
Damien Le Moal Jan. 5, 2024, 12:44 p.m. UTC | #2
On 1/5/24 07:39, Phillip Susi wrote:
> In the event that more than one pass of EH is needed during system resume,
> only request the drive be started once.

Please also add a cover letter that explain the problem that these patches all
together solve.


> ---
>  drivers/ata/libata-core.c | 9 +++++----
>  drivers/ata/libata-eh.c   | 8 +++-----
>  drivers/ata/libata.h      | 2 +-
>  3 files changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 09ed67772fae..a2d8cc0097a8 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -2080,7 +2080,7 @@ static bool ata_dev_power_is_active(struct ata_device *dev)
>   *	LOCKING:
>   *	Kernel thread context (may sleep).
>   */
> -void ata_dev_power_set_active(struct ata_device *dev)
> +unsigned int ata_dev_power_set_active(struct ata_device *dev)
>  {
>  	struct ata_taskfile tf;
>  	unsigned int err_mask;
> @@ -2090,14 +2090,14 @@ void ata_dev_power_set_active(struct ata_device *dev)
>  	 * if supported by the device.
>  	 */
>  	if (!ata_dev_power_init_tf(dev, &tf, true))
> -		return;
> +		return AC_ERR_OTHER;
>  
>  	/*
>  	 * Check the device power state & condition and force a spinup with
>  	 * VERIFY command only if the drive is not already ACTIVE or IDLE.
>  	 */
>  	if (ata_dev_power_is_active(dev))
> -		return;
> +		return AC_ERR_OK;
>  
>  	ata_dev_notice(dev, "Entering active power mode\n");
>  
> @@ -2105,6 +2105,7 @@ void ata_dev_power_set_active(struct ata_device *dev)
>  	if (err_mask)
>  		ata_dev_err(dev, "VERIFY failed (err_mask=0x%x)\n",
>  			    err_mask);
> +	return err_mask;
>  }
>  
>  /**
> @@ -5257,7 +5258,7 @@ static int ata_port_pm_poweroff(struct device *dev)
>  static void ata_port_resume(struct ata_port *ap, pm_message_t mesg,
>  			    bool async)
>  {
> -	ata_port_request_pm(ap, mesg, ATA_EH_RESET,
> +	ata_port_request_pm(ap, mesg, ATA_EH_RESET | ATA_EH_SET_ACTIVE,
>  			    ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET,
>  			    async);
>  }
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index b0d6e69c4a5b..799a1b8bc384 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -710,10 +710,6 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
>  			ehc->saved_xfer_mode[devno] = dev->xfer_mode;
>  			if (ata_ncq_enabled(dev))
>  				ehc->saved_ncq_enabled |= 1 << devno;
> -
> -			/* If we are resuming, wake up the device */
> -			if (ap->pflags & ATA_PFLAG_RESUMING)
> -				ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
>  		}
>  	}
>  
> @@ -3853,7 +3849,9 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
>  		 */
>  		ata_for_each_dev(dev, link, ENABLED) {
>  			if (ehc->i.dev_action[dev->devno] & ATA_EH_SET_ACTIVE) {
> -				ata_dev_power_set_active(dev);
> +				unsigned int err_mask = ata_dev_power_set_active(dev);
> +				if (err_mask)
> +					link->eh_info.dev_action[dev->devno] |= ATA_EH_SET_ACTIVE;
>  				ata_eh_done(link, dev, ATA_EH_SET_ACTIVE);
>  			}
>  		}
> diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
> index 5c685bb1939e..43ad1ef9b63a 100644
> --- a/drivers/ata/libata.h
> +++ b/drivers/ata/libata.h
> @@ -65,7 +65,7 @@ extern int ata_dev_configure(struct ata_device *dev);
>  extern bool ata_dev_power_init_tf(struct ata_device *dev,
>  				  struct ata_taskfile *tf, bool set_active);
>  extern void ata_dev_power_set_standby(struct ata_device *dev);
> -extern void ata_dev_power_set_active(struct ata_device *dev);
> +extern unsigned int ata_dev_power_set_active(struct ata_device *dev);
>  extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit);
>  extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
>  extern unsigned int ata_dev_set_feature(struct ata_device *dev,
Phillip Susi Jan. 5, 2024, 5:03 p.m. UTC | #3
Damien Le Moal <dlemoal@kernel.org> writes:

>> -		return;
>> +		return AC_ERR_OTHER;
>
> Nope. This is wrong. ata_dev_power_init_tf() returns a bool, not an error. The
> bool indicates if the drive supports power management.

Are you saying it should return AC_ERR_OK?  If the drive doesn't support
power management at all, isn't it an error to try to change its power
management state?

> But beside this, I still do not understand what this fixes... Calling again
> ata_dev_power_set_active() will do nothing but issue a check power mode command
> if the drive is already active. So I do not see the need for this added complexity.

If the drive is NOT active because it is PuiS, it would try to start the drive
anyhow, despite the PuiS actively clearing the ATA_EH_SET_ACTVE flag.
Then the VERIFY command fails if the drive requires the SET FEATURES
command to leave PuiS.

My goal here was to make sure that when the PuiS patch clears
ATA_EH_SET_ACTIVE, that it does not get turned back on during a second
round of EH.  Then you pointed out that it SHOULD try on the second
round, if the first attempt failed, so I changed the return to be able
to detect the error, and turn ATA_EH_SET_ACTIVE on for the second round.

It occurs to me now that I only ran into this issue once I changed to
actually giving the drive the SLEEP command instead of just setting the
sleeping flag.  Once the drive actually went to sleep, it shut down the
sata link, which caused an error interrupt, which triggered a second
round of EH, which then issued the failing VERIFY command until all 5
rounds were used and it gave up.  So if I go back to just setting the
sleeping flag instead of actually putting the drive to sleep, I would
not get the error interrupt and would be fine without this patch.  But I
can imagine some other cause for a second round of EH on some system
that would still run into this problem.
Damien Le Moal Jan. 6, 2024, 11:06 p.m. UTC | #4
On 1/6/24 02:03, Phillip Susi wrote:
> Damien Le Moal <dlemoal@kernel.org> writes:
> 
>>> -		return;
>>> +		return AC_ERR_OTHER;
>>
>> Nope. This is wrong. ata_dev_power_init_tf() returns a bool, not an error. The
>> bool indicates if the drive supports power management.
> 
> Are you saying it should return AC_ERR_OK?  If the drive doesn't support
> power management at all, isn't it an error to try to change its power
> management state?

That is why the function returns doing nothing if ata_dev_power_init_tf()
returns false, meaning "do not do power management". See that function. It
includes ATAPI devices (e.g. CD/DVD) which do not have power management.

>> But beside this, I still do not understand what this fixes... Calling again
>> ata_dev_power_set_active() will do nothing but issue a check power mode command
>> if the drive is already active. So I do not see the need for this added complexity.
> 
> If the drive is NOT active because it is PuiS, it would try to start the drive
> anyhow, despite the PuiS actively clearing the ATA_EH_SET_ACTVE flag.
> Then the VERIFY command fails if the drive requires the SET FEATURES
> command to leave PuiS.

Define a device flag to indicate that the drive has PUIS "on" and so woke up in
standby, e.g. ATA_DFLAG_PUIS_STANDBY. Check that flag in
ata_dev_power_set_active(), similarly to the ATA_DFLAG_SLEEPING flag and return
doing nothing if the puis flag is set. Way easier than playing games with the
return value.

> My goal here was to make sure that when the PuiS patch clears
> ATA_EH_SET_ACTIVE, that it does not get turned back on during a second
> round of EH.  Then you pointed out that it SHOULD try on the second
> round, if the first attempt failed, so I changed the return to be able
> to detect the error, and turn ATA_EH_SET_ACTIVE on for the second round.

See above.

> It occurs to me now that I only ran into this issue once I changed to
> actually giving the drive the SLEEP command instead of just setting the
> sleeping flag.  Once the drive actually went to sleep, it shut down the
> sata link, which caused an error interrupt, which triggered a second
> round of EH, which then issued the failing VERIFY command until all 5
> rounds were used and it gave up.  So if I go back to just setting the
> sleeping flag instead of actually putting the drive to sleep, I would
> not get the error interrupt and would be fine without this patch.  But I
> can imagine some other cause for a second round of EH on some system
> that would still run into this problem.

Please separate handling of sleep and puis. The former can be used even if puis
is off.
diff mbox series

Patch

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 09ed67772fae..a2d8cc0097a8 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2080,7 +2080,7 @@  static bool ata_dev_power_is_active(struct ata_device *dev)
  *	LOCKING:
  *	Kernel thread context (may sleep).
  */
-void ata_dev_power_set_active(struct ata_device *dev)
+unsigned int ata_dev_power_set_active(struct ata_device *dev)
 {
 	struct ata_taskfile tf;
 	unsigned int err_mask;
@@ -2090,14 +2090,14 @@  void ata_dev_power_set_active(struct ata_device *dev)
 	 * if supported by the device.
 	 */
 	if (!ata_dev_power_init_tf(dev, &tf, true))
-		return;
+		return AC_ERR_OTHER;
 
 	/*
 	 * Check the device power state & condition and force a spinup with
 	 * VERIFY command only if the drive is not already ACTIVE or IDLE.
 	 */
 	if (ata_dev_power_is_active(dev))
-		return;
+		return AC_ERR_OK;
 
 	ata_dev_notice(dev, "Entering active power mode\n");
 
@@ -2105,6 +2105,7 @@  void ata_dev_power_set_active(struct ata_device *dev)
 	if (err_mask)
 		ata_dev_err(dev, "VERIFY failed (err_mask=0x%x)\n",
 			    err_mask);
+	return err_mask;
 }
 
 /**
@@ -5257,7 +5258,7 @@  static int ata_port_pm_poweroff(struct device *dev)
 static void ata_port_resume(struct ata_port *ap, pm_message_t mesg,
 			    bool async)
 {
-	ata_port_request_pm(ap, mesg, ATA_EH_RESET,
+	ata_port_request_pm(ap, mesg, ATA_EH_RESET | ATA_EH_SET_ACTIVE,
 			    ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET,
 			    async);
 }
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index b0d6e69c4a5b..799a1b8bc384 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -710,10 +710,6 @@  void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
 			ehc->saved_xfer_mode[devno] = dev->xfer_mode;
 			if (ata_ncq_enabled(dev))
 				ehc->saved_ncq_enabled |= 1 << devno;
-
-			/* If we are resuming, wake up the device */
-			if (ap->pflags & ATA_PFLAG_RESUMING)
-				ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
 		}
 	}
 
@@ -3853,7 +3849,9 @@  int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		 */
 		ata_for_each_dev(dev, link, ENABLED) {
 			if (ehc->i.dev_action[dev->devno] & ATA_EH_SET_ACTIVE) {
-				ata_dev_power_set_active(dev);
+				unsigned int err_mask = ata_dev_power_set_active(dev);
+				if (err_mask)
+					link->eh_info.dev_action[dev->devno] |= ATA_EH_SET_ACTIVE;
 				ata_eh_done(link, dev, ATA_EH_SET_ACTIVE);
 			}
 		}
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 5c685bb1939e..43ad1ef9b63a 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -65,7 +65,7 @@  extern int ata_dev_configure(struct ata_device *dev);
 extern bool ata_dev_power_init_tf(struct ata_device *dev,
 				  struct ata_taskfile *tf, bool set_active);
 extern void ata_dev_power_set_standby(struct ata_device *dev);
-extern void ata_dev_power_set_active(struct ata_device *dev);
+extern unsigned int ata_dev_power_set_active(struct ata_device *dev);
 extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit);
 extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
 extern unsigned int ata_dev_set_feature(struct ata_device *dev,