diff mbox

[1/2] don't wait on disk to start on resume

Message ID 1356064540-17414-1-git-send-email-dbasehore@chromium.org
State Not Applicable
Delegated to: David Miller
Headers show

Commit Message

dbasehore . Dec. 21, 2012, 4:35 a.m. UTC
We no longer wait for the disk to spin up in sd_resume. It now enters the
request to spinup the disk into the elevator and returns.

A function is scheduled under the scsi_sd_probe_domain to wait for the command
to spinup the disk to complete. This function then checks for errors and cleans
up after the sd resume function.

This allows system resume to complete much faster on systems with an HDD since
spinning up the disk is a significant portion of resume time.

Signed-off-by: Derek Basehore <dbasehore@chromium.org>
---
 drivers/scsi/sd.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++++---
 drivers/scsi/sd.h |    9 ++++
 2 files changed, 108 insertions(+), 7 deletions(-)

Comments

Sergei Shtylyov Dec. 22, 2012, 2:32 p.m. UTC | #1
Hello.

On 21-12-2012 8:35, Derek Basehore wrote:

> We no longer wait for the disk to spin up in sd_resume. It now enters the
> request to spinup the disk into the elevator and returns.
>
> A function is scheduled under the scsi_sd_probe_domain to wait for the command
> to spinup the disk to complete. This function then checks for errors and cleans
> up after the sd resume function.

> This allows system resume to complete much faster on systems with an HDD since
> spinning up the disk is a significant portion of resume time.

> Signed-off-by: Derek Basehore <dbasehore@chromium.org>
> ---
>   drivers/scsi/sd.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++++---
>   drivers/scsi/sd.h |    9 ++++
>   2 files changed, 108 insertions(+), 7 deletions(-)

> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 7992635..2fe051f 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
[...]
> @@ -3087,20 +3089,110 @@ done:
>   	return ret;
>   }
>
> +/*
> + * Callback function for when the request that starts the disk completes. Passed
> + * into blk_execute_rq_nowait in sd_resume.
> + */
> +static void sd_end_start_rq(struct request *rq, int error)
> +{
> +	struct completion *waiting = rq->end_io_data;
> +
> +	rq->end_io_data = NULL;
> +	/*
> +	 * Set the end_io_data to NULL before calling complete. This (with
> +	 * sd_resume async) ensures that it is set to NULL before we call
> +	 * blk_put_request.
> +	 */

    Shouldn't this comment precede the previous statement?

> +	complete(waiting);
> +}
[...]
>   static int sd_resume(struct device *dev)
>   {
>   	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
> -	int ret = 0;
> +	struct scsi_device *sdp = sdkp->device;
> +	struct resume_async_struct *resume = NULL;
> +	struct request *req;
> +	unsigned char cmd[6] = { START_STOP };	/* START_VALID */
>
> -	if (!sdkp->device->manage_start_stop)
> -		goto done;
> +	if (!sdkp->device->manage_start_stop) {
> +		scsi_disk_put(sdkp);
> +		return 0;
> +	}
>
>   	sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
> -	ret = sd_start_stop_device(sdkp, 1);
>
> -done:
> -	scsi_disk_put(sdkp);
> -	return ret;
> +	cmd[4] |= 1;	/* START */
> +
> +	if (sdp->start_stop_pwr_cond)
> +		cmd[4] |= 1 << 4;	/* Active */
> +
> +	if (!scsi_device_online(sdp))
> +		return -ENODEV;
> +
> +	resume = kzalloc(sizeof(struct resume_async_struct), GFP_NOIO);
> +	if (!resume)
> +		return DRIVER_ERROR << 24;
> +
> +	req = blk_get_request(sdp->request_queue, 0, __GFP_WAIT);
> +	if (!req)
> +		return DRIVER_ERROR << 24;
> +
> +	resume->req = req;
> +	resume->sdkp = sdkp;
> +	init_completion(&resume->wait);
> +
> +	req->cmd_len = COMMAND_SIZE(cmd[0]);
> +	memcpy(req->cmd, cmd, req->cmd_len);
> +	req->sense = resume->sense;
> +	req->sense_len = 0;

    Not SCSI_SENSE_BUFFERSIZE?

> +	req->retries = SD_MAX_RETRIES;
> +	req->timeout = SD_TIMEOUT;
> +	req->cmd_type = REQ_TYPE_BLOCK_PC;
> +	req->cmd_flags |= REQ_QUIET | REQ_PREEMPT;
> +	req->end_io_data = &resume->wait;
> +
> +	async_schedule_domain(sd_resume_async, resume, &scsi_sd_probe_domain);
> +	/*
> +	 * don't wait here for the disk to spin up, since we can resume faster
> +	 * if we don't. Cleanup and checking for errors is done the the

    s/the the/in the/

WBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
James Bottomley Dec. 23, 2012, 11:49 a.m. UTC | #2
Missing cc to linux-scsi added 

On Thu, 2012-12-20 at 20:35 -0800, Derek Basehore wrote:
> We no longer wait for the disk to spin up in sd_resume. It now enters the
> request to spinup the disk into the elevator and returns.
> 
> A function is scheduled under the scsi_sd_probe_domain to wait for the command
> to spinup the disk to complete. This function then checks for errors and cleans
> up after the sd resume function.
> 
> This allows system resume to complete much faster on systems with an HDD since
> spinning up the disk is a significant portion of resume time.
> 
> Signed-off-by: Derek Basehore <dbasehore@chromium.org>
> ---
>  drivers/scsi/sd.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  drivers/scsi/sd.h |    9 ++++
>  2 files changed, 108 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 7992635..2fe051f 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -3045,6 +3045,7 @@ static void sd_shutdown(struct device *dev)
>  	if (!sdkp)
>  		return;         /* this can happen */
>  
> +	async_synchronize_full_domain(&scsi_sd_probe_domain);

What's the point of this?  We already have one in sd_remove() which is
the correct last callback.

>  	if (pm_runtime_suspended(dev))
>  		goto exit;
>  
> @@ -3070,6 +3071,7 @@ static int sd_suspend(struct device *dev)
>  	if (!sdkp)
>  		return 0;	/* this can happen */
>  
> +	async_synchronize_full_domain(&scsi_sd_probe_domain);
>  	if (sdkp->WCE) {
>  		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
>  		ret = sd_sync_cache(sdkp);
> @@ -3087,20 +3089,110 @@ done:
>  	return ret;
>  }
>  
> +/*
> + * Callback function for when the request that starts the disk completes. Passed
> + * into blk_execute_rq_nowait in sd_resume.
> + */
> +static void sd_end_start_rq(struct request *rq, int error)
> +{
> +	struct completion *waiting = rq->end_io_data;
> +
> +	rq->end_io_data = NULL;
> +	/*
> +	 * Set the end_io_data to NULL before calling complete. This (with
> +	 * sd_resume async) ensures that it is set to NULL before we call
> +	 * blk_put_request.
> +	 */
> +	complete(waiting);
> +}
> +
> +/*
> + * Asynchronous part of sd_resume. This is separate from sd_end_start_rq since
> + * we need to block on resume for suspend and shutdown. We already do this by
> + * synchronizing on the scsi_sd_probe_domain, so use that.
> + */
> +static void sd_resume_async(void *data, async_cookie_t cookie)
> +{
> +	struct scsi_sense_hdr sshdr;
> +	struct resume_async_struct *resume = data;
> +	struct scsi_disk *sdkp = resume->sdkp;
> +	struct request *req = resume->req;
> +
> +	wait_for_completion(&resume->wait);
> +
> +	if (req->errors) {
> +		scsi_normalize_sense(resume->sense,
> +				     SCSI_SENSE_BUFFERSIZE,
> +				     &sshdr);
> +		sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n");
> +		sd_print_result(sdkp, req->errors);
> +		if (driver_byte(req->errors) & DRIVER_SENSE)
> +			sd_print_sense_hdr(sdkp, &sshdr);
> +	}
> +	kfree(resume);
> +	scsi_disk_put(sdkp);
> +	blk_put_request(req);
> +}
> +
> +/*
> + * This does not wait for the disk to spin up. This function enters the request
> + * to spinup the disk and schedules a function, sd_resume_async, that waits for
> + * that request to complete.
> + */
>  static int sd_resume(struct device *dev)
>  {
>  	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
> -	int ret = 0;
> +	struct scsi_device *sdp = sdkp->device;
> +	struct resume_async_struct *resume = NULL;
> +	struct request *req;
> +	unsigned char cmd[6] = { START_STOP };	/* START_VALID */
>  
> -	if (!sdkp->device->manage_start_stop)
> -		goto done;
> +	if (!sdkp->device->manage_start_stop) {
> +		scsi_disk_put(sdkp);
> +		return 0;
> +	}
>  
>  	sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
> -	ret = sd_start_stop_device(sdkp, 1);
>  
> -done:
> -	scsi_disk_put(sdkp);
> -	return ret;
> +	cmd[4] |= 1;	/* START */
> +
> +	if (sdp->start_stop_pwr_cond)
> +		cmd[4] |= 1 << 4;	/* Active */
> +
> +	if (!scsi_device_online(sdp))
> +		return -ENODEV;
> +
> +	resume = kzalloc(sizeof(struct resume_async_struct), GFP_NOIO);
> +	if (!resume)
> +		return DRIVER_ERROR << 24;
> +
> +	req = blk_get_request(sdp->request_queue, 0, __GFP_WAIT);
> +	if (!req)
> +		return DRIVER_ERROR << 24;
> +
> +	resume->req = req;
> +	resume->sdkp = sdkp;
> +	init_completion(&resume->wait);
> +
> +	req->cmd_len = COMMAND_SIZE(cmd[0]);
> +	memcpy(req->cmd, cmd, req->cmd_len);
> +	req->sense = resume->sense;
> +	req->sense_len = 0;
> +	req->retries = SD_MAX_RETRIES;
> +	req->timeout = SD_TIMEOUT;
> +	req->cmd_type = REQ_TYPE_BLOCK_PC;
> +	req->cmd_flags |= REQ_QUIET | REQ_PREEMPT;
> +	req->end_io_data = &resume->wait;
> +
> +	async_schedule_domain(sd_resume_async, resume, &scsi_sd_probe_domain);
> +	/*
> +	 * don't wait here for the disk to spin up, since we can resume faster
> +	 * if we don't. Cleanup and checking for errors is done the the
> +	 * previously scheduled sd_resume_async function
> +	 */
> +	blk_execute_rq_nowait(req->q, NULL, req, 1, sd_end_start_rq);

What is the point of open coding sd_start_stop_device() instead of just
calling it in the async function?

> +
> +	return 0;
>  }
>  
>  /**
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 74a1e4c..b09f255 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -1,6 +1,8 @@
>  #ifndef _SCSI_DISK_H
>  #define _SCSI_DISK_H
>  
> +#include <scsi/scsi_cmnd.h>
> +
>  /*
>   * More than enough for everybody ;)  The huge number of majors
>   * is a leftover from 16bit dev_t days, we don't really need that
> @@ -87,6 +89,13 @@ struct scsi_disk {
>  };
>  #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
>  
> +struct resume_async_struct {
> +	struct scsi_disk *sdkp;
> +	struct request *req;
> +	struct completion wait;
> +	char sense[SCSI_SENSE_BUFFERSIZE];
> +};

If you don't open code sd_start_stop_device() you don't need any of this
because you don't pass information around to async functions.

James

>  static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
>  {
>  	return container_of(disk->private_data, struct scsi_disk, driver);


--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
dbasehore . Jan. 31, 2013, 10 p.m. UTC | #3
(Resending as plain text so the message is tracked on vger)

Hi, thanks for reading through my patch.

With regards to SCSI_SENSE_BUFFERSIZE, I'm following the precedent of
scsi_execute_req found in drivers/scsi/scsi_lib.c

It seems that it is used by the scsi_normalize_sense function which I
call in sd_resume_async. I just input SCSI_SENSE_BUFFERSIZE directly
there though. I didn't know if anything would change its behavior on a
lower level if I made sense_len = SCSI_SENSE_BUFFERSIZE, so I just
went with what was already done.

I'll make sure the semantic fixes go into the final patch.

Also, I forgot to mention one possible problem earlier. I understand
that some hard drives have a command buffer that can be executed by
the hard drive in an order it determines. Does anyone know of a
potential problem if the following happens?

-resume finishes (hard drive not started yet)
-read/write sent to disk, inserted before start command

Could this happen? If so, could it cause any problems?

I've tested the possibility of a program trying to read/write from the
disk before it has started, and the read/write blocks until the disk
has actually been spun up. I don't know if there are specific hard
drives where this could be a problem though.
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
dbasehore . Jan. 31, 2013, 10:02 p.m. UTC | #4
(Resending message as plain text so it's tracked through vger lists)

I didn't notice the other call to async_synchronize_full, I'll remove that one.

I split up the functionality of sd_start_stop_device to ensure that
the command to start the disk was sent before resume was completed.
For now, it's open coded, but that could be changed if I make
additions are made to scsi_lib.c to allow us to return immediately
after scheduling the command.
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
James Bottomley Jan. 31, 2013, 10:26 p.m. UTC | #5
On Thu, 2013-01-31 at 14:02 -0800, dbasehore . wrote:
> (Resending message as plain text so it's tracked through vger lists)

Just FYI ... I don't get messages sent to me personally and the list, so
if it wasn't on vger, I haven't seen it.

> I didn't notice the other call to async_synchronize_full, I'll remove that one.
> 
> I split up the functionality of sd_start_stop_device to ensure that
> the command to start the disk was sent before resume was completed.
> For now, it's open coded, but that could be changed if I make
> additions are made to scsi_lib.c to allow us to return immediately
> after scheduling the command.

That would be better; I'll wait to see the proposed patch.

James


--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
James Bottomley Jan. 31, 2013, 10:27 p.m. UTC | #6
On Thu, 2013-01-31 at 14:00 -0800, dbasehore . wrote:
> (Resending as plain text so the message is tracked on vger)
> 
> Hi, thanks for reading through my patch.
> 
> With regards to SCSI_SENSE_BUFFERSIZE, I'm following the precedent of
> scsi_execute_req found in drivers/scsi/scsi_lib.c

I have no context for this: I don't recall making any comment on sense
buffers; what are you replying to?

James

> It seems that it is used by the scsi_normalize_sense function which I
> call in sd_resume_async. I just input SCSI_SENSE_BUFFERSIZE directly
> there though. I didn't know if anything would change its behavior on a
> lower level if I made sense_len = SCSI_SENSE_BUFFERSIZE, so I just
> went with what was already done.
> 
> I'll make sure the semantic fixes go into the final patch.
> 
> Also, I forgot to mention one possible problem earlier. I understand
> that some hard drives have a command buffer that can be executed by
> the hard drive in an order it determines. Does anyone know of a
> potential problem if the following happens?
> 
> -resume finishes (hard drive not started yet)
> -read/write sent to disk, inserted before start command
> 
> Could this happen? If so, could it cause any problems?
> 
> I've tested the possibility of a program trying to read/write from the
> disk before it has started, and the read/write blocks until the disk
> has actually been spun up. I don't know if there are specific hard
> drives where this could be a problem though.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
dbasehore . Jan. 31, 2013, 10:32 p.m. UTC | #7
That was a response to Sergei's message (which is properly tracked on
the lists).

On Thu, Jan 31, 2013 at 2:27 PM, James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
> On Thu, 2013-01-31 at 14:00 -0800, dbasehore . wrote:
>> (Resending as plain text so the message is tracked on vger)
>>
>> Hi, thanks for reading through my patch.
>>
>> With regards to SCSI_SENSE_BUFFERSIZE, I'm following the precedent of
>> scsi_execute_req found in drivers/scsi/scsi_lib.c
>
> I have no context for this: I don't recall making any comment on sense
> buffers; what are you replying to?
>
> James
>
>> It seems that it is used by the scsi_normalize_sense function which I
>> call in sd_resume_async. I just input SCSI_SENSE_BUFFERSIZE directly
>> there though. I didn't know if anything would change its behavior on a
>> lower level if I made sense_len = SCSI_SENSE_BUFFERSIZE, so I just
>> went with what was already done.
>>
>> I'll make sure the semantic fixes go into the final patch.
>>
>> Also, I forgot to mention one possible problem earlier. I understand
>> that some hard drives have a command buffer that can be executed by
>> the hard drive in an order it determines. Does anyone know of a
>> potential problem if the following happens?
>>
>> -resume finishes (hard drive not started yet)
>> -read/write sent to disk, inserted before start command
>>
>> Could this happen? If so, could it cause any problems?
>>
>> I've tested the possibility of a program trying to read/write from the
>> disk before it has started, and the read/write blocks until the disk
>> has actually been spun up. I don't know if there are specific hard
>> drives where this could be a problem though.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Aaron Lu Feb. 1, 2013, 11:51 a.m. UTC | #8
Hi Derek,

On 12/21/2012 12:35 PM, Derek Basehore wrote:
> We no longer wait for the disk to spin up in sd_resume. It now enters the
> request to spinup the disk into the elevator and returns.
> 
> A function is scheduled under the scsi_sd_probe_domain to wait for the command
> to spinup the disk to complete. This function then checks for errors and cleans
> up after the sd resume function.
> 
> This allows system resume to complete much faster on systems with an HDD since
> spinning up the disk is a significant portion of resume time.

An alternative way of possibly solving this problem from PM's point of
view might be:
1 Set both ata port and scsi device's runtime status to RPM_SUSPENDED
  in their system suspend callback;
2 On resume, do nothing in their system resume callback;
3 With request based runtime PM introduced here:
  http://thread.gmane.org/gmane.linux.power-management.general/30405
  When a request comes for the HDD after system resume, both the ata
  port and the scsi device will be runtime resumed, which involves
  re-initialize the ata link(in ata port's runtime resume callback) and
  spin up the HDD(in sd's runtime resume callback).

To make HDD un-affetcted by runtime PM during normal use, a large enough
autosuspend_delay can be set for it.

Just my 2 cents, I've not verified or tried in any way yet :-)

Thanks,
Aaron

> 
> Signed-off-by: Derek Basehore <dbasehore@chromium.org>
> ---
>  drivers/scsi/sd.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  drivers/scsi/sd.h |    9 ++++
>  2 files changed, 108 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 7992635..2fe051f 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -3045,6 +3045,7 @@ static void sd_shutdown(struct device *dev)
>  	if (!sdkp)
>  		return;         /* this can happen */
>  
> +	async_synchronize_full_domain(&scsi_sd_probe_domain);
>  	if (pm_runtime_suspended(dev))
>  		goto exit;
>  
> @@ -3070,6 +3071,7 @@ static int sd_suspend(struct device *dev)
>  	if (!sdkp)
>  		return 0;	/* this can happen */
>  
> +	async_synchronize_full_domain(&scsi_sd_probe_domain);
>  	if (sdkp->WCE) {
>  		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
>  		ret = sd_sync_cache(sdkp);
> @@ -3087,20 +3089,110 @@ done:
>  	return ret;
>  }
>  
> +/*
> + * Callback function for when the request that starts the disk completes. Passed
> + * into blk_execute_rq_nowait in sd_resume.
> + */
> +static void sd_end_start_rq(struct request *rq, int error)
> +{
> +	struct completion *waiting = rq->end_io_data;
> +
> +	rq->end_io_data = NULL;
> +	/*
> +	 * Set the end_io_data to NULL before calling complete. This (with
> +	 * sd_resume async) ensures that it is set to NULL before we call
> +	 * blk_put_request.
> +	 */
> +	complete(waiting);
> +}
> +
> +/*
> + * Asynchronous part of sd_resume. This is separate from sd_end_start_rq since
> + * we need to block on resume for suspend and shutdown. We already do this by
> + * synchronizing on the scsi_sd_probe_domain, so use that.
> + */
> +static void sd_resume_async(void *data, async_cookie_t cookie)
> +{
> +	struct scsi_sense_hdr sshdr;
> +	struct resume_async_struct *resume = data;
> +	struct scsi_disk *sdkp = resume->sdkp;
> +	struct request *req = resume->req;
> +
> +	wait_for_completion(&resume->wait);
> +
> +	if (req->errors) {
> +		scsi_normalize_sense(resume->sense,
> +				     SCSI_SENSE_BUFFERSIZE,
> +				     &sshdr);
> +		sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n");
> +		sd_print_result(sdkp, req->errors);
> +		if (driver_byte(req->errors) & DRIVER_SENSE)
> +			sd_print_sense_hdr(sdkp, &sshdr);
> +	}
> +	kfree(resume);
> +	scsi_disk_put(sdkp);
> +	blk_put_request(req);
> +}
> +
> +/*
> + * This does not wait for the disk to spin up. This function enters the request
> + * to spinup the disk and schedules a function, sd_resume_async, that waits for
> + * that request to complete.
> + */
>  static int sd_resume(struct device *dev)
>  {
>  	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
> -	int ret = 0;
> +	struct scsi_device *sdp = sdkp->device;
> +	struct resume_async_struct *resume = NULL;
> +	struct request *req;
> +	unsigned char cmd[6] = { START_STOP };	/* START_VALID */
>  
> -	if (!sdkp->device->manage_start_stop)
> -		goto done;
> +	if (!sdkp->device->manage_start_stop) {
> +		scsi_disk_put(sdkp);
> +		return 0;
> +	}
>  
>  	sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
> -	ret = sd_start_stop_device(sdkp, 1);
>  
> -done:
> -	scsi_disk_put(sdkp);
> -	return ret;
> +	cmd[4] |= 1;	/* START */
> +
> +	if (sdp->start_stop_pwr_cond)
> +		cmd[4] |= 1 << 4;	/* Active */
> +
> +	if (!scsi_device_online(sdp))
> +		return -ENODEV;
> +
> +	resume = kzalloc(sizeof(struct resume_async_struct), GFP_NOIO);
> +	if (!resume)
> +		return DRIVER_ERROR << 24;
> +
> +	req = blk_get_request(sdp->request_queue, 0, __GFP_WAIT);
> +	if (!req)
> +		return DRIVER_ERROR << 24;
> +
> +	resume->req = req;
> +	resume->sdkp = sdkp;
> +	init_completion(&resume->wait);
> +
> +	req->cmd_len = COMMAND_SIZE(cmd[0]);
> +	memcpy(req->cmd, cmd, req->cmd_len);
> +	req->sense = resume->sense;
> +	req->sense_len = 0;
> +	req->retries = SD_MAX_RETRIES;
> +	req->timeout = SD_TIMEOUT;
> +	req->cmd_type = REQ_TYPE_BLOCK_PC;
> +	req->cmd_flags |= REQ_QUIET | REQ_PREEMPT;
> +	req->end_io_data = &resume->wait;
> +
> +	async_schedule_domain(sd_resume_async, resume, &scsi_sd_probe_domain);
> +	/*
> +	 * don't wait here for the disk to spin up, since we can resume faster
> +	 * if we don't. Cleanup and checking for errors is done the the
> +	 * previously scheduled sd_resume_async function
> +	 */
> +	blk_execute_rq_nowait(req->q, NULL, req, 1, sd_end_start_rq);
> +
> +	return 0;
>  }
>  
>  /**
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 74a1e4c..b09f255 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -1,6 +1,8 @@
>  #ifndef _SCSI_DISK_H
>  #define _SCSI_DISK_H
>  
> +#include <scsi/scsi_cmnd.h>
> +
>  /*
>   * More than enough for everybody ;)  The huge number of majors
>   * is a leftover from 16bit dev_t days, we don't really need that
> @@ -87,6 +89,13 @@ struct scsi_disk {
>  };
>  #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
>  
> +struct resume_async_struct {
> +	struct scsi_disk *sdkp;
> +	struct request *req;
> +	struct completion wait;
> +	char sense[SCSI_SENSE_BUFFERSIZE];
> +};
> +
>  static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
>  {
>  	return container_of(disk->private_data, struct scsi_disk, driver);
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alan Stern Feb. 1, 2013, 3:28 p.m. UTC | #9
On Fri, 1 Feb 2013, Aaron Lu wrote:

> Hi Derek,
> 
> On 12/21/2012 12:35 PM, Derek Basehore wrote:
> > We no longer wait for the disk to spin up in sd_resume. It now enters the
> > request to spinup the disk into the elevator and returns.
> > 
> > A function is scheduled under the scsi_sd_probe_domain to wait for the command
> > to spinup the disk to complete. This function then checks for errors and cleans
> > up after the sd resume function.
> > 
> > This allows system resume to complete much faster on systems with an HDD since
> > spinning up the disk is a significant portion of resume time.
> 
> An alternative way of possibly solving this problem from PM's point of
> view might be:
> 1 Set both ata port and scsi device's runtime status to RPM_SUSPENDED
>   in their system suspend callback;
> 2 On resume, do nothing in their system resume callback;
> 3 With request based runtime PM introduced here:
>   http://thread.gmane.org/gmane.linux.power-management.general/30405
>   When a request comes for the HDD after system resume, both the ata
>   port and the scsi device will be runtime resumed, which involves
>   re-initialize the ata link(in ata port's runtime resume callback) and
>   spin up the HDD(in sd's runtime resume callback).
> 
> To make HDD un-affetcted by runtime PM during normal use, a large enough
> autosuspend_delay can be set for it.
> 
> Just my 2 cents, I've not verified or tried in any way yet :-)

It's a good idea.  The problem is that it won't work if CONFIG_PM_SLEEP
is enabled but CONFIG_PM_RUNTIME isn't.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7992635..2fe051f 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3045,6 +3045,7 @@  static void sd_shutdown(struct device *dev)
 	if (!sdkp)
 		return;         /* this can happen */
 
+	async_synchronize_full_domain(&scsi_sd_probe_domain);
 	if (pm_runtime_suspended(dev))
 		goto exit;
 
@@ -3070,6 +3071,7 @@  static int sd_suspend(struct device *dev)
 	if (!sdkp)
 		return 0;	/* this can happen */
 
+	async_synchronize_full_domain(&scsi_sd_probe_domain);
 	if (sdkp->WCE) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
 		ret = sd_sync_cache(sdkp);
@@ -3087,20 +3089,110 @@  done:
 	return ret;
 }
 
+/*
+ * Callback function for when the request that starts the disk completes. Passed
+ * into blk_execute_rq_nowait in sd_resume.
+ */
+static void sd_end_start_rq(struct request *rq, int error)
+{
+	struct completion *waiting = rq->end_io_data;
+
+	rq->end_io_data = NULL;
+	/*
+	 * Set the end_io_data to NULL before calling complete. This (with
+	 * sd_resume async) ensures that it is set to NULL before we call
+	 * blk_put_request.
+	 */
+	complete(waiting);
+}
+
+/*
+ * Asynchronous part of sd_resume. This is separate from sd_end_start_rq since
+ * we need to block on resume for suspend and shutdown. We already do this by
+ * synchronizing on the scsi_sd_probe_domain, so use that.
+ */
+static void sd_resume_async(void *data, async_cookie_t cookie)
+{
+	struct scsi_sense_hdr sshdr;
+	struct resume_async_struct *resume = data;
+	struct scsi_disk *sdkp = resume->sdkp;
+	struct request *req = resume->req;
+
+	wait_for_completion(&resume->wait);
+
+	if (req->errors) {
+		scsi_normalize_sense(resume->sense,
+				     SCSI_SENSE_BUFFERSIZE,
+				     &sshdr);
+		sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n");
+		sd_print_result(sdkp, req->errors);
+		if (driver_byte(req->errors) & DRIVER_SENSE)
+			sd_print_sense_hdr(sdkp, &sshdr);
+	}
+	kfree(resume);
+	scsi_disk_put(sdkp);
+	blk_put_request(req);
+}
+
+/*
+ * This does not wait for the disk to spin up. This function enters the request
+ * to spinup the disk and schedules a function, sd_resume_async, that waits for
+ * that request to complete.
+ */
 static int sd_resume(struct device *dev)
 {
 	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
-	int ret = 0;
+	struct scsi_device *sdp = sdkp->device;
+	struct resume_async_struct *resume = NULL;
+	struct request *req;
+	unsigned char cmd[6] = { START_STOP };	/* START_VALID */
 
-	if (!sdkp->device->manage_start_stop)
-		goto done;
+	if (!sdkp->device->manage_start_stop) {
+		scsi_disk_put(sdkp);
+		return 0;
+	}
 
 	sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
-	ret = sd_start_stop_device(sdkp, 1);
 
-done:
-	scsi_disk_put(sdkp);
-	return ret;
+	cmd[4] |= 1;	/* START */
+
+	if (sdp->start_stop_pwr_cond)
+		cmd[4] |= 1 << 4;	/* Active */
+
+	if (!scsi_device_online(sdp))
+		return -ENODEV;
+
+	resume = kzalloc(sizeof(struct resume_async_struct), GFP_NOIO);
+	if (!resume)
+		return DRIVER_ERROR << 24;
+
+	req = blk_get_request(sdp->request_queue, 0, __GFP_WAIT);
+	if (!req)
+		return DRIVER_ERROR << 24;
+
+	resume->req = req;
+	resume->sdkp = sdkp;
+	init_completion(&resume->wait);
+
+	req->cmd_len = COMMAND_SIZE(cmd[0]);
+	memcpy(req->cmd, cmd, req->cmd_len);
+	req->sense = resume->sense;
+	req->sense_len = 0;
+	req->retries = SD_MAX_RETRIES;
+	req->timeout = SD_TIMEOUT;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET | REQ_PREEMPT;
+	req->end_io_data = &resume->wait;
+
+	async_schedule_domain(sd_resume_async, resume, &scsi_sd_probe_domain);
+	/*
+	 * don't wait here for the disk to spin up, since we can resume faster
+	 * if we don't. Cleanup and checking for errors is done the the
+	 * previously scheduled sd_resume_async function
+	 */
+	blk_execute_rq_nowait(req->q, NULL, req, 1, sd_end_start_rq);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 74a1e4c..b09f255 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -1,6 +1,8 @@ 
 #ifndef _SCSI_DISK_H
 #define _SCSI_DISK_H
 
+#include <scsi/scsi_cmnd.h>
+
 /*
  * More than enough for everybody ;)  The huge number of majors
  * is a leftover from 16bit dev_t days, we don't really need that
@@ -87,6 +89,13 @@  struct scsi_disk {
 };
 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
 
+struct resume_async_struct {
+	struct scsi_disk *sdkp;
+	struct request *req;
+	struct completion wait;
+	char sense[SCSI_SENSE_BUFFERSIZE];
+};
+
 static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
 {
 	return container_of(disk->private_data, struct scsi_disk, driver);