Message ID | 20120311043912.27797.20898.stgit@dwillia2-linux.jf.intel.com |
---|---|
State | Not Applicable |
Delegated to: | David Miller |
Headers | show |
Ping? Ack on the ata changes? I'd like to submit this to scsi-rc-fixes for 3.4-rc to fix regression fallout around host_eh_scheduled handling now that libsas ata eh is asynchronous. On Sat, Mar 10, 2012 at 8:39 PM, Dan Williams <dan.j.williams@intel.com> wrote: > When managing shost->host_eh_scheduled libata assumes that there is a > 1:1 shost-to-ata_port relationship. libsas creates a 1:N relationship > so it needs to manage host_eh_scheduled cumulatively at the host level. > The sched_eh and end_eh port port ops allow libsas to track when domain > devices enter/leave the "eh-pending" state under ha->lock (previously > named ha->state_lock, but it is no longer just a lock for ha->state > changes). > > Since host_eh_scheduled indicates eh without backing commands pinning > the device it can be deallocated at any time. Move the taking of the > domain_device reference under the port_lock to guarantee that the > ata_port stays around for the duration of eh. > > Cc: Tejun Heo <tj@kernel.org> > Acked-by: Jacek Danecki <jacek.danecki@intel.com> > Signed-off-by: Dan Williams <dan.j.williams@intel.com> > --- > drivers/ata/libata-core.c | 4 ++ > drivers/ata/libata-eh.c | 57 ++++++++++++++++++++++++++++------- > drivers/scsi/libsas/sas_ata.c | 38 +++++++++++++++++++++-- > drivers/scsi/libsas/sas_discover.c | 6 ++-- > drivers/scsi/libsas/sas_event.c | 12 ++++--- > drivers/scsi/libsas/sas_init.c | 14 ++++----- > drivers/scsi/libsas/sas_scsi_host.c | 27 +++++++++++++---- > include/linux/libata.h | 4 ++ > include/scsi/libsas.h | 4 ++ > include/scsi/sas_ata.h | 5 +++ > 10 files changed, 134 insertions(+), 37 deletions(-) > > diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c > index 1654c94..827881e 100644 > --- a/drivers/ata/libata-core.c > +++ b/drivers/ata/libata-core.c > @@ -79,6 +79,8 @@ const struct ata_port_operations ata_base_port_ops = { > .prereset = ata_std_prereset, > .postreset = ata_std_postreset, > .error_handler = ata_std_error_handler, > + .sched_eh = ata_std_sched_eh, > + .end_eh = ata_std_end_eh, > }; > > const struct ata_port_operations sata_port_ops = { > @@ -6573,6 +6575,8 @@ struct ata_port_operations ata_dummy_port_ops = { > .qc_prep = ata_noop_qc_prep, > .qc_issue = ata_dummy_qc_issue, > .error_handler = ata_dummy_error_handler, > + .sched_eh = ata_std_sched_eh, > + .end_eh = ata_std_end_eh, > }; > > const struct ata_port_info ata_dummy_port_info = { > diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c > index c61316e..4f12f63 100644 > --- a/drivers/ata/libata-eh.c > +++ b/drivers/ata/libata-eh.c > @@ -793,12 +793,12 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) > ata_for_each_link(link, ap, HOST_FIRST) > memset(&link->eh_info, 0, sizeof(link->eh_info)); > > - /* Clear host_eh_scheduled while holding ap->lock such > - * that if exception occurs after this point but > - * before EH completion, SCSI midlayer will > + /* end eh (clear host_eh_scheduled) while holding > + * ap->lock such that if exception occurs after this > + * point but before EH completion, SCSI midlayer will > * re-initiate EH. > */ > - host->host_eh_scheduled = 0; > + ap->ops->end_eh(ap); > > spin_unlock_irqrestore(ap->lock, flags); > ata_eh_release(ap); > @@ -986,16 +986,13 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) > } > > /** > - * ata_port_schedule_eh - schedule error handling without a qc > - * @ap: ATA port to schedule EH for > - * > - * Schedule error handling for @ap. EH will kick in as soon as > - * all commands are drained. > + * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine > + * @ap: ATA port to schedule EH for > * > - * LOCKING: > + * LOCKING: inherited from ata_port_schedule_eh > * spin_lock_irqsave(host lock) > */ > -void ata_port_schedule_eh(struct ata_port *ap) > +void ata_std_sched_eh(struct ata_port *ap) > { > WARN_ON(!ap->ops->error_handler); > > @@ -1007,6 +1004,44 @@ void ata_port_schedule_eh(struct ata_port *ap) > > DPRINTK("port EH scheduled\n"); > } > +EXPORT_SYMBOL_GPL(ata_std_sched_eh); > + > +/** > + * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine > + * @ap: ATA port to end EH for > + * > + * In the libata object model there is a 1:1 mapping of ata_port to > + * shost, so host fields can be directly manipulated under ap->lock, in > + * the libsas case we need to hold a lock at the ha->level to coordinate > + * these events. > + * > + * LOCKING: > + * spin_lock_irqsave(host lock) > + */ > +void ata_std_end_eh(struct ata_port *ap) > +{ > + struct Scsi_Host *host = ap->scsi_host; > + > + host->host_eh_scheduled = 0; > +} > +EXPORT_SYMBOL(ata_std_end_eh); > + > + > +/** > + * ata_port_schedule_eh - schedule error handling without a qc > + * @ap: ATA port to schedule EH for > + * > + * Schedule error handling for @ap. EH will kick in as soon as > + * all commands are drained. > + * > + * LOCKING: > + * spin_lock_irqsave(host lock) > + */ > +void ata_port_schedule_eh(struct ata_port *ap) > +{ > + /* see: ata_std_sched_eh, unless you know better */ > + ap->ops->sched_eh(ap); > +} > > static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) > { > diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c > index 729a7b6..d30a093 100644 > --- a/drivers/scsi/libsas/sas_ata.c > +++ b/drivers/scsi/libsas/sas_ata.c > @@ -523,6 +523,31 @@ static void sas_ata_set_dmamode(struct ata_port *ap, struct ata_device *ata_dev) > i->dft->lldd_ata_set_dmamode(dev); > } > > +static void sas_ata_sched_eh(struct ata_port *ap) > +{ > + struct domain_device *dev = ap->private_data; > + struct sas_ha_struct *ha = dev->port->ha; > + unsigned long flags; > + > + spin_lock_irqsave(&ha->lock, flags); > + if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state)) > + ha->eh_active++; > + ata_std_sched_eh(ap); > + spin_unlock_irqrestore(&ha->lock, flags); > +} > + > +void sas_ata_end_eh(struct ata_port *ap) > +{ > + struct domain_device *dev = ap->private_data; > + struct sas_ha_struct *ha = dev->port->ha; > + unsigned long flags; > + > + spin_lock_irqsave(&ha->lock, flags); > + if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state)) > + ha->eh_active--; > + spin_unlock_irqrestore(&ha->lock, flags); > +} > + > static struct ata_port_operations sas_sata_ops = { > .prereset = ata_std_prereset, > .hardreset = sas_ata_hard_reset, > @@ -536,6 +561,8 @@ static struct ata_port_operations sas_sata_ops = { > .port_start = ata_sas_port_start, > .port_stop = ata_sas_port_stop, > .set_dmamode = sas_ata_set_dmamode, > + .sched_eh = sas_ata_sched_eh, > + .end_eh = sas_ata_end_eh, > }; > > static struct ata_port_info sata_port_info = { > @@ -684,10 +711,6 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie) > struct ata_port *ap = dev->sata_dev.ap; > struct sas_ha_struct *ha = dev->port->ha; > > - /* hold a reference over eh since we may be racing with final > - * remove once all commands are completed > - */ > - kref_get(&dev->kref); > sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n"); > ata_scsi_port_error_handler(ha->core.shost, ap); > sas_put_device(dev); > @@ -730,6 +753,13 @@ void sas_ata_strategy_handler(struct Scsi_Host *shost) > list_for_each_entry(dev, &port->dev_list, dev_list_node) { > if (!sas_ata_dev_eh_valid(dev)) > continue; > + > + /* hold a reference over eh since we may be > + * racing with final remove once all commands > + * are completed > + */ > + kref_get(&dev->kref); > + > async_schedule_domain(async_sas_ata_eh, dev, &async); > } > spin_unlock(&port->dev_list_lock); > diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c > index bd8c636..a730ac9 100644 > --- a/drivers/scsi/libsas/sas_discover.c > +++ b/drivers/scsi/libsas/sas_discover.c > @@ -282,6 +282,8 @@ static void sas_unregister_common_dev(struct asd_sas_port *port, struct domain_d > > spin_lock_irq(&port->dev_list_lock); > list_del_init(&dev->dev_list_node); > + if (dev_is_sata(dev)) > + sas_ata_end_eh(dev->sata_dev.ap); > spin_unlock_irq(&port->dev_list_lock); > > sas_put_device(dev); > @@ -479,9 +481,9 @@ static void sas_chain_event(int event, unsigned long *pending, > if (!test_and_set_bit(event, pending)) { > unsigned long flags; > > - spin_lock_irqsave(&ha->state_lock, flags); > + spin_lock_irqsave(&ha->lock, flags); > sas_chain_work(ha, sw); > - spin_unlock_irqrestore(&ha->state_lock, flags); > + spin_unlock_irqrestore(&ha->lock, flags); > } > } > > diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c > index 4e4292d..789c4d8 100644 > --- a/drivers/scsi/libsas/sas_event.c > +++ b/drivers/scsi/libsas/sas_event.c > @@ -47,9 +47,9 @@ static void sas_queue_event(int event, unsigned long *pending, > if (!test_and_set_bit(event, pending)) { > unsigned long flags; > > - spin_lock_irqsave(&ha->state_lock, flags); > + spin_lock_irqsave(&ha->lock, flags); > sas_queue_work(ha, work); > - spin_unlock_irqrestore(&ha->state_lock, flags); > + spin_unlock_irqrestore(&ha->lock, flags); > } > } > > @@ -61,18 +61,18 @@ void __sas_drain_work(struct sas_ha_struct *ha) > > set_bit(SAS_HA_DRAINING, &ha->state); > /* flush submitters */ > - spin_lock_irq(&ha->state_lock); > - spin_unlock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > + spin_unlock_irq(&ha->lock); > > drain_workqueue(wq); > > - spin_lock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > clear_bit(SAS_HA_DRAINING, &ha->state); > list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) { > list_del_init(&sw->drain_node); > sas_queue_work(ha, sw); > } > - spin_unlock_irq(&ha->state_lock); > + spin_unlock_irq(&ha->lock); > } > > int sas_drain_work(struct sas_ha_struct *ha) > diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c > index 10cb5ae..6909fef 100644 > --- a/drivers/scsi/libsas/sas_init.c > +++ b/drivers/scsi/libsas/sas_init.c > @@ -114,7 +114,7 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) > sas_ha->lldd_queue_size = 128; /* Sanity */ > > set_bit(SAS_HA_REGISTERED, &sas_ha->state); > - spin_lock_init(&sas_ha->state_lock); > + spin_lock_init(&sas_ha->lock); > mutex_init(&sas_ha->drain_mutex); > INIT_LIST_HEAD(&sas_ha->defer_q); > > @@ -163,9 +163,9 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha) > * events to be queued, and flush any in-progress drainers > */ > mutex_lock(&sas_ha->drain_mutex); > - spin_lock_irq(&sas_ha->state_lock); > + spin_lock_irq(&sas_ha->lock); > clear_bit(SAS_HA_REGISTERED, &sas_ha->state); > - spin_unlock_irq(&sas_ha->state_lock); > + spin_unlock_irq(&sas_ha->lock); > __sas_drain_work(sas_ha); > mutex_unlock(&sas_ha->drain_mutex); > > @@ -411,9 +411,9 @@ static int queue_phy_reset(struct sas_phy *phy, int hard_reset) > d->reset_result = 0; > d->hard_reset = hard_reset; > > - spin_lock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > sas_queue_work(ha, &d->reset_work); > - spin_unlock_irq(&ha->state_lock); > + spin_unlock_irq(&ha->lock); > > rc = sas_drain_work(ha); > if (rc == 0) > @@ -438,9 +438,9 @@ static int queue_phy_enable(struct sas_phy *phy, int enable) > d->enable_result = 0; > d->enable = enable; > > - spin_lock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > sas_queue_work(ha, &d->enable_work); > - spin_unlock_irq(&ha->state_lock); > + spin_unlock_irq(&ha->lock); > > rc = sas_drain_work(ha); > if (rc == 0) > diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c > index 49a9113..cd63d80 100644 > --- a/drivers/scsi/libsas/sas_scsi_host.c > +++ b/drivers/scsi/libsas/sas_scsi_host.c > @@ -667,16 +667,20 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * > goto out; > } > > + > void sas_scsi_recover_host(struct Scsi_Host *shost) > { > struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); > - unsigned long flags; > LIST_HEAD(eh_work_q); > + int tries = 0; > + bool retry; > > - spin_lock_irqsave(shost->host_lock, flags); > +retry: > + tries++; > + retry = true; > + spin_lock_irq(shost->host_lock); > list_splice_init(&shost->eh_cmd_q, &eh_work_q); > - shost->host_eh_scheduled = 0; > - spin_unlock_irqrestore(shost->host_lock, flags); > + spin_unlock_irq(shost->host_lock); > > SAS_DPRINTK("Enter %s busy: %d failed: %d\n", > __func__, shost->host_busy, shost->host_failed); > @@ -710,8 +714,19 @@ out: > > scsi_eh_flush_done_q(&ha->eh_done_q); > > - SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n", > - __func__, shost->host_busy, shost->host_failed); > + /* check if any new eh work was scheduled during the last run */ > + spin_lock_irq(&ha->lock); > + if (ha->eh_active == 0) { > + shost->host_eh_scheduled = 0; > + retry = false; > + } > + spin_unlock_irq(&ha->lock); > + > + if (retry) > + goto retry; > + > + SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n", > + __func__, shost->host_busy, shost->host_failed, tries); > } > > enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) > diff --git a/include/linux/libata.h b/include/linux/libata.h > index 42378d6..83ff256 100644 > --- a/include/linux/libata.h > +++ b/include/linux/libata.h > @@ -845,6 +845,8 @@ struct ata_port_operations { > void (*error_handler)(struct ata_port *ap); > void (*lost_interrupt)(struct ata_port *ap); > void (*post_internal_cmd)(struct ata_queued_cmd *qc); > + void (*sched_eh)(struct ata_port *ap); > + void (*end_eh)(struct ata_port *ap); > > /* > * Optional features > @@ -1165,6 +1167,8 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, > ata_reset_fn_t softreset, ata_reset_fn_t hardreset, > ata_postreset_fn_t postreset); > extern void ata_std_error_handler(struct ata_port *ap); > +extern void ata_std_sched_eh(struct ata_port *ap); > +extern void ata_std_end_eh(struct ata_port *ap); > extern int ata_link_nr_enabled(struct ata_link *link); > > /* > diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h > index f4f1c96..c0c94b3 100644 > --- a/include/scsi/libsas.h > +++ b/include/scsi/libsas.h > @@ -177,6 +177,7 @@ struct sata_device { > enum { > SAS_DEV_GONE, > SAS_DEV_DESTROY, > + SAS_DEV_EH_PENDING, > }; > > struct domain_device { > @@ -384,7 +385,8 @@ struct sas_ha_struct { > struct list_head defer_q; /* work queued while draining */ > struct mutex drain_mutex; > unsigned long state; > - spinlock_t state_lock; > + spinlock_t lock; > + int eh_active; > > struct mutex disco_mutex; > > diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h > index 6d5d60c..3e13c28 100644 > --- a/include/scsi/sas_ata.h > +++ b/include/scsi/sas_ata.h > @@ -44,6 +44,7 @@ void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, > void sas_ata_schedule_reset(struct domain_device *dev); > void sas_ata_wait_eh(struct domain_device *dev); > void sas_probe_sata(struct asd_sas_port *port); > +void sas_ata_end_eh(struct ata_port *ap); > #else > > > @@ -81,6 +82,10 @@ static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy > { > return 0; > } > + > +static inline void sas_ata_end_eh(struct ata_port *ap) > +{ > +} > #endif > > #endif /* _SAS_ATA_H_ */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ide" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 04/11/12 04:13, Dan Williams wrote: > Ping? Ack on the ata changes? Acked-by: Jacek Danecki <jacek.danecki@intel.com>
On Wed, Apr 11, 2012 at 4:39 AM, Jacek Danecki <Jacek.Danecki@intel.com> wrote: > On 04/11/12 04:13, Dan Williams wrote: >> >> Ping? Ack on the ata changes? > > > Acked-by: Jacek Danecki <jacek.danecki@intel.com> Thanks Jacek, I was angling for Tejun or Jeff to weigh in with an ack so I can take this through James' tree. -- Dan -- To unsubscribe from this list: send the line "unsubscribe linux-ide" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1654c94..827881e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -79,6 +79,8 @@ const struct ata_port_operations ata_base_port_ops = { .prereset = ata_std_prereset, .postreset = ata_std_postreset, .error_handler = ata_std_error_handler, + .sched_eh = ata_std_sched_eh, + .end_eh = ata_std_end_eh, }; const struct ata_port_operations sata_port_ops = { @@ -6573,6 +6575,8 @@ struct ata_port_operations ata_dummy_port_ops = { .qc_prep = ata_noop_qc_prep, .qc_issue = ata_dummy_qc_issue, .error_handler = ata_dummy_error_handler, + .sched_eh = ata_std_sched_eh, + .end_eh = ata_std_end_eh, }; const struct ata_port_info ata_dummy_port_info = { diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c61316e..4f12f63 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -793,12 +793,12 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ata_for_each_link(link, ap, HOST_FIRST) memset(&link->eh_info, 0, sizeof(link->eh_info)); - /* Clear host_eh_scheduled while holding ap->lock such - * that if exception occurs after this point but - * before EH completion, SCSI midlayer will + /* end eh (clear host_eh_scheduled) while holding + * ap->lock such that if exception occurs after this + * point but before EH completion, SCSI midlayer will * re-initiate EH. */ - host->host_eh_scheduled = 0; + ap->ops->end_eh(ap); spin_unlock_irqrestore(ap->lock, flags); ata_eh_release(ap); @@ -986,16 +986,13 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) } /** - * ata_port_schedule_eh - schedule error handling without a qc - * @ap: ATA port to schedule EH for - * - * Schedule error handling for @ap. EH will kick in as soon as - * all commands are drained. + * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine + * @ap: ATA port to schedule EH for * - * LOCKING: + * LOCKING: inherited from ata_port_schedule_eh * spin_lock_irqsave(host lock) */ -void ata_port_schedule_eh(struct ata_port *ap) +void ata_std_sched_eh(struct ata_port *ap) { WARN_ON(!ap->ops->error_handler); @@ -1007,6 +1004,44 @@ void ata_port_schedule_eh(struct ata_port *ap) DPRINTK("port EH scheduled\n"); } +EXPORT_SYMBOL_GPL(ata_std_sched_eh); + +/** + * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine + * @ap: ATA port to end EH for + * + * In the libata object model there is a 1:1 mapping of ata_port to + * shost, so host fields can be directly manipulated under ap->lock, in + * the libsas case we need to hold a lock at the ha->level to coordinate + * these events. + * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_std_end_eh(struct ata_port *ap) +{ + struct Scsi_Host *host = ap->scsi_host; + + host->host_eh_scheduled = 0; +} +EXPORT_SYMBOL(ata_std_end_eh); + + +/** + * ata_port_schedule_eh - schedule error handling without a qc + * @ap: ATA port to schedule EH for + * + * Schedule error handling for @ap. EH will kick in as soon as + * all commands are drained. + * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_port_schedule_eh(struct ata_port *ap) +{ + /* see: ata_std_sched_eh, unless you know better */ + ap->ops->sched_eh(ap); +} static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) { diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 729a7b6..d30a093 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -523,6 +523,31 @@ static void sas_ata_set_dmamode(struct ata_port *ap, struct ata_device *ata_dev) i->dft->lldd_ata_set_dmamode(dev); } +static void sas_ata_sched_eh(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + struct sas_ha_struct *ha = dev->port->ha; + unsigned long flags; + + spin_lock_irqsave(&ha->lock, flags); + if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state)) + ha->eh_active++; + ata_std_sched_eh(ap); + spin_unlock_irqrestore(&ha->lock, flags); +} + +void sas_ata_end_eh(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + struct sas_ha_struct *ha = dev->port->ha; + unsigned long flags; + + spin_lock_irqsave(&ha->lock, flags); + if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state)) + ha->eh_active--; + spin_unlock_irqrestore(&ha->lock, flags); +} + static struct ata_port_operations sas_sata_ops = { .prereset = ata_std_prereset, .hardreset = sas_ata_hard_reset, @@ -536,6 +561,8 @@ static struct ata_port_operations sas_sata_ops = { .port_start = ata_sas_port_start, .port_stop = ata_sas_port_stop, .set_dmamode = sas_ata_set_dmamode, + .sched_eh = sas_ata_sched_eh, + .end_eh = sas_ata_end_eh, }; static struct ata_port_info sata_port_info = { @@ -684,10 +711,6 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie) struct ata_port *ap = dev->sata_dev.ap; struct sas_ha_struct *ha = dev->port->ha; - /* hold a reference over eh since we may be racing with final - * remove once all commands are completed - */ - kref_get(&dev->kref); sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n"); ata_scsi_port_error_handler(ha->core.shost, ap); sas_put_device(dev); @@ -730,6 +753,13 @@ void sas_ata_strategy_handler(struct Scsi_Host *shost) list_for_each_entry(dev, &port->dev_list, dev_list_node) { if (!sas_ata_dev_eh_valid(dev)) continue; + + /* hold a reference over eh since we may be + * racing with final remove once all commands + * are completed + */ + kref_get(&dev->kref); + async_schedule_domain(async_sas_ata_eh, dev, &async); } spin_unlock(&port->dev_list_lock); diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index bd8c636..a730ac9 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -282,6 +282,8 @@ static void sas_unregister_common_dev(struct asd_sas_port *port, struct domain_d spin_lock_irq(&port->dev_list_lock); list_del_init(&dev->dev_list_node); + if (dev_is_sata(dev)) + sas_ata_end_eh(dev->sata_dev.ap); spin_unlock_irq(&port->dev_list_lock); sas_put_device(dev); @@ -479,9 +481,9 @@ static void sas_chain_event(int event, unsigned long *pending, if (!test_and_set_bit(event, pending)) { unsigned long flags; - spin_lock_irqsave(&ha->state_lock, flags); + spin_lock_irqsave(&ha->lock, flags); sas_chain_work(ha, sw); - spin_unlock_irqrestore(&ha->state_lock, flags); + spin_unlock_irqrestore(&ha->lock, flags); } } diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c index 4e4292d..789c4d8 100644 --- a/drivers/scsi/libsas/sas_event.c +++ b/drivers/scsi/libsas/sas_event.c @@ -47,9 +47,9 @@ static void sas_queue_event(int event, unsigned long *pending, if (!test_and_set_bit(event, pending)) { unsigned long flags; - spin_lock_irqsave(&ha->state_lock, flags); + spin_lock_irqsave(&ha->lock, flags); sas_queue_work(ha, work); - spin_unlock_irqrestore(&ha->state_lock, flags); + spin_unlock_irqrestore(&ha->lock, flags); } } @@ -61,18 +61,18 @@ void __sas_drain_work(struct sas_ha_struct *ha) set_bit(SAS_HA_DRAINING, &ha->state); /* flush submitters */ - spin_lock_irq(&ha->state_lock); - spin_unlock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); + spin_unlock_irq(&ha->lock); drain_workqueue(wq); - spin_lock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); clear_bit(SAS_HA_DRAINING, &ha->state); list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) { list_del_init(&sw->drain_node); sas_queue_work(ha, sw); } - spin_unlock_irq(&ha->state_lock); + spin_unlock_irq(&ha->lock); } int sas_drain_work(struct sas_ha_struct *ha) diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index 10cb5ae..6909fef 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -114,7 +114,7 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) sas_ha->lldd_queue_size = 128; /* Sanity */ set_bit(SAS_HA_REGISTERED, &sas_ha->state); - spin_lock_init(&sas_ha->state_lock); + spin_lock_init(&sas_ha->lock); mutex_init(&sas_ha->drain_mutex); INIT_LIST_HEAD(&sas_ha->defer_q); @@ -163,9 +163,9 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha) * events to be queued, and flush any in-progress drainers */ mutex_lock(&sas_ha->drain_mutex); - spin_lock_irq(&sas_ha->state_lock); + spin_lock_irq(&sas_ha->lock); clear_bit(SAS_HA_REGISTERED, &sas_ha->state); - spin_unlock_irq(&sas_ha->state_lock); + spin_unlock_irq(&sas_ha->lock); __sas_drain_work(sas_ha); mutex_unlock(&sas_ha->drain_mutex); @@ -411,9 +411,9 @@ static int queue_phy_reset(struct sas_phy *phy, int hard_reset) d->reset_result = 0; d->hard_reset = hard_reset; - spin_lock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); sas_queue_work(ha, &d->reset_work); - spin_unlock_irq(&ha->state_lock); + spin_unlock_irq(&ha->lock); rc = sas_drain_work(ha); if (rc == 0) @@ -438,9 +438,9 @@ static int queue_phy_enable(struct sas_phy *phy, int enable) d->enable_result = 0; d->enable = enable; - spin_lock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); sas_queue_work(ha, &d->enable_work); - spin_unlock_irq(&ha->state_lock); + spin_unlock_irq(&ha->lock); rc = sas_drain_work(ha); if (rc == 0) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 49a9113..cd63d80 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -667,16 +667,20 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * goto out; } + void sas_scsi_recover_host(struct Scsi_Host *shost) { struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); - unsigned long flags; LIST_HEAD(eh_work_q); + int tries = 0; + bool retry; - spin_lock_irqsave(shost->host_lock, flags); +retry: + tries++; + retry = true; + spin_lock_irq(shost->host_lock); list_splice_init(&shost->eh_cmd_q, &eh_work_q); - shost->host_eh_scheduled = 0; - spin_unlock_irqrestore(shost->host_lock, flags); + spin_unlock_irq(shost->host_lock); SAS_DPRINTK("Enter %s busy: %d failed: %d\n", __func__, shost->host_busy, shost->host_failed); @@ -710,8 +714,19 @@ out: scsi_eh_flush_done_q(&ha->eh_done_q); - SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n", - __func__, shost->host_busy, shost->host_failed); + /* check if any new eh work was scheduled during the last run */ + spin_lock_irq(&ha->lock); + if (ha->eh_active == 0) { + shost->host_eh_scheduled = 0; + retry = false; + } + spin_unlock_irq(&ha->lock); + + if (retry) + goto retry; + + SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n", + __func__, shost->host_busy, shost->host_failed, tries); } enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) diff --git a/include/linux/libata.h b/include/linux/libata.h index 42378d6..83ff256 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -845,6 +845,8 @@ struct ata_port_operations { void (*error_handler)(struct ata_port *ap); void (*lost_interrupt)(struct ata_port *ap); void (*post_internal_cmd)(struct ata_queued_cmd *qc); + void (*sched_eh)(struct ata_port *ap); + void (*end_eh)(struct ata_port *ap); /* * Optional features @@ -1165,6 +1167,8 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); extern void ata_std_error_handler(struct ata_port *ap); +extern void ata_std_sched_eh(struct ata_port *ap); +extern void ata_std_end_eh(struct ata_port *ap); extern int ata_link_nr_enabled(struct ata_link *link); /* diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index f4f1c96..c0c94b3 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -177,6 +177,7 @@ struct sata_device { enum { SAS_DEV_GONE, SAS_DEV_DESTROY, + SAS_DEV_EH_PENDING, }; struct domain_device { @@ -384,7 +385,8 @@ struct sas_ha_struct { struct list_head defer_q; /* work queued while draining */ struct mutex drain_mutex; unsigned long state; - spinlock_t state_lock; + spinlock_t lock; + int eh_active; struct mutex disco_mutex; diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 6d5d60c..3e13c28 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -44,6 +44,7 @@ void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, void sas_ata_schedule_reset(struct domain_device *dev); void sas_ata_wait_eh(struct domain_device *dev); void sas_probe_sata(struct asd_sas_port *port); +void sas_ata_end_eh(struct ata_port *ap); #else @@ -81,6 +82,10 @@ static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy { return 0; } + +static inline void sas_ata_end_eh(struct ata_port *ap) +{ +} #endif #endif /* _SAS_ATA_H_ */