[v7,5/6] libsas: fix lifetime of SAS_HA_FROZEN

Commit Message

Dan Williams Jan. 24, 2012, 7:50 a.m.
Until all sas_tasks are known to no longer be in flight, this flag prevents
late completions from colliding with error handling.  However, it must be
cleared before any scsi_send_eh_cmnd() requests are submitted; otherwise
those commands will never be completed correctly.

This was spotted by slub debug:
 BUG sas_task: Objects remaining on kmem_cache_close()

 INFO: Slab 0xffffea001f0eba00 objects=34 used=1 fp=0xffff8807c3aecb00 flags=0x8000000000004080
 Pid: 22919, comm: modprobe Not tainted 3.2.0-isci+ #2
 Call Trace:
  [<ffffffff810fcdcd>] slab_err+0xb0/0xd2
  [<ffffffff810e1c50>] ? free_percpu+0x31/0x117
  [<ffffffff81100122>] ? kzalloc+0x14/0x16
  [<ffffffff81100122>] ? kzalloc+0x14/0x16
  [<ffffffff81100486>] kmem_cache_destroy+0x11d/0x270
  [<ffffffffa0112bdc>] sas_class_exit+0x10/0x12 [libsas]
  [<ffffffff81078fba>] sys_delete_module+0x1c4/0x23c
  [<ffffffff814797ba>] ? sysret_check+0x2e/0x69
  [<ffffffff8126479e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
  [<ffffffff81479782>] system_call_fastpath+0x16/0x1b
 INFO: Object 0xffff8807c3aed280 @offset=21120
 INFO: Allocated in sas_alloc_task+0x22/0x90 [libsas] age=4615311 cpu=2 pid=12966
  sas_alloc_task+0x22/0x90 [libsas]
  sas_queuecommand+0x20e/0x230 [libsas]
  sas_scsi_recover_host+0xa35/0xab1 [libsas]

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
 drivers/scsi/libsas/sas_scsi_host.c |   13 +++++++------
 1 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 3701ff7..fd32913 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -521,8 +521,7 @@  try_bus_reset:
 	return FAILED;
-static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
-				    struct list_head *work_q)
+static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *work_q)
 	struct scsi_cmnd *cmd, *n;
 	enum task_disposition res = TASK_IS_DONE;
@@ -658,7 +657,7 @@  static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
 	list_splice_tail(&done, work_q);
 	list_splice_tail_init(&ha->eh_ata_q, work_q);
-	return list_empty(work_q);
+	return;
 	SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__);
@@ -682,10 +681,13 @@  void sas_scsi_recover_host(struct Scsi_Host *shost)
 		    __func__, shost->host_busy, shost->host_failed);
 	 * Deal with commands that still have SAS tasks (i.e. they didn't
-	 * complete via the normal sas_task completion mechanism)
+	 * complete via the normal sas_task completion mechanism),
+	 * SAS_HA_FROZEN gives eh dominion over all sas_task completion.
 	set_bit(SAS_HA_FROZEN, &ha->state);
-	if (sas_eh_handle_sas_errors(shost, &eh_work_q))
+	sas_eh_handle_sas_errors(shost, &eh_work_q);
+	clear_bit(SAS_HA_FROZEN, &ha->state);
+	if (list_empty(&eh_work_q))
 		goto out;
@@ -699,7 +701,6 @@  void sas_scsi_recover_host(struct Scsi_Host *shost)
 		scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
-	clear_bit(SAS_HA_FROZEN, &ha->state);
 	if (ha->lldd_max_execute_num > 1)