Message ID | 577573BE.8060902@canonical.com |
---|---|
State | New |
Headers | show |
On Thu, Jun 30, 2016 at 09:32:14PM +0200, Stefan Bader wrote: > > BugLink: http://bugs.launchpad.net/bugs/1567602 > > IBM reported that with the alua device handler loaded, when running > automatic LUN detection they run into timeouts and fail to discover > some disks completely. > > Unfortunately the change to get rid of the additional vpd_pg83 probing > was not enough (that would have been easy). > The changes that likely do the fix come later and I would expect them > to depend a lot on the previous changes (at least most of those). So > I ended up picking almost all changes between v4.4 and v4.6 for > the alua device handler and hating myself. > > The last patch is an addition which I know I need because otherwise > a ppc64el host was quite unhappy with multipath on v4.6. > [by now I tried a 4.4 kernel with this set added on ppc64el and could see no > obvious breakage there] > > I am also inlining the combined diff of all changes below to show > that the changes outside scsi_dh_alua.c are not too bad. > > -Stefan > > The following changes since commit a6409cad137621e2b43d9f49c757fbc996539e85: > > UBUNTU: Ubuntu-4.4.0-28.47 > > are available in the git repository at: > > git://git.launchpad.net/~smb/+git/linux-xenial lp1567602 > > for you to fetch changes up to a21cddda99e26e2f4527dd9ac56e7538d5afa253: > > scsi_dh_alua: do not fail for unknown VPD identification (from irc) "Acked-by: Brad Figg <brad.figg@canonical.com>" Applied to Xenial (4.4.0-30.49). 
-Kamal > > ---------------------------------------------------------------- > Hannes Reinecke (34): > scsi_dh_alua: Disable ALUA handling for non-disk devices > scsi_dh_alua: Use vpd_pg83 information > scsi_dh_alua: improved logging > scsi_dh_alua: sanitze sense code handling > scsi_dh_alua: use standard logging functions > scsi_dh_alua: return standard SCSI return codes in submit_rtpg > scsi_dh_alua: fixup description of stpg_endio() > scsi_dh_alua: use flag for RTPG extended header > scsi_dh_alua: use unaligned access macros > scsi_dh_alua: rework alua_check_tpgs() to return the tpgs mode > scsi_dh_alua: simplify sense code handling > scsi: Add scsi_vpd_lun_id() > scsi: Add scsi_vpd_tpg_id() > scsi_dh_alua: use scsi_vpd_tpg_id() > scsi_dh_alua: Remove stale variables > scsi_dh_alua: Pass buffer as function argument > scsi_dh_alua: separate out alua_stpg() > scsi_dh_alua: Make stpg synchronous > scsi_dh_alua: call alua_rtpg() if stpg fails > scsi_dh_alua: switch to scsi_execute_req_flags() > scsi_dh_alua: allocate RTPG buffer separately > scsi_dh_alua: Use separate alua_port_group structure > scsi_dh_alua: use unique device id > scsi_dh_alua: simplify alua_initialize() > revert commit a8e5a2d593cb ("[SCSI] scsi_dh_alua: ALUA handler attach > should succeed while TPG is transitioning") > scsi_dh_alua: move optimize_stpg evaluation > scsi_dh_alua: remove 'rel_port' from alua_dh_data structure > scsi_dh_alua: Use workqueue for RTPG > scsi_dh_alua: Allow workqueue to run synchronously > scsi_dh_alua: Add new blacklist flag 'BLIST_SYNC_ALUA' > scsi_dh_alua: Recheck state on unit attention > scsi_dh_alua: update all port states > scsi_dh_alua: Send TEST UNIT READY to poll for transitioning > scsi_dh_alua: do not fail for unknown VPD identification > > drivers/scsi/device_handler/scsi_dh_alua.c | 1100 +++++++++++++++++----------- > drivers/scsi/scsi_devinfo.c | 2 + > drivers/scsi/scsi_lib.c | 188 +++++ > drivers/scsi/scsi_scan.c | 3 + > include/scsi/scsi_device.h | 3 + > 
include/scsi/scsi_devinfo.h | 1 + > include/scsi/scsi_dh.h | 1 + > 7 files changed, 870 insertions(+), 428 deletions(-) > > -- > > diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c > b/drivers/scsi/device_handler/scsi_dh_alua.c > index cc2773b..952e3b7 100644 > --- a/drivers/scsi/device_handler/scsi_dh_alua.c > +++ b/drivers/scsi/device_handler/scsi_dh_alua.c > @@ -22,7 +22,9 @@ > #include <linux/slab.h> > #include <linux/delay.h> > #include <linux/module.h> > +#include <asm/unaligned.h> > #include <scsi/scsi.h> > +#include <scsi/scsi_dbg.h> > #include <scsi/scsi_eh.h> > #include <scsi/scsi_dh.h> > > @@ -54,27 +56,60 @@ > #define TPGS_MODE_IMPLICIT 0x1 > #define TPGS_MODE_EXPLICIT 0x2 > > -#define ALUA_INQUIRY_SIZE 36 > +#define ALUA_RTPG_SIZE 128 > #define ALUA_FAILOVER_TIMEOUT 60 > #define ALUA_FAILOVER_RETRIES 5 > +#define ALUA_RTPG_DELAY_MSECS 5 > > -/* flags passed from user level */ > -#define ALUA_OPTIMIZE_STPG 1 > +/* device handler flags */ > +#define ALUA_OPTIMIZE_STPG 0x01 > +#define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 > +#define ALUA_SYNC_STPG 0x04 > +/* State machine flags */ > +#define ALUA_PG_RUN_RTPG 0x10 > +#define ALUA_PG_RUN_STPG 0x20 > +#define ALUA_PG_RUNNING 0x40 > > -struct alua_dh_data { > +static uint optimize_stpg; > +module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); > +MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than > sending a STPG, when implicit TPGS is supported (0=No,1=Yes). 
Default is 0."); > + > +static LIST_HEAD(port_group_list); > +static DEFINE_SPINLOCK(port_group_lock); > +static struct workqueue_struct *kaluad_wq; > +static struct workqueue_struct *kaluad_sync_wq; > + > +struct alua_port_group { > + struct kref kref; > + struct rcu_head rcu; > + struct list_head node; > + unsigned char device_id_str[256]; > + int device_id_len; > int group_id; > - int rel_port; > int tpgs; > int state; > int pref; > unsigned flags; /* used for optimizing STPG */ > - unsigned char inq[ALUA_INQUIRY_SIZE]; > - unsigned char *buff; > - int bufflen; > unsigned char transition_tmo; > - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; > - int senselen; > + unsigned long expiry; > + unsigned long interval; > + struct delayed_work rtpg_work; > + spinlock_t lock; > + struct list_head rtpg_list; > + struct scsi_device *rtpg_sdev; > +}; > + > +struct alua_dh_data { > + struct alua_port_group *pg; > + int group_id; > + spinlock_t pg_lock; > struct scsi_device *sdev; > + int init_error; > + struct mutex init_mutex; > +}; > + > +struct alua_queue_data { > + struct list_head entry; > activate_complete callback_fn; > void *callback_data; > }; > @@ -82,231 +117,162 @@ struct alua_dh_data { > #define ALUA_POLICY_SWITCH_CURRENT 0 > #define ALUA_POLICY_SWITCH_ALL 1 > > -static char print_alua_state(int); > -static int alua_check_sense(struct scsi_device *, struct scsi_sense_hdr *); > - > -static int realloc_buffer(struct alua_dh_data *h, unsigned len) > -{ > - if (h->buff && h->buff != h->inq) > - kfree(h->buff); > - > - h->buff = kmalloc(len, GFP_NOIO); > - if (!h->buff) { > - h->buff = h->inq; > - h->bufflen = ALUA_INQUIRY_SIZE; > - return 1; > - } > - h->bufflen = len; > - return 0; > -} > +static void alua_rtpg_work(struct work_struct *work); > +static void alua_rtpg_queue(struct alua_port_group *pg, > + struct scsi_device *sdev, > + struct alua_queue_data *qdata, bool force); > +static void alua_check(struct scsi_device *sdev, bool force); > > -static struct request 
*get_alua_req(struct scsi_device *sdev, > - void *buffer, unsigned buflen, int rw) > +static void release_port_group(struct kref *kref) > { > - struct request *rq; > - struct request_queue *q = sdev->request_queue; > - > - rq = blk_get_request(q, rw, GFP_NOIO); > - > - if (IS_ERR(rq)) { > - sdev_printk(KERN_INFO, sdev, > - "%s: blk_get_request failed\n", __func__); > - return NULL; > - } > - blk_rq_set_block_pc(rq); > - > - if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) { > - blk_put_request(rq); > - sdev_printk(KERN_INFO, sdev, > - "%s: blk_rq_map_kern failed\n", __func__); > - return NULL; > - } > - > - rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | > - REQ_FAILFAST_DRIVER; > - rq->retries = ALUA_FAILOVER_RETRIES; > - rq->timeout = ALUA_FAILOVER_TIMEOUT * HZ; > - > - return rq; > + struct alua_port_group *pg; > + > + pg = container_of(kref, struct alua_port_group, kref); > + if (pg->rtpg_sdev) > + flush_delayed_work(&pg->rtpg_work); > + spin_lock(&port_group_lock); > + list_del(&pg->node); > + spin_unlock(&port_group_lock); > + kfree_rcu(pg, rcu); > } > > /* > - * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command > + * submit_rtpg - Issue a REPORT TARGET GROUP STATES command > * @sdev: sdev the command should be sent to > */ > -static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h) > +static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, > + int bufflen, struct scsi_sense_hdr *sshdr, int flags) > { > - struct request *rq; > - int err = SCSI_DH_RES_TEMP_UNAVAIL; > - > - rq = get_alua_req(sdev, h->buff, h->bufflen, READ); > - if (!rq) > - goto done; > + u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)]; > + int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | > + REQ_FAILFAST_DRIVER; > > /* Prepare the command. 
*/ > - rq->cmd[0] = INQUIRY; > - rq->cmd[1] = 1; > - rq->cmd[2] = 0x83; > - rq->cmd[4] = h->bufflen; > - rq->cmd_len = COMMAND_SIZE(INQUIRY); > - > - rq->sense = h->sense; > - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); > - rq->sense_len = h->senselen = 0; > - > - err = blk_execute_rq(rq->q, NULL, rq, 1); > - if (err == -EIO) { > - sdev_printk(KERN_INFO, sdev, > - "%s: evpd inquiry failed with %x\n", > - ALUA_DH_NAME, rq->errors); > - h->senselen = rq->sense_len; > - err = SCSI_DH_IO; > - } > - blk_put_request(rq); > -done: > - return err; > + memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN)); > + cdb[0] = MAINTENANCE_IN; > + if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) > + cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; > + else > + cdb[1] = MI_REPORT_TARGET_PGS; > + put_unaligned_be32(bufflen, &cdb[6]); > + > + return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, > + buff, bufflen, sshdr, > + ALUA_FAILOVER_TIMEOUT * HZ, > + ALUA_FAILOVER_RETRIES, NULL, req_flags); > } > > /* > - * submit_rtpg - Issue a REPORT TARGET GROUP STATES command > - * @sdev: sdev the command should be sent to > + * submit_stpg - Issue a SET TARGET PORT GROUP command > + * > + * Currently we're only setting the current target port group state > + * to 'active/optimized' and let the array firmware figure out > + * the states of the remaining groups. 
> */ > -static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, > - bool rtpg_ext_hdr_req) > +static int submit_stpg(struct scsi_device *sdev, int group_id, > + struct scsi_sense_hdr *sshdr) > { > - struct request *rq; > - int err = SCSI_DH_RES_TEMP_UNAVAIL; > + u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)]; > + unsigned char stpg_data[8]; > + int stpg_len = 8; > + int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | > + REQ_FAILFAST_DRIVER; > > - rq = get_alua_req(sdev, h->buff, h->bufflen, READ); > - if (!rq) > - goto done; > + /* Prepare the data buffer */ > + memset(stpg_data, 0, stpg_len); > + stpg_data[4] = TPGS_STATE_OPTIMIZED & 0x0f; > + put_unaligned_be16(group_id, &stpg_data[6]); > > /* Prepare the command. */ > - rq->cmd[0] = MAINTENANCE_IN; > - if (rtpg_ext_hdr_req) > - rq->cmd[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; > - else > - rq->cmd[1] = MI_REPORT_TARGET_PGS; > - rq->cmd[6] = (h->bufflen >> 24) & 0xff; > - rq->cmd[7] = (h->bufflen >> 16) & 0xff; > - rq->cmd[8] = (h->bufflen >> 8) & 0xff; > - rq->cmd[9] = h->bufflen & 0xff; > - rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN); > - > - rq->sense = h->sense; > - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); > - rq->sense_len = h->senselen = 0; > - > - err = blk_execute_rq(rq->q, NULL, rq, 1); > - if (err == -EIO) { > - sdev_printk(KERN_INFO, sdev, > - "%s: rtpg failed with %x\n", > - ALUA_DH_NAME, rq->errors); > - h->senselen = rq->sense_len; > - err = SCSI_DH_IO; > - } > - blk_put_request(rq); > -done: > - return err; > + memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT)); > + cdb[0] = MAINTENANCE_OUT; > + cdb[1] = MO_SET_TARGET_PGS; > + put_unaligned_be32(stpg_len, &cdb[6]); > + > + return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, > + stpg_data, stpg_len, > + sshdr, ALUA_FAILOVER_TIMEOUT * HZ, > + ALUA_FAILOVER_RETRIES, NULL, req_flags); > } > > -/* > - * alua_stpg - Evaluate SET TARGET GROUP STATES > - * @sdev: the device to be evaluated > - * @state: the new target 
group state > - * > - * Send a SET TARGET GROUP STATES command to the device. > - * We only have to test here if we should resubmit the command; > - * any other error is assumed as a failure. > - */ > -static void stpg_endio(struct request *req, int error) > +struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, > + int group_id) > { > - struct alua_dh_data *h = req->end_io_data; > - struct scsi_sense_hdr sense_hdr; > - unsigned err = SCSI_DH_OK; > + struct alua_port_group *pg; > + > + if (!id_str || !id_size || !strlen(id_str)) > + return NULL; > > - if (host_byte(req->errors) != DID_OK || > - msg_byte(req->errors) != COMMAND_COMPLETE) { > - err = SCSI_DH_IO; > - goto done; > + list_for_each_entry(pg, &port_group_list, node) { > + if (pg->group_id != group_id) > + continue; > + if (!pg->device_id_len || pg->device_id_len != id_size) > + continue; > + if (strncmp(pg->device_id_str, id_str, id_size)) > + continue; > + if (!kref_get_unless_zero(&pg->kref)) > + continue; > + return pg; > } > > - if (req->sense_len > 0) { > - err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE, > - &sense_hdr); > - if (!err) { > - err = SCSI_DH_IO; > - goto done; > - } > - err = alua_check_sense(h->sdev, &sense_hdr); > - if (err == ADD_TO_MLQUEUE) { > - err = SCSI_DH_RETRY; > - goto done; > - } > - sdev_printk(KERN_INFO, h->sdev, > - "%s: stpg sense code: %02x/%02x/%02x\n", > - ALUA_DH_NAME, sense_hdr.sense_key, > - sense_hdr.asc, sense_hdr.ascq); > - err = SCSI_DH_IO; > - } else if (error) > - err = SCSI_DH_IO; > - > - if (err == SCSI_DH_OK) { > - h->state = TPGS_STATE_OPTIMIZED; > - sdev_printk(KERN_INFO, h->sdev, > - "%s: port group %02x switched to state %c\n", > - ALUA_DH_NAME, h->group_id, > - print_alua_state(h->state)); > - } > -done: > - req->end_io_data = NULL; > - __blk_put_request(req->q, req); > - if (h->callback_fn) { > - h->callback_fn(h->callback_data, err); > - h->callback_fn = h->callback_data = NULL; > - } > - return; > + return NULL; > } > > 
/* > - * submit_stpg - Issue a SET TARGET GROUP STATES command > + * alua_alloc_pg - Allocate a new port_group structure > + * @sdev: scsi device > + * @h: alua device_handler data > + * @group_id: port group id > * > - * Currently we're only setting the current target port group state > - * to 'active/optimized' and let the array firmware figure out > - * the states of the remaining groups. > + * Allocate a new port_group structure for a given > + * device. > */ > -static unsigned submit_stpg(struct alua_dh_data *h) > +struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, > + int group_id, int tpgs) > { > - struct request *rq; > - int stpg_len = 8; > - struct scsi_device *sdev = h->sdev; > + struct alua_port_group *pg, *tmp_pg; > > - /* Prepare the data buffer */ > - memset(h->buff, 0, stpg_len); > - h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f; > - h->buff[6] = (h->group_id >> 8) & 0xff; > - h->buff[7] = h->group_id & 0xff; > + pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); > + if (!pg) > + return ERR_PTR(-ENOMEM); > > - rq = get_alua_req(sdev, h->buff, stpg_len, WRITE); > - if (!rq) > - return SCSI_DH_RES_TEMP_UNAVAIL; > + pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, > + sizeof(pg->device_id_str)); > + if (pg->device_id_len <= 0) { > + /* > + * TPGS supported but no device identification found. > + * Generate private device identification. 
> + */ > + sdev_printk(KERN_INFO, sdev, > + "%s: No device descriptors found\n", > + ALUA_DH_NAME); > + pg->device_id_str[0] = '\0'; > + pg->device_id_len = 0; > + } > + pg->group_id = group_id; > + pg->tpgs = tpgs; > + pg->state = TPGS_STATE_OPTIMIZED; > + if (optimize_stpg) > + pg->flags |= ALUA_OPTIMIZE_STPG; > + kref_init(&pg->kref); > + INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); > + INIT_LIST_HEAD(&pg->rtpg_list); > + INIT_LIST_HEAD(&pg->node); > + spin_lock_init(&pg->lock); > + > + spin_lock(&port_group_lock); > + tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, > + group_id); > + if (tmp_pg) { > + spin_unlock(&port_group_lock); > + kfree(pg); > + return tmp_pg; > + } > > - /* Prepare the command. */ > - rq->cmd[0] = MAINTENANCE_OUT; > - rq->cmd[1] = MO_SET_TARGET_PGS; > - rq->cmd[6] = (stpg_len >> 24) & 0xff; > - rq->cmd[7] = (stpg_len >> 16) & 0xff; > - rq->cmd[8] = (stpg_len >> 8) & 0xff; > - rq->cmd[9] = stpg_len & 0xff; > - rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT); > - > - rq->sense = h->sense; > - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); > - rq->sense_len = h->senselen = 0; > - rq->end_io_data = h; > - > - blk_execute_rq_nowait(rq->q, NULL, rq, 1, stpg_endio); > - return SCSI_DH_OK; > + list_add(&pg->node, &port_group_list); > + spin_unlock(&port_group_lock); > + > + return pg; > } > > /* > @@ -316,12 +282,23 @@ static unsigned submit_stpg(struct alua_dh_data *h) > * Examine the TPGS setting of the sdev to find out if ALUA > * is supported. > */ > -static int alua_check_tpgs(struct scsi_device *sdev, struct alua_dh_data *h) > +static int alua_check_tpgs(struct scsi_device *sdev) > { > - int err = SCSI_DH_OK; > + int tpgs = TPGS_MODE_NONE; > + > + /* > + * ALUA support for non-disk devices is fraught with > + * difficulties, so disable it for now. 
> + */ > + if (sdev->type != TYPE_DISK) { > + sdev_printk(KERN_INFO, sdev, > + "%s: disable for non-disk devices\n", > + ALUA_DH_NAME); > + return tpgs; > + } > > - h->tpgs = scsi_device_tpgs(sdev); > - switch (h->tpgs) { > + tpgs = scsi_device_tpgs(sdev); > + switch (tpgs) { > case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: > sdev_printk(KERN_INFO, sdev, > "%s: supports implicit and explicit TPGS\n", > @@ -335,71 +312,36 @@ static int alua_check_tpgs(struct scsi_device *sdev, > struct alua_dh_data *h) > sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", > ALUA_DH_NAME); > break; > - default: > - h->tpgs = TPGS_MODE_NONE; > + case TPGS_MODE_NONE: > sdev_printk(KERN_INFO, sdev, "%s: not supported\n", > ALUA_DH_NAME); > - err = SCSI_DH_DEV_UNSUPP; > + break; > + default: > + sdev_printk(KERN_INFO, sdev, > + "%s: unsupported TPGS setting %d\n", > + ALUA_DH_NAME, tpgs); > + tpgs = TPGS_MODE_NONE; > break; > } > > - return err; > + return tpgs; > } > > /* > - * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83 > + * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 > * @sdev: device to be checked > * > * Extract the relative target port and the target port group > * descriptor from the list of identificators. 
> */ > -static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h) > +static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, > + int tpgs) > { > - int len; > - unsigned err; > - unsigned char *d; > - > - retry: > - err = submit_vpd_inquiry(sdev, h); > + int rel_port = -1, group_id; > + struct alua_port_group *pg, *old_pg = NULL; > > - if (err != SCSI_DH_OK) > - return err; > - > - /* Check if vpd page exceeds initial buffer */ > - len = (h->buff[2] << 8) + h->buff[3] + 4; > - if (len > h->bufflen) { > - /* Resubmit with the correct length */ > - if (realloc_buffer(h, len)) { > - sdev_printk(KERN_WARNING, sdev, > - "%s: kmalloc buffer failed\n", > - ALUA_DH_NAME); > - /* Temporary failure, bypass */ > - return SCSI_DH_DEV_TEMP_BUSY; > - } > - goto retry; > - } > - > - /* > - * Now look for the correct descriptor. > - */ > - d = h->buff + 4; > - while (d < h->buff + len) { > - switch (d[1] & 0xf) { > - case 0x4: > - /* Relative target port */ > - h->rel_port = (d[6] << 8) + d[7]; > - break; > - case 0x5: > - /* Target port group */ > - h->group_id = (d[6] << 8) + d[7]; > - break; > - default: > - break; > - } > - d += d[3] + 4; > - } > - > - if (h->group_id == -1) { > + group_id = scsi_vpd_tpg_id(sdev, &rel_port); > + if (group_id < 0) { > /* > * Internal error; TPGS supported but required > * VPD identification descriptors not present. 
> @@ -408,16 +350,41 @@ static int alua_vpd_inquiry(struct scsi_device *sdev, > struct alua_dh_data *h) > sdev_printk(KERN_INFO, sdev, > "%s: No target port descriptors found\n", > ALUA_DH_NAME); > - h->state = TPGS_STATE_OPTIMIZED; > - h->tpgs = TPGS_MODE_NONE; > - err = SCSI_DH_DEV_UNSUPP; > - } else { > + return SCSI_DH_DEV_UNSUPP; > + } > + > + pg = alua_alloc_pg(sdev, group_id, tpgs); > + if (IS_ERR(pg)) { > + if (PTR_ERR(pg) == -ENOMEM) > + return SCSI_DH_NOMEM; > + return SCSI_DH_DEV_UNSUPP; > + } > + if (pg->device_id_len) > sdev_printk(KERN_INFO, sdev, > - "%s: port group %02x rel port %02x\n", > - ALUA_DH_NAME, h->group_id, h->rel_port); > + "%s: device %s port group %x rel port %x\n", > + ALUA_DH_NAME, pg->device_id_str, > + group_id, rel_port); > + else > + sdev_printk(KERN_INFO, sdev, > + "%s: port group %x rel port %x\n", > + ALUA_DH_NAME, group_id, rel_port); > + > + /* Check for existing port group references */ > + spin_lock(&h->pg_lock); > + old_pg = h->pg; > + if (old_pg != pg) { > + /* port group has changed. 
Update to new port group */ > + rcu_assign_pointer(h->pg, pg); > } > + if (sdev->synchronous_alua) > + pg->flags |= ALUA_SYNC_STPG; > + alua_rtpg_queue(h->pg, sdev, NULL, true); > + spin_unlock(&h->pg_lock); > > - return err; > + if (old_pg) > + kref_put(&old_pg->kref, release_port_group); > + > + return SCSI_DH_OK; > } > > static char print_alua_state(int state) > @@ -447,40 +414,24 @@ static int alua_check_sense(struct scsi_device *sdev, > { > switch (sense_hdr->sense_key) { > case NOT_READY: > - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) > + if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { > /* > * LUN Not Accessible - ALUA state transition > */ > - return ADD_TO_MLQUEUE; > - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b) > - /* > - * LUN Not Accessible -- Target port in standby state > - */ > - return SUCCESS; > - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c) > - /* > - * LUN Not Accessible -- Target port in unavailable state > - */ > - return SUCCESS; > - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12) > - /* > - * LUN Not Ready -- Offline > - */ > - return SUCCESS; > - if (sdev->allow_restart && > - sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x02) > - /* > - * if the device is not started, we need to wake > - * the error handler to start the motor > - */ > - return FAILED; > + alua_check(sdev, false); > + return NEEDS_RETRY; > + } > break; > case UNIT_ATTENTION: > - if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) > + if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { > /* > - * Power On, Reset, or Bus Device Reset, just retry. > + * Power On, Reset, or Bus Device Reset. > + * Might have obscured a state transition, > + * so schedule a recheck. 
> */ > + alua_check(sdev, true); > return ADD_TO_MLQUEUE; > + } > if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) > /* > * Device internal reset > @@ -491,16 +442,20 @@ static int alua_check_sense(struct scsi_device *sdev, > * Mode Parameters Changed > */ > return ADD_TO_MLQUEUE; > - if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) > + if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { > /* > * ALUA state changed > */ > + alua_check(sdev, true); > return ADD_TO_MLQUEUE; > - if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) > + } > + if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { > /* > * Implicit ALUA state transition failed > */ > + alua_check(sdev, true); > return ADD_TO_MLQUEUE; > + } > if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) > /* > * Inquiry data has changed > @@ -520,38 +475,74 @@ static int alua_check_sense(struct scsi_device *sdev, > } > > /* > + * alua_tur - Send a TEST UNIT READY > + * @sdev: device to which the TEST UNIT READY command should be send > + * > + * Send a TEST UNIT READY to @sdev to figure out the device state > + * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, > + * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. > + */ > +static int alua_tur(struct scsi_device *sdev) > +{ > + struct scsi_sense_hdr sense_hdr; > + int retval; > + > + retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, > + ALUA_FAILOVER_RETRIES, &sense_hdr); > + if (sense_hdr.sense_key == NOT_READY && > + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) > + return SCSI_DH_RETRY; > + else if (retval) > + return SCSI_DH_IO; > + else > + return SCSI_DH_OK; > +} > + > +/* > * alua_rtpg - Evaluate REPORT TARGET GROUP STATES > * @sdev: the device to be evaluated. > - * @wait_for_transition: if nonzero, wait ALUA_FAILOVER_TIMEOUT seconds for > device to exit transitioning state > * > * Evaluate the Target Port Group State. 
> * Returns SCSI_DH_DEV_OFFLINED if the path is > * found to be unusable. > */ > -static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int > wait_for_transition) > +static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) > { > struct scsi_sense_hdr sense_hdr; > - int len, k, off, valid_states = 0; > - unsigned char *ucp; > - unsigned err; > - bool rtpg_ext_hdr_req = 1; > - unsigned long expiry, interval = 0; > + struct alua_port_group *tmp_pg; > + int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE; > + unsigned char *desc, *buff; > + unsigned err, retval; > unsigned int tpg_desc_tbl_off; > unsigned char orig_transition_tmo; > + unsigned long flags; > > - if (!h->transition_tmo) > - expiry = round_jiffies_up(jiffies + ALUA_FAILOVER_TIMEOUT * HZ); > - else > - expiry = round_jiffies_up(jiffies + h->transition_tmo * HZ); > + if (!pg->expiry) { > + unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; > > - retry: > - err = submit_rtpg(sdev, h, rtpg_ext_hdr_req); > + if (pg->transition_tmo) > + transition_tmo = pg->transition_tmo * HZ; > + > + pg->expiry = round_jiffies_up(jiffies + transition_tmo); > + } > + > + buff = kzalloc(bufflen, GFP_KERNEL); > + if (!buff) > + return SCSI_DH_DEV_TEMP_BUSY; > > - if (err == SCSI_DH_IO && h->senselen > 0) { > - err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE, > - &sense_hdr); > - if (!err) > + retry: > + retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); > + > + if (retval) { > + if (!scsi_sense_valid(&sense_hdr)) { > + sdev_printk(KERN_INFO, sdev, > + "%s: rtpg failed, result %d\n", > + ALUA_DH_NAME, retval); > + kfree(buff); > + if (driver_byte(retval) == DRIVER_ERROR) > + return SCSI_DH_DEV_TEMP_BUSY; > return SCSI_DH_IO; > + } > > /* > * submit_rtpg() has failed on existing arrays > @@ -561,73 +552,101 @@ static int alua_rtpg(struct scsi_device *sdev, struct > alua_dh_data *h, int wait_ > * The retry without rtpg_ext_hdr_req set > * handles this. 
> */ > - if (rtpg_ext_hdr_req == 1 && > + if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && > sense_hdr.sense_key == ILLEGAL_REQUEST && > sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { > - rtpg_ext_hdr_req = 0; > + pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; > goto retry; > } > - > - err = alua_check_sense(sdev, &sense_hdr); > - if (err == ADD_TO_MLQUEUE && time_before(jiffies, expiry)) > - goto retry; > - sdev_printk(KERN_INFO, sdev, > - "%s: rtpg sense code %02x/%02x/%02x\n", > - ALUA_DH_NAME, sense_hdr.sense_key, > - sense_hdr.asc, sense_hdr.ascq); > - err = SCSI_DH_IO; > + /* > + * Retry on ALUA state transition or if any > + * UNIT ATTENTION occurred. > + */ > + if (sense_hdr.sense_key == NOT_READY && > + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) > + err = SCSI_DH_RETRY; > + else if (sense_hdr.sense_key == UNIT_ATTENTION) > + err = SCSI_DH_RETRY; > + if (err == SCSI_DH_RETRY && > + pg->expiry != 0 && time_before(jiffies, pg->expiry)) { > + sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", > + ALUA_DH_NAME); > + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); > + return err; > + } > + sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", > + ALUA_DH_NAME); > + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); > + kfree(buff); > + pg->expiry = 0; > + return SCSI_DH_IO; > } > - if (err != SCSI_DH_OK) > - return err; > > - len = (h->buff[0] << 24) + (h->buff[1] << 16) + > - (h->buff[2] << 8) + h->buff[3] + 4; > + len = get_unaligned_be32(&buff[0]) + 4; > > - if (len > h->bufflen) { > + if (len > bufflen) { > /* Resubmit with the correct length */ > - if (realloc_buffer(h, len)) { > + kfree(buff); > + bufflen = len; > + buff = kmalloc(bufflen, GFP_KERNEL); > + if (!buff) { > sdev_printk(KERN_WARNING, sdev, > "%s: kmalloc buffer failed\n",__func__); > /* Temporary failure, bypass */ > + pg->expiry = 0; > return SCSI_DH_DEV_TEMP_BUSY; > } > goto retry; > } > > - orig_transition_tmo = h->transition_tmo; > - if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && 
h->buff[5] != 0) > - h->transition_tmo = h->buff[5]; > + orig_transition_tmo = pg->transition_tmo; > + if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) > + pg->transition_tmo = buff[5]; > else > - h->transition_tmo = ALUA_FAILOVER_TIMEOUT; > + pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; > > - if (wait_for_transition && (orig_transition_tmo != h->transition_tmo)) { > + if (orig_transition_tmo != pg->transition_tmo) { > sdev_printk(KERN_INFO, sdev, > "%s: transition timeout set to %d seconds\n", > - ALUA_DH_NAME, h->transition_tmo); > - expiry = jiffies + h->transition_tmo * HZ; > + ALUA_DH_NAME, pg->transition_tmo); > + pg->expiry = jiffies + pg->transition_tmo * HZ; > } > > - if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) > + if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) > tpg_desc_tbl_off = 8; > else > tpg_desc_tbl_off = 4; > > - for (k = tpg_desc_tbl_off, ucp = h->buff + tpg_desc_tbl_off; > + for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; > k < len; > - k += off, ucp += off) { > - > - if (h->group_id == (ucp[2] << 8) + ucp[3]) { > - h->state = ucp[0] & 0x0f; > - h->pref = ucp[0] >> 7; > - valid_states = ucp[1]; > + k += off, desc += off) { > + u16 group_id = get_unaligned_be16(&desc[2]); > + > + spin_lock_irqsave(&port_group_lock, flags); > + tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, > + group_id); > + spin_unlock_irqrestore(&port_group_lock, flags); > + if (tmp_pg) { > + if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { > + if ((tmp_pg == pg) || > + !(tmp_pg->flags & ALUA_PG_RUNNING)) { > + tmp_pg->state = desc[0] & 0x0f; > + tmp_pg->pref = desc[0] >> 7; > + } > + if (tmp_pg == pg) > + valid_states = desc[1]; > + spin_unlock_irqrestore(&tmp_pg->lock, flags); > + } > + kref_put(&tmp_pg->kref, release_port_group); > } > - off = 8 + (ucp[7] * 4); > + off = 8 + (desc[7] * 4); > } > > + spin_lock_irqsave(&pg->lock, flags); > sdev_printk(KERN_INFO, sdev, > "%s: port group %02x state %c %s supports 
%c%c%c%c%c%c%c\n", > - ALUA_DH_NAME, h->group_id, print_alua_state(h->state), > - h->pref ? "preferred" : "non-preferred", > + ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), > + pg->pref ? "preferred" : "non-preferred", > valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', > valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', > valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', > @@ -636,36 +655,224 @@ static int alua_rtpg(struct scsi_device *sdev, struct > alua_dh_data *h, int wait_ > valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', > valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); > > - switch (h->state) { > + switch (pg->state) { > case TPGS_STATE_TRANSITIONING: > - if (wait_for_transition) { > - if (time_before(jiffies, expiry)) { > - /* State transition, retry */ > - interval += 2000; > - msleep(interval); > - goto retry; > - } > + if (time_before(jiffies, pg->expiry)) { > + /* State transition, retry */ > + pg->interval = 2; > err = SCSI_DH_RETRY; > } else { > - err = SCSI_DH_OK; > + /* Transitioning time exceeded, set port to standby */ > + err = SCSI_DH_IO; > + pg->state = TPGS_STATE_STANDBY; > + pg->expiry = 0; > } > - > - /* Transitioning time exceeded, set port to standby */ > - h->state = TPGS_STATE_STANDBY; > break; > case TPGS_STATE_OFFLINE: > /* Path unusable */ > err = SCSI_DH_DEV_OFFLINED; > + pg->expiry = 0; > break; > default: > /* Useable path if active */ > err = SCSI_DH_OK; > + pg->expiry = 0; > break; > } > + spin_unlock_irqrestore(&pg->lock, flags); > + kfree(buff); > return err; > } > > /* > + * alua_stpg - Issue a SET TARGET PORT GROUP command > + * > + * Issue a SET TARGET PORT GROUP command and evaluate the > + * response. Returns SCSI_DH_RETRY per default to trigger > + * a re-evaluation of the target group state or SCSI_DH_OK > + * if no further action needs to be taken. 
> + */ > +static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) > +{ > + int retval; > + struct scsi_sense_hdr sense_hdr; > + > + if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { > + /* Only implicit ALUA supported, retry */ > + return SCSI_DH_RETRY; > + } > + switch (pg->state) { > + case TPGS_STATE_OPTIMIZED: > + return SCSI_DH_OK; > + case TPGS_STATE_NONOPTIMIZED: > + if ((pg->flags & ALUA_OPTIMIZE_STPG) && > + !pg->pref && > + (pg->tpgs & TPGS_MODE_IMPLICIT)) > + return SCSI_DH_OK; > + break; > + case TPGS_STATE_STANDBY: > + case TPGS_STATE_UNAVAILABLE: > + break; > + case TPGS_STATE_OFFLINE: > + return SCSI_DH_IO; > + case TPGS_STATE_TRANSITIONING: > + break; > + default: > + sdev_printk(KERN_INFO, sdev, > + "%s: stpg failed, unhandled TPGS state %d", > + ALUA_DH_NAME, pg->state); > + return SCSI_DH_NOSYS; > + } > + retval = submit_stpg(sdev, pg->group_id, &sense_hdr); > + > + if (retval) { > + if (!scsi_sense_valid(&sense_hdr)) { > + sdev_printk(KERN_INFO, sdev, > + "%s: stpg failed, result %d", > + ALUA_DH_NAME, retval); > + if (driver_byte(retval) == DRIVER_ERROR) > + return SCSI_DH_DEV_TEMP_BUSY; > + } else { > + sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", > + ALUA_DH_NAME); > + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); > + } > + } > + /* Retry RTPG */ > + return SCSI_DH_RETRY; > +} > + > +static void alua_rtpg_work(struct work_struct *work) > +{ > + struct alua_port_group *pg = > + container_of(work, struct alua_port_group, rtpg_work.work); > + struct scsi_device *sdev; > + LIST_HEAD(qdata_list); > + int err = SCSI_DH_OK; > + struct alua_queue_data *qdata, *tmp; > + unsigned long flags; > + struct workqueue_struct *alua_wq = kaluad_wq; > + > + spin_lock_irqsave(&pg->lock, flags); > + sdev = pg->rtpg_sdev; > + if (!sdev) { > + WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); > + WARN_ON(pg->flags & ALUA_PG_RUN_STPG); > + spin_unlock_irqrestore(&pg->lock, flags); > + return; > + } > + if (pg->flags & ALUA_SYNC_STPG) > + alua_wq = 
kaluad_sync_wq; > + pg->flags |= ALUA_PG_RUNNING; > + if (pg->flags & ALUA_PG_RUN_RTPG) { > + int state = pg->state; > + > + pg->flags &= ~ALUA_PG_RUN_RTPG; > + spin_unlock_irqrestore(&pg->lock, flags); > + if (state == TPGS_STATE_TRANSITIONING) { > + if (alua_tur(sdev) == SCSI_DH_RETRY) { > + spin_lock_irqsave(&pg->lock, flags); > + pg->flags &= ~ALUA_PG_RUNNING; > + pg->flags |= ALUA_PG_RUN_RTPG; > + spin_unlock_irqrestore(&pg->lock, flags); > + queue_delayed_work(alua_wq, &pg->rtpg_work, > + pg->interval * HZ); > + return; > + } > + /* Send RTPG on failure or if TUR indicates SUCCESS */ > + } > + err = alua_rtpg(sdev, pg); > + spin_lock_irqsave(&pg->lock, flags); > + if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { > + pg->flags &= ~ALUA_PG_RUNNING; > + pg->flags |= ALUA_PG_RUN_RTPG; > + spin_unlock_irqrestore(&pg->lock, flags); > + queue_delayed_work(alua_wq, &pg->rtpg_work, > + pg->interval * HZ); > + return; > + } > + if (err != SCSI_DH_OK) > + pg->flags &= ~ALUA_PG_RUN_STPG; > + } > + if (pg->flags & ALUA_PG_RUN_STPG) { > + pg->flags &= ~ALUA_PG_RUN_STPG; > + spin_unlock_irqrestore(&pg->lock, flags); > + err = alua_stpg(sdev, pg); > + spin_lock_irqsave(&pg->lock, flags); > + if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { > + pg->flags |= ALUA_PG_RUN_RTPG; > + pg->interval = 0; > + pg->flags &= ~ALUA_PG_RUNNING; > + spin_unlock_irqrestore(&pg->lock, flags); > + queue_delayed_work(alua_wq, &pg->rtpg_work, > + pg->interval * HZ); > + return; > + } > + } > + > + list_splice_init(&pg->rtpg_list, &qdata_list); > + pg->rtpg_sdev = NULL; > + spin_unlock_irqrestore(&pg->lock, flags); > + > + list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { > + list_del(&qdata->entry); > + if (qdata->callback_fn) > + qdata->callback_fn(qdata->callback_data, err); > + kfree(qdata); > + } > + spin_lock_irqsave(&pg->lock, flags); > + pg->flags &= ~ALUA_PG_RUNNING; > + spin_unlock_irqrestore(&pg->lock, flags); > + scsi_device_put(sdev); > + 
kref_put(&pg->kref, release_port_group); > +} > + > +static void alua_rtpg_queue(struct alua_port_group *pg, > + struct scsi_device *sdev, > + struct alua_queue_data *qdata, bool force) > +{ > + int start_queue = 0; > + unsigned long flags; > + struct workqueue_struct *alua_wq = kaluad_wq; > + > + if (!pg) > + return; > + > + spin_lock_irqsave(&pg->lock, flags); > + if (qdata) { > + list_add_tail(&qdata->entry, &pg->rtpg_list); > + pg->flags |= ALUA_PG_RUN_STPG; > + force = true; > + } > + if (pg->rtpg_sdev == NULL) { > + pg->interval = 0; > + pg->flags |= ALUA_PG_RUN_RTPG; > + kref_get(&pg->kref); > + pg->rtpg_sdev = sdev; > + scsi_device_get(sdev); > + start_queue = 1; > + } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { > + pg->flags |= ALUA_PG_RUN_RTPG; > + /* Do not queue if the worker is already running */ > + if (!(pg->flags & ALUA_PG_RUNNING)) { > + kref_get(&pg->kref); > + start_queue = 1; > + } > + } > + > + if (pg->flags & ALUA_SYNC_STPG) > + alua_wq = kaluad_sync_wq; > + spin_unlock_irqrestore(&pg->lock, flags); > + > + if (start_queue && > + !queue_delayed_work(alua_wq, &pg->rtpg_work, > + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { > + scsi_device_put(sdev); > + kref_put(&pg->kref, release_port_group); > + } > +} > + > +/* > * alua_initialize - Initialize ALUA state > * @sdev: the device to be initialized > * > @@ -674,21 +881,14 @@ static int alua_rtpg(struct scsi_device *sdev, struct > alua_dh_data *h, int wait_ > */ > static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) > { > - int err; > - > - err = alua_check_tpgs(sdev, h); > - if (err != SCSI_DH_OK) > - goto out; > - > - err = alua_vpd_inquiry(sdev, h); > - if (err != SCSI_DH_OK) > - goto out; > - > - err = alua_rtpg(sdev, h, 0); > - if (err != SCSI_DH_OK) > - goto out; > - > -out: > + int err = SCSI_DH_DEV_UNSUPP, tpgs; > + > + mutex_lock(&h->init_mutex); > + tpgs = alua_check_tpgs(sdev); > + if (tpgs != TPGS_MODE_NONE) > + err = alua_check_vpd(sdev, h, tpgs); > + 
h->init_error = err; > + mutex_unlock(&h->init_mutex); > return err; > } > /* > @@ -703,9 +903,11 @@ out: > static int alua_set_params(struct scsi_device *sdev, const char *params) > { > struct alua_dh_data *h = sdev->handler_data; > + struct alua_port_group __rcu *pg = NULL; > unsigned int optimize = 0, argc; > const char *p = params; > int result = SCSI_DH_OK; > + unsigned long flags; > > if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) > return -EINVAL; > @@ -715,18 +917,23 @@ static int alua_set_params(struct scsi_device *sdev, const > char *params) > if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) > return -EINVAL; > > + rcu_read_lock(); > + pg = rcu_dereference(h->pg); > + if (!pg) { > + rcu_read_unlock(); > + return -ENXIO; > + } > + spin_lock_irqsave(&pg->lock, flags); > if (optimize) > - h->flags |= ALUA_OPTIMIZE_STPG; > + pg->flags |= ALUA_OPTIMIZE_STPG; > else > - h->flags &= ~ALUA_OPTIMIZE_STPG; > + pg->flags &= ~ALUA_OPTIMIZE_STPG; > + spin_unlock_irqrestore(&pg->lock, flags); > + rcu_read_unlock(); > > return result; > } > > -static uint optimize_stpg; > -module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); > -MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than > sending a STPG, when implicit TPGS is supported (0=No,1=Yes). 
Default is 0."); > - > /* > * alua_activate - activate a path > * @sdev: device on the path to be activated > @@ -742,48 +949,33 @@ static int alua_activate(struct scsi_device *sdev, > { > struct alua_dh_data *h = sdev->handler_data; > int err = SCSI_DH_OK; > - int stpg = 0; > + struct alua_queue_data *qdata; > + struct alua_port_group __rcu *pg; > > - err = alua_rtpg(sdev, h, 1); > - if (err != SCSI_DH_OK) > + qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); > + if (!qdata) { > + err = SCSI_DH_RES_TEMP_UNAVAIL; > goto out; > - > - if (optimize_stpg) > - h->flags |= ALUA_OPTIMIZE_STPG; > - > - if (h->tpgs & TPGS_MODE_EXPLICIT) { > - switch (h->state) { > - case TPGS_STATE_NONOPTIMIZED: > - stpg = 1; > - if ((h->flags & ALUA_OPTIMIZE_STPG) && > - (!h->pref) && > - (h->tpgs & TPGS_MODE_IMPLICIT)) > - stpg = 0; > - break; > - case TPGS_STATE_STANDBY: > - case TPGS_STATE_UNAVAILABLE: > - stpg = 1; > - break; > - case TPGS_STATE_OFFLINE: > - err = SCSI_DH_IO; > - break; > - case TPGS_STATE_TRANSITIONING: > - err = SCSI_DH_RETRY; > - break; > - default: > - break; > - } > } > - > - if (stpg) { > - h->callback_fn = fn; > - h->callback_data = data; > - err = submit_stpg(h); > - if (err == SCSI_DH_OK) > - return 0; > - h->callback_fn = h->callback_data = NULL; > + qdata->callback_fn = fn; > + qdata->callback_data = data; > + > + mutex_lock(&h->init_mutex); > + rcu_read_lock(); > + pg = rcu_dereference(h->pg); > + if (!pg || !kref_get_unless_zero(&pg->kref)) { > + rcu_read_unlock(); > + kfree(qdata); > + err = h->init_error; > + mutex_unlock(&h->init_mutex); > + goto out; > } > + fn = NULL; > + rcu_read_unlock(); > + mutex_unlock(&h->init_mutex); > > + alua_rtpg_queue(pg, sdev, qdata, true); > + kref_put(&pg->kref, release_port_group); > out: > if (fn) > fn(data, err); > @@ -791,6 +983,29 @@ out: > } > > /* > + * alua_check - check path status > + * @sdev: device on the path to be checked > + * > + * Check the device status > + */ > +static void alua_check(struct scsi_device 
*sdev, bool force) > +{ > + struct alua_dh_data *h = sdev->handler_data; > + struct alua_port_group *pg; > + > + rcu_read_lock(); > + pg = rcu_dereference(h->pg); > + if (!pg || !kref_get_unless_zero(&pg->kref)) { > + rcu_read_unlock(); > + return; > + } > + rcu_read_unlock(); > + > + alua_rtpg_queue(pg, sdev, NULL, force); > + kref_put(&pg->kref, release_port_group); > +} > + > +/* > * alua_prep_fn - request callback > * > * Fail I/O to all paths not in state > @@ -799,13 +1014,20 @@ out: > static int alua_prep_fn(struct scsi_device *sdev, struct request *req) > { > struct alua_dh_data *h = sdev->handler_data; > + struct alua_port_group __rcu *pg; > + int state = TPGS_STATE_OPTIMIZED; > int ret = BLKPREP_OK; > > - if (h->state == TPGS_STATE_TRANSITIONING) > + rcu_read_lock(); > + pg = rcu_dereference(h->pg); > + if (pg) > + state = pg->state; > + rcu_read_unlock(); > + if (state == TPGS_STATE_TRANSITIONING) > ret = BLKPREP_DEFER; > - else if (h->state != TPGS_STATE_OPTIMIZED && > - h->state != TPGS_STATE_NONOPTIMIZED && > - h->state != TPGS_STATE_LBA_DEPENDENT) { > + else if (state != TPGS_STATE_OPTIMIZED && > + state != TPGS_STATE_NONOPTIMIZED && > + state != TPGS_STATE_LBA_DEPENDENT) { > ret = BLKPREP_KILL; > req->cmd_flags |= REQ_QUIET; > } > @@ -820,20 +1042,20 @@ static int alua_prep_fn(struct scsi_device *sdev, struct > request *req) > static int alua_bus_attach(struct scsi_device *sdev) > { > struct alua_dh_data *h; > - int err; > + int err, ret = -EINVAL; > > h = kzalloc(sizeof(*h) , GFP_KERNEL); > if (!h) > return -ENOMEM; > - h->tpgs = TPGS_MODE_UNINITIALIZED; > - h->state = TPGS_STATE_OPTIMIZED; > - h->group_id = -1; > - h->rel_port = -1; > - h->buff = h->inq; > - h->bufflen = ALUA_INQUIRY_SIZE; > + spin_lock_init(&h->pg_lock); > + rcu_assign_pointer(h->pg, NULL); > + h->init_error = SCSI_DH_OK; > h->sdev = sdev; > > + mutex_init(&h->init_mutex); > err = alua_initialize(sdev, h); > + if (err == SCSI_DH_NOMEM) > + ret = -ENOMEM; > if (err != SCSI_DH_OK 
&& err != SCSI_DH_DEV_OFFLINED) > goto failed; > > @@ -841,7 +1063,7 @@ static int alua_bus_attach(struct scsi_device *sdev) > return 0; > failed: > kfree(h); > - return -EINVAL; > + return ret; > } > > /* > @@ -851,9 +1073,16 @@ failed: > static void alua_bus_detach(struct scsi_device *sdev) > { > struct alua_dh_data *h = sdev->handler_data; > + struct alua_port_group *pg; > + > + spin_lock(&h->pg_lock); > + pg = h->pg; > + rcu_assign_pointer(h->pg, NULL); > + h->sdev = NULL; > + spin_unlock(&h->pg_lock); > + if (pg) > + kref_put(&pg->kref, release_port_group); > > - if (h->buff && h->inq != h->buff) > - kfree(h->buff); > sdev->handler_data = NULL; > kfree(h); > } > @@ -873,16 +1102,31 @@ static int __init alua_init(void) > { > int r; > > + kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); > + if (!kaluad_wq) { > + /* Temporary failure, bypass */ > + return SCSI_DH_DEV_TEMP_BUSY; > + } > + kaluad_sync_wq = create_workqueue("kaluad_sync"); > + if (!kaluad_sync_wq) { > + destroy_workqueue(kaluad_wq); > + return SCSI_DH_DEV_TEMP_BUSY; > + } > r = scsi_register_device_handler(&alua_dh); > - if (r != 0) > + if (r != 0) { > printk(KERN_ERR "%s: Failed to register scsi device handler", > ALUA_DH_NAME); > + destroy_workqueue(kaluad_sync_wq); > + destroy_workqueue(kaluad_wq); > + } > return r; > } > > static void __exit alua_exit(void) > { > scsi_unregister_device_handler(&alua_dh); > + destroy_workqueue(kaluad_sync_wq); > + destroy_workqueue(kaluad_wq); > } > > module_init(alua_init); > diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c > index da2e068..0678535 100644 > --- a/drivers/scsi/scsi_devinfo.c > +++ b/drivers/scsi/scsi_devinfo.c > @@ -219,6 +219,8 @@ static struct { > {"NAKAMICH", "MJ-5.16S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, > {"NEC", "PD-1 ODX654P", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, > {"NEC", "iStorage", NULL, BLIST_REPORTLUN2}, > + {"NETAPP", "LUN C-Mode", NULL, BLIST_SYNC_ALUA}, > + {"NETAPP", "INF-01-00", NULL, 
BLIST_SYNC_ALUA}, > {"NRC", "MBR-7", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, > {"NRC", "MBR-7.4", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, > {"PIONEER", "CD-ROM DRM-600", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index dd8ad2a..fa6b2c4 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -23,6 +23,7 @@ > #include <linux/scatterlist.h> > #include <linux/blk-mq.h> > #include <linux/ratelimit.h> > +#include <asm/unaligned.h> > > #include <scsi/scsi.h> > #include <scsi/scsi_cmnd.h> > @@ -3154,3 +3155,190 @@ void sdev_enable_disk_events(struct scsi_device *sdev) > atomic_dec(&sdev->disk_events_disable_depth); > } > EXPORT_SYMBOL(sdev_enable_disk_events); > + > +/** > + * scsi_vpd_lun_id - return a unique device identification > + * @sdev: SCSI device > + * @id: buffer for the identification > + * @id_len: length of the buffer > + * > + * Copies a unique device identification into @id based > + * on the information in the VPD page 0x83 of the device. > + * The string will be formatted as a SCSI name string. > + * > + * Returns the length of the identification or error on failure. > + * If the identifier is longer than the supplied buffer the actual > + * identifier length is returned and the buffer is not zero-padded. > + */ > +int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) > +{ > + u8 cur_id_type = 0xff; > + u8 cur_id_size = 0; > + unsigned char *d, *cur_id_str; > + unsigned char __rcu *vpd_pg83; > + int id_size = -EINVAL; > + > + rcu_read_lock(); > + vpd_pg83 = rcu_dereference(sdev->vpd_pg83); > + if (!vpd_pg83) { > + rcu_read_unlock(); > + return -ENXIO; > + } > + > + /* > + * Look for the correct descriptor. 
> + * Order of preference for lun descriptor: > + * - SCSI name string > + * - NAA IEEE Registered Extended > + * - EUI-64 based 16-byte > + * - EUI-64 based 12-byte > + * - NAA IEEE Registered > + * - NAA IEEE Extended > + * as longer descriptors reduce the likelyhood > + * of identification clashes. > + */ > + > + /* The id string must be at least 20 bytes + terminating NULL byte */ > + if (id_len < 21) { > + rcu_read_unlock(); > + return -EINVAL; > + } > + > + memset(id, 0, id_len); > + d = vpd_pg83 + 4; > + while (d < vpd_pg83 + sdev->vpd_pg83_len) { > + /* Skip designators not referring to the LUN */ > + if ((d[1] & 0x30) != 0x00) > + goto next_desig; > + > + switch (d[1] & 0xf) { > + case 0x2: > + /* EUI-64 */ > + if (cur_id_size > d[3]) > + break; > + /* Prefer NAA IEEE Registered Extended */ > + if (cur_id_type == 0x3 && > + cur_id_size == d[3]) > + break; > + cur_id_size = d[3]; > + cur_id_str = d + 4; > + cur_id_type = d[1] & 0xf; > + switch (cur_id_size) { > + case 8: > + id_size = snprintf(id, id_len, > + "eui.%8phN", > + cur_id_str); > + break; > + case 12: > + id_size = snprintf(id, id_len, > + "eui.%12phN", > + cur_id_str); > + break; > + case 16: > + id_size = snprintf(id, id_len, > + "eui.%16phN", > + cur_id_str); > + break; > + default: > + cur_id_size = 0; > + break; > + } > + break; > + case 0x3: > + /* NAA */ > + if (cur_id_size > d[3]) > + break; > + cur_id_size = d[3]; > + cur_id_str = d + 4; > + cur_id_type = d[1] & 0xf; > + switch (cur_id_size) { > + case 8: > + id_size = snprintf(id, id_len, > + "naa.%8phN", > + cur_id_str); > + break; > + case 16: > + id_size = snprintf(id, id_len, > + "naa.%16phN", > + cur_id_str); > + break; > + default: > + cur_id_size = 0; > + break; > + } > + break; > + case 0x8: > + /* SCSI name string */ > + if (cur_id_size + 4 > d[3]) > + break; > + /* Prefer others for truncated descriptor */ > + if (cur_id_size && d[3] > id_len) > + break; > + cur_id_size = id_size = d[3]; > + cur_id_str = d + 4; > + cur_id_type 
= d[1] & 0xf; > + if (cur_id_size >= id_len) > + cur_id_size = id_len - 1; > + memcpy(id, cur_id_str, cur_id_size); > + /* Decrease priority for truncated descriptor */ > + if (cur_id_size != id_size) > + cur_id_size = 6; > + break; > + default: > + break; > + } > +next_desig: > + d += d[3] + 4; > + } > + rcu_read_unlock(); > + > + return id_size; > +} > +EXPORT_SYMBOL(scsi_vpd_lun_id); > + > +/* > + * scsi_vpd_tpg_id - return a target port group identifier > + * @sdev: SCSI device > + * > + * Returns the Target Port Group identifier from the information > + * from VPD page 0x83 of the device. > + * > + * Returns the identifier or error on failure. > + */ > +int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id) > +{ > + unsigned char *d; > + unsigned char __rcu *vpd_pg83; > + int group_id = -EAGAIN, rel_port = -1; > + > + rcu_read_lock(); > + vpd_pg83 = rcu_dereference(sdev->vpd_pg83); > + if (!vpd_pg83) { > + rcu_read_unlock(); > + return -ENXIO; > + } > + > + d = sdev->vpd_pg83 + 4; > + while (d < sdev->vpd_pg83 + sdev->vpd_pg83_len) { > + switch (d[1] & 0xf) { > + case 0x4: > + /* Relative target port */ > + rel_port = get_unaligned_be16(&d[6]); > + break; > + case 0x5: > + /* Target port group */ > + group_id = get_unaligned_be16(&d[6]); > + break; > + default: > + break; > + } > + d += d[3] + 4; > + } > + rcu_read_unlock(); > + > + if (group_id >= 0 && rel_id && rel_port != -1) > + *rel_id = rel_port; > + > + return group_id; > +} > +EXPORT_SYMBOL(scsi_vpd_tpg_id); > diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c > index f0cfaac..d84b2c5 100644 > --- a/drivers/scsi/scsi_scan.c > +++ b/drivers/scsi/scsi_scan.c > @@ -962,6 +962,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned > char *inq_result, > if (*bflags & BLIST_NO_DIF) > sdev->no_dif = 1; > > + if (*bflags & BLIST_SYNC_ALUA) > + sdev->synchronous_alua = 1; > + > sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; > > if (*bflags & BLIST_TRY_VPD_PAGES) > diff --git
a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h > index 4f6ba34..c7c8876 100644 > --- a/include/scsi/scsi_device.h > +++ b/include/scsi/scsi_device.h > @@ -175,6 +175,7 @@ struct scsi_device { > unsigned no_dif:1; /* T10 PI (DIF) should be disabled */ > unsigned broken_fua:1; /* Don't set FUA bit */ > unsigned lun_in_cdb:1; /* Store LUN bits in CDB[1] */ > + unsigned synchronous_alua:1; /* Synchronous ALUA commands */ > > atomic_t disk_events_disable_depth; /* disable depth for disk events */ > > @@ -415,6 +416,8 @@ static inline int scsi_execute_req(struct scsi_device *sdev, > } > extern void sdev_disable_disk_events(struct scsi_device *sdev); > extern void sdev_enable_disk_events(struct scsi_device *sdev); > +extern int scsi_vpd_lun_id(struct scsi_device *, char *, size_t); > +extern int scsi_vpd_tpg_id(struct scsi_device *, int *); > > #ifdef CONFIG_PM > extern int scsi_autopm_get_device(struct scsi_device *); > diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h > index 96e3f56..9f750cb 100644 > --- a/include/scsi/scsi_devinfo.h > +++ b/include/scsi/scsi_devinfo.h > @@ -37,5 +37,6 @@ > #define BLIST_TRY_VPD_PAGES 0x10000000 /* Attempt to read VPD pages */ > #define BLIST_NO_RSOC 0x20000000 /* don't try to issue RSOC */ > #define BLIST_MAX_1024 0x40000000 /* maximum 1024 sector cdb length */ > +#define BLIST_SYNC_ALUA 0x80000000 /* Synchronous ALUA commands */ > > #endif > diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h > index 85d7317..7e184c6 100644 > --- a/include/scsi/scsi_dh.h > +++ b/include/scsi/scsi_dh.h > @@ -52,6 +52,7 @@ enum { > SCSI_DH_TIMED_OUT, > SCSI_DH_RES_TEMP_UNAVAIL, > SCSI_DH_DEV_OFFLINED, > + SCSI_DH_NOMEM, > SCSI_DH_NOSYS, > SCSI_DH_DRIVER_MAX, > }; > > -- > canonical-kernel-team mailing list > canonical-kernel-team@lists.canonical.com > Modify settings or unsubscribe at: > https://lists.canonical.com/mailman/listinfo/canonical-kernel-team > > > > -- > kernel-team mailing list > 
kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index cc2773b..952e3b7 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -22,7 +22,9 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/module.h> +#include <asm/unaligned.h> #include <scsi/scsi.h> +#include <scsi/scsi_dbg.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_dh.h> @@ -54,27 +56,60 @@ #define TPGS_MODE_IMPLICIT 0x1 #define TPGS_MODE_EXPLICIT 0x2 -#define ALUA_INQUIRY_SIZE 36 +#define ALUA_RTPG_SIZE 128 #define ALUA_FAILOVER_TIMEOUT 60 #define ALUA_FAILOVER_RETRIES 5 +#define ALUA_RTPG_DELAY_MSECS 5 -/* flags passed from user level */ -#define ALUA_OPTIMIZE_STPG 1 +/* device handler flags */ +#define ALUA_OPTIMIZE_STPG 0x01 +#define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 +#define ALUA_SYNC_STPG 0x04 +/* State machine flags */ +#define ALUA_PG_RUN_RTPG 0x10 +#define ALUA_PG_RUN_STPG 0x20 +#define ALUA_PG_RUNNING 0x40 -struct alua_dh_data { +static uint optimize_stpg; +module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). 
Default is 0."); + +static LIST_HEAD(port_group_list); +static DEFINE_SPINLOCK(port_group_lock); +static struct workqueue_struct *kaluad_wq; +static struct workqueue_struct *kaluad_sync_wq; + +struct alua_port_group { + struct kref kref; + struct rcu_head rcu; + struct list_head node; + unsigned char device_id_str[256]; + int device_id_len; int group_id; - int rel_port; int tpgs; int state; int pref; unsigned flags; /* used for optimizing STPG */ - unsigned char inq[ALUA_INQUIRY_SIZE]; - unsigned char *buff; - int bufflen; unsigned char transition_tmo; - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; - int senselen; + unsigned long expiry; + unsigned long interval; + struct delayed_work rtpg_work; + spinlock_t lock; + struct list_head rtpg_list; + struct scsi_device *rtpg_sdev; +}; + +struct alua_dh_data { + struct alua_port_group *pg; + int group_id; + spinlock_t pg_lock; struct scsi_device *sdev; + int init_error; + struct mutex init_mutex; +}; + +struct alua_queue_data { + struct list_head entry; activate_complete callback_fn; void *callback_data; }; @@ -82,231 +117,162 @@ struct alua_dh_data { #define ALUA_POLICY_SWITCH_CURRENT 0 #define ALUA_POLICY_SWITCH_ALL 1 -static char print_alua_state(int); -static int alua_check_sense(struct scsi_device *, struct scsi_sense_hdr *); - -static int realloc_buffer(struct alua_dh_data *h, unsigned len) -{ - if (h->buff && h->buff != h->inq) - kfree(h->buff); - - h->buff = kmalloc(len, GFP_NOIO); - if (!h->buff) { - h->buff = h->inq; - h->bufflen = ALUA_INQUIRY_SIZE; - return 1; - } - h->bufflen = len; - return 0; -} +static void alua_rtpg_work(struct work_struct *work); +static void alua_rtpg_queue(struct alua_port_group *pg, + struct scsi_device *sdev, + struct alua_queue_data *qdata, bool force); +static void alua_check(struct scsi_device *sdev, bool force); -static struct request *get_alua_req(struct scsi_device *sdev, - void *buffer, unsigned buflen, int rw) +static void release_port_group(struct kref *kref) { - struct 
request *rq; - struct request_queue *q = sdev->request_queue; - - rq = blk_get_request(q, rw, GFP_NOIO); - - if (IS_ERR(rq)) { - sdev_printk(KERN_INFO, sdev, - "%s: blk_get_request failed\n", __func__); - return NULL; - } - blk_rq_set_block_pc(rq); - - if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) { - blk_put_request(rq); - sdev_printk(KERN_INFO, sdev, - "%s: blk_rq_map_kern failed\n", __func__); - return NULL; - } - - rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER; - rq->retries = ALUA_FAILOVER_RETRIES; - rq->timeout = ALUA_FAILOVER_TIMEOUT * HZ; - - return rq; + struct alua_port_group *pg; + + pg = container_of(kref, struct alua_port_group, kref); + if (pg->rtpg_sdev) + flush_delayed_work(&pg->rtpg_work); + spin_lock(&port_group_lock); + list_del(&pg->node); + spin_unlock(&port_group_lock); + kfree_rcu(pg, rcu); } /* - * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command + * submit_rtpg - Issue a REPORT TARGET GROUP STATES command * @sdev: sdev the command should be sent to */ -static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h) +static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, + int bufflen, struct scsi_sense_hdr *sshdr, int flags) { - struct request *rq; - int err = SCSI_DH_RES_TEMP_UNAVAIL; - - rq = get_alua_req(sdev, h->buff, h->bufflen, READ); - if (!rq) - goto done; + u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)]; + int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; /* Prepare the command. 
*/ - rq->cmd[0] = INQUIRY; - rq->cmd[1] = 1; - rq->cmd[2] = 0x83; - rq->cmd[4] = h->bufflen; - rq->cmd_len = COMMAND_SIZE(INQUIRY); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = h->senselen = 0; - - err = blk_execute_rq(rq->q, NULL, rq, 1); - if (err == -EIO) { - sdev_printk(KERN_INFO, sdev, - "%s: evpd inquiry failed with %x\n", - ALUA_DH_NAME, rq->errors); - h->senselen = rq->sense_len; - err = SCSI_DH_IO; - } - blk_put_request(rq); -done: - return err; + memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN)); + cdb[0] = MAINTENANCE_IN; + if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) + cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; + else + cdb[1] = MI_REPORT_TARGET_PGS; + put_unaligned_be32(bufflen, &cdb[6]); + + return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, + buff, bufflen, sshdr, + ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, NULL, req_flags); } /* - * submit_rtpg - Issue a REPORT TARGET GROUP STATES command - * @sdev: sdev the command should be sent to + * submit_stpg - Issue a SET TARGET PORT GROUP command + * + * Currently we're only setting the current target port group state + * to 'active/optimized' and let the array firmware figure out + * the states of the remaining groups. */ -static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, - bool rtpg_ext_hdr_req) +static int submit_stpg(struct scsi_device *sdev, int group_id, + struct scsi_sense_hdr *sshdr) { - struct request *rq; - int err = SCSI_DH_RES_TEMP_UNAVAIL; + u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)]; + unsigned char stpg_data[8]; + int stpg_len = 8; + int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; - rq = get_alua_req(sdev, h->buff, h->bufflen, READ); - if (!rq) - goto done; + /* Prepare the data buffer */ + memset(stpg_data, 0, stpg_len); + stpg_data[4] = TPGS_STATE_OPTIMIZED & 0x0f; + put_unaligned_be16(group_id, &stpg_data[6]); /* Prepare the command. 
*/ - rq->cmd[0] = MAINTENANCE_IN; - if (rtpg_ext_hdr_req) - rq->cmd[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; - else - rq->cmd[1] = MI_REPORT_TARGET_PGS; - rq->cmd[6] = (h->bufflen >> 24) & 0xff; - rq->cmd[7] = (h->bufflen >> 16) & 0xff; - rq->cmd[8] = (h->bufflen >> 8) & 0xff; - rq->cmd[9] = h->bufflen & 0xff; - rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = h->senselen = 0; - - err = blk_execute_rq(rq->q, NULL, rq, 1); - if (err == -EIO) { - sdev_printk(KERN_INFO, sdev, - "%s: rtpg failed with %x\n", - ALUA_DH_NAME, rq->errors); - h->senselen = rq->sense_len; - err = SCSI_DH_IO; - } - blk_put_request(rq); -done: - return err; + memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT)); + cdb[0] = MAINTENANCE_OUT; + cdb[1] = MO_SET_TARGET_PGS; + put_unaligned_be32(stpg_len, &cdb[6]); + + return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, + stpg_data, stpg_len, + sshdr, ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, NULL, req_flags); } -/* - * alua_stpg - Evaluate SET TARGET GROUP STATES - * @sdev: the device to be evaluated - * @state: the new target group state - * - * Send a SET TARGET GROUP STATES command to the device. - * We only have to test here if we should resubmit the command; - * any other error is assumed as a failure. 
- */ -static void stpg_endio(struct request *req, int error) +struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, + int group_id) { - struct alua_dh_data *h = req->end_io_data; - struct scsi_sense_hdr sense_hdr; - unsigned err = SCSI_DH_OK; + struct alua_port_group *pg; + + if (!id_str || !id_size || !strlen(id_str)) + return NULL; - if (host_byte(req->errors) != DID_OK || - msg_byte(req->errors) != COMMAND_COMPLETE) { - err = SCSI_DH_IO; - goto done; + list_for_each_entry(pg, &port_group_list, node) { + if (pg->group_id != group_id) + continue; + if (!pg->device_id_len || pg->device_id_len != id_size) + continue; + if (strncmp(pg->device_id_str, id_str, id_size)) + continue; + if (!kref_get_unless_zero(&pg->kref)) + continue; + return pg; } - if (req->sense_len > 0) { - err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE, - &sense_hdr); - if (!err) { - err = SCSI_DH_IO; - goto done; - } - err = alua_check_sense(h->sdev, &sense_hdr); - if (err == ADD_TO_MLQUEUE) { - err = SCSI_DH_RETRY; - goto done; - } - sdev_printk(KERN_INFO, h->sdev, - "%s: stpg sense code: %02x/%02x/%02x\n", - ALUA_DH_NAME, sense_hdr.sense_key, - sense_hdr.asc, sense_hdr.ascq); - err = SCSI_DH_IO; - } else if (error) - err = SCSI_DH_IO; - - if (err == SCSI_DH_OK) { - h->state = TPGS_STATE_OPTIMIZED; - sdev_printk(KERN_INFO, h->sdev, - "%s: port group %02x switched to state %c\n", - ALUA_DH_NAME, h->group_id, - print_alua_state(h->state)); - } -done: - req->end_io_data = NULL; - __blk_put_request(req->q, req); - if (h->callback_fn) { - h->callback_fn(h->callback_data, err); - h->callback_fn = h->callback_data = NULL; - } - return; + return NULL; } /* - * submit_stpg - Issue a SET TARGET GROUP STATES command + * alua_alloc_pg - Allocate a new port_group structure + * @sdev: scsi device + * @h: alua device_handler data + * @group_id: port group id * - * Currently we're only setting the current target port group state - * to 'active/optimized' and let the array firmware 
figure out - * the states of the remaining groups. + * Allocate a new port_group structure for a given + * device. */ -static unsigned submit_stpg(struct alua_dh_data *h) +struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, + int group_id, int tpgs) { - struct request *rq; - int stpg_len = 8; - struct scsi_device *sdev = h->sdev; + struct alua_port_group *pg, *tmp_pg; - /* Prepare the data buffer */ - memset(h->buff, 0, stpg_len); - h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f; - h->buff[6] = (h->group_id >> 8) & 0xff; - h->buff[7] = h->group_id & 0xff; + pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); + if (!pg) + return ERR_PTR(-ENOMEM); - rq = get_alua_req(sdev, h->buff, stpg_len, WRITE); - if (!rq) - return SCSI_DH_RES_TEMP_UNAVAIL; + pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, + sizeof(pg->device_id_str)); + if (pg->device_id_len <= 0) { + /* + * TPGS supported but no device identification found. + * Generate private device identification. + */ + sdev_printk(KERN_INFO, sdev, + "%s: No device descriptors found\n", + ALUA_DH_NAME); + pg->device_id_str[0] = '\0'; + pg->device_id_len = 0; + } + pg->group_id = group_id; + pg->tpgs = tpgs; + pg->state = TPGS_STATE_OPTIMIZED; + if (optimize_stpg) + pg->flags |= ALUA_OPTIMIZE_STPG; + kref_init(&pg->kref); + INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); + INIT_LIST_HEAD(&pg->rtpg_list); + INIT_LIST_HEAD(&pg->node); + spin_lock_init(&pg->lock); + + spin_lock(&port_group_lock); + tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, + group_id); + if (tmp_pg) { + spin_unlock(&port_group_lock); + kfree(pg); + return tmp_pg; + } - /* Prepare the command. 
*/ - rq->cmd[0] = MAINTENANCE_OUT; - rq->cmd[1] = MO_SET_TARGET_PGS; - rq->cmd[6] = (stpg_len >> 24) & 0xff; - rq->cmd[7] = (stpg_len >> 16) & 0xff; - rq->cmd[8] = (stpg_len >> 8) & 0xff; - rq->cmd[9] = stpg_len & 0xff; - rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = h->senselen = 0; - rq->end_io_data = h; - - blk_execute_rq_nowait(rq->q, NULL, rq, 1, stpg_endio); - return SCSI_DH_OK; + list_add(&pg->node, &port_group_list); + spin_unlock(&port_group_lock); + + return pg; } /* @@ -316,12 +282,23 @@ static unsigned submit_stpg(struct alua_dh_data *h) * Examine the TPGS setting of the sdev to find out if ALUA * is supported. */ -static int alua_check_tpgs(struct scsi_device *sdev, struct alua_dh_data *h) +static int alua_check_tpgs(struct scsi_device *sdev) { - int err = SCSI_DH_OK; + int tpgs = TPGS_MODE_NONE; + + /* + * ALUA support for non-disk devices is fraught with + * difficulties, so disable it for now. 
+ */ + if (sdev->type != TYPE_DISK) { + sdev_printk(KERN_INFO, sdev, + "%s: disable for non-disk devices\n", + ALUA_DH_NAME); + return tpgs; + } - h->tpgs = scsi_device_tpgs(sdev); - switch (h->tpgs) { + tpgs = scsi_device_tpgs(sdev); + switch (tpgs) { case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: sdev_printk(KERN_INFO, sdev, "%s: supports implicit and explicit TPGS\n", @@ -335,71 +312,36 @@ static int alua_check_tpgs(struct scsi_device *sdev, struct alua_dh_data *h) sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", ALUA_DH_NAME); break; - default: - h->tpgs = TPGS_MODE_NONE; + case TPGS_MODE_NONE: sdev_printk(KERN_INFO, sdev, "%s: not supported\n", ALUA_DH_NAME); - err = SCSI_DH_DEV_UNSUPP; + break; + default: + sdev_printk(KERN_INFO, sdev, + "%s: unsupported TPGS setting %d\n", + ALUA_DH_NAME, tpgs); + tpgs = TPGS_MODE_NONE; break; } - return err; + return tpgs; } /* - * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83 + * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 * @sdev: device to be checked * * Extract the relative target port and the target port group * descriptor from the list of identificators. */ -static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h) +static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, + int tpgs) { - int len; - unsigned err; - unsigned char *d; - - retry: - err = submit_vpd_inquiry(sdev, h); + int rel_port = -1, group_id; + struct alua_port_group *pg, *old_pg = NULL; - if (err != SCSI_DH_OK) - return err; - - /* Check if vpd page exceeds initial buffer */ - len = (h->buff[2] << 8) + h->buff[3] + 4; - if (len > h->bufflen) { - /* Resubmit with the correct length */ - if (realloc_buffer(h, len)) { - sdev_printk(KERN_WARNING, sdev, - "%s: kmalloc buffer failed\n", - ALUA_DH_NAME); - /* Temporary failure, bypass */ - return SCSI_DH_DEV_TEMP_BUSY; - } - goto retry; - } - - /* - * Now look for the correct descriptor. 
- */ - d = h->buff + 4; - while (d < h->buff + len) { - switch (d[1] & 0xf) { - case 0x4: - /* Relative target port */ - h->rel_port = (d[6] << 8) + d[7]; - break; - case 0x5: - /* Target port group */ - h->group_id = (d[6] << 8) + d[7]; - break; - default: - break; - } - d += d[3] + 4; - } - - if (h->group_id == -1) { + group_id = scsi_vpd_tpg_id(sdev, &rel_port); + if (group_id < 0) { /* * Internal error; TPGS supported but required * VPD identification descriptors not present. @@ -408,16 +350,41 @@ static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h) sdev_printk(KERN_INFO, sdev, "%s: No target port descriptors found\n", ALUA_DH_NAME); - h->state = TPGS_STATE_OPTIMIZED; - h->tpgs = TPGS_MODE_NONE; - err = SCSI_DH_DEV_UNSUPP; - } else { + return SCSI_DH_DEV_UNSUPP; + } + + pg = alua_alloc_pg(sdev, group_id, tpgs); + if (IS_ERR(pg)) { + if (PTR_ERR(pg) == -ENOMEM) + return SCSI_DH_NOMEM; + return SCSI_DH_DEV_UNSUPP; + } + if (pg->device_id_len) sdev_printk(KERN_INFO, sdev, - "%s: port group %02x rel port %02x\n", - ALUA_DH_NAME, h->group_id, h->rel_port); + "%s: device %s port group %x rel port %x\n", + ALUA_DH_NAME, pg->device_id_str, + group_id, rel_port); + else + sdev_printk(KERN_INFO, sdev, + "%s: port group %x rel port %x\n", + ALUA_DH_NAME, group_id, rel_port); + + /* Check for existing port group references */ + spin_lock(&h->pg_lock); + old_pg = h->pg; + if (old_pg != pg) { + /* port group has changed. 
Update to new port group */ + rcu_assign_pointer(h->pg, pg); } + if (sdev->synchronous_alua) + pg->flags |= ALUA_SYNC_STPG; + alua_rtpg_queue(h->pg, sdev, NULL, true); + spin_unlock(&h->pg_lock); - return err; + if (old_pg) + kref_put(&old_pg->kref, release_port_group); + + return SCSI_DH_OK; } static char print_alua_state(int state) @@ -447,40 +414,24 @@ static int alua_check_sense(struct scsi_device *sdev, { switch (sense_hdr->sense_key) { case NOT_READY: - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) + if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { /* * LUN Not Accessible - ALUA state transition */ - return ADD_TO_MLQUEUE; - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b) - /* - * LUN Not Accessible -- Target port in standby state - */ - return SUCCESS; - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c) - /* - * LUN Not Accessible -- Target port in unavailable state - */ - return SUCCESS; - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12) - /* - * LUN Not Ready -- Offline - */ - return SUCCESS; - if (sdev->allow_restart && - sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x02) - /* - * if the device is not started, we need to wake - * the error handler to start the motor - */ - return FAILED; + alua_check(sdev, false); + return NEEDS_RETRY; + } break; case UNIT_ATTENTION: - if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) + if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { /* - * Power On, Reset, or Bus Device Reset, just retry. + * Power On, Reset, or Bus Device Reset. + * Might have obscured a state transition, + * so schedule a recheck. 
*/ + alua_check(sdev, true); return ADD_TO_MLQUEUE; + } if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) /* * Device internal reset @@ -491,16 +442,20 @@ static int alua_check_sense(struct scsi_device *sdev, * Mode Parameters Changed */ return ADD_TO_MLQUEUE; - if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) + if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { /* * ALUA state changed */ + alua_check(sdev, true); return ADD_TO_MLQUEUE; - if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) + } + if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { /* * Implicit ALUA state transition failed */ + alua_check(sdev, true); return ADD_TO_MLQUEUE; + } if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) /* * Inquiry data has changed @@ -520,38 +475,74 @@ static int alua_check_sense(struct scsi_device *sdev, } /* + * alua_tur - Send a TEST UNIT READY + * @sdev: device to which the TEST UNIT READY command should be sent + * + * Send a TEST UNIT READY to @sdev to figure out the device state + * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, + * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. + */ +static int alua_tur(struct scsi_device *sdev) +{ + struct scsi_sense_hdr sense_hdr; + int retval; + + retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, &sense_hdr); + if (sense_hdr.sense_key == NOT_READY && + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) + return SCSI_DH_RETRY; + else if (retval) + return SCSI_DH_IO; + else + return SCSI_DH_OK; +} + +/* * alua_rtpg - Evaluate REPORT TARGET GROUP STATES * @sdev: the device to be evaluated. - * @wait_for_transition: if nonzero, wait ALUA_FAILOVER_TIMEOUT seconds for device to exit transitioning state * * Evaluate the Target Port Group State. * Returns SCSI_DH_DEV_OFFLINED if the path is * found to be unusable.
*/ -static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int wait_for_transition) +static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) { struct scsi_sense_hdr sense_hdr; - int len, k, off, valid_states = 0; - unsigned char *ucp; - unsigned err; - bool rtpg_ext_hdr_req = 1; - unsigned long expiry, interval = 0; + struct alua_port_group *tmp_pg; + int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE; + unsigned char *desc, *buff; + unsigned err, retval; unsigned int tpg_desc_tbl_off; unsigned char orig_transition_tmo; + unsigned long flags; - if (!h->transition_tmo) - expiry = round_jiffies_up(jiffies + ALUA_FAILOVER_TIMEOUT * HZ); - else - expiry = round_jiffies_up(jiffies + h->transition_tmo * HZ); + if (!pg->expiry) { + unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; - retry: - err = submit_rtpg(sdev, h, rtpg_ext_hdr_req); + if (pg->transition_tmo) + transition_tmo = pg->transition_tmo * HZ; + + pg->expiry = round_jiffies_up(jiffies + transition_tmo); + } + + buff = kzalloc(bufflen, GFP_KERNEL); + if (!buff) + return SCSI_DH_DEV_TEMP_BUSY; - if (err == SCSI_DH_IO && h->senselen > 0) { - err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE, - &sense_hdr); - if (!err) + retry: + retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); + + if (retval) { + if (!scsi_sense_valid(&sense_hdr)) { + sdev_printk(KERN_INFO, sdev, + "%s: rtpg failed, result %d\n", + ALUA_DH_NAME, retval); + kfree(buff); + if (driver_byte(retval) == DRIVER_ERROR) + return SCSI_DH_DEV_TEMP_BUSY; return SCSI_DH_IO; + } /* * submit_rtpg() has failed on existing arrays @@ -561,73 +552,101 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int wait_ * The retry without rtpg_ext_hdr_req set * handles this. 
*/ - if (rtpg_ext_hdr_req == 1 && + if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && sense_hdr.sense_key == ILLEGAL_REQUEST && sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { - rtpg_ext_hdr_req = 0; + pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; goto retry; } - - err = alua_check_sense(sdev, &sense_hdr); - if (err == ADD_TO_MLQUEUE && time_before(jiffies, expiry)) - goto retry; - sdev_printk(KERN_INFO, sdev, - "%s: rtpg sense code %02x/%02x/%02x\n", - ALUA_DH_NAME, sense_hdr.sense_key, - sense_hdr.asc, sense_hdr.ascq); - err = SCSI_DH_IO; + /* + * Retry on ALUA state transition or if any + * UNIT ATTENTION occurred. + */ + if (sense_hdr.sense_key == NOT_READY && + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) + err = SCSI_DH_RETRY; + else if (sense_hdr.sense_key == UNIT_ATTENTION) + err = SCSI_DH_RETRY; + if (err == SCSI_DH_RETRY && + pg->expiry != 0 && time_before(jiffies, pg->expiry)) { + sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", + ALUA_DH_NAME); + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); + return err; + } + sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", + ALUA_DH_NAME); + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); + kfree(buff); + pg->expiry = 0; + return SCSI_DH_IO; } - if (err != SCSI_DH_OK) - return err; - len = (h->buff[0] << 24) + (h->buff[1] << 16) + - (h->buff[2] << 8) + h->buff[3] + 4; + len = get_unaligned_be32(&buff[0]) + 4; - if (len > h->bufflen) { + if (len > bufflen) { /* Resubmit with the correct length */ - if (realloc_buffer(h, len)) { + kfree(buff); + bufflen = len; + buff = kmalloc(bufflen, GFP_KERNEL); + if (!buff) { sdev_printk(KERN_WARNING, sdev, "%s: kmalloc buffer failed\n",__func__); /* Temporary failure, bypass */ + pg->expiry = 0; return SCSI_DH_DEV_TEMP_BUSY; } goto retry; } - orig_transition_tmo = h->transition_tmo; - if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && h->buff[5] != 0) - h->transition_tmo = h->buff[5]; + orig_transition_tmo = pg->transition_tmo; + if ((buff[4] & RTPG_FMT_MASK) == 
RTPG_FMT_EXT_HDR && buff[5] != 0) + pg->transition_tmo = buff[5]; else - h->transition_tmo = ALUA_FAILOVER_TIMEOUT; + pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; - if (wait_for_transition && (orig_transition_tmo != h->transition_tmo)) { + if (orig_transition_tmo != pg->transition_tmo) { sdev_printk(KERN_INFO, sdev, "%s: transition timeout set to %d seconds\n", - ALUA_DH_NAME, h->transition_tmo); - expiry = jiffies + h->transition_tmo * HZ; + ALUA_DH_NAME, pg->transition_tmo); + pg->expiry = jiffies + pg->transition_tmo * HZ; } - if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) + if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) tpg_desc_tbl_off = 8; else tpg_desc_tbl_off = 4; - for (k = tpg_desc_tbl_off, ucp = h->buff + tpg_desc_tbl_off; + for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; k < len; - k += off, ucp += off) { - - if (h->group_id == (ucp[2] << 8) + ucp[3]) { - h->state = ucp[0] & 0x0f; - h->pref = ucp[0] >> 7; - valid_states = ucp[1]; + k += off, desc += off) { + u16 group_id = get_unaligned_be16(&desc[2]); + + spin_lock_irqsave(&port_group_lock, flags); + tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, + group_id); + spin_unlock_irqrestore(&port_group_lock, flags); + if (tmp_pg) { + if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { + if ((tmp_pg == pg) || + !(tmp_pg->flags & ALUA_PG_RUNNING)) { + tmp_pg->state = desc[0] & 0x0f; + tmp_pg->pref = desc[0] >> 7; + } + if (tmp_pg == pg) + valid_states = desc[1]; + spin_unlock_irqrestore(&tmp_pg->lock, flags); + } + kref_put(&tmp_pg->kref, release_port_group); } - off = 8 + (ucp[7] * 4); + off = 8 + (desc[7] * 4); } + spin_lock_irqsave(&pg->lock, flags); sdev_printk(KERN_INFO, sdev, "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", - ALUA_DH_NAME, h->group_id, print_alua_state(h->state), - h->pref ? "preferred" : "non-preferred", + ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), + pg->pref ? 
"preferred" : "non-preferred", valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', @@ -636,36 +655,224 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int wait_ valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); - switch (h->state) { + switch (pg->state) { case TPGS_STATE_TRANSITIONING: - if (wait_for_transition) { - if (time_before(jiffies, expiry)) { - /* State transition, retry */ - interval += 2000; - msleep(interval); - goto retry; - } + if (time_before(jiffies, pg->expiry)) { + /* State transition, retry */ + pg->interval = 2; err = SCSI_DH_RETRY; } else { - err = SCSI_DH_OK; + /* Transitioning time exceeded, set port to standby */ + err = SCSI_DH_IO; + pg->state = TPGS_STATE_STANDBY; + pg->expiry = 0; } - - /* Transitioning time exceeded, set port to standby */ - h->state = TPGS_STATE_STANDBY; break; case TPGS_STATE_OFFLINE: /* Path unusable */ err = SCSI_DH_DEV_OFFLINED; + pg->expiry = 0; break; default: /* Useable path if active */ err = SCSI_DH_OK; + pg->expiry = 0; break; } + spin_unlock_irqrestore(&pg->lock, flags); + kfree(buff); return err; } /* + * alua_stpg - Issue a SET TARGET PORT GROUP command + * + * Issue a SET TARGET PORT GROUP command and evaluate the + * response. Returns SCSI_DH_RETRY per default to trigger + * a re-evaluation of the target group state or SCSI_DH_OK + * if no further action needs to be taken. 
+ */ +static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) +{ + int retval; + struct scsi_sense_hdr sense_hdr; + + if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { + /* Only implicit ALUA supported, retry */ + return SCSI_DH_RETRY; + } + switch (pg->state) { + case TPGS_STATE_OPTIMIZED: + return SCSI_DH_OK; + case TPGS_STATE_NONOPTIMIZED: + if ((pg->flags & ALUA_OPTIMIZE_STPG) && + !pg->pref && + (pg->tpgs & TPGS_MODE_IMPLICIT)) + return SCSI_DH_OK; + break; + case TPGS_STATE_STANDBY: + case TPGS_STATE_UNAVAILABLE: + break; + case TPGS_STATE_OFFLINE: + return SCSI_DH_IO; + case TPGS_STATE_TRANSITIONING: + break; + default: + sdev_printk(KERN_INFO, sdev, + "%s: stpg failed, unhandled TPGS state %d", + ALUA_DH_NAME, pg->state); + return SCSI_DH_NOSYS; + } + retval = submit_stpg(sdev, pg->group_id, &sense_hdr); + + if (retval) { + if (!scsi_sense_valid(&sense_hdr)) { + sdev_printk(KERN_INFO, sdev, + "%s: stpg failed, result %d", + ALUA_DH_NAME, retval); + if (driver_byte(retval) == DRIVER_ERROR) + return SCSI_DH_DEV_TEMP_BUSY; + } else { + sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", + ALUA_DH_NAME); + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); + } + } + /* Retry RTPG */ + return SCSI_DH_RETRY; +} + +static void alua_rtpg_work(struct work_struct *work) +{ + struct alua_port_group *pg = + container_of(work, struct alua_port_group, rtpg_work.work); + struct scsi_device *sdev; + LIST_HEAD(qdata_list); + int err = SCSI_DH_OK; + struct alua_queue_data *qdata, *tmp; + unsigned long flags; + struct workqueue_struct *alua_wq = kaluad_wq; + + spin_lock_irqsave(&pg->lock, flags); + sdev = pg->rtpg_sdev; + if (!sdev) { + WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); + WARN_ON(pg->flags & ALUA_PG_RUN_STPG); + spin_unlock_irqrestore(&pg->lock, flags); + return; + } + if (pg->flags & ALUA_SYNC_STPG) + alua_wq = kaluad_sync_wq; + pg->flags |= ALUA_PG_RUNNING; + if (pg->flags & ALUA_PG_RUN_RTPG) { + int state = pg->state; + + pg->flags &= ~ALUA_PG_RUN_RTPG; 
+ spin_unlock_irqrestore(&pg->lock, flags); + if (state == TPGS_STATE_TRANSITIONING) { + if (alua_tur(sdev) == SCSI_DH_RETRY) { + spin_lock_irqsave(&pg->lock, flags); + pg->flags &= ~ALUA_PG_RUNNING; + pg->flags |= ALUA_PG_RUN_RTPG; + spin_unlock_irqrestore(&pg->lock, flags); + queue_delayed_work(alua_wq, &pg->rtpg_work, + pg->interval * HZ); + return; + } + /* Send RTPG on failure or if TUR indicates SUCCESS */ + } + err = alua_rtpg(sdev, pg); + spin_lock_irqsave(&pg->lock, flags); + if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { + pg->flags &= ~ALUA_PG_RUNNING; + pg->flags |= ALUA_PG_RUN_RTPG; + spin_unlock_irqrestore(&pg->lock, flags); + queue_delayed_work(alua_wq, &pg->rtpg_work, + pg->interval * HZ); + return; + } + if (err != SCSI_DH_OK) + pg->flags &= ~ALUA_PG_RUN_STPG; + } + if (pg->flags & ALUA_PG_RUN_STPG) { + pg->flags &= ~ALUA_PG_RUN_STPG; + spin_unlock_irqrestore(&pg->lock, flags); + err = alua_stpg(sdev, pg); + spin_lock_irqsave(&pg->lock, flags); + if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { + pg->flags |= ALUA_PG_RUN_RTPG; + pg->interval = 0; + pg->flags &= ~ALUA_PG_RUNNING; + spin_unlock_irqrestore(&pg->lock, flags); + queue_delayed_work(alua_wq, &pg->rtpg_work, + pg->interval * HZ); + return; + } + } + + list_splice_init(&pg->rtpg_list, &qdata_list); + pg->rtpg_sdev = NULL; + spin_unlock_irqrestore(&pg->lock, flags); + + list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { + list_del(&qdata->entry); + if (qdata->callback_fn) + qdata->callback_fn(qdata->callback_data, err); + kfree(qdata); + } + spin_lock_irqsave(&pg->lock, flags); + pg->flags &= ~ALUA_PG_RUNNING; + spin_unlock_irqrestore(&pg->lock, flags); + scsi_device_put(sdev); + kref_put(&pg->kref, release_port_group); +} + +static void alua_rtpg_queue(struct alua_port_group *pg, + struct scsi_device *sdev, + struct alua_queue_data *qdata, bool force) +{ + int start_queue = 0; + unsigned long flags; + struct workqueue_struct *alua_wq = kaluad_wq; + + if (!pg) 
+ return; + + spin_lock_irqsave(&pg->lock, flags); + if (qdata) { + list_add_tail(&qdata->entry, &pg->rtpg_list); + pg->flags |= ALUA_PG_RUN_STPG; + force = true; + } + if (pg->rtpg_sdev == NULL) { + pg->interval = 0; + pg->flags |= ALUA_PG_RUN_RTPG; + kref_get(&pg->kref); + pg->rtpg_sdev = sdev; + scsi_device_get(sdev); + start_queue = 1; + } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { + pg->flags |= ALUA_PG_RUN_RTPG; + /* Do not queue if the worker is already running */ + if (!(pg->flags & ALUA_PG_RUNNING)) { + kref_get(&pg->kref); + start_queue = 1; + } + } + + if (pg->flags & ALUA_SYNC_STPG) + alua_wq = kaluad_sync_wq; + spin_unlock_irqrestore(&pg->lock, flags); + + if (start_queue && + !queue_delayed_work(alua_wq, &pg->rtpg_work, + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { + scsi_device_put(sdev); + kref_put(&pg->kref, release_port_group); + } +} + +/* * alua_initialize - Initialize ALUA state * @sdev: the device to be initialized * @@ -674,21 +881,14 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int wait_ */ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) { - int err; - - err = alua_check_tpgs(sdev, h); - if (err != SCSI_DH_OK) - goto out; - - err = alua_vpd_inquiry(sdev, h); - if (err != SCSI_DH_OK) - goto out; - - err = alua_rtpg(sdev, h, 0); - if (err != SCSI_DH_OK) - goto out; - -out: + int err = SCSI_DH_DEV_UNSUPP, tpgs; + + mutex_lock(&h->init_mutex); + tpgs = alua_check_tpgs(sdev); + if (tpgs != TPGS_MODE_NONE) + err = alua_check_vpd(sdev, h, tpgs); + h->init_error = err; + mutex_unlock(&h->init_mutex); return err; } /* @@ -703,9 +903,11 @@ out: static int alua_set_params(struct scsi_device *sdev, const char *params) { struct alua_dh_data *h = sdev->handler_data; + struct alua_port_group __rcu *pg = NULL; unsigned int optimize = 0, argc; const char *p = params; int result = SCSI_DH_OK; + unsigned long flags; if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) return -EINVAL; @@ 
-715,18 +917,23 @@ static int alua_set_params(struct scsi_device *sdev, const char *params) if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) return -EINVAL; + rcu_read_lock(); + pg = rcu_dereference(h->pg); + if (!pg) { + rcu_read_unlock(); + return -ENXIO; + } + spin_lock_irqsave(&pg->lock, flags); if (optimize) - h->flags |= ALUA_OPTIMIZE_STPG; + pg->flags |= ALUA_OPTIMIZE_STPG; else - h->flags &= ~ALUA_OPTIMIZE_STPG; + pg->flags &= ~ALUA_OPTIMIZE_STPG; + spin_unlock_irqrestore(&pg->lock, flags); + rcu_read_unlock(); return result; } -static uint optimize_stpg; -module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); - /* * alua_activate - activate a path * @sdev: device on the path to be activated @@ -742,48 +949,33 @@ static int alua_activate(struct scsi_device *sdev, { struct alua_dh_data *h = sdev->handler_data; int err = SCSI_DH_OK; - int stpg = 0; + struct alua_queue_data *qdata; + struct alua_port_group __rcu *pg; - err = alua_rtpg(sdev, h, 1); - if (err != SCSI_DH_OK) + qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); + if (!qdata) { + err = SCSI_DH_RES_TEMP_UNAVAIL; goto out; - - if (optimize_stpg) - h->flags |= ALUA_OPTIMIZE_STPG; - - if (h->tpgs & TPGS_MODE_EXPLICIT) { - switch (h->state) { - case TPGS_STATE_NONOPTIMIZED: - stpg = 1; - if ((h->flags & ALUA_OPTIMIZE_STPG) && - (!h->pref) && - (h->tpgs & TPGS_MODE_IMPLICIT)) - stpg = 0; - break; - case TPGS_STATE_STANDBY: - case TPGS_STATE_UNAVAILABLE: - stpg = 1; - break; - case TPGS_STATE_OFFLINE: - err = SCSI_DH_IO; - break; - case TPGS_STATE_TRANSITIONING: - err = SCSI_DH_RETRY; - break; - default: - break; - } } - - if (stpg) { - h->callback_fn = fn; - h->callback_data = data; - err = submit_stpg(h); - if (err == SCSI_DH_OK) - return 0; - h->callback_fn = h->callback_data = NULL; + qdata->callback_fn = fn; + qdata->callback_data = data; + + 
mutex_lock(&h->init_mutex); + rcu_read_lock(); + pg = rcu_dereference(h->pg); + if (!pg || !kref_get_unless_zero(&pg->kref)) { + rcu_read_unlock(); + kfree(qdata); + err = h->init_error; + mutex_unlock(&h->init_mutex); + goto out; } + fn = NULL; + rcu_read_unlock(); + mutex_unlock(&h->init_mutex); + alua_rtpg_queue(pg, sdev, qdata, true); + kref_put(&pg->kref, release_port_group); out: if (fn) fn(data, err); @@ -791,6 +983,29 @@ out: } /* + * alua_check - check path status + * @sdev: device on the path to be checked + * + * Check the device status + */ +static void alua_check(struct scsi_device *sdev, bool force) +{ + struct alua_dh_data *h = sdev->handler_data; + struct alua_port_group *pg; + + rcu_read_lock(); + pg = rcu_dereference(h->pg); + if (!pg || !kref_get_unless_zero(&pg->kref)) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + alua_rtpg_queue(pg, sdev, NULL, force); + kref_put(&pg->kref, release_port_group); +} + +/* * alua_prep_fn - request callback * * Fail I/O to all paths not in state @@ -799,13 +1014,20 @@ out: static int alua_prep_fn(struct scsi_device *sdev, struct request *req) { struct alua_dh_data *h = sdev->handler_data; + struct alua_port_group __rcu *pg; + int state = TPGS_STATE_OPTIMIZED; int ret = BLKPREP_OK; - if (h->state == TPGS_STATE_TRANSITIONING) + rcu_read_lock(); + pg = rcu_dereference(h->pg); + if (pg) + state = pg->state; + rcu_read_unlock(); + if (state == TPGS_STATE_TRANSITIONING) ret = BLKPREP_DEFER; - else if (h->state != TPGS_STATE_OPTIMIZED && - h->state != TPGS_STATE_NONOPTIMIZED && - h->state != TPGS_STATE_LBA_DEPENDENT) { + else if (state != TPGS_STATE_OPTIMIZED && + state != TPGS_STATE_NONOPTIMIZED && + state != TPGS_STATE_LBA_DEPENDENT) { ret = BLKPREP_KILL; req->cmd_flags |= REQ_QUIET; } @@ -820,20 +1042,20 @@ static int alua_prep_fn(struct scsi_device *sdev, struct request *req) static int alua_bus_attach(struct scsi_device *sdev) { struct alua_dh_data *h; - int err; + int err, ret = -EINVAL; h = 
kzalloc(sizeof(*h) , GFP_KERNEL); if (!h) return -ENOMEM; - h->tpgs = TPGS_MODE_UNINITIALIZED; - h->state = TPGS_STATE_OPTIMIZED; - h->group_id = -1; - h->rel_port = -1; - h->buff = h->inq; - h->bufflen = ALUA_INQUIRY_SIZE; + spin_lock_init(&h->pg_lock); + rcu_assign_pointer(h->pg, NULL); + h->init_error = SCSI_DH_OK; h->sdev = sdev; + mutex_init(&h->init_mutex); err = alua_initialize(sdev, h); + if (err == SCSI_DH_NOMEM) + ret = -ENOMEM; if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) goto failed; @@ -841,7 +1063,7 @@ static int alua_bus_attach(struct scsi_device *sdev) return 0; failed: kfree(h); - return -EINVAL; + return ret; } /* @@ -851,9 +1073,16 @@ failed: static void alua_bus_detach(struct scsi_device *sdev) { struct alua_dh_data *h = sdev->handler_data; + struct alua_port_group *pg; + + spin_lock(&h->pg_lock); + pg = h->pg; + rcu_assign_pointer(h->pg, NULL); + h->sdev = NULL; + spin_unlock(&h->pg_lock); + if (pg) + kref_put(&pg->kref, release_port_group); - if (h->buff && h->inq != h->buff) - kfree(h->buff); sdev->handler_data = NULL; kfree(h); } @@ -873,16 +1102,31 @@ static int __init alua_init(void) { int r; + kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); + if (!kaluad_wq) { + /* Temporary failure, bypass */ + return SCSI_DH_DEV_TEMP_BUSY; + } + kaluad_sync_wq = create_workqueue("kaluad_sync"); + if (!kaluad_sync_wq) { + destroy_workqueue(kaluad_wq); + return SCSI_DH_DEV_TEMP_BUSY; + } r = scsi_register_device_handler(&alua_dh); - if (r != 0) + if (r != 0) { printk(KERN_ERR "%s: Failed to register scsi device handler", ALUA_DH_NAME); + destroy_workqueue(kaluad_sync_wq); + destroy_workqueue(kaluad_wq); + } return r; } static void __exit alua_exit(void) { scsi_unregister_device_handler(&alua_dh); + destroy_workqueue(kaluad_sync_wq); + destroy_workqueue(kaluad_wq); } module_init(alua_init); diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index da2e068..0678535 100644 --- a/drivers/scsi/scsi_devinfo.c +++ 
b/drivers/scsi/scsi_devinfo.c @@ -219,6 +219,8 @@ static struct { {"NAKAMICH", "MJ-5.16S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"NEC", "PD-1 ODX654P", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"NEC", "iStorage", NULL, BLIST_REPORTLUN2}, + {"NETAPP", "LUN C-Mode", NULL, BLIST_SYNC_ALUA}, + {"NETAPP", "INF-01-00", NULL, BLIST_SYNC_ALUA}, {"NRC", "MBR-7", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"NRC", "MBR-7.4", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-600", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index dd8ad2a..fa6b2c4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -23,6 +23,7 @@ #include <linux/scatterlist.h> #include <linux/blk-mq.h> #include <linux/ratelimit.h> +#include <asm/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -3154,3 +3155,190 @@ void sdev_enable_disk_events(struct scsi_device *sdev) atomic_dec(&sdev->disk_events_disable_depth); } EXPORT_SYMBOL(sdev_enable_disk_events); + +/** + * scsi_vpd_lun_id - return a unique device identification + * @sdev: SCSI device + * @id: buffer for the identification + * @id_len: length of the buffer + * + * Copies a unique device identification into @id based + * on the information in the VPD page 0x83 of the device. + * The string will be formatted as a SCSI name string. + * + * Returns the length of the identification or error on failure. + * If the identifier is longer than the supplied buffer the actual + * identifier length is returned and the buffer is not zero-padded. + */ +int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) +{ + u8 cur_id_type = 0xff; + u8 cur_id_size = 0; + unsigned char *d, *cur_id_str; + unsigned char __rcu *vpd_pg83; + int id_size = -EINVAL; + + rcu_read_lock(); + vpd_pg83 = rcu_dereference(sdev->vpd_pg83); + if (!vpd_pg83) { + rcu_read_unlock(); + return -ENXIO; + } + + /* + * Look for the correct descriptor. 
+ * Order of preference for lun descriptor: + * - SCSI name string + * - NAA IEEE Registered Extended + * - EUI-64 based 16-byte + * - EUI-64 based 12-byte + * - NAA IEEE Registered + * - NAA IEEE Extended + * as longer descriptors reduce the likelihood + * of identification clashes. + */ + + /* The id string must be at least 20 bytes + terminating NULL byte */ + if (id_len < 21) { + rcu_read_unlock(); + return -EINVAL; + } + + memset(id, 0, id_len); + d = vpd_pg83 + 4; + while (d < vpd_pg83 + sdev->vpd_pg83_len) { + /* Skip designators not referring to the LUN */ + if ((d[1] & 0x30) != 0x00) + goto next_desig; + + switch (d[1] & 0xf) { + case 0x2: + /* EUI-64 */ + if (cur_id_size > d[3]) + break; + /* Prefer NAA IEEE Registered Extended */ + if (cur_id_type == 0x3 && + cur_id_size == d[3]) + break; + cur_id_size = d[3]; + cur_id_str = d + 4; + cur_id_type = d[1] & 0xf; + switch (cur_id_size) { + case 8: + id_size = snprintf(id, id_len, + "eui.%8phN", + cur_id_str); + break; + case 12: + id_size = snprintf(id, id_len, + "eui.%12phN", + cur_id_str); + break; + case 16: + id_size = snprintf(id, id_len, + "eui.%16phN", + cur_id_str); + break; + default: + cur_id_size = 0; + break; + } + break; + case 0x3: + /* NAA */ + if (cur_id_size > d[3]) + break; + cur_id_size = d[3]; + cur_id_str = d + 4; + cur_id_type = d[1] & 0xf; + switch (cur_id_size) { + case 8: + id_size = snprintf(id, id_len, + "naa.%8phN", + cur_id_str); + break; + case 16: + id_size = snprintf(id, id_len, + "naa.%16phN", + cur_id_str); + break; + default: + cur_id_size = 0; + break; + } + break; + case 0x8: + /* SCSI name string */ + if (cur_id_size + 4 > d[3]) + break; + /* Prefer others for truncated descriptor */ + if (cur_id_size && d[3] > id_len) + break; + cur_id_size = id_size = d[3]; + cur_id_str = d + 4; + cur_id_type = d[1] & 0xf; + if (cur_id_size >= id_len) + cur_id_size = id_len - 1; + memcpy(id, cur_id_str, cur_id_size); + /* Decrease priority for truncated descriptor */ + if (cur_id_size
!= id_size) + cur_id_size = 6; + break; + default: + break; + } +next_desig: + d += d[3] + 4; + } + rcu_read_unlock(); + + return id_size; +} +EXPORT_SYMBOL(scsi_vpd_lun_id); + +/* + * scsi_vpd_tpg_id - return a target port group identifier + * @sdev: SCSI device + * + * Returns the Target Port Group identifier from the information + * from VPD page 0x83 of the device. + * + * Returns the identifier or error on failure. + */ +int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id) +{ + unsigned char *d; + unsigned char __rcu *vpd_pg83; + int group_id = -EAGAIN, rel_port = -1; + + rcu_read_lock(); + vpd_pg83 = rcu_dereference(sdev->vpd_pg83); + if (!vpd_pg83) { + rcu_read_unlock(); + return -ENXIO; + } + + d = sdev->vpd_pg83 + 4; + while (d < sdev->vpd_pg83 + sdev->vpd_pg83_len) { + switch (d[1] & 0xf) { + case 0x4: + /* Relative target port */ + rel_port = get_unaligned_be16(&d[6]); + break; + case 0x5: + /* Target port group */ + group_id = get_unaligned_be16(&d[6]); + break; + default: + break; + } + d += d[3] + 4; + } + rcu_read_unlock(); + + if (group_id >= 0 && rel_id && rel_port != -1) + *rel_id = rel_port; + + return group_id; +} +EXPORT_SYMBOL(scsi_vpd_tpg_id); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f0cfaac..d84b2c5 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -962,6 +962,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags & BLIST_NO_DIF) sdev->no_dif = 1; + if (*bflags & BLIST_SYNC_ALUA) + sdev->synchronous_alua = 1; + sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; if (*bflags & BLIST_TRY_VPD_PAGES) diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 4f6ba34..c7c8876 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -175,6 +175,7 @@ struct scsi_device { unsigned no_dif:1; /* T10 PI (DIF) should be disabled */ unsigned broken_fua:1; /* Don't set FUA bit */ unsigned lun_in_cdb:1; /* Store LUN bits in CDB[1] */
+ unsigned synchronous_alua:1; /* Synchronous ALUA commands */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ @@ -415,6 +416,8 @@ static inline int scsi_execute_req(struct scsi_device *sdev, } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); +extern int scsi_vpd_lun_id(struct scsi_device *, char *, size_t); +extern int scsi_vpd_tpg_id(struct scsi_device *, int *); #ifdef CONFIG_PM extern int scsi_autopm_get_device(struct scsi_device *); diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 96e3f56..9f750cb 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -37,5 +37,6 @@ #define BLIST_TRY_VPD_PAGES 0x10000000 /* Attempt to read VPD pages */ #define BLIST_NO_RSOC 0x20000000 /* don't try to issue RSOC */ #define BLIST_MAX_1024 0x40000000 /* maximum 1024 sector cdb length */ +#define BLIST_SYNC_ALUA 0x80000000 /* Synchronous ALUA commands */ #endif diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 85d7317..7e184c6 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -52,6 +52,7 @@ enum { SCSI_DH_TIMED_OUT, SCSI_DH_RES_TEMP_UNAVAIL, SCSI_DH_DEV_OFFLINED, + SCSI_DH_NOMEM, SCSI_DH_NOSYS, SCSI_DH_DRIVER_MAX, };