Patchwork [5/5,scsi-bsg] : Add initial support for BSG based SCSIDeviceInfo

login
register
mail settings
Submitter Nicholas A. Bellinger
Date June 14, 2010, 9:44 a.m.
Message ID <1276508686-3100-1-git-send-email-nab@linux-iscsi.org>
Download mbox | patch
Permalink /patch/55507/
State New
Headers show

Comments

Nicholas A. Bellinger - June 14, 2010, 9:44 a.m.
From: Nicholas Bellinger <nab@linux-iscsi.org>

This patch adds initial support for using the Linux BSG interface with write/read vectored
AIO as a QEMU backstore (SCSIDeviceInfo) with hw/scsi-bus.c compatible HBA emulation.

So far it has been tested with x86_64 host and guest using hw/megasas.c and TCM_Loop LLD
Port LUNs.  This path uses struct iovec for struct sg_io_v4->d[out,in]_xferp payloads,
which currently requires a patch to linux/block/bsg.c:bsg_map_hdr() in order to set up the
user -> kernel iovecs.  It also currently only works with matching user/kernel word sizes
(e.g. 64-bit user / 64-bit kernel) because of different pointer sizes in struct iovec->iov_base.

There are also two FIXMEs in hw/scsi-bsg.c:bsg_generic_initfn() related to extraction of
SCSI LUN and device type values using BSG and required by QEMU-KVM.

Signed-off-by: Nicholas A. Bellinger <nab@linux-iscsi.org>
---
 Makefile.objs |    2 +-
 hw/scsi-bsg.c |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 589 insertions(+), 1 deletions(-)
 create mode 100644 hw/scsi-bsg.c
Blue Swirl - June 14, 2010, 6:11 p.m.
On Mon, Jun 14, 2010 at 9:44 AM, Nicholas A. Bellinger
<nab@linux-iscsi.org> wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
>
> This patch adds initial support for using the Linux BSG interface with write/read vectored
> AIO as a QEMU backstore (SCSIDeviceInfo) with hw/scsi-bus.c compatible HBA emulation.

Did I miss the docs?

>
> So far it has been tested with x86_64 host and guest using hw/megasas.c and TCM_Loop LLD
> Port LUNs.  Because this path uses struct iovec for struct sg_io_v4->d[out,in]_xferp payloads,
> which currently requires a patch to linux/block/bsg.c:bsg_map_hdr() in order to setup the
> user -> kernel iovecs.  This also will only currently work with paired user/kernel
> (eg: 64bit user / 64bit kernel) because of different pointer sizes in struct iovec->iov_base.
>
> There are also two FIXMEs in hw/scsi-bsg.c:bsg_generic_initfn() related to extraction of
> SCSI LUN and device type values using BSG and required by QEMU-KVM.
>
> Signed-off-by: Nicholas A. Bellinger <nab@linux-iscsi.org>
> ---
>  Makefile.objs |    2 +-
>  hw/scsi-bsg.c |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 589 insertions(+), 1 deletions(-)
>  create mode 100644 hw/scsi-bsg.c
>
> diff --git a/Makefile.objs b/Makefile.objs
> index 188d617..c4fcb72 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -197,7 +197,7 @@ hw-obj-$(CONFIG_IDE_CMD646) += ide/cmd646.o
>  hw-obj-$(CONFIG_IDE_MACIO) += ide/macio.o
>
>  # SCSI layer
> -hw-obj-y += scsi-disk.o scsi-generic.o
> +hw-obj-y += scsi-disk.o scsi-generic.o scsi-bsg.o

Instead of '#ifdef __linux__' (which should be '#ifdef CONFIG_LINUX'),
please compile the object only if CONFIG_LINUX is set, something like:
hw-obj-$(CONFIG_LINUX) += scsi-bsg.o

Please also check if this could be compiled in common-obj set.

>  hw-obj-y += lsi53c895a.o megasas.o
>  hw-obj-$(CONFIG_ESP) += esp.o
>
> diff --git a/hw/scsi-bsg.c b/hw/scsi-bsg.c
> new file mode 100644
> index 0000000..fc76b76
> --- /dev/null
> +++ b/hw/scsi-bsg.c
> @@ -0,0 +1,588 @@
> +/*
> + * block layer implementation of the sg v4 interface for Linux hosts
> + *
> + * Copyright (c) 2010 Rising Tide Systems
> + * Written by Nicholas A. Bellinger <nab@linux-iscsi.org>
> + *
> + * Based on hw/scsi-generic code by Laurent Vivier, Paul Brook, and Fabrice Bellard
> + *
> + * This code is licenced under the LGPL.
> + */
> +
> +#include "qemu-common.h"
> +#include "qemu-error.h"
> +#include "block.h"
> +#include "scsi.h"
> +#include "dma.h"
> +#include "block/raw-posix-aio.h"
> +
> +#ifdef __linux__
> +
> +#define DEBUG_BSG
> +#undef DEBUG_BSG_IO
> +#undef DEBUG_BSG_MAP

This should be
//#define DEBUG_BSG
//#define DEBUG_BSG_IO
//#define DEBUG_BSG_MAP

> +
> +#ifdef DEBUG_BSG
> +#define DPRINTF(fmt, ...) \
> +do { printf("scsi-bsg: " fmt , ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) do {} while(0)
> +#endif
> +
> +#define BADF(fmt, ...) \
> +do { fprintf(stderr, "scsi-bsg: " fmt , ## __VA_ARGS__); } while (0)
> +
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/epoll.h>
> +#include <unistd.h>
> +#include <scsi/sg.h>
> +#include <linux/bsg.h>
> +#include "scsi-defs.h"
> +
> +#define SCSI_SENSE_BUF_SIZE 96
> +
> +#define SG_ERR_DRIVER_TIMEOUT 0x06
> +#define SG_ERR_DRIVER_SENSE 0x08
> +
> +#ifndef MAX_UINT
> +#define MAX_UINT ((unsigned int)-1)

The standard macro is UINT_MAX.

> +#endif
> +
> +typedef struct SCSIBSGState SCSIBSGState;
> +
> +typedef struct SCSIBSGReq {
> +    SCSIRequest req;
> +    uint8_t *buf;
> +    int buflen;
> +    QEMUIOVector iov;
> +    QEMUIOVector aio_iov;
> +    struct sg_io_v4 bsg_hdr;
> +} SCSIBSGReq;
> +
> +struct SCSIBSGState {
> +    SCSIDevice qdev;
> +    BlockDriverState *bs;
> +    int lun;
> +    int driver_status;
> +    uint8_t sensebuf[SCSI_SENSE_BUF_SIZE];
> +    uint8_t senselen;
> +};
> +
> +static int bsg_read(int fd, void *p_read, int to_read)
> +{
> +    int err;
> +
> +    while (to_read > 0) {
> +        err = read(fd, p_read, to_read);
> +        if (err >= 0) {
> +            to_read -= err;
> +            p_read += err;
> +        } else if (errno == EINTR)
> +            continue;
> +        else {
> +            printf("bsg device %d read failed, errno: %d\n",
> +                    fd, errno);

DPRINTF?

> +            return errno;
> +        }
> +    }
> +    return 0;
> +}
> +
> +static SCSIBSGReq *bsg_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun)
> +{
> +    SCSIRequest *req;
> +    SCSIBSGReq *r;
> +
> +    req = scsi_req_alloc(sizeof(SCSIBSGReq), d, tag, lun);
> +    r = DO_UPCAST(SCSIBSGReq, req, req);
> +    qemu_iovec_init(&r->iov, 1);
> +    qemu_iovec_init(&r->aio_iov, 1);
> +    return r;
> +}
> +
> +static void bsg_remove_request(SCSIBSGReq *r)
> +{
> +    qemu_free(r->buf);
> +    qemu_iovec_destroy(&r->iov);
> +    qemu_iovec_destroy(&r->aio_iov);
> +    scsi_req_free(&r->req);
> +}
> +
> +static void bsg_command_complete(void *opaque, int ret)
> +{
> +    SCSIBSGReq *r = (SCSIBSGReq *)opaque;

Useless cast in C.

> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> +
> +    s->driver_status = r->bsg_hdr.driver_status;
> +    if (s->driver_status)
> +        s->senselen = SCSI_SENSE_BUF_SIZE;
> +
> +    if (ret != 0) {
> +        scsi_req_print(&r->req);
> +        fprintf(stderr, "%s: ret %d (%s)\n", __FUNCTION__,
> +                ret, strerror(-ret));

error_report()?

> +        s->senselen = scsi_build_sense(SENSE_CODE(INVALID_FIELD),
> +                    s->sensebuf, SCSI_SENSE_BUF_SIZE, 0);
> +        s->driver_status = SG_ERR_DRIVER_SENSE;
> +        r->req.status = CHECK_CONDITION;
> +    } else {
> +        if (s->driver_status & SG_ERR_DRIVER_TIMEOUT) {
> +            scsi_req_print(&r->req);
> +            fprintf(stderr, "%s: timeout\n", __FUNCTION__);
> +            r->req.status = BUSY << 1;
> +        } else if (r->bsg_hdr.device_status) {
> +            r->req.status = r->bsg_hdr.device_status;
> +        } else if (s->driver_status & SG_ERR_DRIVER_SENSE) {
> +            scsi_req_print(&r->req);
> +            fprintf(stderr, "%s: driver sense\n", __FUNCTION__);
> +            r->req.status = CHECK_CONDITION << 1;
> +        } else {
> +            r->req.status = GOOD << 1;
> +        }
> +    }
> +#ifdef DEBUG_BSG_IO
> +    DPRINTF("Command complete 0x%p tag=0x%x status=%d\n",
> +            r, r->req.tag, r->req.status);

Please introduce DPRINTF_BSG_IO and remove #ifdef/#endif.

> +#endif
> +    scsi_req_complete(&r->req);
> +}
> +
> +static int bsg_execute_command_run(SCSIBSGReq *r,
> +                                   BlockDriverCompletionFunc *complete)
> +{
> +    BlockDriverState *bdrv = r->req.dev->conf.dinfo->bdrv;
> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> +    /*
> +     * Following linux/include/linux/bsg.h
> +     */
> +    /* [i] 'Q' to differentiate from v3 */
> +    r->bsg_hdr.guard = 'Q';
> +    r->bsg_hdr.protocol = BSG_PROTOCOL_SCSI;
> +    r->bsg_hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
> +    r->bsg_hdr.request_len = r->req.cmd.len;
> +    r->bsg_hdr.request = (unsigned long)r->req.cmd.buf;
> +    r->bsg_hdr.max_response_len = sizeof(s->sensebuf);
> +    /* SCSI: (auto)sense data */
> +    r->bsg_hdr.response = (unsigned long)s->sensebuf;
> +    /* Unlimited timeout */
> +    r->bsg_hdr.timeout = MAX_UINT;
> +    /* [i->o] unused internally */
> +    r->bsg_hdr.usr_ptr = (unsigned long)r;
> +    /* Bsg does Q_AT_HEAD by default */
> +    r->bsg_hdr.flags |= BSG_FLAG_Q_AT_TAIL;

Does something initialize r->bsg_hdr.flags before?

> +
> +    qemu_iovec_reset(&r->aio_iov);
> +    qemu_iovec_add(&r->aio_iov, &r->bsg_hdr, sizeof(r->bsg_hdr));
> +
> +    r->req.aiocb = paio_submit_len(bdrv, bdrv->fd, 0, &r->aio_iov,
> +                sizeof(r->bsg_hdr), complete, r, QEMU_AIO_WRITE);
> +    if (r->req.aiocb == NULL) {
> +        BADF("execute_command: paio_submit_len() failed\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int bsg_execute_command_buf(SCSIBSGReq *r,
> +                                   BlockDriverCompletionFunc *complete,
> +                                   uint8_t *buf, uint32_t buflen)
> +{
> +    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
> +        r->bsg_hdr.dout_xferp = (unsigned long)buf;
> +        r->bsg_hdr.dout_xfer_len = buflen;
> +    } else if (r->req.cmd.mode == SCSI_XFER_FROM_DEV) {
> +        r->bsg_hdr.din_xferp = (unsigned long)buf;
> +        r->bsg_hdr.din_xfer_len = buflen;
> +    }
> +#ifdef DEBUG_BSG_IO
> +    DPRINTF("execute BUF: %p, dxfer_len %u\n", buf, buflen);
> +#endif
> +    return bsg_execute_command_run(r, complete);
> +}
> +
> +static int bsg_execute_command_iov(SCSIBSGReq *r,
> +                                   BlockDriverCompletionFunc *complete,
> +                                   QEMUIOVector *iov)
> +{
> +    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
> +        r->bsg_hdr.dout_iovec_count = iov->niov;
> +        r->bsg_hdr.dout_xferp = (unsigned long)iov->iov;
> +        r->bsg_hdr.dout_xfer_len = iov->size;
> +    } else if (r->req.cmd.mode == SCSI_XFER_FROM_DEV) {
> +        r->bsg_hdr.din_iovec_count = iov->niov;
> +        r->bsg_hdr.din_xferp = (unsigned long)iov->iov;
> +        r->bsg_hdr.din_xfer_len = iov->size;
> +    }
> +#ifdef DEBUG_BSG_IO
> +    DPRINTF("execute IOV: iovec_count: %u, iov: %p, size: %u\n",
> +            iov->niov, iov->iov, (unsigned int)iov->size);

You can remove the cast by using '%zu'.

> +#endif
> +    return bsg_execute_command_run(r, complete);
> +}
> +
> +static void bsg_write_complete(void *opaque, int ret)
> +{
> +    SCSIBSGReq *r = (SCSIBSGReq *)opaque;
> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> +#ifdef DEBUG_BSG_IO
> +    DPRINTF("bsg_write_complete() ret = %d\n", ret);
> +#endif
> +    if (ret) {
> +        DPRINTF("IO error\n");
> +        bsg_command_complete(r, ret);
> +        return;
> +    }
> +    /*
> +     * Copied from hw/scsi-generic.c:scsi_write_complete(), is this still
> +     * necessary for BSG..?
> +     */
> +    if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
> +        s->qdev.type == TYPE_TAPE) {
> +        s->qdev.blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11];
> +        DPRINTF("block size %d\n", s->qdev.blocksize);
> +    }
> +
> +    bsg_command_complete(r, ret);
> +}
> +
> +static void bsg_req_fixup(SCSIRequest *req)
> +{
> +    return;
> +}
> +
> +static int bsg_get_blocksize(BlockDriverState *bdrv)
> +{
> +    uint8_t cmd[10];
> +    uint8_t buf[8];
> +    uint8_t sensebuf[8];
> +    struct sg_io_v4 bsg_hdr;
> +    int ret;
> +
> +    memset(cmd, 0, sizeof(cmd));
> +    memset(buf, 0, sizeof(buf));
> +    cmd[0] = READ_CAPACITY;
> +
> +    memset(&bsg_hdr, 0, sizeof(bsg_hdr));
> +    bsg_hdr.guard = 'Q';
> +    bsg_hdr.protocol = BSG_PROTOCOL_SCSI;
> +    bsg_hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
> +    bsg_hdr.request_len = sizeof(cmd);
> +    bsg_hdr.request = (unsigned long)cmd;
> +    bsg_hdr.din_xfer_len = sizeof(buf);
> +    bsg_hdr.din_xferp = (unsigned long)buf;
> +    bsg_hdr.max_response_len = sizeof(sensebuf);
> +    bsg_hdr.response = (unsigned long)sensebuf;
> +    bsg_hdr.timeout = 6000; /* XXX */
> +
> +    ret = bdrv_ioctl(bdrv, SG_IO, (void *)&bsg_hdr);
> +    if (ret < 0)
> +        return -1;
> +
> +    return (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
> +}
> +
> +static int bsg_get_stream_blocksize(BlockDriverState *bdrv)
> +{
> +    uint8_t cmd[6];
> +    uint8_t buf[12];
> +    uint8_t sensebuf[8];
> +    struct sg_io_v4 bsg_hdr;
> +    int ret;
> +
> +    memset(cmd, 0, sizeof(cmd));
> +    memset(buf, 0, sizeof(buf));
> +    cmd[0] = MODE_SENSE;
> +    cmd[4] = sizeof(buf);
> +
> +    memset(&bsg_hdr, 0, sizeof(bsg_hdr));
> +    bsg_hdr.guard = 'Q';
> +    bsg_hdr.protocol = BSG_PROTOCOL_SCSI;
> +    bsg_hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
> +    bsg_hdr.request_len = sizeof(cmd);
> +    bsg_hdr.request = (unsigned long)cmd;
> +    bsg_hdr.din_xfer_len = sizeof(buf);
> +    bsg_hdr.din_xferp = (unsigned long)buf;
> +    bsg_hdr.max_response_len = sizeof(sensebuf);
> +    bsg_hdr.response = (unsigned long)sensebuf;
> +    bsg_hdr.timeout = 6000; /* XXX */
> +
> +    ret = bdrv_ioctl(bdrv, SG_IO, (void *)&bsg_hdr);
> +    if (ret < 0)
> +        return -1;
> +
> +    return (buf[9] << 16) | (buf[10] << 8) | buf[11];
> +}
> +
> +static void bsg_destroy(SCSIDevice *d)
> +{
> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, d);
> +    SCSIBSGReq *r;
> +
> +    while (!QTAILQ_EMPTY(&s->qdev.requests)) {
> +        r = DO_UPCAST(SCSIBSGReq, req, QTAILQ_FIRST(&s->qdev.requests));
> +        bsg_remove_request(r);
> +    }
> +    drive_uninit(s->qdev.conf.dinfo);
> +}
> +
> +struct scsi_idlun {
> +    uint32_t dev_id;
> +    uint32_t host_unique_id;
> +};
> +
> +static int bsg_generic_initfn(SCSIDevice *dev)
> +{
> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, dev);
> +
> +    if (!s->qdev.conf.dinfo || !s->qdev.conf.dinfo->bdrv) {
> +        error_report("scsi-bsg: drive property not set");
> +        return -1;
> +    }
> +    s->bs = s->qdev.conf.dinfo->bdrv;
> +
> +    /* check we are really using a /dev/bsg/ * file */
> +    if (!bdrv_is_bsg(s->bs)) {
> +        error_report("scsi-bsg: not BSG*");
> +        return -1;
> +    }
> +#if 0
> +    /* get LUN of the BSG */
> +    if (bdrv_ioctl(s->bs, SG_GET_SCSI_ID, &scsiid)) {
> +        error_report("scsi-bsg: SG_GET_SCSI_ID ioctl failed");
> +        return -1;
> +    }
> +#endif

Dead code shouldn't be committed.

> +// FIXME: Get SCSI lun from BSG
> +    s->lun = 0;
> +// FIXME: Get SCSI device type from BSG INQUIRY
> +    s->qdev.type = TYPE_DISK;
> +    DPRINTF("LUN %d\n", s->lun);
> +    DPRINTF("device type %d\n", s->qdev.type);
> +
> +    if (s->qdev.type == TYPE_TAPE) {
> +        s->qdev.blocksize = bsg_get_stream_blocksize(s->bs);
> +        if (s->qdev.blocksize == -1)
> +            s->qdev.blocksize = 0;
> +    } else {
> +        s->qdev.blocksize = bsg_get_blocksize(s->bs);
> +        /* removable media returns 0 if not present */
> +        if (s->qdev.blocksize <= 0) {
> +            if (s->qdev.type == TYPE_ROM || s->qdev.type  == TYPE_WORM)
> +                s->qdev.blocksize = 2048;
> +            else
> +                s->qdev.blocksize = 512;
> +        }
> +    }
> +    DPRINTF("block size %d\n", s->qdev.blocksize);
> +    s->driver_status = 0;
> +    memset(s->sensebuf, 0, sizeof(s->sensebuf));
> +    return 0;
> +}
> +
> +static void bsg_generic_unmap(SCSIBSGReq *r)
> +{
> +    int is_write = !scsi_req_is_write(&r->req);
> +    int i;
> +
> +    for (i = 0; i < r->iov.niov; i++) {
> +        cpu_physical_memory_unmap(r->iov.iov[i].iov_base,
> +                                  r->iov.iov[i].iov_len, is_write,
> +                                  r->iov.iov[i].iov_len);
> +    }
> +    qemu_iovec_reset(&r->iov);
> +}
> +
> +static int bsg_generic_map(SCSIBSGReq *r, QEMUSGList *sg)
> +{
> +    int is_write = !scsi_req_is_write(&r->req);
> +    target_phys_addr_t cur_addr, cur_len, cur_offset = 0;
> +    void *mem;
> +    int i;
> +
> +    qemu_iovec_reset(&r->iov);
> +    for (i = 0; i < sg->nsg;) {
> +        cur_addr = sg->sg[i].base + cur_offset;
> +        cur_len = sg->sg[i].len - cur_offset;
> +#ifdef DEBUG_BSG_MAP
> +        DPRINTF("Using cur_addr: 0x%016lx cur_len: 0x%016lx\n",
> +            (long unsigned int)cur_addr, (long unsigned int)cur_len);

Please use TARGET_FMT_plx for cur_addr and cur_len.

> +#endif
> +        mem = cpu_physical_memory_map(cur_addr, &cur_len, is_write);
> +        if (!mem)
> +            goto err;
> +#ifdef DEBUG_BSG_MAP
> +        DPRINTF("Adding iovec for mem: %p len: 0x%016lx\n", mem,
> +            (long unsigned int)cur_len);

Same here.

> +#endif
> +        qemu_iovec_add(&r->iov, mem, cur_len);
> +
> +        cur_offset += cur_len;
> +        if (cur_offset == sg->sg[i].len) {
> +            cur_offset = 0;
> +            i++;
> +        }
> +    }
> +
> +    return 0;
> +
> +err:
> +    bsg_generic_unmap(r);
> +    return -1;
> +}
> +
> +static SCSIRequest *bsg_generic_req_get(SCSIDevice *d, uint32_t tag, int lun)
> +{
> +    SCSIBSGReq *r;
> +
> +    r = bsg_new_request(d, tag, lun);
> +    return &r->req;
> +}
> +
> +static void bsg_generic_req_cb(void *opaque, int ret)
> +{
> +    SCSIRequest *req = opaque;
> +    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> +    BlockDriverCompletionFunc *complete;
> +    struct sg_io_v4 io_hdr;
> +    int err;
> +
> +    req->aiocb = NULL;
> +
> +    memset(&io_hdr, 0, sizeof(io_hdr));
> +    /* [i] 'Q' to differentiate from v3 */
> +    io_hdr.guard = 'Q';
> +
> +    err = bsg_read(s->bs->fd, &io_hdr, sizeof(io_hdr));
> +    if (err) {
> +        printf("bsg_read() failed with ret: %d\n", err);
> +        return;
> +    }
> +
> +    if (r->iov.niov)
> +        bsg_generic_unmap(r);
> +
> +    if (scsi_req_is_write(req)) {
> +        req->xferlen = r->bsg_hdr.dout_xfer_len;
> +        complete = bsg_write_complete;
> +    } else {
> +        req->xferlen = r->bsg_hdr.din_xfer_len;
> +        complete = bsg_command_complete;
> +    }
> +    complete(opaque, ret);
> +}
> +
> +static int bsg_generic_req_common(SCSIRequest *req, uint8_t *buffer)
> +{
> +    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
> +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> +
> +    bsg_req_fixup(&r->req);
> +    if (req->cmd.buf[0] != REQUEST_SENSE &&
> +        (req->lun != s->lun || (req->cmd.buf[1] >> 5) != s->lun)) {
> +
> +        DPRINTF("Unimplemented LUN %d\n",
> +            req->lun ? req->lun : req->cmd.buf[1] >> 5);
> +        s->senselen = scsi_build_sense(SENSE_CODE(LUN_NOT_SUPPORTED),
> +                s->sensebuf, SCSI_SENSE_BUF_SIZE, 0);
> +        s->driver_status = SG_ERR_DRIVER_SENSE;
> +        req->status = CHECK_CONDITION << 1;
> +        return 1;
> +    }
> +    if (r->req.cmd.buf[0] == REQUEST_SENSE &&
> +        s->driver_status & SG_ERR_DRIVER_SENSE) {
> +        req->xferlen = MIN(req->cmd.xfer, s->senselen);
> +        memcpy(buffer, s->sensebuf, req->xferlen);
> +        DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, s->senselen);
> +        DPRINTF("Sense: %d %d %d %d %d %d %d %d\n",
> +                buffer[0], buffer[1], buffer[2], buffer[3],
> +                buffer[4], buffer[5], buffer[6], buffer[7]);
> +        req->status = GOOD;
> +        return 1;
> +    }
> +    return 0;
> +}
> +
> +static int bsg_generic_req_buf(SCSIRequest *req, uint8_t *buffer)
> +{
> +    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
> +    int handled, ret;
> +
> +    handled = bsg_generic_req_common(req, buffer);
> +    if (handled) {
> +        scsi_req_complete(req);
> +        return 0;
> +    }
> +
> +    req->xferlen = req->cmd.xfer;
> +    ret = bsg_execute_command_buf(r, bsg_generic_req_cb, buffer, req->xferlen);
> +    if (ret == -1) {
> +        bsg_command_complete(r, -EINVAL);
> +    }
> +    return 0;
> +}
> +
> +static int bsg_generic_req_sgl(SCSIRequest *req, QEMUSGList *sg)
> +{
> +    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
> +    int handled, ret;
> +
> +    if (bsg_generic_map(r, sg) != 0) {
> +        /* Hmm ... */
> +        abort();
> +    }
> +    handled = bsg_generic_req_common(req, r->iov.iov[0].iov_base);
> +    if (handled) {
> +        bsg_generic_unmap(r);
> +        scsi_req_complete(req);
> +        return 0;
> +    }
> +
> +    req->xferlen = req->cmd.xfer;
> +    ret = bsg_execute_command_iov(r, bsg_generic_req_cb, &r->iov);
> +    if (ret == -1) {
> +        bsg_generic_unmap(r);
> +        bsg_command_complete(r, -EINVAL);
> +    }
> +    return 0;
> +}
> +
> +static void bsg_generic_req_put(SCSIRequest *req)
> +{
> +    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
> +
> +    if (r->req.aiocb) {
> +        bdrv_aio_cancel(r->req.aiocb);
> +    }
> +    bsg_remove_request(r);
> +}
> +
> +static SCSIDeviceInfo bsg_info = {
> +    .qdev.name      = "scsi-bsg",
> +    .qdev.desc      = "pass through block layer scsi generic (/dev/bsg/*)",
> +    .qdev.size      = sizeof(SCSIBSGState),
> +    .init           = bsg_generic_initfn,
> +    .destroy        = bsg_destroy,
> +
> +    /* new */
> +    .request_get    = bsg_generic_req_get,
> +    .request_buf    = bsg_generic_req_buf,
> +    .request_sgl    = bsg_generic_req_sgl,
> +    .request_put    = bsg_generic_req_put,
> +
> +    .qdev.props     = (Property[]) {
> +        DEFINE_BLOCK_PROPERTIES(SCSIBSGState, qdev.conf),
> +        DEFINE_PROP_END_OF_LIST(),
> +    },
> +};
> +
> +static void bsg_register_devices(void)
> +{
> +    scsi_qdev_register(&bsg_info);
> +}
> +device_init(bsg_register_devices)
> +
> +#endif /* __linux__ */
> --
> 1.5.6.5
>
>
>
Nicholas A. Bellinger - June 14, 2010, 11:37 p.m.
On Mon, 2010-06-14 at 18:11 +0000, Blue Swirl wrote:
> On Mon, Jun 14, 2010 at 9:44 AM, Nicholas A. Bellinger
> <nab@linux-iscsi.org> wrote:
> > From: Nicholas Bellinger <nab@linux-iscsi.org>
> >
> > This patch adds initial support for using the Linux BSG interface with write/read vectored
> > AIO as a QEMU backstore (SCSIDeviceInfo) with hw/scsi-bus.c compatible HBA emulation.
> 
> Did I miss the docs?

I assume you mean the docs for CLI usage, yes..?  The options are the same
as for scsi-generic, and with megasas HBA emulation they look like:

qemu-system-x86_64 -m 512 -boot c ~/lenny-32bit.img -drive
if=none,id=mydisk1,file=/dev/4\:0\:1\:0 -device megasas,id=raid -device
scsi-bsg,bus=raid.0,scsi-id=1,drive=mydisk1

> 
> >
> > So far it has been tested with x86_64 host and guest using hw/megasas.c and TCM_Loop LLD
> > Port LUNs.  Because this path uses struct iovec for struct sg_io_v4->d[out,in]_xferp payloads,
> > which currently requires a patch to linux/block/bsg.c:bsg_map_hdr() in order to setup the
> > user -> kernel iovecs.  This also will only currently work with paired user/kernel
> > (eg: 64bit user / 64bit kernel) because of different pointer sizes in struct iovec->iov_base.
> >
> > There are also two FIXMEs in hw/scsi-bsg.c:bsg_generic_initfn() related to extraction of
> > SCSI LUN and device type values using BSG and required by QEMU-KVM.
> >
> > Signed-off-by: Nicholas A. Bellinger <nab@linux-iscsi.org>
> > ---
> >  Makefile.objs |    2 +-
> >  hw/scsi-bsg.c |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 589 insertions(+), 1 deletions(-)
> >  create mode 100644 hw/scsi-bsg.c
> >
> > diff --git a/Makefile.objs b/Makefile.objs
> > index 188d617..c4fcb72 100644
> > --- a/Makefile.objs
> > +++ b/Makefile.objs
> > @@ -197,7 +197,7 @@ hw-obj-$(CONFIG_IDE_CMD646) += ide/cmd646.o
> >  hw-obj-$(CONFIG_IDE_MACIO) += ide/macio.o
> >
> >  # SCSI layer
> > -hw-obj-y += scsi-disk.o scsi-generic.o
> > +hw-obj-y += scsi-disk.o scsi-generic.o scsi-bsg.o
> 
> Instead of '#ifdef __linux__' (which should be '#ifdef CONFIG_LINUX'),
> please compile the object only if CONFIG_LINUX is set, something like:
> hw-obj-$(CONFIG_LINUX) += scsi-bsg.o
> 
> Please also check if this could be compiled in common-obj set.
> 

Ok, I added the 'hw-obj-$(CONFIG_LINUX) += scsi-bsg.o' mentioned above..

> >  hw-obj-y += lsi53c895a.o megasas.o
> >  hw-obj-$(CONFIG_ESP) += esp.o
> >
> > diff --git a/hw/scsi-bsg.c b/hw/scsi-bsg.c
> > new file mode 100644
> > index 0000000..fc76b76
> > --- /dev/null
> > +++ b/hw/scsi-bsg.c
> > @@ -0,0 +1,588 @@
> > +/*
> > + * block layer implementation of the sg v4 interface for Linux hosts
> > + *
> > + * Copyright (c) 2010 Rising Tide Systems
> > + * Written by Nicholas A. Bellinger <nab@linux-iscsi.org>
> > + *
> > + * Based on hw/scsi-generic code by Laurent Vivier, Paul Brook, and Fabrice Bellard
> > + *
> > + * This code is licenced under the LGPL.
> > + */
> > +
> > +#include "qemu-common.h"
> > +#include "qemu-error.h"
> > +#include "block.h"
> > +#include "scsi.h"
> > +#include "dma.h"
> > +#include "block/raw-posix-aio.h"
> > +
> > +#ifdef __linux__
> > +
> > +#define DEBUG_BSG
> > +#undef DEBUG_BSG_IO
> > +#undef DEBUG_BSG_MAP
> 
> This should be
> //#define DEBUG_BSG
> //#define DEBUG_BSG_IO
> //#define DEBUG_BSG_MAP
> 

Fixed

> > +
> > +#ifdef DEBUG_BSG
> > +#define DPRINTF(fmt, ...) \
> > +do { printf("scsi-bsg: " fmt , ## __VA_ARGS__); } while (0)
> > +#else
> > +#define DPRINTF(fmt, ...) do {} while(0)
> > +#endif
> > +
> > +#define BADF(fmt, ...) \
> > +do { fprintf(stderr, "scsi-bsg: " fmt , ## __VA_ARGS__); } while (0)
> > +
> > +#include <stdio.h>
> > +#include <sys/types.h>
> > +#include <sys/stat.h>
> > +#include <sys/epoll.h>
> > +#include <unistd.h>
> > +#include <scsi/sg.h>
> > +#include <linux/bsg.h>
> > +#include "scsi-defs.h"
> > +
> > +#define SCSI_SENSE_BUF_SIZE 96
> > +
> > +#define SG_ERR_DRIVER_TIMEOUT 0x06
> > +#define SG_ERR_DRIVER_SENSE 0x08
> > +
> > +#ifndef MAX_UINT
> > +#define MAX_UINT ((unsigned int)-1)
> 
> The standard macro is UINT_MAX.
> 

Fixed (This was originally from hw/scsi-generic.c)

> > +#endif
> > +
> > +typedef struct SCSIBSGState SCSIBSGState;
> > +
> > +typedef struct SCSIBSGReq {
> > +    SCSIRequest req;
> > +    uint8_t *buf;
> > +    int buflen;
> > +    QEMUIOVector iov;
> > +    QEMUIOVector aio_iov;
> > +    struct sg_io_v4 bsg_hdr;
> > +} SCSIBSGReq;
> > +
> > +struct SCSIBSGState {
> > +    SCSIDevice qdev;
> > +    BlockDriverState *bs;
> > +    int lun;
> > +    int driver_status;
> > +    uint8_t sensebuf[SCSI_SENSE_BUF_SIZE];
> > +    uint8_t senselen;
> > +};
> > +
> > +static int bsg_read(int fd, void *p_read, int to_read)
> > +{
> > +    int err;
> > +
> > +    while (to_read > 0) {
> > +        err = read(fd, p_read, to_read);
> > +        if (err >= 0) {
> > +            to_read -= err;
> > +            p_read += err;
> > +        } else if (errno == EINTR)
> > +            continue;
> > +        else {
> > +            printf("bsg device %d read failed, errno: %d\n",
> > +                    fd, errno);
> 
> DPRINTF?

I made this into an error_report() call.

> 
> > +            return errno;
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +static SCSIBSGReq *bsg_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun)
> > +{
> > +    SCSIRequest *req;
> > +    SCSIBSGReq *r;
> > +
> > +    req = scsi_req_alloc(sizeof(SCSIBSGReq), d, tag, lun);
> > +    r = DO_UPCAST(SCSIBSGReq, req, req);
> > +    qemu_iovec_init(&r->iov, 1);
> > +    qemu_iovec_init(&r->aio_iov, 1);
> > +    return r;
> > +}
> > +
> > +static void bsg_remove_request(SCSIBSGReq *r)
> > +{
> > +    qemu_free(r->buf);
> > +    qemu_iovec_destroy(&r->iov);
> > +    qemu_iovec_destroy(&r->aio_iov);
> > +    scsi_req_free(&r->req);
> > +}
> > +
> > +static void bsg_command_complete(void *opaque, int ret)
> > +{
> > +    SCSIBSGReq *r = (SCSIBSGReq *)opaque;
> 
> Useless cast in C.
> 

Fixed

> > +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> > +
> > +    s->driver_status = r->bsg_hdr.driver_status;
> > +    if (s->driver_status)
> > +        s->senselen = SCSI_SENSE_BUF_SIZE;
> > +
> > +    if (ret != 0) {
> > +        scsi_req_print(&r->req);
> > +        fprintf(stderr, "%s: ret %d (%s)\n", __FUNCTION__,
> > +                ret, strerror(-ret));
> 
> error_report()?
> 

Ok, using error_report() instead of fprintf(stderr) in
bsg_command_complete()

> > +        s->senselen = scsi_build_sense(SENSE_CODE(INVALID_FIELD),
> > +                    s->sensebuf, SCSI_SENSE_BUF_SIZE, 0);
> > +        s->driver_status = SG_ERR_DRIVER_SENSE;
> > +        r->req.status = CHECK_CONDITION;
> > +    } else {
> > +        if (s->driver_status & SG_ERR_DRIVER_TIMEOUT) {
> > +            scsi_req_print(&r->req);
> > +            fprintf(stderr, "%s: timeout\n", __FUNCTION__);
> > +            r->req.status = BUSY << 1;
> > +        } else if (r->bsg_hdr.device_status) {
> > +            r->req.status = r->bsg_hdr.device_status;
> > +        } else if (s->driver_status & SG_ERR_DRIVER_SENSE) {
> > +            scsi_req_print(&r->req);
> > +            fprintf(stderr, "%s: driver sense\n", __FUNCTION__);
> > +            r->req.status = CHECK_CONDITION << 1;
> > +        } else {
> > +            r->req.status = GOOD << 1;
> > +        }
> > +    }
> > +#ifdef DEBUG_BSG_IO
> > +    DPRINTF("Command complete 0x%p tag=0x%x status=%d\n",
> > +            r, r->req.tag, r->req.status);
> 
> Please introduce DPRINTF_BSG_IO and remove #ifdef/#endif.
> 

Done

> > +#endif
> > +    scsi_req_complete(&r->req);
> > +}
> > +
> > +static int bsg_execute_command_run(SCSIBSGReq *r,
> > +                                   BlockDriverCompletionFunc *complete)
> > +{
> > +    BlockDriverState *bdrv = r->req.dev->conf.dinfo->bdrv;
> > +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
> > +    /*
> > +     * Following linux/include/linux/bsg.h
> > +     */
> > +    /* [i] 'Q' to differentiate from v3 */
> > +    r->bsg_hdr.guard = 'Q';
> > +    r->bsg_hdr.protocol = BSG_PROTOCOL_SCSI;
> > +    r->bsg_hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
> > +    r->bsg_hdr.request_len = r->req.cmd.len;
> > +    r->bsg_hdr.request = (unsigned long)r->req.cmd.buf;
> > +    r->bsg_hdr.max_response_len = sizeof(s->sensebuf);
> > +    /* SCSI: (auto)sense data */
> > +    r->bsg_hdr.response = (unsigned long)s->sensebuf;
> > +    /* Unlimited timeout */
> > +    r->bsg_hdr.timeout = MAX_UINT;
> > +    /* [i->o] unused internally */
> > +    r->bsg_hdr.usr_ptr = (unsigned long)r;
> > +    /* Bsg does Q_AT_HEAD by default */
> > +    r->bsg_hdr.flags |= BSG_FLAG_Q_AT_TAIL;
> 
> Does something initialize r->bsg_hdr.flags before?
> 

Nope, changed to an assignment..

> > +
> > +    qemu_iovec_reset(&r->aio_iov);
> > +    qemu_iovec_add(&r->aio_iov, &r->bsg_hdr, sizeof(r->bsg_hdr));
> > +
> > +    r->req.aiocb = paio_submit_len(bdrv, bdrv->fd, 0, &r->aio_iov,
> > +                sizeof(r->bsg_hdr), complete, r, QEMU_AIO_WRITE);
> > +    if (r->req.aiocb == NULL) {
> > +        BADF("execute_command: paio_submit_len() failed\n");
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int bsg_execute_command_buf(SCSIBSGReq *r,
> > +                                   BlockDriverCompletionFunc *complete,
> > +                                   uint8_t *buf, uint32_t buflen)
> > +{
> > +    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
> > +        r->bsg_hdr.dout_xferp = (unsigned long)buf;
> > +        r->bsg_hdr.dout_xfer_len = buflen;
> > +    } else if (r->req.cmd.mode == SCSI_XFER_FROM_DEV) {
> > +        r->bsg_hdr.din_xferp = (unsigned long)buf;
> > +        r->bsg_hdr.din_xfer_len = buflen;
> > +    }
> > +#ifdef DEBUG_BSG_IO
> > +    DPRINTF("execute BUF: %p, dxfer_len %u\n", buf, buflen);
> > +#endif
> > +    return bsg_execute_command_run(r, complete);
> > +}
> > +
> > +static int bsg_execute_command_iov(SCSIBSGReq *r,
> > +                                   BlockDriverCompletionFunc *complete,
> > +                                   QEMUIOVector *iov)
> > +{
> > +    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
> > +        r->bsg_hdr.dout_iovec_count = iov->niov;
> > +        r->bsg_hdr.dout_xferp = (unsigned long)iov->iov;
> > +        r->bsg_hdr.dout_xfer_len = iov->size;
> > +    } else if (r->req.cmd.mode == SCSI_XFER_FROM_DEV) {
> > +        r->bsg_hdr.din_iovec_count = iov->niov;
> > +        r->bsg_hdr.din_xferp = (unsigned long)iov->iov;
> > +        r->bsg_hdr.din_xfer_len = iov->size;
> > +    }
> > +#ifdef DEBUG_BSG_IO
> > +    DPRINTF("execute IOV: iovec_count: %u, iov: %p, size: %u\n",
> > +            iov->niov, iov->iov, (unsigned int)iov->size);
> 
> You can remove the cast by using '%zu'.
> 

Fixed

> > +static int bsg_generic_initfn(SCSIDevice *dev)
> > +{
> > +    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, dev);
> > +
> > +    if (!s->qdev.conf.dinfo || !s->qdev.conf.dinfo->bdrv) {
> > +        error_report("scsi-bsg: drive property not set");
> > +        return -1;
> > +    }
> > +    s->bs = s->qdev.conf.dinfo->bdrv;
> > +
> > +    /* check we are really using a /dev/bsg/ * file */
> > +    if (!bdrv_is_bsg(s->bs)) {
> > +        error_report("scsi-bsg: not BSG*");
> > +        return -1;
> > +    }
> > +#if 0
> > +    /* get LUN of the BSG */
> > +    if (bdrv_ioctl(s->bs, SG_GET_SCSI_ID, &scsiid)) {
> > +        error_report("scsi-bsg: SG_GET_SCSI_ID ioctl failed");
> > +        return -1;
> > +    }
> > +#endif
> 
> Dead code shouldn't be committed.
> 

Understood.  This was included in the original commit because it is how
hw/scsi-generic.c determines HCTL with the SG_IO ioctl, and adding
SG_GET_SCSI_ID to the linux/block/bsg.c ioctl handler makes sense, as that
code already supports certain existing SG_IO ioctl ops.

> > +// FIXME: Get SCSI lun from BSG
> > +    s->lun = 0;
> > +// FIXME: Get SCSI device type from BSG INQUIRY
> > +    s->qdev.type = TYPE_DISK;
> > +    DPRINTF("LUN %d\n", s->lun);
> > +    DPRINTF("device type %d\n", s->qdev.type);
> > +
> > +    if (s->qdev.type == TYPE_TAPE) {
> > +        s->qdev.blocksize = bsg_get_stream_blocksize(s->bs);
> > +        if (s->qdev.blocksize == -1)
> > +            s->qdev.blocksize = 0;
> > +    } else {
> > +        s->qdev.blocksize = bsg_get_blocksize(s->bs);
> > +        /* removable media returns 0 if not present */
> > +        if (s->qdev.blocksize <= 0) {
> > +            if (s->qdev.type == TYPE_ROM || s->qdev.type  == TYPE_WORM)
> > +                s->qdev.blocksize = 2048;
> > +            else
> > +                s->qdev.blocksize = 512;
> > +        }
> > +    }
> > +    DPRINTF("block size %d\n", s->qdev.blocksize);
> > +    s->driver_status = 0;
> > +    memset(s->sensebuf, 0, sizeof(s->sensebuf));
> > +    return 0;
> > +}
> > +
> > +static void bsg_generic_unmap(SCSIBSGReq *r)
> > +{
> > +    int is_write = !scsi_req_is_write(&r->req);
> > +    int i;
> > +
> > +    for (i = 0; i < r->iov.niov; i++) {
> > +        cpu_physical_memory_unmap(r->iov.iov[i].iov_base,
> > +                                  r->iov.iov[i].iov_len, is_write,
> > +                                  r->iov.iov[i].iov_len);
> > +    }
> > +    qemu_iovec_reset(&r->iov);
> > +}
> > +
> > +static int bsg_generic_map(SCSIBSGReq *r, QEMUSGList *sg)
> > +{
> > +    int is_write = !scsi_req_is_write(&r->req);
> > +    target_phys_addr_t cur_addr, cur_len, cur_offset = 0;
> > +    void *mem;
> > +    int i;
> > +
> > +    qemu_iovec_reset(&r->iov);
> > +    for (i = 0; i < sg->nsg;) {
> > +        cur_addr = sg->sg[i].base + cur_offset;
> > +        cur_len = sg->sg[i].len - cur_offset;
> > +#ifdef DEBUG_BSG_MAP
> > +        DPRINTF("Using cur_addr: 0x%016lx cur_len: 0x%016lx\n",
> > +            (long unsigned int)cur_addr, (long unsigned int)cur_len);
> 
> Please use TARGET_FMT_plx for cur_addr and cur_len.
> 

Fixed

> > +#endif
> > +        mem = cpu_physical_memory_map(cur_addr, &cur_len, is_write);
> > +        if (!mem)
> > +            goto err;
> > +#ifdef DEBUG_BSG_MAP
> > +        DPRINTF("Adding iovec for mem: %p len: 0x%016lx\n", mem,
> > +            (long unsigned int)cur_len);
> 
> Same here.

Fixed

Thank you for your comments!

--nab

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 188d617..c4fcb72 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -197,7 +197,7 @@  hw-obj-$(CONFIG_IDE_CMD646) += ide/cmd646.o
 hw-obj-$(CONFIG_IDE_MACIO) += ide/macio.o
 
 # SCSI layer
-hw-obj-y += scsi-disk.o scsi-generic.o
+hw-obj-y += scsi-disk.o scsi-generic.o scsi-bsg.o
 hw-obj-y += lsi53c895a.o megasas.o
 hw-obj-$(CONFIG_ESP) += esp.o
 
diff --git a/hw/scsi-bsg.c b/hw/scsi-bsg.c
new file mode 100644
index 0000000..fc76b76
--- /dev/null
+++ b/hw/scsi-bsg.c
@@ -0,0 +1,588 @@ 
+/*
+ * block layer implementation of the sg v4 interface for Linux hosts
+ *
+ * Copyright (c) 2010 Rising Tide Systems
+ * Written by Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * Based on hw/scsi-generic code by Laurent Vivier, Paul Brook, and Fabrice Bellard
+ *
+ * This code is licenced under the LGPL.
+ */
+
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block.h"
+#include "scsi.h"
+#include "dma.h"
+#include "block/raw-posix-aio.h"
+
+#ifdef __linux__
+
+#define DEBUG_BSG
+#undef DEBUG_BSG_IO
+#undef DEBUG_BSG_MAP
+
+#ifdef DEBUG_BSG
+#define DPRINTF(fmt, ...) \
+do { printf("scsi-bsg: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while(0)
+#endif
+
+#define BADF(fmt, ...) \
+do { fprintf(stderr, "scsi-bsg: " fmt , ## __VA_ARGS__); } while (0)
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <scsi/sg.h>
+#include <linux/bsg.h>
+#include "scsi-defs.h"
+
+#define SCSI_SENSE_BUF_SIZE 96
+
+#define SG_ERR_DRIVER_TIMEOUT 0x06
+#define SG_ERR_DRIVER_SENSE 0x08
+
+#ifndef MAX_UINT
+#define MAX_UINT ((unsigned int)-1)
+#endif
+
+typedef struct SCSIBSGState SCSIBSGState;
+
+typedef struct SCSIBSGReq {
+    SCSIRequest req;            /* generic request; DO_UPCAST() maps it back */
+    uint8_t *buf;               /* freed in bsg_remove_request(); never assigned in this file */
+    int buflen;                 /* not referenced elsewhere in this file */
+    QEMUIOVector iov;           /* guest memory mapped by bsg_generic_map() */
+    QEMUIOVector aio_iov;       /* wraps bsg_hdr for paio_submit_len() */
+    struct sg_io_v4 bsg_hdr;    /* sg v4 header submitted to the BSG device */
+} SCSIBSGReq;
+
+struct SCSIBSGState {
+    SCSIDevice qdev;            /* generic SCSI device; DO_UPCAST() maps it back */
+    BlockDriverState *bs;       /* set from conf.dinfo->bdrv in bsg_generic_initfn() */
+    int lun;                    /* hard-coded to 0 in bsg_generic_initfn() */
+    int driver_status;          /* copied from bsg_hdr.driver_status or set locally */
+    uint8_t sensebuf[SCSI_SENSE_BUF_SIZE]; /* also passed as bsg_hdr.response */
+    uint8_t senselen;           /* number of sense bytes held in sensebuf */
+};
+
+/*
+ * Read exactly to_read bytes from fd into p_read, retrying on EINTR and
+ * continuing after short reads.
+ *
+ * Returns 0 on success or a positive errno value on failure.  An
+ * unexpected end-of-file (read() returning 0) is reported as EIO rather
+ * than retried forever.
+ */
+static int bsg_read(int fd, void *p_read, int to_read)
+{
+    /* byte-sized cursor: arithmetic on void * is only a GNU extension */
+    uint8_t *cursor = p_read;
+
+    while (to_read > 0) {
+        ssize_t done = read(fd, cursor, to_read);
+
+        if (done > 0) {
+            to_read -= done;
+            cursor += done;
+        } else if (done == 0) {
+            printf("bsg device %d read failed: unexpected EOF\n", fd);
+            return EIO;
+        } else if (errno != EINTR) {
+            /* save errno first: printf() is allowed to overwrite it */
+            int saved_errno = errno;
+
+            printf("bsg device %d read failed, errno: %d\n",
+                    fd, saved_errno);
+            return saved_errno;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Allocate a request of sizeof(SCSIBSGReq) through scsi_req_alloc() and
+ * initialise both of its I/O vectors (data and AIO header), each with an
+ * allocation hint of one element.
+ */
+static SCSIBSGReq *bsg_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun)
+{
+    SCSIRequest *base = scsi_req_alloc(sizeof(SCSIBSGReq), d, tag, lun);
+    SCSIBSGReq *bsg_req = DO_UPCAST(SCSIBSGReq, req, base);
+
+    qemu_iovec_init(&bsg_req->iov, 1);
+    qemu_iovec_init(&bsg_req->aio_iov, 1);
+
+    return bsg_req;
+}
+
+/*
+ * Release everything owned by a request: its data buffer, both I/O
+ * vectors, and finally the generic request itself.
+ */
+static void bsg_remove_request(SCSIBSGReq *r)
+{
+    SCSIRequest *base = &r->req;
+
+    qemu_free(r->buf);
+
+    qemu_iovec_destroy(&r->iov);
+    qemu_iovec_destroy(&r->aio_iov);
+
+    /* must come last: r lives inside the allocation freed here */
+    scsi_req_free(base);
+}
+
+/*
+ * Completion handler for a BSG command.
+ *
+ * opaque is the SCSIBSGReq; ret is 0 on success or a negative errno value.
+ * Records the driver status in the device state, derives the SCSI status
+ * of the request from it and hands the request to scsi_req_complete().
+ */
+static void bsg_command_complete(void *opaque, int ret)
+{
+    SCSIBSGReq *r = (SCSIBSGReq *)opaque;
+    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
+
+    s->driver_status = r->bsg_hdr.driver_status;
+    if (s->driver_status) {
+        s->senselen = SCSI_SENSE_BUF_SIZE;
+    }
+
+    if (ret != 0) {
+        /* submission or I/O failure: synthesise sense data locally */
+        scsi_req_print(&r->req);
+        fprintf(stderr, "%s: ret %d (%s)\n", __FUNCTION__,
+                ret, strerror(-ret));
+        s->senselen = scsi_build_sense(SENSE_CODE(INVALID_FIELD),
+                    s->sensebuf, SCSI_SENSE_BUF_SIZE, 0);
+        s->driver_status = SG_ERR_DRIVER_SENSE;
+        /* shifted like every other status value assigned in this file */
+        r->req.status = CHECK_CONDITION << 1;
+    } else {
+        if (s->driver_status & SG_ERR_DRIVER_TIMEOUT) {
+            scsi_req_print(&r->req);
+            fprintf(stderr, "%s: timeout\n", __FUNCTION__);
+            r->req.status = BUSY << 1;
+        } else if (r->bsg_hdr.device_status) {
+            /* status reported by the device is passed through unchanged */
+            r->req.status = r->bsg_hdr.device_status;
+        } else if (s->driver_status & SG_ERR_DRIVER_SENSE) {
+            scsi_req_print(&r->req);
+            fprintf(stderr, "%s: driver sense\n", __FUNCTION__);
+            r->req.status = CHECK_CONDITION << 1;
+        } else {
+            r->req.status = GOOD << 1;
+        }
+    }
+#ifdef DEBUG_BSG_IO
+    DPRINTF("Command complete 0x%p tag=0x%x status=%d\n",
+            r, r->req.tag, r->req.status);
+#endif
+    scsi_req_complete(&r->req);
+}
+
+/*
+ * Fill in the direction-independent part of the request's sg v4 header and
+ * submit the header to the BSG file descriptor as an asynchronous write.
+ *
+ * complete is invoked with r as its opaque argument when the write
+ * finishes.  Returns 0 when the write was queued, -1 when
+ * paio_submit_len() failed.
+ */
+static int bsg_execute_command_run(SCSIBSGReq *r,
+                                   BlockDriverCompletionFunc *complete)
+{
+    BlockDriverState *bdrv = r->req.dev->conf.dinfo->bdrv;
+    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
+    /*
+     * Following linux/include/linux/bsg.h
+     */
+    /* [i] 'Q' to differentiate from v3 */
+    r->bsg_hdr.guard = 'Q';
+    r->bsg_hdr.protocol = BSG_PROTOCOL_SCSI;
+    r->bsg_hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
+    r->bsg_hdr.request_len = r->req.cmd.len;
+    r->bsg_hdr.request = (unsigned long)r->req.cmd.buf;
+    r->bsg_hdr.max_response_len = sizeof(s->sensebuf);
+    /* SCSI: (auto)sense data */
+    r->bsg_hdr.response = (unsigned long)s->sensebuf;
+    /* Unlimited timeout */
+    r->bsg_hdr.timeout = MAX_UINT;
+    /* [i->o] unused internally */
+    r->bsg_hdr.usr_ptr = (unsigned long)r;
+    /*
+     * Bsg does Q_AT_HEAD by default.  Plain assignment: nothing in this
+     * file initialises flags beforehand, so OR-ing into it would keep
+     * whatever bits happened to be there.
+     */
+    r->bsg_hdr.flags = BSG_FLAG_Q_AT_TAIL;
+
+    qemu_iovec_reset(&r->aio_iov);
+    qemu_iovec_add(&r->aio_iov, &r->bsg_hdr, sizeof(r->bsg_hdr));
+
+    r->req.aiocb = paio_submit_len(bdrv, bdrv->fd, 0, &r->aio_iov,
+                sizeof(r->bsg_hdr), complete, r, QEMU_AIO_WRITE);
+    if (r->req.aiocb == NULL) {
+        BADF("execute_command: paio_submit_len() failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Point the sg v4 header at a flat data buffer, in the direction given by
+ * the request's transfer mode, then submit the command.  Requests that are
+ * neither TO_DEV nor FROM_DEV leave the data fields untouched.
+ */
+static int bsg_execute_command_buf(SCSIBSGReq *r,
+                                   BlockDriverCompletionFunc *complete,
+                                   uint8_t *buf, uint32_t buflen)
+{
+    struct sg_io_v4 *hdr = &r->bsg_hdr;
+
+    switch (r->req.cmd.mode) {
+    case SCSI_XFER_TO_DEV:
+        hdr->dout_xferp = (unsigned long)buf;
+        hdr->dout_xfer_len = buflen;
+        break;
+    case SCSI_XFER_FROM_DEV:
+        hdr->din_xferp = (unsigned long)buf;
+        hdr->din_xfer_len = buflen;
+        break;
+    default:
+        break;
+    }
+#ifdef DEBUG_BSG_IO
+    DPRINTF("execute BUF: %p, dxfer_len %u\n", buf, buflen);
+#endif
+    return bsg_execute_command_run(r, complete);
+}
+
+/*
+ * Point the sg v4 header at a scatter/gather vector, in the direction
+ * given by the request's transfer mode, then submit the command.
+ *
+ * The xferp field carries the address of the iovec array and the matching
+ * iovec_count field its element count.
+ */
+static int bsg_execute_command_iov(SCSIBSGReq *r,
+                                   BlockDriverCompletionFunc *complete,
+                                   QEMUIOVector *iov)
+{
+    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
+        r->bsg_hdr.dout_iovec_count = iov->niov;
+        r->bsg_hdr.dout_xferp = (unsigned long)iov->iov;
+        r->bsg_hdr.dout_xfer_len = iov->size;
+    } else if (r->req.cmd.mode == SCSI_XFER_FROM_DEV) {
+        r->bsg_hdr.din_iovec_count = iov->niov;
+        r->bsg_hdr.din_xferp = (unsigned long)iov->iov;
+        r->bsg_hdr.din_xfer_len = iov->size;
+    }
+#ifdef DEBUG_BSG_IO
+    /* %zu prints the size without a truncating cast */
+    DPRINTF("execute IOV: iovec_count: %u, iov: %p, size: %zu\n",
+            iov->niov, iov->iov, iov->size);
+#endif
+    return bsg_execute_command_run(r, complete);
+}
+
+/*
+ * Completion handler for commands that transfer data to the device.
+ *
+ * On error the request is completed immediately.  Otherwise a 12-byte
+ * MODE SELECT sent to a tape device updates the cached block size before
+ * the request is completed.
+ */
+static void bsg_write_complete(void *opaque, int ret)
+{
+    SCSIBSGReq *r = (SCSIBSGReq *)opaque;
+    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
+#ifdef DEBUG_BSG_IO
+    DPRINTF("bsg_write_complete() ret = %d\n", ret);
+#endif
+    if (ret) {
+        DPRINTF("IO error\n");
+        bsg_command_complete(r, ret);
+        return;
+    }
+    /*
+     * Copied from hw/scsi-generic.c:scsi_write_complete(), is this still
+     * necessary for BSG..?
+     *
+     * r->buf is never assigned in this file, so it is checked before it
+     * is dereferenced.
+     */
+    if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
+        s->qdev.type == TYPE_TAPE && r->buf != NULL) {
+        s->qdev.blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11];
+        DPRINTF("block size %d\n", s->qdev.blocksize);
+    }
+
+    bsg_command_complete(r, ret);
+}
+
+/* Placeholder hook invoked before a request is dispatched; does nothing. */
+static void bsg_req_fixup(SCSIRequest *req)
+{
+    (void)req;
+}
+
+/*
+ * Issue a synchronous READ CAPACITY through the SG_IO ioctl and return the
+ * block length taken from bytes 4..7 of the response (big-endian).
+ *
+ * Returns -1 when the ioctl fails.  The response buffer is zeroed first,
+ * so an ioctl that succeeds without writing any data yields 0.
+ */
+static int bsg_get_blocksize(BlockDriverState *bdrv)
+{
+    uint8_t cmd[10];
+    uint8_t buf[8];
+    uint8_t sensebuf[8];
+    struct sg_io_v4 bsg_hdr;
+    int ret;
+
+    memset(cmd, 0, sizeof(cmd));
+    memset(buf, 0, sizeof(buf));
+    cmd[0] = READ_CAPACITY;
+
+    memset(&bsg_hdr, 0, sizeof(bsg_hdr));
+    bsg_hdr.guard = 'Q';
+    bsg_hdr.protocol = BSG_PROTOCOL_SCSI;
+    bsg_hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
+    bsg_hdr.request_len = sizeof(cmd);
+    bsg_hdr.request = (unsigned long)cmd;
+    bsg_hdr.din_xfer_len = sizeof(buf);
+    bsg_hdr.din_xferp = (unsigned long)buf;
+    bsg_hdr.max_response_len = sizeof(sensebuf);
+    bsg_hdr.response = (unsigned long)sensebuf;
+    bsg_hdr.timeout = 6000; /* XXX */
+
+    ret = bdrv_ioctl(bdrv, SG_IO, (void *)&bsg_hdr);
+    if (ret < 0) {
+        return -1;
+    }
+
+    /*
+     * Assemble in unsigned arithmetic: a uint8_t promotes to int, and
+     * shifting a value >= 0x80 left by 24 would overflow a signed int.
+     */
+    return ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) |
+           ((uint32_t)buf[6] << 8) | (uint32_t)buf[7];
+}
+
+/*
+ * Issue a synchronous 6-byte MODE SENSE through the SG_IO ioctl and return
+ * the 24-bit big-endian value held in bytes 9..11 of the response.
+ *
+ * Returns -1 when the ioctl fails.
+ */
+static int bsg_get_stream_blocksize(BlockDriverState *bdrv)
+{
+    uint8_t cdb[6];
+    uint8_t data[12];
+    uint8_t sense[8];
+    struct sg_io_v4 hdr;
+
+    memset(data, 0, sizeof(data));
+    memset(cdb, 0, sizeof(cdb));
+    cdb[0] = MODE_SENSE;
+    cdb[4] = sizeof(data);      /* allocation length */
+
+    memset(&hdr, 0, sizeof(hdr));
+    hdr.guard = 'Q';
+    hdr.protocol = BSG_PROTOCOL_SCSI;
+    hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
+    hdr.request = (unsigned long)cdb;
+    hdr.request_len = sizeof(cdb);
+    hdr.din_xferp = (unsigned long)data;
+    hdr.din_xfer_len = sizeof(data);
+    hdr.response = (unsigned long)sense;
+    hdr.max_response_len = sizeof(sense);
+    hdr.timeout = 6000; /* XXX */
+
+    if (bdrv_ioctl(bdrv, SG_IO, (void *)&hdr) < 0) {
+        return -1;
+    }
+
+    return (data[9] << 16) | (data[10] << 8) | data[11];
+}
+
+/*
+ * Device teardown: dispose of every request still on the device's request
+ * queue, then release the drive.
+ */
+static void bsg_destroy(SCSIDevice *d)
+{
+    SCSIBSGState *state = DO_UPCAST(SCSIBSGState, qdev, d);
+
+    /*
+     * NOTE(review): termination presumably relies on scsi_req_free()
+     * (called from bsg_remove_request()) unlinking the request from the
+     * queue -- confirm.
+     */
+    for (;;) {
+        SCSIRequest *head;
+
+        if (QTAILQ_EMPTY(&state->qdev.requests)) {
+            break;
+        }
+        head = QTAILQ_FIRST(&state->qdev.requests);
+        bsg_remove_request(DO_UPCAST(SCSIBSGReq, req, head));
+    }
+
+    drive_uninit(state->qdev.conf.dinfo);
+}
+
+struct scsi_idlun {             /* not referenced elsewhere in this file */
+    uint32_t dev_id;            /* presumably filled by SG_GET_SCSI_ID -- TODO confirm */
+    uint32_t host_unique_id;
+};
+
+/*
+ * qdev init callback for the scsi-bsg device.
+ *
+ * Checks that a drive is attached and that it is a BSG node, then sets the
+ * LUN, device type and block size and clears the sense state.
+ *
+ * Returns 0 on success, -1 (after error_report()) when no drive is set or
+ * the drive is not a BSG device.
+ */
+static int bsg_generic_initfn(SCSIDevice *dev)
+{
+    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, dev);
+
+    if (!s->qdev.conf.dinfo || !s->qdev.conf.dinfo->bdrv) {
+        error_report("scsi-bsg: drive property not set");
+        return -1;
+    }
+    s->bs = s->qdev.conf.dinfo->bdrv;
+
+    /* check we are really using a /dev/bsg/ * file */
+    if (!bdrv_is_bsg(s->bs)) {
+        error_report("scsi-bsg: not BSG*");
+        return -1;
+    }
+
+    /* FIXME: Get SCSI lun from BSG */
+    s->lun = 0;
+    /* FIXME: Get SCSI device type from BSG INQUIRY */
+    s->qdev.type = TYPE_DISK;
+    DPRINTF("LUN %d\n", s->lun);
+    DPRINTF("device type %d\n", s->qdev.type);
+
+    /*
+     * With the type hard-coded above only the non-tape branch runs today;
+     * the tape branch is kept for when the type is read from the device.
+     */
+    if (s->qdev.type == TYPE_TAPE) {
+        s->qdev.blocksize = bsg_get_stream_blocksize(s->bs);
+        if (s->qdev.blocksize == -1) {
+            s->qdev.blocksize = 0;
+        }
+    } else {
+        s->qdev.blocksize = bsg_get_blocksize(s->bs);
+        /* removable media returns 0 if not present */
+        if (s->qdev.blocksize <= 0) {
+            if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
+                s->qdev.blocksize = 2048;
+            } else {
+                s->qdev.blocksize = 512;
+            }
+        }
+    }
+    DPRINTF("block size %d\n", s->qdev.blocksize);
+    s->driver_status = 0;
+    memset(s->sensebuf, 0, sizeof(s->sensebuf));
+    return 0;
+}
+
+/*
+ * Undo bsg_generic_map(): unmap every element of the request's data vector
+ * and reset the vector to empty.
+ */
+static void bsg_generic_unmap(SCSIBSGReq *r)
+{
+    /* inverse of the SCSI direction, as in bsg_generic_map() */
+    int mem_is_written = !scsi_req_is_write(&r->req);
+    int idx = 0;
+
+    while (idx < r->iov.niov) {
+        struct iovec *seg = &r->iov.iov[idx];
+
+        /* the full segment length is reported as accessed */
+        cpu_physical_memory_unmap(seg->iov_base, seg->iov_len,
+                                  mem_is_written, seg->iov_len);
+        idx++;
+    }
+    qemu_iovec_reset(&r->iov);
+}
+
+/*
+ * Map every entry of the guest scatter/gather list sg into host memory and
+ * collect the mappings in r->iov.
+ *
+ * cpu_physical_memory_map() may map less than was asked for; an entry is
+ * then covered by several iovec elements, with cur_offset tracking the
+ * progress inside the current entry.
+ *
+ * Returns 0 on success.  If a mapping fails, everything mapped so far is
+ * unmapped again and -1 is returned.
+ */
+static int bsg_generic_map(SCSIBSGReq *r, QEMUSGList *sg)
+{
+    int is_write = !scsi_req_is_write(&r->req);
+    target_phys_addr_t cur_addr, cur_len, cur_offset = 0;
+    void *mem;
+    int i;
+
+    qemu_iovec_reset(&r->iov);
+    for (i = 0; i < sg->nsg;) {
+        cur_addr = sg->sg[i].base + cur_offset;
+        cur_len = sg->sg[i].len - cur_offset;
+#ifdef DEBUG_BSG_MAP
+        /* TARGET_FMT_plx matches target_phys_addr_t, so no cast is needed */
+        DPRINTF("Using cur_addr: 0x" TARGET_FMT_plx " cur_len: 0x"
+                TARGET_FMT_plx "\n", cur_addr, cur_len);
+#endif
+        mem = cpu_physical_memory_map(cur_addr, &cur_len, is_write);
+        if (!mem) {
+            goto err;
+        }
+#ifdef DEBUG_BSG_MAP
+        DPRINTF("Adding iovec for mem: %p len: 0x" TARGET_FMT_plx "\n",
+                mem, cur_len);
+#endif
+        qemu_iovec_add(&r->iov, mem, cur_len);
+
+        /* advance to the next entry only once this one is fully mapped */
+        cur_offset += cur_len;
+        if (cur_offset == sg->sg[i].len) {
+            cur_offset = 0;
+            i++;
+        }
+    }
+
+    return 0;
+
+err:
+    bsg_generic_unmap(r);
+    return -1;
+}
+
+/*
+ * request_get callback: create a BSG request and hand back the generic
+ * SCSIRequest embedded in it.
+ */
+static SCSIRequest *bsg_generic_req_get(SCSIDevice *d, uint32_t tag, int lun)
+{
+    SCSIBSGReq *created = bsg_new_request(d, tag, lun);
+
+    return &created->req;
+}
+
+/*
+ * AIO completion callback for the header write queued by
+ * bsg_execute_command_run().
+ *
+ * opaque is the request; ret is the result of the write (0 on success).
+ * After a successful write the response header is read back from the BSG
+ * file descriptor and its status fields are copied into r->bsg_hdr, which
+ * is where bsg_command_complete() looks for them.  The guest mapping is
+ * released and the request is always completed, including when the write
+ * or the read failed.
+ *
+ * NOTE(review): this assumes the header read here answers this request;
+ * with several commands in flight that may not hold -- a mismatch in
+ * usr_ptr is only reported.
+ */
+static void bsg_generic_req_cb(void *opaque, int ret)
+{
+    SCSIRequest *req = opaque;
+    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
+    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
+    BlockDriverCompletionFunc *complete;
+    struct sg_io_v4 io_hdr;
+    int err = 0;
+
+    req->aiocb = NULL;
+
+    /* a failed write queued nothing, so there is no response to wait for */
+    if (ret == 0) {
+        memset(&io_hdr, 0, sizeof(io_hdr));
+        /* [i] 'Q' to differentiate from v3 */
+        io_hdr.guard = 'Q';
+
+        err = bsg_read(s->bs->fd, &io_hdr, sizeof(io_hdr));
+    }
+
+    if (r->iov.niov) {
+        bsg_generic_unmap(r);
+    }
+
+    if (err) {
+        /* bsg_read() returns a positive errno; complete with -errno */
+        BADF("bsg_read() failed with ret: %d\n", err);
+        bsg_command_complete(r, -err);
+        return;
+    }
+
+    if (ret == 0) {
+        if (io_hdr.usr_ptr != (unsigned long)r) {
+            BADF("response header does not belong to tag=0x%x\n",
+                 req->tag);
+        }
+        /* the submitted header is not updated in place: copy the results */
+        r->bsg_hdr.driver_status = io_hdr.driver_status;
+        r->bsg_hdr.transport_status = io_hdr.transport_status;
+        r->bsg_hdr.device_status = io_hdr.device_status;
+        r->bsg_hdr.response_len = io_hdr.response_len;
+        r->bsg_hdr.din_resid = io_hdr.din_resid;
+        r->bsg_hdr.dout_resid = io_hdr.dout_resid;
+    }
+
+    if (scsi_req_is_write(req)) {
+        req->xferlen = r->bsg_hdr.dout_xfer_len;
+        complete = bsg_write_complete;
+    } else {
+        req->xferlen = r->bsg_hdr.din_xfer_len;
+        complete = bsg_command_complete;
+    }
+    complete(opaque, ret);
+}
+
+/*
+ * Handle the cases that are answered locally instead of being sent to the
+ * device.
+ *
+ *  - Any command other than REQUEST SENSE addressed to a LUN different
+ *    from the device's LUN (checked in both the request and byte 1 of the
+ *    CDB) fails with LUN_NOT_SUPPORTED sense.
+ *  - REQUEST SENSE, while driver sense is pending, is answered from the
+ *    cached sense buffer, which is copied into buffer.
+ *
+ * Returns 1 when the request was handled here (status already set), 0 when
+ * it still has to be executed.
+ */
+static int bsg_generic_req_common(SCSIRequest *req, uint8_t *buffer)
+{
+    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
+    SCSIBSGState *s = DO_UPCAST(SCSIBSGState, qdev, r->req.dev);
+    int is_request_sense;
+
+    bsg_req_fixup(&r->req);
+
+    is_request_sense = (req->cmd.buf[0] == REQUEST_SENSE);
+
+    if (!is_request_sense) {
+        int lun_matches = (req->lun == s->lun) &&
+                          ((req->cmd.buf[1] >> 5) == s->lun);
+
+        if (lun_matches) {
+            return 0;
+        }
+
+        DPRINTF("Unimplemented LUN %d\n",
+            req->lun ? req->lun : req->cmd.buf[1] >> 5);
+        s->senselen = scsi_build_sense(SENSE_CODE(LUN_NOT_SUPPORTED),
+                s->sensebuf, SCSI_SENSE_BUF_SIZE, 0);
+        s->driver_status = SG_ERR_DRIVER_SENSE;
+        req->status = CHECK_CONDITION << 1;
+        return 1;
+    }
+
+    if (!(s->driver_status & SG_ERR_DRIVER_SENSE)) {
+        return 0;
+    }
+
+    /* never copy more than the initiator asked for */
+    req->xferlen = MIN(req->cmd.xfer, s->senselen);
+    memcpy(buffer, s->sensebuf, req->xferlen);
+    DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, s->senselen);
+    DPRINTF("Sense: %d %d %d %d %d %d %d %d\n",
+            buffer[0], buffer[1], buffer[2], buffer[3],
+            buffer[4], buffer[5], buffer[6], buffer[7]);
+    req->status = GOOD;
+    return 1;
+}
+
+/*
+ * request_buf callback: run a request whose data lives in a flat buffer.
+ *
+ * Requests answered locally by bsg_generic_req_common() are completed at
+ * once; all others are submitted to the device.  A failed submission is
+ * completed with -EINVAL.  Always returns 0.
+ */
+static int bsg_generic_req_buf(SCSIRequest *req, uint8_t *buffer)
+{
+    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
+
+    if (bsg_generic_req_common(req, buffer)) {
+        scsi_req_complete(req);
+        return 0;
+    }
+
+    req->xferlen = req->cmd.xfer;
+    if (bsg_execute_command_buf(r, bsg_generic_req_cb, buffer,
+                                req->xferlen) == -1) {
+        bsg_command_complete(r, -EINVAL);
+    }
+    return 0;
+}
+
+/*
+ * request_sgl callback: run a request whose data is described by a guest
+ * scatter/gather list.
+ *
+ * The list is mapped first; a mapping failure aborts the process.
+ * Requests answered locally by bsg_generic_req_common() use the first
+ * mapped segment as their buffer and are completed at once.  A failed
+ * submission is unmapped and completed with -EINVAL.  Always returns 0.
+ */
+static int bsg_generic_req_sgl(SCSIRequest *req, QEMUSGList *sg)
+{
+    SCSIBSGReq *r = DO_UPCAST(SCSIBSGReq, req, req);
+    uint8_t *first_seg;
+
+    if (bsg_generic_map(r, sg) != 0) {
+        /* Hmm ... */
+        abort();
+    }
+
+    first_seg = r->iov.iov[0].iov_base;
+    if (bsg_generic_req_common(req, first_seg)) {
+        bsg_generic_unmap(r);
+        scsi_req_complete(req);
+        return 0;
+    }
+
+    req->xferlen = req->cmd.xfer;
+    if (bsg_execute_command_iov(r, bsg_generic_req_cb, &r->iov) == -1) {
+        bsg_generic_unmap(r);
+        bsg_command_complete(r, -EINVAL);
+    }
+    return 0;
+}
+
+/*
+ * request_put callback: cancel the request's outstanding AIO operation, if
+ * there is one, and free the request.
+ */
+static void bsg_generic_req_put(SCSIRequest *req)
+{
+    SCSIBSGReq *bsg_req = DO_UPCAST(SCSIBSGReq, req, req);
+
+    if (bsg_req->req.aiocb != NULL) {
+        bdrv_aio_cancel(bsg_req->req.aiocb);
+    }
+
+    bsg_remove_request(bsg_req);
+}
+
+static SCSIDeviceInfo bsg_info = {  /* registered by bsg_register_devices() */
+    .qdev.name      = "scsi-bsg",
+    .qdev.desc      = "pass through block layer scsi generic (/dev/bsg/*)",
+    .qdev.size      = sizeof(SCSIBSGState),
+    .init           = bsg_generic_initfn,
+    .destroy        = bsg_destroy,
+
+    /* new: request_* callbacks (get / buf / sgl / put) */
+    .request_get    = bsg_generic_req_get,
+    .request_buf    = bsg_generic_req_buf,
+    .request_sgl    = bsg_generic_req_sgl,
+    .request_put    = bsg_generic_req_put,
+
+    .qdev.props     = (Property[]) {
+        DEFINE_BLOCK_PROPERTIES(SCSIBSGState, qdev.conf),
+        DEFINE_PROP_END_OF_LIST(),
+    },
+};
+
+static void bsg_register_devices(void)  /* registers the scsi-bsg device */
+{
+    scsi_qdev_register(&bsg_info);
+}
+device_init(bsg_register_devices)  /* hooks the function into device init */
+
+#endif /* __linux__ */