Patchwork iSCSI support

login
register
mail settings
Submitter ronniesahlberg@gmail.com
Date Feb. 4, 2011, 4:37 a.m.
Message ID <1296794276-1683-2-git-send-email-ronniesahlberg@gmail.com>
Download mbox | patch
Permalink /patch/81804/
State New
Headers show

Comments

ronniesahlberg@gmail.com - Feb. 4, 2011, 4:37 a.m.
From: Ronnie Sahlberg <ronniesahlberg@gmail.com>

This patch adds a new block driver : block.iscsi.c
This driver interfaces with the multiplatform posix library
for iscsi initiator/client access to iscsi devices hosted at
git://github.com/sahlberg/libiscsi.git

The patch adds the driver to interface with the iscsi library.
It also updated the configure script to
* by default, probe is libiscsi is available and if so, build
  qemu against libiscsi.
* --enable-libiscsi
  Force a build against libiscsi. If libiscsi is not available
  the build will fail.
* --disable-libiscsi
  Do not link against libiscsi, even if it is available.

When linked with libiscsi, qemu gains support to access iscsi resources
such as disks and cdrom directly, without having to make the devices visible
to the host.

You can specify devices using a iscsi url of the form :
iscsi://[<username>%<password>@]<host>[:<port]/<target-iqn-name>/<lun>

Example:
-drive file=iscsi://10.1.1.1:3260/iqn.ronnie.test/1

-cdrom iscsi://10.1.1.1:3260/iqn.ronnie.test/2

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
---
 Makefile.objs |    1 +
 block/iscsi.c |  542 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 configure     |   31 ++++
 trace-events  |    7 +
 4 files changed, 581 insertions(+), 0 deletions(-)
 create mode 100644 block/iscsi.c
Stefan Hajnoczi - Feb. 19, 2011, 3:34 p.m.
On Fri, Feb 4, 2011 at 4:37 AM,  <ronniesahlberg@gmail.com> wrote:
> From: Ronnie Sahlberg <ronniesahlberg@gmail.com>
>
> This patch adds a new block driver : block.iscsi.c
> This driver interfaces with the multiplatform posix library
> for iscsi initiator/client access to iscsi devices hosted at
> git://github.com/sahlberg/libiscsi.git
>
> The patch adds the driver to interface with the iscsi library.
> It also updated the configure script to
> * by default, probe is libiscsi is available and if so, build
>  qemu against libiscsi.
> * --enable-libiscsi
>  Force a build against libiscsi. If libiscsi is not available
>  the build will fail.
> * --disable-libiscsi
>  Do not link against libiscsi, even if it is available.
>
> When linked with libiscsi, qemu gains support to access iscsi resources
> such as disks and cdrom directly, without having to make the devices visible
> to the host.
>
> You can specify devices using a iscsi url of the form :
> iscsi://[<username>%<password>@]<host>[:<port]/<target-iqn-name>/<lun>

From what I can tell there is no standard iSCSI URI scheme but it's
worth checking before we invent our own.

I thought that '%' is an escape character in URIs.  Are you sure it's
valid to use it as a delimeter between the username and password?

There is a security risk in passing the password on the QEMU
command-line since other users will be able to see it in ps(1) output.
 Encrypted image files (e.g. qcow2) require keys and this is closest
to a mechanism for securely providing the password either via QMP or
interactively.  That would be worth looking into.

>
> Example:
> -drive file=iscsi://10.1.1.1:3260/iqn.ronnie.test/1
>
> -cdrom iscsi://10.1.1.1:3260/iqn.ronnie.test/2
>
> Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
> ---
>  Makefile.objs |    1 +
>  block/iscsi.c |  542 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  configure     |   31 ++++
>  trace-events  |    7 +
>  4 files changed, 581 insertions(+), 0 deletions(-)
>  create mode 100644 block/iscsi.c
>
> diff --git a/Makefile.objs b/Makefile.objs
> index f1c7bfe..a62fcdd 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -25,6 +25,7 @@ block-nested-y += qed-check.o
>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>  block-nested-$(CONFIG_WIN32) += raw-win32.o
>  block-nested-$(CONFIG_POSIX) += raw-posix.o
> +block-nested-$(CONFIG_LIBISCSI) += iscsi.o
>  block-nested-$(CONFIG_CURL) += curl.o
>  block-nested-$(CONFIG_RBD) += rbd.o
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> new file mode 100644
> index 0000000..8cb1c6f
> --- /dev/null
> +++ b/block/iscsi.c
> @@ -0,0 +1,542 @@
> +/*
> + * QEMU Block driver for iSCSI images
> + *
> + * Copyright (c) 2010 Ronnie Sahlberg <ronniesahlberg@gmail.com>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "config-host.h"
> +
> +#include <poll.h>
> +#include "sysemu.h"
> +#include "qemu-common.h"
> +#include "qemu-error.h"
> +#include "block_int.h"
> +#include "trace.h"
> +
> +#include <iscsi/iscsi.h>
> +#include <iscsi/scsi-lowlevel.h>
> +
> +
> +typedef struct IscsiLun {
> +    struct iscsi_context *iscsi;
> +    int lun;
> +    int block_size;
> +    unsigned long num_blocks;
> +} IscsiLun;
> +
> +typedef struct IscsiAIOCB {
> +    BlockDriverAIOCB common;
> +    QEMUIOVector *qiov;
> +    QEMUBH *bh;
> +    IscsiLun *iscsilun;
> +    uint8_t *buf;
> +    int canceled;
> +    int status;
> +    size_t read_size;
> +} IscsiAIOCB;
> +
> +struct IscsiTask {
> +    IscsiLun *iscsilun;
> +    int status;
> +    int complete;
> +};
> +
> +static int
> +iscsi_is_inserted(BlockDriverState *bs)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +
> +    return iscsi_is_logged_in(iscsi);
> +}

Why is this needed?

> +
> +
> +static void
> +iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
> +{
> +    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
> +
> +    acb->status = -EIO;

-ECANCELED?

> +static void
> +iscsi_readv_writev_bh_cb(void *p)
> +{
> +    IscsiAIOCB *acb = p;
> +
> +    qemu_bh_delete(acb->bh);
> +
> +    acb->common.cb(acb->common.opaque, acb->status);

I think we should double-check that acb->status != -ECANCELED here
just in case we scheduled the BH but someone cancelled the request
before the BH was executed.  This would be a really odd case but I
think it's theoretically possible.

> +static BlockDriverAIOCB *
> +iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
> +                 QEMUIOVector *qiov, int nb_sectors,
> +                 BlockDriverCompletionFunc *cb,
> +                 void *opaque)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +    IscsiAIOCB *acb;
> +    size_t size;
> +    int ret;
> +
> +    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
> +    trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
> +    if (!acb) {
> +        return NULL;
> +    }
> +
> +    acb->iscsilun = iscsilun;
> +    acb->qiov     = qiov;
> +
> +    acb->canceled   = 0;
> +
> +    /* XXX we should pass the iovec to write10 to avoid the extra copy */
> +    /* this will allow us to get rid of 'buf' completely */
> +    size = nb_sectors * BDRV_SECTOR_SIZE;
> +    acb->buf = qemu_malloc(size);
> +    qemu_iovec_to_buffer(acb->qiov, acb->buf);
> +    ret = iscsi_write10_async(iscsi, iscsilun->lun, acb->buf, size,
> +                              sector_qemu2lun(sector_num, iscsilun),
> +                              0, 0, iscsilun->block_size,
> +                              iscsi_aio_write10_cb, acb);

We're not obeying LUN block size here.  In order to do that we need
read-write-modify.  See comments on iscsi_aio_readv() below.

> +    if (ret != 0) {
> +        error_report("iSCSI: Failed to send write10 command. %s",
> +                     iscsi_get_error(iscsi));
> +        qemu_free(acb->buf);
> +        qemu_aio_release(acb);
> +        return NULL;
> +    }
> +
> +    iscsi_set_events(iscsilun);
> +
> +    return &acb->common;
> +}
> +
> +static void
> +iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
> +                    void *command_data, void *opaque)
> +{
> +    IscsiAIOCB *acb = opaque;
> +    struct scsi_task *scsi = command_data;
> +
> +    trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
> +
> +    if (acb->canceled != 0) {
> +        qemu_aio_release(acb);
> +        return;
> +    }
> +
> +    acb->status = 0;
> +    if (status < 0) {
> +        error_report("Failed to read10 data from iSCSI lun. %s",
> +                     iscsi_get_error(iscsi));
> +        acb->status = -EIO;
> +    } else {
> +        qemu_iovec_from_buffer(acb->qiov, scsi->datain.data, acb->read_size);

Need to offset from sector_num rounded down to LUN block size.

> +    }
> +
> +    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
> +}
> +
> +static BlockDriverAIOCB *
> +iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
> +                QEMUIOVector *qiov, int nb_sectors,
> +                BlockDriverCompletionFunc *cb,
> +                void *opaque)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +    IscsiAIOCB *acb;
> +    size_t qemu_read_size, lun_read_size;
> +    int ret;
> +
> +    qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
> +    lun_read_size  = (qemu_read_size + iscsilun->block_size - 1)
> +      / iscsilun->block_size * iscsilun->block_size;
> +
> +    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
> +    trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
> +    if (!acb) {
> +        return NULL;
> +    }
> +
> +    acb->iscsilun = iscsilun;
> +    acb->qiov     = qiov;
> +
> +    acb->canceled   = 0;
> +    acb->read_size  = qemu_read_size;
> +    acb->buf        = NULL;
> +
> +    ret = iscsi_read10_async(iscsi, iscsilun->lun,
> +                             sector_qemu2lun(sector_num, iscsilun),
> +                             lun_read_size, iscsilun->block_size,
> +                             iscsi_aio_read10_cb, acb);

lba and datalen seem incorrect here.  lun_read_size has been rounded
up to the next LUN block.  But sector_num might not be LUN block
aligned so we need to extend lun_read_size even further in order to
round down lba to LUN block size.

The cleanest way to support this would be for BlockDriverState to
supply a block size but we currently keep that separate in BlockConf
and have it set from device emulation (e.g. scsi-disk.c) code.  So in
the near term we need to emulated BDRV_SECTOR_SIZE accesses.

> +    if (ret != 0) {
> +        error_report("iSCSI: Failed to send read10 command. %s",
> +                     iscsi_get_error(iscsi));
> +        qemu_aio_release(acb);
> +        return NULL;
> +    }
> +
> +    iscsi_set_events(iscsilun);
> +
> +    return &acb->common;
> +}
> +
> +
> +static void synccache10_cb(struct iscsi_context *iscsi, int status,
> +                           void *command_data, void *opaque)
> +{
> +    struct IscsiTask *task = opaque;
> +
> +    task->status   = status;
> +    task->complete = 1;
> +}
> +
> +static int
> +iscsi_flush(BlockDriverState *bs)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +    struct IscsiTask task;
> +    int ret;
> +
> +    task.iscsilun = iscsilun;
> +    task.status = 0;
> +    task.complete = 0;
> +
> +    ret = iscsi_synchronizecache10_async(iscsi, iscsilun->lun,
> +                                         0, 0, 0, 0,
> +                                         synccache10_cb,
> +                                         &task);
> +    if (ret != 0) {
> +        error_report("iSCSI: Failed to send SYNCHRONIZECACHE10. %s",
> +                     iscsi_get_error(iscsi));
> +        return -1;

Please use an errno here since this function returns a negative errno value.

> +static BlockDriver bdrv_iscsi = {
> +    .format_name     = "iscsi",
> +    .protocol_name   = "iscsi",
> +
> +    .instance_size   = sizeof(IscsiLun),
> +    .bdrv_file_open  = iscsi_open,
> +    .bdrv_close      = iscsi_close,
> +    .bdrv_flush      = iscsi_flush,
> +
> +    .bdrv_getlength  = iscsi_getlength,
> +
> +    .bdrv_aio_readv  = iscsi_aio_readv,
> +    .bdrv_aio_writev = iscsi_aio_writev,

Please implement .bdrv_aio_flush.  Synchronous functions like
.bdrv_flush will block the VM so asynchronous functions are much
preferred.

Stefan
ronniesahlberg@gmail.com - Feb. 27, 2011, 4:28 a.m.
Hi Stefan

Thanks for reviewing the patch. I have addressed all your items, some
with comments below, and will resubmit the patch shortly.
Comments below.


regards
ronnie sahlberg


On Sun, Feb 20, 2011 at 2:34 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Fri, Feb 4, 2011 at 4:37 AM,  <ronniesahlberg@gmail.com> wrote:
>> From: Ronnie Sahlberg <ronniesahlberg@gmail.com>
>>
>> This patch adds a new block driver : block.iscsi.c
>> This driver interfaces with the multiplatform posix library
>> for iscsi initiator/client access to iscsi devices hosted at
>> git://github.com/sahlberg/libiscsi.git
>>
>> The patch adds the driver to interface with the iscsi library.
>> It also updated the configure script to
>> * by default, probe is libiscsi is available and if so, build
>>  qemu against libiscsi.
>> * --enable-libiscsi
>>  Force a build against libiscsi. If libiscsi is not available
>>  the build will fail.
>> * --disable-libiscsi
>>  Do not link against libiscsi, even if it is available.
>>
>> When linked with libiscsi, qemu gains support to access iscsi resources
>> such as disks and cdrom directly, without having to make the devices visible
>> to the host.
>>
>> You can specify devices using a iscsi url of the form :
>> iscsi://[<username>%<password>@]<host>[:<port]/<target-iqn-name>/<lun>
>
> From what I can tell there is no standard iSCSI URI scheme but it's
> worth checking before we invent our own.
>
> I thought that '%' is an escape character in URIs.  Are you sure it's
> valid to use it as a delimeter between the username and password?

I used a '%' since that is how passwords are specified in samba/smb.
I have added to libiscsi the option to use ':' to separate username
and password too.


>
> There is a security risk in passing the password on the QEMU
> command-line since other users will be able to see it in ps(1) output.
>  Encrypted image files (e.g. qcow2) require keys and this is closest
> to a mechanism for securely providing the password either via QMP or
> interactively.  That would be worth looking into.

CHAP passwords can also be specified via environment variables :

LIBISCSI_CHAP_PASSWORD="password"  ....
iscsi://ronnie@127.0.0.1/iqn.ronnie.test/2  ...

To avoid the password form showing up in the process list.


>
>>
>> Example:
>> -drive file=iscsi://10.1.1.1:3260/iqn.ronnie.test/1
>>
>> -cdrom iscsi://10.1.1.1:3260/iqn.ronnie.test/2
>>
>> Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
>> ---
>>  Makefile.objs |    1 +
>>  block/iscsi.c |  542 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  configure     |   31 ++++
>>  trace-events  |    7 +
>>  4 files changed, 581 insertions(+), 0 deletions(-)
>>  create mode 100644 block/iscsi.c
>>
>> diff --git a/Makefile.objs b/Makefile.objs
>> index f1c7bfe..a62fcdd 100644
>> --- a/Makefile.objs
>> +++ b/Makefile.objs
>> @@ -25,6 +25,7 @@ block-nested-y += qed-check.o
>>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>>  block-nested-$(CONFIG_WIN32) += raw-win32.o
>>  block-nested-$(CONFIG_POSIX) += raw-posix.o
>> +block-nested-$(CONFIG_LIBISCSI) += iscsi.o
>>  block-nested-$(CONFIG_CURL) += curl.o
>>  block-nested-$(CONFIG_RBD) += rbd.o
>>
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> new file mode 100644
>> index 0000000..8cb1c6f
>> --- /dev/null
>> +++ b/block/iscsi.c
>> @@ -0,0 +1,542 @@
>> +/*
>> + * QEMU Block driver for iSCSI images
>> + *
>> + * Copyright (c) 2010 Ronnie Sahlberg <ronniesahlberg@gmail.com>
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "config-host.h"
>> +
>> +#include <poll.h>
>> +#include "sysemu.h"
>> +#include "qemu-common.h"
>> +#include "qemu-error.h"
>> +#include "block_int.h"
>> +#include "trace.h"
>> +
>> +#include <iscsi/iscsi.h>
>> +#include <iscsi/scsi-lowlevel.h>
>> +
>> +
>> +typedef struct IscsiLun {
>> +    struct iscsi_context *iscsi;
>> +    int lun;
>> +    int block_size;
>> +    unsigned long num_blocks;
>> +} IscsiLun;
>> +
>> +typedef struct IscsiAIOCB {
>> +    BlockDriverAIOCB common;
>> +    QEMUIOVector *qiov;
>> +    QEMUBH *bh;
>> +    IscsiLun *iscsilun;
>> +    uint8_t *buf;
>> +    int canceled;
>> +    int status;
>> +    size_t read_size;
>> +} IscsiAIOCB;
>> +
>> +struct IscsiTask {
>> +    IscsiLun *iscsilun;
>> +    int status;
>> +    int complete;
>> +};
>> +
>> +static int
>> +iscsi_is_inserted(BlockDriverState *bs)
>> +{
>> +    IscsiLun *iscsilun = bs->opaque;
>> +    struct iscsi_context *iscsi = iscsilun->iscsi;
>> +
>> +    return iscsi_is_logged_in(iscsi);
>> +}
>
> Why is this needed?

It is not needed and I have removed it.

>
>> +
>> +
>> +static void
>> +iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
>> +{
>> +    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
>> +
>> +    acb->status = -EIO;
>
> -ECANCELED?

Done.


>
>> +static void
>> +iscsi_readv_writev_bh_cb(void *p)
>> +{
>> +    IscsiAIOCB *acb = p;
>> +
>> +    qemu_bh_delete(acb->bh);
>> +
>> +    acb->common.cb(acb->common.opaque, acb->status);
>
> I think we should double-check that acb->status != -ECANCELED here
> just in case we scheduled the BH but someone cancelled the request
> before the BH was executed.  This would be a really odd case but I
> think it's theoretically possible.

Done.

>
>> +static BlockDriverAIOCB *
>> +iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
>> +                 QEMUIOVector *qiov, int nb_sectors,
>> +                 BlockDriverCompletionFunc *cb,
>> +                 void *opaque)
>> +{
>> +    IscsiLun *iscsilun = bs->opaque;
>> +    struct iscsi_context *iscsi = iscsilun->iscsi;
>> +    IscsiAIOCB *acb;
>> +    size_t size;
>> +    int ret;
>> +
>> +    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
>> +    trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
>> +    if (!acb) {
>> +        return NULL;
>> +    }
>> +
>> +    acb->iscsilun = iscsilun;
>> +    acb->qiov     = qiov;
>> +
>> +    acb->canceled   = 0;
>> +
>> +    /* XXX we should pass the iovec to write10 to avoid the extra copy */
>> +    /* this will allow us to get rid of 'buf' completely */
>> +    size = nb_sectors * BDRV_SECTOR_SIZE;
>> +    acb->buf = qemu_malloc(size);
>> +    qemu_iovec_to_buffer(acb->qiov, acb->buf);
>> +    ret = iscsi_write10_async(iscsi, iscsilun->lun, acb->buf, size,
>> +                              sector_qemu2lun(sector_num, iscsilun),
>> +                              0, 0, iscsilun->block_size,
>> +                              iscsi_aio_write10_cb, acb);
>
> We're not obeying LUN block size here.  In order to do that we need
> read-write-modify.  See comments on iscsi_aio_readv() below.
>
>> +    if (ret != 0) {
>> +        error_report("iSCSI: Failed to send write10 command. %s",
>> +                     iscsi_get_error(iscsi));
>> +        qemu_free(acb->buf);
>> +        qemu_aio_release(acb);
>> +        return NULL;
>> +    }
>> +
>> +    iscsi_set_events(iscsilun);
>> +
>> +    return &acb->common;
>> +}
>> +
>> +static void
>> +iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
>> +                    void *command_data, void *opaque)
>> +{
>> +    IscsiAIOCB *acb = opaque;
>> +    struct scsi_task *scsi = command_data;
>> +
>> +    trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
>> +
>> +    if (acb->canceled != 0) {
>> +        qemu_aio_release(acb);
>> +        return;
>> +    }
>> +
>> +    acb->status = 0;
>> +    if (status < 0) {
>> +        error_report("Failed to read10 data from iSCSI lun. %s",
>> +                     iscsi_get_error(iscsi));
>> +        acb->status = -EIO;
>> +    } else {
>> +        qemu_iovec_from_buffer(acb->qiov, scsi->datain.data, acb->read_size);
>
> Need to offset from sector_num rounded down to LUN block size.
>
>> +    }
>> +
>> +    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
>> +}
>> +
>> +static BlockDriverAIOCB *
>> +iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
>> +                QEMUIOVector *qiov, int nb_sectors,
>> +                BlockDriverCompletionFunc *cb,
>> +                void *opaque)
>> +{
>> +    IscsiLun *iscsilun = bs->opaque;
>> +    struct iscsi_context *iscsi = iscsilun->iscsi;
>> +    IscsiAIOCB *acb;
>> +    size_t qemu_read_size, lun_read_size;
>> +    int ret;
>> +
>> +    qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
>> +    lun_read_size  = (qemu_read_size + iscsilun->block_size - 1)
>> +      / iscsilun->block_size * iscsilun->block_size;
>> +
>> +    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
>> +    trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
>> +    if (!acb) {
>> +        return NULL;
>> +    }
>> +
>> +    acb->iscsilun = iscsilun;
>> +    acb->qiov     = qiov;
>> +
>> +    acb->canceled   = 0;
>> +    acb->read_size  = qemu_read_size;
>> +    acb->buf        = NULL;
>> +
>> +    ret = iscsi_read10_async(iscsi, iscsilun->lun,
>> +                             sector_qemu2lun(sector_num, iscsilun),
>> +                             lun_read_size, iscsilun->block_size,
>> +                             iscsi_aio_read10_cb, acb);
>
> lba and datalen seem incorrect here.  lun_read_size has been rounded
> up to the next LUN block.  But sector_num might not be LUN block
> aligned so we need to extend lun_read_size even further in order to
> round down lba to LUN block size.
>
> The cleanest way to support this would be for BlockDriverState to
> supply a block size but we currently keep that separate in BlockConf
> and have it set from device emulation (e.g. scsi-disk.c) code.  So in
> the near term we need to emulated BDRV_SECTOR_SIZE accesses.

The problem I think would only exist if qemu block size is smaller
than the iscsi LUN block size.
Iscsi lun blocksizes are either 512 or 2048 bytes, for sbc and mmc
devices accordingly.

I have added a check to iscsi_open() to fail if the qemu blocksize is
less than 512 bytes.
I guess anything smaller is unreasonable, so iscsi will just not work
on smaller blocksizes.

This leaves the case where qemu blocksize is 512 bytes and the lun
blocksize is 2048.
For this case we only need to need to look at the read case, since
writes to a iscsi cdrom device
would go through a /dev/sg interface and not through the qemu write api.
(I intend to in the future add support to expose the iscsi LUNs as SG
devices to qemu and to the guest, but that is future work).

I have update iscsi_aio_readv() code in block/iscsi.c to handle the
case where qemu does reads that are smaller than a full iscsi lun
block
and also the case where the qemu read is not aligned to a iscsi lun block.


>
>> +    if (ret != 0) {
>> +        error_report("iSCSI: Failed to send read10 command. %s",
>> +                     iscsi_get_error(iscsi));
>> +        qemu_aio_release(acb);
>> +        return NULL;
>> +    }
>> +
>> +    iscsi_set_events(iscsilun);
>> +
>> +    return &acb->common;
>> +}
>> +
>> +
>> +static void synccache10_cb(struct iscsi_context *iscsi, int status,
>> +                           void *command_data, void *opaque)
>> +{
>> +    struct IscsiTask *task = opaque;
>> +
>> +    task->status   = status;
>> +    task->complete = 1;
>> +}
>> +
>> +static int
>> +iscsi_flush(BlockDriverState *bs)
>> +{
>> +    IscsiLun *iscsilun = bs->opaque;
>> +    struct iscsi_context *iscsi = iscsilun->iscsi;
>> +    struct IscsiTask task;
>> +    int ret;
>> +
>> +    task.iscsilun = iscsilun;
>> +    task.status = 0;
>> +    task.complete = 0;
>> +
>> +    ret = iscsi_synchronizecache10_async(iscsi, iscsilun->lun,
>> +                                         0, 0, 0, 0,
>> +                                         synccache10_cb,
>> +                                         &task);
>> +    if (ret != 0) {
>> +        error_report("iSCSI: Failed to send SYNCHRONIZECACHE10. %s",
>> +                     iscsi_get_error(iscsi));
>> +        return -1;
>
> Please use an errno here since this function returns a negative errno value.

Done.

>
>> +static BlockDriver bdrv_iscsi = {
>> +    .format_name     = "iscsi",
>> +    .protocol_name   = "iscsi",
>> +
>> +    .instance_size   = sizeof(IscsiLun),
>> +    .bdrv_file_open  = iscsi_open,
>> +    .bdrv_close      = iscsi_close,
>> +    .bdrv_flush      = iscsi_flush,
>> +
>> +    .bdrv_getlength  = iscsi_getlength,
>> +
>> +    .bdrv_aio_readv  = iscsi_aio_readv,
>> +    .bdrv_aio_writev = iscsi_aio_writev,
>
> Please implement .bdrv_aio_flush.  Synchronous functions like
> .bdrv_flush will block the VM so asynchronous functions are much
> preferred.

Done. I have replaced it with an async version now.

>
> Stefan
>

Patch

diff --git a/Makefile.objs b/Makefile.objs
index f1c7bfe..a62fcdd 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -25,6 +25,7 @@  block-nested-y += qed-check.o
 block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
+block-nested-$(CONFIG_LIBISCSI) += iscsi.o
 block-nested-$(CONFIG_CURL) += curl.o
 block-nested-$(CONFIG_RBD) += rbd.o
 
diff --git a/block/iscsi.c b/block/iscsi.c
new file mode 100644
index 0000000..8cb1c6f
--- /dev/null
+++ b/block/iscsi.c
@@ -0,0 +1,542 @@ 
+/*
+ * QEMU Block driver for iSCSI images
+ *
+ * Copyright (c) 2010 Ronnie Sahlberg <ronniesahlberg@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+
+#include <poll.h>
+#include "sysemu.h"
+#include "qemu-common.h"
+#include "qemu-error.h"
+#include "block_int.h"
+#include "trace.h"
+
+#include <iscsi/iscsi.h>
+#include <iscsi/scsi-lowlevel.h>
+
+
+typedef struct IscsiLun {
+    struct iscsi_context *iscsi;
+    int lun;
+    int block_size;
+    unsigned long num_blocks;
+} IscsiLun;
+
+typedef struct IscsiAIOCB {
+    BlockDriverAIOCB common;
+    QEMUIOVector *qiov;
+    QEMUBH *bh;
+    IscsiLun *iscsilun;
+    uint8_t *buf;
+    int canceled;
+    int status;
+    size_t read_size;
+} IscsiAIOCB;
+
+struct IscsiTask {
+    IscsiLun *iscsilun;
+    int status;
+    int complete;
+};
+
+static int
+iscsi_is_inserted(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    return iscsi_is_logged_in(iscsi);
+}
+
+
+static void
+iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
+
+    acb->status = -EIO;
+    acb->common.cb(acb->common.opaque, acb->status);
+    acb->canceled = 1;
+}
+
+static AIOPool iscsi_aio_pool = {
+    .aiocb_size         = sizeof(IscsiAIOCB),
+    .cancel             = iscsi_aio_cancel,
+};
+
+
+static void iscsi_process_read(void *arg);
+static void iscsi_process_write(void *arg);
+
+static int iscsi_process_flush(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+
+    return iscsi_queue_length(iscsilun->iscsi) > 0;
+}
+
+static void
+iscsi_set_events(IscsiLun *iscsilun)
+{
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read,
+                           (iscsi_which_events(iscsi) & POLLOUT)
+                           ? iscsi_process_write : NULL,
+                           iscsi_process_flush, NULL, iscsilun);
+}
+
+static void
+iscsi_process_read(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    iscsi_service(iscsi, POLLIN);
+    iscsi_set_events(iscsilun);
+}
+
+static void
+iscsi_process_write(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    iscsi_service(iscsi, POLLOUT);
+    iscsi_set_events(iscsilun);
+}
+
+
+static int
+iscsi_schedule_bh(QEMUBHFunc *cb, IscsiAIOCB *acb)
+{
+    acb->bh = qemu_bh_new(cb, acb);
+    if (!acb->bh) {
+        error_report("oom: could not create iscsi bh");
+        return -EIO;
+    }
+
+    qemu_bh_schedule(acb->bh);
+    return 0;
+}
+
+static void
+iscsi_readv_writev_bh_cb(void *p)
+{
+    IscsiAIOCB *acb = p;
+
+    qemu_bh_delete(acb->bh);
+
+    acb->common.cb(acb->common.opaque, acb->status);
+    qemu_aio_release(acb);
+}
+
+static void
+iscsi_aio_write10_cb(struct iscsi_context *iscsi, int status,
+                     void *command_data, void *opaque)
+{
+    IscsiAIOCB *acb = opaque;
+
+    trace_iscsi_aio_write10_cb(iscsi, status, acb, acb->canceled);
+
+    if (acb->buf != NULL) {
+        free(acb->buf);
+    }
+
+    if (acb->canceled != 0) {
+        qemu_aio_release(acb);
+        return;
+    }
+
+    acb->status = 0;
+    if (status < 0) {
+        error_report("Failed to write10 data to iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        acb->status = -EIO;
+    }
+
+    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
+}
+
+static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
+{
+    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
+}
+
+static BlockDriverAIOCB *
+iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                 QEMUIOVector *qiov, int nb_sectors,
+                 BlockDriverCompletionFunc *cb,
+                 void *opaque)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    IscsiAIOCB *acb;
+    size_t size;
+    int ret;
+
+    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
+    if (!acb) {
+        return NULL;
+    }
+
+    acb->iscsilun = iscsilun;
+    acb->qiov     = qiov;
+
+    acb->canceled   = 0;
+
+    /* XXX we should pass the iovec to write10 to avoid the extra copy */
+    /* this will allow us to get rid of 'buf' completely */
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb->buf = qemu_malloc(size);
+    qemu_iovec_to_buffer(acb->qiov, acb->buf);
+    ret = iscsi_write10_async(iscsi, iscsilun->lun, acb->buf, size,
+                              sector_qemu2lun(sector_num, iscsilun),
+                              0, 0, iscsilun->block_size,
+                              iscsi_aio_write10_cb, acb);
+    if (ret != 0) {
+        error_report("iSCSI: Failed to send write10 command. %s",
+                     iscsi_get_error(iscsi));
+        qemu_free(acb->buf);
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    iscsi_set_events(iscsilun);
+
+    return &acb->common;
+}
+
+static void
+iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
+                    void *command_data, void *opaque)
+{
+    IscsiAIOCB *acb = opaque;
+    struct scsi_task *scsi = command_data;
+
+    trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
+
+    if (acb->canceled != 0) {
+        qemu_aio_release(acb);
+        return;
+    }
+
+    acb->status = 0;
+    if (status < 0) {
+        error_report("Failed to read10 data from iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        acb->status = -EIO;
+    } else {
+        qemu_iovec_from_buffer(acb->qiov, scsi->datain.data, acb->read_size);
+    }
+
+    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
+}
+
+static BlockDriverAIOCB *
+iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                QEMUIOVector *qiov, int nb_sectors,
+                BlockDriverCompletionFunc *cb,
+                void *opaque)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    IscsiAIOCB *acb;
+    size_t qemu_read_size, lun_read_size;
+    int ret;
+
+    qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
+    lun_read_size  = (qemu_read_size + iscsilun->block_size - 1)
+      / iscsilun->block_size * iscsilun->block_size;
+
+    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
+    if (!acb) {
+        return NULL;
+    }
+
+    acb->iscsilun = iscsilun;
+    acb->qiov     = qiov;
+
+    acb->canceled   = 0;
+    acb->read_size  = qemu_read_size;
+    acb->buf        = NULL;
+
+    ret = iscsi_read10_async(iscsi, iscsilun->lun,
+                             sector_qemu2lun(sector_num, iscsilun),
+                             lun_read_size, iscsilun->block_size,
+                             iscsi_aio_read10_cb, acb);
+    if (ret != 0) {
+        error_report("iSCSI: Failed to send read10 command. %s",
+                     iscsi_get_error(iscsi));
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    iscsi_set_events(iscsilun);
+
+    return &acb->common;
+}
+
+
+static void synccache10_cb(struct iscsi_context *iscsi, int status,
+                           void *command_data, void *opaque)
+{
+    struct IscsiTask *task = opaque;
+
+    task->status   = status;
+    task->complete = 1;
+}
+
+static int
+iscsi_flush(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+    struct IscsiTask task;
+    int ret;
+
+    task.iscsilun = iscsilun;
+    task.status = 0;
+    task.complete = 0;
+
+    ret = iscsi_synchronizecache10_async(iscsi, iscsilun->lun,
+                                         0, 0, 0, 0,
+                                         synccache10_cb,
+                                         &task);
+    if (ret != 0) {
+        error_report("iSCSI: Failed to send SYNCHRONIZECACHE10. %s",
+                     iscsi_get_error(iscsi));
+        return -1;
+    }
+
+    async_context_push();
+    while (!task.complete) {
+        iscsi_set_events(iscsilun);
+        qemu_aio_wait();
+    }
+    async_context_pop();
+    if (task.status != 0) {
+        error_report("iSCSI: Failed to send SYNCHRONIZECACHE10: %s",
+                     iscsi_get_error(iscsi));
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static int64_t
+iscsi_getlength(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    int64_t len;
+
+    len  = iscsilun->num_blocks;
+    len *= iscsilun->block_size;
+
+    return len;
+}
+
+static void
+iscsi_readcapacity10_cb(struct iscsi_context *iscsi, int status,
+                        void *command_data, void *opaque)
+{
+    struct IscsiTask *task = opaque;
+    struct scsi_readcapacity10 *rc10;
+    struct scsi_task *scsi = command_data;
+
+    if (status != 0) {
+        error_report("iSCSI: Failed to read capacity of iSCSI lun. %s",
+                     iscsi_get_error(iscsi));
+        task->status   = 1;
+        task->complete = 1;
+        return;
+    }
+
+    rc10 = scsi_datain_unmarshall(scsi);
+    if (rc10 == NULL) {
+        error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
+        task->status   = 1;
+        task->complete = 1;
+        return;
+    }
+
+    task->iscsilun->block_size = rc10->block_size;
+    task->iscsilun->num_blocks = rc10->lba;
+
+    task->status   = 0;
+    task->complete = 1;
+}
+
+
+static void
+iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data,
+                 void *opaque)
+{
+    struct IscsiTask *task = opaque;
+
+    if (status != 0) {
+        task->status   = 1;
+        task->complete = 1;
+        return;
+    }
+
+    if (iscsi_readcapacity10_async(iscsi, task->iscsilun->lun, 0, 0,
+                                   iscsi_readcapacity10_cb, opaque)
+        != 0) {
+        error_report("iSCSI: failed to send readcapacity command.");
+        task->status   = 1;
+        task->complete = 1;
+        return;
+    }
+}
+
+/*
+ * We support iscsi url's on the form
+ * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
+ */
+static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = NULL;
+    struct iscsi_url *iscsi_url = NULL;
+    struct IscsiTask task;
+    int ret;
+
+    memset(iscsilun, 0, sizeof(IscsiLun));
+
+    /* Should really append the KVM name after the ':' here */
+    iscsi = iscsi_create_context("iqn.2008-11.org.linux-kvm:");
+    if (iscsi == NULL) {
+        error_report("iSCSI: Failed to create iSCSI context.");
+        ret = -ENOMEM;
+        goto failed;
+    }
+
+    iscsi_url = iscsi_parse_full_url(iscsi, filename);
+    if (iscsi_url == NULL) {
+        error_report("Failed to parse URL : %s %s", filename,
+                     iscsi_get_error(iscsi));
+        ret = -ENOMEM;
+        goto failed;
+    }
+
+    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
+        error_report("iSCSI: Failed to set target name.");
+        ret = -ENOMEM;
+        goto failed;
+    }
+
+    if (iscsi_url->user != NULL) {
+        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
+                                              iscsi_url->passwd);
+        if (ret != 0) {
+            error_report("Failed to set initiator username and password");
+            ret = -ENOMEM;
+            goto failed;
+        }
+    }
+    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
+        error_report("iSCSI: Failed to set session type to normal.");
+        ret = -ENOMEM;
+        goto failed;
+    }
+
+    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+
+    task.iscsilun = iscsilun;
+    task.status = 0;
+    task.complete = 0;
+
+    iscsilun->iscsi = iscsi;
+    iscsilun->lun   = iscsi_url->lun;
+
+    if (iscsi_full_connect_async(iscsi, iscsi_url->portal, iscsi_url->lun,
+                                 iscsi_connect_cb, &task)
+        != 0) {
+        error_report("iSCSI: Failed to start async connect.");
+        ret = -ENOMEM;
+        goto failed;
+    }
+
+    async_context_push();
+    while (!task.complete) {
+        iscsi_set_events(iscsilun);
+        qemu_aio_wait();
+    }
+    async_context_pop();
+    if (task.status != 0) {
+        error_report("iSCSI: Failed to connect to LUN : %s",
+                     iscsi_get_error(iscsi));
+        ret = -EINVAL;
+        goto failed;
+    }
+
+    return 0;
+
+failed:
+    if (iscsi_url != NULL) {
+        iscsi_destroy_url(iscsi_url);
+    }
+    if (iscsi != NULL) {
+        iscsi_destroy_context(iscsi);
+    }
+    memset(iscsilun, 0, sizeof(IscsiLun));
+    return ret;
+}
+
+static void iscsi_close(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct iscsi_context *iscsi = iscsilun->iscsi;
+
+    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL, NULL);
+    iscsi_destroy_context(iscsi);
+    memset(iscsilun, 0, sizeof(IscsiLun));
+}
+
+static BlockDriver bdrv_iscsi = {
+    .format_name     = "iscsi",
+    .protocol_name   = "iscsi",
+
+    .instance_size   = sizeof(IscsiLun),
+    .bdrv_file_open  = iscsi_open,
+    .bdrv_close      = iscsi_close,
+    .bdrv_flush      = iscsi_flush,
+
+    .bdrv_getlength  = iscsi_getlength,
+
+    .bdrv_aio_readv  = iscsi_aio_readv,
+    .bdrv_aio_writev = iscsi_aio_writev,
+
+    .bdrv_is_inserted = iscsi_is_inserted,
+};
+
+static void iscsi_block_init(void)
+{
+    bdrv_register(&bdrv_iscsi);
+}
+
+block_init(iscsi_block_init);
+
diff --git a/configure b/configure
index 598e8e1..5ccafe3 100755
--- a/configure
+++ b/configure
@@ -174,6 +174,7 @@  trace_backend="nop"
 trace_file="trace"
 spice=""
 rbd=""
+libiscsi=""
 
 # parse CC options first
 for opt do
@@ -622,6 +623,10 @@  for opt do
   ;;
   --enable-spice) spice="yes"
   ;;
+  --disable-libiscsi) libiscsi="no"
+  ;;
+  --enable-libiscsi) libiscsi="yes"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --enable-cocoa)
@@ -914,6 +919,8 @@  echo "                           Default:trace-<pid>"
 echo "  --disable-spice          disable spice"
 echo "  --enable-spice           enable spice"
 echo "  --enable-rbd             enable building the rados block device (rbd)"
+echo "  --disable-libiscsi       disable iscsi support"
+echo "  --enable-libiscsi        enable iscsi support"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2171,6 +2178,25 @@  if compile_prog "" "" ; then
 fi
 
 ##########################################
+# Do we have libiscsi
+if test "$libiscsi" != "no" ; then
+  cat > $TMPC << EOF
+#include <iscsi/iscsi.h>
+int main(void) { iscsi_create_context(""); return 0; }
+EOF
+  if compile_prog "-Werror" "-liscsi" ; then
+    libiscsi="yes"
+    LIBS="$LIBS -liscsi"
+  else
+    if test "$libiscsi" = "yes" ; then
+      feature_not_found "libiscsi"
+    fi
+    libiscsi="no"
+  fi
+fi
+
+
+##########################################
 # Do we need librt
 cat > $TMPC <<EOF
 #include <signal.h>
@@ -2472,6 +2498,7 @@  echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice"
 echo "rbd support       $rbd"
 echo "xfsctl support    $xfs"
+echo "libiscsi support  $libiscsi"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -2744,6 +2771,10 @@  if test "$spice" = "yes" ; then
   echo "CONFIG_SPICE=y" >> $config_host_mak
 fi
 
+if test "$libiscsi" = "yes" ; then
+  echo "CONFIG_LIBISCSI=y" >> $config_host_mak
+fi
+
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
   echo "CONFIG_BSD=y" >> $config_host_mak
diff --git a/trace-events b/trace-events
index e6138ea..ef3cd35 100644
--- a/trace-events
+++ b/trace-events
@@ -254,3 +254,10 @@  disable spice_vmc_write(ssize_t out, int len) "spice wrottn %lu of requested %zd
 disable spice_vmc_read(int bytes, int len) "spice read %lu of requested %zd"
 disable spice_vmc_register_interface(void *scd) "spice vmc registered interface %p"
 disable spice_vmc_unregister_interface(void *scd) "spice vmc unregistered interface %p"
+
+
+# block/iscsi.c
+disable iscsi_aio_write10_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p status %d acb %p canceled %d"
+disable iscsi_aio_writev(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"
+disable iscsi_aio_read10_cb(void *iscsi, int status, void *acb, int canceled) "iscsi %p status %d acb %p canceled %d"
+disable iscsi_aio_readv(void *iscsi, int64_t sector_num, int nb_sectors, void *opaque, void *acb) "iscsi %p sector_num %"PRId64" nb_sectors %d opaque %p acb %p"