
[08/22] iscsi: implement .bdrv_detach/attach_aio_context()

Message ID 1398956086-20171-9-git-send-email-stefanha@redhat.com
State New

Commit Message

Stefan Hajnoczi May 1, 2014, 2:54 p.m. UTC
Drop the assumption that we're using the main AioContext.  Convert
qemu_aio_set_fd_handler() to aio_set_fd_handler() and timer_new_ms() to
aio_timer_new().

The .bdrv_detach/attach_aio_context() interfaces also need to be
implemented to move the fd and timer from the old to the new AioContext.

Cc: Peter Lieven <pl@kamp.de>
Cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/iscsi.c | 79 +++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 24 deletions(-)

Comments

Peter Lieven May 1, 2014, 10:39 p.m. UTC | #1
On 01.05.2014 at 16:54, Stefan Hajnoczi <stefanha@redhat.com> wrote:

> Drop the assumption that we're using the main AioContext.  Convert
> qemu_aio_set_fd_handler() to aio_set_fd_handler() and timer_new_ms() to
> aio_timer_new().
> 
> The .bdrv_detach/attach_aio_context() interfaces also need to be
> implemented to move the fd and timer from the old to the new AioContext.
> 
> Cc: Peter Lieven <pl@kamp.de>
> Cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> block/iscsi.c | 79 +++++++++++++++++++++++++++++++++++++++++------------------
> 1 file changed, 55 insertions(+), 24 deletions(-)
> 
> diff --git a/block/iscsi.c b/block/iscsi.c
> index a30202b..81e3ebd 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -47,6 +47,7 @@
> 
> typedef struct IscsiLun {
>     struct iscsi_context *iscsi;
> +    AioContext *aio_context;
>     int lun;
>     enum scsi_inquiry_peripheral_device_type type;
>     int block_size;
> @@ -69,6 +70,7 @@ typedef struct IscsiTask {
>     struct scsi_task *task;
>     Coroutine *co;
>     QEMUBH *bh;
> +    AioContext *aio_context;
> } IscsiTask;
> 
> typedef struct IscsiAIOCB {
> @@ -120,7 +122,7 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
>     if (acb->bh) {
>         return;
>     }
> -    acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
> +    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
>     qemu_bh_schedule(acb->bh);
> }
> 
> @@ -156,7 +158,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
> 
> out:
>     if (iTask->co) {
> -        iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
> +        iTask->bh = aio_bh_new(iTask->aio_context, iscsi_co_generic_bh_cb, iTask);
>         qemu_bh_schedule(iTask->bh);
>     }
> }
> @@ -164,8 +166,9 @@ out:
> static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
> {
>     *iTask = (struct IscsiTask) {
> -        .co         = qemu_coroutine_self(),
> -        .retries    = ISCSI_CMD_RETRIES,
> +        .co             = qemu_coroutine_self(),
> +        .retries        = ISCSI_CMD_RETRIES,
> +        .aio_context    = iscsilun->aio_context,
>     };
> }
> 
> @@ -196,7 +199,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
>                                      iscsi_abort_task_cb, acb);
> 
>     while (acb->status == -EINPROGRESS) {
> -        qemu_aio_wait();
> +        aio_poll(bdrv_get_aio_context(blockacb->bs), true);
>     }
> }
> 
> @@ -219,10 +222,11 @@ iscsi_set_events(IscsiLun *iscsilun)
>     ev = POLLIN;
>     ev |= iscsi_which_events(iscsi);
>     if (ev != iscsilun->events) {
> -        qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
> -                      iscsi_process_read,
> -                      (ev & POLLOUT) ? iscsi_process_write : NULL,
> -                      iscsilun);
> +        aio_set_fd_handler(iscsilun->aio_context,
> +                           iscsi_get_fd(iscsi),
> +                           iscsi_process_read,
> +                           (ev & POLLOUT) ? iscsi_process_write : NULL,
> +                           iscsilun);
> 
>     }
> 
> @@ -620,7 +624,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
>         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
> 
>         while (status == -EINPROGRESS) {
> -            qemu_aio_wait();
> +            aio_poll(bdrv_get_aio_context(bs), true);
>         }
> 
>         return 0;
> @@ -1110,6 +1114,40 @@ fail_with_err:
>     return NULL;
> }
> 
> +static void iscsi_detach_aio_context(BlockDriverState *bs)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +
> +    aio_set_fd_handler(iscsilun->aio_context,
> +                       iscsi_get_fd(iscsilun->iscsi),
> +                       NULL, NULL, NULL);
> +    iscsilun->events = 0;
> +
> +    if (iscsilun->nop_timer) {
> +        timer_del(iscsilun->nop_timer);
> +        timer_free(iscsilun->nop_timer);
> +        iscsilun->nop_timer = NULL;
> +    }
> +}
> +
> +static void iscsi_attach_aio_context(BlockDriverState *bs,
> +                                     AioContext *new_context)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +
> +    iscsilun->aio_context = new_context;
> +    iscsi_set_events(iscsilun);
> +
> +#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
> +    /* Set up a timer for sending out iSCSI NOPs */
> +    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
> +                                        QEMU_CLOCK_REALTIME, SCALE_MS,
> +                                        iscsi_nop_timed_event, iscsilun);
> +    timer_mod(iscsilun->nop_timer,
> +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
> +#endif
> +}

Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
while we are in another function/callback of the iscsi driver for the same target?

Peter


> +
> /*
>  * We support iscsi url's on the form
>  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
> @@ -1216,6 +1254,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>     }
> 
>     iscsilun->iscsi = iscsi;
> +    iscsilun->aio_context = bdrv_get_aio_context(bs);
>     iscsilun->lun   = iscsi_url->lun;
>     iscsilun->has_write_same = true;
> 
> @@ -1289,11 +1328,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>     scsi_free_scsi_task(task);
>     task = NULL;
> 
> -#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
> -    /* Set up a timer for sending out iSCSI NOPs */
> -    iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
> -    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
> -#endif
> +    iscsi_attach_aio_context(bs, iscsilun->aio_context);
> 
> out:
>     qemu_opts_del(opts);
> @@ -1321,11 +1356,7 @@ static void iscsi_close(BlockDriverState *bs)
>     IscsiLun *iscsilun = bs->opaque;
>     struct iscsi_context *iscsi = iscsilun->iscsi;
> 
> -    if (iscsilun->nop_timer) {
> -        timer_del(iscsilun->nop_timer);
> -        timer_free(iscsilun->nop_timer);
> -    }
> -    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
> +    iscsi_detach_aio_context(bs);
>     iscsi_destroy_context(iscsi);
>     g_free(iscsilun->zeroblock);
>     memset(iscsilun, 0, sizeof(IscsiLun));
> @@ -1421,10 +1452,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options,
>     if (ret != 0) {
>         goto out;
>     }
> -    if (iscsilun->nop_timer) {
> -        timer_del(iscsilun->nop_timer);
> -        timer_free(iscsilun->nop_timer);
> -    }
> +    iscsi_detach_aio_context(bs);
>     if (iscsilun->type != TYPE_DISK) {
>         ret = -ENODEV;
>         goto out;
> @@ -1501,6 +1529,9 @@ static BlockDriver bdrv_iscsi = {
>     .bdrv_ioctl       = iscsi_ioctl,
>     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
> #endif
> +
> +    .bdrv_detach_aio_context = iscsi_detach_aio_context,
> +    .bdrv_attach_aio_context = iscsi_attach_aio_context,
> };
> 
> static QemuOptsList qemu_iscsi_opts = {
> -- 
> 1.9.0
>
Stefan Hajnoczi May 7, 2014, 10:07 a.m. UTC | #2
On Fri, May 02, 2014 at 12:39:06AM +0200, Peter Lieven wrote:
> > +static void iscsi_attach_aio_context(BlockDriverState *bs,
> > +                                     AioContext *new_context)
> > +{
> > +    IscsiLun *iscsilun = bs->opaque;
> > +
> > +    iscsilun->aio_context = new_context;
> > +    iscsi_set_events(iscsilun);
> > +
> > +#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
> > +    /* Set up a timer for sending out iSCSI NOPs */
> > +    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
> > +                                        QEMU_CLOCK_REALTIME, SCALE_MS,
> > +                                        iscsi_nop_timed_event, iscsilun);
> > +    timer_mod(iscsilun->nop_timer,
> > +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
> > +#endif
> > +}
> 
> Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
> while we are in another function/callback of the iscsi driver for the same target?

This is a good point.

Previously, the nop timer was deferred until the qemu_aio_wait() loop
terminates.

With this patch the nop timer fires during aio_poll() loops for any
synchronous emulation that QEMU does (including iscsi_aio_cancel() and
.bdrv_ioctl() in block/iscsi.c).

I don't know libiscsi well enough to understand the implications.  I can
see that iscsi_reconnect() resends in-flight commands.  So what's the
upshot of all this?

BTW, is iscsi_reconnect() the right libiscsi interface to use since it
is synchronous?  It seems like this would block QEMU until the socket
has connected!  The guest would be frozen.
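
(For reference, a rough, from-memory sketch of where that synchronous call
sits: the NOP timer callback in block/iscsi.c, which is not part of the hunks
above, so the exact code may differ.  iscsi_reconnect() blocks until the
session is re-established.)

static void iscsi_nop_timed_event(void *opaque)
{
    IscsiLun *iscsilun = opaque;

    /* If several NOP-Outs went unanswered, assume the session is dead and
     * reconnect.  iscsi_reconnect() is the synchronous libiscsi call that
     * blocks until the new socket is connected and logged in again. */
    if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
        error_report("iSCSI: NOP timeout, trying to reconnect...");
        iscsi_reconnect(iscsilun->iscsi);
    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
        /* Could not queue another NOP-Out; give up on NOP keepalives. */
        error_report("iSCSI: failed to send NOP-Out");
        return;
    }

    /* Re-arm the timer and update the fd handler for any queued PDUs. */
    timer_mod(iscsilun->nop_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
    iscsi_set_events(iscsilun);
}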

Stefan
Paolo Bonzini May 7, 2014, 10:29 a.m. UTC | #3
On 07/05/2014 12:07, Stefan Hajnoczi wrote:
> On Fri, May 02, 2014 at 12:39:06AM +0200, Peter Lieven wrote:
>>> +static void iscsi_attach_aio_context(BlockDriverState *bs,
>>> +                                     AioContext *new_context)
>>> +{
>>> +    IscsiLun *iscsilun = bs->opaque;
>>> +
>>> +    iscsilun->aio_context = new_context;
>>> +    iscsi_set_events(iscsilun);
>>> +
>>> +#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
>>> +    /* Set up a timer for sending out iSCSI NOPs */
>>> +    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
>>> +                                        QEMU_CLOCK_REALTIME, SCALE_MS,
>>> +                                        iscsi_nop_timed_event, iscsilun);
>>> +    timer_mod(iscsilun->nop_timer,
>>> +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
>>> +#endif
>>> +}
>>
>> Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
>> while we are in another function/callback of the iscsi driver for the same target?

Yes, since the timer is in the same AioContext as the iscsi driver 
callbacks.

> This is a good point.
>
> Previously, the nop timer was deferred until the qemu_aio_wait() loop
> terminates.
>
> With this patch the nop timer fires during aio_poll() loops for any
> synchronous emulation that QEMU does (including iscsi_aio_cancel() and
> .bdrv_ioctl() in block/iscsi.c).
>
> I don't know libiscsi well enough to understand the implications.  I can
> see that iscsi_reconnect() resends in-flight commands.  So what's the
> upshot of all this?

I think it's fine.  The target will process NOPs asynchronously, so 
iscsi_nop_timed_event will see no NOP in flight if the target is working 
properly.

> BTW, is iscsi_reconnect() the right libiscsi interface to use since it
> is synchronous?  It seems like this would block QEMU until the socket
> has connected!  The guest would be frozen.

There is no asynchronous interface yet for reconnection, unfortunately.

Paolo
Peter Lieven May 7, 2014, 2:09 p.m. UTC | #4
On 07.05.2014 12:29, Paolo Bonzini wrote:
> On 07/05/2014 12:07, Stefan Hajnoczi wrote:
>> On Fri, May 02, 2014 at 12:39:06AM +0200, Peter Lieven wrote:
>>>> +static void iscsi_attach_aio_context(BlockDriverState *bs,
>>>> +                                     AioContext *new_context)
>>>> +{
>>>> +    IscsiLun *iscsilun = bs->opaque;
>>>> +
>>>> +    iscsilun->aio_context = new_context;
>>>> +    iscsi_set_events(iscsilun);
>>>> +
>>>> +#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
>>>> +    /* Set up a timer for sending out iSCSI NOPs */
>>>> +    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
>>>> + QEMU_CLOCK_REALTIME, SCALE_MS,
>>>> + iscsi_nop_timed_event, iscsilun);
>>>> +    timer_mod(iscsilun->nop_timer,
>>>> +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
>>>> +#endif
>>>> +}
>>>
>>> Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
>>> while we are in another function/callback of the iscsi driver for the same target?
>
> Yes, since the timer is in the same AioContext as the iscsi driver callbacks.


Ok. Stefan: What MUST NOT happen is that the timer gets fired while we are in iscsi_service.
As Paolo outlined, this cannot happen, right?

>
>> This is a good point.
>>
>> Previously, the nop timer was deferred until the qemu_aio_wait() loop
>> terminates.
>>
>> With this patch the nop timer fires during aio_poll() loops for any
>> synchronous emulation that QEMU does (including iscsi_aio_cancel() and
>> .bdrv_ioctl() in block/iscsi.c).
>>
>> I don't know libiscsi well enough to understand the implications.  I can
>> see that iscsi_reconnect() resends in-flight commands.  So what's the
>> upshot of all this?
>
> I think it's fine.  The target will process NOPs asynchronously, so iscsi_nop_timed_event will see no NOP in flight if the target is working properly.

Yes, or at most one in-flight NOP.

>
>> BTW, is iscsi_reconnect() the right libiscsi interface to use since it
>> is synchronous?  It seems like this would block QEMU until the socket
>> has connected!  The guest would be frozen.
>
> There is no asynchronous interface yet for reconnection, unfortunately.

We initiate the reconnect after we miss a few NOP replies. So the target is already down for approx. 30 seconds.
Every process inside the guest is already hanging or has timed out.

If I understand correctly, with the new patches only the communication with this target is hanging, or isn't it?
So what benefit would an asynchronous reconnect have?

Peter
Stefan Hajnoczi May 8, 2014, 11:33 a.m. UTC | #5
On Wed, May 07, 2014 at 04:09:27PM +0200, Peter Lieven wrote:
> On 07.05.2014 12:29, Paolo Bonzini wrote:
> >On 07/05/2014 12:07, Stefan Hajnoczi wrote:
> >>On Fri, May 02, 2014 at 12:39:06AM +0200, Peter Lieven wrote:
> >>>>+static void iscsi_attach_aio_context(BlockDriverState *bs,
> >>>>+                                     AioContext *new_context)
> >>>>+{
> >>>>+    IscsiLun *iscsilun = bs->opaque;
> >>>>+
> >>>>+    iscsilun->aio_context = new_context;
> >>>>+    iscsi_set_events(iscsilun);
> >>>>+
> >>>>+#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
> >>>>+    /* Set up a timer for sending out iSCSI NOPs */
> >>>>+    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
> >>>>+ QEMU_CLOCK_REALTIME, SCALE_MS,
> >>>>+ iscsi_nop_timed_event, iscsilun);
> >>>>+    timer_mod(iscsilun->nop_timer,
> >>>>+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
> >>>>+#endif
> >>>>+}
> >>>
> >>>Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
> >>>while we are in another function/callback of the iscsi driver for the same target?
> >
> >Yes, since the timer is in the same AioContext as the iscsi driver callbacks.
> 
> 
> Ok. Stefan: What MUST NOT happen is that the timer gets fired while we are in iscsi_service.
> As Paolo outlined, this cannot happen, right?

Okay, I think we're safe then.  The timer can only be invoked during
aio_poll() event loop iterations.  It cannot be invoked while we're
inside iscsi_service().
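
A minimal, self-contained model of that guarantee (illustrative C only, not
QEMU code; the real dispatch happens inside aio_poll()):

#include <stdbool.h>
#include <stdio.h>

static bool in_iscsi_service;

static void fd_handler(void)       /* models iscsi_process_read() -> iscsi_service() */
{
    in_iscsi_service = true;
    /* ... iscsi_service() would process replies and complete requests ... */
    in_iscsi_service = false;
}

static void nop_timer_cb(void)     /* models iscsi_nop_timed_event() */
{
    if (in_iscsi_service) {
        printf("BUG: timer ran inside iscsi_service()\n");
    }
}

/* One event loop iteration: callbacks run sequentially in a single thread,
 * so the timer callback can never interrupt the fd handler. */
static void event_loop_iteration(bool fd_ready, bool timer_expired)
{
    if (fd_ready) {
        fd_handler();              /* runs to completion first */
    }
    if (timer_expired) {
        nop_timer_cb();            /* dispatched only afterwards */
    }
}

int main(void)
{
    event_loop_iteration(true, true);  /* prints nothing: the invariant holds */
    return 0;
}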

> >>BTW, is iscsi_reconnect() the right libiscsi interface to use since it
> >>is synchronous?  It seems like this would block QEMU until the socket
> >>has connected!  The guest would be frozen.
> >
> >There is no asynchronous interface yet for reconnection, unfortunately.
> 
> We initiate the reconnect after we miss a few NOP replies. So the target is already down for approx. 30 seconds.
> Every process inside the guest is already hanging or has timed out.
>
> If I understand correctly, with the new patches only the communication with this target is hanging, or isn't it?
> So what benefit would an asynchronous reconnect have?

Asynchronous reconnect is desirable:

1. The QEMU monitor is blocked while we're waiting for the iSCSI target
   to accept our reconnect.  This means the management stack (libvirt)
   cannot control QEMU until we time out or succeed.

2. The guest is totally frozen - cannot execute instructions - because
   it will soon reach a point in the code that locks the QEMU global
   mutex (which is being held while we reconnect to the iSCSI target).

   This may be okayish for guests where the iSCSI LUN contains the
   "main" data that is being processed.  But what if an iSCSI LUN was
   just attached to a guest that is also doing other things that are
   independent (e.g. serving a website, processing data from a local
   disk, etc) - now the reconnect causes downtime for the entire guest.
ronnie sahlberg May 8, 2014, 2:52 p.m. UTC | #6
On Thu, May 8, 2014 at 4:33 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Wed, May 07, 2014 at 04:09:27PM +0200, Peter Lieven wrote:
>> On 07.05.2014 12:29, Paolo Bonzini wrote:
>> >On 07/05/2014 12:07, Stefan Hajnoczi wrote:
>> >>On Fri, May 02, 2014 at 12:39:06AM +0200, Peter Lieven wrote:
>> >>>>+static void iscsi_attach_aio_context(BlockDriverState *bs,
>> >>>>+                                     AioContext *new_context)
>> >>>>+{
>> >>>>+    IscsiLun *iscsilun = bs->opaque;
>> >>>>+
>> >>>>+    iscsilun->aio_context = new_context;
>> >>>>+    iscsi_set_events(iscsilun);
>> >>>>+
>> >>>>+#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
>> >>>>+    /* Set up a timer for sending out iSCSI NOPs */
>> >>>>+    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
>> >>>>+ QEMU_CLOCK_REALTIME, SCALE_MS,
>> >>>>+ iscsi_nop_timed_event, iscsilun);
>> >>>>+    timer_mod(iscsilun->nop_timer,
>> >>>>+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
>> >>>>+#endif
>> >>>>+}
>> >>>
>> >>>Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
>> >>>while we are in another function/callback of the iscsi driver for the same target?
>> >
>> >Yes, since the timer is in the same AioContext as the iscsi driver callbacks.
>>
>>
>> Ok. Stefan: What MUST NOT happen is that the timer gets fired while we are in iscsi_service.
>> As Paolo outlined, this cannot happen, right?
>
> Okay, I think we're safe then.  The timer can only be invoked during
> aio_poll() event loop iterations.  It cannot be invoked while we're
> inside iscsi_service().
>
>> >>BTW, is iscsi_reconnect() the right libiscsi interface to use since it
>> >>is synchronous?  It seems like this would block QEMU until the socket
>> >>has connected!  The guest would be frozen.
>> >
>> >There is no asynchronous interface yet for reconnection, unfortunately.
>>
>> We initiate the reconnect after we miss a few NOP replies. So the target is already down for approx. 30 seconds.
>> Every process inside the guest is already hanging or has timed out.
>>
>> If I understand correctly, with the new patches only the communication with this target is hanging, or isn't it?
>> So what benefit would an asynchronous reconnect have?
>
> Asynchronous reconnect is desirable:
>
> 1. The QEMU monitor is blocked while we're waiting for the iSCSI target
>    to accept our reconnect.  This means the management stack (libvirt)
>    cannot control QEMU until we time out or succeed.
>
> 2. The guest is totally frozen - cannot execute instructions - because
>    it will soon reach a point in the code that locks the QEMU global
>    mutex (which is being held while we reconnect to the iSCSI target).
>
>    This may be okayish for guests where the iSCSI LUN contains the
>    "main" data that is being processed.  But what if an iSCSI LUN was
>    just attached to a guest that is also doing other things that are
>    independent (e.g. serving a website, processing data from a local
>    disk, etc) - now the reconnect causes downtime for the entire guest.

I will look into making the reconnect async over the next few days.
Peter Lieven May 8, 2014, 3:45 p.m. UTC | #7
On 08.05.2014 at 16:52, ronnie sahlberg wrote:
> On Thu, May 8, 2014 at 4:33 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> On Wed, May 07, 2014 at 04:09:27PM +0200, Peter Lieven wrote:
>>> On 07.05.2014 12:29, Paolo Bonzini wrote:
>>>> On 07/05/2014 12:07, Stefan Hajnoczi wrote:
>>>>> On Fri, May 02, 2014 at 12:39:06AM +0200, Peter Lieven wrote:
>>>>>>> +static void iscsi_attach_aio_context(BlockDriverState *bs,
>>>>>>> +                                     AioContext *new_context)
>>>>>>> +{
>>>>>>> +    IscsiLun *iscsilun = bs->opaque;
>>>>>>> +
>>>>>>> +    iscsilun->aio_context = new_context;
>>>>>>> +    iscsi_set_events(iscsilun);
>>>>>>> +
>>>>>>> +#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
>>>>>>> +    /* Set up a timer for sending out iSCSI NOPs */
>>>>>>> +    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
>>>>>>> + QEMU_CLOCK_REALTIME, SCALE_MS,
>>>>>>> + iscsi_nop_timed_event, iscsilun);
>>>>>>> +    timer_mod(iscsilun->nop_timer,
>>>>>>> +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
>>>>>>> +#endif
>>>>>>> +}
>>>>>> Is it still guaranteed that iscsi_nop_timed_event for a target is not invoked
>>>>>> while we are in another function/callback of the iscsi driver for the same target?
>>>> Yes, since the timer is in the same AioContext as the iscsi driver callbacks.
>>>
>>> Ok. Stefan: What MUST NOT happen is that the timer gets fired while we are in iscsi_service.
>>> As Paolo outlined, this cannot happen, right?
>> Okay, I think we're safe then.  The timer can only be invoked during
>> aio_poll() event loop iterations.  It cannot be invoked while we're
>> inside iscsi_service().
>>
>>>>> BTW, is iscsi_reconnect() the right libiscsi interface to use since it
>>>>> is synchronous?  It seems like this would block QEMU until the socket
>>>>> has connected!  The guest would be frozen.
>>>> There is no asynchronous interface yet for reconnection, unfortunately.
>>> We initiate the reconnect after we miss a few NOP replies. So the target is already down for approx. 30 seconds.
>>> Every process inside the guest is already hanging or has timed out.
>>>
>>> If I understand correctly, with the new patches only the communication with this target is hanging, or isn't it?
>>> So what benefit would an asynchronous reconnect have?
>> Asynchronous reconnect is desirable:
>>
>> 1. The QEMU monitor is blocked while we're waiting for the iSCSI target
>>    to accept our reconnect.  This means the management stack (libvirt)
>>    cannot control QEMU until we time out or succeed.
>>
>> 2. The guest is totally frozen - cannot execute instructions - because
>>    it will soon reach a point in the code that locks the QEMU global
>>    mutex (which is being held while we reconnect to the iSCSI target).
>>
>>    This may be okayish for guests where the iSCSI LUN contains the
>>    "main" data that is being processed.  But what if an iSCSI LUN was
>>    just attached to a guest that is also doing other things that are
>>    independent (e.g. serving a website, processing data from a local
>>    disk, etc) - now the reconnect causes downtime for the entire guest.
> I will look into making the reconnect async over the next few days.

Thanks for looking into this. I have a few things in mind that I will
post to the GitHub issue you created.

Peter

Patch

diff --git a/block/iscsi.c b/block/iscsi.c
index a30202b..81e3ebd 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -47,6 +47,7 @@ 
 
 typedef struct IscsiLun {
     struct iscsi_context *iscsi;
+    AioContext *aio_context;
     int lun;
     enum scsi_inquiry_peripheral_device_type type;
     int block_size;
@@ -69,6 +70,7 @@  typedef struct IscsiTask {
     struct scsi_task *task;
     Coroutine *co;
     QEMUBH *bh;
+    AioContext *aio_context;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -120,7 +122,7 @@  iscsi_schedule_bh(IscsiAIOCB *acb)
     if (acb->bh) {
         return;
     }
-    acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
+    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -156,7 +158,7 @@  iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 
 out:
     if (iTask->co) {
-        iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
+        iTask->bh = aio_bh_new(iTask->aio_context, iscsi_co_generic_bh_cb, iTask);
         qemu_bh_schedule(iTask->bh);
     }
 }
@@ -164,8 +166,9 @@  out:
 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 {
     *iTask = (struct IscsiTask) {
-        .co         = qemu_coroutine_self(),
-        .retries    = ISCSI_CMD_RETRIES,
+        .co             = qemu_coroutine_self(),
+        .retries        = ISCSI_CMD_RETRIES,
+        .aio_context    = iscsilun->aio_context,
     };
 }
 
@@ -196,7 +199,7 @@  iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
                                      iscsi_abort_task_cb, acb);
 
     while (acb->status == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(blockacb->bs), true);
     }
 }
 
@@ -219,10 +222,11 @@  iscsi_set_events(IscsiLun *iscsilun)
     ev = POLLIN;
     ev |= iscsi_which_events(iscsi);
     if (ev != iscsilun->events) {
-        qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
-                      iscsi_process_read,
-                      (ev & POLLOUT) ? iscsi_process_write : NULL,
-                      iscsilun);
+        aio_set_fd_handler(iscsilun->aio_context,
+                           iscsi_get_fd(iscsi),
+                           iscsi_process_read,
+                           (ev & POLLOUT) ? iscsi_process_write : NULL,
+                           iscsilun);
 
     }
 
@@ -620,7 +624,7 @@  static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
 
         while (status == -EINPROGRESS) {
-            qemu_aio_wait();
+            aio_poll(bdrv_get_aio_context(bs), true);
         }
 
         return 0;
@@ -1110,6 +1114,40 @@  fail_with_err:
     return NULL;
 }
 
+static void iscsi_detach_aio_context(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    aio_set_fd_handler(iscsilun->aio_context,
+                       iscsi_get_fd(iscsilun->iscsi),
+                       NULL, NULL, NULL);
+    iscsilun->events = 0;
+
+    if (iscsilun->nop_timer) {
+        timer_del(iscsilun->nop_timer);
+        timer_free(iscsilun->nop_timer);
+        iscsilun->nop_timer = NULL;
+    }
+}
+
+static void iscsi_attach_aio_context(BlockDriverState *bs,
+                                     AioContext *new_context)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    iscsilun->aio_context = new_context;
+    iscsi_set_events(iscsilun);
+
+#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
+    /* Set up a timer for sending out iSCSI NOPs */
+    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
+                                        QEMU_CLOCK_REALTIME, SCALE_MS,
+                                        iscsi_nop_timed_event, iscsilun);
+    timer_mod(iscsilun->nop_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+#endif
+}
+
 /*
  * We support iscsi url's on the form
  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -1216,6 +1254,7 @@  static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     iscsilun->iscsi = iscsi;
+    iscsilun->aio_context = bdrv_get_aio_context(bs);
     iscsilun->lun   = iscsi_url->lun;
     iscsilun->has_write_same = true;
 
@@ -1289,11 +1328,7 @@  static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     scsi_free_scsi_task(task);
     task = NULL;
 
-#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
-    /* Set up a timer for sending out iSCSI NOPs */
-    iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
-    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-#endif
+    iscsi_attach_aio_context(bs, iscsilun->aio_context);
 
 out:
     qemu_opts_del(opts);
@@ -1321,11 +1356,7 @@  static void iscsi_close(BlockDriverState *bs)
     IscsiLun *iscsilun = bs->opaque;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-    }
-    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
+    iscsi_detach_aio_context(bs);
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
     memset(iscsilun, 0, sizeof(IscsiLun));
@@ -1421,10 +1452,7 @@  static int iscsi_create(const char *filename, QEMUOptionParameter *options,
     if (ret != 0) {
         goto out;
     }
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-    }
+    iscsi_detach_aio_context(bs);
     if (iscsilun->type != TYPE_DISK) {
         ret = -ENODEV;
         goto out;
@@ -1501,6 +1529,9 @@  static BlockDriver bdrv_iscsi = {
     .bdrv_ioctl       = iscsi_ioctl,
     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
 #endif
+
+    .bdrv_detach_aio_context = iscsi_detach_aio_context,
+    .bdrv_attach_aio_context = iscsi_attach_aio_context,
 };
 
 static QemuOptsList qemu_iscsi_opts = {