diff mbox

[4/5] migration: add missed aio_context_acquire into hmp_savevm/hmp_delvm

Message ID 1445954986-13005-5-git-send-email-den@openvz.org
State New
Headers show

Commit Message

Denis V. Lunev Oct. 27, 2015, 2:09 p.m. UTC
aio_context should be locked in a similar way as was done in QMP
snapshot creation; otherwise there are a lot of possible
troubles if native AIO mode is enabled for the disk.

- the command can hang (HMP thread) with missed wakeup (the operation is
  actually complete)
    io_submit
    ioq_submit
    laio_submit
    raw_aio_submit
    raw_aio_readv
    bdrv_co_io_em
    bdrv_co_readv_em
    bdrv_aligned_preadv
    bdrv_co_do_preadv
    bdrv_co_do_readv
    bdrv_co_readv
    qcow2_co_readv
    bdrv_aligned_preadv
    bdrv_co_do_pwritev
    bdrv_rw_co_entry

- QEMU can assert in coroutine re-enter
    __GI_abort
    qemu_coroutine_enter
    bdrv_co_io_em_complete
    qemu_laio_process_completion
    qemu_laio_completion_bh
    aio_bh_poll
    aio_dispatch
    aio_poll
    iothread_run

AioContext lock is recursive. Thus nested locking should not be a problem.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Amit Shah <amit.shah@redhat.com>
---
 block/snapshot.c   | 5 +++++
 migration/savevm.c | 7 +++++++
 2 files changed, 12 insertions(+)

Comments

Paolo Bonzini Oct. 27, 2015, 6:12 p.m. UTC | #1
On 27/10/2015 15:09, Denis V. Lunev wrote:
> aio_context should be locked in the similar way as was done in QMP
> snapshot creation in the other case there are a lot of possible
> troubles if native AIO mode is enabled for disk.
> 
> - the command can hang (HMP thread) with missed wakeup (the operation is
>   actually complete)
>     io_submit
>     ioq_submit
>     laio_submit
>     raw_aio_submit
>     raw_aio_readv
>     bdrv_co_io_em
>     bdrv_co_readv_em
>     bdrv_aligned_preadv
>     bdrv_co_do_preadv
>     bdrv_co_do_readv
>     bdrv_co_readv
>     qcow2_co_readv
>     bdrv_aligned_preadv
>     bdrv_co_do_pwritev
>     bdrv_rw_co_entry
> 
> - QEMU can assert in coroutine re-enter
>     __GI_abort
>     qemu_coroutine_enter
>     bdrv_co_io_em_complete
>     qemu_laio_process_completion
>     qemu_laio_completion_bh
>     aio_bh_poll
>     aio_dispatch
>     aio_poll
>     iothread_run
> 
> AioContext lock is reqursive. Thus nested locking should not be a problem.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Amit Shah <amit.shah@redhat.com>
> ---
>  block/snapshot.c   | 5 +++++
>  migration/savevm.c | 7 +++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/block/snapshot.c b/block/snapshot.c
> index 89500f2..f6fa17a 100644
> --- a/block/snapshot.c
> +++ b/block/snapshot.c
> @@ -259,6 +259,9 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
>  {
>      int ret;
>      Error *local_err = NULL;
> +    AioContext *aio_context = bdrv_get_aio_context(bs);
> +
> +    aio_context_acquire(aio_context);
>  
>      ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
>      if (ret == -ENOENT || ret == -EINVAL) {
> @@ -267,6 +270,8 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
>          ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
>      }
>  
> +    aio_context_release(aio_context);

Why here and not in hmp_delvm, for consistency?

The call from hmp_savevm is already protected.

Thanks for fixing the bug!

Paolo

>      if (ret < 0) {
>          error_propagate(errp, local_err);
>      }
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..83d2efa 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1289,6 +1289,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      struct tm tm;
>      const char *name = qdict_get_try_str(qdict, "name");
>      Error *local_err = NULL;
> +    AioContext *aio_context;
>  
>      /* Verify if there is a device that doesn't support snapshots and is writable */
>      bs = NULL;
> @@ -1320,6 +1321,9 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      }
>      vm_stop(RUN_STATE_SAVE_VM);
>  
> +    aio_context = bdrv_get_aio_context(bs);
> +    aio_context_acquire(aio_context);
> +
>      memset(sn, 0, sizeof(*sn));
>  
>      /* fill auxiliary fields */
> @@ -1378,6 +1382,8 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      }
>  
>   the_end:
> +    aio_context_release(aio_context);
> +
>      if (saved_vm_running) {
>          vm_start();
>      }
> 
>
Denis V. Lunev Oct. 27, 2015, 6:23 p.m. UTC | #2
On 10/27/2015 09:12 PM, Paolo Bonzini wrote:
>
> On 27/10/2015 15:09, Denis V. Lunev wrote:
>> aio_context should be locked in the similar way as was done in QMP
>> snapshot creation in the other case there are a lot of possible
>> troubles if native AIO mode is enabled for disk.
>>
>> - the command can hang (HMP thread) with missed wakeup (the operation is
>>    actually complete)
>>      io_submit
>>      ioq_submit
>>      laio_submit
>>      raw_aio_submit
>>      raw_aio_readv
>>      bdrv_co_io_em
>>      bdrv_co_readv_em
>>      bdrv_aligned_preadv
>>      bdrv_co_do_preadv
>>      bdrv_co_do_readv
>>      bdrv_co_readv
>>      qcow2_co_readv
>>      bdrv_aligned_preadv
>>      bdrv_co_do_pwritev
>>      bdrv_rw_co_entry
>>
>> - QEMU can assert in coroutine re-enter
>>      __GI_abort
>>      qemu_coroutine_enter
>>      bdrv_co_io_em_complete
>>      qemu_laio_process_completion
>>      qemu_laio_completion_bh
>>      aio_bh_poll
>>      aio_dispatch
>>      aio_poll
>>      iothread_run
>>
>> AioContext lock is reqursive. Thus nested locking should not be a problem.
>>
>> Signed-off-by: Denis V. Lunev <den@openvz.org>
>> CC: Stefan Hajnoczi <stefanha@redhat.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> CC: Amit Shah <amit.shah@redhat.com>
>> ---
>>   block/snapshot.c   | 5 +++++
>>   migration/savevm.c | 7 +++++++
>>   2 files changed, 12 insertions(+)
>>
>> diff --git a/block/snapshot.c b/block/snapshot.c
>> index 89500f2..f6fa17a 100644
>> --- a/block/snapshot.c
>> +++ b/block/snapshot.c
>> @@ -259,6 +259,9 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
>>   {
>>       int ret;
>>       Error *local_err = NULL;
>> +    AioContext *aio_context = bdrv_get_aio_context(bs);
>> +
>> +    aio_context_acquire(aio_context);
>>   
>>       ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
>>       if (ret == -ENOENT || ret == -EINVAL) {
>> @@ -267,6 +270,8 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
>>           ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
>>       }
>>   
>> +    aio_context_release(aio_context);
> Why here and not in hmp_delvm, for consistency?
>
> The call from hmp_savevm is already protected.
>
> Thanks for fixing the bug!
>
> Paolo

the situation is more difficult. There are several disks in VM.
One disk is used for state saving (protected in savevm)
and there are several disks touched via

static int del_existing_snapshots(Monitor *mon, const char *name)
     while ((bs = bdrv_next(bs))) {
         if (bdrv_can_snapshot(bs) &&
             bdrv_snapshot_find(bs, snapshot, name) >= 0) {
             bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
         }
     }

in savevm and similar looking code in delvm with similar cycle
implemented differently.

This patchset looks minimal for me to kludge situation enough.

True fix would be a drop of this code in favour of blockdev
transactions. At least this is my opinion. Though I can not do
this at this stage, this will take a lot of time.

Den
Juan Quintela Oct. 28, 2015, 10:11 a.m. UTC | #3
"Denis V. Lunev" <den@openvz.org> wrote:
> aio_context should be locked in the similar way as was done in QMP
> snapshot creation in the other case there are a lot of possible
> troubles if native AIO mode is enabled for disk.
>
> - the command can hang (HMP thread) with missed wakeup (the operation is
>   actually complete)
>     io_submit
>     ioq_submit
>     laio_submit
>     raw_aio_submit
>     raw_aio_readv
>     bdrv_co_io_em
>     bdrv_co_readv_em
>     bdrv_aligned_preadv
>     bdrv_co_do_preadv
>     bdrv_co_do_readv
>     bdrv_co_readv
>     qcow2_co_readv
>     bdrv_aligned_preadv
>     bdrv_co_do_pwritev
>     bdrv_rw_co_entry
>
> - QEMU can assert in coroutine re-enter
>     __GI_abort
>     qemu_coroutine_enter
>     bdrv_co_io_em_complete
>     qemu_laio_process_completion
>     qemu_laio_completion_bh
>     aio_bh_poll
>     aio_dispatch
>     aio_poll
>     iothread_run
>
> AioContext lock is reqursive. Thus nested locking should not be a problem.
>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Amit Shah <amit.shah@redhat.com>
> ---
>  block/snapshot.c   | 5 +++++
>  migration/savevm.c | 7 +++++++
>  2 files changed, 12 insertions(+)



Reviewed-by: Juan Quintela <quintela@redhat.com>

But once there, I can't understand why migration have to know about
aio_contexts at all.

I *think* that it would be a good idea to "hide" the
aio_context_acquire(aio_context) inside qemu_fopen_bdrv() (yes, it is
still in migration/*, but you get the idea).  But I don't propose it,
because we don't have qemu_fclose_bdrv().  Yes, we could add an
aio_context inside QemuFile, and release it on qemu_fclose(), but I
guess this needs more thought yet.

BTW, now that I have your attention, why is this needed in hmp_savevm()
but not in load_vmstate()?  We are also using
qemu_fopen_bdrv() there.  Because we are only reading from it?  Just curious
about the reason, or whether we are missing something there.

Thanks, Juan.


>
> diff --git a/block/snapshot.c b/block/snapshot.c
> index 89500f2..f6fa17a 100644
> --- a/block/snapshot.c
> +++ b/block/snapshot.c
> @@ -259,6 +259,9 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
>  {
>      int ret;
>      Error *local_err = NULL;
> +    AioContext *aio_context = bdrv_get_aio_context(bs);
> +
> +    aio_context_acquire(aio_context);
>  
>      ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
>      if (ret == -ENOENT || ret == -EINVAL) {
> @@ -267,6 +270,8 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
>          ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
>      }
>  
> +    aio_context_release(aio_context);
> +
>      if (ret < 0) {
>          error_propagate(errp, local_err);
>      }
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..83d2efa 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1289,6 +1289,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      struct tm tm;
>      const char *name = qdict_get_try_str(qdict, "name");
>      Error *local_err = NULL;
> +    AioContext *aio_context;
>  
>      /* Verify if there is a device that doesn't support snapshots and is writable */
>      bs = NULL;
> @@ -1320,6 +1321,9 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      }
>      vm_stop(RUN_STATE_SAVE_VM);
>  
> +    aio_context = bdrv_get_aio_context(bs);
> +    aio_context_acquire(aio_context);
> +
>      memset(sn, 0, sizeof(*sn));
>  
>      /* fill auxiliary fields */
> @@ -1378,6 +1382,8 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      }
>  
>   the_end:
> +    aio_context_release(aio_context);
> +
>      if (saved_vm_running) {
>          vm_start();
>      }
Denis V. Lunev Oct. 28, 2015, 10:38 a.m. UTC | #4
On 10/28/2015 01:11 PM, Juan Quintela wrote:
> "Denis V. Lunev" <den@openvz.org> wrote:
>> aio_context should be locked in the similar way as was done in QMP
>> snapshot creation in the other case there are a lot of possible
>> troubles if native AIO mode is enabled for disk.
>>
>> - the command can hang (HMP thread) with missed wakeup (the operation is
>>    actually complete)
>>      io_submit
>>      ioq_submit
>>      laio_submit
>>      raw_aio_submit
>>      raw_aio_readv
>>      bdrv_co_io_em
>>      bdrv_co_readv_em
>>      bdrv_aligned_preadv
>>      bdrv_co_do_preadv
>>      bdrv_co_do_readv
>>      bdrv_co_readv
>>      qcow2_co_readv
>>      bdrv_aligned_preadv
>>      bdrv_co_do_pwritev
>>      bdrv_rw_co_entry
>>
>> - QEMU can assert in coroutine re-enter
>>      __GI_abort
>>      qemu_coroutine_enter
>>      bdrv_co_io_em_complete
>>      qemu_laio_process_completion
>>      qemu_laio_completion_bh
>>      aio_bh_poll
>>      aio_dispatch
>>      aio_poll
>>      iothread_run
>>
>> AioContext lock is reqursive. Thus nested locking should not be a problem.
>>
>> Signed-off-by: Denis V. Lunev <den@openvz.org>
>> CC: Stefan Hajnoczi <stefanha@redhat.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> CC: Amit Shah <amit.shah@redhat.com>
>> ---
>>   block/snapshot.c   | 5 +++++
>>   migration/savevm.c | 7 +++++++
>>   2 files changed, 12 insertions(+)
>
>
> Reviewed-by: Juan Quintela <quintela@redhat.com>
>
> But once there, I can't understand why migration have to know about
> aio_contexts at all.
>
> I *think* that it would be a good idea to "hide" the
> adi_context_acquire(aio_context) inside qemu_fopen_bdrv() (yes, it is
> still in migration/*, but you get the idea).  But don't propose it,
> because we don't have qemu_fclose_bdrv().  Yes we could add an
> aio_context inside QemuFile, and release it on qemu_fclose(), but I
> guess this needs more thought yet.
>
> BTW, once that I got your attention, why is this needed on hmp_savevm()
> but it is not needed on load_vmstate()?  We are also using
> qemu_fopen_bdrv()?  Because we are only reading from there?  Just curios
> the reason or if we are missing something there.
>
> Thanks, Juan.

I think that the race is still there (I have checked this several times,
though fewer times than create/delete snapshot), but the window is seriously
reduced due to bdrv_drain_all at the beginning.

In general you are right. But in this case we are almost doomed. Any single
read/write operation could be executed in the iothread only. Maybe I have missed
something in this puzzle.

OK. What about bdrv_fclose callback and similar (new) callback for open
which should be called through qemu_fopen_bdrv for block driver only?

Den
diff mbox

Patch

diff --git a/block/snapshot.c b/block/snapshot.c
index 89500f2..f6fa17a 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -259,6 +259,9 @@  void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
 {
     int ret;
     Error *local_err = NULL;
+    AioContext *aio_context = bdrv_get_aio_context(bs);
+
+    aio_context_acquire(aio_context);
 
     ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
     if (ret == -ENOENT || ret == -EINVAL) {
@@ -267,6 +270,8 @@  void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
         ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
     }
 
+    aio_context_release(aio_context);
+
     if (ret < 0) {
         error_propagate(errp, local_err);
     }
diff --git a/migration/savevm.c b/migration/savevm.c
index dbcc39a..83d2efa 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1289,6 +1289,7 @@  void hmp_savevm(Monitor *mon, const QDict *qdict)
     struct tm tm;
     const char *name = qdict_get_try_str(qdict, "name");
     Error *local_err = NULL;
+    AioContext *aio_context;
 
     /* Verify if there is a device that doesn't support snapshots and is writable */
     bs = NULL;
@@ -1320,6 +1321,9 @@  void hmp_savevm(Monitor *mon, const QDict *qdict)
     }
     vm_stop(RUN_STATE_SAVE_VM);
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     memset(sn, 0, sizeof(*sn));
 
     /* fill auxiliary fields */
@@ -1378,6 +1382,8 @@  void hmp_savevm(Monitor *mon, const QDict *qdict)
     }
 
  the_end:
+    aio_context_release(aio_context);
+
     if (saved_vm_running) {
         vm_start();
     }