diff mbox

[1/8] block-migration: acquire AioContext as necessary

Message ID 1455470231-5223-2-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Feb. 14, 2016, 5:17 p.m. UTC
This is needed because dataplane will run during block migration as well.

The block device migration code is quite liberal in taking the iothread
mutex.  For simplicity, keep it the same way, even though one could
actually choose between the BQL (for regular BlockDriverStates) and
the AioContext (for dataplane BlockDriverStates).  When the block layer
is made fully thread safe, aio_context_acquire shall go away altogether.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 migration/block.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 12 deletions(-)

Comments

Fam Zheng Feb. 16, 2016, 7:17 a.m. UTC | #1
On Sun, 02/14 18:17, Paolo Bonzini wrote:
> This is needed because dataplane will run during block migration as well.
> 
> The block device migration code is quite liberal in taking the iothread
> mutex.  For simplicity, keep it the same way, even though one could
> actually choose between the BQL (for regular BlockDriverStates) and
> the AioContext (for dataplane BlockDriverStates).  When the block layer
> is made fully thread safe, aio_context_acquire shall go away altogether.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  migration/block.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 49 insertions(+), 12 deletions(-)
> 
> diff --git a/migration/block.c b/migration/block.c
> index a444058..6dd2327 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -60,9 +60,15 @@ typedef struct BlkMigDevState {
>      int64_t cur_sector;
>      int64_t cur_dirty;
>  
> -    /* Protected by block migration lock.  */
> +    /* Data in the aio_bitmap is protected by block migration lock.
> +     * Allocation and free happen during setup and cleanup respectively.
> +     */
>      unsigned long *aio_bitmap;
> +
> +    /* Protected by block migration lock.  */
>      int64_t completed_sectors;
> +
> +    /* Protected by iothread lock / AioContext.  */
>      BdrvDirtyBitmap *dirty_bitmap;
>      Error *blocker;
>  } BlkMigDevState;
> @@ -100,7 +106,7 @@ typedef struct BlkMigState {
>      int prev_progress;
>      int bulk_completed;
>  
> -    /* Lock must be taken _inside_ the iothread lock.  */
> +    /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
>      QemuMutex lock;
>  } BlkMigState;
>  
> @@ -264,11 +270,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>  
>      if (bmds->shared_base) {
>          qemu_mutex_lock_iothread();
> +        aio_context_acquire(bdrv_get_aio_context(bs));
>          while (cur_sector < total_sectors &&
>                 !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
>                                    &nr_sectors)) {
>              cur_sector += nr_sectors;
>          }
> +        aio_context_release(bdrv_get_aio_context(bs));
>          qemu_mutex_unlock_iothread();
>      }
>  
> @@ -302,11 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>      block_mig_state.submitted++;
>      blk_mig_unlock();
>  
> +    /* We do not know if bs is under the main thread (and thus does
> +     * not acquire the AioContext when doing AIO) or rather under
> +     * dataplane.  Thus acquire both the iothread mutex and the
> +     * AioContext.
> +     *
> +     * This is ugly and will disappear when we make bdrv_* thread-safe,
> +     * without the need to acquire the AioContext.
> +     */
>      qemu_mutex_lock_iothread();
> +    aio_context_acquire(bdrv_get_aio_context(bmds->bs));
>      blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
>                                  nr_sectors, blk_mig_read_cb, blk);
>  
>      bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
> +    aio_context_release(bdrv_get_aio_context(bmds->bs));
>      qemu_mutex_unlock_iothread();
>  
>      bmds->cur_sector = cur_sector + nr_sectors;
> @@ -321,8 +339,9 @@ static int set_dirty_tracking(void)
>      int ret;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */

Why? I don't think it is safe today.  The BDS state is mutated and it can race
with bdrv_set_dirty() etc. (Also the refresh_total_sectors in bdrv_nb_sectors
can even do read/write, no?)

>          bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
>                                                        NULL, NULL);
>          if (!bmds->dirty_bitmap) {
>              ret = -errno;
>              goto fail;
> @@ -332,11 +352,14 @@ static int set_dirty_tracking(void)
>      return ret;
>  }
>  
> +/* Called with iothread lock taken.  */
> +
>  static void unset_dirty_tracking(void)
>  {
>      BlkMigDevState *bmds;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */

Ditto.

>          bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
>      }
>  }
> @@ -597,21 +627,28 @@ static void block_migration_cleanup(void *opaque)
>  {
>      BlkMigDevState *bmds;
>      BlkMigBlock *blk;
> +    AioContext *ctx;
>  
>      bdrv_drain_all();
>  
>      unset_dirty_tracking();
>  
> -    blk_mig_lock();

Why is it okay to skip the blk_mig_lock() for block_mig_state.bmds_list?

>      while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
>          QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
>          bdrv_op_unblock_all(bmds->bs, bmds->blocker);
>          error_free(bmds->blocker);
> +
> +        /* Save ctx, because bmds->bs can disappear during bdrv_unref.  */
> +        ctx = bdrv_get_aio_context(bmds->bs);
> +        aio_context_acquire(ctx);
>          bdrv_unref(bmds->bs);
> +        aio_context_release(ctx);
> +
>          g_free(bmds->aio_bitmap);
>          g_free(bmds);
>      }
>  
> +    blk_mig_lock();
>      while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
>          QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
>          g_free(blk->buf);
Michael S. Tsirkin Feb. 19, 2016, 7:41 a.m. UTC | #2
ping
Paolo - were you going to address these questions?
Or did I miss it?

On Tue, Feb 16, 2016 at 03:17:11PM +0800, Fam Zheng wrote:
> On Sun, 02/14 18:17, Paolo Bonzini wrote:
> > This is needed because dataplane will run during block migration as well.
> > 
> > The block device migration code is quite liberal in taking the iothread
> > mutex.  For simplicity, keep it the same way, even though one could
> > actually choose between the BQL (for regular BlockDriverStates) and
> > the AioContext (for dataplane BlockDriverStates).  When the block layer
> > is made fully thread safe, aio_context_acquire shall go away altogether.
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  migration/block.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-----------
> >  1 file changed, 49 insertions(+), 12 deletions(-)
> > 
> > diff --git a/migration/block.c b/migration/block.c
> > index a444058..6dd2327 100644
> > --- a/migration/block.c
> > +++ b/migration/block.c
> > @@ -60,9 +60,15 @@ typedef struct BlkMigDevState {
> >      int64_t cur_sector;
> >      int64_t cur_dirty;
> >  
> > -    /* Protected by block migration lock.  */
> > +    /* Data in the aio_bitmap is protected by block migration lock.
> > +     * Allocation and free happen during setup and cleanup respectively.
> > +     */
> >      unsigned long *aio_bitmap;
> > +
> > +    /* Protected by block migration lock.  */
> >      int64_t completed_sectors;
> > +
> > +    /* Protected by iothread lock / AioContext.  */
> >      BdrvDirtyBitmap *dirty_bitmap;
> >      Error *blocker;
> >  } BlkMigDevState;
> > @@ -100,7 +106,7 @@ typedef struct BlkMigState {
> >      int prev_progress;
> >      int bulk_completed;
> >  
> > -    /* Lock must be taken _inside_ the iothread lock.  */
> > +    /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
> >      QemuMutex lock;
> >  } BlkMigState;
> >  
> > @@ -264,11 +270,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> >  
> >      if (bmds->shared_base) {
> >          qemu_mutex_lock_iothread();
> > +        aio_context_acquire(bdrv_get_aio_context(bs));
> >          while (cur_sector < total_sectors &&
> >                 !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
> >                                    &nr_sectors)) {
> >              cur_sector += nr_sectors;
> >          }
> > +        aio_context_release(bdrv_get_aio_context(bs));
> >          qemu_mutex_unlock_iothread();
> >      }
> >  
> > @@ -302,11 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> >      block_mig_state.submitted++;
> >      blk_mig_unlock();
> >  
> > +    /* We do not know if bs is under the main thread (and thus does
> > +     * not acquire the AioContext when doing AIO) or rather under
> > +     * dataplane.  Thus acquire both the iothread mutex and the
> > +     * AioContext.
> > +     *
> > +     * This is ugly and will disappear when we make bdrv_* thread-safe,
> > +     * without the need to acquire the AioContext.
> > +     */
> >      qemu_mutex_lock_iothread();
> > +    aio_context_acquire(bdrv_get_aio_context(bmds->bs));
> >      blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
> >                                  nr_sectors, blk_mig_read_cb, blk);
> >  
> >      bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
> > +    aio_context_release(bdrv_get_aio_context(bmds->bs));
> >      qemu_mutex_unlock_iothread();
> >  
> >      bmds->cur_sector = cur_sector + nr_sectors;
> > @@ -321,8 +339,9 @@ static int set_dirty_tracking(void)
> >      int ret;
> >  
> >      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> > +        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */
> 
> Why? I don't think it is safe today.  The BDS state is mutated and it can race
> with bdrv_set_dirty() etc. (Also the refresh_total_sectors in bdrv_nb_sectors
> can even do read/write, no?)
> 
> >          bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
> >                                                        NULL, NULL);
> >          if (!bmds->dirty_bitmap) {
> >              ret = -errno;
> >              goto fail;
> > @@ -332,11 +352,14 @@ static int set_dirty_tracking(void)
> >      return ret;
> >  }
> >  
> > +/* Called with iothread lock taken.  */
> > +
> >  static void unset_dirty_tracking(void)
> >  {
> >      BlkMigDevState *bmds;
> >  
> >      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> > +        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */
> 
> Ditto.
> 
> >          bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
> >      }
> >  }
> > @@ -597,21 +627,28 @@ static void block_migration_cleanup(void *opaque)
> >  {
> >      BlkMigDevState *bmds;
> >      BlkMigBlock *blk;
> > +    AioContext *ctx;
> >  
> >      bdrv_drain_all();
> >  
> >      unset_dirty_tracking();
> >  
> > -    blk_mig_lock();
> 
> Why is it okay to skip the blk_mig_lock() for block_mig_state.bmds_list?
> 
> >      while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
> >          QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
> >          bdrv_op_unblock_all(bmds->bs, bmds->blocker);
> >          error_free(bmds->blocker);
> > +
> > +        /* Save ctx, because bmds->bs can disappear during bdrv_unref.  */
> > +        ctx = bdrv_get_aio_context(bmds->bs);
> > +        aio_context_acquire(ctx);
> >          bdrv_unref(bmds->bs);
> > +        aio_context_release(ctx);
> > +
> >          g_free(bmds->aio_bitmap);
> >          g_free(bmds);
> >      }
> >  
> > +    blk_mig_lock();
> >      while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
> >          QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
> >          g_free(blk->buf);
Paolo Bonzini Feb. 19, 2016, 3:02 p.m. UTC | #3
On 16/02/2016 08:17, Fam Zheng wrote:
>> @@ -321,8 +339,9 @@ static int set_dirty_tracking(void)
>>      int ret;
>>  
>>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
>> +        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */
> 
> Why? I don't think it is safe today.  The BDS state is mutated and it can race
> with bdrv_set_dirty() etc.

You're completely right.

> (Also the refresh_total_sectors in bdrv_nb_sectors
> can even do read/write, no?)

refresh_total_sectors will basically just do an lseek(SEEK_END).  So
that's safe.

Paolo
diff mbox

Patch

diff --git a/migration/block.c b/migration/block.c
index a444058..6dd2327 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -60,9 +60,15 @@  typedef struct BlkMigDevState {
     int64_t cur_sector;
     int64_t cur_dirty;
 
-    /* Protected by block migration lock.  */
+    /* Data in the aio_bitmap is protected by block migration lock.
+     * Allocation and free happen during setup and cleanup respectively.
+     */
     unsigned long *aio_bitmap;
+
+    /* Protected by block migration lock.  */
     int64_t completed_sectors;
+
+    /* Protected by iothread lock / AioContext.  */
     BdrvDirtyBitmap *dirty_bitmap;
     Error *blocker;
 } BlkMigDevState;
@@ -100,7 +106,7 @@  typedef struct BlkMigState {
     int prev_progress;
     int bulk_completed;
 
-    /* Lock must be taken _inside_ the iothread lock.  */
+    /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
     QemuMutex lock;
 } BlkMigState;
 
@@ -264,11 +270,13 @@  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
 
     if (bmds->shared_base) {
         qemu_mutex_lock_iothread();
+        aio_context_acquire(bdrv_get_aio_context(bs));
         while (cur_sector < total_sectors &&
                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                   &nr_sectors)) {
             cur_sector += nr_sectors;
         }
+        aio_context_release(bdrv_get_aio_context(bs));
         qemu_mutex_unlock_iothread();
     }
 
@@ -302,11 +310,21 @@  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     block_mig_state.submitted++;
     blk_mig_unlock();
 
+    /* We do not know if bs is under the main thread (and thus does
+     * not acquire the AioContext when doing AIO) or rather under
+     * dataplane.  Thus acquire both the iothread mutex and the
+     * AioContext.
+     *
+     * This is ugly and will disappear when we make bdrv_* thread-safe,
+     * without the need to acquire the AioContext.
+     */
     qemu_mutex_lock_iothread();
+    aio_context_acquire(bdrv_get_aio_context(bmds->bs));
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
 
     bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
+    aio_context_release(bdrv_get_aio_context(bmds->bs));
     qemu_mutex_unlock_iothread();
 
     bmds->cur_sector = cur_sector + nr_sectors;
@@ -321,8 +339,9 @@  static int set_dirty_tracking(void)
     int ret;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */
         bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
                                                       NULL, NULL);
         if (!bmds->dirty_bitmap) {
             ret = -errno;
             goto fail;
@@ -332,11 +352,14 @@  static int set_dirty_tracking(void)
     return ret;
 }
 
+/* Called with iothread lock taken.  */
+
 static void unset_dirty_tracking(void)
 {
     BlkMigDevState *bmds;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+        /* Creating/dropping dirty bitmaps only requires the big QEMU lock.  */
         bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
     }
 }
@@ -444,7 +470,7 @@  static void blk_mig_reset_dirty_cursor(void)
     }
 }
 
-/* Called with iothread lock taken.  */
+/* Called with iothread lock and AioContext taken.  */
 
 static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                  int is_async)
@@ -527,7 +553,9 @@  static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
     int ret = 1;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+        aio_context_acquire(bdrv_get_aio_context(bmds->bs));
         ret = mig_save_device_dirty(f, bmds, is_async);
+        aio_context_release(bdrv_get_aio_context(bmds->bs));
         if (ret <= 0) {
             break;
         }
@@ -585,7 +613,9 @@  static int64_t get_remaining_dirty(void)
     int64_t dirty = 0;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+        aio_context_acquire(bdrv_get_aio_context(bmds->bs));
         dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
+        aio_context_release(bdrv_get_aio_context(bmds->bs));
     }
 
     return dirty << BDRV_SECTOR_BITS;
@@ -597,21 +627,28 @@  static void block_migration_cleanup(void *opaque)
 {
     BlkMigDevState *bmds;
     BlkMigBlock *blk;
+    AioContext *ctx;
 
     bdrv_drain_all();
 
     unset_dirty_tracking();
 
-    blk_mig_lock();
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
         bdrv_op_unblock_all(bmds->bs, bmds->blocker);
         error_free(bmds->blocker);
+
+        /* Save ctx, because bmds->bs can disappear during bdrv_unref.  */
+        ctx = bdrv_get_aio_context(bmds->bs);
+        aio_context_acquire(ctx);
         bdrv_unref(bmds->bs);
+        aio_context_release(ctx);
+
         g_free(bmds->aio_bitmap);
         g_free(bmds);
     }
 
+    blk_mig_lock();
     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
         g_free(blk->buf);
@@ -633,13 +670,12 @@  static int block_save_setup(QEMUFile *f, void *opaque)
     /* start track dirty blocks */
     ret = set_dirty_tracking();
 
+    qemu_mutex_unlock_iothread();
+
     if (ret) {
-        qemu_mutex_unlock_iothread();
         return ret;
     }
 
-    qemu_mutex_unlock_iothread();
-
     ret = flush_blks(f);
     blk_mig_reset_dirty_cursor();
     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
@@ -761,17 +797,18 @@  static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
     uint64_t pending;
 
     qemu_mutex_lock_iothread();
+    pending = get_remaining_dirty();
+    qemu_mutex_unlock_iothread();
+
     blk_mig_lock();
-    pending = get_remaining_dirty() +
-                       block_mig_state.submitted * BLOCK_SIZE +
-                       block_mig_state.read_done * BLOCK_SIZE;
+    pending += block_mig_state.submitted * BLOCK_SIZE +
+               block_mig_state.read_done * BLOCK_SIZE;
+    blk_mig_unlock();
 
     /* Report at least one block pending during bulk phase */
     if (pending <= max_size && !block_mig_state.bulk_completed) {
         pending = max_size + BLOCK_SIZE;
     }
-    blk_mig_unlock();
-    qemu_mutex_unlock_iothread();
 
     DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
     /* We don't do postcopy */