[02/18] block: BDS deletion during bdrv_drain_recurse

Message ID 20170913181910.29688-3-mreitz@redhat.com
State New
Headers show
Series
  • block/mirror: Add active-sync mirroring
Related show

Commit Message

Max Reitz Sept. 13, 2017, 6:18 p.m.
Drainined a BDS child may lead to both the original BDS and/or its other
children being deleted (e.g. if the original BDS represents a block
job).  We should prepare for this in both bdrv_drain_recurse() and
bdrv_drained_begin() by monitoring whether the BDS we are about to drain
still exists at all.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/io.c | 72 +++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 20 deletions(-)

Comments

Fam Zheng Sept. 18, 2017, 3:44 a.m. | #1
On Wed, 09/13 20:18, Max Reitz wrote:
> Drainined a BDS child may lead to both the original BDS and/or its other
> children being deleted (e.g. if the original BDS represents a block
> job).  We should prepare for this in both bdrv_drain_recurse() and
> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
> still exists at all.

Can the deletion happen when IOThread calls
bdrv_drain_recurse/bdrv_drained_begin?  If not, is it enough to do

    ...
    if (in_main_loop) {
        bdrv_ref(bs);
    }
    ...
    if (in_main_loop) {
        bdrv_unref(bs);
    }

to protect the main loop case? So the BdrvDeletedStatus state is not needed.

Fam

> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/io.c | 72 +++++++++++++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 52 insertions(+), 20 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 4378ae4c7d..8ec1a564ad 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -182,33 +182,57 @@ static void bdrv_drain_invoke(BlockDriverState *bs)
>  
>  static bool bdrv_drain_recurse(BlockDriverState *bs)
>  {
> -    BdrvChild *child, *tmp;
> +    BdrvChild *child;
>      bool waited;
> +    struct BDSToDrain {
> +        BlockDriverState *bs;
> +        BdrvDeletedStatus del_stat;
> +        QLIST_ENTRY(BDSToDrain) next;
> +    };
> +    QLIST_HEAD(, BDSToDrain) bs_list = QLIST_HEAD_INITIALIZER(bs_list);
> +    bool in_main_loop =
> +        qemu_get_current_aio_context() == qemu_get_aio_context();
>  
>      waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
>  
>      /* Ensure any pending metadata writes are submitted to bs->file.  */
>      bdrv_drain_invoke(bs);
>  
> -    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
> -        BlockDriverState *bs = child->bs;
> -        bool in_main_loop =
> -            qemu_get_current_aio_context() == qemu_get_aio_context();
> -        assert(bs->refcnt > 0);
> -        if (in_main_loop) {
> -            /* In case the recursive bdrv_drain_recurse processes a
> -             * block_job_defer_to_main_loop BH and modifies the graph,
> -             * let's hold a reference to bs until we are done.
> -             *
> -             * IOThread doesn't have such a BH, and it is not safe to call
> -             * bdrv_unref without BQL, so skip doing it there.
> -             */
> -            bdrv_ref(bs);
> -        }
> -        waited |= bdrv_drain_recurse(bs);
> -        if (in_main_loop) {
> -            bdrv_unref(bs);
> +    /* Draining children may result in other children being removed and maybe
> +     * even deleted, so copy the children list first */
> +    QLIST_FOREACH(child, &bs->children, next) {
> +        struct BDSToDrain *bs2d = g_new0(struct BDSToDrain, 1);
> +
> +        bs2d->bs = child->bs;
> +        QLIST_INSERT_HEAD(&bs->deleted_status, &bs2d->del_stat, next);
> +
> +        QLIST_INSERT_HEAD(&bs_list, bs2d, next);
> +    }
> +
> +    while (!QLIST_EMPTY(&bs_list)) {
> +        struct BDSToDrain *bs2d = QLIST_FIRST(&bs_list);
> +        QLIST_REMOVE(bs2d, next);
> +
> +        if (!bs2d->del_stat.deleted) {
> +            QLIST_REMOVE(&bs2d->del_stat, next);
> +
> +            if (in_main_loop) {
> +                /* In case the recursive bdrv_drain_recurse processes a
> +                 * block_job_defer_to_main_loop BH and modifies the graph,
> +                 * let's hold a reference to the BDS until we are done.
> +                 *
> +                 * IOThread doesn't have such a BH, and it is not safe to call
> +                 * bdrv_unref without BQL, so skip doing it there.
> +                 */
> +                bdrv_ref(bs2d->bs);
> +            }
> +            waited |= bdrv_drain_recurse(bs2d->bs);
> +            if (in_main_loop) {
> +                bdrv_unref(bs2d->bs);
> +            }
>          }
> +
> +        g_free(bs2d);
>      }
>  
>      return waited;
> @@ -252,17 +276,25 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>  
>  void bdrv_drained_begin(BlockDriverState *bs)
>  {
> +    BdrvDeletedStatus del_stat = { .deleted = false };
> +
>      if (qemu_in_coroutine()) {
>          bdrv_co_yield_to_drain(bs);
>          return;
>      }
>  
> +    QLIST_INSERT_HEAD(&bs->deleted_status, &del_stat, next);
> +
>      if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
>          aio_disable_external(bdrv_get_aio_context(bs));
>          bdrv_parent_drained_begin(bs);
>      }
>  
> -    bdrv_drain_recurse(bs);
> +    if (!del_stat.deleted) {
> +        QLIST_REMOVE(&del_stat, next);
> +
> +        bdrv_drain_recurse(bs);
> +    }
>  }
>  
>  void bdrv_drained_end(BlockDriverState *bs)
> -- 
> 2.13.5
>
Max Reitz Sept. 18, 2017, 4:13 p.m. | #2
On 2017-09-18 05:44, Fam Zheng wrote:
> On Wed, 09/13 20:18, Max Reitz wrote:
>> Drainined a BDS child may lead to both the original BDS and/or its other
>> children being deleted (e.g. if the original BDS represents a block
>> job).  We should prepare for this in both bdrv_drain_recurse() and
>> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
>> still exists at all.
> 
> Can the deletion happen when IOThread calls
> bdrv_drain_recurse/bdrv_drained_begin?

I don't think so, because (1) my issue was draining a block job and that
can only be completed in the main loop, and (2) I would like to think
it's always impossible, considering that bdrv_unref() may only be called
with the BQL.

>                                         If not, is it enough to do
> 
>     ...
>     if (in_main_loop) {
>         bdrv_ref(bs);
>     }
>     ...
>     if (in_main_loop) {
>         bdrv_unref(bs);
>     }
> 
> to protect the main loop case? So the BdrvDeletedStatus state is not needed.

We already have that in bdrv_drained_recurse(), don't we?

The issue here is, though, that QLIST_FOREACH_SAFE() stores the next
child pointer to @tmp.  However, once the current child @child is
drained, @tmp may no longer be valid -- it may have been detached from
@bs, and it may even have been deleted.

We could work around the latter by increasing the next child's reference
somehow (but BdrvChild doesn't really have a refcount, and in order to
do so, we would probably have to emulate being a parent or
something...), but then you'd still have the issue of @tmp being
detached from the children list we're trying to iterate over.  So
tmp->next is no longer valid.

Anyway, so the latter is the reason why I decided to introduce the bs_list.

But maybe that actually saves us from having to fiddle with BdrvChild...
 Since it's just a list of BDSs now, it may be enough to simply
bdrv_ref() all of the BDSs in that list before draining any of them.  So
 we'd keep creating the bs_list and then we'd move the existing
bdrv_ref() from the drain loop into the loop filling bs_list.

And adding a bdrv_ref()/bdrv_unref() pair to bdrv_drained_begin() should
hopefully work there, too.

Max

Patch

diff --git a/block/io.c b/block/io.c
index 4378ae4c7d..8ec1a564ad 100644
--- a/block/io.c
+++ b/block/io.c
@@ -182,33 +182,57 @@  static void bdrv_drain_invoke(BlockDriverState *bs)
 
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
-    BdrvChild *child, *tmp;
+    BdrvChild *child;
     bool waited;
+    struct BDSToDrain {
+        BlockDriverState *bs;
+        BdrvDeletedStatus del_stat;
+        QLIST_ENTRY(BDSToDrain) next;
+    };
+    QLIST_HEAD(, BDSToDrain) bs_list = QLIST_HEAD_INITIALIZER(bs_list);
+    bool in_main_loop =
+        qemu_get_current_aio_context() == qemu_get_aio_context();
 
     waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
 
     /* Ensure any pending metadata writes are submitted to bs->file.  */
     bdrv_drain_invoke(bs);
 
-    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
-        BlockDriverState *bs = child->bs;
-        bool in_main_loop =
-            qemu_get_current_aio_context() == qemu_get_aio_context();
-        assert(bs->refcnt > 0);
-        if (in_main_loop) {
-            /* In case the recursive bdrv_drain_recurse processes a
-             * block_job_defer_to_main_loop BH and modifies the graph,
-             * let's hold a reference to bs until we are done.
-             *
-             * IOThread doesn't have such a BH, and it is not safe to call
-             * bdrv_unref without BQL, so skip doing it there.
-             */
-            bdrv_ref(bs);
-        }
-        waited |= bdrv_drain_recurse(bs);
-        if (in_main_loop) {
-            bdrv_unref(bs);
+    /* Draining children may result in other children being removed and maybe
+     * even deleted, so copy the children list first */
+    QLIST_FOREACH(child, &bs->children, next) {
+        struct BDSToDrain *bs2d = g_new0(struct BDSToDrain, 1);
+
+        bs2d->bs = child->bs;
+        QLIST_INSERT_HEAD(&bs->deleted_status, &bs2d->del_stat, next);
+
+        QLIST_INSERT_HEAD(&bs_list, bs2d, next);
+    }
+
+    while (!QLIST_EMPTY(&bs_list)) {
+        struct BDSToDrain *bs2d = QLIST_FIRST(&bs_list);
+        QLIST_REMOVE(bs2d, next);
+
+        if (!bs2d->del_stat.deleted) {
+            QLIST_REMOVE(&bs2d->del_stat, next);
+
+            if (in_main_loop) {
+                /* In case the recursive bdrv_drain_recurse processes a
+                 * block_job_defer_to_main_loop BH and modifies the graph,
+                 * let's hold a reference to the BDS until we are done.
+                 *
+                 * IOThread doesn't have such a BH, and it is not safe to call
+                 * bdrv_unref without BQL, so skip doing it there.
+                 */
+                bdrv_ref(bs2d->bs);
+            }
+            waited |= bdrv_drain_recurse(bs2d->bs);
+            if (in_main_loop) {
+                bdrv_unref(bs2d->bs);
+            }
         }
+
+        g_free(bs2d);
     }
 
     return waited;
@@ -252,17 +276,25 @@  static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
 
 void bdrv_drained_begin(BlockDriverState *bs)
 {
+    BdrvDeletedStatus del_stat = { .deleted = false };
+
     if (qemu_in_coroutine()) {
         bdrv_co_yield_to_drain(bs);
         return;
     }
 
+    QLIST_INSERT_HEAD(&bs->deleted_status, &del_stat, next);
+
     if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
         aio_disable_external(bdrv_get_aio_context(bs));
         bdrv_parent_drained_begin(bs);
     }
 
-    bdrv_drain_recurse(bs);
+    if (!del_stat.deleted) {
+        QLIST_REMOVE(&del_stat, next);
+
+        bdrv_drain_recurse(bs);
+    }
 }
 
 void bdrv_drained_end(BlockDriverState *bs)