diff mbox

[15/16] block: Add and use bdrv_replace_in_backing_chain()

Message ID 1442497700-2536-16-git-send-email-kwolf@redhat.com
State New
Headers show

Commit Message

Kevin Wolf Sept. 17, 2015, 1:48 p.m. UTC
This cleans up the mess we left behind in the mirror code after the
previous patch. Instead of using bdrv_swap(), just change pointers.

The interface change of the mirror job that callers must consider is
that after job completion, their local BDS pointers still point to the
same node now. qemu-img must change its code accordingly (which makes it
easier to understand); the other callers stays unchanged because after
completion they don't do anything with the BDS, but just with the job,
and the job is still owned by the source BDS.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c               | 32 +++++++++++++++++++++++++++++++-
 block/mirror.c        | 23 +++++++----------------
 include/block/block.h |  4 +++-
 qemu-img.c            | 16 ++++++++--------
 4 files changed, 49 insertions(+), 26 deletions(-)

Comments

Max Reitz Sept. 23, 2015, 5:08 p.m. UTC | #1
On 17.09.2015 15:48, Kevin Wolf wrote:
> This cleans up the mess we left behind in the mirror code after the
> previous patch. Instead of using bdrv_swap(), just change pointers.
> 
> The interface change of the mirror job that callers must consider is
> that after job completion, their local BDS pointers still point to the
> same node now. qemu-img must change its code accordingly (which makes it
> easier to understand); the other callers stays unchanged because after
> completion they don't do anything with the BDS, but just with the job,
> and the job is still owned by the source BDS.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block.c               | 32 +++++++++++++++++++++++++++++++-
>  block/mirror.c        | 23 +++++++----------------
>  include/block/block.h |  4 +++-
>  qemu-img.c            | 16 ++++++++--------
>  4 files changed, 49 insertions(+), 26 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 98fc17c..7c21659 100644
> --- a/block.c
> +++ b/block.c
> @@ -1095,7 +1095,7 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
>      return child;
>  }
>  
> -void bdrv_detach_child(BdrvChild *child)
> +static void bdrv_detach_child(BdrvChild *child)
>  {
>      QLIST_REMOVE(child, next);
>      QLIST_REMOVE(child, next_parent);
> @@ -2228,6 +2228,36 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
>      bdrv_unref(bs_new);
>  }
>  
> +void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
> +{
> +    assert(!bdrv_requests_pending(old));
> +    assert(!bdrv_requests_pending(new));
> +
> +    bdrv_ref(old);
> +
> +    if (old->blk) {
> +        /* As long as these fields aren't in BlockBackend, but in the top-level
> +         * BlockDriverState, it's not possible for a BDS to have two BBs.
> +         *
> +         * We really want to copy the fields from old to new, but we go for a
> +         * swap instead so that pointers aren't duplicated and cause trouble.
> +         * (Also, bdrv_swap() used to do the same.) */
> +        assert(!new->blk);
> +        swap_feature_fields(old, new);
> +    }
> +    change_parent_backing_link(old, new);
> +
> +    /* Change backing files if a previously independent node is added to the
> +     * chain. For active commit, we replace top by its own (indirect) backing
> +     * file and don't do anything here so we don't build a loop. */
> +    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
> +        bdrv_set_backing_hd(new, backing_bs(old));
> +        bdrv_set_backing_hd(old, NULL);

Wouldn't we want @old to keep its backing file?

Then bdrv_append() would basically be a special case of this function,
with it additionally decrementing the refcount of @bs_new.

Max

> +    }
> +
> +    bdrv_unref(old);
> +}
> +
>  static void bdrv_delete(BlockDriverState *bs)
>  {
>      assert(!bs->job);
Kevin Wolf Sept. 29, 2015, 3:22 p.m. UTC | #2
Am 23.09.2015 um 19:08 hat Max Reitz geschrieben:
> On 17.09.2015 15:48, Kevin Wolf wrote:
> > This cleans up the mess we left behind in the mirror code after the
> > previous patch. Instead of using bdrv_swap(), just change pointers.
> > 
> > The interface change of the mirror job that callers must consider is
> > that after job completion, their local BDS pointers still point to the
> > same node now. qemu-img must change its code accordingly (which makes it
> > easier to understand); the other callers stays unchanged because after
> > completion they don't do anything with the BDS, but just with the job,
> > and the job is still owned by the source BDS.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  block.c               | 32 +++++++++++++++++++++++++++++++-
> >  block/mirror.c        | 23 +++++++----------------
> >  include/block/block.h |  4 +++-
> >  qemu-img.c            | 16 ++++++++--------
> >  4 files changed, 49 insertions(+), 26 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 98fc17c..7c21659 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -1095,7 +1095,7 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> >      return child;
> >  }
> >  
> > -void bdrv_detach_child(BdrvChild *child)
> > +static void bdrv_detach_child(BdrvChild *child)
> >  {
> >      QLIST_REMOVE(child, next);
> >      QLIST_REMOVE(child, next_parent);
> > @@ -2228,6 +2228,36 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
> >      bdrv_unref(bs_new);
> >  }
> >  
> > +void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
> > +{
> > +    assert(!bdrv_requests_pending(old));
> > +    assert(!bdrv_requests_pending(new));
> > +
> > +    bdrv_ref(old);
> > +
> > +    if (old->blk) {
> > +        /* As long as these fields aren't in BlockBackend, but in the top-level
> > +         * BlockDriverState, it's not possible for a BDS to have two BBs.
> > +         *
> > +         * We really want to copy the fields from old to new, but we go for a
> > +         * swap instead so that pointers aren't duplicated and cause trouble.
> > +         * (Also, bdrv_swap() used to do the same.) */
> > +        assert(!new->blk);
> > +        swap_feature_fields(old, new);
> > +    }
> > +    change_parent_backing_link(old, new);
> > +
> > +    /* Change backing files if a previously independent node is added to the
> > +     * chain. For active commit, we replace top by its own (indirect) backing
> > +     * file and don't do anything here so we don't build a loop. */
> > +    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
> > +        bdrv_set_backing_hd(new, backing_bs(old));
> > +        bdrv_set_backing_hd(old, NULL);
> 
> Wouldn't we want @old to keep its backing file?

Would we? The operation I had in mind was: Given a backing file chain,
one node in that chain and an independent node, replace the node in the
chain. That is:

    A <- B <- C <- D

         X

becomes

    A <- X <- C <- D

         B

Of course, you could define a different operation, but this seemed to be
the obvious one that the mirror completion needs.

> Then bdrv_append() would basically be a special case of this function,
> with it additionally decrementing the refcount of @bs_new.

Hm, less duplication sounds nice, but as long as the current way is
technically correct, can we leave this for a cleanup on top?

Kevin

> Max
> 
> > +    }
> > +
> > +    bdrv_unref(old);
> > +}
> > +
> >  static void bdrv_delete(BlockDriverState *bs)
> >  {
> >      assert(!bs->job);
>
Max Reitz Sept. 30, 2015, 2:50 p.m. UTC | #3
On 29.09.2015 17:22, Kevin Wolf wrote:
> Am 23.09.2015 um 19:08 hat Max Reitz geschrieben:
>> On 17.09.2015 15:48, Kevin Wolf wrote:
>>> This cleans up the mess we left behind in the mirror code after the
>>> previous patch. Instead of using bdrv_swap(), just change pointers.
>>>
>>> The interface change of the mirror job that callers must consider is
>>> that after job completion, their local BDS pointers still point to the
>>> same node now. qemu-img must change its code accordingly (which makes it
>>> easier to understand); the other callers stays unchanged because after
>>> completion they don't do anything with the BDS, but just with the job,
>>> and the job is still owned by the source BDS.
>>>
>>> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>>> ---
>>>  block.c               | 32 +++++++++++++++++++++++++++++++-
>>>  block/mirror.c        | 23 +++++++----------------
>>>  include/block/block.h |  4 +++-
>>>  qemu-img.c            | 16 ++++++++--------
>>>  4 files changed, 49 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/block.c b/block.c
>>> index 98fc17c..7c21659 100644
>>> --- a/block.c
>>> +++ b/block.c
>>> @@ -1095,7 +1095,7 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
>>>      return child;
>>>  }
>>>  
>>> -void bdrv_detach_child(BdrvChild *child)
>>> +static void bdrv_detach_child(BdrvChild *child)
>>>  {
>>>      QLIST_REMOVE(child, next);
>>>      QLIST_REMOVE(child, next_parent);
>>> @@ -2228,6 +2228,36 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
>>>      bdrv_unref(bs_new);
>>>  }
>>>  
>>> +void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
>>> +{
>>> +    assert(!bdrv_requests_pending(old));
>>> +    assert(!bdrv_requests_pending(new));
>>> +
>>> +    bdrv_ref(old);
>>> +
>>> +    if (old->blk) {
>>> +        /* As long as these fields aren't in BlockBackend, but in the top-level
>>> +         * BlockDriverState, it's not possible for a BDS to have two BBs.
>>> +         *
>>> +         * We really want to copy the fields from old to new, but we go for a
>>> +         * swap instead so that pointers aren't duplicated and cause trouble.
>>> +         * (Also, bdrv_swap() used to do the same.) */
>>> +        assert(!new->blk);
>>> +        swap_feature_fields(old, new);
>>> +    }
>>> +    change_parent_backing_link(old, new);
>>> +
>>> +    /* Change backing files if a previously independent node is added to the
>>> +     * chain. For active commit, we replace top by its own (indirect) backing
>>> +     * file and don't do anything here so we don't build a loop. */
>>> +    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
>>> +        bdrv_set_backing_hd(new, backing_bs(old));
>>> +        bdrv_set_backing_hd(old, NULL);
>>
>> Wouldn't we want @old to keep its backing file?
> 
> Would we? The operation I had in mind was: Given a backing file chain,
> one node in that chain and an independent node, replace the node in the
> chain. That is:
> 
>     A <- B <- C <- D
> 
>          X
> 
> becomes
> 
>     A <- X <- C <- D
> 
>          B
> 
> Of course, you could define a different operation, but this seemed to be
> the obvious one that the mirror completion needs.

Oops, apparently I missed the backing_bs(old) in
bdrv_set_backing_hd(new, ...).

>> Then bdrv_append() would basically be a special case of this function,
>> with it additionally decrementing the refcount of @bs_new.
> 
> Hm, less duplication sounds nice, but as long as the current way is
> technically correct, can we leave this for a cleanup on top?

And with the backing_bs() taken into account, it is not so duplicated
after all.

Max

> Kevin
> 
>> Max
>>
>>> +    }
>>> +
>>> +    bdrv_unref(old);
>>> +}
>>> +
>>>  static void bdrv_delete(BlockDriverState *bs)
>>>  {
>>>      assert(!bs->job);
>>
> 
>
Max Reitz Sept. 30, 2015, 2:50 p.m. UTC | #4
On 17.09.2015 15:48, Kevin Wolf wrote:
> This cleans up the mess we left behind in the mirror code after the
> previous patch. Instead of using bdrv_swap(), just change pointers.
> 
> The interface change of the mirror job that callers must consider is
> that after job completion, their local BDS pointers still point to the
> same node now. qemu-img must change its code accordingly (which makes it
> easier to understand); the other callers stays unchanged because after
> completion they don't do anything with the BDS, but just with the job,
> and the job is still owned by the source BDS.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block.c               | 32 +++++++++++++++++++++++++++++++-
>  block/mirror.c        | 23 +++++++----------------
>  include/block/block.h |  4 +++-
>  qemu-img.c            | 16 ++++++++--------
>  4 files changed, 49 insertions(+), 26 deletions(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>
diff mbox

Patch

diff --git a/block.c b/block.c
index 98fc17c..7c21659 100644
--- a/block.c
+++ b/block.c
@@ -1095,7 +1095,7 @@  static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
     return child;
 }
 
-void bdrv_detach_child(BdrvChild *child)
+static void bdrv_detach_child(BdrvChild *child)
 {
     QLIST_REMOVE(child, next);
     QLIST_REMOVE(child, next_parent);
@@ -2228,6 +2228,36 @@  void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
     bdrv_unref(bs_new);
 }
 
+void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
+{
+    assert(!bdrv_requests_pending(old));
+    assert(!bdrv_requests_pending(new));
+
+    bdrv_ref(old);
+
+    if (old->blk) {
+        /* As long as these fields aren't in BlockBackend, but in the top-level
+         * BlockDriverState, it's not possible for a BDS to have two BBs.
+         *
+         * We really want to copy the fields from old to new, but we go for a
+         * swap instead so that pointers aren't duplicated and cause trouble.
+         * (Also, bdrv_swap() used to do the same.) */
+        assert(!new->blk);
+        swap_feature_fields(old, new);
+    }
+    change_parent_backing_link(old, new);
+
+    /* Change backing files if a previously independent node is added to the
+     * chain. For active commit, we replace top by its own (indirect) backing
+     * file and don't do anything here so we don't build a loop. */
+    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
+        bdrv_set_backing_hd(new, backing_bs(old));
+        bdrv_set_backing_hd(old, NULL);
+    }
+
+    bdrv_unref(old);
+}
+
 static void bdrv_delete(BlockDriverState *bs)
 {
     assert(!bs->job);
diff --git a/block/mirror.c b/block/mirror.c
index 571da27..443d97c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -353,6 +353,11 @@  static void mirror_exit(BlockJob *job, void *opaque)
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
     MirrorExitData *data = opaque;
     AioContext *replace_aio_context = NULL;
+    BlockDriverState *src = s->common.bs;
+
+    /* Make sure that the source BDS doesn't go away before we called
+     * block_job_completed(). */
+    bdrv_ref(src);
 
     if (s->to_replace) {
         replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -367,22 +372,7 @@  static void mirror_exit(BlockJob *job, void *opaque)
         if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
             bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
         }
-        bdrv_swap(s->target, to_replace);
-        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
-            /* drop the bs loop chain formed by the swap: break the loop then
-             * trigger the unref */
-            /* FIXME This duplicates bdrv_set_backing_hd(), except for the
-             * actual detach/unref so that the loop can be broken. When
-             * bdrv_swap() gets replaced, this will become sane again. */
-            BlockDriverState *backing = s->base->backing->bs;
-            assert(s->base->backing_blocker);
-            bdrv_op_unblock_all(s->base->backing->bs, s->base->backing_blocker);
-            error_free(s->base->backing_blocker);
-            s->base->backing_blocker = NULL;
-            bdrv_detach_child(s->base->backing);
-            s->base->backing = NULL;
-            bdrv_unref(backing);
-        }
+        bdrv_replace_in_backing_chain(to_replace, s->target);
     }
     if (s->to_replace) {
         bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -396,6 +386,7 @@  static void mirror_exit(BlockJob *job, void *opaque)
     bdrv_unref(s->target);
     block_job_completed(&s->common, data->ret);
     g_free(data);
+    bdrv_unref(src);
 }
 
 static void coroutine_fn mirror_run(void *opaque)
diff --git a/include/block/block.h b/include/block/block.h
index 6dc6766..a046bd0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -202,6 +202,9 @@  BlockDriverState *bdrv_new(void);
 void bdrv_make_anon(BlockDriverState *bs);
 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_replace_in_backing_chain(BlockDriverState *old,
+                                   BlockDriverState *new);
+
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_parse_discard_flags(const char *mode, int *flags);
 BdrvChild *bdrv_open_child(const char *filename,
@@ -510,7 +513,6 @@  void bdrv_disable_copy_on_read(BlockDriverState *bs);
 void bdrv_ref(BlockDriverState *bs);
 void bdrv_unref(BlockDriverState *bs);
 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
-void bdrv_detach_child(BdrvChild *child);
 
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
diff --git a/qemu-img.c b/qemu-img.c
index c4454da..575013b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -765,12 +765,12 @@  static int img_commit(int argc, char **argv)
         goto done;
     }
 
-    /* The block job will swap base_bs and bs (which is not what we really want
-     * here, but okay) and unref base_bs (after the swap, i.e., the old top
-     * image). In order to still be able to empty that top image afterwards,
-     * increment the reference counter here preemptively. */
+    /* When the block job completes, the BlockBackend reference will point to
+     * the old backing file. In order to avoid that the top image is already
+     * deleted, so we can still empty it afterwards, increment the reference
+     * counter here preemptively. */
     if (!drop) {
-        bdrv_ref(base_bs);
+        bdrv_ref(bs);
     }
 
     run_block_job(bs->job, &local_err);
@@ -778,8 +778,8 @@  static int img_commit(int argc, char **argv)
         goto unref_backing;
     }
 
-    if (!drop && base_bs->drv->bdrv_make_empty) {
-        ret = base_bs->drv->bdrv_make_empty(base_bs);
+    if (!drop && bs->drv->bdrv_make_empty) {
+        ret = bs->drv->bdrv_make_empty(bs);
         if (ret) {
             error_setg_errno(&local_err, -ret, "Could not empty %s",
                              filename);
@@ -789,7 +789,7 @@  static int img_commit(int argc, char **argv)
 
 unref_backing:
     if (!drop) {
-        bdrv_unref(base_bs);
+        bdrv_unref(bs);
     }
 
 done: