diff mbox

[v17,08/14] block: Support dropping active in bdrv_drop_intermediate

Message ID 1394436370-8908-9-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng March 10, 2014, 7:26 a.m. UTC
Dropping intermediate could be useful both for commit and stream, and
BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
to work with op blockers.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block.c        | 139 ++++++++++++++++++++++++++++-----------------------------
 block/commit.c |   2 +-
 2 files changed, 70 insertions(+), 71 deletions(-)

Comments

Jeff Cody April 7, 2014, 6:47 p.m. UTC | #1
On Mon, Mar 10, 2014 at 03:26:04PM +0800, Fam Zheng wrote:
> Dropping intermediate could be useful both for commit and stream, and
> BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
> to work with op blockers.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block.c        | 139 ++++++++++++++++++++++++++++-----------------------------
>  block/commit.c |   2 +-
>  2 files changed, 70 insertions(+), 71 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 05f7766..0af7c62 100644
> --- a/block.c
> +++ b/block.c
> @@ -2503,115 +2503,114 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
>      return overlay;
>  }
>  
> -typedef struct BlkIntermediateStates {
> -    BlockDriverState *bs;
> -    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
> -} BlkIntermediateStates;
> -
> -
>  /*
> - * Drops images above 'base' up to and including 'top', and sets the image
> - * above 'top' to have base as its backing file.
> + * Drops images above 'base' up to and including 'top', and sets new 'base' as
> + * backing_hd of top's overlay (the image orignally has 'top' as backing file).
> + * top's overlay may be NULL if 'top' is active, no such update needed.
> + * Requires that the top's overlay to 'top' is opened r/w.
> + *
> + * 1) This will convert the following chain:
> + *
> + *     ... <- base <- ... <- top <- overlay <-... <- active
>   *
> - * Requires that the overlay to 'top' is opened r/w, so that the backing file
> - * information in 'bs' can be properly updated.
> + * to
> + *
> + *     ... <- base <- overlay <- active
> + *
> + * 2) It is allowed for bottom==base, in which case it converts:
>   *
> - * E.g., this will convert the following chain:
> - * bottom <- base <- intermediate <- top <- active
> + *     base <- ... <- top <- overlay <- ... <- active
>   *
>   * to
>   *
> - * bottom <- base <- active
> + *     base <- overlay <- active
>   *
> - * It is allowed for bottom==base, in which case it converts:
> + * 2) It also allows active==top, in which case it converts:
>   *
> - * base <- intermediate <- top <- active
> + *     ... <- base <- ... <- top (active)
>   *
>   * to
>   *
> - * base <- active
> + *     ... <- base == active == top
> + *
> + * i.e. only base and lower remains: *top == *base when return.
> + *
> + * 3) If base==NULL, it will drop all the BDS below overlay and set its
> + * backing_hd to NULL. I.e.:
>   *
> - * Error conditions:
> - *  if active == top, that is considered an error
> + *     base(NULL) <- ... <- overlay <- ... <- active
> + *
> + * to
> + *
> + *     overlay <- ... <- active
>   *
>   */
>  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
>                             BlockDriverState *base)
>  {
> -    BlockDriverState *intermediate;
> -    BlockDriverState *base_bs = NULL;
> -    BlockDriverState *new_top_bs = NULL;
> -    BlkIntermediateStates *intermediate_state, *next;
> -    int ret = -EIO;
> -
> -    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
> -    QSIMPLEQ_INIT(&states_to_delete);
> +    BlockDriverState *drop_start, *overlay, *bs;
> +    int ret = -EINVAL;
>  
> -    if (!top->drv || !base->drv) {
> +    assert(active);
> +    assert(top);
> +    /* Verify that top is in backing chain of active */
> +    bs = active;
> +    while (bs && bs != top) {
> +        bs = bs->backing_hd;
> +    }
> +    if (!bs) {
>          goto exit;
>      }
> +    /* Verify that base is in backing chain of top */
> +    if (base) {
> +        while (bs && bs != base) {
> +            bs = bs->backing_hd;
> +        }
> +        if (bs != base) {
> +            goto exit;
> +        }
> +    }
>  
> -    new_top_bs = bdrv_find_overlay(active, top);
> -
> -    if (new_top_bs == NULL) {
> -        /* we could not find the image above 'top', this is an error */
> +    if (!top->drv || (base && !base->drv)) {
>          goto exit;
>      }
> -
> -    /* special case of new_top_bs->backing_hd already pointing to base - nothing
> -     * to do, no intermediate images */
> -    if (new_top_bs->backing_hd == base) {
> +    if (top == base) {
> +        ret = 0;
> +        goto exit;
> +    } else if (top == active) {
> +        assert(base);
> +        drop_start = active->backing_hd;
> +        bdrv_swap(active, base);

This will assert in block.c, in bdrv_swap, on the test for
anonymity of active.  (For testing, I changed the active layer commit
in mirror to use bdrv_drop_intermediate()). 

Unfortunately, there are other problems as well (anonymity could be
fixed by bdrv_make_anon(active)).

Using line numbers from my version of block.c, lines 1957, 1959, and
1960 will each cause an assert (these lines are all in bdrv_swap()):

1956:    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1957:    assert(bs_new->device_name[0] == '\0');
1958:    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1959:    assert(bs_new->job == NULL);
1960:    assert(bs_new->dev == NULL);

Markus - on line 1960 above, is it safe to remove that check (and the
other check further down in bdrv_swap())?

Thinking about it more, there may be other landmines in bdrv_swap()
for this case; prior to this, bdrv_swap() was always called with
bs_new being a newly-created BDS.  With the active layer BDS it almost
certainly is not 'new', and could have other "prohibited" fields set,
in addition to the above (e.g. io limits, etc.)

> +        base->backing_hd = NULL;
> +        bdrv_unref(drop_start);
>          ret = 0;
>          goto exit;
>      }
>  
> -    intermediate = top;
> -
> -    /* now we will go down through the list, and add each BDS we find
> -     * into our deletion queue, until we hit the 'base'
> -     */
> -    while (intermediate) {
> -        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
> -        intermediate_state->bs = intermediate;
> -        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
> -
> -        if (intermediate->backing_hd == base) {
> -            base_bs = intermediate->backing_hd;
> -            break;
> -        }
> -        intermediate = intermediate->backing_hd;
> -    }
> -    if (base_bs == NULL) {
> -        /* something went wrong, we did not end at the base. safely
> -         * unravel everything, and exit with error */
> +    overlay = bdrv_find_overlay(active, top);
> +    if (!overlay) {
>          goto exit;
>      }
> -
> -    /* success - we can delete the intermediate states, and link top->base */
> -    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
> -                                   base_bs->drv ? base_bs->drv->format_name : "");
> +    ret = bdrv_change_backing_file(overlay,
> +                                   base ? base->filename : NULL,
> +                                   base ? base->drv->format_name : NULL);
>      if (ret) {
>          goto exit;
>      }
> -    new_top_bs->backing_hd = base_bs;
>  
> -    bdrv_refresh_limits(new_top_bs);
> -
> -    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
> -        /* so that bdrv_close() does not recursively close the chain */
> -        intermediate_state->bs->backing_hd = NULL;
> -        bdrv_unref(intermediate_state->bs);
> +    bs = overlay->backing_hd;
> +    bdrv_set_backing_hd(overlay, base);
> +    if (base) {
> +        bdrv_ref(base);
> +    }
> +    if (bs) {
> +        bdrv_unref(bs);
>      }
>      ret = 0;
> -
>  exit:
> -    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
> -        g_free(intermediate_state);
> -    }
>      return ret;
>  }
>  
> -
>  static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
>                                     size_t size)
>  {
> diff --git a/block/commit.c b/block/commit.c
> index acec4ac..ac598d6 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -73,6 +73,7 @@ static void coroutine_fn commit_run(void *opaque)
>      int bytes_written = 0;
>      int64_t base_len;
>  
> +    overlay_bs = bdrv_find_overlay(active, top);
>      ret = s->common.len = bdrv_getlength(top);
>  
>  
> @@ -154,7 +155,6 @@ exit_restore_reopen:
>      if (s->base_flags != bdrv_get_flags(base)) {
>          bdrv_reopen(base, s->base_flags, NULL);
>      }
> -    overlay_bs = bdrv_find_overlay(active, top);
>      if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
>          bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
>      }
> -- 
> 1.9.0
> 
>
Markus Armbruster April 8, 2014, 8:15 a.m. UTC | #2
Jeff Cody <jcody@redhat.com> writes:

> On Mon, Mar 10, 2014 at 03:26:04PM +0800, Fam Zheng wrote:
>> Dropping intermediate could be useful both for commit and stream, and
>> BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
>> to work with op blockers.
>> 
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> ---
>>  block.c        | 139 ++++++++++++++++++++++++++++-----------------------------
>>  block/commit.c |   2 +-
>>  2 files changed, 70 insertions(+), 71 deletions(-)
>> 
>> diff --git a/block.c b/block.c
>> index 05f7766..0af7c62 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -2503,115 +2503,114 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
>>      return overlay;
>>  }
>>  
>> -typedef struct BlkIntermediateStates {
>> -    BlockDriverState *bs;
>> -    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
>> -} BlkIntermediateStates;
>> -
>> -
>>  /*
>> - * Drops images above 'base' up to and including 'top', and sets the image
>> - * above 'top' to have base as its backing file.
>> + * Drops images above 'base' up to and including 'top', and sets new 'base' as
>> + * backing_hd of top's overlay (the image orignally has 'top' as backing file).
>> + * top's overlay may be NULL if 'top' is active, no such update needed.
>> + * Requires that the top's overlay to 'top' is opened r/w.
>> + *
>> + * 1) This will convert the following chain:
>> + *
>> + *     ... <- base <- ... <- top <- overlay <-... <- active
>>   *
>> - * Requires that the overlay to 'top' is opened r/w, so that the backing file
>> - * information in 'bs' can be properly updated.
>> + * to
>> + *
>> + *     ... <- base <- overlay <- active
>> + *
>> + * 2) It is allowed for bottom==base, in which case it converts:
>>   *
>> - * E.g., this will convert the following chain:
>> - * bottom <- base <- intermediate <- top <- active
>> + *     base <- ... <- top <- overlay <- ... <- active
>>   *
>>   * to
>>   *
>> - * bottom <- base <- active
>> + *     base <- overlay <- active
>>   *
>> - * It is allowed for bottom==base, in which case it converts:
>> + * 2) It also allows active==top, in which case it converts:
>>   *
>> - * base <- intermediate <- top <- active
>> + *     ... <- base <- ... <- top (active)
>>   *
>>   * to
>>   *
>> - * base <- active
>> + *     ... <- base == active == top
>> + *
>> + * i.e. only base and lower remains: *top == *base when return.
>> + *
>> + * 3) If base==NULL, it will drop all the BDS below overlay and set its
>> + * backing_hd to NULL. I.e.:
>>   *
>> - * Error conditions:
>> - *  if active == top, that is considered an error
>> + *     base(NULL) <- ... <- overlay <- ... <- active
>> + *
>> + * to
>> + *
>> + *     overlay <- ... <- active
>>   *
>>   */
>>  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
>>                             BlockDriverState *base)
>>  {
>> -    BlockDriverState *intermediate;
>> -    BlockDriverState *base_bs = NULL;
>> -    BlockDriverState *new_top_bs = NULL;
>> -    BlkIntermediateStates *intermediate_state, *next;
>> -    int ret = -EIO;
>> -
>> -    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
>> -    QSIMPLEQ_INIT(&states_to_delete);
>> +    BlockDriverState *drop_start, *overlay, *bs;
>> +    int ret = -EINVAL;
>>  
>> -    if (!top->drv || !base->drv) {
>> +    assert(active);
>> +    assert(top);
>> +    /* Verify that top is in backing chain of active */
>> +    bs = active;
>> +    while (bs && bs != top) {
>> +        bs = bs->backing_hd;
>> +    }
>> +    if (!bs) {
>>          goto exit;
>>      }
>> +    /* Verify that base is in backing chain of top */
>> +    if (base) {
>> +        while (bs && bs != base) {
>> +            bs = bs->backing_hd;
>> +        }
>> +        if (bs != base) {
>> +            goto exit;
>> +        }
>> +    }
>>  
>> -    new_top_bs = bdrv_find_overlay(active, top);
>> -
>> -    if (new_top_bs == NULL) {
>> -        /* we could not find the image above 'top', this is an error */
>> +    if (!top->drv || (base && !base->drv)) {
>>          goto exit;
>>      }
>> -
>> -    /* special case of new_top_bs->backing_hd already pointing to base - nothing
>> -     * to do, no intermediate images */
>> -    if (new_top_bs->backing_hd == base) {
>> +    if (top == base) {
>> +        ret = 0;
>> +        goto exit;
>> +    } else if (top == active) {
>> +        assert(base);
>> +        drop_start = active->backing_hd;
>> +        bdrv_swap(active, base);
>
> This will assert in block.c, in bdrv_swap, on the test for
> anonymity of active.  (For testing, I changed the active layer commit
> in mirror to use bdrv_drop_intermediate()). 
>
> Unfortunately, there are other problems as well (anonymity could be
> fixed by bdrv_make_anon(active)).
>
> Using line numbers from my version of block.c, lines 1957, 1959, and
> 1960 will each cause an assert (these lines are all in bdrv_swap()):
>
> 1956: /* bs_new must be anonymous and shouldn't have anything fancy
> enabled */
> 1957:    assert(bs_new->device_name[0] == '\0');
> 1958:    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
> 1959:    assert(bs_new->job == NULL);
> 1960:    assert(bs_new->dev == NULL);
>
> Markus - on line 1960 above, is it safe to remove that check (and the
> other check further down in bdrv_swap())?

I guess you mean the one under /* Check a few fields that should remain
attached to the device */ by "the other check".

bdrv_swap() is a scary, scary function.  It has a number of
preconditions, and we've tried to make them explicit in assertions.

The preconditions could be:

(0) Implicit.  Or less politely said: unstated / unknown.

(1) Explicit, but wrong.

(2) Restrictions: the swapping code doesn't cover this state, but it
could be made to cover it, relaxing the precondition.

(3) Fundamental: the precondition cannot or should not be relaxed.

BDS member dev is is the back pointer for the device model property
pointing to the device model's backend.  It must stay on top, and
bdrv_move_feature_fields() duly moves it, along with its buddies dev_ops
and dev_opaque.  Obviously, only one of bdrv_old and bdrv_new can be on
top at the same time.  bdrv_swap() currently assumes that bdrv_old is
the top one on entry.  Feels like an instance of (3).

> Thinking about it more, there may be other landmines in bdrv_swap()
> for this case; prior to this, bdrv_swap() was always called with
> bs_new being a newly-created BDS.  With the active layer BDS it almost
> certainly is not 'new', and could have other "prohibited" fields set,
> in addition to the above (e.g. io limits, etc.)

Hairy...

We talked about splitting BlockBackend off BDS with an axe.  Do you
think that would make this series less hairy?
Fam Zheng April 8, 2014, 9:07 a.m. UTC | #3
On Tue, 04/08 10:15, Markus Armbruster wrote:
> Jeff Cody <jcody@redhat.com> writes:
> 
> > On Mon, Mar 10, 2014 at 03:26:04PM +0800, Fam Zheng wrote:
> >> Dropping intermediate could be useful both for commit and stream, and
> >> BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
> >> to work with op blockers.
> >> 
> >> Signed-off-by: Fam Zheng <famz@redhat.com>
> >> ---
> >>  block.c        | 139 ++++++++++++++++++++++++++++-----------------------------
> >>  block/commit.c |   2 +-
> >>  2 files changed, 70 insertions(+), 71 deletions(-)
> >> 
> >> diff --git a/block.c b/block.c
> >> index 05f7766..0af7c62 100644
> >> --- a/block.c
> >> +++ b/block.c
> >> @@ -2503,115 +2503,114 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
> >>      return overlay;
> >>  }
> >>  
> >> -typedef struct BlkIntermediateStates {
> >> -    BlockDriverState *bs;
> >> -    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
> >> -} BlkIntermediateStates;
> >> -
> >> -
> >>  /*
> >> - * Drops images above 'base' up to and including 'top', and sets the image
> >> - * above 'top' to have base as its backing file.
> >> + * Drops images above 'base' up to and including 'top', and sets new 'base' as
> >> + * backing_hd of top's overlay (the image orignally has 'top' as backing file).
> >> + * top's overlay may be NULL if 'top' is active, no such update needed.
> >> + * Requires that the top's overlay to 'top' is opened r/w.
> >> + *
> >> + * 1) This will convert the following chain:
> >> + *
> >> + *     ... <- base <- ... <- top <- overlay <-... <- active
> >>   *
> >> - * Requires that the overlay to 'top' is opened r/w, so that the backing file
> >> - * information in 'bs' can be properly updated.
> >> + * to
> >> + *
> >> + *     ... <- base <- overlay <- active
> >> + *
> >> + * 2) It is allowed for bottom==base, in which case it converts:
> >>   *
> >> - * E.g., this will convert the following chain:
> >> - * bottom <- base <- intermediate <- top <- active
> >> + *     base <- ... <- top <- overlay <- ... <- active
> >>   *
> >>   * to
> >>   *
> >> - * bottom <- base <- active
> >> + *     base <- overlay <- active
> >>   *
> >> - * It is allowed for bottom==base, in which case it converts:
> >> + * 2) It also allows active==top, in which case it converts:
> >>   *
> >> - * base <- intermediate <- top <- active
> >> + *     ... <- base <- ... <- top (active)
> >>   *
> >>   * to
> >>   *
> >> - * base <- active
> >> + *     ... <- base == active == top
> >> + *
> >> + * i.e. only base and lower remains: *top == *base when return.
> >> + *
> >> + * 3) If base==NULL, it will drop all the BDS below overlay and set its
> >> + * backing_hd to NULL. I.e.:
> >>   *
> >> - * Error conditions:
> >> - *  if active == top, that is considered an error
> >> + *     base(NULL) <- ... <- overlay <- ... <- active
> >> + *
> >> + * to
> >> + *
> >> + *     overlay <- ... <- active
> >>   *
> >>   */
> >>  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
> >>                             BlockDriverState *base)
> >>  {
> >> -    BlockDriverState *intermediate;
> >> -    BlockDriverState *base_bs = NULL;
> >> -    BlockDriverState *new_top_bs = NULL;
> >> -    BlkIntermediateStates *intermediate_state, *next;
> >> -    int ret = -EIO;
> >> -
> >> -    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
> >> -    QSIMPLEQ_INIT(&states_to_delete);
> >> +    BlockDriverState *drop_start, *overlay, *bs;
> >> +    int ret = -EINVAL;
> >>  
> >> -    if (!top->drv || !base->drv) {
> >> +    assert(active);
> >> +    assert(top);
> >> +    /* Verify that top is in backing chain of active */
> >> +    bs = active;
> >> +    while (bs && bs != top) {
> >> +        bs = bs->backing_hd;
> >> +    }
> >> +    if (!bs) {
> >>          goto exit;
> >>      }
> >> +    /* Verify that base is in backing chain of top */
> >> +    if (base) {
> >> +        while (bs && bs != base) {
> >> +            bs = bs->backing_hd;
> >> +        }
> >> +        if (bs != base) {
> >> +            goto exit;
> >> +        }
> >> +    }
> >>  
> >> -    new_top_bs = bdrv_find_overlay(active, top);
> >> -
> >> -    if (new_top_bs == NULL) {
> >> -        /* we could not find the image above 'top', this is an error */
> >> +    if (!top->drv || (base && !base->drv)) {
> >>          goto exit;
> >>      }
> >> -
> >> -    /* special case of new_top_bs->backing_hd already pointing to base - nothing
> >> -     * to do, no intermediate images */
> >> -    if (new_top_bs->backing_hd == base) {
> >> +    if (top == base) {
> >> +        ret = 0;
> >> +        goto exit;
> >> +    } else if (top == active) {
> >> +        assert(base);
> >> +        drop_start = active->backing_hd;
> >> +        bdrv_swap(active, base);
> >
> > This will assert in block.c, in bdrv_swap, on the test for
> > anonymity of active.  (For testing, I changed the active layer commit
> > in mirror to use bdrv_drop_intermediate()).

Jeff, you're right, because bdrv_swap requires first argument to be a "new"
BDS, while we are passing in an top BDS.

But what happens if we write bdrv_swap(base, active)?

> >
> > Unfortunately, there are other problems as well (anonymity could be
> > fixed by bdrv_make_anon(active)).
> >
> > Using line numbers from my version of block.c, lines 1957, 1959, and
> > 1960 will each cause an assert (these lines are all in bdrv_swap()):
> >
> > 1956: /* bs_new must be anonymous and shouldn't have anything fancy
> > enabled */
> > 1957:    assert(bs_new->device_name[0] == '\0');
> > 1958:    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
> > 1959:    assert(bs_new->job == NULL);
> > 1960:    assert(bs_new->dev == NULL);
> >
> > Markus - on line 1960 above, is it safe to remove that check (and the
> > other check further down in bdrv_swap())?
> 
> I guess you mean the one under /* Check a few fields that should remain
> attached to the device */ by "the other check".
> 
> bdrv_swap() is a scary, scary function.  It has a number of
> preconditions, and we've tried to make them explicit in assertions.
> 
> The preconditions could be:
> 
> (0) Implicit.  Or less politely said: unstated / unknown.
> 
> (1) Explicit, but wrong.
> 
> (2) Restrictions: the swapping code doesn't cover this state, but it
> could be made to cover it, relaxing the precondition.
> 
> (3) Fundamental: the precondition cannot or should not be relaxed.
> 
> BDS member dev is is the back pointer for the device model property
> pointing to the device model's backend.  It must stay on top, and
> bdrv_move_feature_fields() duly moves it, along with its buddies dev_ops
> and dev_opaque.  Obviously, only one of bdrv_old and bdrv_new can be on
> top at the same time.  bdrv_swap() currently assumes that bdrv_old is
> the top one on entry.  Feels like an instance of (3).
> 
> > Thinking about it more, there may be other landmines in bdrv_swap()
> > for this case; prior to this, bdrv_swap() was always called with
> > bs_new being a newly-created BDS.  With the active layer BDS it almost
> > certainly is not 'new', and could have other "prohibited" fields set,
> > in addition to the above (e.g. io limits, etc.)
> 
> Hairy...
> 
> We talked about splitting BlockBackend off BDS with an axe.  Do you
> think that would make this series less hairy?

Hopefully, because it will help us to get rid of bdrv_move_feature_fields. :)

But if we need to get rid of bdrv_swap, we need a generic "level of indirect"
between BDS and BDS user, so we don't need to worry about the validity of old
BDS pointers when we update the chain. Just like BlockBackend does at device
emulation side. That's an even bigger project.

Thanks,
Fam
Jeff Cody April 9, 2014, 6:12 p.m. UTC | #4
On Tue, Apr 08, 2014 at 05:07:38PM +0800, Fam Zheng wrote:
> On Tue, 04/08 10:15, Markus Armbruster wrote:
> > Jeff Cody <jcody@redhat.com> writes:
> > 
> > > On Mon, Mar 10, 2014 at 03:26:04PM +0800, Fam Zheng wrote:
> > >> Dropping intermediate could be useful both for commit and stream, and
> > >> BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs
> > >> to work with op blockers.
> > >> 
> > >> Signed-off-by: Fam Zheng <famz@redhat.com>
> > >> ---
> > >>  block.c        | 139 ++++++++++++++++++++++++++++-----------------------------
> > >>  block/commit.c |   2 +-
> > >>  2 files changed, 70 insertions(+), 71 deletions(-)
> > >> 
> > >> diff --git a/block.c b/block.c
> > >> index 05f7766..0af7c62 100644
> > >> --- a/block.c
> > >> +++ b/block.c
> > >> @@ -2503,115 +2503,114 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
> > >>      return overlay;
> > >>  }
> > >>  
> > >> -typedef struct BlkIntermediateStates {
> > >> -    BlockDriverState *bs;
> > >> -    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
> > >> -} BlkIntermediateStates;
> > >> -
> > >> -
> > >>  /*
> > >> - * Drops images above 'base' up to and including 'top', and sets the image
> > >> - * above 'top' to have base as its backing file.
> > >> + * Drops images above 'base' up to and including 'top', and sets new 'base' as
> > >> + * backing_hd of top's overlay (the image orignally has 'top' as backing file).
> > >> + * top's overlay may be NULL if 'top' is active, no such update needed.
> > >> + * Requires that the top's overlay to 'top' is opened r/w.
> > >> + *
> > >> + * 1) This will convert the following chain:
> > >> + *
> > >> + *     ... <- base <- ... <- top <- overlay <-... <- active
> > >>   *
> > >> - * Requires that the overlay to 'top' is opened r/w, so that the backing file
> > >> - * information in 'bs' can be properly updated.
> > >> + * to
> > >> + *
> > >> + *     ... <- base <- overlay <- active
> > >> + *
> > >> + * 2) It is allowed for bottom==base, in which case it converts:
> > >>   *
> > >> - * E.g., this will convert the following chain:
> > >> - * bottom <- base <- intermediate <- top <- active
> > >> + *     base <- ... <- top <- overlay <- ... <- active
> > >>   *
> > >>   * to
> > >>   *
> > >> - * bottom <- base <- active
> > >> + *     base <- overlay <- active
> > >>   *
> > >> - * It is allowed for bottom==base, in which case it converts:
> > >> + * 2) It also allows active==top, in which case it converts:
> > >>   *
> > >> - * base <- intermediate <- top <- active
> > >> + *     ... <- base <- ... <- top (active)
> > >>   *
> > >>   * to
> > >>   *
> > >> - * base <- active
> > >> + *     ... <- base == active == top
> > >> + *
> > >> + * i.e. only base and lower remains: *top == *base when return.
> > >> + *
> > >> + * 3) If base==NULL, it will drop all the BDS below overlay and set its
> > >> + * backing_hd to NULL. I.e.:
> > >>   *
> > >> - * Error conditions:
> > >> - *  if active == top, that is considered an error
> > >> + *     base(NULL) <- ... <- overlay <- ... <- active
> > >> + *
> > >> + * to
> > >> + *
> > >> + *     overlay <- ... <- active
> > >>   *
> > >>   */
> > >>  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
> > >>                             BlockDriverState *base)
> > >>  {
> > >> -    BlockDriverState *intermediate;
> > >> -    BlockDriverState *base_bs = NULL;
> > >> -    BlockDriverState *new_top_bs = NULL;
> > >> -    BlkIntermediateStates *intermediate_state, *next;
> > >> -    int ret = -EIO;
> > >> -
> > >> -    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
> > >> -    QSIMPLEQ_INIT(&states_to_delete);
> > >> +    BlockDriverState *drop_start, *overlay, *bs;
> > >> +    int ret = -EINVAL;
> > >>  
> > >> -    if (!top->drv || !base->drv) {
> > >> +    assert(active);
> > >> +    assert(top);
> > >> +    /* Verify that top is in backing chain of active */
> > >> +    bs = active;
> > >> +    while (bs && bs != top) {
> > >> +        bs = bs->backing_hd;
> > >> +    }
> > >> +    if (!bs) {
> > >>          goto exit;
> > >>      }
> > >> +    /* Verify that base is in backing chain of top */
> > >> +    if (base) {
> > >> +        while (bs && bs != base) {
> > >> +            bs = bs->backing_hd;
> > >> +        }
> > >> +        if (bs != base) {
> > >> +            goto exit;
> > >> +        }
> > >> +    }
> > >>  
> > >> -    new_top_bs = bdrv_find_overlay(active, top);
> > >> -
> > >> -    if (new_top_bs == NULL) {
> > >> -        /* we could not find the image above 'top', this is an error */
> > >> +    if (!top->drv || (base && !base->drv)) {
> > >>          goto exit;
> > >>      }
> > >> -
> > >> -    /* special case of new_top_bs->backing_hd already pointing to base - nothing
> > >> -     * to do, no intermediate images */
> > >> -    if (new_top_bs->backing_hd == base) {
> > >> +    if (top == base) {
> > >> +        ret = 0;
> > >> +        goto exit;
> > >> +    } else if (top == active) {
> > >> +        assert(base);
> > >> +        drop_start = active->backing_hd;
> > >> +        bdrv_swap(active, base);
> > >
> > > This will assert in block.c, in bdrv_swap, on the test for
> > > anonymity of active.  (For testing, I changed the active layer commit
> > > in mirror to use bdrv_drop_intermediate()).
> 
> Jeff, you're right, because bdrv_swap requires first argument to be a "new"
> BDS, while we are passing in an top BDS.
> 
> But what happens if we write bdrv_swap(base, active)?
>

That seems like it could work - I did a quick test, and did not run
into any issues, going from active->base, and active->intermediate.  


> > >
> > > Unfortunately, there are other problems as well (anonymity could be
> > > fixed by bdrv_make_anon(active)).
> > >
> > > Using line numbers from my version of block.c, lines 1957, 1959, and
> > > 1960 will each cause an assert (these lines are all in bdrv_swap()):
> > >
> > > 1956: /* bs_new must be anonymous and shouldn't have anything fancy
> > > enabled */
> > > 1957:    assert(bs_new->device_name[0] == '\0');
> > > 1958:    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
> > > 1959:    assert(bs_new->job == NULL);
> > > 1960:    assert(bs_new->dev == NULL);
> > >
> > > Markus - on line 1960 above, is it safe to remove that check (and the
> > > other check further down in bdrv_swap())?
> > 
> > I guess you mean the one under /* Check a few fields that should remain
> > attached to the device */ by "the other check".
> > 
> > bdrv_swap() is a scary, scary function.  It has a number of
> > preconditions, and we've tried to make them explicit in assertions.
> > 
> > The preconditions could be:
> > 
> > (0) Implicit.  Or less politely said: unstated / unknown.
> > 
> > (1) Explicit, but wrong.
> > 
> > (2) Restrictions: the swapping code doesn't cover this state, but it
> > could be made to cover it, relaxing the precondition.
> > 
> > (3) Fundamental: the precondition cannot or should not be relaxed.
> > 
> > BDS member dev is is the back pointer for the device model property
> > pointing to the device model's backend.  It must stay on top, and
> > bdrv_move_feature_fields() duly moves it, along with its buddies dev_ops
> > and dev_opaque.  Obviously, only one of bdrv_old and bdrv_new can be on
> > top at the same time.  bdrv_swap() currently assumes that bdrv_old is
> > the top one on entry.  Feels like an instance of (3).
> > 
> > > Thinking about it more, there may be other landmines in bdrv_swap()
> > > for this case; prior to this, bdrv_swap() was always called with
> > > bs_new being a newly-created BDS.  With the active layer BDS it almost
> > > certainly is not 'new', and could have other "prohibited" fields set,
> > > in addition to the above (e.g. io limits, etc.)
> > 
> > Hairy...
> > 
> > We talked about splitting BlockBackend off BDS with an axe.  Do you
> > think that would make this series less hairy?
> 
> Hopefully, because it will help us to get rid of bdrv_move_feature_fields. :)
> 
> But if we need to get rid of bdrv_swap, we need a generic "level of indirect"
> between BDS and BDS user, so we don't need to worry about the validity of old
> BDS pointers when we update the chain. Just like BlockBackend does at device
> emulation side. That's an even bigger project.
> 
> Thanks,
> Fam
diff mbox

Patch

diff --git a/block.c b/block.c
index 05f7766..0af7c62 100644
--- a/block.c
+++ b/block.c
@@ -2503,115 +2503,114 @@  BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
     return overlay;
 }
 
-typedef struct BlkIntermediateStates {
-    BlockDriverState *bs;
-    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
 /*
- * Drops images above 'base' up to and including 'top', and sets the image
- * above 'top' to have base as its backing file.
+ * Drops images above 'base' up to and including 'top', and sets new 'base' as
+ * backing_hd of top's overlay (the image orignally has 'top' as backing file).
+ * top's overlay may be NULL if 'top' is active, no such update needed.
+ * Requires that the top's overlay to 'top' is opened r/w.
+ *
+ * 1) This will convert the following chain:
+ *
+ *     ... <- base <- ... <- top <- overlay <-... <- active
  *
- * Requires that the overlay to 'top' is opened r/w, so that the backing file
- * information in 'bs' can be properly updated.
+ * to
+ *
+ *     ... <- base <- overlay <- active
+ *
+ * 2) It is allowed for bottom==base, in which case it converts:
  *
- * E.g., this will convert the following chain:
- * bottom <- base <- intermediate <- top <- active
+ *     base <- ... <- top <- overlay <- ... <- active
  *
  * to
  *
- * bottom <- base <- active
+ *     base <- overlay <- active
  *
- * It is allowed for bottom==base, in which case it converts:
+ * 2) It also allows active==top, in which case it converts:
  *
- * base <- intermediate <- top <- active
+ *     ... <- base <- ... <- top (active)
  *
  * to
  *
- * base <- active
+ *     ... <- base == active == top
+ *
+ * i.e. only base and lower remains: *top == *base when return.
+ *
+ * 3) If base==NULL, it will drop all the BDS below overlay and set its
+ * backing_hd to NULL. I.e.:
  *
- * Error conditions:
- *  if active == top, that is considered an error
+ *     base(NULL) <- ... <- overlay <- ... <- active
+ *
+ * to
+ *
+ *     overlay <- ... <- active
  *
  */
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                            BlockDriverState *base)
 {
-    BlockDriverState *intermediate;
-    BlockDriverState *base_bs = NULL;
-    BlockDriverState *new_top_bs = NULL;
-    BlkIntermediateStates *intermediate_state, *next;
-    int ret = -EIO;
-
-    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
-    QSIMPLEQ_INIT(&states_to_delete);
+    BlockDriverState *drop_start, *overlay, *bs;
+    int ret = -EINVAL;
 
-    if (!top->drv || !base->drv) {
+    assert(active);
+    assert(top);
+    /* Verify that top is in backing chain of active */
+    bs = active;
+    while (bs && bs != top) {
+        bs = bs->backing_hd;
+    }
+    if (!bs) {
         goto exit;
     }
+    /* Verify that base is in backing chain of top */
+    if (base) {
+        while (bs && bs != base) {
+            bs = bs->backing_hd;
+        }
+        if (bs != base) {
+            goto exit;
+        }
+    }
 
-    new_top_bs = bdrv_find_overlay(active, top);
-
-    if (new_top_bs == NULL) {
-        /* we could not find the image above 'top', this is an error */
+    if (!top->drv || (base && !base->drv)) {
         goto exit;
     }
-
-    /* special case of new_top_bs->backing_hd already pointing to base - nothing
-     * to do, no intermediate images */
-    if (new_top_bs->backing_hd == base) {
+    if (top == base) {
+        ret = 0;
+        goto exit;
+    } else if (top == active) {
+        assert(base);
+        drop_start = active->backing_hd;
+        bdrv_swap(active, base);
+        base->backing_hd = NULL;
+        bdrv_unref(drop_start);
         ret = 0;
         goto exit;
     }
 
-    intermediate = top;
-
-    /* now we will go down through the list, and add each BDS we find
-     * into our deletion queue, until we hit the 'base'
-     */
-    while (intermediate) {
-        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
-        intermediate_state->bs = intermediate;
-        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
-
-        if (intermediate->backing_hd == base) {
-            base_bs = intermediate->backing_hd;
-            break;
-        }
-        intermediate = intermediate->backing_hd;
-    }
-    if (base_bs == NULL) {
-        /* something went wrong, we did not end at the base. safely
-         * unravel everything, and exit with error */
+    overlay = bdrv_find_overlay(active, top);
+    if (!overlay) {
         goto exit;
     }
-
-    /* success - we can delete the intermediate states, and link top->base */
-    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
-                                   base_bs->drv ? base_bs->drv->format_name : "");
+    ret = bdrv_change_backing_file(overlay,
+                                   base ? base->filename : NULL,
+                                   base ? base->drv->format_name : NULL);
     if (ret) {
         goto exit;
     }
-    new_top_bs->backing_hd = base_bs;
 
-    bdrv_refresh_limits(new_top_bs);
-
-    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
-        /* so that bdrv_close() does not recursively close the chain */
-        intermediate_state->bs->backing_hd = NULL;
-        bdrv_unref(intermediate_state->bs);
+    bs = overlay->backing_hd;
+    bdrv_set_backing_hd(overlay, base);
+    if (base) {
+        bdrv_ref(base);
+    }
+    if (bs) {
+        bdrv_unref(bs);
     }
     ret = 0;
-
 exit:
-    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
-        g_free(intermediate_state);
-    }
     return ret;
 }
 
-
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                    size_t size)
 {
diff --git a/block/commit.c b/block/commit.c
index acec4ac..ac598d6 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -73,6 +73,7 @@  static void coroutine_fn commit_run(void *opaque)
     int bytes_written = 0;
     int64_t base_len;
 
+    overlay_bs = bdrv_find_overlay(active, top);
     ret = s->common.len = bdrv_getlength(top);
 
 
@@ -154,7 +155,6 @@  exit_restore_reopen:
     if (s->base_flags != bdrv_get_flags(base)) {
         bdrv_reopen(base, s->base_flags, NULL);
     }
-    overlay_bs = bdrv_find_overlay(active, top);
     if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
         bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
     }