diff mbox

[RFC,v2,1/6] block: add support functions for live commit, to find and delete images.

Message ID ceb1c0fd608c03c2406e0009b3a5d0f018f37496.1346351079.git.jcody@redhat.com
State New
Headers show

Commit Message

Jeff Cody Aug. 30, 2012, 6:47 p.m. UTC
Add bdrv_find_child(), and bdrv_delete_intermediate().

bdrv_find_child():  given 'bs' and the active (topmost) BDS of an image chain,
                    find the image that is the immediate top of 'bs'

bdrv_delete_intermediate():
                    Given 3 BDS (active, top, base), delete images above
                    base up to and including top, and set base to be the
                    parent of top's child node.

                    E.g., this converts:

                    bottom <- base <- intermediate <- top <- active

                    to

                    bottom <- base <- active

                    where top == active is permitted, although active
                    will not be deleted.

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block.h |   5 ++-
 2 files changed, 146 insertions(+), 1 deletion(-)

Comments

Kevin Wolf Sept. 6, 2012, 1:23 p.m. UTC | #1
Am 30.08.2012 20:47, schrieb Jeff Cody:
> Add bdrv_find_child(), and bdrv_delete_intermediate().
> 
> bdrv_find_child():  given 'bs' and the active (topmost) BDS of an image chain,
>                     find the image that is the immediate top of 'bs'
> 
> bdrv_delete_intermediate():
>                     Given 3 BDS (active, top, base), delete images above
>                     base up to and including top, and set base to be the
>                     parent of top's child node.
> 
>                     E.g., this converts:
> 
>                     bottom <- base <- intermediate <- top <- active
> 
>                     to
> 
>                     bottom <- base <- active
> 
>                     where top == active is permitted, although active
>                     will not be deleted.
> 
> Signed-off-by: Jeff Cody <jcody@redhat.com>

At first, when just reading the function name, I thought this would
actually delete the image file. Of course, it only removes it from the
backing file chain, but leaves the image file around. I don't have a
good suggestion, but if someone has a better name, I think we should
change it.

> ---
>  block.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block.h |   5 ++-
>  2 files changed, 146 insertions(+), 1 deletion(-)
> 
> diff --git a/block.c b/block.c
> index 9470319..11e275c 100644
> --- a/block.c
> +++ b/block.c
> @@ -1752,6 +1752,148 @@ int bdrv_change_backing_file(BlockDriverState *bs,
>      return ret;
>  }
>  
> +/* Finds the image layer immediately to the 'top' of bs.
> + *
> + * active is the current topmost image.
> + */

Most other function header comments in block.c have the /* on its own
line, so it would be nice to stay consistent.

Without looking at the code yet, I'm not quite sure if I understand this
correctly. Could use an example:

  base <- a <- b <- c <- d

bdrv_find_child(d, a) would return b? Why is it called bdrv_find_child
then, b is neither a (direct) child of d nor a.

> +BlockDriverState *bdrv_find_child(BlockDriverState *active,
> +                                  BlockDriverState *bs)
> +{
> +    BlockDriverState *child = NULL;
> +    BlockDriverState *intermediate;
> +
> +    /* if the active bs layer is the same as the new top, then there
> +     * is no image above the top, so it will be returned as the child
> +     */

"new top"?

And should this be mentioned in the header comment? Not sure how
generally useful this behaviour is. If it's only the right thing for the
only caller, maybe returning NULL and handling the case in the caller
would be better.

> +    if (active == bs) {
> +        child = active;
> +    } else {
> +        intermediate = active;
> +        while (intermediate->backing_hd) {
> +            if (intermediate->backing_hd == bs) {
> +                child = intermediate;
> +                break;
> +            }
> +            intermediate = intermediate->backing_hd;
> +        }
> +    }
> +
> +    return child;
> +}

So it returns NULL when bs isn't in the backing file chain of active.
Probably worth mentioning in the comment as well.

> +
> +typedef struct BlkIntermediateStates {
> +    BlockDriverState *bs;
> +    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
> +} BlkIntermediateStates;
> +
> +
> +/* deletes images above 'base' up to and including 'top', and sets the image
> + * above 'top' to have base as its backing file.
> + *
> + * E.g., this will convert the following chain:
> + * bottom <- base <- intermediate <- top <- active
> + *
> + * to
> + *
> + * bottom <- base <- active
> + *
> + * It is allowed for bottom==base, in which case it converts:
> + *
> + * base <- intermediate <- top <- active
> + *
> + * to
> + *
> + * base <- active
> + *
> + * It is also allowed for top==active, except in that case active is not
> + * deleted:

Hm, makes the interface inconsistent. Shouldn't you be using top ==
intermediate and it would work without any special casing?

> + *
> + * base <- intermediate <- top
> + *
> + * becomes
> + *
> + * base <- top
> + */
> +int bdrv_delete_intermediate(BlockDriverState *active, BlockDriverState *top,
> +                             BlockDriverState *base)
> +{
> +    BlockDriverState *intermediate;
> +    BlockDriverState *base_bs = NULL;
> +    BlockDriverState *new_top_bs = NULL;
> +    BlkIntermediateStates *intermediate_state, *next;
> +    int ret = -1;
> +
> +    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
> +    QSIMPLEQ_INIT(&states_to_delete);
> +
> +    if (!top->drv || !base->drv) {
> +        goto exit;
> +    }
> +
> +    new_top_bs = bdrv_find_child(active, top);

new_top_bs is the first BDS that should not be removed from the chain.

Why do we pass top and then search for new_top instead of passing
new_top directly?

> +
> +    /* special case of new_top_bs->backing_hd already pointing to base - nothing
> +     * to do, no intermediate images
> +     */
> +    if (new_top_bs->backing_hd == base) {
> +        ret = 0;
> +        goto exit;
> +    }
> +
> +    if (new_top_bs == NULL) {

Never reached, we segfault before. (How about unit tests for this
function, in the style of tests/check-q*.c?)

> +        /* we could not find the image above 'top', this is an error */
> +        goto exit;
> +    }
> +
> +    /* if the active and top image passed in are the same, then we
> +     * can't delete the active, so we start one below
> +     */
> +    intermediate = (active == top) ? active->backing_hd : top;

Aha. So intermediate is used to undo the special case. Now we're always
on the last image to be deleted.

This is equivalent to an unconditional new_top_bs->backing_hd.

> +
> +    /* now we will go down through the list, and add each BDS we find
> +     * into our deletion queue, until we hit the 'base'
> +     */
> +    while (intermediate) {
> +        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
> +        intermediate_state->bs = intermediate;
> +        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
> +
> +        if (intermediate->backing_hd == base) {
> +            base_bs = intermediate->backing_hd;
> +            break;
> +        }
> +        intermediate = intermediate->backing_hd;
> +    }
> +    if (base_bs == NULL) {
> +        /* something went wrong, we did not end at the base. safely
> +         * unravel everything, and exit with error */
> +        goto exit;
> +    }
> +
> +    /* success - we can delete the intermediate states, and link top->base */
> +    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
> +                                   base_bs->drv ? base_bs->drv->format_name : "");

Requires that new_top_bs is opened r/w. Definitely worth documenting.

Kevin
Eric Blake Sept. 6, 2012, 2:58 p.m. UTC | #2
On 09/06/2012 07:23 AM, Kevin Wolf wrote:
> Am 30.08.2012 20:47, schrieb Jeff Cody:
>> Add bdrv_find_child(), and bdrv_delete_intermediate().
>>
>> bdrv_find_child():  given 'bs' and the active (topmost) BDS of an image chain,
>>                     find the image that is the immediate top of 'bs'
>>
>> bdrv_delete_intermediate():

> At first, when just reading the function name, I thought this would
> actually delete the image file. Of course, it only removes it from the
> backing file chain, but leaves the image file around. I don't have a
> good suggestion, but if someone has a better name, I think we should
> change it.

Maybe bdrv_unchain_intermediate()?
Jeff Cody Sept. 6, 2012, 2:59 p.m. UTC | #3
On 09/06/2012 09:23 AM, Kevin Wolf wrote:
> Am 30.08.2012 20:47, schrieb Jeff Cody:
>> Add bdrv_find_child(), and bdrv_delete_intermediate().
>>
>> bdrv_find_child():  given 'bs' and the active (topmost) BDS of an image chain,
>>                     find the image that is the immediate top of 'bs'
>>
>> bdrv_delete_intermediate():
>>                     Given 3 BDS (active, top, base), delete images above
>>                     base up to and including top, and set base to be the
>>                     parent of top's child node.
>>
>>                     E.g., this converts:
>>
>>                     bottom <- base <- intermediate <- top <- active
>>
>>                     to
>>
>>                     bottom <- base <- active
>>
>>                     where top == active is permitted, although active
>>                     will not be deleted.
>>
>> Signed-off-by: Jeff Cody <jcody@redhat.com>
> 
> At first, when just reading the function name, I thought this would
> actually delete the image file. Of course, it only removes it from the
> backing file chain, but leaves the image file around. I don't have a
> good suggestion, but if someone has a better name, I think we should
> change it.

Hmm, the naming seems consistent with bdrv_delete(), which does not
actually delete the image files either (and, that is essentially what
this does... calls bdrv_delete(), on the intermediate images).

However, here are some other name proposals:

   *  bdrv_disconnect_intermediate()
   *  bdrv_drop_intermediate()
   *  bdrv_shorten_chain()

> 
>> ---
>>  block.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  block.h |   5 ++-
>>  2 files changed, 146 insertions(+), 1 deletion(-)
>>
>> diff --git a/block.c b/block.c
>> index 9470319..11e275c 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -1752,6 +1752,148 @@ int bdrv_change_backing_file(BlockDriverState *bs,
>>      return ret;
>>  }
>>  
>> +/* Finds the image layer immediately to the 'top' of bs.
>> + *
>> + * active is the current topmost image.
>> + */
> 
> Most other function header comments in block.c have the /* on its own
> line, so it would be nice to stay consistent.
> 
> Without looking at the code yet, I'm not quite sure if I understand this
> correctly. Could use an example:
> 
>   base <- a <- b <- c <- d
> 
> bdrv_find_child(d, a) would return b? Why is it called bdrv_find_child
> then, b is neither a (direct) child of d nor a.
> 

For completeness: as we discussed on IRC, this will be renamed to
bdrv_find_overlay(), avoiding the parent/child terminology, which can be
a bit ambiguous.


>> +BlockDriverState *bdrv_find_child(BlockDriverState *active,
>> +                                  BlockDriverState *bs)
>> +{
>> +    BlockDriverState *child = NULL;
>> +    BlockDriverState *intermediate;
>> +
>> +    /* if the active bs layer is the same as the new top, then there
>> +     * is no image above the top, so it will be returned as the child
>> +     */
> 
> "new top"?
> 
> And should this be mentioned in the header comment? Not sure how
> generally useful this behaviour is. If it's only the right thing for the
> only caller, maybe returning NULL and handling the case in the caller
> would be better.
> 

Yes, it should definitely be mentioned in the comment header. I
initially was going to say that I don't think NULL is appropriate to
return here, but on more reflection I think you correct.  We shouldn't
return non-NULL if it is not actually the overlay to bs.  The caller can
check on a NULL return, and determine what to do at that point.


>> +    if (active == bs) {
>> +        child = active;
>> +    } else {
>> +        intermediate = active;
>> +        while (intermediate->backing_hd) {
>> +            if (intermediate->backing_hd == bs) {
>> +                child = intermediate;
>> +                break;
>> +            }
>> +            intermediate = intermediate->backing_hd;
>> +        }
>> +    }
>> +
>> +    return child;
>> +}
> 
> So it returns NULL when bs isn't in the backing file chain of active.
> Probably worth mentioning in the comment as well.

Agreed.

> 
>> +
>> +typedef struct BlkIntermediateStates {
>> +    BlockDriverState *bs;
>> +    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
>> +} BlkIntermediateStates;
>> +
>> +
>> +/* deletes images above 'base' up to and including 'top', and sets the image
>> + * above 'top' to have base as its backing file.
>> + *
>> + * E.g., this will convert the following chain:
>> + * bottom <- base <- intermediate <- top <- active
>> + *
>> + * to
>> + *
>> + * bottom <- base <- active
>> + *
>> + * It is allowed for bottom==base, in which case it converts:
>> + *
>> + * base <- intermediate <- top <- active
>> + *
>> + * to
>> + *
>> + * base <- active
>> + *
>> + * It is also allowed for top==active, except in that case active is not
>> + * deleted:
> 
> Hm, makes the interface inconsistent. Shouldn't you be using top ==
> intermediate and it would work without any special casing?
>

To remain consistent, maybe we should define it as an error if
top==active, and return error in that case?  The caller can be
responsible for checking for that - if the caller wants to merge down
the active layer, there are additional steps to be taken anyway.

>> + *
>> + * base <- intermediate <- top
>> + *
>> + * becomes
>> + *
>> + * base <- top
>> + */
>> +int bdrv_delete_intermediate(BlockDriverState *active, BlockDriverState *top,
>> +                             BlockDriverState *base)
>> +{
>> +    BlockDriverState *intermediate;
>> +    BlockDriverState *base_bs = NULL;
>> +    BlockDriverState *new_top_bs = NULL;
>> +    BlkIntermediateStates *intermediate_state, *next;
>> +    int ret = -1;
>> +
>> +    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
>> +    QSIMPLEQ_INIT(&states_to_delete);
>> +
>> +    if (!top->drv || !base->drv) {
>> +        goto exit;
>> +    }
>> +
>> +    new_top_bs = bdrv_find_child(active, top);
> 
> new_top_bs is the first BDS that should not be removed from the chain.
> 
> Why do we pass top and then search for new_top instead of passing
> new_top directly?
> 

(Even though we talked about this on IRC, including it here for
completeness):

Because the block commit API specifies the image to merge down, so we
need to find the image overlay on top of that, to adjust the backing
file correctly.  And if we change the semantics of the block commit API
to mean the image above the image to merge down, then that makes it
inconsistent when specifying merging down the active layer later on.

>> +
>> +    /* special case of new_top_bs->backing_hd already pointing to base - nothing
>> +     * to do, no intermediate images
>> +     */
>> +    if (new_top_bs->backing_hd == base) {
>> +        ret = 0;
>> +        goto exit;
>> +    }
>> +
>> +    if (new_top_bs == NULL) {
> 
> Never reached, we segfault before. (How about unit tests for this
> function, in the style of tests/check-q*.c?)
> 

Ugh, thanks - obviously this should be above the previous check.  And, I
agree on the unit test - I will add that in tests/check-*.


>> +        /* we could not find the image above 'top', this is an error */
>> +        goto exit;
>> +    }
>> +
>> +    /* if the active and top image passed in are the same, then we
>> +     * can't delete the active, so we start one below
>> +     */
>> +    intermediate = (active == top) ? active->backing_hd : top;
> 
> Aha. So intermediate is used to undo the special case. Now we're always
> on the last image to be deleted.
> 
> This is equivalent to an unconditional new_top_bs->backing_hd.
> 
>> +
>> +    /* now we will go down through the list, and add each BDS we find
>> +     * into our deletion queue, until we hit the 'base'
>> +     */
>> +    while (intermediate) {
>> +        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
>> +        intermediate_state->bs = intermediate;
>> +        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
>> +
>> +        if (intermediate->backing_hd == base) {
>> +            base_bs = intermediate->backing_hd;
>> +            break;
>> +        }
>> +        intermediate = intermediate->backing_hd;
>> +    }
>> +    if (base_bs == NULL) {
>> +        /* something went wrong, we did not end at the base. safely
>> +         * unravel everything, and exit with error */
>> +        goto exit;
>> +    }
>> +
>> +    /* success - we can delete the intermediate states, and link top->base */
>> +    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
>> +                                   base_bs->drv ? base_bs->drv->format_name : "");
> 
> Requires that new_top_bs is opened r/w. Definitely worth documenting.
>

Agreed.
Kevin Wolf Sept. 7, 2012, 10:19 a.m. UTC | #4
Am 06.09.2012 16:59, schrieb Jeff Cody:
> On 09/06/2012 09:23 AM, Kevin Wolf wrote:
>> Am 30.08.2012 20:47, schrieb Jeff Cody:
>>> Add bdrv_find_child(), and bdrv_delete_intermediate().
>>>
>>> bdrv_find_child():  given 'bs' and the active (topmost) BDS of an image chain,
>>>                     find the image that is the immediate top of 'bs'
>>>
>>> bdrv_delete_intermediate():
>>>                     Given 3 BDS (active, top, base), delete images above
>>>                     base up to and including top, and set base to be the
>>>                     parent of top's child node.
>>>
>>>                     E.g., this converts:
>>>
>>>                     bottom <- base <- intermediate <- top <- active
>>>
>>>                     to
>>>
>>>                     bottom <- base <- active
>>>
>>>                     where top == active is permitted, although active
>>>                     will not be deleted.
>>>
>>> Signed-off-by: Jeff Cody <jcody@redhat.com>
>>
>> At first, when just reading the function name, I thought this would
>> actually delete the image file. Of course, it only removes it from the
>> backing file chain, but leaves the image file around. I don't have a
>> good suggestion, but if someone has a better name, I think we should
>> change it.
> 
> Hmm, the naming seems consistent with bdrv_delete(), which does not
> actually delete the image files either (and, that is essentially what
> this does... calls bdrv_delete(), on the intermediate images).
> 
> However, here are some other name proposals:
> 
>    *  bdrv_disconnect_intermediate()
>    *  bdrv_drop_intermediate()
>    *  bdrv_shorten_chain()

bdrv_drop_intermediate() sounds good to me.

>>
>>> +
>>> +typedef struct BlkIntermediateStates {
>>> +    BlockDriverState *bs;
>>> +    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
>>> +} BlkIntermediateStates;
>>> +
>>> +
>>> +/* deletes images above 'base' up to and including 'top', and sets the image
>>> + * above 'top' to have base as its backing file.
>>> + *
>>> + * E.g., this will convert the following chain:
>>> + * bottom <- base <- intermediate <- top <- active
>>> + *
>>> + * to
>>> + *
>>> + * bottom <- base <- active
>>> + *
>>> + * It is allowed for bottom==base, in which case it converts:
>>> + *
>>> + * base <- intermediate <- top <- active
>>> + *
>>> + * to
>>> + *
>>> + * base <- active
>>> + *
>>> + * It is also allowed for top==active, except in that case active is not
>>> + * deleted:
>>
>> Hm, makes the interface inconsistent. Shouldn't you be using top ==
>> intermediate and it would work without any special casing?
>>
> 
> To remain consistent, maybe we should define it as an error if
> top==active, and return error in that case?  The caller can be
> responsible for checking for that - if the caller wants to merge down
> the active layer, there are additional steps to be taken anyway.

Yes, why not.

And we can always revisit when implementing the additional functionality.

>>> +        /* we could not find the image above 'top', this is an error */
>>> +        goto exit;
>>> +    }
>>> +
>>> +    /* if the active and top image passed in are the same, then we
>>> +     * can't delete the active, so we start one below
>>> +     */
>>> +    intermediate = (active == top) ? active->backing_hd : top;
>>
>> Aha. So intermediate is used to undo the special case. Now we're always
>> on the last image to be deleted.
>>
>> This is equivalent to an unconditional new_top_bs->backing_hd.

How about changing this to use the simpler unconditional version?

Kevin
Jeff Cody Sept. 7, 2012, 3:17 p.m. UTC | #5
On 09/07/2012 06:19 AM, Kevin Wolf wrote:
> Am 06.09.2012 16:59, schrieb Jeff Cody:
>> On 09/06/2012 09:23 AM, Kevin Wolf wrote:
>>> Am 30.08.2012 20:47, schrieb Jeff Cody:
>>>> Add bdrv_find_child(), and bdrv_delete_intermediate().
>>>>
>>>> bdrv_find_child():  given 'bs' and the active (topmost) BDS of an image chain,
>>>>                     find the image that is the immediate top of 'bs'
>>>>
>>>> bdrv_delete_intermediate():
>>>>                     Given 3 BDS (active, top, base), delete images above
>>>>                     base up to and including top, and set base to be the
>>>>                     parent of top's child node.
>>>>
>>>>                     E.g., this converts:
>>>>
>>>>                     bottom <- base <- intermediate <- top <- active
>>>>
>>>>                     to
>>>>
>>>>                     bottom <- base <- active
>>>>
>>>>                     where top == active is permitted, although active
>>>>                     will not be deleted.
>>>>
>>>> Signed-off-by: Jeff Cody <jcody@redhat.com>
>>>
>>> At first, when just reading the function name, I thought this would
>>> actually delete the image file. Of course, it only removes it from the
>>> backing file chain, but leaves the image file around. I don't have a
>>> good suggestion, but if someone has a better name, I think we should
>>> change it.
>>
>> Hmm, the naming seems consistent with bdrv_delete(), which does not
>> actually delete the image files either (and, that is essentially what
>> this does... calls bdrv_delete(), on the intermediate images).
>>
>> However, here are some other name proposals:
>>
>>    *  bdrv_disconnect_intermediate()
>>    *  bdrv_drop_intermediate()
>>    *  bdrv_shorten_chain()
> 
> bdrv_drop_intermediate() sounds good to me.
> 
>>>
>>>> +
>>>> +typedef struct BlkIntermediateStates {
>>>> +    BlockDriverState *bs;
>>>> +    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
>>>> +} BlkIntermediateStates;
>>>> +
>>>> +
>>>> +/* deletes images above 'base' up to and including 'top', and sets the image
>>>> + * above 'top' to have base as its backing file.
>>>> + *
>>>> + * E.g., this will convert the following chain:
>>>> + * bottom <- base <- intermediate <- top <- active
>>>> + *
>>>> + * to
>>>> + *
>>>> + * bottom <- base <- active
>>>> + *
>>>> + * It is allowed for bottom==base, in which case it converts:
>>>> + *
>>>> + * base <- intermediate <- top <- active
>>>> + *
>>>> + * to
>>>> + *
>>>> + * base <- active
>>>> + *
>>>> + * It is also allowed for top==active, except in that case active is not
>>>> + * deleted:
>>>
>>> Hm, makes the interface inconsistent. Shouldn't you be using top ==
>>> intermediate and it would work without any special casing?
>>>
>>
>> To remain consistent, maybe we should define it as an error if
>> top==active, and return error in that case?  The caller can be
>> responsible for checking for that - if the caller wants to merge down
>> the active layer, there are additional steps to be taken anyway.
> 
> Yes, why not.
> 
> And we can always revisit when implementing the additional functionality.
> 
>>>> +        /* we could not find the image above 'top', this is an error */
>>>> +        goto exit;
>>>> +    }
>>>> +
>>>> +    /* if the active and top image passed in are the same, then we
>>>> +     * can't delete the active, so we start one below
>>>> +     */
>>>> +    intermediate = (active == top) ? active->backing_hd : top;
>>>
>>> Aha. So intermediate is used to undo the special case. Now we're always
>>> on the last image to be deleted.
>>>
>>> This is equivalent to an unconditional new_top_bs->backing_hd.
> 
> How about changing this to use the simpler unconditional version?

Sure - since active == top is now an error, there is no reason for the
more complicated logic.  And at this point, the statement
(new_top_bs->backing_hd == top) should always be true.

> 
> Kevin
>
diff mbox

Patch

diff --git a/block.c b/block.c
index 9470319..11e275c 100644
--- a/block.c
+++ b/block.c
@@ -1752,6 +1752,148 @@  int bdrv_change_backing_file(BlockDriverState *bs,
     return ret;
 }
 
+/* Finds the image layer immediately to the 'top' of bs.
+ *
+ * active is the current topmost image.
+ */
+BlockDriverState *bdrv_find_child(BlockDriverState *active,
+                                  BlockDriverState *bs)
+{
+    BlockDriverState *child = NULL;
+    BlockDriverState *intermediate;
+
+    /* if the active bs layer is the same as the new top, then there
+     * is no image above the top, so it will be returned as the child
+     */
+    if (active == bs) {
+        child = active;
+    } else {
+        intermediate = active;
+        while (intermediate->backing_hd) {
+            if (intermediate->backing_hd == bs) {
+                child = intermediate;
+                break;
+            }
+            intermediate = intermediate->backing_hd;
+        }
+    }
+
+    return child;
+}
+
+typedef struct BlkIntermediateStates {
+    BlockDriverState *bs;
+    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/* deletes images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * It is also allowed for top==active, except in that case active is not
+ * deleted:
+ *
+ * base <- intermediate <- top
+ *
+ * becomes
+ *
+ * base <- top
+ */
+int bdrv_delete_intermediate(BlockDriverState *active, BlockDriverState *top,
+                             BlockDriverState *base)
+{
+    BlockDriverState *intermediate;
+    BlockDriverState *base_bs = NULL;
+    BlockDriverState *new_top_bs = NULL;
+    BlkIntermediateStates *intermediate_state, *next;
+    int ret = -1;
+
+    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+    QSIMPLEQ_INIT(&states_to_delete);
+
+    if (!top->drv || !base->drv) {
+        goto exit;
+    }
+
+    new_top_bs = bdrv_find_child(active, top);
+
+    /* special case of new_top_bs->backing_hd already pointing to base - nothing
+     * to do, no intermediate images
+     */
+    if (new_top_bs->backing_hd == base) {
+        ret = 0;
+        goto exit;
+    }
+
+    if (new_top_bs == NULL) {
+        /* we could not find the image above 'top', this is an error */
+        goto exit;
+    }
+
+    /* if the active and top image passed in are the same, then we
+     * can't delete the active, so we start one below
+     */
+    intermediate = (active == top) ? active->backing_hd : top;
+
+    /* now we will go down through the list, and add each BDS we find
+     * into our deletion queue, until we hit the 'base'
+     */
+    while (intermediate) {
+        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+        intermediate_state->bs = intermediate;
+        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+        if (intermediate->backing_hd == base) {
+            base_bs = intermediate->backing_hd;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+    if (base_bs == NULL) {
+        /* something went wrong, we did not end at the base. safely
+         * unravel everything, and exit with error */
+        goto exit;
+    }
+
+    /* success - we can delete the intermediate states, and link top->base */
+    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+                                   base_bs->drv ? base_bs->drv->format_name : "");
+    if (ret) {
+        goto exit;
+    }
+    new_top_bs->backing_hd = base_bs;
+
+
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        /* so that bdrv_close() does not recursively close the chain */
+        intermediate_state->bs->backing_hd = NULL;
+        bdrv_delete(intermediate_state->bs);
+    }
+    ret = 0;
+
+exit:
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        g_free(intermediate_state);
+    }
+    return ret;
+}
+
+
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                    size_t size)
 {
diff --git a/block.h b/block.h
index db812b1..ee76869 100644
--- a/block.h
+++ b/block.h
@@ -201,7 +201,10 @@  int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
-
+int bdrv_delete_intermediate(BlockDriverState *active, BlockDriverState *top,
+                             BlockDriverState *base);
+BlockDriverState *bdrv_find_child(BlockDriverState *active,
+                                  BlockDriverState *bs);
 
 typedef struct BdrvCheckResult {
     int corruptions;