diff mbox

[2/8] qcow2: add dirty-bitmaps feature

Message ID 1433776886-27239-3-git-send-email-vsementsov@virtuozzo.com
State New
Headers show

Commit Message

Vladimir Sementsov-Ogievskiy June 8, 2015, 3:21 p.m. UTC
From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>

Adds dirty-bitmaps feature to qcow2 format as specified in
docs/specs/qcow2.txt

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/Makefile.objs        |   2 +-
 block/qcow2-dirty-bitmap.c | 503 +++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c              |  56 +++++
 block/qcow2.h              |  50 +++++
 include/block/block_int.h  |  10 +
 5 files changed, 620 insertions(+), 1 deletion(-)
 create mode 100644 block/qcow2-dirty-bitmap.c

Comments

Stefan Hajnoczi June 9, 2015, 4:52 p.m. UTC | #1
On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:

I haven't fully reviewed this patch yet but here are initial comments.

> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmapHeader h;
> +    QCowDirtyBitmap *bm;
> +    int i, name_size;
> +    int64_t offset;
> +    int ret;
> +
> +    if (!s->nb_dirty_bitmaps) {
> +        s->dirty_bitmaps = NULL;
> +        s->dirty_bitmaps_size = 0;
> +        return 0;
> +    }
> +
> +    offset = s->dirty_bitmaps_offset;
> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        /* Read statically sized part of the dirty_bitmap header */
> +        offset = align_offset(offset, 8);
> +        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        offset += sizeof(h);
> +        bm = s->dirty_bitmaps + i;
> +        bm->l1_table_offset = be64_to_cpu(h.l1_table_offset);
> +        bm->l1_size = be32_to_cpu(h.l1_size);
> +        bm->bitmap_granularity = be32_to_cpu(h.bitmap_granularity);
> +        bm->bitmap_size = be64_to_cpu(h.bitmap_size);

Input validation is missing.  These could be junk values.  Min, max,
alignment, etc need to be checked.

> @@ -90,6 +91,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>      QCowExtension ext;
>      uint64_t offset;
>      int ret;
> +    Qcow2DirtyBitmapHeaderExt dirty_bitmaps_ext;
>  
>  #ifdef DEBUG_EXT
>      printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
> @@ -160,6 +162,33 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>              }
>              break;
>  
> +        case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
> +            ret = bdrv_pread(bs->file, offset, &dirty_bitmaps_ext, ext.len);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "ERROR: dirty_bitmaps_ext: "
> +                                 "Could not read ext header");
> +                return ret;
> +            }
> +
> +            be64_to_cpus(&dirty_bitmaps_ext.dirty_bitmaps_offset);
> +            be32_to_cpus(&dirty_bitmaps_ext.nb_dirty_bitmaps);
> +
> +            s->dirty_bitmaps_offset = dirty_bitmaps_ext.dirty_bitmaps_offset;
> +            s->nb_dirty_bitmaps = dirty_bitmaps_ext.nb_dirty_bitmaps;
> +
> +            ret = qcow2_read_dirty_bitmaps(bs);

Missing input validation.  We cannot trust dirty_bitmaps_offset or
nb_dirty_bitmaps.
Stefan Hajnoczi June 10, 2015, 2:30 p.m. UTC | #2
On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:

I noticed a corner case, it's probably not a problem in practice:

Since the dirty bitmap is stored with the help of a BlockDriverState
(and its bs->file), it's possible that writing the bitmap will cause
bits in the bitmap to be dirtied!

> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
> new file mode 100644
> index 0000000..bc0167c
> --- /dev/null
> +++ b/block/qcow2-dirty-bitmap.c
> @@ -0,0 +1,503 @@
> +/*
> + * Dirty bitmpas for the QCOW version 2 format

s/bitmpas/bitmaps/

> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmapHeader h;
> +    QCowDirtyBitmap *bm;
> +    int i, name_size;
> +    int64_t offset;
> +    int ret;
> +
> +    if (!s->nb_dirty_bitmaps) {
> +        s->dirty_bitmaps = NULL;
> +        s->dirty_bitmaps_size = 0;
> +        return 0;
> +    }
> +
> +    offset = s->dirty_bitmaps_offset;
> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);

Please use g_try_new0() and handle the NULL return value.
g_new/g_malloc abort the process if there is not enough memory.  When
opening untrusted image files it is possible that large values will be
encountered and allocations fail.  In that case .bdrv_open() should fail
instead of killing QEMU.

Using g_try_*() in QEMU is not an exact science but large data buffers
or allocations where external inputs influence the size are good
candidates.

Other allocations in these patches should do that too.

> +    /* Allocate space for the new dirty bitmap table */
> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
> +    offset = dirty_bitmaps_offset;
> +    if (offset < 0) {
> +        ret = offset;
> +        goto fail;
> +    }
> +    ret = bdrv_flush(bs);

Not sure there is a need for this.  The clusters are inaccessible since
no metadata points to them yet.  Therefore we don't need to flush yet
because there is no risk of seeing an inconsistent state.

> +    /* Write all dirty bitmaps to the new table */
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        memset(&h, 0, sizeof(h));
> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
> +        h.l1_size = cpu_to_be32(bm->l1_size);
> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
> +
> +        name_size = strlen(bm->name);
> +        assert(name_size <= UINT16_MAX);
> +        h.name_size = cpu_to_be16(name_size);
> +        offset = align_offset(offset, 8);
> +
> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += sizeof(h);
> +
> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +    }

If files have many thousands of bitmaps then this loop will be slow.  It
would be much faster to write out 1 cluster at a time.  This probably
doesn't matter in practice since this function doesn't get called much
and normally files will have few bitmaps.

> +
> +    /*
> +     * Update the header extension to point to the new dirty bitmap table. This
> +     * requires the new table and its refcounts to be stable on disk.
> +     */
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
> +    ret = qcow2_update_header(bs);
> +    if (ret < 0) {
> +        fprintf(stderr, "Could not update qcow2 header\n");
> +        goto fail;
> +    }

qcow2_update_header() does not flush.  We need to flush before freeing
the old clusters in order to guarantee that the file now points to the
new clusters.

> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i, dirty_bitmap_index, ret;
> +    uint64_t offset;
> +    QCowDirtyBitmap *bm;
> +    uint64_t *l1_table;
> +    uint8_t *buf;
> +
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        return NULL;
> +    }
> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
> +        return NULL;
> +    }
> +
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));

Please use g_try_malloc() with NULL handling.

> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    buf = g_malloc0(bm->l1_size * s->cluster_size);

What is the maximum l1_size value?  cluster_size and l1_size are 32-bit
so with 64 KB cluster_size this overflows if l1_size > 65535.  Do you
want to cast to size_t?

> +    for (i = 0; i < bm->l1_size; ++i) {
> +        offset = be64_to_cpu(l1_table[i]);
> +        if (!(offset & 1)) {

This doesn't honor the 0 offset means unallocated cluster behavior for
the Standard Cluster Descriptor from the qcow2 specification.

> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
> +                             s->cluster_size);
> +            if (ret < 0) {
> +                goto fail;

Missing g_free(buf)

> +    l1_table = g_try_new(uint64_t, bm->l1_size);
> +    if (l1_table == NULL) {
> +        ret = -ENOMEM;
> +        goto fail;
> +    }
> +
> +    /* initialize with zero clusters */
> +    for (i = 0; i < s->l1_size; i++) {
> +        l1_table[i] = cpu_to_be64(1);
> +    }
> +
> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
> +                                        s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }

Flush is needed here to ensure the bitmap has reached disk before the
dirty_bitmaps array is written out.

> +
> +    g_free(l1_table);
> +    l1_table = NULL;
> +
> +    /* Append the new dirty bitmap to the dirty bitmap list */
> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
> +    if (s->dirty_bitmaps) {
> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
> +        old_dirty_bitmap_list = s->dirty_bitmaps;
> +    }
> +    s->dirty_bitmaps = new_dirty_bitmap_list;
> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
> +
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        g_free(s->dirty_bitmaps);
> +        s->dirty_bitmaps = old_dirty_bitmap_list;
> +        s->nb_dirty_bitmaps--;
> +        goto fail;
> +    }
> +
> +    g_free(old_dirty_bitmap_list);
> +
> +    return 0;
> +
> +fail:
> +    g_free(bm->name);
> +    g_free(l1_table);
> +

The l1_table clusters should be freed on failure.
John Snow June 11, 2015, 11:04 p.m. UTC | #3
On 06/08/2015 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
> From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> 
> Adds dirty-bitmaps feature to qcow2 format as specified in
> docs/specs/qcow2.txt
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/Makefile.objs        |   2 +-
>  block/qcow2-dirty-bitmap.c | 503 +++++++++++++++++++++++++++++++++++++++++++++
>  block/qcow2.c              |  56 +++++
>  block/qcow2.h              |  50 +++++
>  include/block/block_int.h  |  10 +
>  5 files changed, 620 insertions(+), 1 deletion(-)
>  create mode 100644 block/qcow2-dirty-bitmap.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 0d8c2a4..bff12b4 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -1,5 +1,5 @@
>  block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
> -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
> +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-dirty-bitmap.o
>  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>  block-obj-y += qed-check.o
>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
> new file mode 100644
> index 0000000..bc0167c
> --- /dev/null
> +++ b/block/qcow2-dirty-bitmap.c
> @@ -0,0 +1,503 @@
> +/*
> + * Dirty bitmpas for the QCOW version 2 format
> + *
> + * Copyright (c) 2014-2015 Vladimir Sementsov-Ogievskiy
> + *
> + * This file is derived from qcow2-snapshot.c, original copyright:
> + * Copyright (c) 2004-2006 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu-common.h"
> +#include "block/block_int.h"
> +#include "block/qcow2.h"
> +
> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        g_free(s->dirty_bitmaps[i].name);
> +    }
> +    g_free(s->dirty_bitmaps);
> +    s->dirty_bitmaps = NULL;
> +    s->nb_dirty_bitmaps = 0;
> +}
> +
> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmapHeader h;
> +    QCowDirtyBitmap *bm;
> +    int i, name_size;
> +    int64_t offset;
> +    int ret;
> +
> +    if (!s->nb_dirty_bitmaps) {
> +        s->dirty_bitmaps = NULL;
> +        s->dirty_bitmaps_size = 0;
> +        return 0;
> +    }
> +
> +    offset = s->dirty_bitmaps_offset;
> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        /* Read statically sized part of the dirty_bitmap header */
> +        offset = align_offset(offset, 8);
> +        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        offset += sizeof(h);
> +        bm = s->dirty_bitmaps + i;
> +        bm->l1_table_offset = be64_to_cpu(h.l1_table_offset);
> +        bm->l1_size = be32_to_cpu(h.l1_size);
> +        bm->bitmap_granularity = be32_to_cpu(h.bitmap_granularity);
> +        bm->bitmap_size = be64_to_cpu(h.bitmap_size);
> +
> +        name_size = be16_to_cpu(h.name_size);
> +
> +        /* Read dirty_bitmap name */
> +        bm->name = g_malloc(name_size + 1);
> +        ret = bdrv_pread(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +        bm->name[name_size] = '\0';
> +
> +        if (offset - s->dirty_bitmaps_offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
> +            ret = -EFBIG;
> +            goto fail;
> +        }
> +    }
> +
> +    assert(offset - s->dirty_bitmaps_offset <= INT_MAX);
> +    s->dirty_bitmaps_size = offset - s->dirty_bitmaps_offset;
> +    return 0;
> +
> +fail:
> +    qcow2_free_dirty_bitmaps(bs);
> +    return ret;
> +}
> +
> +/* Add at the end of the file a new table of dirty bitmaps */
> +static int qcow2_write_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap *bm;
> +    QCowDirtyBitmapHeader h;
> +    int i, name_size, dirty_bitmaps_size;
> +    int64_t offset, dirty_bitmaps_offset = 0;
> +    int ret;
> +
> +    int old_dirty_bitmaps_size = s->dirty_bitmaps_size;
> +    int64_t old_dirty_bitmaps_offset = s->dirty_bitmaps_offset;
> +
> +    /* Compute the size of the dirty bitmaps table */
> +    offset = 0;
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        offset = align_offset(offset, 8);
> +        offset += sizeof(h);
> +        offset += strlen(bm->name);
> +
> +        if (offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
> +            ret = -EFBIG;
> +            goto fail;
> +        }
> +    }
> +
> +    assert(offset <= INT_MAX);
> +    dirty_bitmaps_size = offset;
> +
> +    /* Allocate space for the new dirty bitmap table */
> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
> +    offset = dirty_bitmaps_offset;
> +    if (offset < 0) {
> +        ret = offset;
> +        goto fail;
> +    }
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* The dirty bitmap table position has not yet been updated, so these
> +     * clusters must indeed be completely free */
> +    ret = qcow2_pre_write_overlap_check(bs, 0, offset, dirty_bitmaps_size);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* Write all dirty bitmaps to the new table */
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        memset(&h, 0, sizeof(h));
> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
> +        h.l1_size = cpu_to_be32(bm->l1_size);
> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
> +
> +        name_size = strlen(bm->name);
> +        assert(name_size <= UINT16_MAX);
> +        h.name_size = cpu_to_be16(name_size);
> +        offset = align_offset(offset, 8);
> +
> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += sizeof(h);
> +
> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +    }
> +
> +    /*
> +     * Update the header extension to point to the new dirty bitmap table. This
> +     * requires the new table and its refcounts to be stable on disk.
> +     */
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
> +    ret = qcow2_update_header(bs);
> +    if (ret < 0) {
> +        fprintf(stderr, "Could not update qcow2 header\n");
> +        goto fail;
> +    }
> +
> +    /* Free old dirty bitmap table */
> +    qcow2_free_clusters(bs, old_dirty_bitmaps_offset, old_dirty_bitmaps_size,
> +                        QCOW2_DISCARD_ALWAYS);
> +    return 0;
> +
> +fail:
> +    if (dirty_bitmaps_offset > 0) {
> +        qcow2_free_clusters(bs, dirty_bitmaps_offset, dirty_bitmaps_size,
> +                            QCOW2_DISCARD_ALWAYS);
> +    }
> +    return ret;
> +}
> +
> +static int find_dirty_bitmap_by_name(BlockDriverState *bs,
> +                                     const char *name)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        if (!strcmp(s->dirty_bitmaps[i].name, name)) {
> +            return i;
> +        }
> +    }
> +
> +    return -1;
> +}
> +
> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i, dirty_bitmap_index, ret;
> +    uint64_t offset;
> +    QCowDirtyBitmap *bm;
> +    uint64_t *l1_table;
> +    uint8_t *buf;
> +
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        return NULL;
> +    }
> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
> +        return NULL;
> +    }
> +
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    buf = g_malloc0(bm->l1_size * s->cluster_size);
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        offset = be64_to_cpu(l1_table[i]);
> +        if (!(offset & 1)) {
> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
> +                             s->cluster_size);
> +            if (ret < 0) {
> +                goto fail;
> +            }
> +        }
> +    }
> +
> +    g_free(l1_table);
> +    return buf;
> +
> +fail:
> +    g_free(l1_table);
> +    return NULL;
> +}
> +
> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int cl_size = s->cluster_size;
> +    int i, dirty_bitmap_index, ret = 0, n;
> +    uint64_t *l1_table;
> +    QCowDirtyBitmap *bm;
> +    uint64_t buf_size;
> +    uint8_t *p;
> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +
> +    /* find/create dirty bitmap */
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index >= 0) {
> +        bm = s->dirty_bitmaps + dirty_bitmap_index;
> +
> +        if (size != bm->bitmap_size ||
> +            granularity != bm->bitmap_granularity) {
> +            qcow2_dirty_bitmap_delete(bs, name, NULL);
> +            dirty_bitmap_index = -1;
> +        }
> +    }
> +    if (dirty_bitmap_index < 0) {
> +        qcow2_dirty_bitmap_create(bs, name, size, granularity);
> +        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
> +    }
> +    bm = s->dirty_bitmaps + dirty_bitmap_index;

I catch a segfault right around here if I do the following:

./x86_64-softmmu/qemu-system-x86_64 --dirty-bitmap
file=bitmaps.qcow2,name=bitmap0,drive=drive0 -drive
if=none,file=hda.qcow2,id=drive0 -device ide-hd,drive=drive0

hda.qcow2 and bitmaps.qcow2 are both empty files, but bitmaps.qcow2 has
a size of '0'.

Then when I click close in the QEMU GTK frontend, we hit a segfault when
trying to close because s->dirty_bitmaps is NULL, because it appears as
if we've never actually tried to add the (empty) bitmap to the (empty) file.

Your iotest works, but I am not actually sure why, because I don't
actually know how to *create* a persistent bitmap. I thought that the
-dirty-bitmap CLI would create one in the file specified with file=, but
it apparently only creates an in-memory bitmap and sets the file
pointer, but never initializes any of these structures. Then, when we go
to close, it gets confused and everything breaks a bit.

> +
> +    /* read l1 table */
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto finish;
> +    }
> +
> +    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
> +    buf_size = align_offset(buf_size, 4);
> +    n = buf_size / cl_size;
> +    p = buf;
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
> +        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
> +
> +        if (buffer_is_zero(p, write_size)) {
> +            if (addr) {
> +                qcow2_free_clusters(bs, addr, cl_size,
> +                                    QCOW2_DISCARD_ALWAYS);
> +            }
> +            l1_table[i] = cpu_to_be64(1);
> +        } else {
> +            if (!addr) {
> +                addr = qcow2_alloc_clusters(bs, cl_size);
> +                l1_table[i] = cpu_to_be64(addr);
> +            }
> +
> +            ret = bdrv_pwrite(bs->file, addr, p, write_size);
> +            if (ret < 0) {
> +                goto finish;
> +            }
> +        }
> +
> +        p += cl_size;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto finish;
> +    }
> +
> +finish:
> +    g_free(l1_table);
> +    return ret;
> +}
> +/* if no id is provided, a new one is constructed */
> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
> +                              uint64_t size, int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap *new_dirty_bitmap_list = NULL;
> +    QCowDirtyBitmap *old_dirty_bitmap_list = NULL;
> +    QCowDirtyBitmap sn1, *bm = &sn1;
> +    int i, ret;
> +    uint64_t *l1_table = NULL;
> +    int64_t l1_table_offset;
> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +
> +    if (s->nb_dirty_bitmaps >= QCOW_MAX_DIRTY_BITMAPS) {
> +        return -EFBIG;
> +    }
> +
> +    memset(bm, 0, sizeof(*bm));
> +
> +    /* Check that the ID is unique */
> +    if (find_dirty_bitmap_by_name(bs, name) >= 0) {
> +        return -EEXIST;
> +    }
> +
> +    /* Populate bm with passed data */
> +    bm->name = g_strdup(name);
> +    bm->bitmap_granularity = granularity;
> +    bm->bitmap_size = size;
> +
> +    bm->l1_size =
> +        size_to_clusters(s, (((size - 1) / sector_granularity) >> 3) + 1);
> +    l1_table_offset =
> +        qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
> +    if (l1_table_offset < 0) {
> +        ret = l1_table_offset;
> +        goto fail;
> +    }
> +    bm->l1_table_offset = l1_table_offset;
> +
> +    l1_table = g_try_new(uint64_t, bm->l1_size);
> +    if (l1_table == NULL) {
> +        ret = -ENOMEM;
> +        goto fail;
> +    }
> +
> +    /* initialize with zero clusters */
> +    for (i = 0; i < s->l1_size; i++) {
> +        l1_table[i] = cpu_to_be64(1);

bm->l1_size here in my crash output is just "1",
but s->l1_size is 16, so we crash all over this array.

I assume you meant bm->l1_size here. This is a good case to make against
calling everything "L1."

> +    }
> +
> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
> +                                        s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    g_free(l1_table);

I can also catch a segfault here by doing something like this:

./x86_64-softmmu/qemu-system-x86_64 -drive
if=none,format=qcow2,cache=writethrough,file=hda.qcow2,id=drive0
-dirty-bitmap name=bitmap,drive=drive0

Trying to mimick your iotest which does not use an external bitmap file
-- it uses the implicit self-storage.

In this case, hda.qcow2 is still empty (but sized as 8GB) and I try to
quit before any writes occur.

freeing l1_table here causes memory corruption and even valgrind goes
down in flames:

==13284== Invalid write of size 8
==13284==    at 0x53A15E: qcow2_dirty_bitmap_create
(qcow2-dirty-bitmap.c:406)
==13284==    by 0x539D15: qcow2_dirty_bitmap_store
(qcow2-dirty-bitmap.c:307)
==13284==    by 0x505F27: bdrv_store_dirty_bitmap (block.c:3176)
==13284==    by 0x50306D: bdrv_close (block.c:1739)
==13284==    by 0x5032FF: bdrv_close_all (block.c:1797)
==13284==    by 0x3049DC: main (vl.c:4577)
==13284==  Address 0x239b7978 is 0 bytes after a block of size 8 alloc'd
==13284==    at 0x4A06BCF: malloc (vg_replace_malloc.c:296)
==13284==    by 0x300111: malloc_and_trace (vl.c:2706)
==13284==    by 0x62B954E: g_try_malloc (gmem.c:242)
==13284==    by 0x53A11E: qcow2_dirty_bitmap_create
(qcow2-dirty-bitmap.c:398)
==13284==    by 0x539D15: qcow2_dirty_bitmap_store
(qcow2-dirty-bitmap.c:307)
==13284==    by 0x505F27: bdrv_store_dirty_bitmap (block.c:3176)
==13284==    by 0x50306D: bdrv_close (block.c:1739)
==13284==    by 0x5032FF: bdrv_close_all (block.c:1797)
==13284==    by 0x3049DC: main (vl.c:4577)
==13284==
--13284-- VALGRIND INTERNAL ERROR: Valgrind received a signal 11
(SIGSEGV) - exiting
--13284-- si_code=80;  Faulting address: 0x0;  sp: 0x8090a1de0

valgrind: the 'impossible' happened:
   Killed by fatal signal

> +    l1_table = NULL;
> +
> +    /* Append the new dirty bitmap to the dirty bitmap list */
> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
> +    if (s->dirty_bitmaps) {
> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
> +        old_dirty_bitmap_list = s->dirty_bitmaps;
> +    }
> +    s->dirty_bitmaps = new_dirty_bitmap_list;
> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
> +
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        g_free(s->dirty_bitmaps);
> +        s->dirty_bitmaps = old_dirty_bitmap_list;
> +        s->nb_dirty_bitmaps--;
> +        goto fail;
> +    }
> +
> +    g_free(old_dirty_bitmap_list);
> +
> +    return 0;
> +
Disk is 8GiB, 16,777,216 sectors, and bm->bitmap_size matches that.
> +fail:
> +    g_free(bm->name);
> +    g_free(l1_table);
> +
> +    return ret;
> +}
> +
> +static int qcow2_dirty_bitmap_free_clusters(BlockDriverState *bs,
> +                                            QCowDirtyBitmap *bm)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int ret, i;
> +    uint64_t *l1_table = g_new(uint64_t, bm->l1_size);
> +
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        g_free(l1_table);
> +        return ret;
> +    }
> +
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        uint64_t addr = be64_to_cpu(l1_table[i]);
> +        qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_ALWAYS);
> +    }
> +
> +    qcow2_free_clusters(bs, bm->l1_table_offset, bm->l1_size * sizeof(uint64_t),
> +                        QCOW2_DISCARD_ALWAYS);
> +
> +    g_free(l1_table);
> +    return 0;
> +}
> +
> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
> +                              const char *name,
> +                              Error **errp)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap bm;
> +    int dirty_bitmap_index, ret = 0;
> +
> +    /* Search the dirty_bitmap */
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        error_setg(errp, "Can't find the dirty bitmap");
> +        return -ENOENT;
> +    }
> +    bm = s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    /* Remove it from the dirty_bitmap list */
> +    memmove(s->dirty_bitmaps + dirty_bitmap_index,
> +            s->dirty_bitmaps + dirty_bitmap_index + 1,
> +            (s->nb_dirty_bitmaps - dirty_bitmap_index - 1) * sizeof(bm));
> +    s->nb_dirty_bitmaps--;
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret,
> +                         "Failed to remove dirty bitmap"
> +                         " from dirty bitmap list");
> +        return ret;
> +    }
> +
> +    qcow2_dirty_bitmap_free_clusters(bs, &bm);
> +    g_free(bm.name);
> +
> +    return ret;
> +}
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b9a72e3..406e55d 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -61,6 +61,7 @@ typedef struct {
>  #define  QCOW2_EXT_MAGIC_END 0
>  #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
>  #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
> +#define  QCOW2_EXT_MAGIC_DIRTY_BITMAPS 0x23852875
>  
>  static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
>  {
> @@ -90,6 +91,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>      QCowExtension ext;
>      uint64_t offset;
>      int ret;
> +    Qcow2DirtyBitmapHeaderExt dirty_bitmaps_ext;
>  
>  #ifdef DEBUG_EXT
>      printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
> @@ -160,6 +162,33 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>              }
>              break;
>  
> +        case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
> +            ret = bdrv_pread(bs->file, offset, &dirty_bitmaps_ext, ext.len);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "ERROR: dirty_bitmaps_ext: "
> +                                 "Could not read ext header");
> +                return ret;
> +            }
> +
> +            be64_to_cpus(&dirty_bitmaps_ext.dirty_bitmaps_offset);
> +            be32_to_cpus(&dirty_bitmaps_ext.nb_dirty_bitmaps);
> +
> +            s->dirty_bitmaps_offset = dirty_bitmaps_ext.dirty_bitmaps_offset;
> +            s->nb_dirty_bitmaps = dirty_bitmaps_ext.nb_dirty_bitmaps;
> +
> +            ret = qcow2_read_dirty_bitmaps(bs);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "Could not read dirty bitmaps");
> +                return ret;
> +            }
> +
> +#ifdef DEBUG_EXT
> +            printf("Qcow2: Got dirty bitmaps extension:"
> +                   " offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
> +                   s->dirty_bitmaps_offset, s->nb_dirty_bitmaps);
> +#endif
> +            break;
> +
>          default:
>              /* unknown magic - save it in case we need to rewrite the header */
>              {
> @@ -1000,6 +1029,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>      g_free(s->unknown_header_fields);
>      cleanup_unknown_header_ext(bs);
>      qcow2_free_snapshots(bs);
> +    qcow2_free_dirty_bitmaps(bs);
>      qcow2_refcount_close(bs);
>      qemu_vfree(s->l1_table);
>      /* else pre-write overlap checks in cache_destroy may crash */
> @@ -1466,6 +1496,7 @@ static void qcow2_close(BlockDriverState *bs)
>      qemu_vfree(s->cluster_data);
>      qcow2_refcount_close(bs);
>      qcow2_free_snapshots(bs);
> +    qcow2_free_dirty_bitmaps(bs);
>  }
>  
>  static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
> @@ -1667,6 +1698,21 @@ int qcow2_update_header(BlockDriverState *bs)
>      buf += ret;
>      buflen -= ret;
>  
> +    if (s->nb_dirty_bitmaps > 0) {
> +        Qcow2DirtyBitmapHeaderExt dirty_bitmaps_header = {
> +            .nb_dirty_bitmaps = cpu_to_be32(s->nb_dirty_bitmaps),
> +            .dirty_bitmaps_offset = cpu_to_be64(s->dirty_bitmaps_offset)
> +        };
> +        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DIRTY_BITMAPS,
> +                             &dirty_bitmaps_header, sizeof(dirty_bitmaps_header),
> +                             buflen);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        buf += ret;
> +        buflen -= ret;
> +    }
> +
>      /* Keep unknown header extensions */
>      QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
>          ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
> @@ -2176,6 +2222,12 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
>          return -ENOTSUP;
>      }
>  
> +    /* cannot proceed if image has dirty_bitmaps */
> +    if (s->nb_dirty_bitmaps) {
> +        error_report("Can't resize an image which has dirty bitmaps");
> +        return -ENOTSUP;
> +    }
> +
>      /* shrinking is currently not supported */
>      if (offset < bs->total_sectors * 512) {
>          error_report("qcow2 doesn't support shrinking images yet");
> @@ -2952,6 +3004,10 @@ BlockDriver bdrv_qcow2 = {
>      .bdrv_get_info          = qcow2_get_info,
>      .bdrv_get_specific_info = qcow2_get_specific_info,
>  
> +    .bdrv_dirty_bitmap_load = qcow2_dirty_bitmap_load,
> +    .bdrv_dirty_bitmap_store = qcow2_dirty_bitmap_store,
> +    .bdrv_dirty_bitmap_delete = qcow2_dirty_bitmap_delete,
> +
>      .bdrv_save_vmstate    = qcow2_save_vmstate,
>      .bdrv_load_vmstate    = qcow2_load_vmstate,
>  
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 422b825..24beee0 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -39,6 +39,7 @@
>  
>  #define QCOW_MAX_CRYPT_CLUSTERS 32
>  #define QCOW_MAX_SNAPSHOTS 65536
> +#define QCOW_MAX_DIRTY_BITMAPS 65536
>  
>  /* 8 MB refcount table is enough for 2 PB images at 64k cluster size
>   * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
> @@ -52,6 +53,8 @@
>   * space for snapshot names and IDs */
>  #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
>  
> +#define QCOW_MAX_DIRTY_BITMAPS_SIZE (1024 * QCOW_MAX_DIRTY_BITMAPS)
> +
>  /* indicate that the refcount of the referenced cluster is exactly one. */
>  #define QCOW_OFLAG_COPIED     (1ULL << 63)
>  /* indicate that the cluster is compressed (they never have the copied flag) */
> @@ -138,6 +141,19 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
>      /* name follows  */
>  } QCowSnapshotHeader;
>  
> +typedef struct QEMU_PACKED QCowDirtyBitmapHeader {
> +    /* header is 8 byte aligned */
> +    uint64_t l1_table_offset;
> +
> +    uint32_t l1_size;
> +    uint32_t bitmap_granularity;
> +
> +    uint64_t bitmap_size;
> +    uint16_t name_size;
> +
> +    /* name follows  */
> +} QCowDirtyBitmapHeader;
> +
>  typedef struct QEMU_PACKED QCowSnapshotExtraData {
>      uint64_t vm_state_size_large;
>      uint64_t disk_size;
> @@ -156,6 +172,14 @@ typedef struct QCowSnapshot {
>      uint64_t vm_clock_nsec;
>  } QCowSnapshot;
>  
> +typedef struct QCowDirtyBitmap {
> +    uint64_t l1_table_offset;
> +    uint32_t l1_size;
> +    char *name;
> +    int bitmap_granularity;
> +    uint64_t bitmap_size;
> +} QCowDirtyBitmap;
> +
>  struct Qcow2Cache;
>  typedef struct Qcow2Cache Qcow2Cache;
>  
> @@ -218,6 +242,11 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
>  typedef void Qcow2SetRefcountFunc(void *refcount_array,
>                                    uint64_t index, uint64_t value);
>  
> +typedef struct Qcow2DirtyBitmapHeaderExt {
> +    uint32_t nb_dirty_bitmaps;
> +    uint64_t dirty_bitmaps_offset;
> +} QEMU_PACKED Qcow2DirtyBitmapHeaderExt;
> +
>  typedef struct BDRVQcowState {
>      int cluster_bits;
>      int cluster_size;
> @@ -259,6 +288,11 @@ typedef struct BDRVQcowState {
>      unsigned int nb_snapshots;
>      QCowSnapshot *snapshots;
>  
> +    uint64_t dirty_bitmaps_offset;
> +    int dirty_bitmaps_size;
> +    unsigned int nb_dirty_bitmaps;
> +    QCowDirtyBitmap *dirty_bitmaps;
> +
>      int flags;
>      int qcow_version;
>      bool use_lazy_refcounts;
> @@ -570,6 +604,22 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
>  void qcow2_free_snapshots(BlockDriverState *bs);
>  int qcow2_read_snapshots(BlockDriverState *bs);
>  
> +/* qcow2-dirty-bitmap.c functions */
> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
> +                             const char *name, uint64_t size,
> +                             int granularity);
> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                                 const char *name, uint64_t size,
> +                                 int granularity);
> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
> +                              uint64_t size, int granularity);
> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
> +                              const char *name,
> +                              Error **errp);
> +
> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs);
> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs);
> +
>  /* qcow2-cache.c functions */
>  Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
>  int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index db29b74..88855b4 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -206,6 +206,16 @@ struct BlockDriver {
>      int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
>      ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
>  
> +    int (*bdrv_dirty_bitmap_store)(BlockDriverState *bs, uint8_t *buf,
> +                                   const char *name, uint64_t size,
> +                                   int granularity);
> +    uint8_t *(*bdrv_dirty_bitmap_load)(BlockDriverState *bs,
> +                                       const char *name, uint64_t size,
> +                                       int granularity);
> +    int (*bdrv_dirty_bitmap_delete)(BlockDriverState *bs,
> +                                    const char *name,
> +                                    Error **errp);
> +
>      int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
>                               int64_t pos);
>      int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
> 


In light of this, some "sanity" tests that test cases like no writes,
empty bitmaps, empty files, etc I think will be appropriate.
John Snow June 12, 2015, 7:02 p.m. UTC | #4
On 06/10/2015 10:30 AM, Stefan Hajnoczi wrote:
> On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> 
> I noticed a corner case, it's probably not a problem in practice:
> 
> Since the dirty bitmap is stored with the help of a BlockDriverState
> (and its bs->file), it's possible that writing the bitmap will cause
> bits in the bitmap to be dirtied!
> 

But since it's metadata and not stored within a disk sector, can this
actually happen? Do you have an example of a scenario where this might
come up?

>> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
>> new file mode 100644
>> index 0000000..bc0167c
>> --- /dev/null
>> +++ b/block/qcow2-dirty-bitmap.c
>> @@ -0,0 +1,503 @@
>> +/*
>> + * Dirty bitmpas for the QCOW version 2 format
> 
> s/bitmpas/bitmaps/
> 
>> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    QCowDirtyBitmapHeader h;
>> +    QCowDirtyBitmap *bm;
>> +    int i, name_size;
>> +    int64_t offset;
>> +    int ret;
>> +
>> +    if (!s->nb_dirty_bitmaps) {
>> +        s->dirty_bitmaps = NULL;
>> +        s->dirty_bitmaps_size = 0;
>> +        return 0;
>> +    }
>> +
>> +    offset = s->dirty_bitmaps_offset;
>> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
> 
> Please use g_try_new0() and handle the NULL return value.
> g_new/g_malloc abort the process if there is not enough memory.  When
> opening untrusted image files it is possible that large values will be
> encountered and allocations fail.  In that case .bdrv_open() should fail
> instead of killing QEMU.
> 
> Using g_try_*() in QEMU is not an exact science but large data buffers
> or allocations where external inputs influence the size are good
> candidates.
> 
> Other allocations in these patches should do that too.
> 
>> +    /* Allocate space for the new dirty bitmap table */
>> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
>> +    offset = dirty_bitmaps_offset;
>> +    if (offset < 0) {
>> +        ret = offset;
>> +        goto fail;
>> +    }
>> +    ret = bdrv_flush(bs);
> 
> Not sure there is a need for this.  The clusters are inaccessible since
> no metadata points to them yet.  Therefore we don't need to flush yet
> because there is no risk of seeing an inconsistent state.
> 
>> +    /* Write all dirty bitmaps to the new table */
>> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
>> +        bm = s->dirty_bitmaps + i;
>> +        memset(&h, 0, sizeof(h));
>> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
>> +        h.l1_size = cpu_to_be32(bm->l1_size);
>> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
>> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
>> +
>> +        name_size = strlen(bm->name);
>> +        assert(name_size <= UINT16_MAX);
>> +        h.name_size = cpu_to_be16(name_size);
>> +        offset = align_offset(offset, 8);
>> +
>> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +        offset += sizeof(h);
>> +
>> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +        offset += name_size;
>> +    }
> 
> If files have many thousands of bitmaps then this loop will be slow.  It
> would be much faster to write out 1 cluster at a time.  This probably
> doesn't matter in practice since this function doesn't get called much
> and normally files will have few bitmaps.
> 
>> +
>> +    /*
>> +     * Update the header extension to point to the new dirty bitmap table. This
>> +     * requires the new table and its refcounts to be stable on disk.
>> +     */
>> +    ret = bdrv_flush(bs);
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
>> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
>> +    ret = qcow2_update_header(bs);
>> +    if (ret < 0) {
>> +        fprintf(stderr, "Could not update qcow2 header\n");
>> +        goto fail;
>> +    }
> 
> qcow2_update_header() does not flush.  We need to flush before freeing
> the old clusters in order to guarantee that the file now points to the
> new clusters.
> 
>> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
>> +                            const char *name, uint64_t size,
>> +                            int granularity)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int i, dirty_bitmap_index, ret;
>> +    uint64_t offset;
>> +    QCowDirtyBitmap *bm;
>> +    uint64_t *l1_table;
>> +    uint8_t *buf;
>> +
>> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
>> +    if (dirty_bitmap_index < 0) {
>> +        return NULL;
>> +    }
>> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
>> +
>> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
>> +        return NULL;
>> +    }
>> +
>> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> 
> Please use g_try_malloc() with NULL handling.
> 
>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>> +                     bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    buf = g_malloc0(bm->l1_size * s->cluster_size);
> 
> What is the maximum l1_size value?  cluster_size and l1_size are 32-bit
> so with 64 KB cluster_size this overflows if l1_size > 65535.  Do you
> want to cast to size_t?
> 
>> +    for (i = 0; i < bm->l1_size; ++i) {
>> +        offset = be64_to_cpu(l1_table[i]);
>> +        if (!(offset & 1)) {
> 
> This doesn't honor the 0 offset means unallocated cluster behavior for
> the Standard Cluster Descriptor from the qcow2 specification.
> 
>> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
>> +                             s->cluster_size);
>> +            if (ret < 0) {
>> +                goto fail;
> 
> Missing g_free(buf)
> 
>> +    l1_table = g_try_new(uint64_t, bm->l1_size);
>> +    if (l1_table == NULL) {
>> +        ret = -ENOMEM;
>> +        goto fail;
>> +    }
>> +
>> +    /* initialize with zero clusters */
>> +    for (i = 0; i < s->l1_size; i++) {
>> +        l1_table[i] = cpu_to_be64(1);
>> +    }
>> +
>> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
>> +                                        s->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
>> +                      s->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
> 
> Flush is needed here to ensure the bitmap has reached disk before the
> dirty_bitmaps array is written out.
> 
>> +
>> +    g_free(l1_table);
>> +    l1_table = NULL;
>> +
>> +    /* Append the new dirty bitmap to the dirty bitmap list */
>> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
>> +    if (s->dirty_bitmaps) {
>> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
>> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
>> +        old_dirty_bitmap_list = s->dirty_bitmaps;
>> +    }
>> +    s->dirty_bitmaps = new_dirty_bitmap_list;
>> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
>> +
>> +    ret = qcow2_write_dirty_bitmaps(bs);
>> +    if (ret < 0) {
>> +        g_free(s->dirty_bitmaps);
>> +        s->dirty_bitmaps = old_dirty_bitmap_list;
>> +        s->nb_dirty_bitmaps--;
>> +        goto fail;
>> +    }
>> +
>> +    g_free(old_dirty_bitmap_list);
>> +
>> +    return 0;
>> +
>> +fail:
>> +    g_free(bm->name);
>> +    g_free(l1_table);
>> +
> 
> The l1_table clusters should be freed on failure.
>
John Snow June 12, 2015, 9:55 p.m. UTC | #5
On 06/08/2015 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
> From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> 
> Adds dirty-bitmaps feature to qcow2 format as specified in
> docs/specs/qcow2.txt
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/Makefile.objs        |   2 +-
>  block/qcow2-dirty-bitmap.c | 503 +++++++++++++++++++++++++++++++++++++++++++++
>  block/qcow2.c              |  56 +++++
>  block/qcow2.h              |  50 +++++
>  include/block/block_int.h  |  10 +
>  5 files changed, 620 insertions(+), 1 deletion(-)
>  create mode 100644 block/qcow2-dirty-bitmap.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 0d8c2a4..bff12b4 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -1,5 +1,5 @@
>  block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
> -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
> +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-dirty-bitmap.o
>  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>  block-obj-y += qed-check.o
>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
> new file mode 100644
> index 0000000..bc0167c
> --- /dev/null
> +++ b/block/qcow2-dirty-bitmap.c
> @@ -0,0 +1,503 @@
> +/*
> + * Dirty bitmpas for the QCOW version 2 format
> + *
> + * Copyright (c) 2014-2015 Vladimir Sementsov-Ogievskiy
> + *
> + * This file is derived from qcow2-snapshot.c, original copyright:
> + * Copyright (c) 2004-2006 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu-common.h"
> +#include "block/block_int.h"
> +#include "block/qcow2.h"
> +
> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        g_free(s->dirty_bitmaps[i].name);
> +    }
> +    g_free(s->dirty_bitmaps);
> +    s->dirty_bitmaps = NULL;
> +    s->nb_dirty_bitmaps = 0;
> +}
> +

OK.

> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmapHeader h;
> +    QCowDirtyBitmap *bm;
> +    int i, name_size;
> +    int64_t offset;
> +    int ret;
> +
> +    if (!s->nb_dirty_bitmaps) {
> +        s->dirty_bitmaps = NULL;
> +        s->dirty_bitmaps_size = 0;
> +        return 0;
> +    }
> +
> +    offset = s->dirty_bitmaps_offset;
> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        /* Read statically sized part of the dirty_bitmap header */
> +        offset = align_offset(offset, 8);
> +        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        offset += sizeof(h);
> +        bm = s->dirty_bitmaps + i;
> +        bm->l1_table_offset = be64_to_cpu(h.l1_table_offset);
> +        bm->l1_size = be32_to_cpu(h.l1_size);
> +        bm->bitmap_granularity = be32_to_cpu(h.bitmap_granularity);
> +        bm->bitmap_size = be64_to_cpu(h.bitmap_size);
> +
> +        name_size = be16_to_cpu(h.name_size);
> +
> +        /* Read dirty_bitmap name */
> +        bm->name = g_malloc(name_size + 1);
> +        ret = bdrv_pread(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +        bm->name[name_size] = '\0';
> +
> +        if (offset - s->dirty_bitmaps_offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
> +            ret = -EFBIG;
> +            goto fail;
> +        }
> +    }
> +
> +    assert(offset - s->dirty_bitmaps_offset <= INT_MAX);
> +    s->dirty_bitmaps_size = offset - s->dirty_bitmaps_offset;
> +    return 0;
> +
> +fail:
> +    qcow2_free_dirty_bitmaps(bs);
> +    return ret;
> +}
> +

OK

> +/* Add at the end of the file a new table of dirty bitmaps */
> +static int qcow2_write_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap *bm;
> +    QCowDirtyBitmapHeader h;
> +    int i, name_size, dirty_bitmaps_size;
> +    int64_t offset, dirty_bitmaps_offset = 0;
> +    int ret;
> +
> +    int old_dirty_bitmaps_size = s->dirty_bitmaps_size;
> +    int64_t old_dirty_bitmaps_offset = s->dirty_bitmaps_offset;
> +
> +    /* Compute the size of the dirty bitmaps table */
> +    offset = 0;
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        offset = align_offset(offset, 8);
> +        offset += sizeof(h);
> +        offset += strlen(bm->name);
> +
> +        if (offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
> +            ret = -EFBIG;
> +            goto fail;
> +        }
> +    }
> +
> +    assert(offset <= INT_MAX);
> +    dirty_bitmaps_size = offset;
> +
> +    /* Allocate space for the new dirty bitmap table */
> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
> +    offset = dirty_bitmaps_offset;
> +    if (offset < 0) {
> +        ret = offset;
> +        goto fail;
> +    }
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* The dirty bitmap table position has not yet been updated, so these
> +     * clusters must indeed be completely free */
> +    ret = qcow2_pre_write_overlap_check(bs, 0, offset, dirty_bitmaps_size);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* Write all dirty bitmaps to the new table */
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        memset(&h, 0, sizeof(h));
> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
> +        h.l1_size = cpu_to_be32(bm->l1_size);
> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
> +
> +        name_size = strlen(bm->name);
> +        assert(name_size <= UINT16_MAX);
> +        h.name_size = cpu_to_be16(name_size);
> +        offset = align_offset(offset, 8);
> +
> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += sizeof(h);
> +
> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +    }
> +
> +    /*
> +     * Update the header extension to point to the new dirty bitmap table. This
> +     * requires the new table and its refcounts to be stable on disk.
> +     */
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
> +    ret = qcow2_update_header(bs);
> +    if (ret < 0) {
> +        fprintf(stderr, "Could not update qcow2 header\n");
> +        goto fail;
> +    }
> +
> +    /* Free old dirty bitmap table */
> +    qcow2_free_clusters(bs, old_dirty_bitmaps_offset, old_dirty_bitmaps_size,
> +                        QCOW2_DISCARD_ALWAYS);
> +    return 0;
> +
> +fail:
> +    if (dirty_bitmaps_offset > 0) {
> +        qcow2_free_clusters(bs, dirty_bitmaps_offset, dirty_bitmaps_size,
> +                            QCOW2_DISCARD_ALWAYS);
> +    }
> +    return ret;
> +}
> +
> +static int find_dirty_bitmap_by_name(BlockDriverState *bs,
> +                                     const char *name)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        if (!strcmp(s->dirty_bitmaps[i].name, name)) {
> +            return i;
> +        }
> +    }
> +
> +    return -1;
> +}
> +

OK

> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i, dirty_bitmap_index, ret;
> +    uint64_t offset;
> +    QCowDirtyBitmap *bm;
> +    uint64_t *l1_table;
> +    uint8_t *buf;
> +
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        return NULL;
> +    }
> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
> +        return NULL;
> +    }
> +
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    buf = g_malloc0(bm->l1_size * s->cluster_size);
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        offset = be64_to_cpu(l1_table[i]);
> +        if (!(offset & 1)) {
> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
> +                             s->cluster_size);
> +            if (ret < 0) {
> +                goto fail;
> +            }
> +        }
> +    }
> +
> +    g_free(l1_table);
> +    return buf;
> +
> +fail:
> +    g_free(l1_table);
> +    return NULL;
> +}
> +

OK, though the prototype strikes me as strange: what's the use case for
making the caller specify the size and granularity in order to be able
to see if the bitmap is present?

This makes it hard to tell the difference between "The requested bitmap
wasn't found" and "The requested bitmap did not match the expected
attributes."

Why not just let the caller verify the bitmap received matches their
expectations and leave this as a (bs, name) pair?

> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int cl_size = s->cluster_size;
> +    int i, dirty_bitmap_index, ret = 0, n;
> +    uint64_t *l1_table;
> +    QCowDirtyBitmap *bm;
> +    uint64_t buf_size;
> +    uint8_t *p;
> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +
> +    /* find/create dirty bitmap */
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index >= 0) {
> +        bm = s->dirty_bitmaps + dirty_bitmap_index;
> +
> +        if (size != bm->bitmap_size ||
> +            granularity != bm->bitmap_granularity) {
> +            qcow2_dirty_bitmap_delete(bs, name, NULL);

If this fails, we should 'return ret'.

> +            dirty_bitmap_index = -1;
> +        }
> +    }

Oh, find_dirty_bitmap_by_name only looks by name, but then you check to
make sure the size and granularity matches. If it doesn't, you actually
create a new bitmap with the *same name* but different attributes, and
delete the old one.

Is that appropriate? I guess if we're already here in store, it means we
made it past the add checks... which means for whatever reason we
definitely want to store *this* bitmap...

I think this code is a little extraneous, it might be best to just issue
an ultimatum that "You can't have two bitmaps with the same name in a
file." and let that be that -- finding something with the wrong size
would just simply be an error.

> +    if (dirty_bitmap_index < 0) {
> +        qcow2_dirty_bitmap_create(bs, name, size, granularity);

If this fails, we need to return ret immediately.

> +        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
> +    }
> +    bm = s->dirty_bitmaps + dirty_bitmap_index;
> +
> +    /* read l1 table */
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto finish;
> +    }
> +
> +    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
> +    buf_size = align_offset(buf_size, 4);
> +    n = buf_size / cl_size;
> +    p = buf;
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
> +        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
> +
> +        if (buffer_is_zero(p, write_size)) {
> +            if (addr) {
> +                qcow2_free_clusters(bs, addr, cl_size,
> +                                    QCOW2_DISCARD_ALWAYS);
> +            }
> +            l1_table[i] = cpu_to_be64(1);
> +        } else {
> +            if (!addr) {
> +                addr = qcow2_alloc_clusters(bs, cl_size);
> +                l1_table[i] = cpu_to_be64(addr);
> +            }
> +
> +            ret = bdrv_pwrite(bs->file, addr, p, write_size);
> +            if (ret < 0) {
> +                goto finish;
> +            }
> +        }
> +
> +        p += cl_size;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto finish;
> +    }
> +
> +finish:
> +    g_free(l1_table);
> +    return ret;
> +}
> +/* if no id is provided, a new one is constructed */
> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
> +                              uint64_t size, int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap *new_dirty_bitmap_list = NULL;
> +    QCowDirtyBitmap *old_dirty_bitmap_list = NULL;
> +    QCowDirtyBitmap sn1, *bm = &sn1;
> +    int i, ret;
> +    uint64_t *l1_table = NULL;
> +    int64_t l1_table_offset;
> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +
> +    if (s->nb_dirty_bitmaps >= QCOW_MAX_DIRTY_BITMAPS) {
> +        return -EFBIG;
> +    }
> +
> +    memset(bm, 0, sizeof(*bm));
> +
> +    /* Check that the ID is unique */
> +    if (find_dirty_bitmap_by_name(bs, name) >= 0) {
> +        return -EEXIST;
> +    }
> +
> +    /* Populate bm with passed data */
> +    bm->name = g_strdup(name);
> +    bm->bitmap_granularity = granularity;
> +    bm->bitmap_size = size;
> +
> +    bm->l1_size =
> +        size_to_clusters(s, (((size - 1) / sector_granularity) >> 3) + 1);
> +    l1_table_offset =
> +        qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));

As mentioned previously (sorry for replying so many times to the same
patches, I've been making multiple passes over these patches and trying
to work my way through the qcow2 bits) I think the use of bm->l1_size
and s->l1_size is getting a little mixed up here and there.

Should this be bm->l1_size?

> +    if (l1_table_offset < 0) {
> +        ret = l1_table_offset;
> +        goto fail;
> +    }
> +    bm->l1_table_offset = l1_table_offset;
> +
> +    l1_table = g_try_new(uint64_t, bm->l1_size);
> +    if (l1_table == NULL) {
> +        ret = -ENOMEM;
> +        goto fail;
> +    }
> +
> +    /* initialize with zero clusters */
> +    for (i = 0; i < s->l1_size; i++) {

Here too?

> +        l1_table[i] = cpu_to_be64(1);
> +    }
> +
> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
> +                                        s->l1_size * sizeof(uint64_t));

And here?

> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      s->l1_size * sizeof(uint64_t));

Or here?

> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    g_free(l1_table);
> +    l1_table = NULL;
> +
> +    /* Append the new dirty bitmap to the dirty bitmap list */
> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
> +    if (s->dirty_bitmaps) {
> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
> +        old_dirty_bitmap_list = s->dirty_bitmaps;
> +    }
> +    s->dirty_bitmaps = new_dirty_bitmap_list;
> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
> +
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        g_free(s->dirty_bitmaps);
> +        s->dirty_bitmaps = old_dirty_bitmap_list;
> +        s->nb_dirty_bitmaps--;
> +        goto fail;
> +    }
> +
> +    g_free(old_dirty_bitmap_list);
> +
> +    return 0;
> +
> +fail:
> +    g_free(bm->name);
> +    g_free(l1_table);
> +
> +    return ret;
> +}
> +
> +static int qcow2_dirty_bitmap_free_clusters(BlockDriverState *bs,
> +                                            QCowDirtyBitmap *bm)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int ret, i;
> +    uint64_t *l1_table = g_new(uint64_t, bm->l1_size);
> +
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        g_free(l1_table);
> +        return ret;
> +    }
> +
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        uint64_t addr = be64_to_cpu(l1_table[i]);
> +        qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_ALWAYS);
> +    }
> +
> +    qcow2_free_clusters(bs, bm->l1_table_offset, bm->l1_size * sizeof(uint64_t),
> +                        QCOW2_DISCARD_ALWAYS);
> +
> +    g_free(l1_table);
> +    return 0;
> +}
> +
> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
> +                              const char *name,
> +                              Error **errp)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap bm;
> +    int dirty_bitmap_index, ret = 0;
> +
> +    /* Search the dirty_bitmap */
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        error_setg(errp, "Can't find the dirty bitmap");
> +        return -ENOENT;
> +    }
> +    bm = s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    /* Remove it from the dirty_bitmap list */
> +    memmove(s->dirty_bitmaps + dirty_bitmap_index,
> +            s->dirty_bitmaps + dirty_bitmap_index + 1,
> +            (s->nb_dirty_bitmaps - dirty_bitmap_index - 1) * sizeof(bm));
> +    s->nb_dirty_bitmaps--;
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret,
> +                         "Failed to remove dirty bitmap"
> +                         " from dirty bitmap list");
> +        return ret;
> +    }

What happens to the bitmap we didn't successfully delete at this point?
Seems to me that it's leaked and our internal representation of what's
in the file becomes inconsistent, no?

The only place we currently call this function, too, doesn't even check
the return code, so this isn't safe.

> +
> +    qcow2_dirty_bitmap_free_clusters(bs, &bm);
> +    g_free(bm.name);
> +
> +    return ret;
> +}
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b9a72e3..406e55d 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -61,6 +61,7 @@ typedef struct {
>  #define  QCOW2_EXT_MAGIC_END 0
>  #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
>  #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
> +#define  QCOW2_EXT_MAGIC_DIRTY_BITMAPS 0x23852875
>  
>  static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
>  {
> @@ -90,6 +91,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>      QCowExtension ext;
>      uint64_t offset;
>      int ret;
> +    Qcow2DirtyBitmapHeaderExt dirty_bitmaps_ext;
>  
>  #ifdef DEBUG_EXT
>      printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
> @@ -160,6 +162,33 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>              }
>              break;
>  
> +        case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
> +            ret = bdrv_pread(bs->file, offset, &dirty_bitmaps_ext, ext.len);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "ERROR: dirty_bitmaps_ext: "
> +                                 "Could not read ext header");
> +                return ret;
> +            }
> +
> +            be64_to_cpus(&dirty_bitmaps_ext.dirty_bitmaps_offset);
> +            be32_to_cpus(&dirty_bitmaps_ext.nb_dirty_bitmaps);
> +
> +            s->dirty_bitmaps_offset = dirty_bitmaps_ext.dirty_bitmaps_offset;
> +            s->nb_dirty_bitmaps = dirty_bitmaps_ext.nb_dirty_bitmaps;
> +
> +            ret = qcow2_read_dirty_bitmaps(bs);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "Could not read dirty bitmaps");
> +                return ret;
> +            }
> +
> +#ifdef DEBUG_EXT
> +            printf("Qcow2: Got dirty bitmaps extension:"
> +                   " offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
> +                   s->dirty_bitmaps_offset, s->nb_dirty_bitmaps);
> +#endif
> +            break;
> +
>          default:
>              /* unknown magic - save it in case we need to rewrite the header */
>              {
> @@ -1000,6 +1029,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>      g_free(s->unknown_header_fields);
>      cleanup_unknown_header_ext(bs);
>      qcow2_free_snapshots(bs);
> +    qcow2_free_dirty_bitmaps(bs);
>      qcow2_refcount_close(bs);
>      qemu_vfree(s->l1_table);
>      /* else pre-write overlap checks in cache_destroy may crash */
> @@ -1466,6 +1496,7 @@ static void qcow2_close(BlockDriverState *bs)
>      qemu_vfree(s->cluster_data);
>      qcow2_refcount_close(bs);
>      qcow2_free_snapshots(bs);
> +    qcow2_free_dirty_bitmaps(bs);
>  }
>  
>  static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
> @@ -1667,6 +1698,21 @@ int qcow2_update_header(BlockDriverState *bs)
>      buf += ret;
>      buflen -= ret;
>  
> +    if (s->nb_dirty_bitmaps > 0) {
> +        Qcow2DirtyBitmapHeaderExt dirty_bitmaps_header = {
> +            .nb_dirty_bitmaps = cpu_to_be32(s->nb_dirty_bitmaps),
> +            .dirty_bitmaps_offset = cpu_to_be64(s->dirty_bitmaps_offset)
> +        };
> +        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DIRTY_BITMAPS,
> +                             &dirty_bitmaps_header, sizeof(dirty_bitmaps_header),
> +                             buflen);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        buf += ret;
> +        buflen -= ret;
> +    }
> +
>      /* Keep unknown header extensions */
>      QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
>          ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
> @@ -2176,6 +2222,12 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
>          return -ENOTSUP;
>      }
>  
> +    /* cannot proceed if image has dirty_bitmaps */
> +    if (s->nb_dirty_bitmaps) {
> +        error_report("Can't resize an image which has dirty bitmaps");
> +        return -ENOTSUP;
> +    }
> +

Not true anymore: support for truncating a drive with dirty bitmaps was
indeed implemented.

In my mind it's OK if we limit this "temporarily" for now, but I will be
reviewing this series with the ability to change images in mind, as this
is something we should support eventually.

>      /* shrinking is currently not supported */
>      if (offset < bs->total_sectors * 512) {
>          error_report("qcow2 doesn't support shrinking images yet");
> @@ -2952,6 +3004,10 @@ BlockDriver bdrv_qcow2 = {
>      .bdrv_get_info          = qcow2_get_info,
>      .bdrv_get_specific_info = qcow2_get_specific_info,
>  
> +    .bdrv_dirty_bitmap_load = qcow2_dirty_bitmap_load,
> +    .bdrv_dirty_bitmap_store = qcow2_dirty_bitmap_store,
> +    .bdrv_dirty_bitmap_delete = qcow2_dirty_bitmap_delete,
> +
>      .bdrv_save_vmstate    = qcow2_save_vmstate,
>      .bdrv_load_vmstate    = qcow2_load_vmstate,
>  
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 422b825..24beee0 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -39,6 +39,7 @@
>  
>  #define QCOW_MAX_CRYPT_CLUSTERS 32
>  #define QCOW_MAX_SNAPSHOTS 65536
> +#define QCOW_MAX_DIRTY_BITMAPS 65536
>  
>  /* 8 MB refcount table is enough for 2 PB images at 64k cluster size
>   * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
> @@ -52,6 +53,8 @@
>   * space for snapshot names and IDs */
>  #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
>  
> +#define QCOW_MAX_DIRTY_BITMAPS_SIZE (1024 * QCOW_MAX_DIRTY_BITMAPS)
> +
>  /* indicate that the refcount of the referenced cluster is exactly one. */
>  #define QCOW_OFLAG_COPIED     (1ULL << 63)
>  /* indicate that the cluster is compressed (they never have the copied flag) */
> @@ -138,6 +141,19 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
>      /* name follows  */
>  } QCowSnapshotHeader;
>  
> +typedef struct QEMU_PACKED QCowDirtyBitmapHeader {
> +    /* header is 8 byte aligned */
> +    uint64_t l1_table_offset;
> +
> +    uint32_t l1_size;
> +    uint32_t bitmap_granularity;
> +
> +    uint64_t bitmap_size;
> +    uint16_t name_size;
> +
> +    /* name follows  */
> +} QCowDirtyBitmapHeader;
> +
>  typedef struct QEMU_PACKED QCowSnapshotExtraData {
>      uint64_t vm_state_size_large;
>      uint64_t disk_size;
> @@ -156,6 +172,14 @@ typedef struct QCowSnapshot {
>      uint64_t vm_clock_nsec;
>  } QCowSnapshot;
>  
> +typedef struct QCowDirtyBitmap {
> +    uint64_t l1_table_offset;
> +    uint32_t l1_size;
> +    char *name;
> +    int bitmap_granularity;
> +    uint64_t bitmap_size;
> +} QCowDirtyBitmap;
> +
>  struct Qcow2Cache;
>  typedef struct Qcow2Cache Qcow2Cache;
>  
> @@ -218,6 +242,11 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
>  typedef void Qcow2SetRefcountFunc(void *refcount_array,
>                                    uint64_t index, uint64_t value);
>  
> +typedef struct Qcow2DirtyBitmapHeaderExt {
> +    uint32_t nb_dirty_bitmaps;
> +    uint64_t dirty_bitmaps_offset;
> +} QEMU_PACKED Qcow2DirtyBitmapHeaderExt;
> +
>  typedef struct BDRVQcowState {
>      int cluster_bits;
>      int cluster_size;
> @@ -259,6 +288,11 @@ typedef struct BDRVQcowState {
>      unsigned int nb_snapshots;
>      QCowSnapshot *snapshots;
>  
> +    uint64_t dirty_bitmaps_offset;
> +    int dirty_bitmaps_size;
> +    unsigned int nb_dirty_bitmaps;
> +    QCowDirtyBitmap *dirty_bitmaps;
> +
>      int flags;
>      int qcow_version;
>      bool use_lazy_refcounts;
> @@ -570,6 +604,22 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
>  void qcow2_free_snapshots(BlockDriverState *bs);
>  int qcow2_read_snapshots(BlockDriverState *bs);
>  
> +/* qcow2-dirty-bitmap.c functions */
> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
> +                             const char *name, uint64_t size,
> +                             int granularity);
> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                                 const char *name, uint64_t size,
> +                                 int granularity);
> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
> +                              uint64_t size, int granularity);
> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
> +                              const char *name,
> +                              Error **errp);
> +
> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs);
> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs);
> +
>  /* qcow2-cache.c functions */
>  Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
>  int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index db29b74..88855b4 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -206,6 +206,16 @@ struct BlockDriver {
>      int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
>      ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
>  
> +    int (*bdrv_dirty_bitmap_store)(BlockDriverState *bs, uint8_t *buf,
> +                                   const char *name, uint64_t size,
> +                                   int granularity);
> +    uint8_t *(*bdrv_dirty_bitmap_load)(BlockDriverState *bs,
> +                                       const char *name, uint64_t size,
> +                                       int granularity);
> +    int (*bdrv_dirty_bitmap_delete)(BlockDriverState *bs,
> +                                    const char *name,
> +                                    Error **errp);
> +
>      int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
>                               int64_t pos);
>      int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
> 

OK, that's the last of my pending replies -- I think I'm done digging
through this series until a v3 shows up at this point, sorry for the
flood of mails :)

Thanks,
--John Snow
Vladimir Sementsov-Ogievskiy June 15, 2015, 2:05 p.m. UTC | #6
On 12.06.2015 02:04, John Snow wrote:
>
> On 06/08/2015 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
>> From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>>
>> Adds dirty-bitmaps feature to qcow2 format as specified in
>> docs/specs/qcow2.txt
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   block/Makefile.objs        |   2 +-
>>   block/qcow2-dirty-bitmap.c | 503 +++++++++++++++++++++++++++++++++++++++++++++
>>   block/qcow2.c              |  56 +++++
>>   block/qcow2.h              |  50 +++++
>>   include/block/block_int.h  |  10 +
>>   5 files changed, 620 insertions(+), 1 deletion(-)
>>   create mode 100644 block/qcow2-dirty-bitmap.c
>>
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 0d8c2a4..bff12b4 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -1,5 +1,5 @@
>>   block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
>> -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
>> +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-dirty-bitmap.o
>>   block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>>   block-obj-y += qed-check.o
>>   block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
>> new file mode 100644
>> index 0000000..bc0167c
>> --- /dev/null
>> +++ b/block/qcow2-dirty-bitmap.c
>> @@ -0,0 +1,503 @@
>> +/*
>> + * Dirty bitmpas for the QCOW version 2 format
>> + *
>> + * Copyright (c) 2014-2015 Vladimir Sementsov-Ogievskiy
>> + *
>> + * This file is derived from qcow2-snapshot.c, original copyright:
>> + * Copyright (c) 2004-2006 Fabrice Bellard
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include "block/block_int.h"
>> +#include "block/qcow2.h"
>> +
>> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int i;
>> +
>> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
>> +        g_free(s->dirty_bitmaps[i].name);
>> +    }
>> +    g_free(s->dirty_bitmaps);
>> +    s->dirty_bitmaps = NULL;
>> +    s->nb_dirty_bitmaps = 0;
>> +}
>> +
>> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    QCowDirtyBitmapHeader h;
>> +    QCowDirtyBitmap *bm;
>> +    int i, name_size;
>> +    int64_t offset;
>> +    int ret;
>> +
>> +    if (!s->nb_dirty_bitmaps) {
>> +        s->dirty_bitmaps = NULL;
>> +        s->dirty_bitmaps_size = 0;
>> +        return 0;
>> +    }
>> +
>> +    offset = s->dirty_bitmaps_offset;
>> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
>> +
>> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
>> +        /* Read statically sized part of the dirty_bitmap header */
>> +        offset = align_offset(offset, 8);
>> +        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +
>> +        offset += sizeof(h);
>> +        bm = s->dirty_bitmaps + i;
>> +        bm->l1_table_offset = be64_to_cpu(h.l1_table_offset);
>> +        bm->l1_size = be32_to_cpu(h.l1_size);
>> +        bm->bitmap_granularity = be32_to_cpu(h.bitmap_granularity);
>> +        bm->bitmap_size = be64_to_cpu(h.bitmap_size);
>> +
>> +        name_size = be16_to_cpu(h.name_size);
>> +
>> +        /* Read dirty_bitmap name */
>> +        bm->name = g_malloc(name_size + 1);
>> +        ret = bdrv_pread(bs->file, offset, bm->name, name_size);
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +        offset += name_size;
>> +        bm->name[name_size] = '\0';
>> +
>> +        if (offset - s->dirty_bitmaps_offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
>> +            ret = -EFBIG;
>> +            goto fail;
>> +        }
>> +    }
>> +
>> +    assert(offset - s->dirty_bitmaps_offset <= INT_MAX);
>> +    s->dirty_bitmaps_size = offset - s->dirty_bitmaps_offset;
>> +    return 0;
>> +
>> +fail:
>> +    qcow2_free_dirty_bitmaps(bs);
>> +    return ret;
>> +}
>> +
>> +/* Add at the end of the file a new table of dirty bitmaps */
>> +static int qcow2_write_dirty_bitmaps(BlockDriverState *bs)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    QCowDirtyBitmap *bm;
>> +    QCowDirtyBitmapHeader h;
>> +    int i, name_size, dirty_bitmaps_size;
>> +    int64_t offset, dirty_bitmaps_offset = 0;
>> +    int ret;
>> +
>> +    int old_dirty_bitmaps_size = s->dirty_bitmaps_size;
>> +    int64_t old_dirty_bitmaps_offset = s->dirty_bitmaps_offset;
>> +
>> +    /* Compute the size of the dirty bitmaps table */
>> +    offset = 0;
>> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
>> +        bm = s->dirty_bitmaps + i;
>> +        offset = align_offset(offset, 8);
>> +        offset += sizeof(h);
>> +        offset += strlen(bm->name);
>> +
>> +        if (offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
>> +            ret = -EFBIG;
>> +            goto fail;
>> +        }
>> +    }
>> +
>> +    assert(offset <= INT_MAX);
>> +    dirty_bitmaps_size = offset;
>> +
>> +    /* Allocate space for the new dirty bitmap table */
>> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
>> +    offset = dirty_bitmaps_offset;
>> +    if (offset < 0) {
>> +        ret = offset;
>> +        goto fail;
>> +    }
>> +    ret = bdrv_flush(bs);
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    /* The dirty bitmap table position has not yet been updated, so these
>> +     * clusters must indeed be completely free */
>> +    ret = qcow2_pre_write_overlap_check(bs, 0, offset, dirty_bitmaps_size);
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    /* Write all dirty bitmaps to the new table */
>> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
>> +        bm = s->dirty_bitmaps + i;
>> +        memset(&h, 0, sizeof(h));
>> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
>> +        h.l1_size = cpu_to_be32(bm->l1_size);
>> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
>> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
>> +
>> +        name_size = strlen(bm->name);
>> +        assert(name_size <= UINT16_MAX);
>> +        h.name_size = cpu_to_be16(name_size);
>> +        offset = align_offset(offset, 8);
>> +
>> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +        offset += sizeof(h);
>> +
>> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +        offset += name_size;
>> +    }
>> +
>> +    /*
>> +     * Update the header extension to point to the new dirty bitmap table. This
>> +     * requires the new table and its refcounts to be stable on disk.
>> +     */
>> +    ret = bdrv_flush(bs);
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
>> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
>> +    ret = qcow2_update_header(bs);
>> +    if (ret < 0) {
>> +        fprintf(stderr, "Could not update qcow2 header\n");
>> +        goto fail;
>> +    }
>> +
>> +    /* Free old dirty bitmap table */
>> +    qcow2_free_clusters(bs, old_dirty_bitmaps_offset, old_dirty_bitmaps_size,
>> +                        QCOW2_DISCARD_ALWAYS);
>> +    return 0;
>> +
>> +fail:
>> +    if (dirty_bitmaps_offset > 0) {
>> +        qcow2_free_clusters(bs, dirty_bitmaps_offset, dirty_bitmaps_size,
>> +                            QCOW2_DISCARD_ALWAYS);
>> +    }
>> +    return ret;
>> +}
>> +
>> +static int find_dirty_bitmap_by_name(BlockDriverState *bs,
>> +                                     const char *name)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int i;
>> +
>> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
>> +        if (!strcmp(s->dirty_bitmaps[i].name, name)) {
>> +            return i;
>> +        }
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
>> +                            const char *name, uint64_t size,
>> +                            int granularity)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int i, dirty_bitmap_index, ret;
>> +    uint64_t offset;
>> +    QCowDirtyBitmap *bm;
>> +    uint64_t *l1_table;
>> +    uint8_t *buf;
>> +
>> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
>> +    if (dirty_bitmap_index < 0) {
>> +        return NULL;
>> +    }
>> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
>> +
>> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
>> +        return NULL;
>> +    }
>> +
>> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>> +                     bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    buf = g_malloc0(bm->l1_size * s->cluster_size);
>> +    for (i = 0; i < bm->l1_size; ++i) {
>> +        offset = be64_to_cpu(l1_table[i]);
>> +        if (!(offset & 1)) {
>> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
>> +                             s->cluster_size);
>> +            if (ret < 0) {
>> +                goto fail;
>> +            }
>> +        }
>> +    }
>> +
>> +    g_free(l1_table);
>> +    return buf;
>> +
>> +fail:
>> +    g_free(l1_table);
>> +    return NULL;
>> +}
>> +
>> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
>> +                            const char *name, uint64_t size,
>> +                            int granularity)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int cl_size = s->cluster_size;
>> +    int i, dirty_bitmap_index, ret = 0, n;
>> +    uint64_t *l1_table;
>> +    QCowDirtyBitmap *bm;
>> +    uint64_t buf_size;
>> +    uint8_t *p;
>> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
>> +
>> +    /* find/create dirty bitmap */
>> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
>> +    if (dirty_bitmap_index >= 0) {
>> +        bm = s->dirty_bitmaps + dirty_bitmap_index;
>> +
>> +        if (size != bm->bitmap_size ||
>> +            granularity != bm->bitmap_granularity) {
>> +            qcow2_dirty_bitmap_delete(bs, name, NULL);
>> +            dirty_bitmap_index = -1;
>> +        }
>> +    }
>> +    if (dirty_bitmap_index < 0) {
>> +        qcow2_dirty_bitmap_create(bs, name, size, granularity);
>> +        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
>> +    }
>> +    bm = s->dirty_bitmaps + dirty_bitmap_index;
> I catch a segfault right around here if I do the following:
>
> ./x86_64-softmmu/qemu-system-x86_64 --dirty-bitmap
> file=bitmaps.qcow2,name=bitmap0,drive=drive0 -drive
> if=none,file=hda.qcow2,id=drive0 -device ide-hd,drive=drive0
>
> hda.qcow2 and bitmaps.qcow2 are both empty files, but bitmaps.qcow2 has
> a size of '0'.
empty file or qcow2 files of size 0 (with header) ?
>
> Then when I click close in the QEMU GTK frontend, we hit a segfault when
> trying to close because s->dirty_bitmaps is NULL, because it appears as
> if we've never actually tried to add the (empty) bitmap to the (empty) file.
>
> Your iotest works, but I am not actually sure why, because I don't
> actually know how to *create* a persistent bitmap. I thought that the
> -dirty-bitmap CLI would create one in the file specified with file=, but
> it apparently only creates an in-memory bitmap and sets the file
> pointer, but never initializes any of these structures. Then, when we go
> to close, it gets confused and everything breaks a bit.
>
>> +
>> +    /* read l1 table */
>> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>> +                     bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto finish;
>> +    }
>> +
>> +    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
>> +    buf_size = align_offset(buf_size, 4);
>> +    n = buf_size / cl_size;
>> +    p = buf;
>> +    for (i = 0; i < bm->l1_size; ++i) {
>> +        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
>> +        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
>> +
>> +        if (buffer_is_zero(p, write_size)) {
>> +            if (addr) {
>> +                qcow2_free_clusters(bs, addr, cl_size,
>> +                                    QCOW2_DISCARD_ALWAYS);
>> +            }
>> +            l1_table[i] = cpu_to_be64(1);
>> +        } else {
>> +            if (!addr) {
>> +                addr = qcow2_alloc_clusters(bs, cl_size);
>> +                l1_table[i] = cpu_to_be64(addr);
>> +            }
>> +
>> +            ret = bdrv_pwrite(bs->file, addr, p, write_size);
>> +            if (ret < 0) {
>> +                goto finish;
>> +            }
>> +        }
>> +
>> +        p += cl_size;
>> +    }
>> +
>> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
>> +                      bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto finish;
>> +    }
>> +
>> +finish:
>> +    g_free(l1_table);
>> +    return ret;
>> +}
>> +/* if no id is provided, a new one is constructed */
>> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
>> +                              uint64_t size, int granularity)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    QCowDirtyBitmap *new_dirty_bitmap_list = NULL;
>> +    QCowDirtyBitmap *old_dirty_bitmap_list = NULL;
>> +    QCowDirtyBitmap sn1, *bm = &sn1;
>> +    int i, ret;
>> +    uint64_t *l1_table = NULL;
>> +    int64_t l1_table_offset;
>> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
>> +
>> +    if (s->nb_dirty_bitmaps >= QCOW_MAX_DIRTY_BITMAPS) {
>> +        return -EFBIG;
>> +    }
>> +
>> +    memset(bm, 0, sizeof(*bm));
>> +
>> +    /* Check that the ID is unique */
>> +    if (find_dirty_bitmap_by_name(bs, name) >= 0) {
>> +        return -EEXIST;
>> +    }
>> +
>> +    /* Populate bm with passed data */
>> +    bm->name = g_strdup(name);
>> +    bm->bitmap_granularity = granularity;
>> +    bm->bitmap_size = size;
>> +
>> +    bm->l1_size =
>> +        size_to_clusters(s, (((size - 1) / sector_granularity) >> 3) + 1);
>> +    l1_table_offset =
>> +        qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
>> +    if (l1_table_offset < 0) {
>> +        ret = l1_table_offset;
>> +        goto fail;
>> +    }
>> +    bm->l1_table_offset = l1_table_offset;
>> +
>> +    l1_table = g_try_new(uint64_t, bm->l1_size);
>> +    if (l1_table == NULL) {
>> +        ret = -ENOMEM;
>> +        goto fail;
>> +    }
>> +
>> +    /* initialize with zero clusters */
>> +    for (i = 0; i < s->l1_size; i++) {
>> +        l1_table[i] = cpu_to_be64(1);
> bm->l1_size here in my crash output is just "1",
> but s->l1_size is 16, so we crash all over this array.
>
> I assume you meant bm->l1_size here. This is a good case to make against
> calling everything "L1."
>
>> +    }
>> +
>> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
>> +                                        s->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
>> +                      s->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    g_free(l1_table);
> I can also catch a segfault here by doing something like this:
>
> ./x86_64-softmmu/qemu-system-x86_64 -drive
> if=none,format=qcow2,cache=writethrough,file=hda.qcow2,id=drive0
> -dirty-bitmap name=bitmap,drive=drive0
>
> Trying to mimick your iotest which does not use an external bitmap file
> -- it uses the implicit self-storage.
>
> In this case, hda.qcow2 is still empty (but sized as 8GB) and I try to
> quit before any writes occur.
>
> freeing l1_table here causes memory corruption and even valgrind goes
> down in flames:
>
> ==13284== Invalid write of size 8
> ==13284==    at 0x53A15E: qcow2_dirty_bitmap_create
> (qcow2-dirty-bitmap.c:406)
> ==13284==    by 0x539D15: qcow2_dirty_bitmap_store
> (qcow2-dirty-bitmap.c:307)
> ==13284==    by 0x505F27: bdrv_store_dirty_bitmap (block.c:3176)
> ==13284==    by 0x50306D: bdrv_close (block.c:1739)
> ==13284==    by 0x5032FF: bdrv_close_all (block.c:1797)
> ==13284==    by 0x3049DC: main (vl.c:4577)
> ==13284==  Address 0x239b7978 is 0 bytes after a block of size 8 alloc'd
> ==13284==    at 0x4A06BCF: malloc (vg_replace_malloc.c:296)
> ==13284==    by 0x300111: malloc_and_trace (vl.c:2706)
> ==13284==    by 0x62B954E: g_try_malloc (gmem.c:242)
> ==13284==    by 0x53A11E: qcow2_dirty_bitmap_create
> (qcow2-dirty-bitmap.c:398)
> ==13284==    by 0x539D15: qcow2_dirty_bitmap_store
> (qcow2-dirty-bitmap.c:307)
> ==13284==    by 0x505F27: bdrv_store_dirty_bitmap (block.c:3176)
> ==13284==    by 0x50306D: bdrv_close (block.c:1739)
> ==13284==    by 0x5032FF: bdrv_close_all (block.c:1797)
> ==13284==    by 0x3049DC: main (vl.c:4577)
> ==13284==
> --13284-- VALGRIND INTERNAL ERROR: Valgrind received a signal 11
> (SIGSEGV) - exiting
> --13284-- si_code=80;  Faulting address: 0x0;  sp: 0x8090a1de0
>
> valgrind: the 'impossible' happened:
>     Killed by fatal signal
>
>> +    l1_table = NULL;
>> +
>> +    /* Append the new dirty bitmap to the dirty bitmap list */
>> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
>> +    if (s->dirty_bitmaps) {
>> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
>> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
>> +        old_dirty_bitmap_list = s->dirty_bitmaps;
>> +    }
>> +    s->dirty_bitmaps = new_dirty_bitmap_list;
>> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
>> +
>> +    ret = qcow2_write_dirty_bitmaps(bs);
>> +    if (ret < 0) {
>> +        g_free(s->dirty_bitmaps);
>> +        s->dirty_bitmaps = old_dirty_bitmap_list;
>> +        s->nb_dirty_bitmaps--;
>> +        goto fail;
>> +    }
>> +
>> +    g_free(old_dirty_bitmap_list);
>> +
>> +    return 0;
>> +
> Disk is 8GiB, 16,777,216 sectors, and bm->bitmap_size matches that.
>> +fail:
>> +    g_free(bm->name);
>> +    g_free(l1_table);
>> +
>> +    return ret;
>> +}
>> +
>> +static int qcow2_dirty_bitmap_free_clusters(BlockDriverState *bs,
>> +                                            QCowDirtyBitmap *bm)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int ret, i;
>> +    uint64_t *l1_table = g_new(uint64_t, bm->l1_size);
>> +
>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>> +                     bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        g_free(l1_table);
>> +        return ret;
>> +    }
>> +
>> +    for (i = 0; i < bm->l1_size; ++i) {
>> +        uint64_t addr = be64_to_cpu(l1_table[i]);
>> +        qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_ALWAYS);
>> +    }
>> +
>> +    qcow2_free_clusters(bs, bm->l1_table_offset, bm->l1_size * sizeof(uint64_t),
>> +                        QCOW2_DISCARD_ALWAYS);
>> +
>> +    g_free(l1_table);
>> +    return 0;
>> +}
>> +
>> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
>> +                              const char *name,
>> +                              Error **errp)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    QCowDirtyBitmap bm;
>> +    int dirty_bitmap_index, ret = 0;
>> +
>> +    /* Search the dirty_bitmap */
>> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
>> +    if (dirty_bitmap_index < 0) {
>> +        error_setg(errp, "Can't find the dirty bitmap");
>> +        return -ENOENT;
>> +    }
>> +    bm = s->dirty_bitmaps[dirty_bitmap_index];
>> +
>> +    /* Remove it from the dirty_bitmap list */
>> +    memmove(s->dirty_bitmaps + dirty_bitmap_index,
>> +            s->dirty_bitmaps + dirty_bitmap_index + 1,
>> +            (s->nb_dirty_bitmaps - dirty_bitmap_index - 1) * sizeof(bm));
>> +    s->nb_dirty_bitmaps--;
>> +    ret = qcow2_write_dirty_bitmaps(bs);
>> +    if (ret < 0) {
>> +        error_setg_errno(errp, -ret,
>> +                         "Failed to remove dirty bitmap"
>> +                         " from dirty bitmap list");
>> +        return ret;
>> +    }
>> +
>> +    qcow2_dirty_bitmap_free_clusters(bs, &bm);
>> +    g_free(bm.name);
>> +
>> +    return ret;
>> +}
>> diff --git a/block/qcow2.c b/block/qcow2.c
>> index b9a72e3..406e55d 100644
>> --- a/block/qcow2.c
>> +++ b/block/qcow2.c
>> @@ -61,6 +61,7 @@ typedef struct {
>>   #define  QCOW2_EXT_MAGIC_END 0
>>   #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
>>   #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
>> +#define  QCOW2_EXT_MAGIC_DIRTY_BITMAPS 0x23852875
>>   
>>   static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
>>   {
>> @@ -90,6 +91,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>>       QCowExtension ext;
>>       uint64_t offset;
>>       int ret;
>> +    Qcow2DirtyBitmapHeaderExt dirty_bitmaps_ext;
>>   
>>   #ifdef DEBUG_EXT
>>       printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
>> @@ -160,6 +162,33 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>>               }
>>               break;
>>   
>> +        case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
>> +            ret = bdrv_pread(bs->file, offset, &dirty_bitmaps_ext, ext.len);
>> +            if (ret < 0) {
>> +                error_setg_errno(errp, -ret, "ERROR: dirty_bitmaps_ext: "
>> +                                 "Could not read ext header");
>> +                return ret;
>> +            }
>> +
>> +            be64_to_cpus(&dirty_bitmaps_ext.dirty_bitmaps_offset);
>> +            be32_to_cpus(&dirty_bitmaps_ext.nb_dirty_bitmaps);
>> +
>> +            s->dirty_bitmaps_offset = dirty_bitmaps_ext.dirty_bitmaps_offset;
>> +            s->nb_dirty_bitmaps = dirty_bitmaps_ext.nb_dirty_bitmaps;
>> +
>> +            ret = qcow2_read_dirty_bitmaps(bs);
>> +            if (ret < 0) {
>> +                error_setg_errno(errp, -ret, "Could not read dirty bitmaps");
>> +                return ret;
>> +            }
>> +
>> +#ifdef DEBUG_EXT
>> +            printf("Qcow2: Got dirty bitmaps extension:"
>> +                   " offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
>> +                   s->dirty_bitmaps_offset, s->nb_dirty_bitmaps);
>> +#endif
>> +            break;
>> +
>>           default:
>>               /* unknown magic - save it in case we need to rewrite the header */
>>               {
>> @@ -1000,6 +1029,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>>       g_free(s->unknown_header_fields);
>>       cleanup_unknown_header_ext(bs);
>>       qcow2_free_snapshots(bs);
>> +    qcow2_free_dirty_bitmaps(bs);
>>       qcow2_refcount_close(bs);
>>       qemu_vfree(s->l1_table);
>>       /* else pre-write overlap checks in cache_destroy may crash */
>> @@ -1466,6 +1496,7 @@ static void qcow2_close(BlockDriverState *bs)
>>       qemu_vfree(s->cluster_data);
>>       qcow2_refcount_close(bs);
>>       qcow2_free_snapshots(bs);
>> +    qcow2_free_dirty_bitmaps(bs);
>>   }
>>   
>>   static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
>> @@ -1667,6 +1698,21 @@ int qcow2_update_header(BlockDriverState *bs)
>>       buf += ret;
>>       buflen -= ret;
>>   
>> +    if (s->nb_dirty_bitmaps > 0) {
>> +        Qcow2DirtyBitmapHeaderExt dirty_bitmaps_header = {
>> +            .nb_dirty_bitmaps = cpu_to_be32(s->nb_dirty_bitmaps),
>> +            .dirty_bitmaps_offset = cpu_to_be64(s->dirty_bitmaps_offset)
>> +        };
>> +        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DIRTY_BITMAPS,
>> +                             &dirty_bitmaps_header, sizeof(dirty_bitmaps_header),
>> +                             buflen);
>> +        if (ret < 0) {
>> +            goto fail;
>> +        }
>> +        buf += ret;
>> +        buflen -= ret;
>> +    }
>> +
>>       /* Keep unknown header extensions */
>>       QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
>>           ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
>> @@ -2176,6 +2222,12 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
>>           return -ENOTSUP;
>>       }
>>   
>> +    /* cannot proceed if image has dirty_bitmaps */
>> +    if (s->nb_dirty_bitmaps) {
>> +        error_report("Can't resize an image which has dirty bitmaps");
>> +        return -ENOTSUP;
>> +    }
>> +
>>       /* shrinking is currently not supported */
>>       if (offset < bs->total_sectors * 512) {
>>           error_report("qcow2 doesn't support shrinking images yet");
>> @@ -2952,6 +3004,10 @@ BlockDriver bdrv_qcow2 = {
>>       .bdrv_get_info          = qcow2_get_info,
>>       .bdrv_get_specific_info = qcow2_get_specific_info,
>>   
>> +    .bdrv_dirty_bitmap_load = qcow2_dirty_bitmap_load,
>> +    .bdrv_dirty_bitmap_store = qcow2_dirty_bitmap_store,
>> +    .bdrv_dirty_bitmap_delete = qcow2_dirty_bitmap_delete,
>> +
>>       .bdrv_save_vmstate    = qcow2_save_vmstate,
>>       .bdrv_load_vmstate    = qcow2_load_vmstate,
>>   
>> diff --git a/block/qcow2.h b/block/qcow2.h
>> index 422b825..24beee0 100644
>> --- a/block/qcow2.h
>> +++ b/block/qcow2.h
>> @@ -39,6 +39,7 @@
>>   
>>   #define QCOW_MAX_CRYPT_CLUSTERS 32
>>   #define QCOW_MAX_SNAPSHOTS 65536
>> +#define QCOW_MAX_DIRTY_BITMAPS 65536
>>   
>>   /* 8 MB refcount table is enough for 2 PB images at 64k cluster size
>>    * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
>> @@ -52,6 +53,8 @@
>>    * space for snapshot names and IDs */
>>   #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
>>   
>> +#define QCOW_MAX_DIRTY_BITMAPS_SIZE (1024 * QCOW_MAX_DIRTY_BITMAPS)
>> +
>>   /* indicate that the refcount of the referenced cluster is exactly one. */
>>   #define QCOW_OFLAG_COPIED     (1ULL << 63)
>>   /* indicate that the cluster is compressed (they never have the copied flag) */
>> @@ -138,6 +141,19 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
>>       /* name follows  */
>>   } QCowSnapshotHeader;
>>   
>> +typedef struct QEMU_PACKED QCowDirtyBitmapHeader {
>> +    /* header is 8 byte aligned */
>> +    uint64_t l1_table_offset;
>> +
>> +    uint32_t l1_size;
>> +    uint32_t bitmap_granularity;
>> +
>> +    uint64_t bitmap_size;
>> +    uint16_t name_size;
>> +
>> +    /* name follows  */
>> +} QCowDirtyBitmapHeader;
>> +
>>   typedef struct QEMU_PACKED QCowSnapshotExtraData {
>>       uint64_t vm_state_size_large;
>>       uint64_t disk_size;
>> @@ -156,6 +172,14 @@ typedef struct QCowSnapshot {
>>       uint64_t vm_clock_nsec;
>>   } QCowSnapshot;
>>   
>> +typedef struct QCowDirtyBitmap {
>> +    uint64_t l1_table_offset;
>> +    uint32_t l1_size;
>> +    char *name;
>> +    int bitmap_granularity;
>> +    uint64_t bitmap_size;
>> +} QCowDirtyBitmap;
>> +
>>   struct Qcow2Cache;
>>   typedef struct Qcow2Cache Qcow2Cache;
>>   
>> @@ -218,6 +242,11 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
>>   typedef void Qcow2SetRefcountFunc(void *refcount_array,
>>                                     uint64_t index, uint64_t value);
>>   
>> +typedef struct Qcow2DirtyBitmapHeaderExt {
>> +    uint32_t nb_dirty_bitmaps;
>> +    uint64_t dirty_bitmaps_offset;
>> +} QEMU_PACKED Qcow2DirtyBitmapHeaderExt;
>> +
>>   typedef struct BDRVQcowState {
>>       int cluster_bits;
>>       int cluster_size;
>> @@ -259,6 +288,11 @@ typedef struct BDRVQcowState {
>>       unsigned int nb_snapshots;
>>       QCowSnapshot *snapshots;
>>   
>> +    uint64_t dirty_bitmaps_offset;
>> +    int dirty_bitmaps_size;
>> +    unsigned int nb_dirty_bitmaps;
>> +    QCowDirtyBitmap *dirty_bitmaps;
>> +
>>       int flags;
>>       int qcow_version;
>>       bool use_lazy_refcounts;
>> @@ -570,6 +604,22 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
>>   void qcow2_free_snapshots(BlockDriverState *bs);
>>   int qcow2_read_snapshots(BlockDriverState *bs);
>>   
>> +/* qcow2-dirty-bitmap.c functions */
>> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
>> +                             const char *name, uint64_t size,
>> +                             int granularity);
>> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
>> +                                 const char *name, uint64_t size,
>> +                                 int granularity);
>> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
>> +                              uint64_t size, int granularity);
>> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
>> +                              const char *name,
>> +                              Error **errp);
>> +
>> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs);
>> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs);
>> +
>>   /* qcow2-cache.c functions */
>>   Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
>>   int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index db29b74..88855b4 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -206,6 +206,16 @@ struct BlockDriver {
>>       int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
>>       ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
>>   
>> +    int (*bdrv_dirty_bitmap_store)(BlockDriverState *bs, uint8_t *buf,
>> +                                   const char *name, uint64_t size,
>> +                                   int granularity);
>> +    uint8_t *(*bdrv_dirty_bitmap_load)(BlockDriverState *bs,
>> +                                       const char *name, uint64_t size,
>> +                                       int granularity);
>> +    int (*bdrv_dirty_bitmap_delete)(BlockDriverState *bs,
>> +                                    const char *name,
>> +                                    Error **errp);
>> +
>>       int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
>>                                int64_t pos);
>>       int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
>>
>
> In light of this, some "sanity" tests that test cases like no writes,
> empty bitmaps, empty files, etc I think will be appropriate.
>
>
Stefan Hajnoczi June 15, 2015, 2:42 p.m. UTC | #7
On Fri, Jun 12, 2015 at 03:02:33PM -0400, John Snow wrote:
> 
> 
> On 06/10/2015 10:30 AM, Stefan Hajnoczi wrote:
> > On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > 
> > I noticed a corner case, it's probably not a problem in practice:
> > 
> > Since the dirty bitmap is stored with the help of a BlockDriverState
> > (and its bs->file), it's possible that writing the bitmap will cause
> > bits in the bitmap to be dirtied!
> > 
> 
> But since it's metadata and not stored within a disk sector, can this
> actually happen? Do you have an example of a scenario where this might
> come up?

The persistent dirty bitmap for bs->file is storeed in the qcow2 BDS.
This results in recursion.

This is a misconfiguration but I just want to understand what happens
when someone does this by mistake.

Stefan
John Snow June 23, 2015, 5:57 p.m. UTC | #8
On 06/15/2015 10:42 AM, Stefan Hajnoczi wrote:
> On Fri, Jun 12, 2015 at 03:02:33PM -0400, John Snow wrote:
>>
>>
>> On 06/10/2015 10:30 AM, Stefan Hajnoczi wrote:
>>> On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>>
>>> I noticed a corner case, it's probably not a problem in practice:
>>>
>>> Since the dirty bitmap is stored with the help of a BlockDriverState
>>> (and its bs->file), it's possible that writing the bitmap will cause
>>> bits in the bitmap to be dirtied!
>>>
>>
>> But since it's metadata and not stored within a disk sector, can this
>> actually happen? Do you have an example of a scenario where this might
>> come up?
> 
> The persistent dirty bitmap for bs->file is storeed in the qcow2 BDS.
> This results in recursion.
> 
> This is a misconfiguration but I just want to understand what happens
> when someone does this by mistake.
> 
> Stefan
> 

I still don't follow you, actually.

The dirty bitmap only tracks changed virtual disk sectors, not actual
file sectors, right? Writing a bitmap that describes foo.qcow2 to
foo.qcow2 won't dirty bitmaps, it's an out-of-channel write as far as
the bitmap is concerned.

Right? Am I fatally misunderstanding the situation?
Stefan Hajnoczi June 24, 2015, 9:39 a.m. UTC | #9
On Tue, Jun 23, 2015 at 01:57:55PM -0400, John Snow wrote:
> On 06/15/2015 10:42 AM, Stefan Hajnoczi wrote:
> > On Fri, Jun 12, 2015 at 03:02:33PM -0400, John Snow wrote:
> >>
> >>
> >> On 06/10/2015 10:30 AM, Stefan Hajnoczi wrote:
> >>> On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> >>>
> >>> I noticed a corner case, it's probably not a problem in practice:
> >>>
> >>> Since the dirty bitmap is stored with the help of a BlockDriverState
> >>> (and its bs->file), it's possible that writing the bitmap will cause
> >>> bits in the bitmap to be dirtied!
> >>>
> >>
> >> But since it's metadata and not stored within a disk sector, can this
> >> actually happen? Do you have an example of a scenario where this might
> >> come up?
> > 
> > The persistent dirty bitmap for bs->file is storeed in the qcow2 BDS.
> > This results in recursion.
> > 
> > This is a misconfiguration but I just want to understand what happens
> > when someone does this by mistake.
> > 
> > Stefan
> > 
> 
> I still don't follow you, actually.
> 
> The dirty bitmap only tracks changed virtual disk sectors, not actual
> file sectors, right? Writing a bitmap that describes foo.qcow2 to
> foo.qcow2 won't dirty bitmaps, it's an out-of-channel write as far as
> the bitmap is concerned.
> 
> Right? Am I fatally misunderstanding the situation?

There is no out-of-channel for bs->file.  The bs->file raw-posix image
file includes the bs qcow2 sectors that hold the dirty bitmap.

This is a corner case and I don't think any valid configuration would
hit it.

Stefan
Vladimir Sementsov-Ogievskiy Aug. 14, 2015, 5:14 p.m. UTC | #10
On 10.06.2015 17:30, Stefan Hajnoczi wrote:
> On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>
>
>
>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>> +                     bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto fail;
>> +    }
>> +
>> +    buf = g_malloc0(bm->l1_size * s->cluster_size);
> What is the maximum l1_size value?  cluster_size and l1_size are 32-bit
> so with 64 KB cluster_size this overflows if l1_size > 65535.  Do you
> want to cast to size_t?

Hmm. What the maximum RAM space we'd like to spend on dirty bitmap? I 
think 4Gb is too much.. So here should be limited not the l1_size but 
number of bytes needed to store the bitmap. What is maximum disk size we 
are dealing with?
Stefan Hajnoczi Aug. 26, 2015, 9:09 a.m. UTC | #11
On Fri, Aug 14, 2015 at 08:14:46PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 10.06.2015 17:30, Stefan Hajnoczi wrote:
> >On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> >>+    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> >>+                     bm->l1_size * sizeof(uint64_t));
> >>+    if (ret < 0) {
> >>+        goto fail;
> >>+    }
> >>+
> >>+    buf = g_malloc0(bm->l1_size * s->cluster_size);
> >What is the maximum l1_size value?  cluster_size and l1_size are 32-bit
> >so with 64 KB cluster_size this overflows if l1_size > 65535.  Do you
> >want to cast to size_t?
> 
> Hmm. What the maximum RAM space we'd like to spend on dirty bitmap? I think
> 4Gb is too much.. So here should be limited not the l1_size but number of
> bytes needed to store the bitmap. What is maximum disk size we are dealing
> with?

Modern file systems support up to exa- (XFS) or zetta- (ZFS) byte size.
If the disk image size is large, then the cluster size will probably
also be set larger than 64 KB (e.g. 1 MB).

Anyway, with 64 KB cluster size & bitmap granularity a 128 MB dirty
bitmap covers a 64 TB disk image.  So how about 256 MB or 512 MB max
dirty bitmap size?

Stefan
Vladimir Sementsov-Ogievskiy Aug. 26, 2015, 1:15 p.m. UTC | #12
On 13.06.2015 00:55, John Snow wrote:
>
> On 06/08/2015 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
>> From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>>
>> Adds dirty-bitmaps feature to qcow2 format as specified in
>> docs/specs/qcow2.txt
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
...
>> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
>> +                            const char *name, uint64_t size,
>> +                            int granularity)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int cl_size = s->cluster_size;
>> +    int i, dirty_bitmap_index, ret = 0, n;
>> +    uint64_t *l1_table;
>> +    QCowDirtyBitmap *bm;
>> +    uint64_t buf_size;
>> +    uint8_t *p;
>> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
>> +
>> +    /* find/create dirty bitmap */
>> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
>> +    if (dirty_bitmap_index >= 0) {
>> +        bm = s->dirty_bitmaps + dirty_bitmap_index;
>> +
>> +        if (size != bm->bitmap_size ||
>> +            granularity != bm->bitmap_granularity) {
>> +            qcow2_dirty_bitmap_delete(bs, name, NULL);
> If this fails, we should 'return ret'.
>
>> +            dirty_bitmap_index = -1;
>> +        }
>> +    }
> Oh, find_dirty_bitmap_by_name only looks by name, but then you check to
> make sure the size and granularity matches. If it doesn't, you actually
> create a new bitmap with the *same name* but different attributes, and
> delete the old one.
>
> Is that appropriate? I guess if we're already here in store, it means we
> made it past the add checks... which means for whatever reason we
> definitely want to store *this* bitmap...
>
> I think this code is a little extraneous, it might be best to just issue
> an ultimatum that "You can't have two bitmaps with the same name in a
> file." and let that be that -- finding something with the wrong size
> would just simply be an error.
>
>> +    if (dirty_bitmap_index < 0) {
>> +        qcow2_dirty_bitmap_create(bs, name, size, granularity);
> If this fails, we need to return ret immediately.

Not agree. I think it's ok for qcow2_dirty_bitmap_store to store given 
bitmap if it can. It can in two cases (in next patchset version):

1) found the bitmap with the same name, size and granularity: it is 
assumed to be the previous version and will be rewritten
2) not found the bitmap: it's ok, just save it.. This case works when 
the bitmap was created while qemu runs.



>
>> +        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
>> +    }
>> +    bm = s->dirty_bitmaps + dirty_bitmap_index;
>> +
>> +    /* read l1 table */
>> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>> +                     bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto finish;
>> +    }
>> +
>> +    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
>> +    buf_size = align_offset(buf_size, 4);
>> +    n = buf_size / cl_size;
>> +    p = buf;
>> +    for (i = 0; i < bm->l1_size; ++i) {
>> +        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
>> +        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
>> +
>> +        if (buffer_is_zero(p, write_size)) {
>> +            if (addr) {
>> +                qcow2_free_clusters(bs, addr, cl_size,
>> +                                    QCOW2_DISCARD_ALWAYS);
>> +            }
>> +            l1_table[i] = cpu_to_be64(1);
>> +        } else {
>> +            if (!addr) {
>> +                addr = qcow2_alloc_clusters(bs, cl_size);
>> +                l1_table[i] = cpu_to_be64(addr);
>> +            }
>> +
>> +            ret = bdrv_pwrite(bs->file, addr, p, write_size);
>> +            if (ret < 0) {
>> +                goto finish;
>> +            }
>> +        }
>> +
>> +        p += cl_size;
>> +    }
>> +
>> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
>> +                      bm->l1_size * sizeof(uint64_t));
>> +    if (ret < 0) {
>> +        goto finish;
>> +    }
>> +
>> +finish:
>> +    g_free(l1_table);
>> +    return ret;
>> +}
>> +/* if no id is provided, a new one is constructed */
>> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
Vladimir Sementsov-Ogievskiy Aug. 26, 2015, 2:14 p.m. UTC | #13
On 26.08.2015 16:15, Vladimir Sementsov-Ogievskiy wrote:
> On 13.06.2015 00:55, John Snow wrote:
>>
>> On 06/08/2015 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>>>
>>> Adds dirty-bitmaps feature to qcow2 format as specified in
>>> docs/specs/qcow2.txt
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
> ...
>>> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
>>> +                            const char *name, uint64_t size,
>>> +                            int granularity)
>>> +{
>>> +    BDRVQcowState *s = bs->opaque;
>>> +    int cl_size = s->cluster_size;
>>> +    int i, dirty_bitmap_index, ret = 0, n;
>>> +    uint64_t *l1_table;
>>> +    QCowDirtyBitmap *bm;
>>> +    uint64_t buf_size;
>>> +    uint8_t *p;
>>> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
>>> +
>>> +    /* find/create dirty bitmap */
>>> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
>>> +    if (dirty_bitmap_index >= 0) {
>>> +        bm = s->dirty_bitmaps + dirty_bitmap_index;
>>> +
>>> +        if (size != bm->bitmap_size ||
>>> +            granularity != bm->bitmap_granularity) {
>>> +            qcow2_dirty_bitmap_delete(bs, name, NULL);
>> If this fails, we should 'return ret'.
>>
>>> +            dirty_bitmap_index = -1;
>>> +        }
>>> +    }
>> Oh, find_dirty_bitmap_by_name only looks by name, but then you check to
>> make sure the size and granularity matches. If it doesn't, you actually
>> create a new bitmap with the *same name* but different attributes, and
>> delete the old one.
>>
>> Is that appropriate? I guess if we're already here in store, it means we
>> made it past the add checks... which means for whatever reason we
>> definitely want to store *this* bitmap...
>>
>> I think this code is a little extraneous, it might be best to just issue
>> an ultimatum that "You can't have two bitmaps with the same name in a
>> file." and let that be that -- finding something with the wrong size
>> would just simply be an error.
>>
>>> +    if (dirty_bitmap_index < 0) {
>>> +        qcow2_dirty_bitmap_create(bs, name, size, granularity);
>> If this fails, we need to return ret immediately.
>
> Not agree. I think it's ok for qcow2_dirty_bitmap_store to store given 
> bitmap if it can. It can in two cases (in next patchset version):
>
> 1) found the bitmap with the same name, size and granularity: it is 
> assumed to be the previous version and will be rewritten
> 2) not found the bitmap: it's ok, just save it.. This case works when 
> the bitmap was created while qemu runs.
>

Oh, sorry. You mean,
ret = qcow2_dirty_bitmap_create ...
if (ret < 0) {
return ret;
}

, ok

>
>
>>
>>> +        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
>>> +    }
>>> +    bm = s->dirty_bitmaps + dirty_bitmap_index;
>>> +
>>> +    /* read l1 table */
>>> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
>>> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
>>> +                     bm->l1_size * sizeof(uint64_t));
>>> +    if (ret < 0) {
>>> +        goto finish;
>>> +    }
>>> +
>>> +    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
>>> +    buf_size = align_offset(buf_size, 4);
>>> +    n = buf_size / cl_size;
>>> +    p = buf;
>>> +    for (i = 0; i < bm->l1_size; ++i) {
>>> +        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
>>> +        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
>>> +
>>> +        if (buffer_is_zero(p, write_size)) {
>>> +            if (addr) {
>>> +                qcow2_free_clusters(bs, addr, cl_size,
>>> +                                    QCOW2_DISCARD_ALWAYS);
>>> +            }
>>> +            l1_table[i] = cpu_to_be64(1);
>>> +        } else {
>>> +            if (!addr) {
>>> +                addr = qcow2_alloc_clusters(bs, cl_size);
>>> +                l1_table[i] = cpu_to_be64(addr);
>>> +            }
>>> +
>>> +            ret = bdrv_pwrite(bs->file, addr, p, write_size);
>>> +            if (ret < 0) {
>>> +                goto finish;
>>> +            }
>>> +        }
>>> +
>>> +        p += cl_size;
>>> +    }
>>> +
>>> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
>>> +                      bm->l1_size * sizeof(uint64_t));
>>> +    if (ret < 0) {
>>> +        goto finish;
>>> +    }
>>> +
>>> +finish:
>>> +    g_free(l1_table);
>>> +    return ret;
>>> +}
>>> +/* if no id is provided, a new one is constructed */
>>> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
>
Vladimir Sementsov-Ogievskiy Aug. 27, 2015, 12:43 p.m. UTC | #14
I've made this similar to snapshots, but now I think...

Why should we maintain dirty bitmap directory in ram? This only gives us 
saving of one extra bdrv_read of header on loading bitmap, but is 
doesn't matter in comparison with reading the whole bitmap to memory. 
Also, the bitmap should not be loaded more then once.

Maintaining dirty bitmap directory in ram also saves extra read and 
conversion BE->cpu on updating header (for changing in_use). But I'm not 
sure that this is serious, because write and conversion cpu->BE should 
be done anyway.

Drawback of current approach is obvious: extra code, extra structs, 
extra conversions.

One read of the whole table may be will be needed for loading 
auto-loading bitmaps, but then this table can be freed from ram.

To have fast access to bitmap headers (for changing in_use field, for 
ex.), may be it will be good to maintain in ram mapping from bitmap name 
to offset in the image. List of pairs "struct {const char *name, 
uint64_t offset}" for example..

On 08.06.2015 18:21, Vladimir Sementsov-Ogievskiy wrote:
> From: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>
> Adds dirty-bitmaps feature to qcow2 format as specified in
> docs/specs/qcow2.txt
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>   block/Makefile.objs        |   2 +-
>   block/qcow2-dirty-bitmap.c | 503 +++++++++++++++++++++++++++++++++++++++++++++
>   block/qcow2.c              |  56 +++++
>   block/qcow2.h              |  50 +++++
>   include/block/block_int.h  |  10 +
>   5 files changed, 620 insertions(+), 1 deletion(-)
>   create mode 100644 block/qcow2-dirty-bitmap.c
>
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 0d8c2a4..bff12b4 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -1,5 +1,5 @@
>   block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
> -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
> +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-dirty-bitmap.o
>   block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>   block-obj-y += qed-check.o
>   block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
> new file mode 100644
> index 0000000..bc0167c
> --- /dev/null
> +++ b/block/qcow2-dirty-bitmap.c
> @@ -0,0 +1,503 @@
> +/*
> + * Dirty bitmpas for the QCOW version 2 format
> + *
> + * Copyright (c) 2014-2015 Vladimir Sementsov-Ogievskiy
> + *
> + * This file is derived from qcow2-snapshot.c, original copyright:
> + * Copyright (c) 2004-2006 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu-common.h"
> +#include "block/block_int.h"
> +#include "block/qcow2.h"
> +
> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        g_free(s->dirty_bitmaps[i].name);
> +    }
> +    g_free(s->dirty_bitmaps);
> +    s->dirty_bitmaps = NULL;
> +    s->nb_dirty_bitmaps = 0;
> +}
> +
> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmapHeader h;
> +    QCowDirtyBitmap *bm;
> +    int i, name_size;
> +    int64_t offset;
> +    int ret;
> +
> +    if (!s->nb_dirty_bitmaps) {
> +        s->dirty_bitmaps = NULL;
> +        s->dirty_bitmaps_size = 0;
> +        return 0;
> +    }
> +
> +    offset = s->dirty_bitmaps_offset;
> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        /* Read statically sized part of the dirty_bitmap header */
> +        offset = align_offset(offset, 8);
> +        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        offset += sizeof(h);
> +        bm = s->dirty_bitmaps + i;
> +        bm->l1_table_offset = be64_to_cpu(h.l1_table_offset);
> +        bm->l1_size = be32_to_cpu(h.l1_size);
> +        bm->bitmap_granularity = be32_to_cpu(h.bitmap_granularity);
> +        bm->bitmap_size = be64_to_cpu(h.bitmap_size);
> +
> +        name_size = be16_to_cpu(h.name_size);
> +
> +        /* Read dirty_bitmap name */
> +        bm->name = g_malloc(name_size + 1);
> +        ret = bdrv_pread(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +        bm->name[name_size] = '\0';
> +
> +        if (offset - s->dirty_bitmaps_offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
> +            ret = -EFBIG;
> +            goto fail;
> +        }
> +    }
> +
> +    assert(offset - s->dirty_bitmaps_offset <= INT_MAX);
> +    s->dirty_bitmaps_size = offset - s->dirty_bitmaps_offset;
> +    return 0;
> +
> +fail:
> +    qcow2_free_dirty_bitmaps(bs);
> +    return ret;
> +}
> +
> +/* Add at the end of the file a new table of dirty bitmaps */
> +static int qcow2_write_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap *bm;
> +    QCowDirtyBitmapHeader h;
> +    int i, name_size, dirty_bitmaps_size;
> +    int64_t offset, dirty_bitmaps_offset = 0;
> +    int ret;
> +
> +    int old_dirty_bitmaps_size = s->dirty_bitmaps_size;
> +    int64_t old_dirty_bitmaps_offset = s->dirty_bitmaps_offset;
> +
> +    /* Compute the size of the dirty bitmaps table */
> +    offset = 0;
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        offset = align_offset(offset, 8);
> +        offset += sizeof(h);
> +        offset += strlen(bm->name);
> +
> +        if (offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
> +            ret = -EFBIG;
> +            goto fail;
> +        }
> +    }
> +
> +    assert(offset <= INT_MAX);
> +    dirty_bitmaps_size = offset;
> +
> +    /* Allocate space for the new dirty bitmap table */
> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
> +    offset = dirty_bitmaps_offset;
> +    if (offset < 0) {
> +        ret = offset;
> +        goto fail;
> +    }
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* The dirty bitmap table position has not yet been updated, so these
> +     * clusters must indeed be completely free */
> +    ret = qcow2_pre_write_overlap_check(bs, 0, offset, dirty_bitmaps_size);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* Write all dirty bitmaps to the new table */
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        memset(&h, 0, sizeof(h));
> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
> +        h.l1_size = cpu_to_be32(bm->l1_size);
> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
> +
> +        name_size = strlen(bm->name);
> +        assert(name_size <= UINT16_MAX);
> +        h.name_size = cpu_to_be16(name_size);
> +        offset = align_offset(offset, 8);
> +
> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += sizeof(h);
> +
> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +    }
> +
> +    /*
> +     * Update the header extension to point to the new dirty bitmap table. This
> +     * requires the new table and its refcounts to be stable on disk.
> +     */
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
> +    ret = qcow2_update_header(bs);
> +    if (ret < 0) {
> +        fprintf(stderr, "Could not update qcow2 header\n");
> +        goto fail;
> +    }
> +
> +    /* Free old dirty bitmap table */
> +    qcow2_free_clusters(bs, old_dirty_bitmaps_offset, old_dirty_bitmaps_size,
> +                        QCOW2_DISCARD_ALWAYS);
> +    return 0;
> +
> +fail:
> +    if (dirty_bitmaps_offset > 0) {
> +        qcow2_free_clusters(bs, dirty_bitmaps_offset, dirty_bitmaps_size,
> +                            QCOW2_DISCARD_ALWAYS);
> +    }
> +    return ret;
> +}
> +
> +static int find_dirty_bitmap_by_name(BlockDriverState *bs,
> +                                     const char *name)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        if (!strcmp(s->dirty_bitmaps[i].name, name)) {
> +            return i;
> +        }
> +    }
> +
> +    return -1;
> +}
> +
> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i, dirty_bitmap_index, ret;
> +    uint64_t offset;
> +    QCowDirtyBitmap *bm;
> +    uint64_t *l1_table;
> +    uint8_t *buf;
> +
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        return NULL;
> +    }
> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
> +        return NULL;
> +    }
> +
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    buf = g_malloc0(bm->l1_size * s->cluster_size);
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        offset = be64_to_cpu(l1_table[i]);
> +        if (!(offset & 1)) {
> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
> +                             s->cluster_size);
> +            if (ret < 0) {
> +                goto fail;
> +            }
> +        }
> +    }
> +
> +    g_free(l1_table);
> +    return buf;
> +
> +fail:
> +    g_free(l1_table);
> +    return NULL;
> +}
> +
> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int cl_size = s->cluster_size;
> +    int i, dirty_bitmap_index, ret = 0, n;
> +    uint64_t *l1_table;
> +    QCowDirtyBitmap *bm;
> +    uint64_t buf_size;
> +    uint8_t *p;
> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +
> +    /* find/create dirty bitmap */
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index >= 0) {
> +        bm = s->dirty_bitmaps + dirty_bitmap_index;
> +
> +        if (size != bm->bitmap_size ||
> +            granularity != bm->bitmap_granularity) {
> +            qcow2_dirty_bitmap_delete(bs, name, NULL);
> +            dirty_bitmap_index = -1;
> +        }
> +    }
> +    if (dirty_bitmap_index < 0) {
> +        qcow2_dirty_bitmap_create(bs, name, size, granularity);
> +        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
> +    }
> +    bm = s->dirty_bitmaps + dirty_bitmap_index;
> +
> +    /* read l1 table */
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto finish;
> +    }
> +
> +    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
> +    buf_size = align_offset(buf_size, 4);
> +    n = buf_size / cl_size;
> +    p = buf;
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
> +        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
> +
> +        if (buffer_is_zero(p, write_size)) {
> +            if (addr) {
> +                qcow2_free_clusters(bs, addr, cl_size,
> +                                    QCOW2_DISCARD_ALWAYS);
> +            }
> +            l1_table[i] = cpu_to_be64(1);
> +        } else {
> +            if (!addr) {
> +                addr = qcow2_alloc_clusters(bs, cl_size);
> +                l1_table[i] = cpu_to_be64(addr);
> +            }
> +
> +            ret = bdrv_pwrite(bs->file, addr, p, write_size);
> +            if (ret < 0) {
> +                goto finish;
> +            }
> +        }
> +
> +        p += cl_size;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto finish;
> +    }
> +
> +finish:
> +    g_free(l1_table);
> +    return ret;
> +}
> +/* if no id is provided, a new one is constructed */
> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
> +                              uint64_t size, int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap *new_dirty_bitmap_list = NULL;
> +    QCowDirtyBitmap *old_dirty_bitmap_list = NULL;
> +    QCowDirtyBitmap sn1, *bm = &sn1;
> +    int i, ret;
> +    uint64_t *l1_table = NULL;
> +    int64_t l1_table_offset;
> +    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +
> +    if (s->nb_dirty_bitmaps >= QCOW_MAX_DIRTY_BITMAPS) {
> +        return -EFBIG;
> +    }
> +
> +    memset(bm, 0, sizeof(*bm));
> +
> +    /* Check that the ID is unique */
> +    if (find_dirty_bitmap_by_name(bs, name) >= 0) {
> +        return -EEXIST;
> +    }
> +
> +    /* Populate bm with passed data */
> +    bm->name = g_strdup(name);
> +    bm->bitmap_granularity = granularity;
> +    bm->bitmap_size = size;
> +
> +    bm->l1_size =
> +        size_to_clusters(s, (((size - 1) / sector_granularity) >> 3) + 1);
> +    l1_table_offset =
> +        qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
> +    if (l1_table_offset < 0) {
> +        ret = l1_table_offset;
> +        goto fail;
> +    }
> +    bm->l1_table_offset = l1_table_offset;
> +
> +    l1_table = g_try_new(uint64_t, bm->l1_size);
> +    if (l1_table == NULL) {
> +        ret = -ENOMEM;
> +        goto fail;
> +    }
> +
> +    /* initialize with zero clusters */
> +    for (i = 0; i < s->l1_size; i++) {
> +        l1_table[i] = cpu_to_be64(1);
> +    }
> +
> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
> +                                        s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    g_free(l1_table);
> +    l1_table = NULL;
> +
> +    /* Append the new dirty bitmap to the dirty bitmap list */
> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
> +    if (s->dirty_bitmaps) {
> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
> +        old_dirty_bitmap_list = s->dirty_bitmaps;
> +    }
> +    s->dirty_bitmaps = new_dirty_bitmap_list;
> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
> +
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        g_free(s->dirty_bitmaps);
> +        s->dirty_bitmaps = old_dirty_bitmap_list;
> +        s->nb_dirty_bitmaps--;
> +        goto fail;
> +    }
> +
> +    g_free(old_dirty_bitmap_list);
> +
> +    return 0;
> +
> +fail:
> +    g_free(bm->name);
> +    g_free(l1_table);
> +
> +    return ret;
> +}
> +
> +static int qcow2_dirty_bitmap_free_clusters(BlockDriverState *bs,
> +                                            QCowDirtyBitmap *bm)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int ret, i;
> +    uint64_t *l1_table = g_new(uint64_t, bm->l1_size);
> +
> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        g_free(l1_table);
> +        return ret;
> +    }
> +
> +    for (i = 0; i < bm->l1_size; ++i) {
> +        uint64_t addr = be64_to_cpu(l1_table[i]);
> +        qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_ALWAYS);
> +    }
> +
> +    qcow2_free_clusters(bs, bm->l1_table_offset, bm->l1_size * sizeof(uint64_t),
> +                        QCOW2_DISCARD_ALWAYS);
> +
> +    g_free(l1_table);
> +    return 0;
> +}
> +
> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
> +                              const char *name,
> +                              Error **errp)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmap bm;
> +    int dirty_bitmap_index, ret = 0;
> +
> +    /* Search the dirty_bitmap */
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        error_setg(errp, "Can't find the dirty bitmap");
> +        return -ENOENT;
> +    }
> +    bm = s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    /* Remove it from the dirty_bitmap list */
> +    memmove(s->dirty_bitmaps + dirty_bitmap_index,
> +            s->dirty_bitmaps + dirty_bitmap_index + 1,
> +            (s->nb_dirty_bitmaps - dirty_bitmap_index - 1) * sizeof(bm));
> +    s->nb_dirty_bitmaps--;
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret,
> +                         "Failed to remove dirty bitmap"
> +                         " from dirty bitmap list");
> +        return ret;
> +    }
> +
> +    qcow2_dirty_bitmap_free_clusters(bs, &bm);
> +    g_free(bm.name);
> +
> +    return ret;
> +}
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b9a72e3..406e55d 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -61,6 +61,7 @@ typedef struct {
>   #define  QCOW2_EXT_MAGIC_END 0
>   #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
>   #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
> +#define  QCOW2_EXT_MAGIC_DIRTY_BITMAPS 0x23852875
>   
>   static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
>   {
> @@ -90,6 +91,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>       QCowExtension ext;
>       uint64_t offset;
>       int ret;
> +    Qcow2DirtyBitmapHeaderExt dirty_bitmaps_ext;
>   
>   #ifdef DEBUG_EXT
>       printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
> @@ -160,6 +162,33 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>               }
>               break;
>   
> +        case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
> +            ret = bdrv_pread(bs->file, offset, &dirty_bitmaps_ext, ext.len);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "ERROR: dirty_bitmaps_ext: "
> +                                 "Could not read ext header");
> +                return ret;
> +            }
> +
> +            be64_to_cpus(&dirty_bitmaps_ext.dirty_bitmaps_offset);
> +            be32_to_cpus(&dirty_bitmaps_ext.nb_dirty_bitmaps);
> +
> +            s->dirty_bitmaps_offset = dirty_bitmaps_ext.dirty_bitmaps_offset;
> +            s->nb_dirty_bitmaps = dirty_bitmaps_ext.nb_dirty_bitmaps;
> +
> +            ret = qcow2_read_dirty_bitmaps(bs);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "Could not read dirty bitmaps");
> +                return ret;
> +            }
> +
> +#ifdef DEBUG_EXT
> +            printf("Qcow2: Got dirty bitmaps extension:"
> +                   " offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
> +                   s->dirty_bitmaps_offset, s->nb_dirty_bitmaps);
> +#endif
> +            break;
> +
>           default:
>               /* unknown magic - save it in case we need to rewrite the header */
>               {
> @@ -1000,6 +1029,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>       g_free(s->unknown_header_fields);
>       cleanup_unknown_header_ext(bs);
>       qcow2_free_snapshots(bs);
> +    qcow2_free_dirty_bitmaps(bs);
>       qcow2_refcount_close(bs);
>       qemu_vfree(s->l1_table);
>       /* else pre-write overlap checks in cache_destroy may crash */
> @@ -1466,6 +1496,7 @@ static void qcow2_close(BlockDriverState *bs)
>       qemu_vfree(s->cluster_data);
>       qcow2_refcount_close(bs);
>       qcow2_free_snapshots(bs);
> +    qcow2_free_dirty_bitmaps(bs);
>   }
>   
>   static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
> @@ -1667,6 +1698,21 @@ int qcow2_update_header(BlockDriverState *bs)
>       buf += ret;
>       buflen -= ret;
>   
> +    if (s->nb_dirty_bitmaps > 0) {
> +        Qcow2DirtyBitmapHeaderExt dirty_bitmaps_header = {
> +            .nb_dirty_bitmaps = cpu_to_be32(s->nb_dirty_bitmaps),
> +            .dirty_bitmaps_offset = cpu_to_be64(s->dirty_bitmaps_offset)
> +        };
> +        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DIRTY_BITMAPS,
> +                             &dirty_bitmaps_header, sizeof(dirty_bitmaps_header),
> +                             buflen);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        buf += ret;
> +        buflen -= ret;
> +    }
> +
>       /* Keep unknown header extensions */
>       QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
>           ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
> @@ -2176,6 +2222,12 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
>           return -ENOTSUP;
>       }
>   
> +    /* cannot proceed if image has dirty_bitmaps */
> +    if (s->nb_dirty_bitmaps) {
> +        error_report("Can't resize an image which has dirty bitmaps");
> +        return -ENOTSUP;
> +    }
> +
>       /* shrinking is currently not supported */
>       if (offset < bs->total_sectors * 512) {
>           error_report("qcow2 doesn't support shrinking images yet");
> @@ -2952,6 +3004,10 @@ BlockDriver bdrv_qcow2 = {
>       .bdrv_get_info          = qcow2_get_info,
>       .bdrv_get_specific_info = qcow2_get_specific_info,
>   
> +    .bdrv_dirty_bitmap_load = qcow2_dirty_bitmap_load,
> +    .bdrv_dirty_bitmap_store = qcow2_dirty_bitmap_store,
> +    .bdrv_dirty_bitmap_delete = qcow2_dirty_bitmap_delete,
> +
>       .bdrv_save_vmstate    = qcow2_save_vmstate,
>       .bdrv_load_vmstate    = qcow2_load_vmstate,
>   
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 422b825..24beee0 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -39,6 +39,7 @@
>   
>   #define QCOW_MAX_CRYPT_CLUSTERS 32
>   #define QCOW_MAX_SNAPSHOTS 65536
> +#define QCOW_MAX_DIRTY_BITMAPS 65536
>   
>   /* 8 MB refcount table is enough for 2 PB images at 64k cluster size
>    * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
> @@ -52,6 +53,8 @@
>    * space for snapshot names and IDs */
>   #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
>   
> +#define QCOW_MAX_DIRTY_BITMAPS_SIZE (1024 * QCOW_MAX_DIRTY_BITMAPS)
> +
>   /* indicate that the refcount of the referenced cluster is exactly one. */
>   #define QCOW_OFLAG_COPIED     (1ULL << 63)
>   /* indicate that the cluster is compressed (they never have the copied flag) */
> @@ -138,6 +141,19 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
>       /* name follows  */
>   } QCowSnapshotHeader;
>   
> +typedef struct QEMU_PACKED QCowDirtyBitmapHeader {
> +    /* header is 8 byte aligned */
> +    uint64_t l1_table_offset;
> +
> +    uint32_t l1_size;
> +    uint32_t bitmap_granularity;
> +
> +    uint64_t bitmap_size;
> +    uint16_t name_size;
> +
> +    /* name follows  */
> +} QCowDirtyBitmapHeader;
> +
>   typedef struct QEMU_PACKED QCowSnapshotExtraData {
>       uint64_t vm_state_size_large;
>       uint64_t disk_size;
> @@ -156,6 +172,14 @@ typedef struct QCowSnapshot {
>       uint64_t vm_clock_nsec;
>   } QCowSnapshot;
>   
> +typedef struct QCowDirtyBitmap {
> +    uint64_t l1_table_offset;
> +    uint32_t l1_size;
> +    char *name;
> +    int bitmap_granularity;
> +    uint64_t bitmap_size;
> +} QCowDirtyBitmap;
> +
>   struct Qcow2Cache;
>   typedef struct Qcow2Cache Qcow2Cache;
>   
> @@ -218,6 +242,11 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
>   typedef void Qcow2SetRefcountFunc(void *refcount_array,
>                                     uint64_t index, uint64_t value);
>   
> +typedef struct Qcow2DirtyBitmapHeaderExt {
> +    uint32_t nb_dirty_bitmaps;
> +    uint64_t dirty_bitmaps_offset;
> +} QEMU_PACKED Qcow2DirtyBitmapHeaderExt;
> +
>   typedef struct BDRVQcowState {
>       int cluster_bits;
>       int cluster_size;
> @@ -259,6 +288,11 @@ typedef struct BDRVQcowState {
>       unsigned int nb_snapshots;
>       QCowSnapshot *snapshots;
>   
> +    uint64_t dirty_bitmaps_offset;
> +    int dirty_bitmaps_size;
> +    unsigned int nb_dirty_bitmaps;
> +    QCowDirtyBitmap *dirty_bitmaps;
> +
>       int flags;
>       int qcow_version;
>       bool use_lazy_refcounts;
> @@ -570,6 +604,22 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
>   void qcow2_free_snapshots(BlockDriverState *bs);
>   int qcow2_read_snapshots(BlockDriverState *bs);
>   
> +/* qcow2-dirty-bitmap.c functions */
> +int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
> +                             const char *name, uint64_t size,
> +                             int granularity);
> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                                 const char *name, uint64_t size,
> +                                 int granularity);
> +int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
> +                              uint64_t size, int granularity);
> +int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
> +                              const char *name,
> +                              Error **errp);
> +
> +void qcow2_free_dirty_bitmaps(BlockDriverState *bs);
> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs);
> +
>   /* qcow2-cache.c functions */
>   Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
>   int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index db29b74..88855b4 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -206,6 +206,16 @@ struct BlockDriver {
>       int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
>       ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
>   
> +    int (*bdrv_dirty_bitmap_store)(BlockDriverState *bs, uint8_t *buf,
> +                                   const char *name, uint64_t size,
> +                                   int granularity);
> +    uint8_t *(*bdrv_dirty_bitmap_load)(BlockDriverState *bs,
> +                                       const char *name, uint64_t size,
> +                                       int granularity);
> +    int (*bdrv_dirty_bitmap_delete)(BlockDriverState *bs,
> +                                    const char *name,
> +                                    Error **errp);
> +
>       int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
>                                int64_t pos);
>       int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 0d8c2a4..bff12b4 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,5 @@ 
 block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-dirty-bitmap.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
new file mode 100644
index 0000000..bc0167c
--- /dev/null
+++ b/block/qcow2-dirty-bitmap.c
@@ -0,0 +1,503 @@ 
+/*
+ * Dirty bitmpas for the QCOW version 2 format
+ *
+ * Copyright (c) 2014-2015 Vladimir Sementsov-Ogievskiy
+ *
+ * This file is derived from qcow2-snapshot.c, original copyright:
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+void qcow2_free_dirty_bitmaps(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
+        g_free(s->dirty_bitmaps[i].name);
+    }
+    g_free(s->dirty_bitmaps);
+    s->dirty_bitmaps = NULL;
+    s->nb_dirty_bitmaps = 0;
+}
+
+int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowDirtyBitmapHeader h;
+    QCowDirtyBitmap *bm;
+    int i, name_size;
+    int64_t offset;
+    int ret;
+
+    if (!s->nb_dirty_bitmaps) {
+        s->dirty_bitmaps = NULL;
+        s->dirty_bitmaps_size = 0;
+        return 0;
+    }
+
+    offset = s->dirty_bitmaps_offset;
+    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);
+
+    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
+        /* Read statically sized part of the dirty_bitmap header */
+        offset = align_offset(offset, 8);
+        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+        if (ret < 0) {
+            goto fail;
+        }
+
+        offset += sizeof(h);
+        bm = s->dirty_bitmaps + i;
+        bm->l1_table_offset = be64_to_cpu(h.l1_table_offset);
+        bm->l1_size = be32_to_cpu(h.l1_size);
+        bm->bitmap_granularity = be32_to_cpu(h.bitmap_granularity);
+        bm->bitmap_size = be64_to_cpu(h.bitmap_size);
+
+        name_size = be16_to_cpu(h.name_size);
+
+        /* Read dirty_bitmap name */
+        bm->name = g_malloc(name_size + 1);
+        ret = bdrv_pread(bs->file, offset, bm->name, name_size);
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += name_size;
+        bm->name[name_size] = '\0';
+
+        if (offset - s->dirty_bitmaps_offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
+            ret = -EFBIG;
+            goto fail;
+        }
+    }
+
+    assert(offset - s->dirty_bitmaps_offset <= INT_MAX);
+    s->dirty_bitmaps_size = offset - s->dirty_bitmaps_offset;
+    return 0;
+
+fail:
+    qcow2_free_dirty_bitmaps(bs);
+    return ret;
+}
+
+/* Add at the end of the file a new table of dirty bitmaps */
+static int qcow2_write_dirty_bitmaps(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowDirtyBitmap *bm;
+    QCowDirtyBitmapHeader h;
+    int i, name_size, dirty_bitmaps_size;
+    int64_t offset, dirty_bitmaps_offset = 0;
+    int ret;
+
+    int old_dirty_bitmaps_size = s->dirty_bitmaps_size;
+    int64_t old_dirty_bitmaps_offset = s->dirty_bitmaps_offset;
+
+    /* Compute the size of the dirty bitmaps table */
+    offset = 0;
+    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
+        bm = s->dirty_bitmaps + i;
+        offset = align_offset(offset, 8);
+        offset += sizeof(h);
+        offset += strlen(bm->name);
+
+        if (offset > QCOW_MAX_DIRTY_BITMAPS_SIZE) {
+            ret = -EFBIG;
+            goto fail;
+        }
+    }
+
+    assert(offset <= INT_MAX);
+    dirty_bitmaps_size = offset;
+
+    /* Allocate space for the new dirty bitmap table */
+    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
+    offset = dirty_bitmaps_offset;
+    if (offset < 0) {
+        ret = offset;
+        goto fail;
+    }
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* The dirty bitmap table position has not yet been updated, so these
+     * clusters must indeed be completely free */
+    ret = qcow2_pre_write_overlap_check(bs, 0, offset, dirty_bitmaps_size);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Write all dirty bitmaps to the new table */
+    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
+        bm = s->dirty_bitmaps + i;
+        memset(&h, 0, sizeof(h));
+        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
+        h.l1_size = cpu_to_be32(bm->l1_size);
+        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
+        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
+
+        name_size = strlen(bm->name);
+        assert(name_size <= UINT16_MAX);
+        h.name_size = cpu_to_be16(name_size);
+        offset = align_offset(offset, 8);
+
+        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += sizeof(h);
+
+        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += name_size;
+    }
+
+    /*
+     * Update the header extension to point to the new dirty bitmap table. This
+     * requires the new table and its refcounts to be stable on disk.
+     */
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
+    s->dirty_bitmaps_size = dirty_bitmaps_size;
+    ret = qcow2_update_header(bs);
+    if (ret < 0) {
+        fprintf(stderr, "Could not update qcow2 header\n");
+        goto fail;
+    }
+
+    /* Free old dirty bitmap table */
+    qcow2_free_clusters(bs, old_dirty_bitmaps_offset, old_dirty_bitmaps_size,
+                        QCOW2_DISCARD_ALWAYS);
+    return 0;
+
+fail:
+    if (dirty_bitmaps_offset > 0) {
+        qcow2_free_clusters(bs, dirty_bitmaps_offset, dirty_bitmaps_size,
+                            QCOW2_DISCARD_ALWAYS);
+    }
+    return ret;
+}
+
+static int find_dirty_bitmap_by_name(BlockDriverState *bs,
+                                     const char *name)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
+        if (!strcmp(s->dirty_bitmaps[i].name, name)) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
+                            const char *name, uint64_t size,
+                            int granularity)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i, dirty_bitmap_index, ret;
+    uint64_t offset;
+    QCowDirtyBitmap *bm;
+    uint64_t *l1_table;
+    uint8_t *buf;
+
+    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
+    if (dirty_bitmap_index < 0) {
+        return NULL;
+    }
+    bm = &s->dirty_bitmaps[dirty_bitmap_index];
+
+    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
+        return NULL;
+    }
+
+    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
+    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
+                     bm->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    buf = g_malloc0(bm->l1_size * s->cluster_size);
+    for (i = 0; i < bm->l1_size; ++i) {
+        offset = be64_to_cpu(l1_table[i]);
+        if (!(offset & 1)) {
+            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
+                             s->cluster_size);
+            if (ret < 0) {
+                goto fail;
+            }
+        }
+    }
+
+    g_free(l1_table);
+    return buf;
+
+fail:
+    g_free(l1_table);
+    return NULL;
+}
+
+int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
+                            const char *name, uint64_t size,
+                            int granularity)
+{
+    BDRVQcowState *s = bs->opaque;
+    int cl_size = s->cluster_size;
+    int i, dirty_bitmap_index, ret = 0, n;
+    uint64_t *l1_table;
+    QCowDirtyBitmap *bm;
+    uint64_t buf_size;
+    uint8_t *p;
+    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
+
+    /* find/create dirty bitmap */
+    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
+    if (dirty_bitmap_index >= 0) {
+        bm = s->dirty_bitmaps + dirty_bitmap_index;
+
+        if (size != bm->bitmap_size ||
+            granularity != bm->bitmap_granularity) {
+            qcow2_dirty_bitmap_delete(bs, name, NULL);
+            dirty_bitmap_index = -1;
+        }
+    }
+    if (dirty_bitmap_index < 0) {
+        qcow2_dirty_bitmap_create(bs, name, size, granularity);
+        dirty_bitmap_index = s->nb_dirty_bitmaps - 1;
+    }
+    bm = s->dirty_bitmaps + dirty_bitmap_index;
+
+    /* read l1 table */
+    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));
+    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
+                     bm->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto finish;
+    }
+
+    buf_size = (((size - 1) / sector_granularity) >> 3) + 1;
+    buf_size = align_offset(buf_size, 4);
+    n = buf_size / cl_size;
+    p = buf;
+    for (i = 0; i < bm->l1_size; ++i) {
+        uint64_t addr = be64_to_cpu(l1_table[i]) & ~511;
+        int write_size = (i == n ? (buf_size % cl_size) : cl_size);
+
+        if (buffer_is_zero(p, write_size)) {
+            if (addr) {
+                qcow2_free_clusters(bs, addr, cl_size,
+                                    QCOW2_DISCARD_ALWAYS);
+            }
+            l1_table[i] = cpu_to_be64(1);
+        } else {
+            if (!addr) {
+                addr = qcow2_alloc_clusters(bs, cl_size);
+                l1_table[i] = cpu_to_be64(addr);
+            }
+
+            ret = bdrv_pwrite(bs->file, addr, p, write_size);
+            if (ret < 0) {
+                goto finish;
+            }
+        }
+
+        p += cl_size;
+    }
+
+    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
+                      bm->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto finish;
+    }
+
+finish:
+    g_free(l1_table);
+    return ret;
+}
+/* if no id is provided, a new one is constructed */
+int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
+                              uint64_t size, int granularity)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowDirtyBitmap *new_dirty_bitmap_list = NULL;
+    QCowDirtyBitmap *old_dirty_bitmap_list = NULL;
+    QCowDirtyBitmap sn1, *bm = &sn1;
+    int i, ret;
+    uint64_t *l1_table = NULL;
+    int64_t l1_table_offset;
+    int sector_granularity = granularity >> BDRV_SECTOR_BITS;
+
+    if (s->nb_dirty_bitmaps >= QCOW_MAX_DIRTY_BITMAPS) {
+        return -EFBIG;
+    }
+
+    memset(bm, 0, sizeof(*bm));
+
+    /* Check that the ID is unique */
+    if (find_dirty_bitmap_by_name(bs, name) >= 0) {
+        return -EEXIST;
+    }
+
+    /* Populate bm with passed data */
+    bm->name = g_strdup(name);
+    bm->bitmap_granularity = granularity;
+    bm->bitmap_size = size;
+
+    bm->l1_size =
+        size_to_clusters(s, (((size - 1) / sector_granularity) >> 3) + 1);
+    l1_table_offset =
+        qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+    if (l1_table_offset < 0) {
+        ret = l1_table_offset;
+        goto fail;
+    }
+    bm->l1_table_offset = l1_table_offset;
+
+    l1_table = g_try_new(uint64_t, bm->l1_size);
+    if (l1_table == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    /* initialize with zero clusters */
+    for (i = 0; i < s->l1_size; i++) {
+        l1_table[i] = cpu_to_be64(1);
+    }
+
+    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
+                                        s->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
+                      s->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    g_free(l1_table);
+    l1_table = NULL;
+
+    /* Append the new dirty bitmap to the dirty bitmap list */
+    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
+    if (s->dirty_bitmaps) {
+        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
+               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
+        old_dirty_bitmap_list = s->dirty_bitmaps;
+    }
+    s->dirty_bitmaps = new_dirty_bitmap_list;
+    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
+
+    ret = qcow2_write_dirty_bitmaps(bs);
+    if (ret < 0) {
+        g_free(s->dirty_bitmaps);
+        s->dirty_bitmaps = old_dirty_bitmap_list;
+        s->nb_dirty_bitmaps--;
+        goto fail;
+    }
+
+    g_free(old_dirty_bitmap_list);
+
+    return 0;
+
+fail:
+    g_free(bm->name);
+    g_free(l1_table);
+
+    return ret;
+}
+
+static int qcow2_dirty_bitmap_free_clusters(BlockDriverState *bs,
+                                            QCowDirtyBitmap *bm)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret, i;
+    uint64_t *l1_table = g_new(uint64_t, bm->l1_size);
+
+    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
+                     bm->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        g_free(l1_table);
+        return ret;
+    }
+
+    for (i = 0; i < bm->l1_size; ++i) {
+        uint64_t addr = be64_to_cpu(l1_table[i]);
+        qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_ALWAYS);
+    }
+
+    qcow2_free_clusters(bs, bm->l1_table_offset, bm->l1_size * sizeof(uint64_t),
+                        QCOW2_DISCARD_ALWAYS);
+
+    g_free(l1_table);
+    return 0;
+}
+
+int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
+                              const char *name,
+                              Error **errp)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowDirtyBitmap bm;
+    int dirty_bitmap_index, ret = 0;
+
+    /* Search the dirty_bitmap */
+    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
+    if (dirty_bitmap_index < 0) {
+        error_setg(errp, "Can't find the dirty bitmap");
+        return -ENOENT;
+    }
+    bm = s->dirty_bitmaps[dirty_bitmap_index];
+
+    /* Remove it from the dirty_bitmap list */
+    memmove(s->dirty_bitmaps + dirty_bitmap_index,
+            s->dirty_bitmaps + dirty_bitmap_index + 1,
+            (s->nb_dirty_bitmaps - dirty_bitmap_index - 1) * sizeof(bm));
+    s->nb_dirty_bitmaps--;
+    ret = qcow2_write_dirty_bitmaps(bs);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "Failed to remove dirty bitmap"
+                         " from dirty bitmap list");
+        return ret;
+    }
+
+    qcow2_dirty_bitmap_free_clusters(bs, &bm);
+    g_free(bm.name);
+
+    return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index b9a72e3..406e55d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -61,6 +61,7 @@  typedef struct {
 #define  QCOW2_EXT_MAGIC_END 0
 #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
 #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
+#define  QCOW2_EXT_MAGIC_DIRTY_BITMAPS 0x23852875
 
 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -90,6 +91,7 @@  static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
     QCowExtension ext;
     uint64_t offset;
     int ret;
+    Qcow2DirtyBitmapHeaderExt dirty_bitmaps_ext;
 
 #ifdef DEBUG_EXT
     printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
@@ -160,6 +162,33 @@  static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
             }
             break;
 
+        case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
+            ret = bdrv_pread(bs->file, offset, &dirty_bitmaps_ext, ext.len);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "ERROR: dirty_bitmaps_ext: "
+                                 "Could not read ext header");
+                return ret;
+            }
+
+            be64_to_cpus(&dirty_bitmaps_ext.dirty_bitmaps_offset);
+            be32_to_cpus(&dirty_bitmaps_ext.nb_dirty_bitmaps);
+
+            s->dirty_bitmaps_offset = dirty_bitmaps_ext.dirty_bitmaps_offset;
+            s->nb_dirty_bitmaps = dirty_bitmaps_ext.nb_dirty_bitmaps;
+
+            ret = qcow2_read_dirty_bitmaps(bs);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "Could not read dirty bitmaps");
+                return ret;
+            }
+
+#ifdef DEBUG_EXT
+            printf("Qcow2: Got dirty bitmaps extension:"
+                   " offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
+                   s->dirty_bitmaps_offset, s->nb_dirty_bitmaps);
+#endif
+            break;
+
         default:
             /* unknown magic - save it in case we need to rewrite the header */
             {
@@ -1000,6 +1029,7 @@  static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     g_free(s->unknown_header_fields);
     cleanup_unknown_header_ext(bs);
     qcow2_free_snapshots(bs);
+    qcow2_free_dirty_bitmaps(bs);
     qcow2_refcount_close(bs);
     qemu_vfree(s->l1_table);
     /* else pre-write overlap checks in cache_destroy may crash */
@@ -1466,6 +1496,7 @@  static void qcow2_close(BlockDriverState *bs)
     qemu_vfree(s->cluster_data);
     qcow2_refcount_close(bs);
     qcow2_free_snapshots(bs);
+    qcow2_free_dirty_bitmaps(bs);
 }
 
 static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
@@ -1667,6 +1698,21 @@  int qcow2_update_header(BlockDriverState *bs)
     buf += ret;
     buflen -= ret;
 
+    if (s->nb_dirty_bitmaps > 0) {
+        Qcow2DirtyBitmapHeaderExt dirty_bitmaps_header = {
+            .nb_dirty_bitmaps = cpu_to_be32(s->nb_dirty_bitmaps),
+            .dirty_bitmaps_offset = cpu_to_be64(s->dirty_bitmaps_offset)
+        };
+        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DIRTY_BITMAPS,
+                             &dirty_bitmaps_header, sizeof(dirty_bitmaps_header),
+                             buflen);
+        if (ret < 0) {
+            goto fail;
+        }
+        buf += ret;
+        buflen -= ret;
+    }
+
     /* Keep unknown header extensions */
     QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
         ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
@@ -2176,6 +2222,12 @@  static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
         return -ENOTSUP;
     }
 
+    /* cannot proceed if image has dirty_bitmaps */
+    if (s->nb_dirty_bitmaps) {
+        error_report("Can't resize an image which has dirty bitmaps");
+        return -ENOTSUP;
+    }
+
     /* shrinking is currently not supported */
     if (offset < bs->total_sectors * 512) {
         error_report("qcow2 doesn't support shrinking images yet");
@@ -2952,6 +3004,10 @@  BlockDriver bdrv_qcow2 = {
     .bdrv_get_info          = qcow2_get_info,
     .bdrv_get_specific_info = qcow2_get_specific_info,
 
+    .bdrv_dirty_bitmap_load = qcow2_dirty_bitmap_load,
+    .bdrv_dirty_bitmap_store = qcow2_dirty_bitmap_store,
+    .bdrv_dirty_bitmap_delete = qcow2_dirty_bitmap_delete,
+
     .bdrv_save_vmstate    = qcow2_save_vmstate,
     .bdrv_load_vmstate    = qcow2_load_vmstate,
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 422b825..24beee0 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -39,6 +39,7 @@ 
 
 #define QCOW_MAX_CRYPT_CLUSTERS 32
 #define QCOW_MAX_SNAPSHOTS 65536
+#define QCOW_MAX_DIRTY_BITMAPS 65536
 
 /* 8 MB refcount table is enough for 2 PB images at 64k cluster size
  * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
@@ -52,6 +53,8 @@ 
  * space for snapshot names and IDs */
 #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
 
+#define QCOW_MAX_DIRTY_BITMAPS_SIZE (1024 * QCOW_MAX_DIRTY_BITMAPS)
+
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
@@ -138,6 +141,19 @@  typedef struct QEMU_PACKED QCowSnapshotHeader {
     /* name follows  */
 } QCowSnapshotHeader;
 
+typedef struct QEMU_PACKED QCowDirtyBitmapHeader {
+    /* header is 8 byte aligned */
+    uint64_t l1_table_offset;
+
+    uint32_t l1_size;
+    uint32_t bitmap_granularity;
+
+    uint64_t bitmap_size;
+    uint16_t name_size;
+
+    /* name follows  */
+} QCowDirtyBitmapHeader;
+
 typedef struct QEMU_PACKED QCowSnapshotExtraData {
     uint64_t vm_state_size_large;
     uint64_t disk_size;
@@ -156,6 +172,14 @@  typedef struct QCowSnapshot {
     uint64_t vm_clock_nsec;
 } QCowSnapshot;
 
+typedef struct QCowDirtyBitmap {
+    uint64_t l1_table_offset;
+    uint32_t l1_size;
+    char *name;
+    int bitmap_granularity;
+    uint64_t bitmap_size;
+} QCowDirtyBitmap;
+
 struct Qcow2Cache;
 typedef struct Qcow2Cache Qcow2Cache;
 
@@ -218,6 +242,11 @@  typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
 typedef void Qcow2SetRefcountFunc(void *refcount_array,
                                   uint64_t index, uint64_t value);
 
+typedef struct Qcow2DirtyBitmapHeaderExt {
+    uint32_t nb_dirty_bitmaps;
+    uint64_t dirty_bitmaps_offset;
+} QEMU_PACKED Qcow2DirtyBitmapHeaderExt;
+
 typedef struct BDRVQcowState {
     int cluster_bits;
     int cluster_size;
@@ -259,6 +288,11 @@  typedef struct BDRVQcowState {
     unsigned int nb_snapshots;
     QCowSnapshot *snapshots;
 
+    uint64_t dirty_bitmaps_offset;
+    int dirty_bitmaps_size;
+    unsigned int nb_dirty_bitmaps;
+    QCowDirtyBitmap *dirty_bitmaps;
+
     int flags;
     int qcow_version;
     bool use_lazy_refcounts;
@@ -570,6 +604,22 @@  int qcow2_snapshot_load_tmp(BlockDriverState *bs,
 void qcow2_free_snapshots(BlockDriverState *bs);
 int qcow2_read_snapshots(BlockDriverState *bs);
 
+/* qcow2-dirty-bitmap.c functions */
+int qcow2_dirty_bitmap_store(BlockDriverState *bs, uint8_t *buf,
+                             const char *name, uint64_t size,
+                             int granularity);
+uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
+                                 const char *name, uint64_t size,
+                                 int granularity);
+int qcow2_dirty_bitmap_create(BlockDriverState *bs, const char *name,
+                              uint64_t size, int granularity);
+int qcow2_dirty_bitmap_delete(BlockDriverState *bs,
+                              const char *name,
+                              Error **errp);
+
+void qcow2_free_dirty_bitmaps(BlockDriverState *bs);
+int qcow2_read_dirty_bitmaps(BlockDriverState *bs);
+
 /* qcow2-cache.c functions */
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index db29b74..88855b4 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -206,6 +206,16 @@  struct BlockDriver {
     int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
     ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
 
+    int (*bdrv_dirty_bitmap_store)(BlockDriverState *bs, uint8_t *buf,
+                                   const char *name, uint64_t size,
+                                   int granularity);
+    uint8_t *(*bdrv_dirty_bitmap_load)(BlockDriverState *bs,
+                                       const char *name, uint64_t size,
+                                       int granularity);
+    int (*bdrv_dirty_bitmap_delete)(BlockDriverState *bs,
+                                    const char *name,
+                                    Error **errp);
+
     int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
                              int64_t pos);
     int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,