Patchwork [RFC,21/24] qcow2: add qcow2_open_conversion_target()

login
register
mail settings
Submitter Devin Nakamura
Date July 29, 2011, 4:49 a.m.
Message ID <1311914994-20482-22-git-send-email-devin122@gmail.com>
Download mbox | patch
Permalink /patch/107343/
State New
Headers show

Comments

Devin Nakamura - July 29, 2011, 4:49 a.m.
Still in very raw form.  Not likely to work yet

Signed-off-by: Devin Nakamura <devin122@gmail.com>
---
 block/qcow2.c |  124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 124 insertions(+), 0 deletions(-)
Kevin Wolf - Aug. 1, 2011, 3:26 p.m.
Am 29.07.2011 06:49, schrieb Devin Nakamura:
> Still in very raw form.  Not likely to work yet
> 
> Signed-off-by: Devin Nakamura <devin122@gmail.com>

I don't think it's quite as easy.

The problem is that qcow2 will access the image header in some places,
for example when growing the L1 or refcount table. You need to make sure
that it doesn't destroy the source format header, but writes to the
target format header at a different offset.

I think much of qcow2_open and qcow2_open_conversion_target could be the
same. That is, both could call a common function that takes a parameter
for the header offset.

The other part of qcow2_open_conversion_target (creating a new header
and a basic L1/refcount table) could possibly be shared with
qcow2_create2, though I'm not quite as sure about this one.

> ---
>  block/qcow2.c |  124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 124 insertions(+), 0 deletions(-)
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 86df65d..f1e1e12 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -1444,6 +1444,129 @@ static int qcow2_get_conversion_options(BlockDriverState *bs,
>      }
>      return 0;
>  }
> +
> +
> +static int qcow2_open_conversion_target(BlockDriverState *bs,
> +                                        BlockConversionOptions *drv_options,
> +                                        QEMUOptionParameter *usr_options)
> +{
> +    uint64_t imgsize, sectorsize, clusterbits;
> +    uint64_t num_refcount_blocks;
> +    uint16_t *refcount_block;
> +    uint64_t table_index, table_limit, i;
> +
> +    sectorsize = drv_options->cluster_size;

Sectors aren't clusters.

> +    clusterbits = 0;
> +    while (~sectorsize & 1) {
> +        sectorsize >>=1;
> +        clusterbits++;
> +    }
> +    if (sectorsize != 1) {
> +        return -EINVAL;
> +    }

Have a look at qcow2_create2, it uses ffs() to do this.

> +
> +
> +    imgsize = drv_options->image_size;
> +
> +    BDRVQcowState *s = bs->opaque;
> +    s->crypt_method_header = QCOW_CRYPT_NONE;

Shouldn't this be taken from drv_options?

> +    s->cluster_bits = clusterbits;
> +    s->cluster_size = 1 << s->cluster_bits;
> +    s->cluster_sectors = 1 << (s->cluster_bits - 9);
> +    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
> +    s->l2_size = 1 << s->l2_bits;
> +    bs->total_sectors = imgsize / 512;
> +    s->csize_shift = (62 - (s->cluster_bits - 8));
> +    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
> +    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
> +    //s->refcount_table_offset = 0;
> +    //s->refcount_table_size = 0;
> +    s->snapshots_offset = 0;
> +    s->nb_snapshots = 0;
> +    /* alloc L2 table/refcount block cache */
> +    s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE, true); //todo: double check writethrough

writethrough mode will hurt performance and isn't required here.

> +    s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE,
> +        true);
> +
> +    s->cluster_cache = qemu_malloc(s->cluster_size);
> +    /* one more sector for decompressed data alignment */
> +    s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
> +                                  + 512);
> +    s->cluster_cache_offset = -1;
> +
> +    //Make up a refcount table
> +    const uint64_t num_clusters = bs->file->total_sectors >>
> +        (clusterbits - BDRV_SECTOR_BITS);

This is the number of cluster in the image file.

> +    const uint64_t cluster_size = 1 << s->cluster_bits;
> +    const uint64_t uint64_per_cluster = cluster_size / sizeof(uint64_t);
> +    const uint64_t uint16_per_cluster = cluster_size / sizeof(uint16_t);
> +    const uint64_t num_l2_tables = (num_clusters + uint64_per_cluster - 1) / 
> +                                    uint64_per_cluster;
> +    const uint64_t num_l1_clusters = (num_l2_tables + uint64_per_cluster - 1)
> +                                     / uint64_per_cluster);

L1/L2 tables are for clusters on the virtual disk. This can be something
completely different, and with some bad luck even larger than what you
calculate here (consider an image where in each L2 table only one
cluster is allocated - this will make the image files very small, but
requires still many L2 tables).

> +    s->l1_size = num_l2_tables;
> +    s->l1_table = qemu_mallocz(s->l1_size * s->cluster_size);
> +    num_refcount_blocks = num_clusters + num_l2_tables + num_l1_clusters;
> +    num_refcount_blocks = (num_refcount_blocks + uint16_per_cluster - 1) /
> +                          uint16_per_cluster;
> +
> +    /* Account for refcount blocks used to track refcount blocks */
> +    num_refcount_blocks *= uint16_per_cluster;
> +    num_refcount_blocks /= uint16_per_cluster -1;
> +    num_refcount_blocks += 1; //todo: fix rounding

Not sure what you're trying to do here, but align_offset() from qcow2.h
could help.

> +
> +    /* Account for refcount blocks used to track refcount table */
> +    num_refcount_blocks *= uint64_per_cluster;
> +    num_refcount_blocks /= uint64_per_cluster -1;
> +    num_refcount_blocks += 1; // todo: fix rounding
> +
> +    s->refcount_table_size = (num_refcount_blocks / uint64_per_cluster) +1;

I think the algorithm to calculate the required refcount table size
should be shared with qcow2_alloc_refcount_block.

Doesn't your algorithm forget to count the clusters used by refcount
tables/block themselves?

> +    s->refcount_table = qemu_mallocz(s->refcount_table_size * s->cluster_size);
> +    refcount_block = qemu_mallocz(s->cluster_size);
> +
> +//    s->refcount_table_size = num_refcount_blocks;
> +    s->free_cluster_index = num_clusters;
> +    s->l1_table_offset = s->free_cluster_index << s->cluster_bits;
> +    s->free_cluster_index += num_l1_clusters;
> +    s->refcount_table_offset = s->free_cluster_index << s->cluster_bits;
> +    s->free_cluster_index++;
> +
> +    /* assign offsets for all refcount blocks */
> +    for (table_index = 0; table_index < num_refcount_blocks; table_index++) {
> +        s->refcount_table[table_index] = s->free_cluster_index++ << s->cluster_bits;
> +    }
> +
> +    /* set all refcounts in the block to 1 */
> +    for (i=0; i < uint16_per_cluster; i++) {
> +        refcount_block[i] = cpu_to_be16(1);
> +    }
> +
> +    //TODO: double check this, it seems a bit fishy

That's my impression of the whole operation, too. :-)

This is why I thought it would be nice to share this code with normal
image creation. Then we have this fishy code at least only once.

Kevin
Devin Nakamura - Aug. 2, 2011, 4:37 a.m.
On Mon, Aug 1, 2011 at 11:26 AM, Kevin Wolf <kwolf@redhat.com> wrote:
> Am 29.07.2011 06:49, schrieb Devin Nakamura:
>> Still in very raw form.  Not likely to work yet
>>
>> Signed-off-by: Devin Nakamura <devin122@gmail.com>
>
> I don't think it's quite as easy.
>
> The problem is that qcow2 will access the image header in some places,
> for example when growing the L1 or refcount table. You need to make sure
> that it doesn't destroy the source format header, but writes to the
> target format header at a different offset.
This is why I've made sure to size the L1 and refcount tables so they
wont need to be resized. But I suppose that's really a balancing act
that's likely to break if someone makes changes to the current code,
or when snapshots are implemented

> I think much of qcow2_open and qcow2_open_conversion_target could be the
> same. That is, both could call a common function that takes a parameter
> for the header offset.
I'm almost certain I could do that (considering I lifted a lot of the
code for qcow2_open_conversion_target from qcow2_open)
> The other part of qcow2_open_conversion_target (creating a new header
> and a basic L1/refcount table) could possibly be shared with
> qcow2_create2, though I'm not quite as sure about this one.


>> +    //TODO: double check this, it seems a bit fishy
>
> That's my impression of the whole operation, too. :-)
>
> This is why I thought it would be nice to share this code with normal
> image creation. Then we have this fishy code at least only once.
>
> Kevin
>

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index 86df65d..f1e1e12 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1444,6 +1444,129 @@  static int qcow2_get_conversion_options(BlockDriverState *bs,
     }
     return 0;
 }
+
+
+static int qcow2_open_conversion_target(BlockDriverState *bs,
+                                        BlockConversionOptions *drv_options,
+                                        QEMUOptionParameter *usr_options)
+{
+    uint64_t imgsize, sectorsize, clusterbits;
+    uint64_t num_refcount_blocks;
+    uint16_t *refcount_block;
+    uint64_t table_index, table_limit, i;
+
+    sectorsize = drv_options->cluster_size;
+    clusterbits = 0;
+    while (~sectorsize & 1) {
+        sectorsize >>=1;
+        clusterbits++;
+    }
+    if (sectorsize != 1) {
+        return -EINVAL;
+    }
+
+
+    imgsize = drv_options->image_size;
+
+    BDRVQcowState *s = bs->opaque;
+    s->crypt_method_header = QCOW_CRYPT_NONE;
+    s->cluster_bits = clusterbits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
+    s->l2_size = 1 << s->l2_bits;
+    bs->total_sectors = imgsize / 512;
+    s->csize_shift = (62 - (s->cluster_bits - 8));
+    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
+    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
+    //s->refcount_table_offset = 0;
+    //s->refcount_table_size = 0;
+    s->snapshots_offset = 0;
+    s->nb_snapshots = 0;
+    /* alloc L2 table/refcount block cache */
+    s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE, true); //todo: double check writethrough
+    s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE,
+        true);
+
+    s->cluster_cache = qemu_malloc(s->cluster_size);
+    /* one more sector for decompressed data alignment */
+    s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+                                  + 512);
+    s->cluster_cache_offset = -1;
+
+    //Make up a refcount table
+    const uint64_t num_clusters = bs->file->total_sectors >>
+        (clusterbits - BDRV_SECTOR_BITS);
+    const uint64_t cluster_size = 1 << s->cluster_bits;
+    const uint64_t uint64_per_cluster = cluster_size / sizeof(uint64_t);
+    const uint64_t uint16_per_cluster = cluster_size / sizeof(uint16_t);
+    const uint64_t num_l2_tables = (num_clusters + uint64_per_cluster - 1) / 
+                                    uint64_per_cluster;
+    const uint64_t num_l1_clusters = (num_l2_tables + uint64_per_cluster - 1)
+                                     / uint64_per_cluster);
+    s->l1_size = num_l2_tables;
+    s->l1_table = qemu_mallocz(s->l1_size * s->cluster_size);
+    num_refcount_blocks = num_clusters + num_l2_tables + num_l1_clusters;
+    num_refcount_blocks = (num_refcount_blocks + uint16_per_cluster - 1) /
+                          uint16_per_cluster;
+
+    /* Account for refcount blocks used to track refcount blocks */
+    num_refcount_blocks *= uint16_per_cluster;
+    num_refcount_blocks /= uint16_per_cluster -1;
+    num_refcount_blocks += 1; //todo: fix rounding
+
+    /* Account for refcount blocks used to track refcount table */
+    num_refcount_blocks *= uint64_per_cluster;
+    num_refcount_blocks /= uint64_per_cluster -1;
+    num_refcount_blocks += 1; // todo: fix rounding
+
+    s->refcount_table_size = (num_refcount_blocks / uint64_per_cluster) +1;
+    s->refcount_table = qemu_mallocz(s->refcount_table_size * s->cluster_size);
+    refcount_block = qemu_mallocz(s->cluster_size);
+
+//    s->refcount_table_size = num_refcount_blocks;
+    s->free_cluster_index = num_clusters;
+    s->l1_table_offset = s->free_cluster_index << s->cluster_bits;
+    s->free_cluster_index += num_l1_clusters;
+    s->refcount_table_offset = s->free_cluster_index << s->cluster_bits;
+    s->free_cluster_index++;
+
+    /* assign offsets for all refcount blocks */
+    for (table_index = 0; table_index < num_refcount_blocks; table_index++) {
+        s->refcount_table[table_index] = s->free_cluster_index++ << s->cluster_bits;
+    }
+
+    /* set all refcounts in the block to 1 */
+    for (i=0; i < uint16_per_cluster; i++) {
+        refcount_block[i] = cpu_to_be16(1);
+    }
+
+    //TODO: double check this, it seems a bit fishy
+    table_limit = s->free_cluster_index / uint16_per_cluster;
+    for(table_index = 0; table_index < table_limit; table_index++) {
+        bdrv_write_sync(bs->file, s->refcount_table[table_index] >> BDRV_SECTOR_BITS,
+            (uint8_t*) refcount_block, s->cluster_sectors);
+    }
+
+    table_limit = s->free_cluster_index % uint16_per_cluster;
+    for(i=table_limit; i < uint64_per_cluster; i++) {
+        refcount_block[i] = 0;
+    }
+    bdrv_write_sync(bs->file, s->refcount_table[table_index] >> BDRV_SECTOR_BITS,
+            (uint8_t*) refcount_block, s->cluster_sectors);
+    uint64_t* be_refcount_table = qemu_mallocz(s->refcount_table_size * s->cluster_size);
+    for(i=0; i < s->refcount_table_size; i++) {
+        be_refcount_table[i] = cpu_to_be64(s->refcount_table[i]);
+    }
+    bdrv_write_sync(bs->file, s->refcount_table_offset >> BDRV_SECTOR_BITS,
+                    (uint8_t*)be_refcount_table,s->refcount_table_size * s->cluster_size);
+    QLIST_INIT(&s->cluster_allocs);
+
+    qemu_free(refcount_block);
+    return 0;
+}
+
+
 static QEMUOptionParameter qcow2_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -1518,6 +1641,7 @@  static BlockDriver bdrv_qcow2 = {
     .bdrv_map                    = qcow2_map,
     .bdrv_copy_header            = qcow2_copy_header,
     .bdrv_get_conversion_options = qcow2_get_conversion_options,
+    .bdrv_open_conversion_target = qcow2_open_conversion_target,
 };
 
 static void bdrv_qcow2_init(void)