diff mbox

[2/3] qcow2: Implement bdrv_amend_options

Message ID 1377677336-15804-3-git-send-email-mreitz@redhat.com
State New
Headers show

Commit Message

Max Reitz Aug. 28, 2013, 8:08 a.m. UTC
Implement bdrv_amend_options for compat, size, backing_file, backing_fmt
and lazy_refcounts.

Downgrading images from compat=1.1 to compat=0.10 is achieved through
handling all incompatible flags accordingly, clearing all compatible and
autoclear flags and expanding all zero clusters.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c |  66 ++++++++++++++++++
 block/qcow2.c         | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.h         |   2 +
 3 files changed, 250 insertions(+)

Comments

Kevin Wolf Aug. 28, 2013, 11:06 a.m. UTC | #1
Am 28.08.2013 um 10:08 hat Max Reitz geschrieben:
> Implement bdrv_amend_options for compat, size, backing_file, backing_fmt
> and lazy_refcounts.
> 
> Downgrading images from compat=1.1 to compat=0.10 is achieved through
> handling all incompatible flags accordingly, clearing all compatible and
> autoclear flags and expanding all zero clusters.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-cluster.c |  66 ++++++++++++++++++
>  block/qcow2.c         | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/qcow2.h         |   2 +
>  3 files changed, 250 insertions(+)
> 
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index cca76d4..ac50db2 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1476,3 +1476,69 @@ fail:
>  
>      return ret;
>  }
> +
> +/*
> + * Expands all zero clusters on the image; important for downgrading to a qcow2
> + * version which doesn't yet support metadata zero clusters.
> + */
> +int qcow2_expand_zero_clusters(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < s->l1_size; i++) {

This fails to expand zero clusters in non-active L2 tables. (Please add
a test case for this scenario.)

> +        uint64_t *l2_table;
> +        int l2_index;
> +        int j;
> +        bool l2_dirty = false;
> +
> +        ret = get_cluster_table(bs, (uint64_t)i << (s->l2_bits +
> +                s->cluster_bits), &l2_table, &l2_index);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        for (j = 0; j < s->l2_size; j++) {
> +            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
> +            if (!(l2_entry & QCOW_OFLAG_COMPRESSED) &&
> +                (l2_entry & QCOW_OFLAG_ZERO)) {

qcow2_get_cluster_type()?

> +                /* uncompressed zero cluster */
> +                int64_t offset = qcow2_alloc_clusters(bs, s->cluster_size);
> +                if (offset < 0) {
> +                    ret = offset;
> +                    goto fail;
> +                }

Does it handle zero clusters with an offset (i.e. preallocation)
correctly? I believe we must either reuse that cluster or free it.

> +                ret = bdrv_write_zeroes(bs->file, offset >> BDRV_SECTOR_BITS,
> +                                        s->cluster_size >> BDRV_SECTOR_BITS);
> +                if (ret < 0) {
> +                    qcow2_free_clusters(bs, offset, s->cluster_size,
> +                            QCOW2_DISCARD_ALWAYS);
> +                    goto fail;
> +                }
> +
> +                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
> +                l2_dirty = true;
> +            }
> +        }
> +
> +        ret = 0;
> +
> +fail:
> +        if (l2_dirty) {
> +            qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);

qcow2_cache_depends_on_flush(s->l2_table_cache), too. The L2 table must
only be written when the zeroes are stable on disk.

> +        }
> +
> +        if (ret < 0) {
> +            qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
> +        } else {
> +            ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
> +        }
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 78097e5..47cd5ad 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -1735,6 +1735,187 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
>      return ret;
>  }
>  
> +/*
> + * Downgrades an image's version. To achieve this, any incompatible features
> + * have to be removed.
> + */
> +static int qcow2_downgrade(BlockDriverState *bs, int target_version)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int current_version = s->qcow_version;
> +    int ret;
> +
> +    if (target_version == current_version) {
> +        return 0;
> +    } else if (target_version > current_version) {
> +        return -EINVAL;
> +    } else if (target_version != 2) {
> +        return -EINVAL;
> +    }
> +
> +    /* clear incompatible features */
> +    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
> +        BdrvCheckResult result;
> +        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
> +        if (ret < 0) {
> +            return ret;
> +        }

This is unnecessary: The image could be opened, so we know that it was
clean when we started. We also know that we haven't crashed yet, so if we
flush all in-memory data, we'll have a consistent on-disk state again.

qcow2_mark_clean() already calls bdrv_flush(bs), so it does everything
that is needed in this respect.

> +        qcow2_mark_clean(bs);

However, it can return errors, for which we should check.

> +    }
> +
> +    if (s->incompatible_features) {
> +        return -ENOTSUP;
> +    }
> +
> +    /* since we can ignore compatible features, we can set them to 0 as well */
> +    s->compatible_features = 0;
> +    /* if lazy refcounts have been used, they have already been fixed through
> +     * clearing the dirty flag */
> +
> +    /* clearing autoclear features is trivial */
> +    s->autoclear_features = 0;
> +
> +    /* the refcount order might be different in newer images - however, qemu
> +     * doesn't support anything different than 4 anyway, so nothing to fix
> +     * there */
> +
> +    ret = qcow2_expand_zero_clusters(bs);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    s->qcow_version = target_version;
> +    ret = qcow2_update_header(bs);
> +    if (ret < 0) {
> +        s->qcow_version = current_version;
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static int qcow2_amend_options(BlockDriverState *bs,
> +                               QEMUOptionParameter *options)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int old_version = s->qcow_version, new_version = old_version;
> +    uint64_t new_size = 0;
> +    const char *backing_file = NULL, *backing_format = NULL;
> +    bool lazy_refcounts = s->use_lazy_refcounts;
> +    int ret;
> +    int i;
> +
> +    for (i = 0; options[i].name; i++)
> +    {
> +        if (!strcmp(options[i].name, "compat")) {
> +            if (!options[i].value.s) {
> +                /* preserve default */
> +            } else if (!strcmp(options[i].value.s, "0.10")) {
> +                new_version = 2;
> +            } else if (!strcmp(options[i].value.s, "1.1")) {
> +                new_version = 3;
> +            } else {
> +                fprintf(stderr, "Unknown compatibility level %s.\n",
> +                        options[i].value.s);
> +                return -EINVAL;
> +            }
> +        } else if (!strcmp(options[i].name, "preallocation")) {
> +            if (options[i].value.s) {
> +                fprintf(stderr, "Cannot change preallocation mode.\n");
> +                return -ENOTSUP;
> +            }
> +        } else if (!strcmp(options[i].name, "size")) {
> +            new_size = options[i].value.n;
> +        } else if (!strcmp(options[i].name, "backing_file")) {
> +            backing_file = options[i].value.s;
> +        } else if (!strcmp(options[i].name, "backing_fmt")) {
> +            backing_format = options[i].value.s;
> +        } else if (!strcmp(options[i].name, "encryption")) {
> +            if (options[i].value.n != !!s->crypt_method) {
> +                fprintf(stderr, "Changing the encryption flag is not "
> +                        "supported.\n");
> +                return -ENOTSUP;
> +            }
> +        } else if (!strcmp(options[i].name, "cluster_size")) {
> +            if (options[i].value.n && (options[i].value.n != s->cluster_size)) {
> +                fprintf(stderr, "Changing the cluster size is not "
> +                        "supported.\n");
> +                return -ENOTSUP;
> +            }
> +        } else if (!strcmp(options[i].name, "lazy_refcounts")) {
> +            /* TODO: detect whether this flag was indeed explicitly given */
> +            lazy_refcounts = options[i].value.n;

I can see two ways to achieve this:

1. Add a new field 'bool assigned' to QEMUOptionParameter, which would
   be cleared before parsing an option string and set for each option in
   set_option_parameter()

2. Get the QemuOpts conversion series in and add a function that tells
   whether a given option was specified or not.

The same TODO should actually apply to encryption and cluster_size as
well, shouldn't it?

> +        } else {
> +            fprintf(stderr, "Unknown option '%s'.\n", options[i].name);

That's actually a programming error, perhaps a case for assert(false);

> +        }
> +    }
> +
> +    if (new_version != old_version) {
> +        if (new_version > old_version) {
> +            /* Upgrade */
> +            s->qcow_version = new_version;
> +            ret = qcow2_update_header(bs);
> +            if (ret < 0) {
> +                s->qcow_version = old_version;
> +                return ret;
> +            }
> +        } else {
> +            qcow2_downgrade(bs, new_version);

Error handling?

> +        }
> +    }
> +
> +    if (new_size) {
> +        ret = qcow2_truncate(bs, new_size);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    if (backing_file || backing_format) {
> +        ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
> +                                        backing_format ?: bs->backing_format);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    if (s->use_lazy_refcounts != lazy_refcounts) {
> +        if (lazy_refcounts) {
> +            if (s->qcow_version < 3) {
> +                fprintf(stderr, "Lazy refcounts only supported with compatibility "
> +                        "level 1.1 and above (use compat=1.1 or greater)\n");
> +                return -EINVAL;
> +            }
> +            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
> +            ret = qcow2_update_header(bs);
> +            if (ret < 0) {
> +                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
> +                return ret;
> +            }
> +            s->use_lazy_refcounts = true;
> +        } else {
> +            /* make image clean first */
> +            if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
> +                BdrvCheckResult result;
> +                ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
> +                if (ret < 0) {
> +                    return ret;
> +                }
> +            }

Unnecessary, like above.

> +            qcow2_mark_clean(bs);

And error handling again.

> +            /* now disallow lazy refcounts */
> +            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
> +            ret = qcow2_update_header(bs);
> +            if (ret < 0) {
> +                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
> +                return ret;
> +            }
> +            s->use_lazy_refcounts = false;
> +        }
> +    }
> +
> +    return 0;
> +}

Kevin
Max Reitz Aug. 28, 2013, 11:39 a.m. UTC | #2
Hi,

Am 28.08.2013 13:06, schrieb Kevin Wolf:
> Am 28.08.2013 um 10:08 hat Max Reitz geschrieben:
>> Implement bdrv_amend_options for compat, size, backing_file, backing_fmt
>> and lazy_refcounts.
>>
>> Downgrading images from compat=1.1 to compat=0.10 is achieved through
>> handling all incompatible flags accordingly, clearing all compatible and
>> autoclear flags and expanding all zero clusters.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>> +int qcow2_expand_zero_clusters(BlockDriverState *bs)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    int ret;
>> +    int i;
>> +
>> +    for (i = 0; i < s->l1_size; i++) {
> This fails to expand zero clusters in non-active L2 tables. (Please add
> a test case for this scenario.)
Oh, yes, right.

>> +        uint64_t *l2_table;
>> +        int l2_index;
>> +        int j;
>> +        bool l2_dirty = false;
>> +
>> +        ret = get_cluster_table(bs, (uint64_t)i << (s->l2_bits +
>> +                s->cluster_bits), &l2_table, &l2_index);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
>> +
>> +        for (j = 0; j < s->l2_size; j++) {
>> +            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
>> +            if (!(l2_entry & QCOW_OFLAG_COMPRESSED) &&
>> +                (l2_entry & QCOW_OFLAG_ZERO)) {
> qcow2_get_cluster_type()?
Sounds like an option to go for.

>> +                /* uncompressed zero cluster */
>> +                int64_t offset = qcow2_alloc_clusters(bs, s->cluster_size);
>> +                if (offset < 0) {
>> +                    ret = offset;
>> +                    goto fail;
>> +                }
> Does it handle zero clusters with an offset (i.e. preallocation)
> correctly? I believe we must either reuse that cluster or free it.
Okay.

>> +                ret = bdrv_write_zeroes(bs->file, offset >> BDRV_SECTOR_BITS,
>> +                                        s->cluster_size >> BDRV_SECTOR_BITS);
>> +                if (ret < 0) {
>> +                    qcow2_free_clusters(bs, offset, s->cluster_size,
>> +                            QCOW2_DISCARD_ALWAYS);
>> +                    goto fail;
>> +                }
>> +
>> +                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
>> +                l2_dirty = true;
>> +            }
>> +        }
>> +
>> +        ret = 0;
>> +
>> +fail:
>> +        if (l2_dirty) {
>> +            qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
> qcow2_cache_depends_on_flush(s->l2_table_cache), too. The L2 table must
> only be written when the zeroes are stable on disk.
Okay.

>> +    /* clear incompatible features */
>> +    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
>> +        BdrvCheckResult result;
>> +        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
> This is unnecessary: The image could be opened, so we know that it was
> clean when we started. We also know that we haven't crashed yet, so if we
> flush all in-memory data, we'll have a consistent on-disk state again.
>
> qcow2_mark_clean() already calls bdrv_flush(bs), so it does everything
> that is needed in this respect.
So that flag should always be already cleared at this point anyway?

>> +        } else if (!strcmp(options[i].name, "encryption")) {
>> +            if (options[i].value.n != !!s->crypt_method) {
>> +                fprintf(stderr, "Changing the encryption flag is not "
>> +                        "supported.\n");
>> +                return -ENOTSUP;
>> +            }
>> +        } else if (!strcmp(options[i].name, "cluster_size")) {
>> +            if (options[i].value.n && (options[i].value.n != s->cluster_size)) {
>> +                fprintf(stderr, "Changing the cluster size is not "
>> +                        "supported.\n");
>> +                return -ENOTSUP;
>> +            }
>> +        } else if (!strcmp(options[i].name, "lazy_refcounts")) {
>> +            /* TODO: detect whether this flag was indeed explicitly given */
>> +            lazy_refcounts = options[i].value.n;
> I can see two ways to achieve this:
>
> 1. Add a new field 'bool assigned' to QEMUOptionParameter, which would
>     be cleared before parsing an option string and set for each option in
>     set_option_parameter()
>
> 2. Get the QemuOpts conversion series in and add a function that tells
>     whether a given option was specified or not.
>
> The same TODO should actually apply to encryption and cluster_size as
> well, shouldn't it?
Kind of; however, a cluster_size of 0 seems invalid to me, therefore it 
is pretty easy to detect that option not being given.

The TODO rather also applies to encryption; however, since the worst it 
does there is generate an error message, it isn't as bad as here (were 
the code might actually change the image if the flag is not given).

>> +        } else {
>> +            fprintf(stderr, "Unknown option '%s'.\n", options[i].name);
> That's actually a programming error, perhaps a case for assert(false);
True.

>> +        }
>> +    }
>> +
>> +    if (new_version != old_version) {
>> +        if (new_version > old_version) {
>> +            /* Upgrade */
>> +            s->qcow_version = new_version;
>> +            ret = qcow2_update_header(bs);
>> +            if (ret < 0) {
>> +                s->qcow_version = old_version;
>> +                return ret;
>> +            }
>> +        } else {
>> +            qcow2_downgrade(bs, new_version);
> Error handling?
Oh, right, forgot it. Sorry.


Max
Kevin Wolf Aug. 28, 2013, 11:48 a.m. UTC | #3
Am 28.08.2013 um 13:39 hat Max Reitz geschrieben:
> >>+    /* clear incompatible features */
> >>+    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
> >>+        BdrvCheckResult result;
> >>+        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
> >>+        if (ret < 0) {
> >>+            return ret;
> >>+        }
> >This is unnecessary: The image could be opened, so we know that it was
> >clean when we started. We also know that we haven't crashed yet, so if we
> >flush all in-memory data, we'll have a consistent on-disk state again.
> >
> >qcow2_mark_clean() already calls bdrv_flush(bs), so it does everything
> >that is needed in this respect.
> So that flag should always be already cleared at this point anyway?

The qcow2_mark_clean() call is on the next line (which you removed from
the quote), so not at this point but one line later. But yeah, it's done
by other code.

> >>+        } else if (!strcmp(options[i].name, "encryption")) {
> >>+            if (options[i].value.n != !!s->crypt_method) {
> >>+                fprintf(stderr, "Changing the encryption flag is not "
> >>+                        "supported.\n");
> >>+                return -ENOTSUP;
> >>+            }
> >>+        } else if (!strcmp(options[i].name, "cluster_size")) {
> >>+            if (options[i].value.n && (options[i].value.n != s->cluster_size)) {
> >>+                fprintf(stderr, "Changing the cluster size is not "
> >>+                        "supported.\n");
> >>+                return -ENOTSUP;
> >>+            }
> >>+        } else if (!strcmp(options[i].name, "lazy_refcounts")) {
> >>+            /* TODO: detect whether this flag was indeed explicitly given */
> >>+            lazy_refcounts = options[i].value.n;
> >I can see two ways to achieve this:
> >
> >1. Add a new field 'bool assigned' to QEMUOptionParameter, which would
> >    be cleared before parsing an option string and set for each option in
> >    set_option_parameter()
> >
> >2. Get the QemuOpts conversion series in and add a function that tells
> >    whether a given option was specified or not.
> >
> >The same TODO should actually apply to encryption and cluster_size as
> >well, shouldn't it?
> Kind of; however, a cluster_size of 0 seems invalid to me, therefore
> it is pretty easy to detect that option not being given.

Depends on whether you think that 'qemu-img amend -o cluster_size=0' is
a valid way of saying that you don't want to change the cluster size. I
would prefer to error out.

> The TODO rather also applies to encryption; however, since the worst
> it does there is generate an error message, it isn't as bad as here
> (were the code might actually change the image if the flag is not
> given).

Agreed, it "only" requires that the user specify the encryption flag
explicitly when changing options of an encrypted image. No disaster
happens, it's just unfriendly.

Kevin
Max Reitz Aug. 28, 2013, 12:05 p.m. UTC | #4
Hi,

Am 28.08.2013 13:48, schrieb Kevin Wolf:
> Am 28.08.2013 um 13:39 hat Max Reitz geschrieben:
>>>> +    /* clear incompatible features */
>>>> +    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
>>>> +        BdrvCheckResult result;
>>>> +        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
>>>> +        if (ret < 0) {
>>>> +            return ret;
>>>> +        }
>>> This is unnecessary: The image could be opened, so we know that it was
>>> clean when we started. We also know that we haven't crashed yet, so if we
>>> flush all in-memory data, we'll have a consistent on-disk state again.
>>>
>>> qcow2_mark_clean() already calls bdrv_flush(bs), so it does everything
>>> that is needed in this respect.
>> So that flag should always be already cleared at this point anyway?
> The qcow2_mark_clean() call is on the next line (which you removed from
> the quote), so not at this point but one line later. But yeah, it's done
> by other code.
Yes, I was referring to other code (which cleans the image right at 
opening).

>>>> +        } else if (!strcmp(options[i].name, "encryption")) {
>>>> +            if (options[i].value.n != !!s->crypt_method) {
>>>> +                fprintf(stderr, "Changing the encryption flag is not "
>>>> +                        "supported.\n");
>>>> +                return -ENOTSUP;
>>>> +            }
>>>> +        } else if (!strcmp(options[i].name, "cluster_size")) {
>>>> +            if (options[i].value.n && (options[i].value.n != s->cluster_size)) {
>>>> +                fprintf(stderr, "Changing the cluster size is not "
>>>> +                        "supported.\n");
>>>> +                return -ENOTSUP;
>>>> +            }
>>>> +        } else if (!strcmp(options[i].name, "lazy_refcounts")) {
>>>> +            /* TODO: detect whether this flag was indeed explicitly given */
>>>> +            lazy_refcounts = options[i].value.n;
>>> I can see two ways to achieve this:
>>>
>>> 1. Add a new field 'bool assigned' to QEMUOptionParameter, which would
>>>     be cleared before parsing an option string and set for each option in
>>>     set_option_parameter()
>>>
>>> 2. Get the QemuOpts conversion series in and add a function that tells
>>>     whether a given option was specified or not.
>>>
>>> The same TODO should actually apply to encryption and cluster_size as
>>> well, shouldn't it?
>> Kind of; however, a cluster_size of 0 seems invalid to me, therefore
>> it is pretty easy to detect that option not being given.
> Depends on whether you think that 'qemu-img amend -o cluster_size=0' is
> a valid way of saying that you don't want to change the cluster size. I
> would prefer to error out.
No, I just missed the default for that option not being zero. Sorry, my 
fault.


Max
Kevin Wolf Aug. 28, 2013, 12:12 p.m. UTC | #5
Am 28.08.2013 um 14:05 hat Max Reitz geschrieben:
> Hi,
> 
> Am 28.08.2013 13:48, schrieb Kevin Wolf:
> >Am 28.08.2013 um 13:39 hat Max Reitz geschrieben:
> >>>>+    /* clear incompatible features */
> >>>>+    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
> >>>>+        BdrvCheckResult result;
> >>>>+        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
> >>>>+        if (ret < 0) {
> >>>>+            return ret;
> >>>>+        }
> >>>This is unnecessary: The image could be opened, so we know that it was
> >>>clean when we started. We also know that we haven't crashed yet, so if we
> >>>flush all in-memory data, we'll have a consistent on-disk state again.
> >>>
> >>>qcow2_mark_clean() already calls bdrv_flush(bs), so it does everything
> >>>that is needed in this respect.
> >>So that flag should always be already cleared at this point anyway?
> >The qcow2_mark_clean() call is on the next line (which you removed from
> >the quote), so not at this point but one line later. But yeah, it's done
> >by other code.
> Yes, I was referring to other code (which cleans the image right at
> opening).

Well, yes and no then.

Yes, qcow2_open() checks the dirty flag and when it's set, it repairs
the image, which in turn clears the dirty flag.

No, this does not mean that the flag is clear at this point. The next
cluster allocation makes the image dirty again, but we have all
information about the cluster allocation in memory, so a simple flush
makes the image consistent. That's why qcow2_mark_clean() must be called
here, and why calling only this is sufficient.

> >>>>+        } else if (!strcmp(options[i].name, "encryption")) {
> >>>>+            if (options[i].value.n != !!s->crypt_method) {
> >>>>+                fprintf(stderr, "Changing the encryption flag is not "
> >>>>+                        "supported.\n");
> >>>>+                return -ENOTSUP;
> >>>>+            }
> >>>>+        } else if (!strcmp(options[i].name, "cluster_size")) {
> >>>>+            if (options[i].value.n && (options[i].value.n != s->cluster_size)) {
> >>>>+                fprintf(stderr, "Changing the cluster size is not "
> >>>>+                        "supported.\n");
> >>>>+                return -ENOTSUP;
> >>>>+            }
> >>>>+        } else if (!strcmp(options[i].name, "lazy_refcounts")) {
> >>>>+            /* TODO: detect whether this flag was indeed explicitly given */
> >>>>+            lazy_refcounts = options[i].value.n;
> >>>I can see two ways to achieve this:
> >>>
> >>>1. Add a new field 'bool assigned' to QEMUOptionParameter, which would
> >>>    be cleared before parsing an option string and set for each option in
> >>>    set_option_parameter()
> >>>
> >>>2. Get the QemuOpts conversion series in and add a function that tells
> >>>    whether a given option was specified or not.
> >>>
> >>>The same TODO should actually apply to encryption and cluster_size as
> >>>well, shouldn't it?
> >>Kind of; however, a cluster_size of 0 seems invalid to me, therefore
> >>it is pretty easy to detect that option not being given.
> >Depends on whether you think that 'qemu-img amend -o cluster_size=0' is
> >a valid way of saying that you don't want to change the cluster size. I
> >would prefer to error out.
> No, I just missed the default for that option not being zero. Sorry,
> my fault.

Doesn't really change my point: Even if the default was 0, deciding that
the option wasn't given by checking against an invalid value that could
in theory also be passed on the command line is awkward, because it
prevents proper error handling for this specific invalid value.

Kevin
diff mbox

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cca76d4..ac50db2 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1476,3 +1476,69 @@  fail:
 
     return ret;
 }
+
+/*
+ * Expands all zero clusters on the image; important for downgrading to a qcow2
+ * version which doesn't yet support metadata zero clusters.
+ */
+int qcow2_expand_zero_clusters(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+    int i;
+
+    for (i = 0; i < s->l1_size; i++) {
+        uint64_t *l2_table;
+        int l2_index;
+        int j;
+        bool l2_dirty = false;
+
+        ret = get_cluster_table(bs, (uint64_t)i << (s->l2_bits +
+                s->cluster_bits), &l2_table, &l2_index);
+        if (ret < 0) {
+            return ret;
+        }
+
+        for (j = 0; j < s->l2_size; j++) {
+            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+            if (!(l2_entry & QCOW_OFLAG_COMPRESSED) &&
+                (l2_entry & QCOW_OFLAG_ZERO)) {
+                /* uncompressed zero cluster */
+                int64_t offset = qcow2_alloc_clusters(bs, s->cluster_size);
+                if (offset < 0) {
+                    ret = offset;
+                    goto fail;
+                }
+
+                ret = bdrv_write_zeroes(bs->file, offset >> BDRV_SECTOR_BITS,
+                                        s->cluster_size >> BDRV_SECTOR_BITS);
+                if (ret < 0) {
+                    qcow2_free_clusters(bs, offset, s->cluster_size,
+                            QCOW2_DISCARD_ALWAYS);
+                    goto fail;
+                }
+
+                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+                l2_dirty = true;
+            }
+        }
+
+        ret = 0;
+
+fail:
+        if (l2_dirty) {
+            qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        }
+
+        if (ret < 0) {
+            qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+        } else {
+            ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+        }
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 78097e5..47cd5ad 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1735,6 +1735,187 @@  static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
     return ret;
 }
 
+/*
+ * Downgrades an image's version. To achieve this, any incompatible features
+ * have to be removed.
+ */
+static int qcow2_downgrade(BlockDriverState *bs, int target_version)
+{
+    BDRVQcowState *s = bs->opaque;
+    int current_version = s->qcow_version;
+    int ret;
+
+    if (target_version == current_version) {
+        return 0;
+    } else if (target_version > current_version) {
+        return -EINVAL;
+    } else if (target_version != 2) {
+        return -EINVAL;
+    }
+
+    /* clear incompatible features */
+    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+        BdrvCheckResult result;
+        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
+        if (ret < 0) {
+            return ret;
+        }
+        qcow2_mark_clean(bs);
+    }
+
+    if (s->incompatible_features) {
+        return -ENOTSUP;
+    }
+
+    /* since we can ignore compatible features, we can set them to 0 as well */
+    s->compatible_features = 0;
+    /* if lazy refcounts have been used, they have already been fixed through
+     * clearing the dirty flag */
+
+    /* clearing autoclear features is trivial */
+    s->autoclear_features = 0;
+
+    /* the refcount order might be different in newer images - however, qemu
+     * doesn't support anything different than 4 anyway, so nothing to fix
+     * there */
+
+    ret = qcow2_expand_zero_clusters(bs);
+    if (ret < 0) {
+        return ret;
+    }
+
+    s->qcow_version = target_version;
+    ret = qcow2_update_header(bs);
+    if (ret < 0) {
+        s->qcow_version = current_version;
+        return ret;
+    }
+    return 0;
+}
+
+static int qcow2_amend_options(BlockDriverState *bs,
+                               QEMUOptionParameter *options)
+{
+    BDRVQcowState *s = bs->opaque;
+    int old_version = s->qcow_version, new_version = old_version;
+    uint64_t new_size = 0;
+    const char *backing_file = NULL, *backing_format = NULL;
+    bool lazy_refcounts = s->use_lazy_refcounts;
+    int ret;
+    int i;
+
+    for (i = 0; options[i].name; i++)
+    {
+        if (!strcmp(options[i].name, "compat")) {
+            if (!options[i].value.s) {
+                /* preserve default */
+            } else if (!strcmp(options[i].value.s, "0.10")) {
+                new_version = 2;
+            } else if (!strcmp(options[i].value.s, "1.1")) {
+                new_version = 3;
+            } else {
+                fprintf(stderr, "Unknown compatibility level %s.\n",
+                        options[i].value.s);
+                return -EINVAL;
+            }
+        } else if (!strcmp(options[i].name, "preallocation")) {
+            if (options[i].value.s) {
+                fprintf(stderr, "Cannot change preallocation mode.\n");
+                return -ENOTSUP;
+            }
+        } else if (!strcmp(options[i].name, "size")) {
+            new_size = options[i].value.n;
+        } else if (!strcmp(options[i].name, "backing_file")) {
+            backing_file = options[i].value.s;
+        } else if (!strcmp(options[i].name, "backing_fmt")) {
+            backing_format = options[i].value.s;
+        } else if (!strcmp(options[i].name, "encryption")) {
+            if (options[i].value.n != !!s->crypt_method) {
+                fprintf(stderr, "Changing the encryption flag is not "
+                        "supported.\n");
+                return -ENOTSUP;
+            }
+        } else if (!strcmp(options[i].name, "cluster_size")) {
+            if (options[i].value.n && (options[i].value.n != s->cluster_size)) {
+                fprintf(stderr, "Changing the cluster size is not "
+                        "supported.\n");
+                return -ENOTSUP;
+            }
+        } else if (!strcmp(options[i].name, "lazy_refcounts")) {
+            /* TODO: detect whether this flag was indeed explicitly given */
+            lazy_refcounts = options[i].value.n;
+        } else {
+            fprintf(stderr, "Unknown option '%s'.\n", options[i].name);
+        }
+    }
+
+    if (new_version != old_version) {
+        if (new_version > old_version) {
+            /* Upgrade */
+            s->qcow_version = new_version;
+            ret = qcow2_update_header(bs);
+            if (ret < 0) {
+                s->qcow_version = old_version;
+                return ret;
+            }
+        } else {
+            qcow2_downgrade(bs, new_version);
+        }
+    }
+
+    if (new_size) {
+        ret = qcow2_truncate(bs, new_size);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    if (backing_file || backing_format) {
+        ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
+                                        backing_format ?: bs->backing_format);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    if (s->use_lazy_refcounts != lazy_refcounts) {
+        if (lazy_refcounts) {
+            if (s->qcow_version < 3) {
+                fprintf(stderr, "Lazy refcounts only supported with compatibility "
+                        "level 1.1 and above (use compat=1.1 or greater)\n");
+                return -EINVAL;
+            }
+            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
+            ret = qcow2_update_header(bs);
+            if (ret < 0) {
+                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
+                return ret;
+            }
+            s->use_lazy_refcounts = true;
+        } else {
+            /* make image clean first */
+            if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+                BdrvCheckResult result;
+                ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+            qcow2_mark_clean(bs);
+            /* now disallow lazy refcounts */
+            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
+            ret = qcow2_update_header(bs);
+            if (ret < 0) {
+                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
+                return ret;
+            }
+            s->use_lazy_refcounts = false;
+        }
+    }
+
+    return 0;
+}
+
 static QEMUOptionParameter qcow2_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -1818,6 +1999,7 @@  static BlockDriver bdrv_qcow2 = {
 
     .create_options = qcow2_create_options,
     .bdrv_check = qcow2_check,
+    .bdrv_amend_options = qcow2_amend_options,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index dba9771..84109de 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -408,6 +408,8 @@  int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
     int nb_sectors);
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
 
+int qcow2_expand_zero_clusters(BlockDriverState *bs);
+
 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);