Patchwork [07/10] qcow2: Improve error handling in update_refcount

login
register
mail settings
Submitter Kevin Wolf
Date Jan. 18, 2010, 12:11 p.m.
Message ID <1263816696-24122-8-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/43065/
State New
Headers show

Comments

Kevin Wolf - Jan. 18, 2010, 12:11 p.m.
If update_refcount fails, try to undo any changes made so far to avoid
inconsistencies in the image file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-refcount.c |   32 +++++++++++++++++++++++++-------
 1 files changed, 25 insertions(+), 7 deletions(-)
Christoph Hellwig - Jan. 19, 2010, 6:51 p.m.
On Mon, Jan 18, 2010 at 01:11:33PM +0100, Kevin Wolf wrote:
> If update_refcount fails, try to undo any changes made so far to avoid
> inconsistencies in the image file.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block/qcow2-refcount.c |   32 +++++++++++++++++++++++++-------
>  1 files changed, 25 insertions(+), 7 deletions(-)
> 

> +    /*
> +     * Try do undo any updates if an error is returned (This may succeed in
> +     * some cases like ENOSPC for allocating a new refcount block)
> +     */
> +    if (ret < 0) {
> +        int dummy;
> +        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);

So we recursively call into update_refcount here.  What happens an error
causes all updates to fail?
Kevin Wolf - Jan. 20, 2010, 9:47 a.m.
Am 19.01.2010 19:51, schrieb Christoph Hellwig:
> On Mon, Jan 18, 2010 at 01:11:33PM +0100, Kevin Wolf wrote:
>> If update_refcount fails, try to undo any changes made so far to avoid
>> inconsistencies in the image file.
>>
>> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>> ---
>>  block/qcow2-refcount.c |   32 +++++++++++++++++++++++++-------
>>  1 files changed, 25 insertions(+), 7 deletions(-)
>>
> 
>> +    /*
>> +     * Try do undo any updates if an error is returned (This may succeed in
>> +     * some cases like ENOSPC for allocating a new refcount block)
>> +     */
>> +    if (ret < 0) {
>> +        int dummy;
>> +        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
> 
> So we recursively call into update_refcount here.  What happens an error
> causes all updates to fail?

We're only reverting writes that have succeeded. If reverting fails,
there are no successful writes and update_refcount will be called with a
length of 0 (which never fails). In the worst case we're only reverting
one cluster less with each recursive call - however, I don't think
that's a realistic scenario.

Kevin

Patch

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6f449c6..a84620f 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -278,6 +278,7 @@  static int update_refcount(BlockDriverState *bs,
     int64_t refcount_block_offset = 0;
     int64_t table_index = -1, old_table_index;
     int first_index = -1, last_index = -1;
+    int ret;
 
 #ifdef DEBUG_ALLOC2
     printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
@@ -292,6 +293,7 @@  static int update_refcount(BlockDriverState *bs,
     {
         int block_index, refcount;
         int64_t cluster_index = cluster_offset >> s->cluster_bits;
+        int64_t new_block;
 
         /* Only write refcount block to disk when we are done with it */
         old_table_index = table_index;
@@ -309,10 +311,12 @@  static int update_refcount(BlockDriverState *bs,
         }
 
         /* Load the refcount block and allocate it if needed */
-        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
-        if (refcount_block_offset < 0) {
-            return refcount_block_offset;
+        new_block = alloc_refcount_block(bs, cluster_index);
+        if (new_block < 0) {
+            ret = new_block;
+            goto fail;
         }
+        refcount_block_offset = new_block;
 
         /* we can update the count and save it */
         block_index = cluster_index &
@@ -326,24 +330,38 @@  static int update_refcount(BlockDriverState *bs,
 
         refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
         refcount += addend;
-        if (refcount < 0 || refcount > 0xffff)
-            return -EINVAL;
+        if (refcount < 0 || refcount > 0xffff) {
+            ret = -EINVAL;
+            goto fail;
+        }
         if (refcount == 0 && cluster_index < s->free_cluster_index) {
             s->free_cluster_index = cluster_index;
         }
         s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
     }
 
+    ret = 0;
+fail:
+
     /* Write last changed block to disk */
     if (refcount_block_offset != 0) {
         if (write_refcount_block_entries(s, refcount_block_offset,
             first_index, last_index) < 0)
         {
-            return -EIO;
+            return ret < 0 ? ret : -EIO;
         }
     }
 
-    return 0;
+    /*
+     * Try do undo any updates if an error is returned (This may succeed in
+     * some cases like ENOSPC for allocating a new refcount block)
+     */
+    if (ret < 0) {
+        int dummy;
+        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
+    }
+
+    return ret;
 }
 
 /* addend must be 1 or -1 */