diff mbox

[3/5] qcow2: Employ metadata overlap checks

Message ID 1377522260-16676-4-git-send-email-mreitz@redhat.com
State New
Headers show

Commit Message

Max Reitz Aug. 26, 2013, 1:04 p.m. UTC
The pre-write overlap check function is now called before most of the
qcow2 writes (aborting the write on collision or other error).

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c    | 17 +++++++++++++++++
 block/qcow2-cluster.c  | 23 +++++++++++++++++++++++
 block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
 block/qcow2.c          | 38 +++++++++++++++++++++++++++++++++++++-
 4 files changed, 101 insertions(+), 1 deletion(-)

Comments

Kevin Wolf Aug. 27, 2013, 11:32 a.m. UTC | #1
Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> The pre-write overlap check function is now called before most of the
> qcow2 writes (aborting it on collision or other error).
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-cache.c    | 17 +++++++++++++++++
>  block/qcow2-cluster.c  | 23 +++++++++++++++++++++++
>  block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
>  block/qcow2.c          | 38 +++++++++++++++++++++++++++++++++++++-
>  4 files changed, 101 insertions(+), 1 deletion(-)

> @@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
>                          &s->aes_encrypt_key);
>      }
>  
> +    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> +            ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);

This looks a bit overcomplicated; I'd prefer something like this:

cluster_offset + n_start * BDRV_SECTOR_SIZE

> +    if (ret) {
> +        ret = (ret < 0) ? ret : -EIO;

I wonder whether the -EIO logic should be moved into
qcow2_pre_write_overlap_check(). Currently each single caller seems to
have this check.

> +        goto out;
> +    }
> +
>      BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
>      ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
>      if (ret < 0) {
> diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
> index 0caac90..6f69ecc 100644
> --- a/block/qcow2-snapshot.c
> +++ b/block/qcow2-snapshot.c
> @@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
>          return ret;
>      }
>  
> +    /* The snapshot list position has not yet been updated, so these clusters
> +     * must indeed be completely free */
> +    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> +                                        offset, s->nb_snapshots * sizeof(h));
> +    if (ret) {
> +        return (ret < 0) ? ret : -EIO;
> +    }

This doesn't check the full size of the snapshot table. snapshots_size
should have the right value to pass as the size argument instead.

> +
> +
>      /* Write all snapshots to the new list */
>      for(i = 0; i < s->nb_snapshots; i++) {
>          sn = s->snapshots + i;
> @@ -363,6 +372,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
>          l1_table[i] = cpu_to_be64(s->l1_table[i]);
>      }
>  
> +    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> +            sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
> +    if (ret) {
> +        ret = (ret < 0) ? ret : -EIO;
> +        goto fail;
> +    }
> +
>      ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
>                        s->l1_size * sizeof(uint64_t));
>      if (ret < 0) {
> @@ -475,6 +491,14 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
>          goto fail;
>      }
>  
> +    ret = qcow2_pre_write_overlap_check(bs,
> +            QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
> +            s->l1_table_offset, cur_l1_bytes);
> +    if (ret) {
> +        ret = (ret < 0) ? ret : -EIO;
> +        goto fail;
> +    }
> +
>      ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
>                             cur_l1_bytes);
>      if (ret < 0) {
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 1d0d7ca..95497c6 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -621,6 +621,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
>      qcow2_free_snapshots(bs);
>      qcow2_refcount_close(bs);
>      g_free(s->l1_table);
> +    /* else pre-write overlap checks in cache_destroy may crash */
> +    s->l1_table = NULL;
>      if (s->l2_table_cache) {
>          qcow2_cache_destroy(bs, s->l2_table_cache);
>      }
> @@ -920,6 +922,14 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
>                  cur_nr_sectors * 512);
>          }
>  
> +        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> +                ((cluster_offset >> 9) + index_in_cluster) << 9,

Same thing as above.

> +                cur_nr_sectors << 9);
> +        if (ret) {
> +            ret = (ret < 0) ? ret : -EIO;
> +            goto fail;
> +        }
> +
>          qemu_co_mutex_unlock(&s->lock);
>          BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
>          trace_qcow2_writev_data(qemu_coroutine_self(),

Kevin
Max Reitz Aug. 27, 2013, 11:41 a.m. UTC | #2
Am 27.08.2013 13:32, schrieb Kevin Wolf:
> Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
>> The pre-write overlap check function is now called before most of the
>> qcow2 writes (aborting it on collision or other error).
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-cache.c    | 17 +++++++++++++++++
>>   block/qcow2-cluster.c  | 23 +++++++++++++++++++++++
>>   block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
>>   block/qcow2.c          | 38 +++++++++++++++++++++++++++++++++++++-
>>   4 files changed, 101 insertions(+), 1 deletion(-)
>> @@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
>>                           &s->aes_encrypt_key);
>>       }
>>   
>> +    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
>> +            ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
> Looks a bit overcomplicated, I'd like something like this better:
>
> cluster_offset + n_start * BDRV_SECTOR_SIZE
Yes, but this wouldn't correspond with the write call if (cluster_offset 
& ((1 << 9) - 1)) != 0. ;-)

Basically, I just wanted it to match exactly the write command.

>
>> +    if (ret) {
>> +        ret = (ret < 0) ? ret : -EIO;
> I wonder whether the -EIO logic should be moved into
> qcow2_pre_write_overlap_check(). Currently each single caller seems to
> have this check.
Seems reasonable. I didn't want to prevent the caller from receiving 
information about the exact overlap, but that could be achieved through 
an optional result pointer as well, I think.

>
>> +        goto out;
>> +    }
>> +
>>       BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
>>       ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
>>       if (ret < 0) {
>> diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
>> index 0caac90..6f69ecc 100644
>> --- a/block/qcow2-snapshot.c
>> +++ b/block/qcow2-snapshot.c
>> @@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
>>           return ret;
>>       }
>>   
>> +    /* The snapshot list position has not yet been updated, so these clusters
>> +     * must indeed be completely free */
>> +    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
>> +                                        offset, s->nb_snapshots * sizeof(h));
>> +    if (ret) {
>> +        return (ret < 0) ? ret : -EIO;
>> +    }
> This doesn't check the full size. snapshots_size should have the right
> value.
Yes, you're right.

Max
Kevin Wolf Aug. 27, 2013, 11:51 a.m. UTC | #3
Am 27.08.2013 um 13:41 hat Max Reitz geschrieben:
> Am 27.08.2013 13:32, schrieb Kevin Wolf:
> >Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> >>The pre-write overlap check function is now called before most of the
> >>qcow2 writes (aborting it on collision or other error).
> >>
> >>Signed-off-by: Max Reitz <mreitz@redhat.com>
> >>---
> >>  block/qcow2-cache.c    | 17 +++++++++++++++++
> >>  block/qcow2-cluster.c  | 23 +++++++++++++++++++++++
> >>  block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
> >>  block/qcow2.c          | 38 +++++++++++++++++++++++++++++++++++++-
> >>  4 files changed, 101 insertions(+), 1 deletion(-)
> >>@@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
> >>                          &s->aes_encrypt_key);
> >>      }
> >>+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> >>+            ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
> >Looks a bit overcomplicated, I'd like something like this better:
> >
> >cluster_offset + n_start * BDRV_SECTOR_SIZE
> Yes, but this wouldn't correspond with the write call if
> (cluster_offset & ((1 << 9) - 1)) != 0. ;-)

And then you have a problem anyway. That cluster_offset is sector-aligned
is something that I'd be happy to assert() at any time, i.e. if it isn't
true, it's a bug.

> Basically, I just wanted it to match exactly the write command.

I can see your point. Well, matter of taste, I guess.

> >
> >>+    if (ret) {
> >>+        ret = (ret < 0) ? ret : -EIO;
> >I wonder whether the -EIO logic should be moved into
> >qcow2_pre_write_overlap_check(). Currently each single caller seems to
> >have this check.
> Seems reasonable. I didn't want to prevent the caller from receiving
> information about the exact overlap, but that could be achieved
> through an optional result pointer as well, I think.

Don't complicate an interface for a potential caller that doesn't exist
yet. If one comes up, it will change the interface as it needs.

Kevin
diff mbox

Patch

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 2f3114e..da65297 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -115,6 +115,23 @@  static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
     }
 
     if (c == s->refcount_block_cache) {
+        ret = qcow2_pre_write_overlap_check(bs,
+                QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_BLOCK,
+                c->entries[i].offset, s->cluster_size);
+    } else if (c == s->l2_table_cache) {
+        ret = qcow2_pre_write_overlap_check(bs,
+                QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2,
+                c->entries[i].offset, s->cluster_size);
+    } else {
+        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                c->entries[i].offset, s->cluster_size);
+    }
+
+    if (ret) {
+        return (ret < 0) ? ret : -EIO;
+    }
+
+    if (c == s->refcount_block_cache) {
         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
     } else if (c == s->l2_table_cache) {
         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cca76d4..be35983 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -80,6 +80,15 @@  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
         goto fail;
     }
 
+    /* the L1 position has not yet been updated, so these clusters must
+     * indeed be completely free */
+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                                        new_l1_table_offset, new_l1_size2);
+    if (ret) {
+        ret = (ret < 0) ? ret : -EIO;
+        goto fail;
+    }
+
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
@@ -149,6 +158,13 @@  static int write_l1_entry(BlockDriverState *bs, int l1_index)
         buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
     }
 
+    ret = qcow2_pre_write_overlap_check(bs,
+            QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
+            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
+    if (ret) {
+        return (ret < 0) ? ret : -EIO;
+    }
+
     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
         buf, sizeof(buf));
@@ -368,6 +384,13 @@  static int coroutine_fn copy_sectors(BlockDriverState *bs,
                         &s->aes_encrypt_key);
     }
 
+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+            ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
+    if (ret) {
+        ret = (ret < 0) ? ret : -EIO;
+        goto out;
+    }
+
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
     ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
     if (ret < 0) {
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0caac90..6f69ecc 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -189,6 +189,15 @@  static int qcow2_write_snapshots(BlockDriverState *bs)
         return ret;
     }
 
+    /* The snapshot list position has not yet been updated, so these clusters
+     * must indeed be completely free */
+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                                        offset, s->nb_snapshots * sizeof(h));
+    if (ret) {
+        return (ret < 0) ? ret : -EIO;
+    }
+
+
     /* Write all snapshots to the new list */
     for(i = 0; i < s->nb_snapshots; i++) {
         sn = s->snapshots + i;
@@ -363,6 +372,13 @@  int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         l1_table[i] = cpu_to_be64(s->l1_table[i]);
     }
 
+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+            sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
+    if (ret) {
+        ret = (ret < 0) ? ret : -EIO;
+        goto fail;
+    }
+
     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
                       s->l1_size * sizeof(uint64_t));
     if (ret < 0) {
@@ -475,6 +491,14 @@  int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto fail;
     }
 
+    ret = qcow2_pre_write_overlap_check(bs,
+            QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
+            s->l1_table_offset, cur_l1_bytes);
+    if (ret) {
+        ret = (ret < 0) ? ret : -EIO;
+        goto fail;
+    }
+
     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
                            cur_l1_bytes);
     if (ret < 0) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 1d0d7ca..95497c6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -621,6 +621,8 @@  static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
     qcow2_free_snapshots(bs);
     qcow2_refcount_close(bs);
     g_free(s->l1_table);
+    /* else pre-write overlap checks in cache_destroy may crash */
+    s->l1_table = NULL;
     if (s->l2_table_cache) {
         qcow2_cache_destroy(bs, s->l2_table_cache);
     }
@@ -920,6 +922,14 @@  static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                 cur_nr_sectors * 512);
         }
 
+        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                ((cluster_offset >> 9) + index_in_cluster) << 9,
+                cur_nr_sectors << 9);
+        if (ret) {
+            ret = (ret < 0) ? ret : -EIO;
+            goto fail;
+        }
+
         qemu_co_mutex_unlock(&s->lock);
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
@@ -986,6 +996,8 @@  static void qcow2_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     g_free(s->l1_table);
+    /* else pre-write overlap checks in cache_destroy may crash */
+    s->l1_table = NULL;
 
     qcow2_cache_flush(bs, s->l2_table_cache);
     qcow2_cache_flush(bs, s->refcount_block_cache);
@@ -1663,6 +1675,14 @@  static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
         /* could not compress: write normal cluster */
+
+        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                sector_num << 9, s->cluster_sectors << 9);
+        if (ret) {
+            ret = (ret < 0) ? ret : -EIO;
+            goto fail;
+        }
+
         ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
         if (ret < 0) {
             goto fail;
@@ -1675,6 +1695,14 @@  static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
             goto fail;
         }
         cluster_offset &= s->cluster_offset_mask;
+
+        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                cluster_offset, out_len);
+        if (ret) {
+            ret = (ret < 0) ? ret : -EIO;
+            goto fail;
+        }
+
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
         ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
         if (ret < 0) {
@@ -1752,10 +1780,18 @@  static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
     BDRVQcowState *s = bs->opaque;
     int growable = bs->growable;
     int ret;
+    int64_t offset = qcow2_vm_state_offset(s) + pos;
 
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
     bs->growable = 1;
-    ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
+
+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
+                                        qiov->size);
+    if (ret) {
+        return (ret < 0) ? ret : -EIO;
+    }
+
+    ret = bdrv_pwritev(bs, offset, qiov);
     bs->growable = growable;
 
     return ret;