Patchwork [v2] qcow2: Unlock during COW

login
register
mail settings
Submitter Kevin Wolf
Date Nov. 14, 2011, 5:55 p.m.
Message ID <1321293318-19177-1-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/125583/
State New
Headers show

Comments

Kevin Wolf - Nov. 14, 2011, 5:55 p.m.
Unlocking during COW allows for more parallelism. One change it requires is
that buffers are dynamically allocated instead of just using a per-image
buffer.

While touching the code, drop the synchronous qcow2_read() function and replace
it by a bdrv_read() call.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |  104 ++++++++++++++++--------------------------------
 1 files changed, 35 insertions(+), 69 deletions(-)
Stefan Hajnoczi - Nov. 15, 2011, 12:07 p.m.
On Mon, Nov 14, 2011 at 06:55:18PM +0100, Kevin Wolf wrote:
> Unlocking during COW allows for more parallelism. One change it requires is
> that buffers are dynamically allocated instead of just using a per-image
> buffer.
> 
> While touching the code, drop the synchronous qcow2_read() function and replace
> it by a bdrv_read() call.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block/qcow2-cluster.c |  104 ++++++++++++++++--------------------------------
>  1 files changed, 35 insertions(+), 69 deletions(-)

This should be safe because dependent requests are queued so we can
perform copy_sectors() in parallel with the non-dependent requests.

> -static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
> -                      uint8_t *buf, int nb_sectors)
> -{
> -    BDRVQcowState *s = bs->opaque;
> -    int ret, index_in_cluster, n, n1;
> -    uint64_t cluster_offset;
> -    struct iovec iov;
> -    QEMUIOVector qiov;
> -
> -    while (nb_sectors > 0) {
> -        n = nb_sectors;
> -
> -        ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
> -            &cluster_offset);
> -        if (ret < 0) {
> -            return ret;
> -        }
> -
> -        index_in_cluster = sector_num & (s->cluster_sectors - 1);
> -        if (!cluster_offset) {
> -            if (bs->backing_hd) {
> -                /* read from the base image */
> -                iov.iov_base = buf;
> -                iov.iov_len = n * 512;
> -                qemu_iovec_init_external(&qiov, &iov, 1);
> -
> -                n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
> -                if (n1 > 0) {
> -                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
> -                    ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
> -                    if (ret < 0)
> -                        return -1;
> -                }
> -            } else {
> -                memset(buf, 0, 512 * n);
> -            }
> -        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
> -            if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
> -                return -1;
> -            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
> -        } else {
> -            BLKDBG_EVENT(bs->file, BLKDBG_READ);

BLKDBG_READ is now unused, there is no other occurrence in QEMU.  Not
sure if you want to remove it from blkdebug now?

> -            ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
> -            if (ret != n * 512)
> -                return -1;
> -            if (s->crypt_method) {
> -                qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
> -                                &s->aes_decrypt_key);
> -            }
> -        }
> -        nb_sectors -= n;
> -        sector_num += n;
> -        buf += n * 512;
> -    }
> -    return 0;
> -}
> -
>  static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
>                          uint64_t cluster_offset, int n_start, int n_end)
>  {
>      BDRVQcowState *s = bs->opaque;
>      int n, ret;
> +    void *buf;
> +
> +    /*
> +     * If this is the last cluster and it is only partially used, we must only
> +     * copy until the end of the image, or bdrv_check_request will fail for the
> +     * bdrv_read/write calls below.
> +     */
> +    if (start_sect + n_end > bs->total_sectors) {
> +        n_end = bs->total_sectors - start_sect;
> +    }
>  
>      n = n_end - n_start;
> -    if (n <= 0)
> +    if (n <= 0) {
>          return 0;
> +    }
> +
> +    buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
> +
>      BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
> -    ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
> -    if (ret < 0)
> -        return ret;
> +    ret = bdrv_read(bs, start_sect + n_start, buf, n);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
>      if (s->crypt_method) {
>          qcow2_encrypt_sectors(s, start_sect + n_start,
> -                        s->cluster_data,
> -                        s->cluster_data, n, 1,
> +                        buf, buf, n, 1,
>                          &s->aes_encrypt_key);
>      }
> +
>      BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
> -    ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
> -        s->cluster_data, n);
> -    if (ret < 0)
> -        return ret;
> -    return 0;
> +    ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, buf, n);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    ret = 0;

bdrv_write() returns 0 on success or -errno on failure.  We don't need
to check for ret < 0 or set ret = 0 here.  Just continuing execution
should be fine unless I missed something.

Stefan
Kevin Wolf - Nov. 23, 2011, 5:01 p.m.
Am 14.11.2011 18:55, schrieb Kevin Wolf:
> Unlocking during COW allows for more parallelism. One change it requires is
> that buffers are dynamically allocated instead of just using a per-image
> buffer.
> 
> While touching the code, drop the synchronous qcow2_read() function and replace
> it by a bdrv_read() call.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>

This seems to break an IDE guest for me. I don't quite understand why
yet, but maybe someone has an idea.

hw/ide/pci.c:313: bmdma_cmd_writeb: Assertion `bm->bus->dma->aiocb ==
((void *)0)' failed.

Kevin
Marc-André Lureau - May 5, 2012, 12:21 a.m.
Hi

On Mon, Nov 14, 2011 at 6:55 PM, Kevin Wolf <kwolf@redhat.com> wrote:
> Unlocking during COW allows for more parallelism. One change it requires is
> that buffers are dynamically allocated instead of just using a per-image
> buffer.

This commit introduces failure with preallocation and some sizes:

qemu-img create -f qcow2 new.img 976563K -o preallocation=metadata
qemu-img: qemu-coroutine-lock.c:111: qemu_co_mutex_unlock: Assertion
`mutex->locked == 1' failed.

regards
Zhiyong Wu - May 5, 2012, 3:17 a.m.
On Sat, May 5, 2012 at 8:21 AM, Marc-André Lureau
<marcandre.lureau@gmail.com> wrote:
> Hi
>
> On Mon, Nov 14, 2011 at 6:55 PM, Kevin Wolf <kwolf@redhat.com> wrote:
>> Unlocking during COW allows for more parallelism. One change it requires is
>> that buffers are dynamically allocated instead of just using a per-image
>> buffer.
>
> This commit introduces failure with preallocation and some sizes:
>
> qemu-img create -f qcow2 new.img 976563K -o preallocation=metadata
> qemu-img: qemu-coroutine-lock.c:111: qemu_co_mutex_unlock: Assertion
> `mutex->locked == 1' failed.
For this issue, I have submitted two patches; can you help test against
my patches?
1. [1/2] block: make bdrv_create adopt coroutine
    http://patchwork.ozlabs.org/patch/157022/
2. [2/2] qcow2: lock on prealloc
    http://patchwork.ozlabs.org/patch/157023/
>
> regards
>
> --
> Marc-André Lureau
>

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f4e049f..0e33707 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -289,89 +289,51 @@  void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
     }
 }
 
-
-static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
-                      uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, index_in_cluster, n, n1;
-    uint64_t cluster_offset;
-    struct iovec iov;
-    QEMUIOVector qiov;
-
-    while (nb_sectors > 0) {
-        n = nb_sectors;
-
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
-            &cluster_offset);
-        if (ret < 0) {
-            return ret;
-        }
-
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        if (!cluster_offset) {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                iov.iov_base = buf;
-                iov.iov_len = n * 512;
-                qemu_iovec_init_external(&qiov, &iov, 1);
-
-                n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
-                if (n1 > 0) {
-                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
-                    ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
-                    if (ret < 0)
-                        return -1;
-                }
-            } else {
-                memset(buf, 0, 512 * n);
-            }
-        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-            if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
-                return -1;
-            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
-        } else {
-            BLKDBG_EVENT(bs->file, BLKDBG_READ);
-            ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
-            if (ret != n * 512)
-                return -1;
-            if (s->crypt_method) {
-                qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
-                                &s->aes_decrypt_key);
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
 static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
                         uint64_t cluster_offset, int n_start, int n_end)
 {
     BDRVQcowState *s = bs->opaque;
     int n, ret;
+    void *buf;
+
+    /*
+     * If this is the last cluster and it is only partially used, we must only
+     * copy until the end of the image, or bdrv_check_request will fail for the
+     * bdrv_read/write calls below.
+     */
+    if (start_sect + n_end > bs->total_sectors) {
+        n_end = bs->total_sectors - start_sect;
+    }
 
     n = n_end - n_start;
-    if (n <= 0)
+    if (n <= 0) {
         return 0;
+    }
+
+    buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
+
     BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-    ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
-    if (ret < 0)
-        return ret;
+    ret = bdrv_read(bs, start_sect + n_start, buf, n);
+    if (ret < 0) {
+        goto out;
+    }
+
     if (s->crypt_method) {
         qcow2_encrypt_sectors(s, start_sect + n_start,
-                        s->cluster_data,
-                        s->cluster_data, n, 1,
+                        buf, buf, n, 1,
                         &s->aes_encrypt_key);
     }
+
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
-        s->cluster_data, n);
-    if (ret < 0)
-        return ret;
-    return 0;
+    ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, buf, n);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = 0;
+out:
+    qemu_vfree(buf);
+    return ret;
 }
 
 
@@ -620,7 +582,9 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
     if (m->n_start) {
         cow = true;
+        qemu_co_mutex_unlock(&s->lock);
         ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
+        qemu_co_mutex_lock(&s->lock);
         if (ret < 0)
             goto err;
     }
@@ -628,8 +592,10 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     if (m->nb_available & (s->cluster_sectors - 1)) {
         uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
         cow = true;
+        qemu_co_mutex_unlock(&s->lock);
         ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
                 m->nb_available - end, s->cluster_sectors);
+        qemu_co_mutex_lock(&s->lock);
         if (ret < 0)
             goto err;
     }