diff mbox

[v3,20/29] block: Align requests in bdrv_co_do_pwritev()

Message ID 1389968119-24771-21-git-send-email-kwolf@redhat.com
State New
Headers show

Commit Message

Kevin Wolf Jan. 17, 2014, 2:15 p.m. UTC
This patch changes bdrv_co_do_pwritev() to actually be what its name
promises. If requests aren't properly aligned, it performs a
read-modify-write (RMW) cycle.

Requests touching the same block are serialised against the RMW request.
Further optimisation of this is possible by differentiating types of
requests (concurrent reads should actually be okay here).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

Comments

Benoît Canet Jan. 22, 2014, 8:29 p.m. UTC | #1
Le Friday 17 Jan 2014 à 15:15:10 (+0100), Kevin Wolf a écrit :
> This patch changes bdrv_co_do_pwritev() to actually be what its name
> promises. If requests aren't properly aligned, it performs a RMW.
> 
> Requests touching the same block are serialised against the RMW request.
> Further optimisation of this is possible by differentiating types of
> requests (concurrent reads should actually be okay here).
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
> ---
>  block.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/block.c b/block.c
> index 55e8c69..859e1aa 100644
> --- a/block.c
> +++ b/block.c
> @@ -3055,6 +3055,12 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
>      BdrvRequestFlags flags)
>  {
>      BdrvTrackedRequest req;
> +    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
> +    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
> +    uint8_t *head_buf = NULL;
> +    uint8_t *tail_buf = NULL;
> +    QEMUIOVector local_qiov;
> +    bool use_local_qiov = false;
>      int ret;
>  
>      if (!bs->drv) {
> @@ -3073,10 +3079,88 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
>          bdrv_io_limits_intercept(bs, bytes >> BDRV_SECTOR_BITS, true);
>      }
>  
> +    /*
> +     * Align write if necessary by performing a read-modify-write cycle.
> +     * Pad qiov with the read parts and be sure to have a tracked request not
> +     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
> +     */
>      tracked_request_begin(&req, bs, offset, bytes, true);
> -    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, qiov, flags);
> +
> +    if (offset & (align - 1)) {
> +        QEMUIOVector head_qiov;
> +        struct iovec head_iov;
> +
> +        mark_request_serialising(&req, align);
> +        wait_serialising_requests(&req);
> +
> +        head_buf = qemu_blockalign(bs, align);
> +        head_iov = (struct iovec) {
> +            .iov_base   = head_buf,
> +            .iov_len    = align,
> +        };
> +        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
> +
> +        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
> +                                  align, &head_qiov, 0);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        qemu_iovec_init(&local_qiov, qiov->niov + 2);
> +        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
> +        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
> +        use_local_qiov = true;
> +
> +        bytes += offset & (align - 1);
> +        offset = offset & ~(align - 1);
> +    }
> +
> +    if ((offset + bytes) & (align - 1)) {
> +        QEMUIOVector tail_qiov;
> +        struct iovec tail_iov;
> +        size_t tail_bytes;
> +
> +        mark_request_serialising(&req, align);
> +        wait_serialising_requests(&req);
> +
> +        tail_buf = qemu_blockalign(bs, align);
> +        tail_iov = (struct iovec) {
> +            .iov_base   = tail_buf,
> +            .iov_len    = align,
> +        };
> +        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
> +
> +        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
> +                                  align, &tail_qiov, 0);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        if (!use_local_qiov) {
> +            qemu_iovec_init(&local_qiov, qiov->niov + 1);
> +            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
> +            use_local_qiov = true;
> +        }
> +
> +        tail_bytes = (offset + bytes) & (align - 1);
> +        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
> +
> +        bytes = ROUND_UP(bytes, align);
> +    }
> +
> +    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
> +                               use_local_qiov ? &local_qiov : qiov,
> +                               flags);
> +
> +fail:
>      tracked_request_end(&req);
>  
> +    if (use_local_qiov) {
> +        qemu_iovec_destroy(&local_qiov);
> +        qemu_vfree(head_buf);
> +        qemu_vfree(tail_buf);
> +    }
> +
>      return ret;
>  }
>  
> -- 
> 1.8.1.4
> 
> 
Reviewed-by: Benoit Canet <benoit@irqsave.net>
diff mbox

Patch

diff --git a/block.c b/block.c
index 55e8c69..859e1aa 100644
--- a/block.c
+++ b/block.c
@@ -3055,6 +3055,12 @@  static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
     BdrvRequestFlags flags)
 {
     BdrvTrackedRequest req;
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
     int ret;
 
     if (!bs->drv) {
@@ -3073,10 +3079,88 @@  static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
         bdrv_io_limits_intercept(bs, bytes >> BDRV_SECTOR_BITS, true);
     }
 
+    /*
+     * Align write if necessary by performing a read-modify-write cycle.
+     * Pad qiov with the read parts and be sure to have a tracked request not
+     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+     */
     tracked_request_begin(&req, bs, offset, bytes, true);
-    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, qiov, flags);
+
+    if (offset & (align - 1)) {
+        QEMUIOVector head_qiov;
+        struct iovec head_iov;
+
+        mark_request_serialising(&req, align);
+        wait_serialising_requests(&req);
+
+        head_buf = qemu_blockalign(bs, align);
+        head_iov = (struct iovec) {
+            .iov_base   = head_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+
+        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+                                  align, &head_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        QEMUIOVector tail_qiov;
+        struct iovec tail_iov;
+        size_t tail_bytes;
+
+        mark_request_serialising(&req, align);
+        wait_serialising_requests(&req);
+
+        tail_buf = qemu_blockalign(bs, align);
+        tail_iov = (struct iovec) {
+            .iov_base   = tail_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+
+        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
+                                  align, &tail_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+
+        tail_bytes = (offset + bytes) & (align - 1);
+        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+                               use_local_qiov ? &local_qiov : qiov,
+                               flags);
+
+fail:
     tracked_request_end(&req);
 
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+        qemu_vfree(head_buf);
+        qemu_vfree(tail_buf);
+    }
+
     return ret;
 }