Patchwork [v2,3/3] dataplane: handle misaligned virtio-blk requests

login
register
mail settings
Submitter Stefan Hajnoczi
Date Jan. 11, 2013, 12:34 p.m.
Message ID <1357907697-16790-4-git-send-email-stefanha@redhat.com>
Download mbox | patch
Permalink /patch/211324/
State New
Headers show

Comments

Stefan Hajnoczi - Jan. 11, 2013, 12:34 p.m.
O_DIRECT on Linux has alignment requirements on I/O buffers, and
misaligned requests result in -EINVAL.  The Linux virtio_blk guest
driver usually submits aligned requests, so I forgot to handle misaligned
requests.

It turns out that virtio-win guest drivers submit misaligned requests.
Handle them using a bounce buffer that meets alignment requirements.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/dataplane/virtio-blk.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
Paolo Bonzini - Jan. 11, 2013, 1:28 p.m.
Il 11/01/2013 13:34, Stefan Hajnoczi ha scritto:
> +        iov = &bounce_iov;
> +        iov_cnt = 1;
> +
> +        if (read) {
> +            /* Need to copy back from bounce buffer on completion */
> +            read_qiov = g_slice_new(QEMUIOVector);
> +            qemu_iovec_init(read_qiov, iov_cnt);
> +            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);

This is still wrong, how did you test it?

Paolo
Stefan Hajnoczi - Jan. 11, 2013, 2:40 p.m.
On Fri, Jan 11, 2013 at 02:28:17PM +0100, Paolo Bonzini wrote:
> Il 11/01/2013 13:34, Stefan Hajnoczi ha scritto:
> > +        iov = &bounce_iov;
> > +        iov_cnt = 1;
> > +
> > +        if (read) {
> > +            /* Need to copy back from bounce buffer on completion */
> > +            read_qiov = g_slice_new(QEMUIOVector);
> > +            qemu_iovec_init(read_qiov, iov_cnt);
> > +            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
> 
> This is still wrong, how did you test it?

I sent it before reading the reviews from you and Kevin.  Bizarrely,
Windows saw the files I copied to the disk - even across reboot (no
buffer cache involved?).  Or is Windows 8 doing tricks where "shutdown"
is actually "suspend to disk"?

Stefan
Kevin Wolf - Jan. 11, 2013, 3:50 p.m.
Am 11.01.2013 15:40, schrieb Stefan Hajnoczi:
> On Fri, Jan 11, 2013 at 02:28:17PM +0100, Paolo Bonzini wrote:
>> Il 11/01/2013 13:34, Stefan Hajnoczi ha scritto:
>>> +        iov = &bounce_iov;
>>> +        iov_cnt = 1;
>>> +
>>> +        if (read) {
>>> +            /* Need to copy back from bounce buffer on completion */
>>> +            read_qiov = g_slice_new(QEMUIOVector);
>>> +            qemu_iovec_init(read_qiov, iov_cnt);
>>> +            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
>>
>> This is still wrong, how did you test it?
> 
> I sent it before reading the reviews from you and Kevin.  Bizarrely,
> Windows saw the files I copied to the disk - even across reboot (no
> buffer cache involved?).  Or is Windows 8 doing tricks where "shutdown"
> is actually "suspend to disk"?

As far as I know, this is exactly what it's doing. I've seen warnings
that you shouldn't mount the file systems of a dual-boot Windows 8
installation unless you have changed its options so that it really
shuts down.

Kevin

Patch

diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
index a6696b8..88300a6 100644
--- a/hw/dataplane/virtio-blk.c
+++ b/hw/dataplane/virtio-blk.c
@@ -34,6 +34,8 @@  typedef struct {
     struct iocb iocb;               /* Linux AIO control block */
     QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
     unsigned int head;              /* vring descriptor index */
+    void *bounce_buffer;            /* used if guest buffers are unaligned */
+    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
 } VirtIOBlockRequest;
 
 struct VirtIOBlockDataPlane {
@@ -89,6 +91,15 @@  static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
 
     trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
 
+    if (req->read_qiov) {
+        assert(req->bounce_buffer);
+        qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_buffer, len);
+        qemu_iovec_destroy(req->read_qiov);
+        g_slice_free(QEMUIOVector, req->read_qiov);
+    }
+
+    qemu_vfree(req->bounce_buffer);
+
     qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
     qemu_iovec_destroy(req->inhdr);
     g_slice_free(QEMUIOVector, req->inhdr);
@@ -136,6 +147,29 @@  static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
                        QEMUIOVector *inhdr)
 {
     struct iocb *iocb;
+    QEMUIOVector qiov;
+    struct iovec bounce_iov;
+    void *bounce_buffer = NULL;
+    QEMUIOVector *read_qiov = NULL;
+
+    qemu_iovec_init_external(&qiov, iov, iov_cnt);
+    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
+        /* Redirect I/O to aligned bounce buffer */
+        bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
+        bounce_iov.iov_base = bounce_buffer;
+        bounce_iov.iov_len = qiov.size;
+        iov = &bounce_iov;
+        iov_cnt = 1;
+
+        if (read) {
+            /* Need to copy back from bounce buffer on completion */
+            read_qiov = g_slice_new(QEMUIOVector);
+            qemu_iovec_init(read_qiov, iov_cnt);
+            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
+        } else {
+            qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+        }
+    }
 
     iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
 
@@ -143,6 +177,8 @@  static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
     VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
     req->head = head;
     req->inhdr = inhdr;
+    req->bounce_buffer = bounce_buffer;
+    req->read_qiov = read_qiov;
     return 0;
 }