diff mbox

[3/3] dataplane: handle misaligned virtio-blk requests

Message ID 1357836501-15555-4-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Jan. 10, 2013, 4:48 p.m. UTC
O_DIRECT on Linux has alignment requirements on I/O buffers and
misaligned requests result in -EINVAL.  The Linux virtio_blk guest
driver usually submits aligned requests so I forgot to handle misaligned
requests.

It turns out that virtio-win guest drivers submit misaligned requests.
Handle them using a bounce buffer that meets alignment requirements.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/dataplane/virtio-blk.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

Comments

Kevin Wolf Jan. 11, 2013, 12:39 p.m. UTC | #1
Am 10.01.2013 17:48, schrieb Stefan Hajnoczi:
> O_DIRECT on Linux has alignment requirements on I/O buffers and
> misaligned requests result in -EINVAL.  The Linux virtio_blk guest
> driver usually submits aligned requests so I forgot to handle misaligned
> requests.
> 
> It turns out that virtio-win guest drivers submit misaligned requests.
> Handle them using a bounce buffer that meets alignment requirements.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/dataplane/virtio-blk.c | 36 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 36 insertions(+)
> 
> diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
> index a6696b8..991ab5f 100644
> --- a/hw/dataplane/virtio-blk.c
> +++ b/hw/dataplane/virtio-blk.c
> @@ -34,6 +34,8 @@ typedef struct {
>      struct iocb iocb;               /* Linux AIO control block */
>      QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
>      unsigned int head;              /* vring descriptor index */
> +    void *bounce_buffer;            /* used if guest buffers are unaligned */
> +    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
>  } VirtIOBlockRequest;
>  
>  struct VirtIOBlockDataPlane {
> @@ -89,6 +91,15 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
>  
>      trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
>  
> +    if (req->read_qiov) {
> +        assert(req->bounce_buffer);
> +        qemu_iovec_to_buf(req->read_qiov, 0, req->bounce_buffer, len);

Shouldn't it be qemu_iovec_from_buf()?

Makes me wonder if qtest cases might be doable...

> +        qemu_iovec_destroy(req->read_qiov);
> +        g_slice_free(QEMUIOVector, req->read_qiov);
> +    }
> +
> +    qemu_vfree(req->bounce_buffer);
> +
>      qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
>      qemu_iovec_destroy(req->inhdr);
>      g_slice_free(QEMUIOVector, req->inhdr);
> @@ -136,6 +147,29 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
>                         QEMUIOVector *inhdr)
>  {
>      struct iocb *iocb;
> +    QEMUIOVector qiov;
> +    struct iovec bounce_iov;
> +    void *bounce_buffer = NULL;
> +    QEMUIOVector *read_qiov = NULL;
> +
> +    qemu_iovec_init_external(&qiov, iov, iov_cnt);
> +    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
> +        /* Redirect I/O to aligned bounce buffer */
> +        bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
> +        bounce_iov.iov_base = bounce_buffer;
> +        bounce_iov.iov_len = qiov.size;
> +        iov = &bounce_iov;
> +        iov_cnt = 1;
> +
> +        if (read) {
> +            /* Need to copy back from bounce buffer on completion */
> +            read_qiov = g_slice_new(QEMUIOVector);
> +            qemu_iovec_init(read_qiov, iov_cnt);
> +            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);

This would have to be the original iov and iov_cnt, but they are already
overwritten here.

> +        } else {
> +            qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
> +        }
> +    }
>  
>      iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
>  
> @@ -143,6 +177,8 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
>      VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
>      req->head = head;
>      req->inhdr = inhdr;
> +    req->bounce_buffer = bounce_buffer;
> +    req->read_qiov = read_qiov;
>      return 0;
>  }

Kevin
Paolo Bonzini Jan. 11, 2013, 1:27 p.m. UTC | #2
Il 11/01/2013 13:39, Kevin Wolf ha scritto:
> Am 10.01.2013 17:48, schrieb Stefan Hajnoczi:
>> O_DIRECT on Linux has alignment requirements on I/O buffers and
>> misaligned requests result in -EINVAL.  The Linux virtio_blk guest
>> driver usually submits aligned requests so I forgot to handle misaligned
>> requests.
>>
>> It turns out that virtio-win guest drivers submit misaligned requests.
>> Handle them using a bounce buffer that meets alignment requirements.
>>
>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>> ---
>>  hw/dataplane/virtio-blk.c | 36 ++++++++++++++++++++++++++++++++++++
>>  1 file changed, 36 insertions(+)
>>
>> diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
>> index a6696b8..991ab5f 100644
>> --- a/hw/dataplane/virtio-blk.c
>> +++ b/hw/dataplane/virtio-blk.c
>> @@ -34,6 +34,8 @@ typedef struct {
>>      struct iocb iocb;               /* Linux AIO control block */
>>      QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
>>      unsigned int head;              /* vring descriptor index */
>> +    void *bounce_buffer;            /* used if guest buffers are unaligned */
>> +    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
>>  } VirtIOBlockRequest;
>>  
>>  struct VirtIOBlockDataPlane {
>> @@ -89,6 +91,15 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
>>  
>>      trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
>>  
>> +    if (req->read_qiov) {
>> +        assert(req->bounce_buffer);
>> +        qemu_iovec_to_buf(req->read_qiov, 0, req->bounce_buffer, len);
> 
> Shouldn't it be qemu_iovec_from_buf()?

Yes.

> Makes me wonder if qtest cases might be doable...

Anthony, what's going on with libqos? :)

In the meanwhile...

>> +        qemu_iovec_destroy(req->read_qiov);
>> +        g_slice_free(QEMUIOVector, req->read_qiov);
>> +    }
>> +
>> +    qemu_vfree(req->bounce_buffer);
>> +
>>      qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
>>      qemu_iovec_destroy(req->inhdr);
>>      g_slice_free(QEMUIOVector, req->inhdr);
>> @@ -136,6 +147,29 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
>>                         QEMUIOVector *inhdr)
>>  {
>>      struct iocb *iocb;
>> +    QEMUIOVector qiov;
>> +    struct iovec bounce_iov;
>> +    void *bounce_buffer = NULL;
>> +    QEMUIOVector *read_qiov = NULL;
>> +
>> +    qemu_iovec_init_external(&qiov, iov, iov_cnt);
>> +    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {

... changing this to "if (1)" would be enough for testing with Linux guests.

Paolo

>> +        /* Redirect I/O to aligned bounce buffer */
>> +        bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
>> +        bounce_iov.iov_base = bounce_buffer;
>> +        bounce_iov.iov_len = qiov.size;
>> +        iov = &bounce_iov;
>> +        iov_cnt = 1;
>> +
>> +        if (read) {
>> +            /* Need to copy back from bounce buffer on completion */
>> +            read_qiov = g_slice_new(QEMUIOVector);
>> +            qemu_iovec_init(read_qiov, iov_cnt);
>> +            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
> 
> This would have to be the original iov and iov_cnt, but they are already
> overwritten here.
> 
>> +        } else {
>> +            qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
>> +        }
>> +    }
>>  
>>      iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
>>  
>> @@ -143,6 +177,8 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
>>      VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
>>      req->head = head;
>>      req->inhdr = inhdr;
>> +    req->bounce_buffer = bounce_buffer;
>> +    req->read_qiov = read_qiov;
>>      return 0;
>>  }
> 
> Kevin
> 
>
Stefan Hajnoczi Jan. 11, 2013, 2:38 p.m. UTC | #3
On Fri, Jan 11, 2013 at 01:39:40PM +0100, Kevin Wolf wrote:
> Am 10.01.2013 17:48, schrieb Stefan Hajnoczi:
> > O_DIRECT on Linux has alignment requirements on I/O buffers and
> > misaligned requests result in -EINVAL.  The Linux virtio_blk guest
> > driver usually submits aligned requests so I forgot to handle misaligned
> > requests.
> > 
> > It turns out that virtio-win guest drivers submit misaligned requests.
> > Handle them using a bounce buffer that meets alignment requirements.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/dataplane/virtio-blk.c | 36 ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 36 insertions(+)
> > 
> > diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
> > index a6696b8..991ab5f 100644
> > --- a/hw/dataplane/virtio-blk.c
> > +++ b/hw/dataplane/virtio-blk.c
> > @@ -34,6 +34,8 @@ typedef struct {
> >      struct iocb iocb;               /* Linux AIO control block */
> >      QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
> >      unsigned int head;              /* vring descriptor index */
> > +    void *bounce_buffer;            /* used if guest buffers are unaligned */
> > +    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
> >  } VirtIOBlockRequest;
> >  
> >  struct VirtIOBlockDataPlane {
> > @@ -89,6 +91,15 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
> >  
> >      trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
> >  
> > +    if (req->read_qiov) {
> > +        assert(req->bounce_buffer);
> > +        qemu_iovec_to_buf(req->read_qiov, 0, req->bounce_buffer, len);
> 
> Shouldn't it be qemu_iovec_from_buf()?
> 
> Makes me wonder if qtest cases might be doable...
> 
> > +        qemu_iovec_destroy(req->read_qiov);
> > +        g_slice_free(QEMUIOVector, req->read_qiov);
> > +    }
> > +
> > +    qemu_vfree(req->bounce_buffer);
> > +
> >      qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
> >      qemu_iovec_destroy(req->inhdr);
> >      g_slice_free(QEMUIOVector, req->inhdr);
> > @@ -136,6 +147,29 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
> >                         QEMUIOVector *inhdr)
> >  {
> >      struct iocb *iocb;
> > +    QEMUIOVector qiov;
> > +    struct iovec bounce_iov;
> > +    void *bounce_buffer = NULL;
> > +    QEMUIOVector *read_qiov = NULL;
> > +
> > +    qemu_iovec_init_external(&qiov, iov, iov_cnt);
> > +    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
> > +        /* Redirect I/O to aligned bounce buffer */
> > +        bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
> > +        bounce_iov.iov_base = bounce_buffer;
> > +        bounce_iov.iov_len = qiov.size;
> > +        iov = &bounce_iov;
> > +        iov_cnt = 1;
> > +
> > +        if (read) {
> > +            /* Need to copy back from bounce buffer on completion */
> > +            read_qiov = g_slice_new(QEMUIOVector);
> > +            qemu_iovec_init(read_qiov, iov_cnt);
> > +            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
> 
> This would have to be the original iov and iov_cnt, but they are already
> overwritten here.

Good point.  What's really weird is that I tested this with a Windows
guest - probably the buffer cache fooled me so I didn't notice read was
completely broken.

Stefan
Andreas Färber Jan. 11, 2013, 3:44 p.m. UTC | #4
Am 11.01.2013 14:27, schrieb Paolo Bonzini:
> Il 11/01/2013 13:39, Kevin Wolf ha scritto:
>> Makes me wonder if qtest cases might be doable...
> 
> Anthony, what's going on with libqos? :)

I posted a rebased version of I2C libqos support after Anthony approved
the pre-header-reorg version and am hoping it will get applied now.

For virtio here you're obviously waiting for PCI libqos, haven't seen a
new patch series yet. If the issue is just lack of time on Anthony's
part and not some major design issue, I'd be willing to help post 1.4.

Unfortunately I've been hearing that s390 qtest cases are not possible
due to it not using MMIO or PIO...

Regards,
Andreas
Paolo Bonzini Jan. 11, 2013, 4:25 p.m. UTC | #5
Il 11/01/2013 16:44, Andreas Färber ha scritto:
>>> >> Makes me wonder if qtest cases might be doable...
>> > 
>> > Anthony, what's going on with libqos? :)
> I posted a rebased version of I2C libqos support after Anthony approved
> the pre-header-reorg version and am hoping it will get applied now.
> 
> For virtio here you're obviously waiting for PCI libqos, haven't seen a
> new patch series yet. If the issue is just lack of time on Anthony's
> part and not some major design issue, I'd be willing to help post 1.4.
> 
> Unfortunately I've been hearing that s390 qtest cases are not possible
> due to it not using MMIO or PIO...

You can add s390 magic to qtest, just like it has I/O ports for x86...

Paolo
diff mbox

Patch

diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
index a6696b8..991ab5f 100644
--- a/hw/dataplane/virtio-blk.c
+++ b/hw/dataplane/virtio-blk.c
@@ -34,6 +34,8 @@  typedef struct {
     struct iocb iocb;               /* Linux AIO control block */
     QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
     unsigned int head;              /* vring descriptor index */
+    void *bounce_buffer;            /* used if guest buffers are unaligned */
+    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
 } VirtIOBlockRequest;
 
 struct VirtIOBlockDataPlane {
@@ -89,6 +91,15 @@  static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
 
     trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
 
+    if (req->read_qiov) {
+        assert(req->bounce_buffer);
+        qemu_iovec_to_buf(req->read_qiov, 0, req->bounce_buffer, len);
+        qemu_iovec_destroy(req->read_qiov);
+        g_slice_free(QEMUIOVector, req->read_qiov);
+    }
+
+    qemu_vfree(req->bounce_buffer);
+
     qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
     qemu_iovec_destroy(req->inhdr);
     g_slice_free(QEMUIOVector, req->inhdr);
@@ -136,6 +147,29 @@  static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
                        QEMUIOVector *inhdr)
 {
     struct iocb *iocb;
+    QEMUIOVector qiov;
+    struct iovec bounce_iov;
+    void *bounce_buffer = NULL;
+    QEMUIOVector *read_qiov = NULL;
+
+    qemu_iovec_init_external(&qiov, iov, iov_cnt);
+    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
+        /* Redirect I/O to aligned bounce buffer */
+        bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
+        bounce_iov.iov_base = bounce_buffer;
+        bounce_iov.iov_len = qiov.size;
+        iov = &bounce_iov;
+        iov_cnt = 1;
+
+        if (read) {
+            /* Need to copy back from bounce buffer on completion */
+            read_qiov = g_slice_new(QEMUIOVector);
+            qemu_iovec_init(read_qiov, iov_cnt);
+            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
+        } else {
+            qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+        }
+    }
 
     iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
 
@@ -143,6 +177,8 @@  static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
     VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
     req->head = head;
     req->inhdr = inhdr;
+    req->bounce_buffer = bounce_buffer;
+    req->read_qiov = read_qiov;
     return 0;
 }