Patchwork [12/12] raw-win32: add emulated AIO support

login
register
mail settings
Submitter Paolo Bonzini
Date July 16, 2012, 10:42 a.m.
Message ID <1342435377-25897-13-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/171166/
State New
Headers show

Comments

Paolo Bonzini - July 16, 2012, 10:42 a.m.
The thread pool can be used under Win32 in the same way as in raw-posix.c.
Move the existing synchronous code into callbacks, and pass the return
code back.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-win32.c |  189 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 140 insertions(+), 49 deletions(-)
Blue Swirl - July 23, 2012, 4:35 p.m.
On Mon, Jul 16, 2012 at 10:42 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> The thread pool can be used under Win32 in the same way as in raw-posix.c.
> Move the existing synchronous code into callbacks, and pass the return
> code back.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/raw-win32.c |  189 +++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 140 insertions(+), 49 deletions(-)
>
> diff --git a/block/raw-win32.c b/block/raw-win32.c
> index e4b0b75..a50d636 100644
> --- a/block/raw-win32.c
> +++ b/block/raw-win32.c
> @@ -25,6 +25,9 @@
>  #include "qemu-timer.h"
>  #include "block_int.h"
>  #include "module.h"
> +#include "raw-aio.h"
> +#include "trace.h"
> +#include "thread-pool.h"
>  #include <windows.h>
>  #include <winioctl.h>
>
> @@ -32,12 +35,130 @@
>  #define FTYPE_CD     1
>  #define FTYPE_HARDDISK 2
>
> +struct qemu_paiocb {

QEMUPAIOCB

> +    BlockDriverState *bs;
> +    HANDLE hfile;
> +    struct iovec *aio_iov;
> +    int aio_niov;
> +    size_t aio_nbytes;
> +    off_t aio_offset;
> +    int aio_type;
> +};
> +
>  typedef struct BDRVRawState {
>      HANDLE hfile;
>      int type;
>      char drive_path[16]; /* format: "d:\" */
>  } BDRVRawState;
>
> +/*
> + * Read/writes the data to/from a given linear buffer.
> + *
> + * Returns the number of bytes handles or -errno in case of an error. Short
> + * reads are only returned if the end of the file is reached.
> + */
> +static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
> +{
> +    size_t offset = 0;
> +    int i;
> +
> +    for (i = 0; i < aiocb->aio_niov; i++) {
> +        OVERLAPPED ov;
> +        DWORD ret, ret_count, len;
> +
> +        memset(&ov, 0, sizeof(ov));
> +        ov.Offset = (aiocb->aio_offset + offset);
> +        ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
> +        len = aiocb->aio_iov[i].iov_len;
> +        if (aiocb->aio_type & QEMU_AIO_WRITE) {
> +            ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
> +                            len, &ret_count, &ov);
> +        } else {
> +            ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
> +                           len, &ret_count, &ov);
> +        }
> +        if (!ret) {
> +            ret_count = 0;
> +        }
> +        if (ret_count != len) {
> +            break;
> +        }
> +        offset += len;
> +    }
> +
> +    return offset;
> +}
> +
> +static int aio_worker(void *arg)
> +{
> +    struct qemu_paiocb *aiocb = arg;
> +    ssize_t ret = 0;
> +    size_t count;
> +
> +    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
> +    case QEMU_AIO_READ:
> +        count = handle_aiocb_rw(aiocb);
> +        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
> +            /* A short read means that we have reached EOF. Pad the buffer
> +             * with zeros for bytes after EOF. */
> +            QEMUIOVector qiov;
> +
> +            qemu_iovec_init_external(&qiov, aiocb->aio_iov,
> +                                     aiocb->aio_niov);
> +            qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - count, count);
> +
> +            count = aiocb->aio_nbytes;
> +        }
> +        if (count == aiocb->aio_nbytes) {
> +            ret = 0;
> +        } else {
> +            ret = -EINVAL;
> +        }
> +        break;
> +    case QEMU_AIO_WRITE:
> +        count = handle_aiocb_rw(aiocb);
> +        if (count == aiocb->aio_nbytes) {
> +            count = 0;
> +        } else {
> +            count = -EINVAL;
> +        }
> +        break;
> +    case QEMU_AIO_FLUSH:
> +        if (!FlushFileBuffers(aiocb->hfile)) {
> +            return -EIO;
> +        }
> +        break;
> +    default:
> +        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);

Assert instead?

> +        ret = -EINVAL;
> +        break;
> +    }
> +
> +    g_slice_free(struct qemu_paiocb, aiocb);
> +    return ret;
> +}
> +
> +static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
> +        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> +        BlockDriverCompletionFunc *cb, void *opaque, int type)
> +{
> +    struct qemu_paiocb *acb = g_slice_new(struct qemu_paiocb);
> +
> +    acb->bs = bs;
> +    acb->hfile = hfile;
> +    acb->aio_type = type;
> +
> +    if (qiov) {
> +        acb->aio_iov = qiov->iov;
> +        acb->aio_niov = qiov->niov;
> +    }
> +    acb->aio_nbytes = nb_sectors * 512;
> +    acb->aio_offset = sector_num * 512;
> +
> +    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
> +    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
> +}
> +
>  int qemu_ftruncate64(int fd, int64_t length)
>  {
>      LARGE_INTEGER li;
> @@ -109,59 +230,29 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
>      return 0;
>  }
>
> -static int raw_read(BlockDriverState *bs, int64_t sector_num,
> -                    uint8_t *buf, int nb_sectors)
> +static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
> +                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> +                         BlockDriverCompletionFunc *cb, void *opaque)
>  {
>      BDRVRawState *s = bs->opaque;
> -    OVERLAPPED ov;
> -    DWORD ret_count;
> -    int ret;
> -    int64_t offset = sector_num * 512;
> -    int count = nb_sectors * 512;
> -
> -    memset(&ov, 0, sizeof(ov));
> -    ov.Offset = offset;
> -    ov.OffsetHigh = offset >> 32;
> -    ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
> -    if (!ret)
> -        return ret_count;
> -    if (ret_count == count)
> -        ret_count = 0;
> -    return ret_count;
> +    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
> +                       cb, opaque, QEMU_AIO_READ);
>  }
>
> -static int raw_write(BlockDriverState *bs, int64_t sector_num,
> -                     const uint8_t *buf, int nb_sectors)
> +static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
> +                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> +                          BlockDriverCompletionFunc *cb, void *opaque)
>  {
>      BDRVRawState *s = bs->opaque;
> -    OVERLAPPED ov;
> -    DWORD ret_count;
> -    int ret;
> -    int64_t offset = sector_num * 512;
> -    int count = nb_sectors * 512;
> -
> -    memset(&ov, 0, sizeof(ov));
> -    ov.Offset = offset;
> -    ov.OffsetHigh = offset >> 32;
> -    ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
> -    if (!ret)
> -        return ret_count;
> -    if (ret_count == count)
> -        ret_count = 0;
> -    return ret_count;
> +    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
> +                       cb, opaque, QEMU_AIO_WRITE);
>  }
>
> -static int raw_flush(BlockDriverState *bs)
> +static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
> +                         BlockDriverCompletionFunc *cb, void *opaque)
>  {
>      BDRVRawState *s = bs->opaque;
> -    int ret;
> -
> -    ret = FlushFileBuffers(s->hfile);
> -    if (ret == 0) {
> -        return -EIO;
> -    }
> -
> -    return 0;
> +    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
>  }
>
>  static void raw_close(BlockDriverState *bs)
> @@ -282,9 +373,9 @@ static BlockDriver bdrv_file = {
>      .bdrv_close                = raw_close,
>      .bdrv_create       = raw_create,
>
> -    .bdrv_read              = raw_read,
> -    .bdrv_write             = raw_write,
> -    .bdrv_co_flush_to_disk  = raw_flush,
> +    .bdrv_aio_readv     = raw_aio_readv,
> +    .bdrv_aio_writev    = raw_aio_writev,
> +    .bdrv_aio_flush     = raw_aio_flush,
>
>      .bdrv_truncate     = raw_truncate,
>      .bdrv_getlength    = raw_getlength,
> @@ -413,9 +504,9 @@ static BlockDriver bdrv_host_device = {
>      .bdrv_close                = raw_close,
>      .bdrv_has_zero_init = hdev_has_zero_init,
>
> -    .bdrv_read              = raw_read,
> -    .bdrv_write             = raw_write,
> -    .bdrv_co_flush_to_disk  = raw_flush,
> +    .bdrv_aio_readv     = raw_aio_readv,
> +    .bdrv_aio_writev    = raw_aio_writev,
> +    .bdrv_aio_flush     = raw_aio_flush,
>
>      .bdrv_getlength    = raw_getlength,
>      .bdrv_get_allocated_file_size
> --
> 1.7.10.4
>
>
Paolo Bonzini - July 23, 2012, 4:59 p.m.
On 23/07/2012 18:35, Blue Swirl wrote:
>> > +struct qemu_paiocb {
> QEMUPAIOCB

RawWin32AIOData. :)

>> > +    BlockDriverState *bs;
>> > +    HANDLE hfile;
>> > +    struct iovec *aio_iov;
>> > +    int aio_niov;
>> > +    size_t aio_nbytes;
>> > +    off_t aio_offset;
>> > +    int aio_type;
>> > +};
>> > +
>> >  typedef struct BDRVRawState {
>> >      HANDLE hfile;
>> >      int type;
>> >      char drive_path[16]; /* format: "d:\" */
>> >  } BDRVRawState;
>> >
>> > +/*
>> > + * Read/writes the data to/from a given linear buffer.
>> > + *
>> > + * Returns the number of bytes handles or -errno in case of an error. Short
>> > + * reads are only returned if the end of the file is reached.
>> > + */
>> > +static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
>> > +{
>> > +    size_t offset = 0;
>> > +    int i;
>> > +
>> > +    for (i = 0; i < aiocb->aio_niov; i++) {
>> > +        OVERLAPPED ov;
>> > +        DWORD ret, ret_count, len;
>> > +
>> > +        memset(&ov, 0, sizeof(ov));
>> > +        ov.Offset = (aiocb->aio_offset + offset);
>> > +        ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
>> > +        len = aiocb->aio_iov[i].iov_len;
>> > +        if (aiocb->aio_type & QEMU_AIO_WRITE) {
>> > +            ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
>> > +                            len, &ret_count, &ov);
>> > +        } else {
>> > +            ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
>> > +                           len, &ret_count, &ov);
>> > +        }
>> > +        if (!ret) {
>> > +            ret_count = 0;
>> > +        }
>> > +        if (ret_count != len) {
>> > +            break;
>> > +        }
>> > +        offset += len;
>> > +    }
>> > +
>> > +    return offset;
>> > +}
>> > +
>> > +static int aio_worker(void *arg)
>> > +{
>> > +    struct qemu_paiocb *aiocb = arg;
>> > +    ssize_t ret = 0;
>> > +    size_t count;
>> > +
>> > +    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
>> > +    case QEMU_AIO_READ:
>> > +        count = handle_aiocb_rw(aiocb);
>> > +        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
>> > +            /* A short read means that we have reached EOF. Pad the buffer
>> > +             * with zeros for bytes after EOF. */
>> > +            QEMUIOVector qiov;
>> > +
>> > +            qemu_iovec_init_external(&qiov, aiocb->aio_iov,
>> > +                                     aiocb->aio_niov);
>> > +            qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - count, count);
>> > +
>> > +            count = aiocb->aio_nbytes;
>> > +        }
>> > +        if (count == aiocb->aio_nbytes) {
>> > +            ret = 0;
>> > +        } else {
>> > +            ret = -EINVAL;
>> > +        }
>> > +        break;
>> > +    case QEMU_AIO_WRITE:
>> > +        count = handle_aiocb_rw(aiocb);
>> > +        if (count == aiocb->aio_nbytes) {
>> > +            count = 0;
>> > +        } else {
>> > +            count = -EINVAL;
>> > +        }
>> > +        break;
>> > +    case QEMU_AIO_FLUSH:
>> > +        if (!FlushFileBuffers(aiocb->hfile)) {
>> > +            return -EIO;
>> > +        }
>> > +        break;
>> > +    default:
>> > +        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
> Assert instead?

Yeah, this is cut-and-pasted from posix-aio-compat.c, I'll fix both.

Paolo

Patch

diff --git a/block/raw-win32.c b/block/raw-win32.c
index e4b0b75..a50d636 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -25,6 +25,9 @@ 
 #include "qemu-timer.h"
 #include "block_int.h"
 #include "module.h"
+#include "raw-aio.h"
+#include "trace.h"
+#include "thread-pool.h"
 #include <windows.h>
 #include <winioctl.h>
 
@@ -32,12 +35,130 @@ 
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2
 
+struct qemu_paiocb {            /* one request, passed to aio_worker */
+    BlockDriverState *bs;       /* block device state of the request */
+    HANDLE hfile;               /* Win32 file handle the I/O is done on */
+    struct iovec *aio_iov;      /* scatter/gather elements of the request */
+    int aio_niov;               /* number of elements in aio_iov */
+    size_t aio_nbytes;          /* total request length in bytes */
+    int64_t aio_offset;         /* byte offset in the file, always 64-bit */
+    int aio_type;               /* QEMU_AIO_* request type */
+};
+
 typedef struct BDRVRawState {
     HANDLE hfile;
     int type;
     char drive_path[16]; /* format: "d:\" */
 } BDRVRawState;
 
+/*
+ * Reads or writes the request's iovec elements one by one at aio_offset.
+ *
+ * Returns the number of bytes transferred, which is smaller than the sum
+ * of the iovec lengths when a ReadFile/WriteFile call fails or is short.
+ */
+static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
+{
+    size_t offset = 0;
+    int i;
+
+    for (i = 0; i < aiocb->aio_niov; i++) {
+        OVERLAPPED ov;
+        DWORD ret, ret_count, len;
+
+        memset(&ov, 0, sizeof(ov));
+        ov.Offset = (aiocb->aio_offset + offset);
+        ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
+        len = aiocb->aio_iov[i].iov_len;
+        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+            ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
+                            len, &ret_count, &ov);
+        } else {
+            ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
+                           len, &ret_count, &ov);
+        }
+        if (!ret) {
+            ret_count = 0;
+        }
+        offset += ret_count; /* also counts a short final transfer */
+        if (ret_count != len) {
+            break;
+        }
+    }
+
+    return offset;
+}
+
+/* Thread-pool callback: executes one emulated AIO request and frees it.
+ * Returns 0 on success or a negative errno value on failure. */
+static int aio_worker(void *arg)
+{
+    struct qemu_paiocb *aiocb = arg;
+    ssize_t ret = 0;
+    size_t count;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        count = handle_aiocb_rw(aiocb);
+        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
+            /* A short read means that we have reached EOF. Pad the buffer
+             * with zeros for bytes after EOF. */
+            QEMUIOVector qiov;
+
+            qemu_iovec_init_external(&qiov, aiocb->aio_iov,
+                                     aiocb->aio_niov);
+            qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - count, count);
+
+            count = aiocb->aio_nbytes;
+        }
+        if (count != aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_WRITE:
+        count = handle_aiocb_rw(aiocb);
+        /* Report through ret: count is never returned to the caller. */
+        if (count != aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_FLUSH:
+        if (!FlushFileBuffers(aiocb->hfile)) {
+            /* Do not return here: aiocb must still be freed below. */
+            ret = -EIO;
+        }
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
+    }
+
+    g_slice_free(struct qemu_paiocb, aiocb);
+    return ret;
+}
+
+/* Packages one request into a qemu_paiocb and queues it on the thread
+ * pool with aio_worker as the callback that executes and frees it. */
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    struct qemu_paiocb *acb = g_slice_new(struct qemu_paiocb);
+
+    acb->bs = bs;
+    acb->hfile = hfile;
+    acb->aio_type = type;
+    /* qiov may be NULL; keep the vector fields defined in that case too. */
+    acb->aio_iov = qiov ? qiov->iov : NULL;
+    acb->aio_niov = qiov ? qiov->niov : 0;
+    acb->aio_nbytes = (size_t)nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
 int qemu_ftruncate64(int fd, int64_t length)
 {
     LARGE_INTEGER li;
@@ -109,59 +230,29 @@  static int raw_open(BlockDriverState *bs, const char *filename, int flags)
     return 0;
 }
 
-static int raw_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
+    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                       cb, opaque, QEMU_AIO_READ);
 }
 
-static int raw_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+                          BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
+    return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                       cb, opaque, QEMU_AIO_WRITE);
 }
 
-static int raw_flush(BlockDriverState *bs)
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    int ret;
-
-    ret = FlushFileBuffers(s->hfile);
-    if (ret == 0) {
-        return -EIO;
-    }
-
-    return 0;
+    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
 }
 
 static void raw_close(BlockDriverState *bs)
@@ -282,9 +373,9 @@  static BlockDriver bdrv_file = {
     .bdrv_close		= raw_close,
     .bdrv_create	= raw_create,
 
-    .bdrv_read              = raw_read,
-    .bdrv_write             = raw_write,
-    .bdrv_co_flush_to_disk  = raw_flush,
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,
 
     .bdrv_truncate	= raw_truncate,
     .bdrv_getlength	= raw_getlength,
@@ -413,9 +504,9 @@  static BlockDriver bdrv_host_device = {
     .bdrv_close		= raw_close,
     .bdrv_has_zero_init = hdev_has_zero_init,
 
-    .bdrv_read              = raw_read,
-    .bdrv_write             = raw_write,
-    .bdrv_co_flush_to_disk  = raw_flush,
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,
 
     .bdrv_getlength	= raw_getlength,
     .bdrv_get_allocated_file_size