Patchwork [v2,10/12] raw: merge posix-aio-compat.c into block/raw-posix.c

login
register
mail settings
Submitter Paolo Bonzini
Date Aug. 7, 2012, 11:17 a.m.
Message ID <1344338247-17567-11-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/175608/
State New
Headers show

Comments

Paolo Bonzini - Aug. 7, 2012, 11:17 a.m.
Making the qemu_paiocb specific to raw devices will let us access members
of the BDRVRawState arbitrarily.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Makefile.objs         |   1 -
 block/raw-posix-aio.h |   8 --
 block/raw-posix.c     | 289 +++++++++++++++++++++++++++++++++++++++++++
 posix-aio-compat.c    | 330 --------------------------------------------------
 4 file modificati, 289 inserzioni(+), 339 rimozioni(-)
 delete mode 100644 posix-aio-compat.c

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 1221f15..afefa02 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -45,7 +45,6 @@  block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += thread-pool.o nbd.o block.o aes.o qemu-config.o
 block-obj-y += qemu-progress.o qemu-sockets.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
-block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/block/raw-posix-aio.h b/block/raw-posix-aio.h
index 6725135..c714367 100644
--- a/block/raw-posix-aio.h
+++ b/block/raw-posix-aio.h
@@ -27,14 +27,6 @@ 
 #define QEMU_AIO_MISALIGNED   0x1000
 
 
-/* posix-aio-compat.c - thread pool based implementation */
-BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type);
-BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque);
-
 /* linux-aio.c - Linux native implementation */
 void *laio_init(void);
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index e5faccd..00712dd 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -27,6 +27,9 @@ 
 #include "qemu-log.h"
 #include "block_int.h"
 #include "module.h"
+#include "trace.h"
+#include "thread-pool.h"
+#include "iov.h"
 #include "block/raw-posix-aio.h"
 
 #if defined(__APPLE__) && (__MACH__)
@@ -143,6 +145,20 @@  typedef struct BDRVRawState {
 static int fd_open(BlockDriverState *bs);
 static int64_t raw_getlength(BlockDriverState *bs);
 
+typedef struct RawPosixAIOData {
+    BlockDriverState *bs;
+    int aio_fildes;
+    union {
+        struct iovec *aio_iov;
+        void *aio_ioctl_buf;
+    };
+    int aio_niov;
+    size_t aio_nbytes;
+#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
+    off_t aio_offset;
+    int aio_type;
+} RawPosixAIOData;
+
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int cdrom_reopen(BlockDriverState *bs);
 #endif
@@ -311,6 +327,283 @@  static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return 1;
 }
 
+static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
+{
+    int ret;
+
+    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+    if (ret == -1) {
+        return -errno;
+    }
+
+    /*
+     * This looks weird, but the aio code only considers a request
+     * successful if it has written the full number of bytes.
+     *
+     * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
+     * so in fact we return the ioctl command here to make posix_aio_read()
+     * happy..
+     */
+    return aiocb->aio_nbytes;
+}
+
+static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
+{
+    int ret;
+
+    ret = qemu_fdatasync(aiocb->aio_fildes);
+    if (ret == -1) {
+        return -errno;
+    }
+    return 0;
+}
+
+#ifdef CONFIG_PREADV
+
+static bool preadv_present = true;
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return preadv(fd, iov, nr_iov, offset);
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return pwritev(fd, iov, nr_iov, offset);
+}
+
+#else
+
+static bool preadv_present = false;
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return -ENOSYS;
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return -ENOSYS;
+}
+
+#endif
+
+static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
+{
+    ssize_t len;
+
+    do {
+        if (aiocb->aio_type & QEMU_AIO_WRITE)
+            len = qemu_pwritev(aiocb->aio_fildes,
+                               aiocb->aio_iov,
+                               aiocb->aio_niov,
+                               aiocb->aio_offset);
+         else
+            len = qemu_preadv(aiocb->aio_fildes,
+                              aiocb->aio_iov,
+                              aiocb->aio_niov,
+                              aiocb->aio_offset);
+    } while (len == -1 && errno == EINTR);
+
+    if (len == -1) {
+        return -errno;
+    }
+    return len;
+}
+
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
+static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
+{
+    ssize_t offset = 0;
+    ssize_t len;
+
+    while (offset < aiocb->aio_nbytes) {
+        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+            len = pwrite(aiocb->aio_fildes,
+                         (const char *)buf + offset,
+                         aiocb->aio_nbytes - offset,
+                         aiocb->aio_offset + offset);
+        } else {
+            len = pread(aiocb->aio_fildes,
+                        buf + offset,
+                        aiocb->aio_nbytes - offset,
+                        aiocb->aio_offset + offset);
+        }
+        if (len == -1 && errno == EINTR) {
+            continue;
+        } else if (len == -1) {
+            offset = -errno;
+            break;
+        } else if (len == 0) {
+            break;
+        }
+        offset += len;
+    }
+
+    return offset;
+}
+
+static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
+{
+    ssize_t nbytes;
+    char *buf;
+
+    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
+        /*
+         * If there is just a single buffer, and it is properly aligned
+         * we can just use plain pread/pwrite without any problems.
+         */
+        if (aiocb->aio_niov == 1) {
+             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+        }
+        /*
+         * We have more than one iovec, and all are properly aligned.
+         *
+         * Try preadv/pwritev first and fall back to linearizing the
+         * buffer if it's not supported.
+         */
+        if (preadv_present) {
+            nbytes = handle_aiocb_rw_vector(aiocb);
+            if (nbytes == aiocb->aio_nbytes ||
+                (nbytes < 0 && nbytes != -ENOSYS)) {
+                return nbytes;
+            }
+            preadv_present = false;
+        }
+
+        /*
+         * XXX(hch): short read/write.  no easy way to handle the reminder
+         * using these interfaces.  For now retry using plain
+         * pread/pwrite?
+         */
+    }
+
+    /*
+     * Ok, we have to do it the hard way, copy all segments into
+     * a single aligned buffer.
+     */
+    buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
+    if (aiocb->aio_type & QEMU_AIO_WRITE) {
+        char *p = buf;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; ++i) {
+            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+            p += aiocb->aio_iov[i].iov_len;
+        }
+    }
+
+    nbytes = handle_aiocb_rw_linear(aiocb, buf);
+    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
+        char *p = buf;
+        size_t count = aiocb->aio_nbytes, copy;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov && count; ++i) {
+            copy = count;
+            if (copy > aiocb->aio_iov[i].iov_len) {
+                copy = aiocb->aio_iov[i].iov_len;
+            }
+            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+            p     += copy;
+            count -= copy;
+        }
+    }
+    qemu_vfree(buf);
+
+    return nbytes;
+}
+
+static int aio_worker(void *arg)
+{
+    RawPosixAIOData *aiocb = arg;
+    ssize_t ret = 0;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
+            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
+                      0, aiocb->aio_nbytes - ret);
+
+            ret = aiocb->aio_nbytes;
+        }
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_WRITE:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_FLUSH:
+        ret = handle_aiocb_flush(aiocb);
+        break;
+    case QEMU_AIO_IOCTL:
+        ret = handle_aiocb_ioctl(aiocb);
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
+    }
+
+    g_slice_free(RawPosixAIOData, aiocb);
+    return ret;
+}
+
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+
+    acb->bs = bs;
+    acb->aio_type = type;
+    acb->aio_fildes = fd;
+
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
+    acb->aio_nbytes = nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
+static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+
+    acb->bs = bs;
+    acb->aio_type = QEMU_AIO_IOCTL;
+    acb->aio_fildes = fd;
+    acb->aio_offset = 0;
+    acb->aio_ioctl_buf = buf;
+    acb->aio_ioctl_cmd = req;
+
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
 static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type)
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
deleted file mode 100644
index 4a1e3d3..0000000
--- a/posix-aio-compat.c
+++ /dev/null
@@ -1,330 +0,0 @@ 
-/*
- * QEMU posix-aio emulation
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <errno.h>
-#include <time.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "qemu-queue.h"
-#include "osdep.h"
-#include "sysemu.h"
-#include "qemu-common.h"
-#include "trace.h"
-#include "thread-pool.h"
-#include "block_int.h"
-#include "iov.h"
-
-#include "block/raw-posix-aio.h"
-
-struct qemu_paiocb {
-    BlockDriverAIOCB common;
-    int aio_fildes;
-    union {
-        struct iovec *aio_iov;
-        void *aio_ioctl_buf;
-    };
-    int aio_niov;
-    size_t aio_nbytes;
-#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
-    off_t aio_offset;
-    int aio_type;
-};
-
-#ifdef CONFIG_PREADV
-static int preadv_present = 1;
-#else
-static int preadv_present = 0;
-#endif
-
-static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
-{
-    int ret;
-
-    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
-    if (ret == -1)
-        return -errno;
-
-    /*
-     * This looks weird, but the aio code only considers a request
-     * successful if it has written the full number of bytes.
-     *
-     * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
-     * so in fact we return the ioctl command here to make posix_aio_read()
-     * happy..
-     */
-    return aiocb->aio_nbytes;
-}
-
-static ssize_t handle_aiocb_flush(struct qemu_paiocb *aiocb)
-{
-    int ret;
-
-    ret = qemu_fdatasync(aiocb->aio_fildes);
-    if (ret == -1)
-        return -errno;
-    return 0;
-}
-
-#ifdef CONFIG_PREADV
-
-static ssize_t
-qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return preadv(fd, iov, nr_iov, offset);
-}
-
-static ssize_t
-qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return pwritev(fd, iov, nr_iov, offset);
-}
-
-#else
-
-static ssize_t
-qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return -ENOSYS;
-}
-
-static ssize_t
-qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return -ENOSYS;
-}
-
-#endif
-
-static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
-{
-    ssize_t len;
-
-    do {
-        if (aiocb->aio_type & QEMU_AIO_WRITE)
-            len = qemu_pwritev(aiocb->aio_fildes,
-                               aiocb->aio_iov,
-                               aiocb->aio_niov,
-                               aiocb->aio_offset);
-         else
-            len = qemu_preadv(aiocb->aio_fildes,
-                              aiocb->aio_iov,
-                              aiocb->aio_niov,
-                              aiocb->aio_offset);
-    } while (len == -1 && errno == EINTR);
-
-    if (len == -1)
-        return -errno;
-    return len;
-}
-
-/*
- * Read/writes the data to/from a given linear buffer.
- *
- * Returns the number of bytes handles or -errno in case of an error. Short
- * reads are only returned if the end of the file is reached.
- */
-static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
-{
-    ssize_t offset = 0;
-    ssize_t len;
-
-    while (offset < aiocb->aio_nbytes) {
-         if (aiocb->aio_type & QEMU_AIO_WRITE)
-             len = pwrite(aiocb->aio_fildes,
-                          (const char *)buf + offset,
-                          aiocb->aio_nbytes - offset,
-                          aiocb->aio_offset + offset);
-         else
-             len = pread(aiocb->aio_fildes,
-                         buf + offset,
-                         aiocb->aio_nbytes - offset,
-                         aiocb->aio_offset + offset);
-
-         if (len == -1 && errno == EINTR)
-             continue;
-         else if (len == -1) {
-             offset = -errno;
-             break;
-         } else if (len == 0)
-             break;
-
-         offset += len;
-    }
-
-    return offset;
-}
-
-static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
-{
-    ssize_t nbytes;
-    char *buf;
-
-    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
-        /*
-         * If there is just a single buffer, and it is properly aligned
-         * we can just use plain pread/pwrite without any problems.
-         */
-        if (aiocb->aio_niov == 1)
-             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
-
-        /*
-         * We have more than one iovec, and all are properly aligned.
-         *
-         * Try preadv/pwritev first and fall back to linearizing the
-         * buffer if it's not supported.
-         */
-        if (preadv_present) {
-            nbytes = handle_aiocb_rw_vector(aiocb);
-            if (nbytes == aiocb->aio_nbytes)
-                return nbytes;
-            if (nbytes < 0 && nbytes != -ENOSYS)
-                return nbytes;
-            preadv_present = 0;
-        }
-
-        /*
-         * XXX(hch): short read/write.  no easy way to handle the reminder
-         * using these interfaces.  For now retry using plain
-         * pread/pwrite?
-         */
-    }
-
-    /*
-     * Ok, we have to do it the hard way, copy all segments into
-     * a single aligned buffer.
-     */
-    buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes);
-    if (aiocb->aio_type & QEMU_AIO_WRITE) {
-        char *p = buf;
-        int i;
-
-        for (i = 0; i < aiocb->aio_niov; ++i) {
-            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
-            p += aiocb->aio_iov[i].iov_len;
-        }
-    }
-
-    nbytes = handle_aiocb_rw_linear(aiocb, buf);
-    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
-        char *p = buf;
-        size_t count = aiocb->aio_nbytes, copy;
-        int i;
-
-        for (i = 0; i < aiocb->aio_niov && count; ++i) {
-            copy = count;
-            if (copy > aiocb->aio_iov[i].iov_len)
-                copy = aiocb->aio_iov[i].iov_len;
-            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
-            p     += copy;
-            count -= copy;
-        }
-    }
-    qemu_vfree(buf);
-
-    return nbytes;
-}
-
-static int aio_worker(void *arg)
-{
-    struct qemu_paiocb *aiocb = arg;
-    ssize_t ret = 0;
-
-    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
-    case QEMU_AIO_READ:
-        ret = handle_aiocb_rw(aiocb);
-        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
-            /* A short read means that we have reached EOF. Pad the buffer
-             * with zeros for bytes after EOF. */
-            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
-                       0, aiocb->aio_nbytes - ret);
-
-            ret = aiocb->aio_nbytes;
-        }
-        if (ret == aiocb->aio_nbytes) {
-            ret = 0;
-        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_WRITE:
-        ret = handle_aiocb_rw(aiocb);
-        if (ret == aiocb->aio_nbytes) {
-            ret = 0;
-        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_FLUSH:
-        ret = handle_aiocb_flush(aiocb);
-        break;
-    case QEMU_AIO_IOCTL:
-        ret = handle_aiocb_ioctl(aiocb);
-        break;
-    default:
-        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
-        ret = -EINVAL;
-        break;
-    }
-
-    qemu_aio_release(aiocb);
-    return ret;
-}
-
-static AIOPool raw_aio_pool = {
-    .aiocb_size         = sizeof(struct qemu_paiocb),
-};
-
-BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
-{
-    struct qemu_paiocb *acb;
-
-    acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
-    acb->aio_type = type;
-    acb->aio_fildes = fd;
-
-    if (qiov) {
-        acb->aio_iov = qiov->iov;
-        acb->aio_niov = qiov->niov;
-    }
-    acb->aio_nbytes = nb_sectors * 512;
-    acb->aio_offset = sector_num * 512;
-
-    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
-    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}
-
-BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    struct qemu_paiocb *acb;
-
-    acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
-    acb->aio_type = QEMU_AIO_IOCTL;
-    acb->aio_fildes = fd;
-    acb->aio_offset = 0;
-    acb->aio_ioctl_buf = buf;
-    acb->aio_ioctl_cmd = req;
-
-    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}