Patchwork [4/7] block: make discard asynchronous

login
register
mail settings
Submitter Paolo Bonzini
Date Jan. 14, 2013, 3:26 p.m.
Message ID <1358177218-16802-5-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/211808/
State New
Headers show

Comments

Paolo Bonzini - Jan. 14, 2013, 3:26 p.m.
This is easy with the thread pool, because we can use s->is_xfs and
s->has_discard from the worker function.

QEMU has a widespread assumption that each I/O operation writes less
than 2^32 bytes.  This patch doesn't fix it throughout of course,
but it starts correcting struct RawPosixAIOData so that there is
no regression with respect to the synchronous discard implementation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-aio.h   |   5 +-
 block/raw-posix.c | 164 ++++++++++++++++++++++++++++--------------------------
 2 files changed, 88 insertions(+), 81 deletions(-)
Stefan Hajnoczi - Jan. 15, 2013, 8:55 a.m.
On Mon, Jan 14, 2013 at 04:26:55PM +0100, Paolo Bonzini wrote:
> +static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
> +{
> +    int ret = -EOPNOTSUPP;
> +    BDRVRawState *s = aiocb->bs->opaque;
> +
> +    if (s->has_discard == 0) {
> +        return 0;
> +    }
> +
> +    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
> +#ifdef BLKDISCARD
> +        do {
> +            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
> +            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
> +                return 0;
> +            }
> +        } while (errno == EINTR);
> +
> +        ret = -errno;
> +#endif
> +    } else {
> +#ifdef CONFIG_XFS
> +        if (s->is_xfs) {
> +            return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
> +        }
> +#endif
> +
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +        do {
> +            if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +                          aiocb->aio_offset, aiocb->aio_nbytes) == 0) {

It is cleaner to use either aiocb->aio_fildes or s->fd consistently in
this function.  Minor issue, not worth respinning for.

Stefan

Patch

diff --git a/block/raw-aio.h b/block/raw-aio.h
index e77f361..c61f159 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -20,11 +20,14 @@ 
 #define QEMU_AIO_WRITE        0x0002
 #define QEMU_AIO_IOCTL        0x0004
 #define QEMU_AIO_FLUSH        0x0008
+#define QEMU_AIO_DISCARD      0x0010
 #define QEMU_AIO_TYPE_MASK \
-	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
+        (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
+         QEMU_AIO_DISCARD)
 
 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
+#define QEMU_AIO_BLKDEV       0x2000
 
 
 /* linux-aio.c - Linux native implementation */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 1d32139..679fcc5 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -163,7 +163,7 @@  typedef struct RawPosixAIOData {
         void *aio_ioctl_buf;
     };
     int aio_niov;
-    size_t aio_nbytes;
+    uint64_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
     off_t aio_offset;
     int aio_type;
@@ -623,6 +623,72 @@  static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
     return nbytes;
 }
 
+#ifdef CONFIG_XFS
+static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
+{
+    struct xfs_flock64 fl;
+
+    memset(&fl, 0, sizeof(fl));
+    fl.l_whence = SEEK_SET;
+    fl.l_start = offset;
+    fl.l_len = bytes;
+
+    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+#endif
+
+static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
+{
+    int ret = -EOPNOTSUPP;
+    BDRVRawState *s = aiocb->bs->opaque;
+
+    if (s->has_discard == 0) {
+        return 0;
+    }
+
+    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+#ifdef BLKDISCARD
+        do {
+            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
+
+        ret = -errno;
+#endif
+    } else {
+#ifdef CONFIG_XFS
+        if (s->is_xfs) {
+            return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
+        }
+#endif
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+        do {
+            if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                          aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
+
+        ret = -errno;
+#endif
+    }
+
+    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+        ret == -ENOTTY) {
+        s->has_discard = 0;
+        ret = 0;
+    }
+    return ret;
+}
+
 static int aio_worker(void *arg)
 {
     RawPosixAIOData *aiocb = arg;
@@ -657,6 +723,9 @@  static int aio_worker(void *arg)
     case QEMU_AIO_IOCTL:
         ret = handle_aiocb_ioctl(aiocb);
         break;
+    case QEMU_AIO_DISCARD:
+        ret = handle_aiocb_discard(aiocb);
+        break;
     default:
         fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
         ret = -EINVAL;
@@ -1057,57 +1126,14 @@  static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
     }
 }
 
-#ifdef CONFIG_XFS
-static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
-{
-    struct xfs_flock64 fl;
-
-    memset(&fl, 0, sizeof(fl));
-    fl.l_whence = SEEK_SET;
-    fl.l_start = sector_num << 9;
-    fl.l_len = (int64_t)nb_sectors << 9;
-
-    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
-        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
-        return -errno;
-    }
-
-    return 0;
-}
-#endif
-
-static coroutine_fn int raw_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
-    int ret = -EOPNOTSUPP;
     BDRVRawState *s = bs->opaque;
 
-    if (!s->has_discard) {
-        return 0;
-    }
-
-#ifdef CONFIG_XFS
-    if (s->is_xfs) {
-        return xfs_discard(s, sector_num, nb_sectors);
-    }
-#endif
-
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-    do {
-        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                      sector_num << BDRV_SECTOR_BITS,
-                      (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = -errno;
-#endif
-
-    if (ret == -EOPNOTSUPP) {
-        return 0;
-    }
-    return ret;
+    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+                       cb, opaque, QEMU_AIO_DISCARD);
 }
 
 static QEMUOptionParameter raw_create_options[] = {
@@ -1130,12 +1156,12 @@  static BlockDriver bdrv_file = {
     .bdrv_reopen_abort = raw_reopen_abort,
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
-    .bdrv_co_discard = raw_co_discard,
     .bdrv_co_is_allocated = raw_co_is_allocated,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_flush = raw_aio_flush,
+    .bdrv_aio_discard = raw_aio_discard,
 
     .bdrv_truncate = raw_truncate,
     .bdrv_getlength = raw_getlength,
@@ -1345,38 +1371,17 @@  static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
     return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
-static coroutine_fn int hdev_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    int ret;
-
-    if (s->has_discard == 0) {
-        return 0;
-    }
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
 
-    ret = -EOPNOTSUPP;
-#ifdef BLKDISCARD
-    do {
-        uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 };
-        if (ioctl(s->fd, BLKDISCARD, range) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = -errno;
-#endif
-    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
-        ret == -ENOTTY) {
-        s->has_discard = 0;
-        ret = 0;
+    if (fd_open(bs) < 0) {
+        return NULL;
     }
-    return ret;
-
+    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }
 
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1447,11 +1452,10 @@  static BlockDriver bdrv_host_device = {
     .create_options     = raw_create_options,
     .bdrv_has_zero_init = hdev_has_zero_init,
 
-    .bdrv_co_discard    = hdev_co_discard,
-
     .bdrv_aio_readv	= raw_aio_readv,
     .bdrv_aio_writev	= raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_aio_discard   = hdev_aio_discard,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength	= raw_getlength,