Patchwork [0/7] Discard improvements

login
register
mail settings
Submitter Paolo Bonzini
Date Jan. 14, 2013, 3:26 p.m.
Message ID <1358177192-16720-1-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/211805/
State New
Headers show

Comments

Paolo Bonzini - Jan. 14, 2013, 3:26 p.m.
This series builds on the patch from Kusanagi Kouichi, and also adds
discard support for non-passthrough block devices (BLKDISCARD), and
asynchronous discard support.

SCSI already calls bdrv_aio_discard, so it is not affected by these
patches.

Kusanagi Kouichi (1):
  raw-posix: support discard on more filesystems

Paolo Bonzini (6):
  raw-posix: remember whether discard failed
  raw: support discard on block devices
  block: make discard asynchronous
  ide: fix TRIM with empty range entry
  ide: issue discard asynchronously but serialize the pieces
  block: clear dirty bitmap when discarding

 block.c           |   8 +++-
 block/raw-aio.h   |   5 ++-
 block/raw-posix.c | 125 +++++++++++++++++++++++++++++++++++++++++-------------
 configure         |  19 +++++++++
 hw/ide/core.c     |  79 +++++++++++++++++++++++-----------
 5 files changed, 179 insertions(+), 57 deletions(-)
Michael Tokarev - Jan. 15, 2013, 10:05 a.m.
That's quite an.. interesting $Subject.  Can be read in at least two ways... ;)

Sorry can't resist ;)

/mjt

Patch

diff --git a/block/raw-posix.c b/block/raw-posix.c
index c3d7fda..e8d79af 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -59,6 +59,9 @@ 
 #ifdef CONFIG_FIEMAP
 #include <linux/fiemap.h>
 #endif
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <linux/falloc.h>
+#endif
 #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
 #include <sys/disk.h>
 #include <sys/cdio.h>
@@ -1074,15 +1077,34 @@  static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
 static coroutine_fn int raw_co_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors)
 {
-#ifdef CONFIG_XFS
+    int ret = -EOPNOTSUPP;
+
+#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_XFS)
     BDRVRawState *s = bs->opaque;
 
+#ifdef CONFIG_XFS
     if (s->is_xfs) {
         return xfs_discard(s, sector_num, nb_sectors);
     }
 #endif
 
-    return 0;
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    do {
+        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      sector_num << BDRV_SECTOR_BITS,
+                      (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
+            return 0;
+        }
+    } while (errno == EINTR);
+
+    ret = -errno;
+#endif
+#endif
+
+    if (ret == -EOPNOTSUPP) {
+        return 0;
+    }
+    return ret;
 }
 
 static QEMUOptionParameter raw_create_options[] = {
diff --git a/configure b/configure
index ea42fe2..94059a6 100755
--- a/configure
+++ b/configure
@@ -2586,6 +2586,22 @@  if compile_prog "" "" ; then
   fallocate=yes
 fi
 
+# check for fallocate hole punching
+fallocate_punch_hole=no
+cat > $TMPC << EOF
+#include <fcntl.h>
+#include <linux/falloc.h>
+
+int main(void)
+{
+    fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
+    return 0;
+}
+EOF
+if compile_prog "" "" ; then
+  fallocate_punch_hole=yes
+fi
+
 # check for sync_file_range
 sync_file_range=no
 cat > $TMPC << EOF
@@ -3501,6 +3517,9 @@  fi
 if test "$fallocate" = "yes" ; then
   echo "CONFIG_FALLOCATE=y" >> $config_host_mak
 fi
+if test "$fallocate_punch_hole" = "yes" ; then
+  echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
+fi
 if test "$sync_file_range" = "yes" ; then
   echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak
 fi
-- 
1.8.1


From 902a4c49606481c0d6a6e6344b272630e726decf Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 9 Jan 2013 12:41:24 +0100
Subject: [PATCH 2/7] raw-posix: remember whether discard failed

Avoid sending system calls repeatedly if they shall fail.  This
does not apply to XFS: if the filesystem-specific ioctl fails,
something weird is happening.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-posix.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index e8d79af..b647cfb 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -141,6 +141,7 @@  typedef struct BDRVRawState {
 #ifdef CONFIG_XFS
     bool is_xfs : 1;
 #endif
+    bool has_discard : 1;
 } BDRVRawState;
 
 typedef struct BDRVRawReopenState {
@@ -292,6 +293,7 @@  static int raw_open_common(BlockDriverState *bs, const char *filename,
     }
 #endif
 
+    s->has_discard = 1;
 #ifdef CONFIG_XFS
     if (platform_test_xfs_fd(s->fd)) {
         s->is_xfs = 1;
@@ -1078,10 +1080,12 @@  static coroutine_fn int raw_co_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors)
 {
     int ret = -EOPNOTSUPP;
-
-#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_XFS)
     BDRVRawState *s = bs->opaque;
 
+    if (!s->has_discard) {
+        return 0;
+    }
+
 #ifdef CONFIG_XFS
     if (s->is_xfs) {
         return xfs_discard(s, sector_num, nb_sectors);
@@ -1099,7 +1103,6 @@  static coroutine_fn int raw_co_discard(BlockDriverState *bs,
 
     ret = -errno;
 #endif
-#endif
 
     if (ret == -EOPNOTSUPP) {
         return 0;
-- 
1.8.1


From e0326f3a54949331b5776d2d594d6dace3e808ef Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 9 Jun 2012 02:31:13 +0200
Subject: [PATCH 3/7] raw: support discard on block devices

Block devices use a ioctl instead of fallocate, so add a separate
implementation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-posix.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index b647cfb..1d32139 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1345,6 +1345,40 @@  static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
     return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
+static coroutine_fn int hdev_co_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
+    if (s->has_discard == 0) {
+        return 0;
+    }
+    ret = fd_open(bs);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = -EOPNOTSUPP;
+#ifdef BLKDISCARD
+    do {
+        uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 };
+        if (ioctl(s->fd, BLKDISCARD, range) == 0) {
+            return 0;
+        }
+    } while (errno == EINTR);
+
+    ret = -errno;
+#endif
+    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+        ret == -ENOTTY) {
+        s->has_discard = 0;
+        ret = 0;
+    }
+    return ret;
+
+}
+
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int fd_open(BlockDriverState *bs)
 {
@@ -1413,6 +1447,8 @@  static BlockDriver bdrv_host_device = {
     .create_options     = raw_create_options,
     .bdrv_has_zero_init = hdev_has_zero_init,
 
+    .bdrv_co_discard    = hdev_co_discard,
+
     .bdrv_aio_readv	= raw_aio_readv,
     .bdrv_aio_writev	= raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
-- 
1.8.1


From 45fb22df0d1ae2469aef8f7e14758484114e0761 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Jun 2012 13:33:19 +0200
Subject: [PATCH 4/7] block: make discard asynchronous

This is easy with the thread pool, because we can use s->is_xfs and
s->has_discard from the worker function.

QEMU has a widespread assumption that each I/O operation writes less
than 2^32 bytes.  This patch doesn't fix it throughout of course,
but it starts correcting struct RawPosixAIOData so that there is
no regression with respect to the synchronous discard implementation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-aio.h   |   5 +-
 block/raw-posix.c | 164 ++++++++++++++++++++++++++++--------------------------
 2 files changed, 88 insertions(+), 81 deletions(-)

diff --git a/block/raw-aio.h b/block/raw-aio.h
index e77f361..c61f159 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -20,11 +20,14 @@ 
 #define QEMU_AIO_WRITE        0x0002
 #define QEMU_AIO_IOCTL        0x0004
 #define QEMU_AIO_FLUSH        0x0008
+#define QEMU_AIO_DISCARD      0x0010
 #define QEMU_AIO_TYPE_MASK \
-	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
+        (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
+         QEMU_AIO_DISCARD)
 
 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
+#define QEMU_AIO_BLKDEV       0x2000
 
 
 /* linux-aio.c - Linux native implementation */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 1d32139..679fcc5 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -163,7 +163,7 @@  typedef struct RawPosixAIOData {
         void *aio_ioctl_buf;
     };
     int aio_niov;
-    size_t aio_nbytes;
+    uint64_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
     off_t aio_offset;
     int aio_type;
@@ -623,6 +623,72 @@  static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
     return nbytes;
 }
 
+#ifdef CONFIG_XFS
+static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
+{
+    struct xfs_flock64 fl;
+
+    memset(&fl, 0, sizeof(fl));
+    fl.l_whence = SEEK_SET;
+    fl.l_start = offset;
+    fl.l_len = bytes;
+
+    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+#endif
+
+static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
+{
+    int ret = -EOPNOTSUPP;
+    BDRVRawState *s = aiocb->bs->opaque;
+
+    if (s->has_discard == 0) {
+        return 0;
+    }
+
+    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+#ifdef BLKDISCARD
+        do {
+            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
+
+        ret = -errno;
+#endif
+    } else {
+#ifdef CONFIG_XFS
+        if (s->is_xfs) {
+            return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
+        }
+#endif
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+        do {
+            if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                          aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
+
+        ret = -errno;
+#endif
+    }
+
+    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+        ret == -ENOTTY) {
+        s->has_discard = 0;
+        ret = 0;
+    }
+    return ret;
+}
+
 static int aio_worker(void *arg)
 {
     RawPosixAIOData *aiocb = arg;
@@ -657,6 +723,9 @@  static int aio_worker(void *arg)
     case QEMU_AIO_IOCTL:
         ret = handle_aiocb_ioctl(aiocb);
         break;
+    case QEMU_AIO_DISCARD:
+        ret = handle_aiocb_discard(aiocb);
+        break;
     default:
         fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
         ret = -EINVAL;
@@ -1057,57 +1126,14 @@  static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
     }
 }
 
-#ifdef CONFIG_XFS
-static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
-{
-    struct xfs_flock64 fl;
-
-    memset(&fl, 0, sizeof(fl));
-    fl.l_whence = SEEK_SET;
-    fl.l_start = sector_num << 9;
-    fl.l_len = (int64_t)nb_sectors << 9;
-
-    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
-        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
-        return -errno;
-    }
-
-    return 0;
-}
-#endif
-
-static coroutine_fn int raw_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
-    int ret = -EOPNOTSUPP;
     BDRVRawState *s = bs->opaque;
 
-    if (!s->has_discard) {
-        return 0;
-    }
-
-#ifdef CONFIG_XFS
-    if (s->is_xfs) {
-        return xfs_discard(s, sector_num, nb_sectors);
-    }
-#endif
-
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-    do {
-        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                      sector_num << BDRV_SECTOR_BITS,
-                      (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = -errno;
-#endif
-
-    if (ret == -EOPNOTSUPP) {
-        return 0;
-    }
-    return ret;
+    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+                       cb, opaque, QEMU_AIO_DISCARD);
 }
 
 static QEMUOptionParameter raw_create_options[] = {
@@ -1130,12 +1156,12 @@  static BlockDriver bdrv_file = {
     .bdrv_reopen_abort = raw_reopen_abort,
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
-    .bdrv_co_discard = raw_co_discard,
     .bdrv_co_is_allocated = raw_co_is_allocated,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_flush = raw_aio_flush,
+    .bdrv_aio_discard = raw_aio_discard,
 
     .bdrv_truncate = raw_truncate,
     .bdrv_getlength = raw_getlength,
@@ -1345,38 +1371,17 @@  static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
     return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
-static coroutine_fn int hdev_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    int ret;
-
-    if (s->has_discard == 0) {
-        return 0;
-    }
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
 
-    ret = -EOPNOTSUPP;
-#ifdef BLKDISCARD
-    do {
-        uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 };
-        if (ioctl(s->fd, BLKDISCARD, range) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = -errno;
-#endif
-    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
-        ret == -ENOTTY) {
-        s->has_discard = 0;
-        ret = 0;
+    if (fd_open(bs) < 0) {
+        return NULL;
     }
-    return ret;
-
+    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }
 
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1447,11 +1452,10 @@  static BlockDriver bdrv_host_device = {
     .create_options     = raw_create_options,
     .bdrv_has_zero_init = hdev_has_zero_init,
 
-    .bdrv_co_discard    = hdev_co_discard,
-
     .bdrv_aio_readv	= raw_aio_readv,
     .bdrv_aio_writev	= raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_aio_discard   = hdev_aio_discard,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength	= raw_getlength,
-- 
1.8.1


From e987ea95699f9753809ca6618f46840c0b528ee6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 9 Mar 2012 08:54:35 +0100
Subject: [PATCH 5/7] ide: fix TRIM with empty range entry

ATA-ACS-3 says "If the two byte range length is zero, then the LBA
Range Entry shall be discarded as padding."  iovecs are used as if
they are linearized, so it is incorrect to discard the rest of
this iovec.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ide/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 6f1938a..cb77dfc 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -374,7 +374,7 @@  BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
             uint16_t count = entry >> 48;
 
             if (count == 0) {
-                break;
+                continue;
             }
 
             ret = bdrv_discard(bs, sector, count);
-- 
1.8.1


From d8b4ee3ffae9d919809506183b206f51b34d76cf Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 8 Mar 2012 13:08:14 +0100
Subject: [PATCH 6/7] ide: issue discard asynchronously but serialize the
 pieces

Now that discard can take a long time, make it asynchronous.
Each LBA range entry is processed separately because discard
can be an expensive operation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ide/core.c | 79 ++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index cb77dfc..14ad079 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -325,14 +325,26 @@  typedef struct TrimAIOCB {
     BlockDriverAIOCB common;
     QEMUBH *bh;
     int ret;
+    QEMUIOVector *qiov;
+    BlockDriverAIOCB *aiocb;
+    int i, j;
 } TrimAIOCB;
 
 static void trim_aio_cancel(BlockDriverAIOCB *acb)
 {
     TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
 
+    /* Exit the loop in case bdrv_aio_cancel calls ide_issue_trim_cb again.  */
+    iocb->j = iocb->qiov->niov - 1;
+    iocb->i = (iocb->qiov->iov[iocb->j].iov_len / 8) - 1;
+
+    /* Tell ide_issue_trim_cb not to trigger the completion, too.  */
     qemu_bh_delete(iocb->bh);
     iocb->bh = NULL;
+
+    if (iocb->aiocb) {
+        bdrv_aio_cancel(iocb->aiocb);
+    }
     qemu_aio_release(iocb);
 }
 
@@ -349,43 +361,60 @@  static void ide_trim_bh_cb(void *opaque)
 
     qemu_bh_delete(iocb->bh);
     iocb->bh = NULL;
-
     qemu_aio_release(iocb);
 }
 
+static void ide_issue_trim_cb(void *opaque, int ret)
+{
+    TrimAIOCB *iocb = opaque;
+    if (ret >= 0) {
+        while (iocb->j < iocb->qiov->niov) {
+            int j = iocb->j;
+            while (++iocb->i < iocb->qiov->iov[j].iov_len / 8) {
+                int i = iocb->i;
+                uint64_t *buffer = iocb->qiov->iov[j].iov_base;
+
+                /* 6-byte LBA + 2-byte range per entry */
+                uint64_t entry = le64_to_cpu(buffer[i]);
+                uint64_t sector = entry & 0x0000ffffffffffffULL;
+                uint16_t count = entry >> 48;
+
+                if (count == 0) {
+                    continue;
+                }
+
+                /* Got an entry! Submit and exit.  */
+                iocb->aiocb = bdrv_aio_discard(iocb->common.bs, sector, count,
+                                               ide_issue_trim_cb, opaque);
+                return;
+            }
+
+            iocb->j++;
+            iocb->i = -1;
+        }
+    } else {
+        iocb->ret = ret;
+    }
+
+    iocb->aiocb = NULL;
+    if (iocb->bh) {
+        qemu_bh_schedule(iocb->bh);
+    }
+}
+
 BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     TrimAIOCB *iocb;
-    int i, j, ret;
 
     iocb = qemu_aio_get(&trim_aiocb_info, bs, cb, opaque);
     iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
     iocb->ret = 0;
-
-    for (j = 0; j < qiov->niov; j++) {
-        uint64_t *buffer = qiov->iov[j].iov_base;
-
-        for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
-            /* 6-byte LBA + 2-byte range per entry */
-            uint64_t entry = le64_to_cpu(buffer[i]);
-            uint64_t sector = entry & 0x0000ffffffffffffULL;
-            uint16_t count = entry >> 48;
-
-            if (count == 0) {
-                continue;
-            }
-
-            ret = bdrv_discard(bs, sector, count);
-            if (!iocb->ret) {
-                iocb->ret = ret;
-            }
-        }
-    }
-
-    qemu_bh_schedule(iocb->bh);
-
+    iocb->qiov = qiov;
+    iocb->i = -1;
+    iocb->j = 0;
+    ide_issue_trim_cb(iocb, 0);
     return &iocb->common;
 }
 
-- 
1.8.1


From 5fcec7acdc50f6d3a915a0b7c0ff0d43ec24d209 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 11 Jun 2012 09:12:43 +0200
Subject: [PATCH 7/7] block: clear dirty bitmap when discarding

Note that resetting bits in the dirty bitmap is done _before_ actually
processing the request.  Writes, instead, set bits after the request
is completed.

This way, when there are concurrent write and discard requests, the
outcome will always be that the blocks are marked dirty.  This scenario
should never happen, but it is safer to do it this way.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index b5e64ec..ffcc9e6 100644
--- a/block.c
+++ b/block.c
@@ -4174,7 +4174,13 @@  int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
         return -EIO;
     } else if (bs->read_only) {
         return -EROFS;
-    } else if (bs->drv->bdrv_co_discard) {
+    }
+
+    if (bs->dirty_bitmap) {
+        set_dirty_bitmap(bs, sector_num, nb_sectors, 0);
+    }
+
+    if (bs->drv->bdrv_co_discard) {
         return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
     } else if (bs->drv->bdrv_aio_discard) {
         BlockDriverAIOCB *acb;