Patchwork raw-posix: Support discard on more filesystems

login
register
mail settings
Submitter Kusanagi Kouichi
Date Jan. 5, 2013, 12:05 p.m.
Message ID <20130105120511.388913E403B@msa101.auone-net.jp>
Download mbox | patch
Permalink /patch/209666/
State New
Headers show

Comments

Kusanagi Kouichi - Jan. 5, 2013, 12:05 p.m.
Linux 2.6.38 introduced a filesystem-independent interface,
fallocate() with FALLOC_FL_PUNCH_HOLE, to deallocate part of a file.
As of Linux 3.7, btrfs, ext4, ocfs2, tmpfs and xfs support it.

Signed-off-by: Kusanagi Kouichi <slash@ac.auone-net.jp>
---
 block/raw-posix.c |   22 ++++++++++++++++++++--
 configure         |   19 +++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
Stefan Hajnoczi - Jan. 7, 2013, 1:54 p.m.
On Sat, Jan 05, 2013 at 09:05:10PM +0900, Kusanagi Kouichi wrote:
> @@ -1098,15 +1101,30 @@ static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
>  static coroutine_fn int raw_co_discard(BlockDriverState *bs,
>      int64_t sector_num, int nb_sectors)
>  {
> -#ifdef CONFIG_XFS
> +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_XFS)
>      BDRVRawState *s = bs->opaque;
> +#endif
> +    int ret = 0;
>  
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +    do {
> +        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +                      sector_num << BDRV_SECTOR_BITS,
> +                      nb_sectors << BDRV_SECTOR_BITS) == 0) {
> +            return 0;
> +        }
> +    } while (errno == EINTR);

Is fallocate(fd, FALLOC_FL_PUNCH_HOLE) a blocking operation?

If so, we need to perform this call in an aio worker thread (see
read/write/flush/ioctl in block/raw-posix.c).  Otherwise QEMU and the
guest are blocked for the duration of the fallocate call, which causes
poor performance.

Stefan
Paolo Bonzini - Jan. 9, 2013, 11:02 a.m.
Il 05/01/2013 13:05, Kusanagi Kouichi ha scritto:
> Linux 2.6.38 introduced the filesystem independent interface to
> deallocate part of a file. As of Linux 3.7, btrfs, ext4, ocfs2,
> tmpfs and xfs support it.
> 
> Signed-off-by: Kusanagi Kouichi <slash@ac.auone-net.jp>
> ---
>  block/raw-posix.c |   22 ++++++++++++++++++++--
>  configure         |   19 +++++++++++++++++++
>  2 files changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 87d888e..66ba0b5 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -59,6 +59,9 @@
>  #ifdef CONFIG_FIEMAP
>  #include <linux/fiemap.h>
>  #endif
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +#include <linux/falloc.h>
> +#endif
>  #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
>  #include <sys/disk.h>
>  #include <sys/cdio.h>
> @@ -1098,15 +1101,30 @@ static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
>  static coroutine_fn int raw_co_discard(BlockDriverState *bs,
>      int64_t sector_num, int nb_sectors)
>  {
> -#ifdef CONFIG_XFS
> +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_XFS)
>      BDRVRawState *s = bs->opaque;
> +#endif
> +    int ret = 0;
>  
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +    do {
> +        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +                      sector_num << BDRV_SECTOR_BITS,
> +                      nb_sectors << BDRV_SECTOR_BITS) == 0) {

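The last argument needs a cast to int64_t before the shift: nb_sectors
is an int, so "nb_sectors << BDRV_SECTOR_BITS" is evaluated in int and
can overflow for large requests.  Use
"(int64_t)nb_sectors << BDRV_SECTOR_BITS" instead.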

I was going to post a similar patch, so I'll take the best parts of
yours and mine. :)

Paolo

> +            return 0;
> +        }
> +    } while (errno == EINTR);
> +
> +    ret = -errno;
> +#endif
> +
> +#ifdef CONFIG_XFS
>      if (s->is_xfs) {
>          return xfs_discard(s, sector_num, nb_sectors);
>      }
>  #endif
>  
> -    return 0;
> +    return ret;
>  }
>  
>  static QEMUOptionParameter raw_create_options[] = {
> diff --git a/configure b/configure
> index 837a84a..ff2e373 100755
> --- a/configure
> +++ b/configure
> @@ -2576,6 +2576,22 @@ if compile_prog "" "" ; then
>    fallocate=yes
>  fi
>  
> +# check for fallocate hole punching
> +fallocate_punch_hole=no
> +cat > $TMPC << EOF
> +#include <fcntl.h>
> +#include <linux/falloc.h>
> +
> +int main(void)
> +{
> +    fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
> +    return 0;
> +}
> +EOF
> +if compile_prog "" "" ; then
> +  fallocate_punch_hole=yes
> +fi
> +
>  # check for sync_file_range
>  sync_file_range=no
>  cat > $TMPC << EOF
> @@ -3485,6 +3501,9 @@ fi
>  if test "$fallocate" = "yes" ; then
>    echo "CONFIG_FALLOCATE=y" >> $config_host_mak
>  fi
> +if test "$fallocate_punch_hole" = "yes" ; then
> +  echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
> +fi
>  if test "$sync_file_range" = "yes" ; then
>    echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak
>  fi
>

Patch

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 87d888e..66ba0b5 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -59,6 +59,9 @@ 
 #ifdef CONFIG_FIEMAP
 #include <linux/fiemap.h>
 #endif
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <linux/falloc.h>
+#endif
 #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
 #include <sys/disk.h>
 #include <sys/cdio.h>
@@ -1098,15 +1101,30 @@  static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
 static coroutine_fn int raw_co_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors)
 {
-#ifdef CONFIG_XFS
+#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_XFS)
     BDRVRawState *s = bs->opaque;
+#endif
+    int ret = 0;
 
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    do {
+        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      sector_num << BDRV_SECTOR_BITS,
+                      nb_sectors << BDRV_SECTOR_BITS) == 0) {
+            return 0;
+        }
+    } while (errno == EINTR);
+
+    ret = -errno;
+#endif
+
+#ifdef CONFIG_XFS
     if (s->is_xfs) {
         return xfs_discard(s, sector_num, nb_sectors);
     }
 #endif
 
-    return 0;
+    return ret;
 }
 
 static QEMUOptionParameter raw_create_options[] = {
diff --git a/configure b/configure
index 837a84a..ff2e373 100755
--- a/configure
+++ b/configure
@@ -2576,6 +2576,22 @@  if compile_prog "" "" ; then
   fallocate=yes
 fi
 
+# check for fallocate hole punching
+fallocate_punch_hole=no
+cat > $TMPC << EOF
+#include <fcntl.h>
+#include <linux/falloc.h>
+
+int main(void)
+{
+    fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
+    return 0;
+}
+EOF
+if compile_prog "" "" ; then
+  fallocate_punch_hole=yes
+fi
+
 # check for sync_file_range
 sync_file_range=no
 cat > $TMPC << EOF
@@ -3485,6 +3501,9 @@  fi
 if test "$fallocate" = "yes" ; then
   echo "CONFIG_FALLOCATE=y" >> $config_host_mak
 fi
+if test "$fallocate_punch_hole" = "yes" ; then
+  echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
+fi
 if test "$sync_file_range" = "yes" ; then
   echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak
 fi