Patchwork [UBUNTU] sync before umount to reduce time taken by ext4 umount

login
register
mail settings
Submitter Surbhi Palande
Date April 9, 2010, 5:44 p.m.
Message ID <1270835046-14280-1-git-send-email-surbhi.palande@canonical.com>
Download mbox | patch
Permalink /patch/49857/
State Accepted
Delegated to: Andy Whitcroft
Headers show

Comments

Surbhi Palande - April 9, 2010, 5:44 p.m.
This patch is a tempory fix for the bug 543617 on launchpad where users have
reported huge amount of time for an ext4 unmount. When sync is called prior to
umount, the time taken by umount is reduced to an acceptable value. The
reason due to which ext4 umount takes a long time is:
* a journal transaction is involved with every inode associated with a flush
at umount time. A wait is performed till this journal transaction is flushed
to the disk.

In the example mentioned by Kees Cook on LP, there are 65K inodes and their
associated journal data blocks which are flushed to the disk and 65K barriers
called with a single unmount call. Due to the large dirty data flush interval
time, the journal flush is not happening before umount is called in this
particular case.

The following patch puts the sync() related code before umount, so that users
get an acceptable wait period whenever umount is called.

Do consider merging this for Lucid, till a real fix is available.

From bc7dddbc909a17c63b13ba4c62d420256bd11758 Mon Sep 17 00:00:00 2001
From: Surbhi Palande <surbhi.palande@canonical.com>
Date: Wed, 7 Apr 2010 15:03:11 +0300
Subject: [PATCH] sync before umount to reduce time taken by ext4 umount

This is a temporary fix to reduce the time it takes an unmount, when no prior
sync is called. The appropriate solution would be to fix the underlying ext4
journal, sync and barrier calls.

BugLink: http://launchpad.net/bugs/543617

Signed-off-by: Surbhi Palande <surbhi.palande@canonical.com>
---
 fs/namespace.c     |    7 +++++++
 fs/sync.c          |    2 +-
 include/linux/fs.h |    1 +
 3 files changed, 9 insertions(+), 1 deletions(-)
Stefan Bader - April 13, 2010, 1:19 p.m.
It looks like this should at least give no badness. Only it should be made a
SAUCE patch.

Surbhi Palande wrote:
> This patch is a tempory fix for the bug 543617 on launchpad where users have
> reported huge amount of time for an ext4 unmount. When sync is called prior to
> umount, the time taken by umount is reduced to an acceptable value. The
> reason due to which ext4 umount takes a long time is:
> * a journal transaction is involved with every inode associated with a flush
> at umount time. A wait is performed till this journal transaction is flushed
> to the disk.
> 
> In the example mentioned by Kees Cook on LP, there are 65K inodes and their
> associated journal data blocks which are flushed to the disk and 65K barriers
> called with a single unmount call. Due to the large dirty data flush interval
> time, the journal flush is not happening before umount is called in this
> particular case.
> 
> The following patch puts the sync() related code before umount, so that users
> get an acceptable wait period whenever umount is called.
> 
> Do consider merging this for Lucid, till a real fix is available.
> 
> From bc7dddbc909a17c63b13ba4c62d420256bd11758 Mon Sep 17 00:00:00 2001
> From: Surbhi Palande <surbhi.palande@canonical.com>
> Date: Wed, 7 Apr 2010 15:03:11 +0300
> Subject: [PATCH] sync before umount to reduce time taken by ext4 umount
> 
> This is a temporary fix to reduce the time it takes an unmount, when no prior
> sync is called. The appropriate solution would be to fix the underlying ext4
> journal, sync and barrier calls.
> 
> BugLink: http://launchpad.net/bugs/543617
> 
> Signed-off-by: Surbhi Palande <surbhi.palande@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  fs/namespace.c     |    7 +++++++
>  fs/sync.c          |    2 +-
>  include/linux/fs.h |    1 +
>  3 files changed, 9 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index a2cadcf..a648d7f 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -33,6 +33,7 @@
>  #include <asm/unistd.h>
>  #include "pnode.h"
>  #include "internal.h"
> +#include <linux/fs.h>
>  
>  #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
>  #define HASH_SIZE (1UL << HASH_SHIFT)
> @@ -1134,6 +1135,12 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
>  	if (!capable(CAP_SYS_ADMIN))
>  		goto dput_and_out;
>  
> +	/* Temporary solution to fix long umount periods till
> +	 * we find the real fix
> +	 */
> +	sync_filesystems(0);
> +	sync_filesystems(1);
> +
>  	retval = do_umount(path.mnt, flags);
>  dput_and_out:
>  	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
> diff --git a/fs/sync.c b/fs/sync.c
> index d104591..09e3734 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
>   * flags again, which will cause process A to resync everything.  Fix that with
>   * a local mutex.
>   */
> -static void sync_filesystems(int wait)
> +void sync_filesystems(int wait)
>  {
>  	struct super_block *sb;
>  	static DEFINE_MUTEX(mutex);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 692a3ee..1a51c61 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1973,6 +1973,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
>  	return 0;
>  }
>  #endif
> +extern void sync_filesystems(int wait);
>  extern int sync_filesystem(struct super_block *);
>  extern const struct file_operations def_blk_fops;
>  extern const struct file_operations def_chr_fops;
Andy Whitcroft - April 13, 2010, 2:14 p.m.
On Fri, Apr 09, 2010 at 08:44:06PM +0300, Surbhi Palande wrote:
> This patch is a tempory fix for the bug 543617 on launchpad where users have
> reported huge amount of time for an ext4 unmount. When sync is called prior to
> umount, the time taken by umount is reduced to an acceptable value. The
> reason due to which ext4 umount takes a long time is:
> * a journal transaction is involved with every inode associated with a flush
> at umount time. A wait is performed till this journal transaction is flushed
> to the disk.
> 
> In the example mentioned by Kees Cook on LP, there are 65K inodes and their
> associated journal data blocks which are flushed to the disk and 65K barriers
> called with a single unmount call. Due to the large dirty data flush interval
> time, the journal flush is not happening before umount is called in this
> particular case.
> 
> The following patch puts the sync() related code before umount, so that users
> get an acceptable wait period whenever umount is called.
> 
> Do consider merging this for Lucid, till a real fix is available.
> 
> From bc7dddbc909a17c63b13ba4c62d420256bd11758 Mon Sep 17 00:00:00 2001
> From: Surbhi Palande <surbhi.palande@canonical.com>
> Date: Wed, 7 Apr 2010 15:03:11 +0300
> Subject: [PATCH] sync before umount to reduce time taken by ext4 umount
> 
> This is a temporary fix to reduce the time it takes an unmount, when no prior
> sync is called. The appropriate solution would be to fix the underlying ext4
> journal, sync and barrier calls.
> 
> BugLink: http://launchpad.net/bugs/543617
> 
> Signed-off-by: Surbhi Palande <surbhi.palande@canonical.com>
> ---
>  fs/namespace.c     |    7 +++++++
>  fs/sync.c          |    2 +-
>  include/linux/fs.h |    1 +
>  3 files changed, 9 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index a2cadcf..a648d7f 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -33,6 +33,7 @@
>  #include <asm/unistd.h>
>  #include "pnode.h"
>  #include "internal.h"
> +#include <linux/fs.h>
>  
>  #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
>  #define HASH_SIZE (1UL << HASH_SHIFT)
> @@ -1134,6 +1135,12 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
>  	if (!capable(CAP_SYS_ADMIN))
>  		goto dput_and_out;
>  
> +	/* Temporary solution to fix long umount periods till
> +	 * we find the real fix
> +	 */
> +	sync_filesystems(0);
> +	sync_filesystems(1);
> +
>  	retval = do_umount(path.mnt, flags);
>  dput_and_out:
>  	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
> diff --git a/fs/sync.c b/fs/sync.c
> index d104591..09e3734 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
>   * flags again, which will cause process A to resync everything.  Fix that with
>   * a local mutex.
>   */
> -static void sync_filesystems(int wait)
> +void sync_filesystems(int wait)
>  {
>  	struct super_block *sb;
>  	static DEFINE_MUTEX(mutex);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 692a3ee..1a51c61 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1973,6 +1973,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
>  	return 0;
>  }
>  #endif
> +extern void sync_filesystems(int wait);
>  extern int sync_filesystem(struct super_block *);
>  extern const struct file_operations def_blk_fops;
>  extern const struct file_operations def_chr_fops;

I guess this is ok for a temporary solution.

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw
Andy Whitcroft - April 13, 2010, 2:47 p.m.
Applied to Lucid.

-apw

Patch

diff --git a/fs/namespace.c b/fs/namespace.c
index a2cadcf..a648d7f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -33,6 +33,7 @@ 
 #include <asm/unistd.h>
 #include "pnode.h"
 #include "internal.h"
+#include <linux/fs.h>
 
 #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
 #define HASH_SIZE (1UL << HASH_SHIFT)
@@ -1134,6 +1135,12 @@  SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 	if (!capable(CAP_SYS_ADMIN))
 		goto dput_and_out;
 
+	/* Temporary solution to fix long umount periods till
+	 * we find the real fix
+	 */
+	sync_filesystems(0);
+	sync_filesystems(1);
+
 	retval = do_umount(path.mnt, flags);
 dput_and_out:
 	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
diff --git a/fs/sync.c b/fs/sync.c
index d104591..09e3734 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -89,7 +89,7 @@  EXPORT_SYMBOL_GPL(sync_filesystem);
  * flags again, which will cause process A to resync everything.  Fix that with
  * a local mutex.
  */
-static void sync_filesystems(int wait)
+void sync_filesystems(int wait)
 {
 	struct super_block *sb;
 	static DEFINE_MUTEX(mutex);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 692a3ee..1a51c61 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1973,6 +1973,7 @@  static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	return 0;
 }
 #endif
+extern void sync_filesystems(int wait);
 extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;