Patchwork [10/19] ext4: Convert to new freezing mechanism

login
register
mail settings
Submitter Jan Kara
Date March 5, 2012, 4:01 p.m.
Message ID <1330963277-26336-11-git-send-email-jack@suse.cz>
Download mbox | patch
Permalink /patch/144709/
State Superseded
Headers show

Comments

Jan Kara - March 5, 2012, 4:01 p.m.
We remove most of the frozen checks since the upper layer takes care
of blocking all writes. We only have to handle protection in
ext4_page_mkwrite() in a special way because we cannot use the
generic block_page_mkwrite().

CC: linux-ext4@vger.kernel.org
CC: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/inode.c |    7 ++-----
 fs/ext4/super.c |   29 +++++------------------------
 2 files changed, 7 insertions(+), 29 deletions(-)
Kamal Mostafa - March 7, 2012, 10:32 p.m.
Re: the patch set:
    [PATCH 00/19] Fix filesystem freezing deadlocks

In my initial smoke testing of this, I find that if I freeze a newly
created ext4 filesystem immediately after mounting it for the very first
time, then I get the new SB_FREEZE_COMPLETE warning from
ext4_journal_start_sb() every 0.4 seconds from ext4lazyinit...

        # mkfs -t ext4 /dev/sdaX
        # mount /dev/sdaX /mnt
        # fsfreeze -f /mnt

         WARNING: at /home/kamal/src/linux/ubuntu-precise/fs/ext4/super.c:301 ext4_journal_start_sb+0x159/0x160()
        
         Pid: 3252, comm: ext4lazyinit Tainted: G        W    3.2.0-18-generic #28+kamal1+jankara1
        
         Call Trace:
          [<ffffffff8106724f>] warn_slowpath_common+0x7f/0xc0
          [<ffffffff810672aa>] warn_slowpath_null+0x1a/0x20
          [<ffffffff812352c9>] ext4_journal_start_sb+0x159/0x160
          [<ffffffff8121326b>] ? ext4_init_inode_table+0xab/0x370
          [<ffffffff8121326b>] ext4_init_inode_table+0xab/0x370
          [<ffffffff81659cb5>] ? schedule_timeout+0x175/0x320
          [<ffffffff81226905>] ext4_run_li_request+0x85/0xe0
          [<ffffffff812269fc>] ext4_lazyinit_thread+0x9c/0x1c0
          [<ffffffff81226960>] ? ext4_run_li_request+0xe0/0xe0
          [<ffffffff8108a39c>] kthread+0x8c/0xa0
          [<ffffffff81665e34>] kernel_thread_helper+0x4/0x10
          [<ffffffff8108a310>] ? flush_kthread_worker+0xa0/0xa0
          [<ffffffff81665e30>] ? gs_change+0x13/0x13

 -Kamal


On Mon, 2012-03-05 at 17:01 +0100, Jan Kara wrote:
> We remove most of frozen checks since upper layer takes care
> of blocking all writes. We only have to handle protection in
> ext4_page_mkwrite() in a special way because we cannot use
> generic block_page_mkwrite().
> 
> CC: linux-ext4@vger.kernel.org
> CC: "Theodore Ts'o" <tytso@mit.edu>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/ext4/inode.c |    7 ++-----
>  fs/ext4/super.c |   29 +++++------------------------
>  2 files changed, 7 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index feaa82f..c65baf9 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4593,11 +4593,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	get_block_t *get_block;
>  	int retries = 0;
>  
> -	/*
> -	 * This check is racy but catches the common case. We rely on
> -	 * __block_page_mkwrite() to do a reliable check.
> -	 */
> -	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
> +	sb_start_pagefault(inode->i_sb);
>  	/* Delalloc case is easy... */
>  	if (test_opt(inode->i_sb, DELALLOC) &&
>  	    !ext4_should_journal_data(inode) &&
> @@ -4665,5 +4661,6 @@ retry_alloc:
>  out_ret:
>  	ret = block_page_mkwrite_return(ret);
>  out:
> +	sb_end_pagefault(inode->i_sb);
>  	return ret;
>  }
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 502c61f..0f1024a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -289,33 +289,17 @@ static void ext4_put_nojournal(handle_t *handle)
>   * journal_end calls result in the superblock being marked dirty, so
>   * that sync() will call the filesystem's write_super callback if
>   * appropriate.
> - *
> - * To avoid j_barrier hold in userspace when a user calls freeze(),
> - * ext4 prevents a new handle from being started by s_frozen, which
> - * is in an upper layer.
>   */
>  handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
>  {
>  	journal_t *journal;
> -	handle_t  *handle;
>  
>  	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
>  	if (sb->s_flags & MS_RDONLY)
>  		return ERR_PTR(-EROFS);
>  
> +	WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
>  	journal = EXT4_SB(sb)->s_journal;
> -	handle = ext4_journal_current_handle();
> -
> -	/*
> -	 * If a handle has been started, it should be allowed to
> -	 * finish, otherwise deadlock could happen between freeze
> -	 * and others(e.g. truncate) due to the restart of the
> -	 * journal handle if the filesystem is forzen and active
> -	 * handles are not stopped.
> -	 */
> -	if (!handle)
> -		vfs_check_frozen(sb, SB_FREEZE_TRANS);
> -
>  	if (!journal)
>  		return ext4_get_nojournal();
>  	/*
> @@ -4280,10 +4264,8 @@ int ext4_force_commit(struct super_block *sb)
>  		return 0;
>  
>  	journal = EXT4_SB(sb)->s_journal;
> -	if (journal) {
> -		vfs_check_frozen(sb, SB_FREEZE_TRANS);
> +	if (journal)
>  		ret = ext4_journal_force_commit(journal);
> -	}
>  
>  	return ret;
>  }
> @@ -4315,9 +4297,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
>   * gives us a chance to flush the journal completely and mark the fs clean.
>   *
>   * Note that only this function cannot bring a filesystem to be in a clean
> - * state independently, because ext4 prevents a new handle from being started
> - * by @sb->s_frozen, which stays in an upper layer.  It thus needs help from
> - * the upper layer.
> + * state independently. It relies on upper layer to stop all data & metadata
> + * modifications.
>   */
>  static int ext4_freeze(struct super_block *sb)
>  {
> @@ -4344,7 +4325,7 @@ static int ext4_freeze(struct super_block *sb)
>  	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
>  	error = ext4_commit_super(sb, 1);
>  out:
> -	/* we rely on s_frozen to stop further updates */
> +	/* we rely on upper layer to stop further updates */
>  	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
>  	return error;
>  }

Patch

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index feaa82f..c65baf9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4593,11 +4593,7 @@  int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	get_block_t *get_block;
 	int retries = 0;
 
-	/*
-	 * This check is racy but catches the common case. We rely on
-	 * __block_page_mkwrite() to do a reliable check.
-	 */
-	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+	sb_start_pagefault(inode->i_sb);
 	/* Delalloc case is easy... */
 	if (test_opt(inode->i_sb, DELALLOC) &&
 	    !ext4_should_journal_data(inode) &&
@@ -4665,5 +4661,6 @@  retry_alloc:
 out_ret:
 	ret = block_page_mkwrite_return(ret);
 out:
+	sb_end_pagefault(inode->i_sb);
 	return ret;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 502c61f..0f1024a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -289,33 +289,17 @@  static void ext4_put_nojournal(handle_t *handle)
  * journal_end calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
  */
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 {
 	journal_t *journal;
-	handle_t  *handle;
 
 	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
 	if (sb->s_flags & MS_RDONLY)
 		return ERR_PTR(-EROFS);
 
+	WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
 	journal = EXT4_SB(sb)->s_journal;
-	handle = ext4_journal_current_handle();
-
-	/*
-	 * If a handle has been started, it should be allowed to
-	 * finish, otherwise deadlock could happen between freeze
-	 * and others(e.g. truncate) due to the restart of the
-	 * journal handle if the filesystem is forzen and active
-	 * handles are not stopped.
-	 */
-	if (!handle)
-		vfs_check_frozen(sb, SB_FREEZE_TRANS);
-
 	if (!journal)
 		return ext4_get_nojournal();
 	/*
@@ -4280,10 +4264,8 @@  int ext4_force_commit(struct super_block *sb)
 		return 0;
 
 	journal = EXT4_SB(sb)->s_journal;
-	if (journal) {
-		vfs_check_frozen(sb, SB_FREEZE_TRANS);
+	if (journal)
 		ret = ext4_journal_force_commit(journal);
-	}
 
 	return ret;
 }
@@ -4315,9 +4297,8 @@  static int ext4_sync_fs(struct super_block *sb, int wait)
  * gives us a chance to flush the journal completely and mark the fs clean.
  *
  * Note that only this function cannot bring a filesystem to be in a clean
- * state independently, because ext4 prevents a new handle from being started
- * by @sb->s_frozen, which stays in an upper layer.  It thus needs help from
- * the upper layer.
+ * state independently. It relies on upper layer to stop all data & metadata
+ * modifications.
  */
 static int ext4_freeze(struct super_block *sb)
 {
@@ -4344,7 +4325,7 @@  static int ext4_freeze(struct super_block *sb)
 	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 	error = ext4_commit_super(sb, 1);
 out:
-	/* we rely on s_frozen to stop further updates */
+	/* we rely on upper layer to stop further updates */
 	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	return error;
 }