Patchwork [PATCHv4,3/5] ext4: remove unnecessary superblock dirtying

login
register
mail settings
Submitter Artem Bityutskiy
Date July 4, 2012, 12:21 p.m.
Message ID <1341404514-13660-4-git-send-email-dedekind1@gmail.com>
Download mbox | patch
Permalink /patch/168968/
State Superseded
Headers show

Comments

Artem Bityutskiy - July 4, 2012, 12:21 p.m.
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>

This patch changes the '__ext4_handle_dirty_super()' function which is used
by ext4 to update the superblock via the journal in the following cases:

1. When creating the first large file on a file system without
   EXT4_FEATURE_RO_COMPAT_LARGE_FILE feature.
2. When re-sizing the file-system.
3. When creating an xattr on a file-system without the
   EXT4_FEATURE_COMPAT_EXT_ATTR feature.
4. When adding or deleting an orphan (because we update the 's_last_orphan'
   superblock field).

This function, however, falls back to just marking the superblock as dirty
if the file-system has no journal. This means that we delay the actual
superblock I/O submission by 5 seconds (roughly speaking). Namely, the
'sync_supers()' kernel thread will call 'ext4_write_super()' later, where
we actually will submit the superblock down to the media.

However:
1. For cases 1-3 it does not add any value to delay the I/O submission. These
   events are rare and we may just submit the superblock for
   asynchronous I/O right away.
2. For case 4 - similarly, not a terribly frequent event in most workloads.
   It should be good enough to just submit asynchronous superblock write-out.

This patch also removes 's_dirt' condition on the unmount path because we never
set it anymore, so we should not test it.

Tested using xfstests for both journalled and non-journalled ext4.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ext4/ext4.h      |    1 +
 fs/ext4/ext4_jbd2.c |    2 +-
 fs/ext4/super.c     |    5 ++---
 3 files changed, 4 insertions(+), 4 deletions(-)
Jan Kara - July 4, 2012, 1:11 p.m.
On Wed 04-07-12 15:21:52, Artem Bityutskiy wrote:
> From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
> 
> This patch changes the '__ext4_handle_dirty_super()' function which is used
> by ext4 to update the superblock via the journal in the following cases:
> 
> 1. When creating the first large file on a file system without
>    EXT4_FEATURE_RO_COMPAT_LARGE_FILE feature.
> 2. When re-sizing the file-system.
> 3. When creating an xattr on a file-system without the
>    EXT4_FEATURE_COMPAT_EXT_ATTR feature.
> 4. When adding or deleting an orphan (because we update the 's_last_orphan'
>    superblock field).
> 
> This function, however, falls back to just marking the superblock as dirty
> if the file-system has no journal. This means that we delay the actual
> superblock I/O submission by 5 seconds (roughly speaking). Namely, the
> 'sync_supers()' kernel thread will call 'ext4_write_super()' later, where
> we actually will submit the superblock down to the media.
> 
> However:
> 1. For cases 1-3 it does not add any value to delay the I/O submission. These
>    events are rare and we may just commit submit the superblock for
>    asynchronous I/O right away.
> 2. For case 4 - similarly, not terribly frequent event in most of workloads.
>    It should be good enough to just submit asynchronous superblock write-out.
  Well, it happens for every inode being truncated / deleted so it can be
rather frequent. That's why I wanted to have now == 1 case everywhere -
i.e. just recompute the checksum and do mark_buffer_dirty(). I'd just
remove the 'now' test in this patch and then in patch 5 remove the now
argument from the function and callers as you did.

									Honza

> 
> This patch also removes 's_dirt' condition on the unmount path because we never
> set it anymore, so we should not test it.
> 
> Tested using xfstests for both journalled and non-journalled ext4.
> 
> Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
> ---
>  fs/ext4/ext4.h      |    1 +
>  fs/ext4/ext4_jbd2.c |    2 +-
>  fs/ext4/super.c     |    5 ++---
>  3 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 0c4042e..b2439d5 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2041,6 +2041,7 @@ extern int ext4_superblock_csum_verify(struct super_block *sb,
>  				       struct ext4_super_block *es);
>  extern void ext4_superblock_csum_set(struct super_block *sb,
>  				     struct ext4_super_block *es);
> +extern int ext4_commit_super(struct super_block *sb, int sync);
>  extern void *ext4_kvmalloc(size_t size, gfp_t flags);
>  extern void *ext4_kvzalloc(size_t size, gfp_t flags);
>  extern void ext4_kvfree(void *ptr);
> diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
> index 90f7c2e..27354df 100644
> --- a/fs/ext4/ext4_jbd2.c
> +++ b/fs/ext4/ext4_jbd2.c
> @@ -156,6 +156,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
>  				(struct ext4_super_block *)bh->b_data);
>  		mark_buffer_dirty(bh);
>  	} else
> -		sb->s_dirt = 1;
> +		err = ext4_commit_super(sb, 0);
>  	return err;
>  }
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index eb7aa3e..9b26ba0 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -63,7 +63,6 @@ static struct ext4_features *ext4_feat;
>  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
>  			     unsigned long journal_devnum);
>  static int ext4_show_options(struct seq_file *seq, struct dentry *root);
> -static int ext4_commit_super(struct super_block *sb, int sync);
>  static void ext4_mark_recovery_complete(struct super_block *sb,
>  					struct ext4_super_block *es);
>  static void ext4_clear_journal_err(struct super_block *sb,
> @@ -896,7 +895,7 @@ static void ext4_put_super(struct super_block *sb)
>  		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
>  		es->s_state = cpu_to_le16(sbi->s_mount_state);
>  	}
> -	if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
> +	if (!(sb->s_flags & MS_RDONLY))
>  		ext4_commit_super(sb, 1);
>  
>  	if (sbi->s_proc) {
> @@ -4155,7 +4154,7 @@ static int ext4_load_journal(struct super_block *sb,
>  	return 0;
>  }
>  
> -static int ext4_commit_super(struct super_block *sb, int sync)
> +int ext4_commit_super(struct super_block *sb, int sync)
>  {
>  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
>  	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
> -- 
> 1.7.7.6
>
Artem Bityutskiy - July 10, 2012, 10:35 a.m.
On Wed, 2012-07-04 at 15:11 +0200, Jan Kara wrote:
> On Wed 04-07-12 15:21:52, Artem Bityutskiy wrote:
> > From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
> > 
> > This patch changes the '__ext4_handle_dirty_super()' function which is used
> > by ext4 to update the superblock via the journal in the following cases:
> > 
> > 1. When creating the first large file on a file system without
> >    EXT4_FEATURE_RO_COMPAT_LARGE_FILE feature.
> > 2. When re-sizing the file-system.
> > 3. When creating an xattr on a file-system without the
> >    EXT4_FEATURE_COMPAT_EXT_ATTR feature.
> > 4. When adding or deleting an orphan (because we update the 's_last_orphan'
> >    superblock field).
> > 
> > This function, however, falls back to just marking the superblock as dirty
> > if the file-system has no journal. This means that we delay the actual
> > superblock I/O submission by 5 seconds (roughly speaking). Namely, the
> > 'sync_supers()' kernel thread will call 'ext4_write_super()' later, where
> > we actually will submit the superblock down to the media.
> > 
> > However:
> > 1. For cases 1-3 it does not add any value to delay the I/O submission. These
> >    events are rare and we may just commit submit the superblock for
> >    asynchronous I/O right away.
> > 2. For case 4 - similarly, not terribly frequent event in most of workloads.
> >    It should be good enough to just submit asynchronous superblock write-out.
>   Well, it happens for every inode being truncated / deleted to it can be
> rather frequent. That's why I wanted to have now == 1 case everywhere -
> i.e. just recompute the checksum and do mark_buffer_dirty(). I'd just
> remove the 'now' test in this patch and then in patch 5 remove the now
> argument from the function and callers as you did.

I am a bit confused.

It seems you consider that 'ext4_commit_super()' is considerably
slower than just marking the buffer as dirty right away. But I do not
really understand why - all it does - it just updates a couple of
superblock fields and then marks the buffer as dirty (I assume sync ==
0). So from my POV they are almost the same. And when csum is enabled -
re-calculating csum will probably be the longest part.

More important is that we dirty the superblock on every deletion - this
means that with my change we will re-calculate the checksum on every deletion
and I am not sure it is nice. Ideally, we should be able to calculate
the checksum just before sending the buffer to the IO queue...

I'll prepare a new patch-set and send it to you. Thanks!
Artem Bityutskiy - July 10, 2012, 12:17 p.m.
On Wed, 2012-07-04 at 15:11 +0200, Jan Kara wrote:
> On Wed 04-07-12 15:21:52, Artem Bityutskiy wrote:
> > From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
> > 
> > This patch changes the '__ext4_handle_dirty_super()' function which is used
> > by ext4 to update the superblock via the journal in the following cases:
> > 
> > 1. When creating the first large file on a file system without
> >    EXT4_FEATURE_RO_COMPAT_LARGE_FILE feature.
> > 2. When re-sizing the file-system.
> > 3. When creating an xattr on a file-system without the
> >    EXT4_FEATURE_COMPAT_EXT_ATTR feature.
> > 4. When adding or deleting an orphan (because we update the 's_last_orphan'
> >    superblock field).
> > 
> > This function, however, falls back to just marking the superblock as dirty
> > if the file-system has no journal. This means that we delay the actual
> > superblock I/O submission by 5 seconds (roughly speaking). Namely, the
> > 'sync_supers()' kernel thread will call 'ext4_write_super()' later, where
> > we actually will submit the superblock down to the media.
> > 
> > However:
> > 1. For cases 1-3 it does not add any value to delay the I/O submission. These
> >    events are rare and we may just commit submit the superblock for
> >    asynchronous I/O right away.
> > 2. For case 4 - similarly, not terribly frequent event in most of workloads.
> >    It should be good enough to just submit asynchronous superblock write-out.
>   Well, it happens for every inode being truncated / deleted to it can be
> rather frequent. That's why I wanted to have now == 1 case everywhere -
> i.e. just recompute the checksum and do mark_buffer_dirty(). I'd just
> remove the 'now' test in this patch and then in patch 5 remove the now
> argument from the function and callers as you did.

It looked logical to me to use 'ext4_commit_super()' always and remove
'now' and marking the buffer dirty directly. Just because I thought the
speed difference should be nearly 0, and 'ext4_commit_super()' is doing
some error checking. But you seem to suggest to do the opposite, and I
do not understand why would that be better. So I dropped this change so
far.

I've sent v5 where I basically only changed the commit message in patch
3 and dropped patch 5. In patch 3 I've explicitly indicated that we'll
do more checksum calculations, but I think this is acceptable.

Thanks!
Jan Kara - July 10, 2012, 12:52 p.m.
On Tue 10-07-12 13:35:36, Artem Bityutskiy wrote:
> On Wed, 2012-07-04 at 15:11 +0200, Jan Kara wrote:
> > On Wed 04-07-12 15:21:52, Artem Bityutskiy wrote:
> > > From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
> > > 
> > > This patch changes the '__ext4_handle_dirty_super()' function which is used
> > > by ext4 to update the superblock via the journal in the following cases:
> > > 
> > > 1. When creating the first large file on a file system without
> > >    EXT4_FEATURE_RO_COMPAT_LARGE_FILE feature.
> > > 2. When re-sizing the file-system.
> > > 3. When creating an xattr on a file-system without the
> > >    EXT4_FEATURE_COMPAT_EXT_ATTR feature.
> > > 4. When adding or deleting an orphan (because we update the 's_last_orphan'
> > >    superblock field).
> > > 
> > > This function, however, falls back to just marking the superblock as dirty
> > > if the file-system has no journal. This means that we delay the actual
> > > superblock I/O submission by 5 seconds (roughly speaking). Namely, the
> > > 'sync_supers()' kernel thread will call 'ext4_write_super()' later, where
> > > we actually will submit the superblock down to the media.
> > > 
> > > However:
> > > 1. For cases 1-3 it does not add any value to delay the I/O submission. These
> > >    events are rare and we may just commit submit the superblock for
> > >    asynchronous I/O right away.
> > > 2. For case 4 - similarly, not terribly frequent event in most of workloads.
> > >    It should be good enough to just submit asynchronous superblock write-out.
> >   Well, it happens for every inode being truncated / deleted to it can be
> > rather frequent. That's why I wanted to have now == 1 case everywhere -
> > i.e. just recompute the checksum and do mark_buffer_dirty(). I'd just
> > remove the 'now' test in this patch and then in patch 5 remove the now
> > argument from the function and callers as you did.
> 
> I am a bit confused.
> 
> It seems you consider that 'ext4_commit_super()' is a considerably
> slower than just marking the buffer as dirty right away. But I do not
> really understand why - all it does - it just updates a couple of
> superblock fields and then marks the buffer as dirty (I assume sync ==
> 0). So from my POW they are almost the same. And when csum is enabled -
> re-calculating csum will probably be the longest part.
  Well, the part you might be missing is:
        ext4_free_blocks_count_set(es,
                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
                                &EXT4_SB(sb)->s_freeclusters_counter)));
        es->s_free_inodes_count =
                cpu_to_le32(percpu_counter_sum_positive(
                                &EXT4_SB(sb)->s_freeinodes_counter));
  percpu_counter_sum() *is* rather expensive. At least for big machines.

  Also just marking the buffer dirty corresponds more closely to what we do
when journalling.

> More important is that we dirty the superblock on every deletion - this
> mean that with my change we will re-calculate checsum on every deletion
> and I am not sure it is nice. Ideally, we should be able to calculate
> the checksum just before sending the buffer to the IO queue...
  Yes, that would be nice but it's not easy to do currently...

								Honza

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c4042e..b2439d5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2041,6 +2041,7 @@  extern int ext4_superblock_csum_verify(struct super_block *sb,
 				       struct ext4_super_block *es);
 extern void ext4_superblock_csum_set(struct super_block *sb,
 				     struct ext4_super_block *es);
+extern int ext4_commit_super(struct super_block *sb, int sync);
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern void ext4_kvfree(void *ptr);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 90f7c2e..27354df 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -156,6 +156,6 @@  int __ext4_handle_dirty_super(const char *where, unsigned int line,
 				(struct ext4_super_block *)bh->b_data);
 		mark_buffer_dirty(bh);
 	} else
-		sb->s_dirt = 1;
+		err = ext4_commit_super(sb, 0);
 	return err;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e..9b26ba0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -63,7 +63,6 @@  static struct ext4_features *ext4_feat;
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
-static int ext4_commit_super(struct super_block *sb, int sync);
 static void ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
 static void ext4_clear_journal_err(struct super_block *sb,
@@ -896,7 +895,7 @@  static void ext4_put_super(struct super_block *sb)
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
 	}
-	if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
+	if (!(sb->s_flags & MS_RDONLY))
 		ext4_commit_super(sb, 1);
 
 	if (sbi->s_proc) {
@@ -4155,7 +4154,7 @@  static int ext4_load_journal(struct super_block *sb,
 	return 0;
 }
 
-static int ext4_commit_super(struct super_block *sb, int sync)
+int ext4_commit_super(struct super_block *sb, int sync)
 {
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;