diff mbox series

[9/9] ext4: Use mmb infrastructure for inode buffer writeout

Message ID 20260511121356.241821-18-jack@suse.cz
State Not Applicable
Headers show
Series fs: Fix missed inode write during fsync | expand

Commit Message

Jan Kara May 11, 2026, 12:13 p.m. UTC
Use mmb inode buffer writeout infrastructure to reliably write out
inode's inode table block on fsync(2) in nojournal mode (from
ext4_sync_parent() and ext4_fsync_nojournal()). This significantly
simplifies the code as we don't have to explicitly handle inode buffer
writeback in ext4_write_inode() and thus we can also remove
sync_inode_metadata() calls from ext4_sync_parent() and
ext4_write_inode() call from ext4_fsync_nojournal().

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/ext4_jbd2.c |  2 +-
 fs/ext4/ext4_jbd2.h |  2 ++
 fs/ext4/fsync.c     | 12 ------------
 fs/ext4/inode.c     | 24 +++++-------------------
 4 files changed, 8 insertions(+), 32 deletions(-)

Comments

Christian Brauner May 11, 2026, 1:30 p.m. UTC | #1
On Mon, May 11, 2026 at 02:13:59PM +0200, Jan Kara wrote:
> Use mmb inode buffer writeout infrastructure to reliably write out
> inode's inode table block on fsync(2) in nojournal mode (from
> ext4_sync_parent() and ext4_fsync_nojournal()). This significantly
> simplifies the code as we don't have to explicitely handle inode buffer
> writeback in ext4_write_inode() and thus we can also remove
> sync_inode_metadata() calls from ext4_sync_parent() and
> ext4_write_inode() call from ext4_fsync_nojournal().
> 
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/ext4/ext4_jbd2.c |  2 +-
>  fs/ext4/ext4_jbd2.h |  2 ++
>  fs/ext4/fsync.c     | 12 ------------
>  fs/ext4/inode.c     | 24 +++++-------------------
>  4 files changed, 8 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
> index 74f05bd0cdde..6bbaf72108fd 100644
> --- a/fs/ext4/ext4_jbd2.c
> +++ b/fs/ext4/ext4_jbd2.c
> @@ -350,7 +350,7 @@ int __ext4_journal_get_create_access(const char *where, unsigned int line,
>  	return 0;
>  }
>  
> -static void ext4_inode_attach_mmb(struct inode *inode)
> +void ext4_inode_attach_mmb(struct inode *inode)
>  {
>  	struct mapping_metadata_bhs *mmb;
>  
> diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
> index 63d17c5201b5..2a01b8279c88 100644
> --- a/fs/ext4/ext4_jbd2.h
> +++ b/fs/ext4/ext4_jbd2.h
> @@ -122,6 +122,8 @@
>  #define EXT4_HT_EXT_CONVERT     11
>  #define EXT4_HT_MAX             12
>  
> +void ext4_inode_attach_mmb(struct inode *inode);
> +
>  int
>  ext4_mark_iloc_dirty(handle_t *handle,
>  		     struct inode *inode,
> diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
> index e25d365e1179..af84489e57c6 100644
> --- a/fs/ext4/fsync.c
> +++ b/fs/ext4/fsync.c
> @@ -75,9 +75,6 @@ static int ext4_sync_parent(struct inode *inode)
>  			if (ret)
>  				break;
>  		}
> -		ret = sync_inode_metadata(inode, 1);
> -		if (ret)
> -			break;
>  	}
>  	dput(dentry);
>  	return ret;
> @@ -87,10 +84,6 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
>  				int datasync, bool *needs_barrier)
>  {
>  	struct inode *inode = file->f_inode;
> -	struct writeback_control wbc = {
> -		.sync_mode = WB_SYNC_ALL,
> -		.nr_to_write = 0,
> -	};
>  	int ret;
>  
>  	ret = mmb_fsync_noflush(file, EXT4_I(inode)->i_metadata_bhs,
> @@ -98,11 +91,6 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
>  	if (ret)
>  		return ret;
>  
> -	/* Force writeout of inode table buffer to disk */
> -	ret = ext4_write_inode(inode, &wbc);
> -	if (ret)
> -		return ret;
> -
>  	ret = ext4_sync_parent(inode);
>  
>  	if (test_opt(inode->i_sb, BARRIER))
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3e66e9510909..09506b4de1b2 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -5786,24 +5786,6 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
>  
>  		err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
>  						EXT4_I(inode)->i_sync_tid);
> -	} else {
> -		struct ext4_iloc iloc;
> -
> -		err = __ext4_get_inode_loc_noinmem(inode, &iloc);
> -		if (err)
> -			return err;
> -		/*
> -		 * sync(2) will flush the whole buffer cache. No need to do
> -		 * it here separately for each inode.
> -		 */
> -		if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
> -			sync_dirty_buffer(iloc.bh);
> -		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
> -			ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO,
> -					       "IO error syncing inode");
> -			err = -EIO;
> -		}
> -		brelse(iloc.bh);
>  	}
>  	return err;
>  }
> @@ -6348,7 +6330,11 @@ int ext4_mark_iloc_dirty(handle_t *handle,
>  
>  	/* the do_update_inode consumes one bh->b_count */
>  	get_bh(iloc->bh);
> -
> +	if (!ext4_handle_valid(handle)) {
> +		if (!EXT4_I(inode)->i_metadata_bhs)
> +			ext4_inode_attach_mmb(inode);
> +		EXT4_I(inode)->i_metadata_bhs->inode_blk = iloc->bh->b_blocknr;

The series is great overall. The only thing I think we should change is
that we should hide this

EXT4_I(inode)->i_metadata_bhs->inode_blk = iloc->bh->b_blocknr;

behind a dedicated static inline/regular function call instead of
open-coding it everywhere. Can then also be paired with some
VFS_WARN_ON_ONCE() to detect garbage bh->b_blocknr.
Jan Kara May 13, 2026, 10:45 a.m. UTC | #2
On Mon 11-05-26 15:30:34, Christian Brauner wrote:
> On Mon, May 11, 2026 at 02:13:59PM +0200, Jan Kara wrote:
> > Use mmb inode buffer writeout infrastructure to reliably write out
> > inode's inode table block on fsync(2) in nojournal mode (from
> > ext4_sync_parent() and ext4_fsync_nojournal()). This significantly
> > simplifies the code as we don't have to explicitely handle inode buffer
> > writeback in ext4_write_inode() and thus we can also remove
> > sync_inode_metadata() calls from ext4_sync_parent() and
> > ext4_write_inode() call from ext4_fsync_nojournal().
> > 
> > Signed-off-by: Jan Kara <jack@suse.cz>

...

> > @@ -6348,7 +6330,11 @@ int ext4_mark_iloc_dirty(handle_t *handle,
> >  
> >  	/* the do_update_inode consumes one bh->b_count */
> >  	get_bh(iloc->bh);
> > -
> > +	if (!ext4_handle_valid(handle)) {
> > +		if (!EXT4_I(inode)->i_metadata_bhs)
> > +			ext4_inode_attach_mmb(inode);
> > +		EXT4_I(inode)->i_metadata_bhs->inode_blk = iloc->bh->b_blocknr;
> 
> The series is great overall. The only thing I think we should change is
> that we should hide this
> 
> EXT4_I(inode)->i_metadata_bhs->inode_blk = iloc->bh->b_blocknr;
> 
> behind a dedicated static inline/regular function call instead of
> open-coding it everywhere. Can then also be paired with some
> VFS_WARN_ON_ONCE() to detect garbage bh->b_blocknr.

Good point. I've created a mmb_mark_inode_buffer_dirty() helper for this,
matching the mmb_mark_buffer_dirty() we use for standard metadata buffers. It
now also handles dirtying the buffer and synchronization with mmb_sync()
clearing the inode_blk.

								Honza
diff mbox series

Patch

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 74f05bd0cdde..6bbaf72108fd 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -350,7 +350,7 @@  int __ext4_journal_get_create_access(const char *where, unsigned int line,
 	return 0;
 }
 
-static void ext4_inode_attach_mmb(struct inode *inode)
+void ext4_inode_attach_mmb(struct inode *inode)
 {
 	struct mapping_metadata_bhs *mmb;
 
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 63d17c5201b5..2a01b8279c88 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,6 +122,8 @@ 
 #define EXT4_HT_EXT_CONVERT     11
 #define EXT4_HT_MAX             12
 
+void ext4_inode_attach_mmb(struct inode *inode);
+
 int
 ext4_mark_iloc_dirty(handle_t *handle,
 		     struct inode *inode,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e25d365e1179..af84489e57c6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,9 +75,6 @@  static int ext4_sync_parent(struct inode *inode)
 			if (ret)
 				break;
 		}
-		ret = sync_inode_metadata(inode, 1);
-		if (ret)
-			break;
 	}
 	dput(dentry);
 	return ret;
@@ -87,10 +84,6 @@  static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
 				int datasync, bool *needs_barrier)
 {
 	struct inode *inode = file->f_inode;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 0,
-	};
 	int ret;
 
 	ret = mmb_fsync_noflush(file, EXT4_I(inode)->i_metadata_bhs,
@@ -98,11 +91,6 @@  static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
 	if (ret)
 		return ret;
 
-	/* Force writeout of inode table buffer to disk */
-	ret = ext4_write_inode(inode, &wbc);
-	if (ret)
-		return ret;
-
 	ret = ext4_sync_parent(inode);
 
 	if (test_opt(inode->i_sb, BARRIER))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3e66e9510909..09506b4de1b2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5786,24 +5786,6 @@  int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 
 		err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
 						EXT4_I(inode)->i_sync_tid);
-	} else {
-		struct ext4_iloc iloc;
-
-		err = __ext4_get_inode_loc_noinmem(inode, &iloc);
-		if (err)
-			return err;
-		/*
-		 * sync(2) will flush the whole buffer cache. No need to do
-		 * it here separately for each inode.
-		 */
-		if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
-			sync_dirty_buffer(iloc.bh);
-		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-			ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO,
-					       "IO error syncing inode");
-			err = -EIO;
-		}
-		brelse(iloc.bh);
 	}
 	return err;
 }
@@ -6348,7 +6330,11 @@  int ext4_mark_iloc_dirty(handle_t *handle,
 
 	/* the do_update_inode consumes one bh->b_count */
 	get_bh(iloc->bh);
-
+	if (!ext4_handle_valid(handle)) {
+		if (!EXT4_I(inode)->i_metadata_bhs)
+			ext4_inode_attach_mmb(inode);
+		EXT4_I(inode)->i_metadata_bhs->inode_blk = iloc->bh->b_blocknr;
+	}
 	/* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
 	err = ext4_do_update_inode(handle, inode, iloc);
 	put_bh(iloc->bh);