[2/4] fs: avoid double-writing the inode on a lazytime expiration
diff mbox series

Message ID 20200325122825.1086872-3-hch@lst.de
State New
Headers show
Series
  • [1/4] ubifs: remove broken lazytime support
Related show

Commit Message

Christoph Hellwig March 25, 2020, 12:28 p.m. UTC
In the case that an inode has dirty timestamp for longer than the
lazytime expiration timeout (or if all such inodes are being flushed
out due to a sync or syncfs system call), we need to inform the file
system that the inode is dirty so that the inode's timestamps can be
copied out to the on-disk data structures.  That's because if the file
system supports lazytime, it will have ignored the dirty_inode(inode,
I_DIRTY_TIME) notification when the timestamp was modified in memory.q
Previously, this was accomplished by calling mark_inode_dirty_sync(),
but that has the unfortunate side effect of also putting the inode the
writeback list, and that's not necessary in this case, since we will
immediately call write_inode() afterwards.  Replace the call to
mark_inode_dirty_sync() with a new lazytime_expired method to clearly
separate out this case.

Eric Biggers noticed that this was causing problems for fscrypt after
the key was removed[1].

Based on a patch from Theodore Ts'o.

[1] https://lore.kernel.org/r/20200306004555.GB225345@gmail.com

Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
Reported-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/super.c    |  6 ++++++
 fs/f2fs/super.c    |  6 ++++++
 fs/fs-writeback.c  |  3 ++-
 fs/inode.c         |  3 ++-
 fs/xfs/xfs_super.c | 12 +++---------
 include/linux/fs.h |  1 +
 6 files changed, 20 insertions(+), 11 deletions(-)

Comments

Christoph Hellwig March 25, 2020, 3:01 p.m. UTC | #1
On Wed, Mar 25, 2020 at 01:28:23PM +0100, Christoph Hellwig wrote:
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1448,6 +1448,11 @@ static struct dquot **ext4_get_dquots(struct inode *inode)
>  	return EXT4_I(inode)->i_dquot;
>  }
>  
> +static void ext4_lazytime_expired(struct inode *inode)
> +{
> +	return ext4_dirty_inode(inode, I_DIRTY_SYNC);
> +}

FYI: this is inside an #ifdef CONFIG_QUOTA, so I'll have to respin even
if the overall approach looks good.
Dave Chinner March 26, 2020, 3:22 a.m. UTC | #2
On Wed, Mar 25, 2020 at 01:28:23PM +0100, Christoph Hellwig wrote:
> In the case that an inode has dirty timestamp for longer than the
> lazytime expiration timeout (or if all such inodes are being flushed
> out due to a sync or syncfs system call), we need to inform the file
> system that the inode is dirty so that the inode's timestamps can be
> copied out to the on-disk data structures.  That's because if the file
> system supports lazytime, it will have ignored the dirty_inode(inode,
> I_DIRTY_TIME) notification when the timestamp was modified in memory.q
> Previously, this was accomplished by calling mark_inode_dirty_sync(),
> but that has the unfortunate side effect of also putting the inode the
> writeback list, and that's not necessary in this case, since we will
> immediately call write_inode() afterwards.  Replace the call to
> mark_inode_dirty_sync() with a new lazytime_expired method to clearly
> separate out this case.


hmmm. Doesn't this cause issues with both iput() and
vfs_fsync_range() because they call mark_inode_dirty_sync() on
I_DIRTY_TIME inodes to move them onto the writeback list so they are
appropriately expired when the inode is written back.

i.e.:


> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 2094386af8ac..e5aafd40dd0f 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -612,19 +612,13 @@ xfs_fs_destroy_inode(
>  }
>  
>  static void
> -xfs_fs_dirty_inode(
> -	struct inode			*inode,
> -	int				flag)
> +xfs_fs_lazytime_expired(
> +	struct inode			*inode)
>  {
>  	struct xfs_inode		*ip = XFS_I(inode);
>  	struct xfs_mount		*mp = ip->i_mount;
>  	struct xfs_trans		*tp;
>  
> -	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
> -		return;
> -	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
> -		return;
> -
>  	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
>  		return;
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> @@ -1053,7 +1047,7 @@ xfs_fs_free_cached_objects(
>  static const struct super_operations xfs_super_operations = {
>  	.alloc_inode		= xfs_fs_alloc_inode,
>  	.destroy_inode		= xfs_fs_destroy_inode,
> -	.dirty_inode		= xfs_fs_dirty_inode,
> +	.lazytime_expired	= xfs_fs_lazytime_expired,
>  	.drop_inode		= xfs_fs_drop_inode,
>  	.put_super		= xfs_fs_put_super,
>  	.sync_fs		= xfs_fs_sync_fs,

This means XFS no longer updates/logs the current timestamp because
->dirty_inode(I_DIRTY_SYNC) is no longer called for XFS) before
->fsync flushes the inode data and metadata changes to the journal.
Hence the current in-memory timestamps are not present in the log
before the fsync is run as so we violate the fsync guarantees
lazytime gives for timestamp updates....

I haven't quite got it straight in my head if the iput() case has
similar problems, but the fsync case definitely looks broken.

Cheers,

Dave.
Christoph Hellwig March 26, 2020, 5:01 p.m. UTC | #3
On Thu, Mar 26, 2020 at 02:22:12PM +1100, Dave Chinner wrote:
> On Wed, Mar 25, 2020 at 01:28:23PM +0100, Christoph Hellwig wrote:
> > In the case that an inode has dirty timestamp for longer than the
> > lazytime expiration timeout (or if all such inodes are being flushed
> > out due to a sync or syncfs system call), we need to inform the file
> > system that the inode is dirty so that the inode's timestamps can be
> > copied out to the on-disk data structures.  That's because if the file
> > system supports lazytime, it will have ignored the dirty_inode(inode,
> > I_DIRTY_TIME) notification when the timestamp was modified in memory.q
> > Previously, this was accomplished by calling mark_inode_dirty_sync(),
> > but that has the unfortunate side effect of also putting the inode the
> > writeback list, and that's not necessary in this case, since we will
> > immediately call write_inode() afterwards.  Replace the call to
> > mark_inode_dirty_sync() with a new lazytime_expired method to clearly
> > separate out this case.
> 
> 
> hmmm. Doesn't this cause issues with both iput() and
> vfs_fsync_range() because they call mark_inode_dirty_sync() on
> I_DIRTY_TIME inodes to move them onto the writeback list so they are
> appropriately expired when the inode is written back.

True, we'd need to call ->lazytime_expired in the fsync path as well.
While looking into this I've also noticed that lazytime is "enabled"
unconditionally without a file system opt-in.  That means for file systems
that don't rely on ->dirty_inode it kinda "just works" except that both
Teds original fix and this one break that in one way or another.  This
series just cleanly disables it, but Ted's two patches would fail to
pass I_DIRTY_SYNC to ->write_inode.

This whole area is such a mess..

Patch
diff mbox series

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0c7c4adb664e..ebbf6370ccd6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1448,6 +1448,11 @@  static struct dquot **ext4_get_dquots(struct inode *inode)
 	return EXT4_I(inode)->i_dquot;
 }
 
+static void ext4_lazytime_expired(struct inode *inode)
+{
+	return ext4_dirty_inode(inode, I_DIRTY_SYNC);
+}
+
 static const struct dquot_operations ext4_quota_operations = {
 	.get_reserved_space	= ext4_get_reserved_space,
 	.write_dquot		= ext4_write_dquot,
@@ -1480,6 +1485,7 @@  static const struct super_operations ext4_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
+	.lazytime_expired = ext4_lazytime_expired,
 	.drop_inode	= ext4_drop_inode,
 	.evict_inode	= ext4_evict_inode,
 	.put_super	= ext4_put_super,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 65a7a432dfee..529334573944 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1100,6 +1100,11 @@  static void f2fs_dirty_inode(struct inode *inode, int flags)
 	f2fs_inode_dirtied(inode, false);
 }
 
+static void f2fs_lazytime_expired(struct inode *inode)
+{
+	return f2fs_dirty_inode(inode, I_DIRTY_SYNC);
+}
+
 static void f2fs_free_inode(struct inode *inode)
 {
 	fscrypt_free_inode(inode);
@@ -2355,6 +2360,7 @@  static const struct super_operations f2fs_sops = {
 	.drop_inode	= f2fs_drop_inode,
 	.write_inode	= f2fs_write_inode,
 	.dirty_inode	= f2fs_dirty_inode,
+	.lazytime_expired = f2fs_lazytime_expired,
 	.show_options	= f2fs_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= f2fs_quota_read,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76ac9c7d32ec..dc2d65c765ae 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1505,7 +1505,8 @@  __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_unlock(&inode->i_lock);
 
 	if (dirty & I_DIRTY_TIME)
-		mark_inode_dirty_sync(inode);
+		inode->i_sb->s_op->lazytime_expired(inode);
+
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & ~I_DIRTY_PAGES) {
 		int err = write_inode(inode, wbc);
diff --git a/fs/inode.c b/fs/inode.c
index 93d9252a00ab..96cf26ed4c7b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1674,7 +1674,8 @@  int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
 	if (flags & S_MTIME)
 		inode->i_mtime = *time;
 	if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
-	    !(inode->i_sb->s_flags & SB_LAZYTIME))
+	    (!inode->i_sb->s_op->lazytime_expired ||
+	     !(inode->i_sb->s_flags & SB_LAZYTIME)))
 		dirty = true;
 
 	if (dirty)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2094386af8ac..e5aafd40dd0f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -612,19 +612,13 @@  xfs_fs_destroy_inode(
 }
 
 static void
-xfs_fs_dirty_inode(
-	struct inode			*inode,
-	int				flag)
+xfs_fs_lazytime_expired(
+	struct inode			*inode)
 {
 	struct xfs_inode		*ip = XFS_I(inode);
 	struct xfs_mount		*mp = ip->i_mount;
 	struct xfs_trans		*tp;
 
-	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
-		return;
-	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
-		return;
-
 	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
 		return;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1053,7 +1047,7 @@  xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
-	.dirty_inode		= xfs_fs_dirty_inode,
+	.lazytime_expired	= xfs_fs_lazytime_expired,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index abedbffe2c9e..07c213cdecf3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1948,6 +1948,7 @@  struct super_operations {
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	int (*drop_inode) (struct inode *);
 	void (*evict_inode) (struct inode *);
+	void (*lazytime_expired)(struct inode *inode);
 	void (*put_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
 	int (*freeze_super) (struct super_block *);