diff mbox

[3/5,resend] VFS: Fix s_umount thaw/write deadlock

Message ID 1323118489-16326-4-git-send-email-kamal@canonical.com
State Superseded, archived
Headers show

Commit Message

Kamal Mostafa Dec. 5, 2011, 8:54 p.m. UTC
From: Valerie Aurora <val@vaaconsulting.com>

File system freeze/thaw require the superblock's s_umount lock.
However, we write to the file system while holding the s_umount lock
in several places (e.g., writeback and quotas).  Any of these can
block on a frozen file system while holding s_umount, preventing any
later thaw from occurring, since thaw requires s_umount.

The solution is to add a check, vfs_is_frozen(), to all code paths
that write while holding sb->s_umount and return without writing if it
is true.

BugLink: https://bugs.launchpad.net/bugs/897421
Signed-off-by: Valerie Aurora <val@vaaconsulting.com>
Cc: Kamal Mostafa <kamal@canonical.com>
Tested-by: Peter M. Petrakis <peter.petrakis@canonical.com>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
---
 fs/drop_caches.c   |    8 ++++++++
 fs/fs-writeback.c  |   50 +++++++++++++++++++++++++++++++++-----------------
 fs/quota/quota.c   |   12 +++++++++++-
 fs/super.c         |   26 ++++++++++++++++++++++++++
 fs/sync.c          |    9 +++++----
 include/linux/fs.h |    6 +++++-
 6 files changed, 88 insertions(+), 23 deletions(-)

Comments

Christoph Hellwig Dec. 6, 2011, 11:35 a.m. UTC | #1
On Mon, Dec 05, 2011 at 12:54:47PM -0800, Kamal Mostafa wrote:
> +	/*
> +	 * Any file-system specific routines eventually called by
> +	 * drop_pagecache_sb() and drop_slab() ought to check for a
> +	 * frozen file system before writing to the disk.  Most file
> +	 * systems won't write in this case but XFS is a notable
> +	 * exception in certain cases.
> +	 */

Instead of adding a comment like this please fix whatever issue you
found or at least report it to the the people that might be able to
fix if you don't understand the code well enough.

> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 73c3992..dbeaede 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -554,6 +554,14 @@ static long writeback_sb_inodes(struct super_block *sb,
>  	while (!list_empty(&wb->b_io)) {
>  		struct inode *inode = wb_inode(wb->b_io.prev);
>  
> +		if (inode->i_sb == sb && vfs_is_frozen(sb)) {
> +			/*
> +			 * Inode is on a frozen superblock; skip it for now.
> +			 */
> +			redirty_tail(inode, wb);
> +			continue;
> +		}
> +

We make sure to not dirty any new inodes after the first phase of the
freeze, so this should be a BUG_ON/WARN_ON.

> +		/*
> +		 * Trylock also avoids read-write deadlocks that could be
> +		 * triggered by freeze.
> +		 */
> +		if (down_read_trylock(&sb->s_umount)) {
> +			writeback_inodes_sb(sb, reason);
> +			up_read(&sb->s_umount);
> +			return 1;
> +		}
> +	}

What exactly deadlock?  The comment as-is really doesn't tell the reader
anything interesting.  Note that switching it to a trylock really should
be a separate patch.  We've been through it a while ago and it got lost,
and now Miao Xie is looking into the general issue again.

Also as mentioned to Miao Xie a little elarier
writeback_inodes_if_idle should be rewritten to be a trivial wrapper
around writeback_inodes_sb_nr_if_idle instead of duplicating the logic.

While we're at it:  can anyway suggest a less cumbersome name than
writeback_inodes_sb_if_idle?  I'd go for
try_to_writeback_inodes_sb(_nr).

> index 35f4b0e..47983d9 100644
> --- a/fs/quota/quota.c
> +++ b/fs/quota/quota.c
> @@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
>  
>  static void quota_sync_one(struct super_block *sb, void *arg)
>  {
> -	if (sb->s_qcop && sb->s_qcop->quota_sync)
> +	if (sb->s_qcop && sb->s_qcop->quota_sync && !vfs_is_frozen(sb))
>  		sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
>  }

Again, this should be a BUG_ON/WARN_ON.  We shouldn't redirty quotas
after the freeze.

> @@ -368,8 +368,18 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
>  		goto out;
>  	}
>  
> +	/*
> +	 * It's not clear which quota functions may write to the file
> +	 * system (all?).  Check for a frozen fs and bail out now.
> +	 */
> +	if (vfs_is_frozen(sb)) {
> +		ret = -EBUSY;
> +		goto out_drop_super;
> +	}

How about spending the three minutes to figure it out?
Q_GETFMT/Q_GETINFO/Q_XGETQSTAT and Q_GETQUOTA are the obvious read-only
candidates.

> + *	Note: If @f is going to write to the file system, it must first
> + *	check if the file system is frozen (via vfs_is_frozen(sb)) and
> + *	refuse to write if so.

That's an overgeneralization.

> + * During this function, sb->s_frozen goes through these values:
> + *
> + * SB_UNFROZEN: File system is normal, all writes progress as usual.
> + *
> + * SB_FREEZE_WRITE: The file system is in the process of being frozen
> + * and any remaining out-standing writes are being synced.  Writes
> + * that complete in-process writes should be permitted but new ones
> + * should be blocked.
> + *
> + * SB_FREEZE_TRANS: The file system is frozen.  The ->freeze_fs and
> + * ->unfreeze_fs ops are the only operations permitted to write to the
> + * file system in this state.
> + *
> + * sb->s_frozen is protected by sb->s_umount.  Additionally,
> + * SB_FREEZE_WRITE is only temporarily set during freeze/thaw while
> + * holding sb->s_umount for writing, so any other callers holding
> + * sb->s_umount will never see this state.
>   */

Please split adding this useful documentation into a separate patch.

> index 101b8ef..f497be8 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -68,7 +68,7 @@ int sync_filesystem(struct super_block *sb)
>  	/*
>  	 * No point in syncing out anything if the filesystem is read-only.
>  	 */
> -	if (sb->s_flags & MS_RDONLY)
> +	if ((sb->s_flags & MS_RDONLY) || vfs_is_frozen(sb))

Again, BUG_ON/WARN_ON.

>  static void sync_one_sb(struct super_block *sb, void *arg)
>  {
> -	if (!(sb->s_flags & MS_RDONLY))
> +	if (!(sb->s_flags & MS_RDONLY) && !vfs_is_frozen(sb))
>  		__sync_filesystem(sb, *(int *)arg);

Same here.

> +	if (!(sb->s_flags & MS_RDONLY) && !vfs_is_frozen(sb))
> +		ret = sync_filesystem(sb);

Same here.

> +static inline int vfs_is_frozen(struct super_block *sb) {
> +	return sb->s_frozen == SB_FREEZE_TRANS;
> +}

wrong brace placement.

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara Dec. 7, 2011, 11:16 p.m. UTC | #2
On Tue 06-12-11 06:35:44, Christoph Hellwig wrote:
> On Mon, Dec 05, 2011 at 12:54:47PM -0800, Kamal Mostafa wrote:
> > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> > index 73c3992..dbeaede 100644
> > --- a/fs/fs-writeback.c
> > +++ b/fs/fs-writeback.c
> > @@ -554,6 +554,14 @@ static long writeback_sb_inodes(struct super_block *sb,
> >  	while (!list_empty(&wb->b_io)) {
> >  		struct inode *inode = wb_inode(wb->b_io.prev);
> >  
> > +		if (inode->i_sb == sb && vfs_is_frozen(sb)) {
> > +			/*
> > +			 * Inode is on a frozen superblock; skip it for now.
> > +			 */
> > +			redirty_tail(inode, wb);
> > +			continue;
> > +		}
> > +
> 
> We make sure to not dirty any new inodes after the first phase of the
> freeze, so this should be a BUG_ON/WARN_ON.
  This is not really true in presence of mmaped writes. To block mmaped
writes on a frozen filesystem, we need some synchronization between
page_mkwrite() and freezing code. Currently, to avoid any additional
locking overhead, we set page dirty and *then* check for filesystem being
frozen. Only this order can make sure either the page is written (and
write-protected) or the frozen check triggers and we wait... (see the
comment in block_page_mkwrite()). The nasty sideeffect of this is that
there can be dirty pages & inodes on a frozen filesystem. We are blocked in
the page fault of these pages so user cannot write any data to these pages
but still they are marked dirty.

Alternatively we could have a different mechanism (rw semaphore?) to
synchronize page faults and freezing but I'd hate the overhead for the case
almost noone cares about...

> While we're at it:  can anyway suggest a less cumbersome name than
> writeback_inodes_sb_if_idle?  I'd go for
> try_to_writeback_inodes_sb(_nr).
  Sounds good.

> > index 35f4b0e..47983d9 100644
> > --- a/fs/quota/quota.c
> > +++ b/fs/quota/quota.c
> > @@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
> >  
> >  static void quota_sync_one(struct super_block *sb, void *arg)
> >  {
> > -	if (sb->s_qcop && sb->s_qcop->quota_sync)
> > +	if (sb->s_qcop && sb->s_qcop->quota_sync && !vfs_is_frozen(sb))
> >  		sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
> >  }
> 
> Again, this should be a BUG_ON/WARN_ON.  We shouldn't redirty quotas
> after the freeze.
  You cannot really turn the check into WARN_ON because we iterate over all
superblocks and only in ->quota_sync() check whether something is dirty.
But the check seems to be unnecessary, that is correct.

> > @@ -368,8 +368,18 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
> >  		goto out;
> >  	}
> >  
> > +	/*
> > +	 * It's not clear which quota functions may write to the file
> > +	 * system (all?).  Check for a frozen fs and bail out now.
> > +	 */
> > +	if (vfs_is_frozen(sb)) {
> > +		ret = -EBUSY;
> > +		goto out_drop_super;
> > +	}
> 
> How about spending the three minutes to figure it out?
> Q_GETFMT/Q_GETINFO/Q_XGETQSTAT and Q_GETQUOTA are the obvious read-only
> candidates.
  Q_GETQUOTA can actually cause filesystem modification (reservation of
space in quota file) but the others are read-only. Also after some thought
I'd prefer that quotactl(8) just blocks to be consistent with how other
syscalls behave...

								Honza
Matthew Wilcox Dec. 7, 2011, 11:49 p.m. UTC | #3
On Thu, Dec 08, 2011 at 12:16:58AM +0100, Jan Kara wrote:
> On Tue 06-12-11 06:35:44, Christoph Hellwig wrote:
> > We make sure to not dirty any new inodes after the first phase of the
> > freeze, so this should be a BUG_ON/WARN_ON.
>   This is not really true in presence of mmaped writes. To block mmaped
> writes on a frozen filesystem, we need some synchronization between
> page_mkwrite() and freezing code. Currently, to avoid any additional
> locking overhead, we set page dirty and *then* check for filesystem being
> frozen. Only this order can make sure either the page is written (and
> write-protected) or the frozen check triggers and we wait... (see the
> comment in block_page_mkwrite()). The nasty sideeffect of this is that
> there can be dirty pages & inodes on a frozen filesystem. We are blocked in
> the page fault of these pages so user cannot write any data to these pages
> but still they are marked dirty.

Can't we make the mmaps read-only during the freezing process?
Jan Kara Dec. 8, 2011, 12:05 a.m. UTC | #4
On Wed 07-12-11 16:49:20, Matthew Wilcox wrote:
> On Thu, Dec 08, 2011 at 12:16:58AM +0100, Jan Kara wrote:
> > On Tue 06-12-11 06:35:44, Christoph Hellwig wrote:
> > > We make sure to not dirty any new inodes after the first phase of the
> > > freeze, so this should be a BUG_ON/WARN_ON.
> >   This is not really true in presence of mmaped writes. To block mmaped
> > writes on a frozen filesystem, we need some synchronization between
> > page_mkwrite() and freezing code. Currently, to avoid any additional
> > locking overhead, we set page dirty and *then* check for filesystem being
> > frozen. Only this order can make sure either the page is written (and
> > write-protected) or the frozen check triggers and we wait... (see the
> > comment in block_page_mkwrite()). The nasty sideeffect of this is that
> > there can be dirty pages & inodes on a frozen filesystem. We are blocked in
> > the page fault of these pages so user cannot write any data to these pages
> > but still they are marked dirty.
> 
> Can't we make the mmaps read-only during the freezing process?
  But then application will get EFAULT instead of blocking? I don't think
that is acceptable (we'd see lots of applications dying ;).

								Honza
Christoph Hellwig Dec. 9, 2011, 11:47 a.m. UTC | #5
On Thu, Dec 08, 2011 at 12:16:58AM +0100, Jan Kara wrote:
> > We make sure to not dirty any new inodes after the first phase of the
> > freeze, so this should be a BUG_ON/WARN_ON.
>   This is not really true in presence of mmaped writes. To block mmaped
> writes on a frozen filesystem, we need some synchronization between
> page_mkwrite() and freezing code. Currently, to avoid any additional
> locking overhead, we set page dirty and *then* check for filesystem being
> frozen. Only this order can make sure either the page is written (and
> write-protected) or the frozen check triggers and we wait... (see the
> comment in block_page_mkwrite()). The nasty sideeffect of this is that
> there can be dirty pages & inodes on a frozen filesystem. We are blocked in
> the page fault of these pages so user cannot write any data to these pages
> but still they are marked dirty.
> 
> Alternatively we could have a different mechanism (rw semaphore?) to
> synchronize page faults and freezing but I'd hate the overhead for the case
> almost noone cares about...

I think the is the only sensible way to go forward.  Requiring hacks in
lots of random places to work around the fact that a single place that
might actually dirty pages despite supposedly blocking that from happen
simply isn't maintainable over the long run.

> > > +	 */
> > > +	if (vfs_is_frozen(sb)) {
> > > +		ret = -EBUSY;
> > > +		goto out_drop_super;
> > > +	}
> > 
> > How about spending the three minutes to figure it out?
> > Q_GETFMT/Q_GETINFO/Q_XGETQSTAT and Q_GETQUOTA are the obvious read-only
> > candidates.
>   Q_GETQUOTA can actually cause filesystem modification (reservation of
> space in quota file) but the others are read-only. Also after some thought
> I'd prefer that quotactl(8) just blocks to be consistent with how other
> syscalls behave...

How can a simple dqget cause modifications in the VFS quota code?
Dirting anything for a simple read of the quota information is not
only completely non-obvious but also doesn't make much sene.  We don't
dirty metadata on stat() either..

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c00e055..90f8f7e 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -57,6 +57,14 @@  int drop_caches_sysctl_handler(ctl_table *table, int write,
 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (ret)
 		return ret;
+
+	/*
+	 * Any file-system specific routines eventually called by
+	 * drop_pagecache_sb() and drop_slab() ought to check for a
+	 * frozen file system before writing to the disk.  Most file
+	 * systems won't write in this case but XFS is a notable
+	 * exception in certain cases.
+	 */
 	if (write) {
 		if (sysctl_drop_caches & 1)
 			iterate_supers(drop_pagecache_sb, NULL);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992..dbeaede 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -554,6 +554,14 @@  static long writeback_sb_inodes(struct super_block *sb,
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
 
+		if (inode->i_sb == sb && vfs_is_frozen(sb)) {
+			/*
+			 * Inode is on a frozen superblock; skip it for now.
+			 */
+			redirty_tail(inode, wb);
+			continue;
+		}
+
 		if (inode->i_sb != sb) {
 			if (work->sb) {
 				/*
@@ -1266,40 +1274,48 @@  EXPORT_SYMBOL(writeback_inodes_sb);
  * writeback_inodes_sb_if_idle	-	start writeback if none underway
  * @sb: the superblock
  *
- * Invoke writeback_inodes_sb if no writeback is currently underway.
- * Returns 1 if writeback was started, 0 if not.
+ * Invoke writeback_inodes_sb if no writeback is currently underway
+ * and no one else holds the s_umount lock.  Returns 1 if writeback
+ * was started, 0 if not.
  */
 int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
-		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb, reason);
-		up_read(&sb->s_umount);
-		return 1;
-	} else
-		return 0;
+		/*
+		 * Trylock also avoids read-write deadlocks that could be
+		 * triggered by freeze.
+		 */
+		if (down_read_trylock(&sb->s_umount)) {
+			writeback_inodes_sb(sb, reason);
+			up_read(&sb->s_umount);
+			return 1;
+		}
+	}
+	return 0;
 }
 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 
 /**
- * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
  * @sb: the superblock
  * @nr: the number of pages to write
  *
- * Invoke writeback_inodes_sb if no writeback is currently underway.
- * Returns 1 if writeback was started, 0 if not.
+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway
+ * and no one else holds the s_umount lock.  Returns 1 if writeback
+ * was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
 				   unsigned long nr,
 				   enum wb_reason reason)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
-		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr, reason);
-		up_read(&sb->s_umount);
-		return 1;
-	} else
-		return 0;
+		if (down_read_trylock(&sb->s_umount)) {
+			writeback_inodes_sb_nr(sb, nr, reason);
+			up_read(&sb->s_umount);
+			return 1;
+		}
+	}
+	return 0;
 }
 EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
 
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 35f4b0e..47983d9 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -47,7 +47,7 @@  static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 
 static void quota_sync_one(struct super_block *sb, void *arg)
 {
-	if (sb->s_qcop && sb->s_qcop->quota_sync)
+	if (sb->s_qcop && sb->s_qcop->quota_sync && !vfs_is_frozen(sb))
 		sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
 }
 
@@ -368,8 +368,18 @@  SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
 		goto out;
 	}
 
+	/*
+	 * It's not clear which quota functions may write to the file
+	 * system (all?).  Check for a frozen fs and bail out now.
+	 */
+	if (vfs_is_frozen(sb)) {
+		ret = -EBUSY;
+		goto out_drop_super;
+	}
+
 	ret = do_quotactl(sb, type, cmds, id, addr, pathp);
 
+out_drop_super:
 	drop_super(sb);
 out:
 	if (pathp && !IS_ERR(pathp))
diff --git a/fs/super.c b/fs/super.c
index afd0f1a..a56696b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -526,6 +526,10 @@  void sync_supers(void)
  *
  *	Scans the superblock list and calls given function, passing it
  *	locked superblock and given argument.
+ *
+ *	Note: If @f is going to write to the file system, it must first
+ *	check if the file system is frozen (via vfs_is_frozen(sb)) and
+ *	refuse to write if so.
  */
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
@@ -595,6 +599,10 @@  EXPORT_SYMBOL(iterate_supers_type);
  *	
  *	Scans the superblock list and finds the superblock of the file system
  *	mounted on the device given. %NULL is returned if no match is found.
+ *
+ *	Note: If caller is going to write to the superblock, it must first
+ *	check if the file system is frozen (via vfs_is_frozen(sb)) and
+ *	refuse to write if so.
  */
 
 struct super_block *get_super(struct block_device *bdev)
@@ -1132,6 +1140,24 @@  out:
  * Syncs the super to make sure the filesystem is consistent and calls the fs's
  * freeze_fs.  Subsequent calls to this without first thawing the fs will return
  * -EBUSY.
+ *
+ * During this function, sb->s_frozen goes through these values:
+ *
+ * SB_UNFROZEN: File system is normal, all writes progress as usual.
+ *
+ * SB_FREEZE_WRITE: The file system is in the process of being frozen
+ * and any remaining out-standing writes are being synced.  Writes
+ * that complete in-process writes should be permitted but new ones
+ * should be blocked.
+ *
+ * SB_FREEZE_TRANS: The file system is frozen.  The ->freeze_fs and
+ * ->unfreeze_fs ops are the only operations permitted to write to the
+ * file system in this state.
+ *
+ * sb->s_frozen is protected by sb->s_umount.  Additionally,
+ * SB_FREEZE_WRITE is only temporarily set during freeze/thaw while
+ * holding sb->s_umount for writing, so any other callers holding
+ * sb->s_umount will never see this state.
  */
 int freeze_super(struct super_block *sb)
 {
diff --git a/fs/sync.c b/fs/sync.c
index 101b8ef..f497be8 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -68,7 +68,7 @@  int sync_filesystem(struct super_block *sb)
 	/*
 	 * No point in syncing out anything if the filesystem is read-only.
 	 */
-	if (sb->s_flags & MS_RDONLY)
+	if ((sb->s_flags & MS_RDONLY) || vfs_is_frozen(sb))
 		return 0;
 
 	ret = __sync_filesystem(sb, 0);
@@ -80,7 +80,7 @@  EXPORT_SYMBOL_GPL(sync_filesystem);
 
 static void sync_one_sb(struct super_block *sb, void *arg)
 {
-	if (!(sb->s_flags & MS_RDONLY))
+	if (!(sb->s_flags & MS_RDONLY) && !vfs_is_frozen(sb))
 		__sync_filesystem(sb, *(int *)arg);
 }
 /*
@@ -136,7 +136,7 @@  SYSCALL_DEFINE1(syncfs, int, fd)
 {
 	struct file *file;
 	struct super_block *sb;
-	int ret;
+	int ret = 0;
 	int fput_needed;
 
 	file = fget_light(fd, &fput_needed);
@@ -145,7 +145,8 @@  SYSCALL_DEFINE1(syncfs, int, fd)
 	sb = file->f_dentry->d_sb;
 
 	down_read(&sb->s_umount);
-	ret = sync_filesystem(sb);
+	if (!(sb->s_flags & MS_RDONLY) && !vfs_is_frozen(sb))
+		ret = sync_filesystem(sb);
 	up_read(&sb->s_umount);
 
 	fput_light(file, fput_needed);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e313022..ac7a495 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1490,7 +1490,7 @@  extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan);
 extern struct timespec current_fs_time(struct super_block *sb);
 
 /*
- * Snapshotting support.
+ * Snapshotting support.  See freeze_super() for documentation.
  */
 enum {
 	SB_UNFROZEN = 0,
@@ -1501,6 +1501,10 @@  enum {
 #define vfs_check_frozen(sb, level) \
 	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
 
+static inline int vfs_is_frozen(struct super_block *sb) {
+	return sb->s_frozen == SB_FREEZE_TRANS;
+}
+
 /*
  * until VFS tracks user namespaces for inodes, just make all files
  * belong to init_user_ns