diff mbox series

[RFC] saner calling conventions for ->d_automount()

Message ID 20250424060845.GG2023217@ZenIV
State New
Headers show
Series [RFC] saner calling conventions for ->d_automount() | expand

Commit Message

Al Viro April 24, 2025, 6:08 a.m. UTC
Currently the calling conventions for ->d_automount() instances have
an odd wart - returned new mount to be attached is expected to have
refcount 2.

That kludge is intended to make sure that mark_mounts_for_expiry() called
before we get around to attaching that new mount to the tree won't decide
to take it out.  finish_automount() drops the extra reference after it's
done with attaching mount to the tree - or drops the reference twice in
case of error.  ->d_automount() instances have rather counterintuitive
boilerplate in them.

There's a much simpler approach: have mark_mounts_for_expiry() skip the
mounts that are yet to be mounted.  And to hell with grabbing/dropping
those extra references.  Makes for simpler correctness analysis, at that...
    
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---

Comments

Christian Brauner April 24, 2025, 8:38 a.m. UTC | #1
On Thu, Apr 24, 2025 at 07:08:45AM +0100, Al Viro wrote:
> Currently the calling conventions for ->d_automount() instances have
> an odd wart - returned new mount to be attached is expected to have
> refcount 2.
> 
> That kludge is intended to make sure that mark_mounts_for_expiry() called
> before we get around to attaching that new mount to the tree won't decide
> to take it out.  finish_automount() drops the extra reference after it's
> done with attaching mount to the tree - or drops the reference twice in
> case of error.  ->d_automount() instances have rather counterintuitive
> boilerplate in them.
> 
> There's a much simpler approach: have mark_mounts_for_expiry() skip the
> mounts that are yet to be mounted.  And to hell with grabbing/dropping
> those extra references.  Makes for simpler correctness analysis, at that...
>     
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---

Looks good!
Reviewed-by: Christian Brauner <brauner@kernel.org>
Jeff Layton April 24, 2025, 11:48 a.m. UTC | #2
On Thu, 2025-04-24 at 07:08 +0100, Al Viro wrote:
> Currently the calling conventions for ->d_automount() instances have
> an odd wart - returned new mount to be attached is expected to have
> refcount 2.
> 
> That kludge is intended to make sure that mark_mounts_for_expiry() called
> before we get around to attaching that new mount to the tree won't decide
> to take it out.  finish_automount() drops the extra reference after it's
> done with attaching mount to the tree - or drops the reference twice in
> case of error.  ->d_automount() instances have rather counterintuitive
> boilerplate in them.
> 
> There's a much simpler approach: have mark_mounts_for_expiry() skip the
> mounts that are yet to be mounted.  And to hell with grabbing/dropping
> those extra references.  Makes for simpler correctness analysis, at that...
>     
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
> diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
> index 767b2927c762..749637231773 100644
> --- a/Documentation/filesystems/porting.rst
> +++ b/Documentation/filesystems/porting.rst
> @@ -1203,3 +1203,10 @@ should use d_drop();d_splice_alias() and return the result of the latter.
>  If a positive dentry cannot be returned for some reason, in-kernel
>  clients such as cachefiles, nfsd, smb/server may not perform ideally but
>  will fail-safe.
> +
> +---
> +
> +**mandatory**
> +
> +Calling conventions for ->d_automount() have changed; we should *not* grab
> +an extra reference to new mount - it should be returned with refcount 1.
> diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
> index ae79c30b6c0c..cc0a58e96770 100644
> --- a/Documentation/filesystems/vfs.rst
> +++ b/Documentation/filesystems/vfs.rst
> @@ -1411,9 +1411,7 @@ defined:
>  
>  	If a vfsmount is returned, the caller will attempt to mount it
>  	on the mountpoint and will remove the vfsmount from its
> -	expiration list in the case of failure.  The vfsmount should be
> -	returned with 2 refs on it to prevent automatic expiration - the
> -	caller will clean up the additional ref.
> +	expiration list in the case of failure.
>  
>  	This function is only used if DCACHE_NEED_AUTOMOUNT is set on
>  	the dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is
> diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
> index 45cee6534122..9434a5399f2b 100644
> --- a/fs/afs/mntpt.c
> +++ b/fs/afs/mntpt.c
> @@ -189,7 +189,6 @@ struct vfsmount *afs_d_automount(struct path *path)
>  	if (IS_ERR(newmnt))
>  		return newmnt;
>  
> -	mntget(newmnt); /* prevent immediate expiration */
>  	mnt_set_expiry(newmnt, &afs_vfsmounts);
>  	queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
>  			   afs_mntpt_expiry_timeout * HZ);
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 83ac192e7fdd..05d8584fd3b9 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -319,9 +319,6 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
>  
>  	/* Create the submount */
>  	mnt = fc_mount(fsc);
> -	if (!IS_ERR(mnt))
> -		mntget(mnt);
> -
>  	put_fs_context(fsc);
>  	return mnt;
>  }
> diff --git a/fs/namespace.c b/fs/namespace.c
> index bbda516444ff..1807ccb1a52d 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3903,10 +3903,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
>  		return PTR_ERR(m);
>  
>  	mnt = real_mount(m);
> -	/* The new mount record should have at least 2 refs to prevent it being
> -	 * expired before we get a chance to add it
> -	 */
> -	BUG_ON(mnt_get_count(mnt) < 2);
>  
>  	if (m->mnt_sb == path->mnt->mnt_sb &&
>  	    m->mnt_root == dentry) {
> @@ -3939,7 +3935,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
>  	unlock_mount(mp);
>  	if (unlikely(err))
>  		goto discard;
> -	mntput(m);
>  	return 0;
>  
>  discard_locked:
> @@ -3953,7 +3948,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
>  		namespace_unlock();
>  	}
>  	mntput(m);
> -	mntput(m);
>  	return err;
>  }
>  
> @@ -3990,11 +3984,14 @@ void mark_mounts_for_expiry(struct list_head *mounts)
>  
>  	/* extract from the expiration list every vfsmount that matches the
>  	 * following criteria:
> +	 * - already mounted
>  	 * - only referenced by its parent vfsmount
>  	 * - still marked for expiry (marked on the last call here; marks are
>  	 *   cleared by mntput())
>  	 */
>  	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
> +		if (!is_mounted(&mnt->mnt))
> +			continue;
>  		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
>  			propagate_mount_busy(mnt, 1))
>  			continue;
> diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
> index 973aed9cc5fe..7f1ec9c67ff2 100644
> --- a/fs/nfs/namespace.c
> +++ b/fs/nfs/namespace.c
> @@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
>  	if (IS_ERR(mnt))
>  		goto out_fc;
>  
> -	mntget(mnt); /* prevent immediate expiration */
>  	if (timeout <= 0)
>  		goto out_fc;
>  
> diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
> index e3f9213131c4..778daf11f1db 100644
> --- a/fs/smb/client/namespace.c
> +++ b/fs/smb/client/namespace.c
> @@ -283,7 +283,6 @@ struct vfsmount *cifs_d_automount(struct path *path)
>  		return newmnt;
>  	}
>  
> -	mntget(newmnt); /* prevent immediate expiration */
>  	mnt_set_expiry(newmnt, &cifs_automount_list);
>  	schedule_delayed_work(&cifs_automount_task,
>  			      cifs_mountpoint_expiry_timeout);
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 8ddf6b17215c..fa488721019f 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -10085,8 +10085,6 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
>  	put_filesystem(type);
>  	if (IS_ERR(mnt))
>  		return NULL;
> -	mntget(mnt);
> -
>  	return mnt;
>  }
>  
> 

Much cleaner:

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Paulo Alcantara April 24, 2025, 12:37 p.m. UTC | #3
Al Viro <viro@zeniv.linux.org.uk> writes:

> Currently the calling conventions for ->d_automount() instances have
> an odd wart - returned new mount to be attached is expected to have
> refcount 2.
>
> That kludge is intended to make sure that mark_mounts_for_expiry() called
> before we get around to attaching that new mount to the tree won't decide
> to take it out.  finish_automount() drops the extra reference after it's
> done with attaching mount to the tree - or drops the reference twice in
> case of error.  ->d_automount() instances have rather counterintuitive
> boilerplate in them.
>
> There's a much simpler approach: have mark_mounts_for_expiry() skip the
> mounts that are yet to be mounted.  And to hell with grabbing/dropping
> those extra references.  Makes for simpler correctness analysis, at that...
>     
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---

Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
David Howells April 25, 2025, 9:43 a.m. UTC | #4
Al Viro <viro@zeniv.linux.org.uk> wrote:

> Currently the calling conventions for ->d_automount() instances have
> an odd wart - returned new mount to be attached is expected to have
> refcount 2.
> 
> That kludge is intended to make sure that mark_mounts_for_expiry() called
> before we get around to attaching that new mount to the tree won't decide
> to take it out.  finish_automount() drops the extra reference after it's
> done with attaching mount to the tree - or drops the reference twice in
> case of error.  ->d_automount() instances have rather counterintuitive
> boilerplate in them.
> 
> There's a much simpler approach: have mark_mounts_for_expiry() skip the
> mounts that are yet to be mounted.  And to hell with grabbing/dropping
> those extra references.  Makes for simpler correctness analysis, at that...
>     
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

You can test this by installing the kafs-client package, telling systemd to
start afs.mount and then doing "ls /afs/openafs.org/doc", say.

Acked-by: David Howells <dhowells@redhat.com>
Tested-by: David Howells <dhowells@redhat.com>
Steven Rostedt April 25, 2025, 5:16 p.m. UTC | #5
On Thu, 24 Apr 2025 07:08:45 +0100
Al Viro <viro@zeniv.linux.org.uk> wrote:

> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 8ddf6b17215c..fa488721019f 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -10085,8 +10085,6 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
>  	put_filesystem(type);
>  	if (IS_ERR(mnt))
>  		return NULL;
> -	mntget(mnt);
> -
>  	return mnt;
>  }

Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>

-- Steve
diff mbox series

Patch

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 767b2927c762..749637231773 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1203,3 +1203,10 @@  should use d_drop();d_splice_alias() and return the result of the latter.
 If a positive dentry cannot be returned for some reason, in-kernel
 clients such as cachefiles, nfsd, smb/server may not perform ideally but
 will fail-safe.
+
+---
+
+**mandatory**
+
+Calling conventions for ->d_automount() have changed; we should *not* grab
+an extra reference to new mount - it should be returned with refcount 1.
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index ae79c30b6c0c..cc0a58e96770 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -1411,9 +1411,7 @@  defined:
 
 	If a vfsmount is returned, the caller will attempt to mount it
 	on the mountpoint and will remove the vfsmount from its
-	expiration list in the case of failure.  The vfsmount should be
-	returned with 2 refs on it to prevent automatic expiration - the
-	caller will clean up the additional ref.
+	expiration list in the case of failure.
 
 	This function is only used if DCACHE_NEED_AUTOMOUNT is set on
 	the dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 45cee6534122..9434a5399f2b 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -189,7 +189,6 @@  struct vfsmount *afs_d_automount(struct path *path)
 	if (IS_ERR(newmnt))
 		return newmnt;
 
-	mntget(newmnt); /* prevent immediate expiration */
 	mnt_set_expiry(newmnt, &afs_vfsmounts);
 	queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
 			   afs_mntpt_expiry_timeout * HZ);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83ac192e7fdd..05d8584fd3b9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -319,9 +319,6 @@  static struct vfsmount *fuse_dentry_automount(struct path *path)
 
 	/* Create the submount */
 	mnt = fc_mount(fsc);
-	if (!IS_ERR(mnt))
-		mntget(mnt);
-
 	put_fs_context(fsc);
 	return mnt;
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index bbda516444ff..1807ccb1a52d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3903,10 +3903,6 @@  int finish_automount(struct vfsmount *m, const struct path *path)
 		return PTR_ERR(m);
 
 	mnt = real_mount(m);
-	/* The new mount record should have at least 2 refs to prevent it being
-	 * expired before we get a chance to add it
-	 */
-	BUG_ON(mnt_get_count(mnt) < 2);
 
 	if (m->mnt_sb == path->mnt->mnt_sb &&
 	    m->mnt_root == dentry) {
@@ -3939,7 +3935,6 @@  int finish_automount(struct vfsmount *m, const struct path *path)
 	unlock_mount(mp);
 	if (unlikely(err))
 		goto discard;
-	mntput(m);
 	return 0;
 
 discard_locked:
@@ -3953,7 +3948,6 @@  int finish_automount(struct vfsmount *m, const struct path *path)
 		namespace_unlock();
 	}
 	mntput(m);
-	mntput(m);
 	return err;
 }
 
@@ -3990,11 +3984,14 @@  void mark_mounts_for_expiry(struct list_head *mounts)
 
 	/* extract from the expiration list every vfsmount that matches the
 	 * following criteria:
+	 * - already mounted
 	 * - only referenced by its parent vfsmount
 	 * - still marked for expiry (marked on the last call here; marks are
 	 *   cleared by mntput())
 	 */
 	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
+		if (!is_mounted(&mnt->mnt))
+			continue;
 		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
 			propagate_mount_busy(mnt, 1))
 			continue;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 973aed9cc5fe..7f1ec9c67ff2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -195,7 +195,6 @@  struct vfsmount *nfs_d_automount(struct path *path)
 	if (IS_ERR(mnt))
 		goto out_fc;
 
-	mntget(mnt); /* prevent immediate expiration */
 	if (timeout <= 0)
 		goto out_fc;
 
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index e3f9213131c4..778daf11f1db 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -283,7 +283,6 @@  struct vfsmount *cifs_d_automount(struct path *path)
 		return newmnt;
 	}
 
-	mntget(newmnt); /* prevent immediate expiration */
 	mnt_set_expiry(newmnt, &cifs_automount_list);
 	schedule_delayed_work(&cifs_automount_task,
 			      cifs_mountpoint_expiry_timeout);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8ddf6b17215c..fa488721019f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10085,8 +10085,6 @@  static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
 	put_filesystem(type);
 	if (IS_ERR(mnt))
 		return NULL;
-	mntget(mnt);
-
 	return mnt;
 }