diff mbox series

[3/5] cifs: serialize initialization and cleanup of cfid

Message ID 20250502051517.10449-3-sprasad@microsoft.com
State New
Headers show
Series [1/5] cifs: protect cfid accesses with fid_lock | expand

Commit Message

Shyam Prasad N May 2, 2025, 5:13 a.m. UTC
From: Shyam Prasad N <sprasad@microsoft.com>

Today we can have multiple processes calling open_cached_dir
and other workers freeing the cached dir all in parallel.
Although small sections of this code is locked to protect
individual fields, there can be races between these threads
which can be hard to debug.

This patch serializes all initialization and cleanup of
the cfid struct and the associated resources: dentry and
the server handle.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 16 ++++++++++++++++
 fs/smb/client/cached_dir.h |  1 +
 2 files changed, 17 insertions(+)

Comments

Henrique Carvalho May 2, 2025, 3:10 p.m. UTC | #1
Hi Shyam,

On Fri, May 02, 2025 at 05:13:42AM +0000, nspmangalore@gmail.com wrote:
> From: Shyam Prasad N <sprasad@microsoft.com>
> 
> Today we can have multiple processes calling open_cached_dir
> and other workers freeing the cached dir all in parallel.
> Although small sections of this code is locked to protect
> individual fields, there can be races between these threads
> which can be hard to debug.
> 
> This patch serializes all initialization and cleanup of
> the cfid struct and the associated resources: dentry and
> the server handle.
> 
> Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> ---
>  fs/smb/client/cached_dir.c | 16 ++++++++++++++++
>  fs/smb/client/cached_dir.h |  1 +
>  2 files changed, 17 insertions(+)
> 
> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> index d307636c2679..9aedb6cf66df 100644
> --- a/fs/smb/client/cached_dir.c
> +++ b/fs/smb/client/cached_dir.c
> @@ -197,6 +197,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  		return -ENOENT;
>  	}
>  
> +	/*
> +	 * the following is a critical section. We need to make sure that the
> +	 * callers are serialized per-cfid
> +	 */
> +	mutex_lock(&cfid->cfid_mutex);
> +
>  	/*
>  	 * check again that the cfid is valid (with mutex held this time).
>  	 * Return cached fid if it is valid (has a lease and has a time).
> @@ -207,11 +213,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  	spin_lock(&cfid->fid_lock);
>  	if (cfid->has_lease && cfid->time) {
>  		spin_unlock(&cfid->fid_lock);
> +		mutex_unlock(&cfid->cfid_mutex);
>  		*ret_cfid = cfid;
>  		kfree(utf16_path);
>  		return 0;
>  	} else if (!cfid->has_lease) {
>  		spin_unlock(&cfid->fid_lock);
> +		mutex_unlock(&cfid->cfid_mutex);
>  		/* drop the ref that we have */
>  		kref_put(&cfid->refcount, smb2_close_cached_fid);
>  		kfree(utf16_path);
> @@ -228,6 +236,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  	 */
>  	npath = path_no_prefix(cifs_sb, path);
>  	if (IS_ERR(npath)) {
> +		mutex_unlock(&cfid->cfid_mutex);
>  		rc = PTR_ERR(npath);
>  		goto out;
>  	}
> @@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  		*ret_cfid = cfid;
>  		atomic_inc(&tcon->num_remote_opens);
>  	}
> +	mutex_unlock(&cfid->cfid_mutex);
> +
>  	kfree(utf16_path);
>  
>  	if (is_replayable_error(rc) &&
> @@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
>  					       refcount);
>  	int rc;
>  
> +	/* make sure not to race with server open */
> +	mutex_lock(&cfid->cfid_mutex);
> +
>  	spin_lock(&cfid->cfids->cfid_list_lock);
>  	if (cfid->on_list) {
>  		list_del(&cfid->entry);
> @@ -454,6 +468,7 @@ smb2_close_cached_fid(struct kref *ref)
>  	}
>  
>  	free_cached_dir(cfid);
> +	mutex_unlock(&cfid->cfid_mutex);
>  }
>  
>  void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
> @@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
>  	INIT_LIST_HEAD(&cfid->entry);
>  	INIT_LIST_HEAD(&cfid->dirents.entries);
>  	mutex_init(&cfid->dirents.de_mutex);
> +	mutex_init(&cfid->cfid_mutex);
>  	spin_lock_init(&cfid->fid_lock);
>  	kref_init(&cfid->refcount);
>  	return cfid;
> diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> index 1dfe79d947a6..93c936af2253 100644
> --- a/fs/smb/client/cached_dir.h
> +++ b/fs/smb/client/cached_dir.h
> @@ -42,6 +42,7 @@ struct cached_fid {
>  	struct kref refcount;
>  	struct cifs_fid fid;
>  	spinlock_t fid_lock;
> +	struct mutex cfid_mutex;
>  	struct cifs_tcon *tcon;
>  	struct dentry *dentry;
>  	struct work_struct put_work;
> -- 
> 2.43.0
> 
>

I might be missing something, but...

First, if smb2_close_cached_fid is the release function, meaning I just
released the last cfid ref. So in my understanding I want to, as fast as
possible, remove this cfid from list so it is not found anymore on
find_or_create_cached_dir and then free the cfid. That mutex inside the
function has the intention of preventing a race with open, but if I have
another cfid waiting to acquire the mutex that will just cause an UAF.

Second, I am not fully convinced that we need a mutex there. :/ I have
thought about it many times and I could not get a proof that there is a
race happening there.

Third, (referencing PATCH 2) even if we have a mutex there, shouldn't we
just let the thread that just acquired the mutex retry to acquire the
lease (which I believe is the current behavior).

Thanks,
Henrique
Henrique Carvalho May 2, 2025, 3:12 p.m. UTC | #2
On Fri, May 02, 2025 at 12:10:04PM -0300, Henrique Carvalho wrote:
> 
> Second, I am not fully convinced that we need a mutex there. :/ I have
> thought about it many times and I could not get a proof that there is a
> race happening there.
> 
> Third, (referencing PATCH 2) even if we have a mutex there, shouldn't we
> just let the thread that just acquired the mutex retry to acquire the
> lease (which I believe is the current behavior).

Here "there" means open_cached_dir.

> 
> Thanks,
> Henrique
diff mbox series

Patch

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index d307636c2679..9aedb6cf66df 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -197,6 +197,12 @@  int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		return -ENOENT;
 	}
 
+	/*
+	 * the following is a critical section. We need to make sure that the
+	 * callers are serialized per-cfid
+	 */
+	mutex_lock(&cfid->cfid_mutex);
+
 	/*
 	 * check again that the cfid is valid (with mutex held this time).
 	 * Return cached fid if it is valid (has a lease and has a time).
@@ -207,11 +213,13 @@  int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease && cfid->time) {
 		spin_unlock(&cfid->fid_lock);
+		mutex_unlock(&cfid->cfid_mutex);
 		*ret_cfid = cfid;
 		kfree(utf16_path);
 		return 0;
 	} else if (!cfid->has_lease) {
 		spin_unlock(&cfid->fid_lock);
+		mutex_unlock(&cfid->cfid_mutex);
 		/* drop the ref that we have */
 		kref_put(&cfid->refcount, smb2_close_cached_fid);
 		kfree(utf16_path);
@@ -228,6 +236,7 @@  int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 */
 	npath = path_no_prefix(cifs_sb, path);
 	if (IS_ERR(npath)) {
+		mutex_unlock(&cfid->cfid_mutex);
 		rc = PTR_ERR(npath);
 		goto out;
 	}
@@ -389,6 +398,8 @@  int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		*ret_cfid = cfid;
 		atomic_inc(&tcon->num_remote_opens);
 	}
+	mutex_unlock(&cfid->cfid_mutex);
+
 	kfree(utf16_path);
 
 	if (is_replayable_error(rc) &&
@@ -432,6 +443,9 @@  smb2_close_cached_fid(struct kref *ref)
 					       refcount);
 	int rc;
 
+	/* make sure not to race with server open */
+	mutex_lock(&cfid->cfid_mutex);
+
 	spin_lock(&cfid->cfids->cfid_list_lock);
 	if (cfid->on_list) {
 		list_del(&cfid->entry);
@@ -454,6 +468,7 @@  smb2_close_cached_fid(struct kref *ref)
 	}
 
 	free_cached_dir(cfid);
+	mutex_unlock(&cfid->cfid_mutex);
 }
 
 void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
@@ -666,6 +681,7 @@  static struct cached_fid *init_cached_dir(const char *path)
 	INIT_LIST_HEAD(&cfid->entry);
 	INIT_LIST_HEAD(&cfid->dirents.entries);
 	mutex_init(&cfid->dirents.de_mutex);
+	mutex_init(&cfid->cfid_mutex);
 	spin_lock_init(&cfid->fid_lock);
 	kref_init(&cfid->refcount);
 	return cfid;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 1dfe79d947a6..93c936af2253 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -42,6 +42,7 @@  struct cached_fid {
 	struct kref refcount;
 	struct cifs_fid fid;
 	spinlock_t fid_lock;
+	struct mutex cfid_mutex;
 	struct cifs_tcon *tcon;
 	struct dentry *dentry;
 	struct work_struct put_work;