Message ID | 20250502051517.10449-3-sprasad@microsoft.com
---|---
State | New
Series | [1/5] cifs: protect cfid accesses with fid_lock
Hi Shyam,

On Fri, May 02, 2025 at 05:13:42AM +0000, nspmangalore@gmail.com wrote:
> From: Shyam Prasad N <sprasad@microsoft.com>
>
> Today we can have multiple processes calling open_cached_dir
> and other workers freeing the cached dir all in parallel.
> Although small sections of this code is locked to protect
> individual fields, there can be races between these threads
> which can be hard to debug.
>
> This patch serializes all initialization and cleanup of
> the cfid struct and the associated resources: dentry and
> the server handle.
>
> Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> ---
>  fs/smb/client/cached_dir.c | 16 ++++++++++++++++
>  fs/smb/client/cached_dir.h |  1 +
>  2 files changed, 17 insertions(+)
>
> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> index d307636c2679..9aedb6cf66df 100644
> --- a/fs/smb/client/cached_dir.c
> +++ b/fs/smb/client/cached_dir.c
> @@ -197,6 +197,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  		return -ENOENT;
>  	}
>
> +	/*
> +	 * the following is a critical section. We need to make sure that the
> +	 * callers are serialized per-cfid
> +	 */
> +	mutex_lock(&cfid->cfid_mutex);
> +
>  	/*
>  	 * check again that the cfid is valid (with mutex held this time).
>  	 * Return cached fid if it is valid (has a lease and has a time).
> @@ -207,11 +213,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  	spin_lock(&cfid->fid_lock);
>  	if (cfid->has_lease && cfid->time) {
>  		spin_unlock(&cfid->fid_lock);
> +		mutex_unlock(&cfid->cfid_mutex);
>  		*ret_cfid = cfid;
>  		kfree(utf16_path);
>  		return 0;
>  	} else if (!cfid->has_lease) {
>  		spin_unlock(&cfid->fid_lock);
> +		mutex_unlock(&cfid->cfid_mutex);
>  		/* drop the ref that we have */
>  		kref_put(&cfid->refcount, smb2_close_cached_fid);
>  		kfree(utf16_path);
> @@ -228,6 +236,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  	 */
>  	npath = path_no_prefix(cifs_sb, path);
>  	if (IS_ERR(npath)) {
> +		mutex_unlock(&cfid->cfid_mutex);
>  		rc = PTR_ERR(npath);
>  		goto out;
>  	}
> @@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  		*ret_cfid = cfid;
>  		atomic_inc(&tcon->num_remote_opens);
>  	}
> +	mutex_unlock(&cfid->cfid_mutex);
> +
>  	kfree(utf16_path);
>
>  	if (is_replayable_error(rc) &&
> @@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
>  			       refcount);
>  	int rc;
>
> +	/* make sure not to race with server open */
> +	mutex_lock(&cfid->cfid_mutex);
> +
>  	spin_lock(&cfid->cfids->cfid_list_lock);
>  	if (cfid->on_list) {
>  		list_del(&cfid->entry);
> @@ -454,6 +468,7 @@ smb2_close_cached_fid(struct kref *ref)
>  	}
>
>  	free_cached_dir(cfid);
> +	mutex_unlock(&cfid->cfid_mutex);
>  }
>
>  void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
> @@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
>  	INIT_LIST_HEAD(&cfid->entry);
>  	INIT_LIST_HEAD(&cfid->dirents.entries);
>  	mutex_init(&cfid->dirents.de_mutex);
> +	mutex_init(&cfid->cfid_mutex);
>  	spin_lock_init(&cfid->fid_lock);
>  	kref_init(&cfid->refcount);
>  	return cfid;
> diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> index 1dfe79d947a6..93c936af2253 100644
> --- a/fs/smb/client/cached_dir.h
> +++ b/fs/smb/client/cached_dir.h
> @@ -42,6 +42,7 @@ struct cached_fid {
>  	struct kref refcount;
>  	struct cifs_fid fid;
>  	spinlock_t fid_lock;
> +	struct mutex cfid_mutex;
>  	struct cifs_tcon *tcon;
>  	struct dentry *dentry;
>  	struct work_struct put_work;
> --
> 2.43.0
>

I might be missing something, but...
First, smb2_close_cached_fid() is the release function, meaning that when
it runs the last cfid reference has just been dropped. So, in my
understanding, at that point we want to remove this cfid from the list as
fast as possible, so it is not found anymore by
find_or_create_cached_dir(), and then free the cfid. The mutex inside the
function is intended to prevent a race with open, but if another thread is
already waiting to acquire that mutex, freeing the cfid will just cause a
UAF.

Second, I am not fully convinced that we need a mutex there. :/ I have
thought about it many times and I could not find proof that there is a
race happening there.

Third, (referencing PATCH 2) even if we have a mutex there, shouldn't we
just let the thread that just acquired the mutex retry to acquire the
lease (which I believe is the current behavior)?

Thanks,
Henrique
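To make the first point concrete, here is a minimal userspace sketch of the lifetime hazard described above: a mutex embedded in a refcounted object is taken in the release path while another thread may already be sleeping on it. pthread_mutex_t and a C11 atomic stand in for the kernel's struct mutex and struct kref, and cfid_like, cfid_put() and release() are invented stand-ins, not the real cifs code.

/* Illustrative only: not the cifs client API. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct cfid_like {
	atomic_int refcount;
	pthread_mutex_t mutex;		/* embedded in the refcounted object */
};

/* Runs when the last reference is dropped (the smb2_close_cached_fid role). */
static void release(struct cfid_like *c)
{
	pthread_mutex_lock(&c->mutex);	/* "serialize with open" */
	/* ... unlist, drop dentry, close the server handle ... */
	pthread_mutex_unlock(&c->mutex);
	free(c);
	/*
	 * Hazard: if another thread was already blocked in
	 * pthread_mutex_lock(&c->mutex) when the last reference was
	 * dropped, it wakes up on a mutex that lives inside memory this
	 * path just freed, i.e. a use-after-free.
	 */
}

static void cfid_put(struct cfid_like *c)
{
	/* atomic_fetch_sub() returns the old value: 1 means last ref dropped. */
	if (atomic_fetch_sub(&c->refcount, 1) == 1)
		release(c);
}

int main(void)
{
	struct cfid_like *c = calloc(1, sizeof(*c));

	if (!c)
		return 1;
	atomic_init(&c->refcount, 1);
	pthread_mutex_init(&c->mutex, NULL);
	cfid_put(c);			/* last ref: release() frees c */
	return 0;
}

This only illustrates the lifetime concern in the first point; whether that window is actually reachable in the real code is the open question raised in the second point.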
On Fri, May 02, 2025 at 12:10:04PM -0300, Henrique Carvalho wrote:
>
> Second, I am not fully convinced that we need a mutex there. :/ I have
> thought about it many times and I could not find proof that there is a
> race happening there.
>
> Third, (referencing PATCH 2) even if we have a mutex there, shouldn't we
> just let the thread that just acquired the mutex retry to acquire the
> lease (which I believe is the current behavior)?

Here "there" means open_cached_dir().

>
> Thanks,
> Henrique
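For readers following the third point, below is a rough userspace sketch of the "retry under the lock" flow it refers to: the thread that wins the per-cfid mutex re-checks the cached state and, if the lease is gone, performs the open itself rather than failing. All names here (cfid_like, server_open_dir, open_cached_dir_like) are invented stand-ins, and PATCH 2 itself is not shown, so this is only an illustration of the behaviour being described.

/* Illustrative stand-ins only; not the cifs client code or PATCH 2. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cfid_like {
	pthread_mutex_t mutex;
	bool has_lease;
	long time;
};

/* Pretend server round-trip that (re)acquires the directory lease. */
static int server_open_dir(struct cfid_like *c)
{
	c->has_lease = true;
	c->time = 1;
	return 0;
}

static int open_cached_dir_like(struct cfid_like *c)
{
	int rc;

	pthread_mutex_lock(&c->mutex);
	if (c->has_lease && c->time) {
		/* Cached handle still valid: reuse it. */
		pthread_mutex_unlock(&c->mutex);
		return 0;
	}
	/*
	 * Lease missing or handle stale: the thread that acquired the
	 * mutex retries the open itself instead of returning an error,
	 * which is the behaviour the comment above argues for.
	 */
	rc = server_open_dir(c);
	pthread_mutex_unlock(&c->mutex);
	return rc;
}

int main(void)
{
	struct cfid_like c = {
		.mutex = PTHREAD_MUTEX_INITIALIZER,
		.has_lease = false,
		.time = 0,
	};

	printf("rc=%d has_lease=%d\n", open_cached_dir_like(&c), c.has_lease);
	return 0;
}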
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index d307636c2679..9aedb6cf66df 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -197,6 +197,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		return -ENOENT;
 	}
 
+	/*
+	 * the following is a critical section. We need to make sure that the
+	 * callers are serialized per-cfid
+	 */
+	mutex_lock(&cfid->cfid_mutex);
+
 	/*
 	 * check again that the cfid is valid (with mutex held this time).
 	 * Return cached fid if it is valid (has a lease and has a time).
@@ -207,11 +213,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease && cfid->time) {
 		spin_unlock(&cfid->fid_lock);
+		mutex_unlock(&cfid->cfid_mutex);
 		*ret_cfid = cfid;
 		kfree(utf16_path);
 		return 0;
 	} else if (!cfid->has_lease) {
 		spin_unlock(&cfid->fid_lock);
+		mutex_unlock(&cfid->cfid_mutex);
 		/* drop the ref that we have */
 		kref_put(&cfid->refcount, smb2_close_cached_fid);
 		kfree(utf16_path);
@@ -228,6 +236,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 */
 	npath = path_no_prefix(cifs_sb, path);
 	if (IS_ERR(npath)) {
+		mutex_unlock(&cfid->cfid_mutex);
 		rc = PTR_ERR(npath);
 		goto out;
 	}
@@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		*ret_cfid = cfid;
 		atomic_inc(&tcon->num_remote_opens);
 	}
+	mutex_unlock(&cfid->cfid_mutex);
+
 	kfree(utf16_path);
 
 	if (is_replayable_error(rc) &&
@@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
 			       refcount);
 	int rc;
 
+	/* make sure not to race with server open */
+	mutex_lock(&cfid->cfid_mutex);
+
 	spin_lock(&cfid->cfids->cfid_list_lock);
 	if (cfid->on_list) {
 		list_del(&cfid->entry);
@@ -454,6 +468,7 @@ smb2_close_cached_fid(struct kref *ref)
 	}
 
 	free_cached_dir(cfid);
+	mutex_unlock(&cfid->cfid_mutex);
 }
 
 void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
@@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
 	INIT_LIST_HEAD(&cfid->entry);
 	INIT_LIST_HEAD(&cfid->dirents.entries);
 	mutex_init(&cfid->dirents.de_mutex);
+	mutex_init(&cfid->cfid_mutex);
 	spin_lock_init(&cfid->fid_lock);
 	kref_init(&cfid->refcount);
 	return cfid;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 1dfe79d947a6..93c936af2253 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -42,6 +42,7 @@ struct cached_fid {
 	struct kref refcount;
 	struct cifs_fid fid;
 	spinlock_t fid_lock;
+	struct mutex cfid_mutex;
 	struct cifs_tcon *tcon;
 	struct dentry *dentry;
 	struct work_struct put_work;
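Finally, a condensed userspace sketch of what the patch as a whole aims for: the new per-cfid mutex makes the open path and the teardown path mutually exclusive, so they cannot interleave on the same cfid. pthreads stand in for struct mutex and every name here is an invented stand-in. Note that in the hunk above the new mutex_unlock() in smb2_close_cached_fid() comes after free_cached_dir(); the sketch unlocks before freeing so it stays well-formed.

/* Invented stand-ins; illustrates only the intended serialization. */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct cfid_like {
	pthread_mutex_t cfid_mutex;	/* the mutex the patch introduces */
	bool has_lease;
};

/* Open side: the whole check-or-reopen sequence runs under cfid_mutex. */
static void *opener(void *arg)
{
	struct cfid_like *c = arg;

	pthread_mutex_lock(&c->cfid_mutex);
	if (!c->has_lease)
		c->has_lease = true;	/* "issue the open, set up the lease" */
	pthread_mutex_unlock(&c->cfid_mutex);
	return NULL;
}

/* Teardown side: also runs under cfid_mutex, so it cannot interleave
 * with the opener's critical section on the same cfid. */
static void *closer(void *arg)
{
	struct cfid_like *c = arg;

	pthread_mutex_lock(&c->cfid_mutex);
	c->has_lease = false;		/* "close the server handle" */
	pthread_mutex_unlock(&c->cfid_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	struct cfid_like *c = calloc(1, sizeof(*c));

	if (!c)
		return 1;
	pthread_mutex_init(&c->cfid_mutex, NULL);
	pthread_create(&t1, NULL, opener, c);
	pthread_create(&t2, NULL, closer, c);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	free(c);
	return 0;
}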