Message ID | 20250502225213.330418-1-henrique.carvalho@suse.com |
---|---|
State | New |
Headers | show |
Series | [v2,1/2] smb: cached_dir.c: fix race in cfid release | expand |
The first few times I tried his patch it seemed to help avoid the generic/241 dentry crash, but the last four times I have run it, it still fails the same way so may have been a coincidence that the patch avoided the problem a couple of times. Suggestions welcome ... also looking at Shyam's and Enzo's patches which address similar locking issues On Fri, May 2, 2025 at 5:53 PM Henrique Carvalho <henrique.carvalho@suse.com> wrote: > > find_or_create_cached_dir() could grab a new reference after kref_put() > had seen the refcount drop to zero but before cfid_list_lock is acquired > in smb2_close_cached_fid(), leading to use-after-free. > > Switch to kref_put_lock() so cfid_release() runs with cfid_list_lock > held, closing that gap. > > Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com> > --- > V1 -> V2: kept the original function names and added __releases annotation > to silence sparse warning > > fs/smb/client/cached_dir.c | 32 ++++++++++++++++++++++++-------- > 1 file changed, 24 insertions(+), 8 deletions(-) > > diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c > index fe738623cf1b..fc19c26bb014 100644 > --- a/fs/smb/client/cached_dir.c > +++ b/fs/smb/client/cached_dir.c > @@ -370,11 +370,18 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > * lease. Release one here, and the second below. > */ > cfid->has_lease = false; > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + > + /* > + * Safe to call while cfid_list_lock is held. > + * If close_cached_dir() ever ends up invoking smb2_close_cached_fid() > + * (our kref release callback) recursively, the reference‑counting logic > + * is already broken, so that would be a bug. 
> + */ > + close_cached_dir(cfid); > } > spin_unlock(&cfids->cfid_list_lock); > > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + close_cached_dir(cfid); > } else { > *ret_cfid = cfid; > atomic_inc(&tcon->num_remote_opens); > @@ -414,12 +421,12 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, > > static void > smb2_close_cached_fid(struct kref *ref) > +__releases(&cfid->cfids->cfid_list_lock) > { > struct cached_fid *cfid = container_of(ref, struct cached_fid, > refcount); > int rc; > > - spin_lock(&cfid->cfids->cfid_list_lock); > if (cfid->on_list) { > list_del(&cfid->entry); > cfid->on_list = false; > @@ -454,7 +461,14 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, > spin_lock(&cfid->cfids->cfid_list_lock); > if (cfid->has_lease) { > cfid->has_lease = false; > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + > + /* > + * Safe to call while cfid_list_lock is held. > + * If close_cached_dir() ever ends up invoking smb2_close_cached_fid() > + * (the release callback) here, the reference‑counting logic > + * is already broken, so that would be a bug. 
> + */ > + close_cached_dir(cfid); > } > spin_unlock(&cfid->cfids->cfid_list_lock); > close_cached_dir(cfid); > @@ -463,7 +477,9 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, > > void close_cached_dir(struct cached_fid *cfid) > { > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + kref_put_lock(&cfid->refcount, > + smb2_close_cached_fid, > + &cfid->cfids->cfid_list_lock); > } > > /* > @@ -564,7 +580,7 @@ cached_dir_offload_close(struct work_struct *work) > > WARN_ON(cfid->on_list); > > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + close_cached_dir(cfid); > cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); > } > > @@ -688,7 +704,7 @@ static void cfids_invalidation_worker(struct work_struct *work) > list_for_each_entry_safe(cfid, q, &entry, entry) { > list_del(&cfid->entry); > /* Drop the ref-count acquired in invalidate_all_cached_dirs */ > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + close_cached_dir(cfid); > } > } > > @@ -741,7 +757,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > * Drop the ref-count from above, either the lease-ref (if there > * was one) or the extra one acquired. > */ > - kref_put(&cfid->refcount, smb2_close_cached_fid); > + close_cached_dir(cfid); > } > queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, > dir_cache_timeout * HZ); > -- > 2.47.0 >
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe738623cf1b..fc19c26bb014 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -370,11 +370,18 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, * lease. Release one here, and the second below. */ cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + + /* + * Safe to call while cfid_list_lock is held. + * If close_cached_dir() ever ends up invoking smb2_close_cached_fid() + * (our kref release callback) recursively, the reference‑counting logic + * is already broken, so that would be a bug. + */ + close_cached_dir(cfid); } spin_unlock(&cfids->cfid_list_lock); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } else { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); @@ -414,12 +421,12 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, static void smb2_close_cached_fid(struct kref *ref) +__releases(&cfid->cfids->cfid_list_lock) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); int rc; - spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->on_list) { list_del(&cfid->entry); cfid->on_list = false; @@ -454,7 +461,14 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->has_lease) { cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + + /* + * Safe to call while cfid_list_lock is held. + * If close_cached_dir() ever ends up invoking smb2_close_cached_fid() + * (the release callback) here, the reference‑counting logic + * is already broken, so that would be a bug. 
+ */ + close_cached_dir(cfid); } spin_unlock(&cfid->cfids->cfid_list_lock); close_cached_dir(cfid); @@ -463,7 +477,9 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, void close_cached_dir(struct cached_fid *cfid) { - kref_put(&cfid->refcount, smb2_close_cached_fid); + kref_put_lock(&cfid->refcount, + smb2_close_cached_fid, + &cfid->cfids->cfid_list_lock); } /* @@ -564,7 +580,7 @@ cached_dir_offload_close(struct work_struct *work) WARN_ON(cfid->on_list); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); } @@ -688,7 +704,7 @@ static void cfids_invalidation_worker(struct work_struct *work) list_for_each_entry_safe(cfid, q, &entry, entry) { list_del(&cfid->entry); /* Drop the ref-count acquired in invalidate_all_cached_dirs */ - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } } @@ -741,7 +757,7 @@ static void cfids_laundromat_worker(struct work_struct *work) * Drop the ref-count from above, either the lease-ref (if there * was one) or the extra one acquired. */ - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ);
find_or_create_cached_dir() could grab a new reference after kref_put() had seen the refcount drop to zero but before cfid_list_lock is acquired in smb2_close_cached_fid(), leading to use-after-free. Switch to kref_put_lock() so smb2_close_cached_fid() runs with cfid_list_lock held, closing that gap. Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com> --- V1 -> V2: kept the original function names and added __releases annotation to silence sparse warning fs/smb/client/cached_dir.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-)