diff mbox series

[v2,1/2] smb: cached_dir.c: fix race in cfid release

Message ID 20250502225213.330418-1-henrique.carvalho@suse.com
State New
Headers show
Series [v2,1/2] smb: cached_dir.c: fix race in cfid release | expand

Commit Message

Henrique Carvalho May 2, 2025, 10:52 p.m. UTC
find_or_create_cached_dir() could grab a new reference after kref_put()
had seen the refcount drop to zero but before cfid_list_lock is acquired
in smb2_close_cached_fid(), leading to use-after-free.

Switch to kref_put_lock() so cfid_release() runs with cfid_list_lock
held, closing that gap.

Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
---
V1 -> V2: kept the original function names and added __releases annotation 
to silence sparse warning

 fs/smb/client/cached_dir.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

Comments

Steve French May 5, 2025, 7:41 p.m. UTC | #1
The first few times I tried his patch it seemed to help avoid the
generic/241 dentry crash, but the last four times I have run it, it
still fails the same way so may have been a coincidence that the patch
avoided the problem a couple of times.

Suggestions welcome ... also looking at Shyam's and Enzo's patches
which address similar locking issues

On Fri, May 2, 2025 at 5:53 PM Henrique Carvalho
<henrique.carvalho@suse.com> wrote:
>
> find_or_create_cached_dir() could grab a new reference after kref_put()
> had seen the refcount drop to zero but before cfid_list_lock is acquired
> in smb2_close_cached_fid(), leading to use-after-free.
>
> Switch to kref_put_lock() so cfid_release() runs with cfid_list_lock
> held, closing that gap.
>
> Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
> ---
> V1 -> V2: kept the original function names and added __releases annotation
> to silence sparse warning
>
>  fs/smb/client/cached_dir.c | 32 ++++++++++++++++++++++++--------
>  1 file changed, 24 insertions(+), 8 deletions(-)
>
> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> index fe738623cf1b..fc19c26bb014 100644
> --- a/fs/smb/client/cached_dir.c
> +++ b/fs/smb/client/cached_dir.c
> @@ -370,11 +370,18 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>                          * lease. Release one here, and the second below.
>                          */
>                         cfid->has_lease = false;
> -                       kref_put(&cfid->refcount, smb2_close_cached_fid);
> +
> +                       /*
> +                        * Safe to call while cfid_list_lock is held.
> +                        * If close_cached_dir() ever ends up invoking smb2_close_cached_fid()
> +                        * (our kref release callback) recursively, the reference‑counting logic
> +                        * is already broken, so that would be a bug.
> +                        */
> +                       close_cached_dir(cfid);
>                 }
>                 spin_unlock(&cfids->cfid_list_lock);
>
> -               kref_put(&cfid->refcount, smb2_close_cached_fid);
> +               close_cached_dir(cfid);
>         } else {
>                 *ret_cfid = cfid;
>                 atomic_inc(&tcon->num_remote_opens);
> @@ -414,12 +421,12 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
>
>  static void
>  smb2_close_cached_fid(struct kref *ref)
> +__releases(&cfid->cfids->cfid_list_lock)
>  {
>         struct cached_fid *cfid = container_of(ref, struct cached_fid,
>                                                refcount);
>         int rc;
>
> -       spin_lock(&cfid->cfids->cfid_list_lock);
>         if (cfid->on_list) {
>                 list_del(&cfid->entry);
>                 cfid->on_list = false;
> @@ -454,7 +461,14 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
>         spin_lock(&cfid->cfids->cfid_list_lock);
>         if (cfid->has_lease) {
>                 cfid->has_lease = false;
> -               kref_put(&cfid->refcount, smb2_close_cached_fid);
> +
> +               /*
> +                * Safe to call while cfid_list_lock is held.
> +                * If close_cached_dir() ever ends up invoking smb2_close_cached_fid()
> +                * (the release callback) here, the reference‑counting logic
> +                * is already broken, so that would be a bug.
> +                */
> +               close_cached_dir(cfid);
>         }
>         spin_unlock(&cfid->cfids->cfid_list_lock);
>         close_cached_dir(cfid);
> @@ -463,7 +477,9 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
>
>  void close_cached_dir(struct cached_fid *cfid)
>  {
> -       kref_put(&cfid->refcount, smb2_close_cached_fid);
> +       kref_put_lock(&cfid->refcount,
> +                     smb2_close_cached_fid,
> +                     &cfid->cfids->cfid_list_lock);
>  }
>
>  /*
> @@ -564,7 +580,7 @@ cached_dir_offload_close(struct work_struct *work)
>
>         WARN_ON(cfid->on_list);
>
> -       kref_put(&cfid->refcount, smb2_close_cached_fid);
> +       close_cached_dir(cfid);
>         cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close);
>  }
>
> @@ -688,7 +704,7 @@ static void cfids_invalidation_worker(struct work_struct *work)
>         list_for_each_entry_safe(cfid, q, &entry, entry) {
>                 list_del(&cfid->entry);
>                 /* Drop the ref-count acquired in invalidate_all_cached_dirs */
> -               kref_put(&cfid->refcount, smb2_close_cached_fid);
> +               close_cached_dir(cfid);
>         }
>  }
>
> @@ -741,7 +757,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
>                          * Drop the ref-count from above, either the lease-ref (if there
>                          * was one) or the extra one acquired.
>                          */
> -                       kref_put(&cfid->refcount, smb2_close_cached_fid);
> +                       close_cached_dir(cfid);
>         }
>         queue_delayed_work(cfid_put_wq, &cfids->laundromat_work,
>                            dir_cache_timeout * HZ);
> --
> 2.47.0
>
diff mbox series

Patch

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index fe738623cf1b..fc19c26bb014 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -370,11 +370,18 @@  int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 			 * lease. Release one here, and the second below.
 			 */
 			cfid->has_lease = false;
-			kref_put(&cfid->refcount, smb2_close_cached_fid);
+
+			/*
+			 * Safe to call while cfid_list_lock is held.
+			 * If close_cached_dir() ever ends up invoking smb2_close_cached_fid()
+			 * (our kref release callback) recursively, the reference‑counting logic
+			 * is already broken, so that would be a bug.
+			 */
+			close_cached_dir(cfid);
 		}
 		spin_unlock(&cfids->cfid_list_lock);
 
-		kref_put(&cfid->refcount, smb2_close_cached_fid);
+		close_cached_dir(cfid);
 	} else {
 		*ret_cfid = cfid;
 		atomic_inc(&tcon->num_remote_opens);
@@ -414,12 +421,12 @@  int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
 
 static void
 smb2_close_cached_fid(struct kref *ref)
+__releases(&cfid->cfids->cfid_list_lock)
 {
 	struct cached_fid *cfid = container_of(ref, struct cached_fid,
 					       refcount);
 	int rc;
 
-	spin_lock(&cfid->cfids->cfid_list_lock);
 	if (cfid->on_list) {
 		list_del(&cfid->entry);
 		cfid->on_list = false;
@@ -454,7 +461,14 @@  void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
 	spin_lock(&cfid->cfids->cfid_list_lock);
 	if (cfid->has_lease) {
 		cfid->has_lease = false;
-		kref_put(&cfid->refcount, smb2_close_cached_fid);
+
+		/*
+		 * Safe to call while cfid_list_lock is held.
+		 * If close_cached_dir() ever ends up invoking smb2_close_cached_fid()
+		 * (the release callback) here, the reference‑counting logic
+		 * is already broken, so that would be a bug.
+		 */
+		close_cached_dir(cfid);
 	}
 	spin_unlock(&cfid->cfids->cfid_list_lock);
 	close_cached_dir(cfid);
@@ -463,7 +477,9 @@  void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
 
 void close_cached_dir(struct cached_fid *cfid)
 {
-	kref_put(&cfid->refcount, smb2_close_cached_fid);
+	kref_put_lock(&cfid->refcount,
+		      smb2_close_cached_fid,
+		      &cfid->cfids->cfid_list_lock);
 }
 
 /*
@@ -564,7 +580,7 @@  cached_dir_offload_close(struct work_struct *work)
 
 	WARN_ON(cfid->on_list);
 
-	kref_put(&cfid->refcount, smb2_close_cached_fid);
+	close_cached_dir(cfid);
 	cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close);
 }
 
@@ -688,7 +704,7 @@  static void cfids_invalidation_worker(struct work_struct *work)
 	list_for_each_entry_safe(cfid, q, &entry, entry) {
 		list_del(&cfid->entry);
 		/* Drop the ref-count acquired in invalidate_all_cached_dirs */
-		kref_put(&cfid->refcount, smb2_close_cached_fid);
+		close_cached_dir(cfid);
 	}
 }
 
@@ -741,7 +757,7 @@  static void cfids_laundromat_worker(struct work_struct *work)
 			 * Drop the ref-count from above, either the lease-ref (if there
 			 * was one) or the extra one acquired.
 			 */
-			kref_put(&cfid->refcount, smb2_close_cached_fid);
+			close_cached_dir(cfid);
 	}
 	queue_delayed_work(cfid_put_wq, &cfids->laundromat_work,
 			   dir_cache_timeout * HZ);