Message ID | 20250502051517.10449-1-sprasad@microsoft.com |
---|---|
State | New |
Headers | show |
Series | [1/5] cifs: protect cfid accesses with fid_lock | expand |
Hi Shyam, On Fri, May 02, 2025 at 05:13:40AM +0000, nspmangalore@gmail.com wrote: > From: Shyam Prasad N <sprasad@microsoft.com> > > There are several accesses to cfid structure today without > locking fid_lock. This can lead to race conditions that are > hard to debug. > > With this change, I'm trying to make sure that accesses to cfid > struct members happen with fid_lock held. > > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> > --- > fs/smb/client/cached_dir.c | 87 ++++++++++++++++++++++---------------- > 1 file changed, 50 insertions(+), 37 deletions(-) > > diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c > index fe738623cf1b..f074675fa6be 100644 > --- a/fs/smb/client/cached_dir.c > +++ b/fs/smb/client/cached_dir.c > @@ -31,6 +31,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, > > spin_lock(&cfids->cfid_list_lock); > list_for_each_entry(cfid, &cfids->entries, entry) { > + spin_lock(&cfid->fid_lock); > if (!strcmp(cfid->path, path)) { > /* > * If it doesn't have a lease it is either not yet > @@ -38,13 +39,16 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, > * being deleted due to a lease break. > */ > if (!cfid->time || !cfid->has_lease) { > + spin_unlock(&cfid->fid_lock); > spin_unlock(&cfids->cfid_list_lock); > return NULL; > } > kref_get(&cfid->refcount); > + spin_unlock(&cfid->fid_lock); > spin_unlock(&cfids->cfid_list_lock); > return cfid; > } > + spin_unlock(&cfid->fid_lock); > } > if (lookup_only) { > spin_unlock(&cfids->cfid_list_lock); > @@ -192,19 +196,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > kfree(utf16_path); > return -ENOENT; > } > + > /* > * Return cached fid if it is valid (has a lease and has a time). > * Otherwise, it is either a new entry or laundromat worker removed it > * from @cfids->entries. Caller will put last reference if the latter. 
> */ > - spin_lock(&cfids->cfid_list_lock); > + spin_lock(&cfid->fid_lock); > if (cfid->has_lease && cfid->time) { > - spin_unlock(&cfids->cfid_list_lock); > + spin_unlock(&cfid->fid_lock); > *ret_cfid = cfid; > kfree(utf16_path); > return 0; > } > - spin_unlock(&cfids->cfid_list_lock); > + spin_unlock(&cfid->fid_lock); > > /* > * Skip any prefix paths in @path as lookup_positive_unlocked() ends up > @@ -219,17 +224,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > goto out; > } > > - if (!npath[0]) { > - dentry = dget(cifs_sb->root); > - } else { > - dentry = path_to_dentry(cifs_sb, npath); > - if (IS_ERR(dentry)) { > - rc = -ENOENT; > - goto out; > - } > - } > - cfid->dentry = dentry; > - cfid->tcon = tcon; > > /* > * We do not hold the lock for the open because in case > @@ -301,9 +295,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > } > goto oshr_free; > } > - cfid->is_open = true; > - > - spin_lock(&cfids->cfid_list_lock); > > o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; > oparms.fid->persistent_fid = o_rsp->PersistentFileId; > @@ -314,7 +305,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > > if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { > - spin_unlock(&cfids->cfid_list_lock); > rc = -EINVAL; > goto oshr_free; > } > @@ -323,21 +313,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > &oparms.fid->epoch, > oparms.fid->lease_key, > &oplock, NULL, NULL); > - if (rc) { > - spin_unlock(&cfids->cfid_list_lock); > + if (rc) > goto oshr_free; > - } > > rc = -EINVAL; > - if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { > - spin_unlock(&cfids->cfid_list_lock); > + if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) > goto oshr_free; > - } > qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; > - if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) { > - spin_unlock(&cfids->cfid_list_lock); > + if (le32_to_cpu(qi_rsp->OutputBufferLength) < 
sizeof(struct smb2_file_all_info)) > goto oshr_free; > - } > if (!smb2_validate_and_copy_iov( > le16_to_cpu(qi_rsp->OutputBufferOffset), > sizeof(struct smb2_file_all_info), > @@ -345,10 +329,26 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > (char *)&cfid->file_all_info)) > cfid->file_all_info_is_valid = true; > > - cfid->time = jiffies; > - spin_unlock(&cfids->cfid_list_lock); > /* At this point the directory handle is fully cached */ > rc = 0; > + spin_lock(&cfid->fid_lock); > + if (!cfid->dentry) { > + if (!npath[0]) { > + dentry = dget(cifs_sb->root); > + } else { > + dentry = path_to_dentry(cifs_sb, npath); > + if (IS_ERR(dentry)) { > + spin_unlock(&cfid->fid_lock); > + rc = -ENOENT; > + goto out; > + } > + } > + cfid->dentry = dentry; > + } > + cfid->tcon = tcon; > + cfid->is_open = true; > + cfid->time = jiffies; > + spin_unlock(&cfid->fid_lock); > > oshr_free: > SMB2_open_free(&rqst[0]); > @@ -363,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > cfid->on_list = false; > cfids->num_entries--; > } > + spin_lock(&cfid->fid_lock); > if (cfid->has_lease) { > /* > * We are guaranteed to have two references at this > @@ -372,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > cfid->has_lease = false; > kref_put(&cfid->refcount, smb2_close_cached_fid); > } > + spin_unlock(&cfid->fid_lock); > spin_unlock(&cfids->cfid_list_lock); > > kref_put(&cfid->refcount, smb2_close_cached_fid); > @@ -400,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, > > spin_lock(&cfids->cfid_list_lock); > list_for_each_entry(cfid, &cfids->entries, entry) { > + spin_lock(&cfid->fid_lock); > if (dentry && cfid->dentry == dentry) { > cifs_dbg(FYI, "found a cached file handle by dentry\n"); > kref_get(&cfid->refcount); > + spin_unlock(&cfid->fid_lock); > *ret_cfid = cfid; > spin_unlock(&cfids->cfid_list_lock); > return 0; > } > + spin_unlock(&cfid->fid_lock); > } > spin_unlock(&cfids->cfid_list_lock); > 
return -ENOENT; > @@ -427,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref) > } > spin_unlock(&cfid->cfids->cfid_list_lock); > > - dput(cfid->dentry); > - cfid->dentry = NULL; > + /* no locking necessary as we're the last user of this cfid */ > + if (cfid->dentry) { > + dput(cfid->dentry); > + cfid->dentry = NULL; > + } > > if (cfid->is_open) { > rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, > @@ -451,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, > cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name); > return; > } > - spin_lock(&cfid->cfids->cfid_list_lock); > + spin_lock(&cfid->fid_lock); > if (cfid->has_lease) { > + /* mark as invalid */ > cfid->has_lease = false; > kref_put(&cfid->refcount, smb2_close_cached_fid); > } > - spin_unlock(&cfid->cfids->cfid_list_lock); > + spin_unlock(&cfid->fid_lock); > close_cached_dir(cfid); > } > > @@ -538,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) > cfids->num_entries--; > cfid->is_open = false; > cfid->on_list = false; > + spin_lock(&cfid->fid_lock); > if (cfid->has_lease) { > /* > * The lease was never cancelled from the server, > @@ -546,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) > cfid->has_lease = false; > } else > kref_get(&cfid->refcount); > + spin_unlock(&cfid->fid_lock); > } > /* > * Queue dropping of the dentries once locks have been dropped > @@ -600,6 +611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) > > spin_lock(&cfids->cfid_list_lock); > list_for_each_entry(cfid, &cfids->entries, entry) { > + spin_lock(&cfid->fid_lock); > if (cfid->has_lease && > !memcmp(lease_key, > cfid->fid.lease_key, > @@ -612,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) > */ > list_del(&cfid->entry); > cfid->on_list = false; > + spin_unlock(&cfid->fid_lock); > cfids->num_entries--; > > ++tcon->tc_count; > @@ -621,6 +634,7 @@ int cached_dir_lease_break(struct 
cifs_tcon *tcon, __u8 lease_key[16]) > spin_unlock(&cfids->cfid_list_lock); > return true; > } > + spin_unlock(&cfid->fid_lock); > } > spin_unlock(&cfids->cfid_list_lock); > return false; > @@ -656,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid) > WARN_ON(work_pending(&cfid->close_work)); > WARN_ON(work_pending(&cfid->put_work)); > > - dput(cfid->dentry); > - cfid->dentry = NULL; > - > /* > * Delete all cached dirent names > */ > @@ -703,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > > spin_lock(&cfids->cfid_list_lock); > list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { > + spin_lock(&cfid->fid_lock); > if (cfid->time && > time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { > cfid->on_list = false; > @@ -717,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > } else > kref_get(&cfid->refcount); > } > + spin_unlock(&cfid->fid_lock); > } > spin_unlock(&cfids->cfid_list_lock); > > @@ -726,7 +739,6 @@ static void cfids_laundromat_worker(struct work_struct *work) > spin_lock(&cfid->fid_lock); > dentry = cfid->dentry; > cfid->dentry = NULL; > - spin_unlock(&cfid->fid_lock); > > dput(dentry); > if (cfid->is_open) { > @@ -742,6 +754,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > * was one) or the extra one acquired. > */ > kref_put(&cfid->refcount, smb2_close_cached_fid); > + spin_unlock(&cfid->fid_lock); > } > queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, > dir_cache_timeout * HZ); You are calling dput() here with a lock held, both in path_to_dentry and in smb2_close_cached_fid. Is this correct? Also, the lock ordering here is lock(fid_lock) -> lock(cifs_tcp_ses_lock) -> unlock(cifs_tcp_ses_lock) -> unlock(fid_lock), won't this blow up in another path? Thanks, Henrique
On Fri, May 02, 2025 at 09:37:34AM -0300, Henrique Carvalho wrote: > > You are calling dput() here with a lock held, both in path_to_dentry and > in smb2_close_cached_fid. Is this correct? > Correction: it is in cfids_laundromat_worker.
On 05/02, Steve French wrote: >In my testing this first patch led to hangs very quickly running xfstests >(in that example was to windows server with multichannel). More testing and >review feedback would be appreciated Same here. Very easy reproducer: # mount.cifs -o ... //srv/share /mnt # rm -rf /mnt/* <RCU stall on open_cached_dir_by_dentry() when spinning trying to lock fid_lock on loop> I'll debug it and reply with what I find. Cheers, Enzo >Thanks, > >Steve > >On Fri, May 2, 2025, 12:15 AM <nspmangalore@gmail.com> wrote: > >> From: Shyam Prasad N <sprasad@microsoft.com> >> >> There are several accesses to cfid structure today without >> locking fid_lock. This can lead to race conditions that are >> hard to debug. >> >> With this change, I'm trying to make sure that accesses to cfid >> struct members happen with fid_lock held. >> >> Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> >> --- >> fs/smb/client/cached_dir.c | 87 ++++++++++++++++++++++---------------- >> 1 file changed, 50 insertions(+), 37 deletions(-) >> >> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c >> index fe738623cf1b..f074675fa6be 100644 >> --- a/fs/smb/client/cached_dir.c >> +++ b/fs/smb/client/cached_dir.c >> @@ -31,6 +31,7 @@ static struct cached_fid >> *find_or_create_cached_dir(struct cached_fids *cfids, >> >> spin_lock(&cfids->cfid_list_lock); >> list_for_each_entry(cfid, &cfids->entries, entry) { >> + spin_lock(&cfid->fid_lock); >> if (!strcmp(cfid->path, path)) { >> /* >> * If it doesn't have a lease it is either not yet >> @@ -38,13 +39,16 @@ static struct cached_fid >> *find_or_create_cached_dir(struct cached_fids *cfids, >> * being deleted due to a lease break. 
>> */ >> if (!cfid->time || !cfid->has_lease) { >> + spin_unlock(&cfid->fid_lock); >> spin_unlock(&cfids->cfid_list_lock); >> return NULL; >> } >> kref_get(&cfid->refcount); >> + spin_unlock(&cfid->fid_lock); >> spin_unlock(&cfids->cfid_list_lock); >> return cfid; >> } >> + spin_unlock(&cfid->fid_lock); >> } >> if (lookup_only) { >> spin_unlock(&cfids->cfid_list_lock); >> @@ -192,19 +196,20 @@ int open_cached_dir(unsigned int xid, struct >> cifs_tcon *tcon, >> kfree(utf16_path); >> return -ENOENT; >> } >> + >> /* >> * Return cached fid if it is valid (has a lease and has a time). >> * Otherwise, it is either a new entry or laundromat worker >> removed it >> * from @cfids->entries. Caller will put last reference if the >> latter. >> */ >> - spin_lock(&cfids->cfid_list_lock); >> + spin_lock(&cfid->fid_lock); >> if (cfid->has_lease && cfid->time) { >> - spin_unlock(&cfids->cfid_list_lock); >> + spin_unlock(&cfid->fid_lock); >> *ret_cfid = cfid; >> kfree(utf16_path); >> return 0; >> } >> - spin_unlock(&cfids->cfid_list_lock); >> + spin_unlock(&cfid->fid_lock); >> >> /* >> * Skip any prefix paths in @path as lookup_positive_unlocked() >> ends up >> @@ -219,17 +224,6 @@ int open_cached_dir(unsigned int xid, struct >> cifs_tcon *tcon, >> goto out; >> } >> >> - if (!npath[0]) { >> - dentry = dget(cifs_sb->root); >> - } else { >> - dentry = path_to_dentry(cifs_sb, npath); >> - if (IS_ERR(dentry)) { >> - rc = -ENOENT; >> - goto out; >> - } >> - } >> - cfid->dentry = dentry; >> - cfid->tcon = tcon; >> >> /* >> * We do not hold the lock for the open because in case >> @@ -301,9 +295,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon >> *tcon, >> } >> goto oshr_free; >> } >> - cfid->is_open = true; >> - >> - spin_lock(&cfids->cfid_list_lock); >> >> o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; >> oparms.fid->persistent_fid = o_rsp->PersistentFileId; >> @@ -314,7 +305,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon >> *tcon, >> >> >> if 
(o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { >> - spin_unlock(&cfids->cfid_list_lock); >> rc = -EINVAL; >> goto oshr_free; >> } >> @@ -323,21 +313,15 @@ int open_cached_dir(unsigned int xid, struct >> cifs_tcon *tcon, >> &oparms.fid->epoch, >> oparms.fid->lease_key, >> &oplock, NULL, NULL); >> - if (rc) { >> - spin_unlock(&cfids->cfid_list_lock); >> + if (rc) >> goto oshr_free; >> - } >> >> rc = -EINVAL; >> - if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { >> - spin_unlock(&cfids->cfid_list_lock); >> + if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) >> goto oshr_free; >> - } >> qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; >> - if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct >> smb2_file_all_info)) { >> - spin_unlock(&cfids->cfid_list_lock); >> + if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct >> smb2_file_all_info)) >> goto oshr_free; >> - } >> if (!smb2_validate_and_copy_iov( >> le16_to_cpu(qi_rsp->OutputBufferOffset), >> sizeof(struct smb2_file_all_info), >> @@ -345,10 +329,26 @@ int open_cached_dir(unsigned int xid, struct >> cifs_tcon *tcon, >> (char *)&cfid->file_all_info)) >> cfid->file_all_info_is_valid = true; >> >> - cfid->time = jiffies; >> - spin_unlock(&cfids->cfid_list_lock); >> /* At this point the directory handle is fully cached */ >> rc = 0; >> + spin_lock(&cfid->fid_lock); >> + if (!cfid->dentry) { >> + if (!npath[0]) { >> + dentry = dget(cifs_sb->root); >> + } else { >> + dentry = path_to_dentry(cifs_sb, npath); >> + if (IS_ERR(dentry)) { >> + spin_unlock(&cfid->fid_lock); >> + rc = -ENOENT; >> + goto out; >> + } >> + } >> + cfid->dentry = dentry; >> + } >> + cfid->tcon = tcon; >> + cfid->is_open = true; >> + cfid->time = jiffies; >> + spin_unlock(&cfid->fid_lock); >> >> oshr_free: >> SMB2_open_free(&rqst[0]); >> @@ -363,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon >> *tcon, >> cfid->on_list = false; >> cfids->num_entries--; >> } >> + spin_lock(&cfid->fid_lock); >> if (cfid->has_lease) { 
>> /* >> * We are guaranteed to have two references at this >> @@ -372,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon >> *tcon, >> cfid->has_lease = false; >> kref_put(&cfid->refcount, smb2_close_cached_fid); >> } >> + spin_unlock(&cfid->fid_lock); >> spin_unlock(&cfids->cfid_list_lock); >> >> kref_put(&cfid->refcount, smb2_close_cached_fid); >> @@ -400,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, >> >> spin_lock(&cfids->cfid_list_lock); >> list_for_each_entry(cfid, &cfids->entries, entry) { >> + spin_lock(&cfid->fid_lock); >> if (dentry && cfid->dentry == dentry) { >> cifs_dbg(FYI, "found a cached file handle by >> dentry\n"); >> kref_get(&cfid->refcount); >> + spin_unlock(&cfid->fid_lock); >> *ret_cfid = cfid; >> spin_unlock(&cfids->cfid_list_lock); >> return 0; >> } >> + spin_unlock(&cfid->fid_lock); >> } >> spin_unlock(&cfids->cfid_list_lock); >> return -ENOENT; >> @@ -427,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref) >> } >> spin_unlock(&cfid->cfids->cfid_list_lock); >> >> - dput(cfid->dentry); >> - cfid->dentry = NULL; >> + /* no locking necessary as we're the last user of this cfid */ >> + if (cfid->dentry) { >> + dput(cfid->dentry); >> + cfid->dentry = NULL; >> + } >> >> if (cfid->is_open) { >> rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, >> @@ -451,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, >> struct cifs_tcon *tcon, >> cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name); >> return; >> } >> - spin_lock(&cfid->cfids->cfid_list_lock); >> + spin_lock(&cfid->fid_lock); >> if (cfid->has_lease) { >> + /* mark as invalid */ >> cfid->has_lease = false; >> kref_put(&cfid->refcount, smb2_close_cached_fid); >> } >> - spin_unlock(&cfid->cfids->cfid_list_lock); >> + spin_unlock(&cfid->fid_lock); >> close_cached_dir(cfid); >> } >> >> @@ -538,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) >> cfids->num_entries--; >> cfid->is_open = false; >> cfid->on_list = 
false; >> + spin_lock(&cfid->fid_lock); >> if (cfid->has_lease) { >> /* >> * The lease was never cancelled from the server, >> @@ -546,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) >> cfid->has_lease = false; >> } else >> kref_get(&cfid->refcount); >> + spin_unlock(&cfid->fid_lock); >> } >> /* >> * Queue dropping of the dentries once locks have been dropped >> @@ -600,6 +611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, >> __u8 lease_key[16]) >> >> spin_lock(&cfids->cfid_list_lock); >> list_for_each_entry(cfid, &cfids->entries, entry) { >> + spin_lock(&cfid->fid_lock); >> if (cfid->has_lease && >> !memcmp(lease_key, >> cfid->fid.lease_key, >> @@ -612,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, >> __u8 lease_key[16]) >> */ >> list_del(&cfid->entry); >> cfid->on_list = false; >> + spin_unlock(&cfid->fid_lock); >> cfids->num_entries--; >> >> ++tcon->tc_count; >> @@ -621,6 +634,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, >> __u8 lease_key[16]) >> spin_unlock(&cfids->cfid_list_lock); >> return true; >> } >> + spin_unlock(&cfid->fid_lock); >> } >> spin_unlock(&cfids->cfid_list_lock); >> return false; >> @@ -656,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid) >> WARN_ON(work_pending(&cfid->close_work)); >> WARN_ON(work_pending(&cfid->put_work)); >> >> - dput(cfid->dentry); >> - cfid->dentry = NULL; >> - >> /* >> * Delete all cached dirent names >> */ >> @@ -703,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct >> *work) >> >> spin_lock(&cfids->cfid_list_lock); >> list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { >> + spin_lock(&cfid->fid_lock); >> if (cfid->time && >> time_after(jiffies, cfid->time + HZ * >> dir_cache_timeout)) { >> cfid->on_list = false; >> @@ -717,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct >> *work) >> } else >> kref_get(&cfid->refcount); >> } >> + spin_unlock(&cfid->fid_lock); >> } >> spin_unlock(&cfids->cfid_list_lock); >> 
>> @@ -726,7 +739,6 @@ static void cfids_laundromat_worker(struct work_struct >> *work) >> spin_lock(&cfid->fid_lock); >> dentry = cfid->dentry; >> cfid->dentry = NULL; >> - spin_unlock(&cfid->fid_lock); >> >> dput(dentry); >> if (cfid->is_open) { >> @@ -742,6 +754,7 @@ static void cfids_laundromat_worker(struct work_struct >> *work) >> * was one) or the extra one acquired. >> */ >> kref_put(&cfid->refcount, smb2_close_cached_fid); >> + spin_unlock(&cfid->fid_lock); >> } >> queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, >> dir_cache_timeout * HZ); >> -- >> 2.43.0 >> >>
On Fri, May 2, 2025 at 6:09 PM Henrique Carvalho <henrique.carvalho@suse.com> wrote: > > Hi Shyam, > > On Fri, May 02, 2025 at 05:13:40AM +0000, nspmangalore@gmail.com wrote: > > From: Shyam Prasad N <sprasad@microsoft.com> > > > > There are several accesses to cfid structure today without > > locking fid_lock. This can lead to race conditions that are > > hard to debug. > > > > With this change, I'm trying to make sure that accesses to cfid > > struct members happen with fid_lock held. > > > > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> > > --- > > fs/smb/client/cached_dir.c | 87 ++++++++++++++++++++++---------------- > > 1 file changed, 50 insertions(+), 37 deletions(-) > > > > diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c > > index fe738623cf1b..f074675fa6be 100644 > > --- a/fs/smb/client/cached_dir.c > > +++ b/fs/smb/client/cached_dir.c > > @@ -31,6 +31,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, > > > > spin_lock(&cfids->cfid_list_lock); > > list_for_each_entry(cfid, &cfids->entries, entry) { > > + spin_lock(&cfid->fid_lock); > > if (!strcmp(cfid->path, path)) { > > /* > > * If it doesn't have a lease it is either not yet > > @@ -38,13 +39,16 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, > > * being deleted due to a lease break. > > */ > > if (!cfid->time || !cfid->has_lease) { > > + spin_unlock(&cfid->fid_lock); > > spin_unlock(&cfids->cfid_list_lock); > > return NULL; > > } > > kref_get(&cfid->refcount); > > + spin_unlock(&cfid->fid_lock); > > spin_unlock(&cfids->cfid_list_lock); > > return cfid; > > } > > + spin_unlock(&cfid->fid_lock); > > } > > if (lookup_only) { > > spin_unlock(&cfids->cfid_list_lock); > > @@ -192,19 +196,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > kfree(utf16_path); > > return -ENOENT; > > } > > + > > /* > > * Return cached fid if it is valid (has a lease and has a time). 
> > * Otherwise, it is either a new entry or laundromat worker removed it > > * from @cfids->entries. Caller will put last reference if the latter. > > */ > > - spin_lock(&cfids->cfid_list_lock); > > + spin_lock(&cfid->fid_lock); > > if (cfid->has_lease && cfid->time) { > > - spin_unlock(&cfids->cfid_list_lock); > > + spin_unlock(&cfid->fid_lock); > > *ret_cfid = cfid; > > kfree(utf16_path); > > return 0; > > } > > - spin_unlock(&cfids->cfid_list_lock); > > + spin_unlock(&cfid->fid_lock); > > > > /* > > * Skip any prefix paths in @path as lookup_positive_unlocked() ends up > > @@ -219,17 +224,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > goto out; > > } > > > > - if (!npath[0]) { > > - dentry = dget(cifs_sb->root); > > - } else { > > - dentry = path_to_dentry(cifs_sb, npath); > > - if (IS_ERR(dentry)) { > > - rc = -ENOENT; > > - goto out; > > - } > > - } > > - cfid->dentry = dentry; > > - cfid->tcon = tcon; > > > > /* > > * We do not hold the lock for the open because in case > > @@ -301,9 +295,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > } > > goto oshr_free; > > } > > - cfid->is_open = true; > > - > > - spin_lock(&cfids->cfid_list_lock); > > > > o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; > > oparms.fid->persistent_fid = o_rsp->PersistentFileId; > > @@ -314,7 +305,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > > > > > if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { > > - spin_unlock(&cfids->cfid_list_lock); > > rc = -EINVAL; > > goto oshr_free; > > } > > @@ -323,21 +313,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > &oparms.fid->epoch, > > oparms.fid->lease_key, > > &oplock, NULL, NULL); > > - if (rc) { > > - spin_unlock(&cfids->cfid_list_lock); > > + if (rc) > > goto oshr_free; > > - } > > > > rc = -EINVAL; > > - if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { > > - spin_unlock(&cfids->cfid_list_lock); > > + if (!(oplock & 
SMB2_LEASE_READ_CACHING_HE)) > > goto oshr_free; > > - } > > qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; > > - if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) { > > - spin_unlock(&cfids->cfid_list_lock); > > + if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) > > goto oshr_free; > > - } > > if (!smb2_validate_and_copy_iov( > > le16_to_cpu(qi_rsp->OutputBufferOffset), > > sizeof(struct smb2_file_all_info), > > @@ -345,10 +329,26 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > (char *)&cfid->file_all_info)) > > cfid->file_all_info_is_valid = true; > > > > - cfid->time = jiffies; > > - spin_unlock(&cfids->cfid_list_lock); > > /* At this point the directory handle is fully cached */ > > rc = 0; > > + spin_lock(&cfid->fid_lock); > > + if (!cfid->dentry) { > > + if (!npath[0]) { > > + dentry = dget(cifs_sb->root); > > + } else { > > + dentry = path_to_dentry(cifs_sb, npath); > > + if (IS_ERR(dentry)) { > > + spin_unlock(&cfid->fid_lock); > > + rc = -ENOENT; > > + goto out; > > + } > > + } > > + cfid->dentry = dentry; > > + } > > + cfid->tcon = tcon; > > + cfid->is_open = true; > > + cfid->time = jiffies; > > + spin_unlock(&cfid->fid_lock); > > > > oshr_free: > > SMB2_open_free(&rqst[0]); > > @@ -363,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > cfid->on_list = false; > > cfids->num_entries--; > > } > > + spin_lock(&cfid->fid_lock); > > if (cfid->has_lease) { > > /* > > * We are guaranteed to have two references at this > > @@ -372,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, > > cfid->has_lease = false; > > kref_put(&cfid->refcount, smb2_close_cached_fid); > > } > > + spin_unlock(&cfid->fid_lock); > > spin_unlock(&cfids->cfid_list_lock); > > > > kref_put(&cfid->refcount, smb2_close_cached_fid); > > @@ -400,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, > > > > 
spin_lock(&cfids->cfid_list_lock); > > list_for_each_entry(cfid, &cfids->entries, entry) { > > + spin_lock(&cfid->fid_lock); > > if (dentry && cfid->dentry == dentry) { > > cifs_dbg(FYI, "found a cached file handle by dentry\n"); > > kref_get(&cfid->refcount); > > + spin_unlock(&cfid->fid_lock); > > *ret_cfid = cfid; > > spin_unlock(&cfids->cfid_list_lock); > > return 0; > > } > > + spin_unlock(&cfid->fid_lock); > > } > > spin_unlock(&cfids->cfid_list_lock); > > return -ENOENT; > > @@ -427,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref) > > } > > spin_unlock(&cfid->cfids->cfid_list_lock); > > > > - dput(cfid->dentry); > > - cfid->dentry = NULL; > > + /* no locking necessary as we're the last user of this cfid */ > > + if (cfid->dentry) { > > + dput(cfid->dentry); > > + cfid->dentry = NULL; > > + } > > > > if (cfid->is_open) { > > rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, > > @@ -451,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, > > cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name); > > return; > > } > > - spin_lock(&cfid->cfids->cfid_list_lock); > > + spin_lock(&cfid->fid_lock); > > if (cfid->has_lease) { > > + /* mark as invalid */ > > cfid->has_lease = false; > > kref_put(&cfid->refcount, smb2_close_cached_fid); > > } > > - spin_unlock(&cfid->cfids->cfid_list_lock); > > + spin_unlock(&cfid->fid_lock); > > close_cached_dir(cfid); > > } > > > > @@ -538,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) > > cfids->num_entries--; > > cfid->is_open = false; > > cfid->on_list = false; > > + spin_lock(&cfid->fid_lock); > > if (cfid->has_lease) { > > /* > > * The lease was never cancelled from the server, > > @@ -546,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) > > cfid->has_lease = false; > > } else > > kref_get(&cfid->refcount); > > + spin_unlock(&cfid->fid_lock); > > } > > /* > > * Queue dropping of the dentries once locks have been dropped > > @@ -600,6 
+611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) > > > > spin_lock(&cfids->cfid_list_lock); > > list_for_each_entry(cfid, &cfids->entries, entry) { > > + spin_lock(&cfid->fid_lock); > > if (cfid->has_lease && > > !memcmp(lease_key, > > cfid->fid.lease_key, > > @@ -612,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) > > */ > > list_del(&cfid->entry); > > cfid->on_list = false; > > + spin_unlock(&cfid->fid_lock); > > cfids->num_entries--; > > > > ++tcon->tc_count; > > @@ -621,6 +634,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) > > spin_unlock(&cfids->cfid_list_lock); > > return true; > > } > > + spin_unlock(&cfid->fid_lock); > > } > > spin_unlock(&cfids->cfid_list_lock); > > return false; > > @@ -656,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid) > > WARN_ON(work_pending(&cfid->close_work)); > > WARN_ON(work_pending(&cfid->put_work)); > > > > - dput(cfid->dentry); > > - cfid->dentry = NULL; > > - > > /* > > * Delete all cached dirent names > > */ > > @@ -703,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > > > > spin_lock(&cfids->cfid_list_lock); > > list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { > > + spin_lock(&cfid->fid_lock); > > if (cfid->time && > > time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { > > cfid->on_list = false; > > @@ -717,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > > } else > > kref_get(&cfid->refcount); > > } > > + spin_unlock(&cfid->fid_lock); > > } > > spin_unlock(&cfids->cfid_list_lock); > > > > @@ -726,7 +739,6 @@ static void cfids_laundromat_worker(struct work_struct *work) > > spin_lock(&cfid->fid_lock); > > dentry = cfid->dentry; > > cfid->dentry = NULL; > > - spin_unlock(&cfid->fid_lock); > > > > dput(dentry); > > if (cfid->is_open) { > > @@ -742,6 +754,7 @@ static void cfids_laundromat_worker(struct work_struct *work) > > * was one) or the 
extra one acquired. > > */ > > kref_put(&cfid->refcount, smb2_close_cached_fid); > > + spin_unlock(&cfid->fid_lock); > > } > > queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, > > dir_cache_timeout * HZ); > > You are calling dput() here with a lock held, both in path_to_dentry and > in smb2_close_cached_fid. Is this correct? Hi Henrique, Thanks for reviewing the patches. Do you see any obvious problem with it? dput would call into VFS layer and might end up calling cifs_free_inode. But that does not take any of the competing locks. > > Also, the lock ordering here is lock(fid_lock) -> lock(cifs_tcp_ses_lock) -> > unlock(cifs_tcp_ses_lock) -> unlock(fid_lock), won't this blow up in > another path? Can you please elaborate which code path will result in this lock ordering? > > Thanks, > Henrique
On Sat, May 03, 2025 at 08:24:13AM +0530, Shyam Prasad N wrote: > On Fri, May 2, 2025 at 6:09 PM Henrique Carvalho > <henrique.carvalho@suse.com> wrote: > > > > Hi Shyam, > > > > On Fri, May 02, 2025 at 05:13:40AM +0000, nspmangalore@gmail.com wrote: > > > From: Shyam Prasad N <sprasad@microsoft.com> > > > > > > There are several accesses to cfid structure today without > > > locking fid_lock. This can lead to race conditions that are > > > hard to debug. > > > > > > With this change, I'm trying to make sure that accesses to cfid > > > struct members happen with fid_lock held. > > > > > > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> > > > --- > > > fs/smb/client/cached_dir.c | 87 ++++++++++++++++++++++---------------- > > > 1 file changed, 50 insertions(+), 37 deletions(-) > > > > > > > You are calling dput() here with a lock held, both in path_to_dentry and > > in smb2_close_cached_fid. Is this correct? > > Hi Henrique, > Thanks for reviewing the patches. > > Do you see any obvious problem with it? > dput would call into VFS layer and might end up calling > cifs_free_inode. But that does not take any of the competing locks. > Hi Shyam, Yes, dput() starts with might_sleep(), which means it may preempt (e.g., due to disk I/O), so it must not be called while holding a spinlock. If you compile the kernel with CONFIG_DEBUG_ATOMIC_SLEEP=y you will see this kind of stack dump. 
[ 305.667062][ T940] BUG: sleeping function called from invalid context at security/selinux/hooks.c:283 [ 305.668291][ T940] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 940, name: ls [ 305.669199][ T940] preempt_count: 1, expected: 0 [ 305.669493][ T940] RCU nest depth: 0, expected: 0 [ 305.670092][ T940] 3 locks held by ls/940: [ 305.670362][ T940] #0: ffff8881099b8f08 (&f->f_pos_lock){+.+.}-{4:4}, at: fdget_pos+0x18a/0x1c0 [ 305.671009][ T940] #1: ffff88811c490158 (&type->i_mutex_dir_key#7){.+.+}-{4:4}, at: iterate_dir+0x85/0x270 [ 305.671615][ T940] #2: ffff88810cc620b0 (&cfid->fid_lock){+.+.}-{3:3}, at: open_cached_dir+0x1098/0x14a0 [cifs] [ ... stack trace continues ... ] > > > > Also, the lock ordering here is lock(fid_lock) -> lock(cifs_tcp_ses_lock) -> > > unlock(cifs_tcp_ses_lock) -> unlock(fid_lock), won't this blow up in > > another path? > > Can you please elaborate which code path will result in this lock ordering? I was referring to the following pattern in cifs_laundromat_worker(): spin_lock(&cfid->fid_lock); ... spin_lock(&cifs_tcp_ses_lock); spin_unlock(&cifs_tcp_ses_lock); ... spin_unlock(&cfid->fid_lock); This was more of an open question. I am not certain this causes any issues, and I could not find any concrete problem with it. I brought it up because cifs_tcp_ses_lock is a more global lock than cfid->fid_lock. Best, Henrique
On Sun, May 4, 2025 at 7:27 PM Henrique Carvalho <henrique.carvalho@suse.com> wrote: <snip> > > > Also, the lock ordering here is lock(fid_lock) -> lock(cifs_tcp_ses_lock) -> > > > unlock(cifs_tcp_ses_lock) -> unlock(fid_lock), won't this blow up in > > > another path? > > > > Can you please elaborate which code path will result in this lock ordering? > > I was referring to the following pattern in cifs_laundromat_worker(): > > spin_lock(&cfid->fid_lock); > ... > spin_lock(&cifs_tcp_ses_lock); > spin_unlock(&cifs_tcp_ses_lock); > ... > spin_unlock(&cfid->fid_lock); > > This was more of an open question. I am not certain this causes any issues, > and I could not find any concrete problem with it. > > I brought it up because cifs_tcp_ses_lock is a more global lock than > cfid->fid_lock. That does look like a good catch In the lock ordering list (see cifsglob.h line 1980ff) cached_fid->fid_lock is almost at the end of the list so is after cifs_tcp_ses_lock "if two locks are to be held together, the lock that appears higher in this list needs to be taken before the other" so this does look like the wrong locking order in the example you pointed out. The updated lock ordering list with the proposed ordering for the new locks fid_lock and cfid_mutex is in Shyam's patch: "[PATCH 4/8] cifs: update the lock ordering comments with new mutex"
On Mon, May 5, 2025 at 6:18 AM Steve French <smfrench@gmail.com> wrote: > > On Sun, May 4, 2025 at 7:27 PM Henrique Carvalho > <henrique.carvalho@suse.com> wrote: > <snip> > > > > Also, the lock ordering here is lock(fid_lock) -> lock(cifs_tcp_ses_lock) -> > > > > unlock(cifs_tcp_ses_lock) -> unlock(fid_lock), won't this blow up in > > > > another path? > > > > > > Can you please elaborate which code path will result in this lock ordering? > > > > I was referring to the following pattern in cifs_laundromat_worker(): > > > > spin_lock(&cfid->fid_lock); > > ... > > spin_lock(&cifs_tcp_ses_lock); > > spin_unlock(&cifs_tcp_ses_lock); > > ... > > spin_unlock(&cfid->fid_lock); > > > > This was more of an open question. I am not certain this causes any issues, > > and I could not find any concrete problem with it. > > > > I brought it up because cifs_tcp_ses_lock is a more global lock than > > cfid->fid_lock. > > That does look like a good catch > > In the lock ordering list (see cifsglob.h line 1980ff) > cached_fid->fid_lock > is almost at the end of the list so is after > cifs_tcp_ses_lock > > "if two locks are to be held together, the lock that appears higher in > this list needs to be taken before the other" so this does look like > the wrong locking order in the example you pointed out. > > The updated lock ordering list with the proposed ordering for the new > locks fid_lock and cfid_mutex is in Shyam's patch: > "[PATCH 4/8] cifs: update the lock ordering comments with new mutex" > > -- > Thanks, > > Steve cifs_tcp_ses_lock is not the right lock to use here in the first place. These large locks have caused us plenty of problems in the past with deadlocks. We later decided to break it up into smaller locks for protecting individual fields. cifs_tcp_ses_lock is meant to protect cifs_tcp_ses_list only. tc_count should be protected by tc_lock. I'll make the changes here.
On Mon, May 5, 2025 at 5:57 AM Henrique Carvalho <henrique.carvalho@suse.com> wrote: > > On Sat, May 03, 2025 at 08:24:13AM +0530, Shyam Prasad N wrote: > > On Fri, May 2, 2025 at 6:09 PM Henrique Carvalho > > <henrique.carvalho@suse.com> wrote: > > > > > > Hi Shyam, > > > > > > On Fri, May 02, 2025 at 05:13:40AM +0000, nspmangalore@gmail.com wrote: > > > > From: Shyam Prasad N <sprasad@microsoft.com> > > > > > > > > There are several accesses to cfid structure today without > > > > locking fid_lock. This can lead to race conditions that are > > > > hard to debug. > > > > > > > > With this change, I'm trying to make sure that accesses to cfid > > > > struct members happen with fid_lock held. > > > > > > > > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> > > > > --- > > > > fs/smb/client/cached_dir.c | 87 ++++++++++++++++++++++---------------- > > > > 1 file changed, 50 insertions(+), 37 deletions(-) > > > > > > > > > > You are calling dput() here with a lock held, both in path_to_dentry and > > > in smb2_close_cached_fid. Is this correct? > > > > Hi Henrique, > > Thanks for reviewing the patches. > > > > Do you see any obvious problem with it? > > dput would call into VFS layer and might end up calling > > cifs_free_inode. But that does not take any of the competing locks. > > > > Hi Shyam, > > Yes, dput() starts with might_sleep(), which means it may preemp (e.g., > due to disk I/O), so it must not be called while holding a spinlock. > > If you compile the kernel with CONFIG_DEBUG_ATOMIC_SLEEP=y you will see > this kind of stack dump. 
> > [ 305.667062][ T940] BUG: sleeping function called from invalid context at security/selinux/hooks.c:283 > [ 305.668291][ T940] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 940, name: ls > [ 305.669199][ T940] preempt_count: 1, expected: 0 > [ 305.669493][ T940] RCU nest depth: 0, expected: 0 > [ 305.670092][ T940] 3 locks held by ls/940: > [ 305.670362][ T940] #0: ffff8881099b8f08 (&f->f_pos_lock){+.+.}-{4:4}, at: fdget_pos+0x18a/0x1c0 > [ 305.671009][ T940] #1: ffff88811c490158 (&type->i_mutex_dir_key#7){.+.+}-{4:4}, at: iterate_dir+0x85/0x270 > [ 305.671615][ T940] #2: ffff88810cc620b0 (&cfid->fid_lock){+.+.}-{3:3}, at: open_cached_dir+0x1098/0x14a0 [cifs] > [ ... stack trace continues ... ] > That's a good point. I'll make sure to dput outside spinlocks. > > > > > > Also, the lock ordering here is lock(fid_lock) -> lock(cifs_tcp_ses_lock) -> > > > unlock(cifs_tcp_ses_lock) -> unlock(fid_lock), won't this blow up in > > > another path? > > > > Can you please elaborate which code path will result in this lock ordering? > > I was referring to the following pattern in cifs_laundromat_worker(): > > spin_lock(&cfid->fid_lock); > ... > spin_lock(&cifs_tcp_ses_lock); > spin_unlock(&cifs_tcp_ses_lock); > ... > spin_unlock(&cfid->fid_lock); > > This was more of an open question. I am not certain this causes any issues, > and I could not find any concrete problem with it. > > I brought it up because cifs_tcp_ses_lock is a more global lock than > cfid->fid_lock. > > > Best, > Henrique
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe738623cf1b..f074675fa6be 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -31,6 +31,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { + spin_lock(&cfid->fid_lock); if (!strcmp(cfid->path, path)) { /* * If it doesn't have a lease it is either not yet @@ -38,13 +39,16 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * being deleted due to a lease break. */ if (!cfid->time || !cfid->has_lease) { + spin_unlock(&cfid->fid_lock); spin_unlock(&cfids->cfid_list_lock); return NULL; } kref_get(&cfid->refcount); + spin_unlock(&cfid->fid_lock); spin_unlock(&cfids->cfid_list_lock); return cfid; } + spin_unlock(&cfid->fid_lock); } if (lookup_only) { spin_unlock(&cfids->cfid_list_lock); @@ -192,19 +196,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, kfree(utf16_path); return -ENOENT; } + /* * Return cached fid if it is valid (has a lease and has a time). * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. 
*/ - spin_lock(&cfids->cfid_list_lock); + spin_lock(&cfid->fid_lock); if (cfid->has_lease && cfid->time) { - spin_unlock(&cfids->cfid_list_lock); + spin_unlock(&cfid->fid_lock); *ret_cfid = cfid; kfree(utf16_path); return 0; } - spin_unlock(&cfids->cfid_list_lock); + spin_unlock(&cfid->fid_lock); /* * Skip any prefix paths in @path as lookup_positive_unlocked() ends up @@ -219,17 +224,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, goto out; } - if (!npath[0]) { - dentry = dget(cifs_sb->root); - } else { - dentry = path_to_dentry(cifs_sb, npath); - if (IS_ERR(dentry)) { - rc = -ENOENT; - goto out; - } - } - cfid->dentry = dentry; - cfid->tcon = tcon; /* * We do not hold the lock for the open because in case @@ -301,9 +295,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, } goto oshr_free; } - cfid->is_open = true; - - spin_lock(&cfids->cfid_list_lock); o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; oparms.fid->persistent_fid = o_rsp->PersistentFileId; @@ -314,7 +305,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { - spin_unlock(&cfids->cfid_list_lock); rc = -EINVAL; goto oshr_free; } @@ -323,21 +313,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, &oparms.fid->epoch, oparms.fid->lease_key, &oplock, NULL, NULL); - if (rc) { - spin_unlock(&cfids->cfid_list_lock); + if (rc) goto oshr_free; - } rc = -EINVAL; - if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { - spin_unlock(&cfids->cfid_list_lock); + if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) goto oshr_free; - } qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; - if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) { - spin_unlock(&cfids->cfid_list_lock); + if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) goto oshr_free; - } if (!smb2_validate_and_copy_iov( le16_to_cpu(qi_rsp->OutputBufferOffset), sizeof(struct 
smb2_file_all_info), @@ -345,10 +329,26 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, (char *)&cfid->file_all_info)) cfid->file_all_info_is_valid = true; - cfid->time = jiffies; - spin_unlock(&cfids->cfid_list_lock); /* At this point the directory handle is fully cached */ rc = 0; + spin_lock(&cfid->fid_lock); + if (!cfid->dentry) { + if (!npath[0]) { + dentry = dget(cifs_sb->root); + } else { + dentry = path_to_dentry(cifs_sb, npath); + if (IS_ERR(dentry)) { + spin_unlock(&cfid->fid_lock); + rc = -ENOENT; + goto out; + } + } + cfid->dentry = dentry; + } + cfid->tcon = tcon; + cfid->is_open = true; + cfid->time = jiffies; + spin_unlock(&cfid->fid_lock); oshr_free: SMB2_open_free(&rqst[0]); @@ -363,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, cfid->on_list = false; cfids->num_entries--; } + spin_lock(&cfid->fid_lock); if (cfid->has_lease) { /* * We are guaranteed to have two references at this @@ -372,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, cfid->has_lease = false; kref_put(&cfid->refcount, smb2_close_cached_fid); } + spin_unlock(&cfid->fid_lock); spin_unlock(&cfids->cfid_list_lock); kref_put(&cfid->refcount, smb2_close_cached_fid); @@ -400,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { + spin_lock(&cfid->fid_lock); if (dentry && cfid->dentry == dentry) { cifs_dbg(FYI, "found a cached file handle by dentry\n"); kref_get(&cfid->refcount); + spin_unlock(&cfid->fid_lock); *ret_cfid = cfid; spin_unlock(&cfids->cfid_list_lock); return 0; } + spin_unlock(&cfid->fid_lock); } spin_unlock(&cfids->cfid_list_lock); return -ENOENT; @@ -427,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref) } spin_unlock(&cfid->cfids->cfid_list_lock); - dput(cfid->dentry); - cfid->dentry = NULL; + /* no locking necessary as we're the last user of this cfid */ + if (cfid->dentry) { + dput(cfid->dentry); + 
cfid->dentry = NULL; + } if (cfid->is_open) { rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, @@ -451,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name); return; } - spin_lock(&cfid->cfids->cfid_list_lock); + spin_lock(&cfid->fid_lock); if (cfid->has_lease) { + /* mark as invalid */ cfid->has_lease = false; kref_put(&cfid->refcount, smb2_close_cached_fid); } - spin_unlock(&cfid->cfids->cfid_list_lock); + spin_unlock(&cfid->fid_lock); close_cached_dir(cfid); } @@ -538,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) cfids->num_entries--; cfid->is_open = false; cfid->on_list = false; + spin_lock(&cfid->fid_lock); if (cfid->has_lease) { /* * The lease was never cancelled from the server, @@ -546,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) cfid->has_lease = false; } else kref_get(&cfid->refcount); + spin_unlock(&cfid->fid_lock); } /* * Queue dropping of the dentries once locks have been dropped @@ -600,6 +611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { + spin_lock(&cfid->fid_lock); if (cfid->has_lease && !memcmp(lease_key, cfid->fid.lease_key, @@ -612,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) */ list_del(&cfid->entry); cfid->on_list = false; + spin_unlock(&cfid->fid_lock); cfids->num_entries--; ++tcon->tc_count; @@ -621,6 +634,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) spin_unlock(&cfids->cfid_list_lock); return true; } + spin_unlock(&cfid->fid_lock); } spin_unlock(&cfids->cfid_list_lock); return false; @@ -656,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid) WARN_ON(work_pending(&cfid->close_work)); WARN_ON(work_pending(&cfid->put_work)); - dput(cfid->dentry); - cfid->dentry = NULL; - /* * Delete all cached dirent 
names */ @@ -703,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { + spin_lock(&cfid->fid_lock); if (cfid->time && time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { cfid->on_list = false; @@ -717,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct *work) } else kref_get(&cfid->refcount); } + spin_unlock(&cfid->fid_lock); } spin_unlock(&cfids->cfid_list_lock); @@ -726,7 +739,6 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_lock(&cfid->fid_lock); dentry = cfid->dentry; cfid->dentry = NULL; - spin_unlock(&cfid->fid_lock); dput(dentry); if (cfid->is_open) { @@ -742,6 +754,7 @@ static void cfids_laundromat_worker(struct work_struct *work) * was one) or the extra one acquired. */ kref_put(&cfid->refcount, smb2_close_cached_fid); + spin_unlock(&cfid->fid_lock); } queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ);