diff mbox series

[SMB3] allow controlling length of time directory entries are cached with dir leases

Message ID CAH2r5mt99zVnZfTP_9Z4BNEa2L8Yw=8ds1USPhasbO06hLaGjQ@mail.gmail.com
State New
Headers show
Series [SMB3] allow controlling length of time directory entries are cached with dir leases | expand

Commit Message

Steve French Aug. 27, 2023, 5:12 a.m. UTC
Currently with directory leases we cache directory contents for a fixed period
of time (default 30 seconds) but for many workloads this is too short.  Allow
configuring the maximum amount of time directory entries are cached when a
directory lease is held on that directory (and set default timeout to
60 seconds).
Add module load parm "max_dir_cache"

For example to set the timeout to 10 minutes you would do:

  echo 600 > /sys/module/cifs/parameters/max_dir_cache

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cached_dir.c |  2 +-
 fs/smb/client/cifsfs.c     | 12 ++++++++++++
 fs/smb/client/cifsglob.h   |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

 extern atomic_t mid_count;

Comments

Steve French Aug. 31, 2023, 3:52 a.m. UTC | #1
updated patch with Barath's suggestion of renaming it from
/sys/module/cifs/parameters/max_dir_cache to
/sys/module/cifs/parameters/dir_cache_timeout and also changed it so
if set to zero we disable
directory entry caching.

See attached.

On Sun, Aug 27, 2023 at 12:12 AM Steve French <smfrench@gmail.com> wrote:
>
> Currently with directory leases we cache directory contents for a fixed period
> of time (default 30 seconds) but for many workloads this is too short.  Allow
> configuring the maximum amount of time directory entries are cached when a
> directory lease is held on that directory (and set default timeout to
> 60 seconds).
> Add module load parm "max_dir_cache"
>
> For example to set the timeout to 10 minutes you would do:
>
>   echo 600 > /sys/module/cifs/parameters/max_dir_cache
>
> Signed-off-by: Steve French <stfrench@microsoft.com>
> ---
>  fs/smb/client/cached_dir.c |  2 +-
>  fs/smb/client/cifsfs.c     | 12 ++++++++++++
>  fs/smb/client/cifsglob.h   |  1 +
>  3 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> index 2d5e9a9d5b8b..e48a902efd52 100644
> --- a/fs/smb/client/cached_dir.c
> +++ b/fs/smb/client/cached_dir.c
> @@ -582,7 +582,7 @@ cifs_cfids_laundromat_thread(void *p)
>   return 0;
>   spin_lock(&cfids->cfid_list_lock);
>   list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
> - if (time_after(jiffies, cfid->time + HZ * 30)) {
> + if (time_after(jiffies, cfid->time + HZ * max_dir_cache)) {
>   list_del(&cfid->entry);
>   list_add(&cfid->entry, &entry);
>   cfids->num_entries--;
> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
> index d49fd2bf71b0..7a89718d2a59 100644
> --- a/fs/smb/client/cifsfs.c
> +++ b/fs/smb/client/cifsfs.c
> @@ -117,6 +117,10 @@ module_param(cifs_max_pending, uint, 0444);
>  MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
>      "CIFS/SMB1 dialect (N/A for SMB3) "
>      "Default: 32767 Range: 2 to 32767.");
> +unsigned int max_dir_cache = 60;
> +module_param(max_dir_cache, uint, 0644);
> +MODULE_PARM_DESC(max_dir_cache, "Number of seconds to cache directory
> contents for which we have a lease. Default: 60 "
> + "Range: 1 to 65000 seconds");
>  #ifdef CONFIG_CIFS_STATS2
>  unsigned int slow_rsp_threshold = 1;
>  module_param(slow_rsp_threshold, uint, 0644);
> @@ -1679,6 +1683,14 @@ init_cifs(void)
>   CIFS_MAX_REQ);
>   }
>
> + if (max_dir_cache < 1) {
> + max_dir_cache = 1;
> + cifs_dbg(VFS, "max_dir_cache timeout set to min of 1 second\n");
> + } else if (max_dir_cache > 65000) {
> + max_dir_cache = 65000;
> + cifs_dbg(VFS, "max_dir_cache timeout set to max of 65000 seconds\n");
> + }
> +
>   cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
>   if (!cifsiod_wq) {
>   rc = -ENOMEM;
> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
> index 259e231f8b4f..7aeeaa260cce 100644
> --- a/fs/smb/client/cifsglob.h
> +++ b/fs/smb/client/cifsglob.h
> @@ -2016,6 +2016,7 @@ extern unsigned int CIFSMaxBufSize;  /* max size
> not including hdr */
>  extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
>  extern unsigned int cifs_min_small;  /* min size of small buf pool */
>  extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
> +extern unsigned int max_dir_cache; /* max time for directory lease
> caching of dir */
>  extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
>  extern atomic_t mid_count;
>
>
> --
> Thanks,
>
> Steve
Ralph Boehme Sept. 1, 2023, 9:04 a.m. UTC | #2
Just wondering: how does a Windows client handle this? Does it use a 
timeout too? Which one? 60 seconds still seem rather short.

-slow

On 8/31/23 05:52, Steve French wrote:
> updated patch with Barath's suggestion of renaming it from
> /sys/module/cifs/parameters/max_dir_cache to
> /sys/module/cifs/parameters/dir_cache_timeout and also changed it so
> if set to zero we disable
> directory entry caching.
> 
> See attached.
> 
> On Sun, Aug 27, 2023 at 12:12 AM Steve French <smfrench@gmail.com> wrote:
>>
>> Currently with directory leases we cache directory contents for a fixed period
>> of time (default 30 seconds) but for many workloads this is too short.  Allow
>> configuring the maximum amount of time directory entries are cached when a
>> directory lease is held on that directory (and set default timeout to
>> 60 seconds).
>> Add module load parm "max_dir_cache"
>>
>> For example to set the timeout to 10 minutes you would do:
>>
>>    echo 600 > /sys/module/cifs/parameters/max_dir_cache
>>
>> Signed-off-by: Steve French <stfrench@microsoft.com>
>> ---
>>   fs/smb/client/cached_dir.c |  2 +-
>>   fs/smb/client/cifsfs.c     | 12 ++++++++++++
>>   fs/smb/client/cifsglob.h   |  1 +
>>   3 files changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
>> index 2d5e9a9d5b8b..e48a902efd52 100644
>> --- a/fs/smb/client/cached_dir.c
>> +++ b/fs/smb/client/cached_dir.c
>> @@ -582,7 +582,7 @@ cifs_cfids_laundromat_thread(void *p)
>>    return 0;
>>    spin_lock(&cfids->cfid_list_lock);
>>    list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
>> - if (time_after(jiffies, cfid->time + HZ * 30)) {
>> + if (time_after(jiffies, cfid->time + HZ * max_dir_cache)) {
>>    list_del(&cfid->entry);
>>    list_add(&cfid->entry, &entry);
>>    cfids->num_entries--;
>> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
>> index d49fd2bf71b0..7a89718d2a59 100644
>> --- a/fs/smb/client/cifsfs.c
>> +++ b/fs/smb/client/cifsfs.c
>> @@ -117,6 +117,10 @@ module_param(cifs_max_pending, uint, 0444);
>>   MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
>>       "CIFS/SMB1 dialect (N/A for SMB3) "
>>       "Default: 32767 Range: 2 to 32767.");
>> +unsigned int max_dir_cache = 60;
>> +module_param(max_dir_cache, uint, 0644);
>> +MODULE_PARM_DESC(max_dir_cache, "Number of seconds to cache directory
>> contents for which we have a lease. Default: 60 "
>> + "Range: 1 to 65000 seconds");
>>   #ifdef CONFIG_CIFS_STATS2
>>   unsigned int slow_rsp_threshold = 1;
>>   module_param(slow_rsp_threshold, uint, 0644);
>> @@ -1679,6 +1683,14 @@ init_cifs(void)
>>    CIFS_MAX_REQ);
>>    }
>>
>> + if (max_dir_cache < 1) {
>> + max_dir_cache = 1;
>> + cifs_dbg(VFS, "max_dir_cache timeout set to min of 1 second\n");
>> + } else if (max_dir_cache > 65000) {
>> + max_dir_cache = 65000;
>> + cifs_dbg(VFS, "max_dir_cache timeout set to max of 65000 seconds\n");
>> + }
>> +
>>    cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
>>    if (!cifsiod_wq) {
>>    rc = -ENOMEM;
>> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
>> index 259e231f8b4f..7aeeaa260cce 100644
>> --- a/fs/smb/client/cifsglob.h
>> +++ b/fs/smb/client/cifsglob.h
>> @@ -2016,6 +2016,7 @@ extern unsigned int CIFSMaxBufSize;  /* max size
>> not including hdr */
>>   extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
>>   extern unsigned int cifs_min_small;  /* min size of small buf pool */
>>   extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
>> +extern unsigned int max_dir_cache; /* max time for directory lease
>> caching of dir */
>>   extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
>>   extern atomic_t mid_count;
>>
>>
>> --
>> Thanks,
>>
>> Steve
> 
> 
>
Steve French Sept. 1, 2023, 4:22 p.m. UTC | #3
I lean toward eventually increasing the default to something more
similar to Windows (or at least 5 minutes) to improve some common
scenarios, but it looks like Windows sets it to 10 minutes based on
this link, but first want to make sure we are grabbing leases in all
of the obvious places (and not unnecessarily breaking leases - there
are a few places where we aren't sending the lease key and we also are
missing setting the parent lease key)

See "DirectoryCacheEntriesMax" iand "DormantDirectoryTimeout" in
LanmanWorkstation config.  The description
https://learn.microsoft.com/en-us/windows-server/administration/performance-tuning/role/file-server/is
of some of the related parms was a little vague but presumably
DirectoryCacheEntriesMax is the number of directories (16) that are
cached by default (ie number of directory leases that are held) and
DormantDirectoryTimeout (default 600 seconds) is how long they are
held.

On Fri, Sep 1, 2023 at 4:04 AM Ralph Boehme <slow@samba.org> wrote:
>
> Just wondering: how does a Windows client handle this? Does it use a
> timeout too? Which one? 60 seconds still seem rather short.
>
> -slow
>
> On 8/31/23 05:52, Steve French wrote:
> > updated patch with Barath's suggestion of renaming it from
> > /sys/module/cifs/parameters/max_dir_cache to
> > /sys/module/cifs/parameters/dir_cache_timeout and also changed it so
> > if set to zero we disable
> > directory entry caching.
> >
> > See attached.
> >
> > On Sun, Aug 27, 2023 at 12:12 AM Steve French <smfrench@gmail.com> wrote:
> >>
> >> Currently with directory leases we cache directory contents for a fixed period
> >> of time (default 30 seconds) but for many workloads this is too short.  Allow
> >> configuring the maximum amount of time directory entries are cached when a
> >> directory lease is held on that directory (and set default timeout to
> >> 60 seconds).
> >> Add module load parm "max_dir_cache"
> >>
> >> For example to set the timeout to 10 minutes you would do:
> >>
> >>    echo 600 > /sys/module/cifs/parameters/max_dir_cache
> >>
> >> Signed-off-by: Steve French <stfrench@microsoft.com>
> >> ---
> >>   fs/smb/client/cached_dir.c |  2 +-
> >>   fs/smb/client/cifsfs.c     | 12 ++++++++++++
> >>   fs/smb/client/cifsglob.h   |  1 +
> >>   3 files changed, 14 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> >> index 2d5e9a9d5b8b..e48a902efd52 100644
> >> --- a/fs/smb/client/cached_dir.c
> >> +++ b/fs/smb/client/cached_dir.c
> >> @@ -582,7 +582,7 @@ cifs_cfids_laundromat_thread(void *p)
> >>    return 0;
> >>    spin_lock(&cfids->cfid_list_lock);
> >>    list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
> >> - if (time_after(jiffies, cfid->time + HZ * 30)) {
> >> + if (time_after(jiffies, cfid->time + HZ * max_dir_cache)) {
> >>    list_del(&cfid->entry);
> >>    list_add(&cfid->entry, &entry);
> >>    cfids->num_entries--;
> >> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
> >> index d49fd2bf71b0..7a89718d2a59 100644
> >> --- a/fs/smb/client/cifsfs.c
> >> +++ b/fs/smb/client/cifsfs.c
> >> @@ -117,6 +117,10 @@ module_param(cifs_max_pending, uint, 0444);
> >>   MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
> >>       "CIFS/SMB1 dialect (N/A for SMB3) "
> >>       "Default: 32767 Range: 2 to 32767.");
> >> +unsigned int max_dir_cache = 60;
> >> +module_param(max_dir_cache, uint, 0644);
> >> +MODULE_PARM_DESC(max_dir_cache, "Number of seconds to cache directory
> >> contents for which we have a lease. Default: 60 "
> >> + "Range: 1 to 65000 seconds");
> >>   #ifdef CONFIG_CIFS_STATS2
> >>   unsigned int slow_rsp_threshold = 1;
> >>   module_param(slow_rsp_threshold, uint, 0644);
> >> @@ -1679,6 +1683,14 @@ init_cifs(void)
> >>    CIFS_MAX_REQ);
> >>    }
> >>
> >> + if (max_dir_cache < 1) {
> >> + max_dir_cache = 1;
> >> + cifs_dbg(VFS, "max_dir_cache timeout set to min of 1 second\n");
> >> + } else if (max_dir_cache > 65000) {
> >> + max_dir_cache = 65000;
> >> + cifs_dbg(VFS, "max_dir_cache timeout set to max of 65000 seconds\n");
> >> + }
> >> +
> >>    cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
> >>    if (!cifsiod_wq) {
> >>    rc = -ENOMEM;
> >> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
> >> index 259e231f8b4f..7aeeaa260cce 100644
> >> --- a/fs/smb/client/cifsglob.h
> >> +++ b/fs/smb/client/cifsglob.h
> >> @@ -2016,6 +2016,7 @@ extern unsigned int CIFSMaxBufSize;  /* max size
> >> not including hdr */
> >>   extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
> >>   extern unsigned int cifs_min_small;  /* min size of small buf pool */
> >>   extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
> >> +extern unsigned int max_dir_cache; /* max time for directory lease
> >> caching of dir */
> >>   extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
> >>   extern atomic_t mid_count;
> >>
> >>
> >> --
> >> Thanks,
> >>
> >> Steve
> >
> >
> >
>
ronnie sahlberg Sept. 1, 2023, 4:31 p.m. UTC | #4
Maybe just re-set the timestamp every time the cached directory is reopened,
that way a hot directory will remain in cache indefinitely but one
that is cold will
quickly time out and make space for something else to be chaced.

On Sat, 2 Sept 2023 at 02:22, Steve French <smfrench@gmail.com> wrote:
>
> I lean toward eventually increasing the default to something more
> similar to Windows (or at least 5 minutes) to improve some common
> scenarios, but it looks like Windows sets it to 10 minutes based on
> this link, but first want to make sure we are grabbing leases in all
> of the obvious places (and not unnecessarily breaking leases - there
> are a few places where we aren't sending the lease key and we also are
> missing setting the parent lease key)
>
> See "DirectoryCacheEntriesMax" iand "DormantDirectoryTimeout" in
> LanmanWorkstation config.  The description
> https://learn.microsoft.com/en-us/windows-server/administration/performance-tuning/role/file-server/is
> of some of the related parms was a little vague but presumably
> DirectoryCacheEntriesMax is the number of directories (16) that are
> cached by default (ie number of directory leases that are held) and
> DormantDirectoryTimeout (default 600 seconds) is how long they are
> held.
>
> On Fri, Sep 1, 2023 at 4:04 AM Ralph Boehme <slow@samba.org> wrote:
> >
> > Just wondering: how does a Windows client handle this? Does it use a
> > timeout too? Which one? 60 seconds still seem rather short.
> >
> > -slow
> >
> > On 8/31/23 05:52, Steve French wrote:
> > > updated patch with Barath's suggestion of renaming it from
> > > /sys/module/cifs/parameters/max_dir_cache to
> > > /sys/module/cifs/parameters/dir_cache_timeout and also changed it so
> > > if set to zero we disable
> > > directory entry caching.
> > >
> > > See attached.
> > >
> > > On Sun, Aug 27, 2023 at 12:12 AM Steve French <smfrench@gmail.com> wrote:
> > >>
> > >> Currently with directory leases we cache directory contents for a fixed period
> > >> of time (default 30 seconds) but for many workloads this is too short.  Allow
> > >> configuring the maximum amount of time directory entries are cached when a
> > >> directory lease is held on that directory (and set default timeout to
> > >> 60 seconds).
> > >> Add module load parm "max_dir_cache"
> > >>
> > >> For example to set the timeout to 10 minutes you would do:
> > >>
> > >>    echo 600 > /sys/module/cifs/parameters/max_dir_cache
> > >>
> > >> Signed-off-by: Steve French <stfrench@microsoft.com>
> > >> ---
> > >>   fs/smb/client/cached_dir.c |  2 +-
> > >>   fs/smb/client/cifsfs.c     | 12 ++++++++++++
> > >>   fs/smb/client/cifsglob.h   |  1 +
> > >>   3 files changed, 14 insertions(+), 1 deletion(-)
> > >>
> > >> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> > >> index 2d5e9a9d5b8b..e48a902efd52 100644
> > >> --- a/fs/smb/client/cached_dir.c
> > >> +++ b/fs/smb/client/cached_dir.c
> > >> @@ -582,7 +582,7 @@ cifs_cfids_laundromat_thread(void *p)
> > >>    return 0;
> > >>    spin_lock(&cfids->cfid_list_lock);
> > >>    list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
> > >> - if (time_after(jiffies, cfid->time + HZ * 30)) {
> > >> + if (time_after(jiffies, cfid->time + HZ * max_dir_cache)) {
> > >>    list_del(&cfid->entry);
> > >>    list_add(&cfid->entry, &entry);
> > >>    cfids->num_entries--;
> > >> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
> > >> index d49fd2bf71b0..7a89718d2a59 100644
> > >> --- a/fs/smb/client/cifsfs.c
> > >> +++ b/fs/smb/client/cifsfs.c
> > >> @@ -117,6 +117,10 @@ module_param(cifs_max_pending, uint, 0444);
> > >>   MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
> > >>       "CIFS/SMB1 dialect (N/A for SMB3) "
> > >>       "Default: 32767 Range: 2 to 32767.");
> > >> +unsigned int max_dir_cache = 60;
> > >> +module_param(max_dir_cache, uint, 0644);
> > >> +MODULE_PARM_DESC(max_dir_cache, "Number of seconds to cache directory
> > >> contents for which we have a lease. Default: 60 "
> > >> + "Range: 1 to 65000 seconds");
> > >>   #ifdef CONFIG_CIFS_STATS2
> > >>   unsigned int slow_rsp_threshold = 1;
> > >>   module_param(slow_rsp_threshold, uint, 0644);
> > >> @@ -1679,6 +1683,14 @@ init_cifs(void)
> > >>    CIFS_MAX_REQ);
> > >>    }
> > >>
> > >> + if (max_dir_cache < 1) {
> > >> + max_dir_cache = 1;
> > >> + cifs_dbg(VFS, "max_dir_cache timeout set to min of 1 second\n");
> > >> + } else if (max_dir_cache > 65000) {
> > >> + max_dir_cache = 65000;
> > >> + cifs_dbg(VFS, "max_dir_cache timeout set to max of 65000 seconds\n");
> > >> + }
> > >> +
> > >>    cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
> > >>    if (!cifsiod_wq) {
> > >>    rc = -ENOMEM;
> > >> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
> > >> index 259e231f8b4f..7aeeaa260cce 100644
> > >> --- a/fs/smb/client/cifsglob.h
> > >> +++ b/fs/smb/client/cifsglob.h
> > >> @@ -2016,6 +2016,7 @@ extern unsigned int CIFSMaxBufSize;  /* max size
> > >> not including hdr */
> > >>   extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
> > >>   extern unsigned int cifs_min_small;  /* min size of small buf pool */
> > >>   extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
> > >> +extern unsigned int max_dir_cache; /* max time for directory lease
> > >> caching of dir */
> > >>   extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
> > >>   extern atomic_t mid_count;
> > >>
> > >>
> > >> --
> > >> Thanks,
> > >>
> > >> Steve
> > >
> > >
> > >
> >
>
>
> --
> Thanks,
>
> Steve
Steve French Sept. 1, 2023, 10:20 p.m. UTC | #5
On Fri, Sep 1, 2023 at 11:31 AM ronnie sahlberg
<ronniesahlberg@gmail.com> wrote:
>
> Maybe just re-set the timestamp every time the cached directory is reopened,
> that way a hot directory will remain in cache indefinitely but one
> that is cold will
> quickly time out and make space for something else to be chaced.


Makes sense
Steve French Sept. 1, 2023, 10:46 p.m. UTC | #6
I also noticed that Windows apparently lets you control the size of
the directory entry cache (the file info cached for directories). See
below:

DirectoryCacheEntrySizeMax
HKLM\System\CurrentControlSet\Services\LanmanWorkstation\Parameters\DirectoryCacheEntrySizeMax
The default is 64 KB. This is the maximum size of directory cache entries.

Should we add a tuneable similar to this (per mount? per system?)

On Fri, Sep 1, 2023 at 5:20 PM Steve French <smfrench@gmail.com> wrote:
>
> On Fri, Sep 1, 2023 at 11:31 AM ronnie sahlberg
> <ronniesahlberg@gmail.com> wrote:
> >
> > Maybe just re-set the timestamp every time the cached directory is reopened,
> > that way a hot directory will remain in cache indefinitely but one
> > that is cold will
> > quickly time out and make space for something else to be chaced.
>
>
> Makes sense
>
>
> --
> Thanks,
>
> Steve
ronnie sahlberg Sept. 4, 2023, 3:44 a.m. UTC | #7
On Sat, 2 Sept 2023 at 08:47, Steve French <smfrench@gmail.com> wrote:
>
> I also noticed that Windows apparently lets you control the size of
> the directory entry cache (the file info cached for directories). See
> below:
>
> DirectoryCacheEntrySizeMax
> HKLM\System\CurrentControlSet\Services\LanmanWorkstation\Parameters\DirectoryCacheEntrySizeMax
> The default is 64 KB. This is the maximum size of directory cache entries.
>
> Should we add a tuneable similar to this (per mount? per system?)

Probably not because most of the time these settings do not really
work, and often, when you increase them you make things worse.

I think you want to implement something like when a cached directory
is re-opened then the timeout is reset so that
hot directories will remain in the cache longer. Which is what you
want. You want hot data in the case.

On the other hand, you want cold data to expire as quickly as
possible, because cache that is held up by cold data can not
be used to store hot data until the cold data has expired from the
cache. So you want this timer as short as possible.
The shortest possible timeout without it also expiring out hot data.

Shorter timeouts and quicker expunge oc cache == better performance.
It might not sound intuitively but I can show with a simple example.

Assume your cache has 10 slots to store a directory.
Assume you have 1000 cold directories that are accessed relatively infrequently.
Assume you have 1 directory that is hot and is accessed 10 times more
frequently than a cold directory.

We now changed the timeout to 60 seconds. This means any cold
directory that enters the cache will sit in the cache and block that
entry for 60 seconds
until it is cleared and something else can use that cache slot.
This is akin a model where every 60 seconds you have a lottery where
10 directories will win entry to the cache.
What is the probability that a hot directory wins the lottery and
becomes cached?
In this example it is 1%  because the access to the hot directory is
only 1% of all access.
All the cold directories have just a 0.1% chance of becomming cached
but since there are so  many of them they will still dominate.
(The problem we have to solve now is to get the hot directory into the
cache as fast as possible and to get it to remain in cache for as long
as possible.)

On average thus we will have to wait 50 iterations until the hot
directory will even enter the cache for the first time,  or it will on
average take 50 minutes
before the hot directory is even cached.
If we had left the original 30 second timeout it would "only" have
taken 25 minutes on average to get this directory into the cache.
This kind of suggests that even 30 seconds would be way to big for
this example and maybe we should use 10 seconds, or less.

You want hot directories in the cache for as long as possible. A good
way to do this is to make them sticky, so that if they are frequently
accessed while
cached, make them sticky so they do not expire as easily from the cache.
On the other hand, any cold directory you want to expire from the
cache as fast as possible since every time you hold a cold directory
in cache, that part of the cache becomes
useless and wasted.  You do this by, for example, setting this timeout
as low as possible. So they are kicked out as soon as possible.







>
> On Fri, Sep 1, 2023 at 5:20 PM Steve French <smfrench@gmail.com> wrote:
> >
> > On Fri, Sep 1, 2023 at 11:31 AM ronnie sahlberg
> > <ronniesahlberg@gmail.com> wrote:
> > >
> > > Maybe just re-set the timestamp every time the cached directory is reopened,
> > > that way a hot directory will remain in cache indefinitely but one
> > > that is cold will
> > > quickly time out and make space for something else to be chaced.
> >
> >
> > Makes sense
> >
> >
> > --
> > Thanks,
> >
> > Steve
>
>
>
> --
> Thanks,
>
> Steve
ronnie sahlberg Sept. 4, 2023, 3:57 a.m. UTC | #8
On Mon, 4 Sept 2023 at 13:44, ronnie sahlberg <ronniesahlberg@gmail.com> wrote:
>
> On Sat, 2 Sept 2023 at 08:47, Steve French <smfrench@gmail.com> wrote:
> >
> > I also noticed that Windows apparently lets you control the size of
> > the directory entry cache (the file info cached for directories). See
> > below:
> >
> > DirectoryCacheEntrySizeMax
> > HKLM\System\CurrentControlSet\Services\LanmanWorkstation\Parameters\DirectoryCacheEntrySizeMax
> > The default is 64 KB. This is the maximum size of directory cache entries.
> >
> > Should we add a tuneable similar to this (per mount? per system?)
>
> Probably not because most of the time these settings do not really
> work, and often, when you increase them you make things worse.
>
> I think you want to implement something like when a cached directory
> is re-opened then the timeout is reset so that
> hot directories will remain in the cache longer. Which is what you
> want. You want hot data in the case.
>
> On the other hand, you want cold data to expire as quickly as
> possible, because cache that is held up by cold data can not
> be used to store hot data until the cold data has expired from the
> cache. So you want this timer as short as possible.
> The shortest possible timeout without it also expiring out hot data.
>
> Shorter timeouts and quicker expunge oc cache == better performance.
> It might not sound intuitively but I can show with a simple example.
>
> Assume your cache has 10 slots to store a directory.
> Assume you have 1000 cold directories that are accessed relatively infrequently.
> Assume you have 1 directory that is hot and is accessed 10 times more
> frequently than a cold directory.
>
> We now changed the timeout to 60 seconds. This means any cold
> directory that enters the cache will sit in the cache and block that
> entry for 60 seconds
> until it is cleared and something else can use that cache slot.
> This is akin a model where every 60 seconds you have a lottery where
> 10 directories will win entry to the cache.
> What is the probability that a hot directory wins the lottery and
> becomes cached?
> In this example it is 1%  because the access to the hot directory is
> only 1% of all access.
> All the cold directories have just a 0.1% chance of becomming cached
> but since there are so  many of them they will still dominate.
> (The problem we have to solve now is to get the hot directory into the
> cache as fast as possible and to get it to remain in cache for as long
> as possible.)
>
> On average thus we will have to wait 50 iterations until the hot
> directory will even enter the cache for the first time,  or it will on
> average take 50 minutes
> before the hot directory is even cached.
> If we had left the original 30 second timeout it would "only" have
> taken 25 minutes on average to get this directory into the cache.
> This kind of suggests that even 30 seconds would be way to big for
> this example and maybe we should use 10 seconds, or less.
>
> You want hot directories in the cache for as long as possible. A good
> way to do this is to make them sticky, so that if they are frequently
> accessed while
> cached, make them sticky so they do not expire as easily from the cache.
> On the other hand, any cold directory you want to expire from the
> cache as fast as possible since every time you hold a cold directory
> in cache, that part of the cache becomes
> useless and wasted.  You do this by, for example, setting this timeout
> as low as possible. So they are kicked out as soon as possible.
>
>


A slightly more complex implementation might be that
when you add a directory to the cache you set an initial, relatively
short timeout,
maybe 3 seconds, before it expires.
Then every time a directory is accessed while in the cache, you reset
the timeout
to double the expiry time up to a maximum of 30 seconds/60 seconds/...

That way a hot directory will relatively quickly become sticky while a
cold directory is kicked out
after just 3 seconds or so or whatever the initial short timeout is set to.


Probably would need a good set of data and real tests to tweak these
values and understand what
good defaults would be.
Probably would be a good idea with a bunch of dbench scripts to create
and perform some I/O resembling
common application workloads on sets of a few tens of thousands of
directories and measure cache-hit rate
as well as wall-clock time to run each test. Maybe also compare with
test runs agains a target with very roundtrip
versus very high roundtrips. And some mixture between the ratio of hot
vs cold directories.

To be realistic I think the total set of directories in the test would
need to be orders of magnitude
larger than the number of directories that can fit in the cache.

>
>
>
>
>
> >
> > On Fri, Sep 1, 2023 at 5:20 PM Steve French <smfrench@gmail.com> wrote:
> > >
> > > On Fri, Sep 1, 2023 at 11:31 AM ronnie sahlberg
> > > <ronniesahlberg@gmail.com> wrote:
> > > >
> > > > Maybe just re-set the timestamp every time the cached directory is reopened,
> > > > that way a hot directory will remain in cache indefinitely but one
> > > > that is cold will
> > > > quickly time out and make space for something else to be chaced.
> > >
> > >
> > > Makes sense
> > >
> > >
> > > --
> > > Thanks,
> > >
> > > Steve
> >
> >
> >
> > --
> > Thanks,
> >
> > Steve
diff mbox series

Patch

From c5143ccd59b38821f5c208d835b8b7f8616ca631 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 26 Aug 2023 23:59:25 -0500
Subject: [PATCH] smb3: allow controlling length of time directory entries are
 cached with dir leases

Currently with directory leases we cache directory contents for a fixed period
of time (default 30 seconds) but for many workloads this is too short.  Allow
configuring the maximum amount of time directory entries are cached when a
directory lease is held on that directory. Add module load parm "max_dir_cache"

For example to set the timeout to 10 minutes you would do:

  echo 600 > /sys/module/cifs/parameters/max_dir_cache

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cached_dir.c |  2 +-
 fs/smb/client/cifsfs.c     | 12 ++++++++++++
 fs/smb/client/cifsglob.h   |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 2d5e9a9d5b8b..e48a902efd52 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -582,7 +582,7 @@  cifs_cfids_laundromat_thread(void *p)
 			return 0;
 		spin_lock(&cfids->cfid_list_lock);
 		list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
-			if (time_after(jiffies, cfid->time + HZ * 30)) {
+			if (time_after(jiffies, cfid->time + HZ * max_dir_cache)) {
 				list_del(&cfid->entry);
 				list_add(&cfid->entry, &entry);
 				cfids->num_entries--;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index d49fd2bf71b0..7a89718d2a59 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -117,6 +117,10 @@  module_param(cifs_max_pending, uint, 0444);
 MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
 				   "CIFS/SMB1 dialect (N/A for SMB3) "
 				   "Default: 32767 Range: 2 to 32767.");
+unsigned int max_dir_cache = 60;
+module_param(max_dir_cache, uint, 0644);
+MODULE_PARM_DESC(max_dir_cache, "Number of seconds to cache directory contents for which we have a lease. Default: 60 "
+				 "Range: 1 to 65000 seconds");
 #ifdef CONFIG_CIFS_STATS2
 unsigned int slow_rsp_threshold = 1;
 module_param(slow_rsp_threshold, uint, 0644);
@@ -1679,6 +1683,14 @@  init_cifs(void)
 			 CIFS_MAX_REQ);
 	}
 
+	if (max_dir_cache < 1) {
+		max_dir_cache = 1;
+		cifs_dbg(VFS, "max_dir_cache timeout set to min of 1 second\n");
+	} else if (max_dir_cache > 65000) {
+		max_dir_cache = 65000;
+		cifs_dbg(VFS, "max_dir_cache timeout set to max of 65000 seconds\n");
+	}
+
 	cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	if (!cifsiod_wq) {
 		rc = -ENOMEM;
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 259e231f8b4f..7aeeaa260cce 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -2016,6 +2016,7 @@  extern unsigned int CIFSMaxBufSize;  /* max size not including hdr */
 extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
 extern unsigned int cifs_min_small;  /* min size of small buf pool */
 extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern unsigned int max_dir_cache; /* max time for directory lease caching of dir */
 extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
 extern atomic_t mid_count;
 
-- 
2.34.1