@@ -694,6 +694,9 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",noblocksend");
if (tcon->ses->server->nosharesock)
seq_puts(s, ",nosharesock");
+ if (tcon->ses->server->max_blocking_reconnect != DEFAULT_MAX_BLOCKING_RECONNECT)
+ seq_printf(s, ",max_blocking_recon=%lu",
+ tcon->ses->server->max_blocking_reconnect); /* spelling matches the "max_blocking_recon" key in smb3_fs_parameters */
if (tcon->snapshot_time)
seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
@@ -84,6 +84,10 @@
/* maximum number of PDUs in one compound */
#define MAX_COMPOUND 5
+/* max consecutive failed reconnect waits before file access fails without waiting (0 = always wait) */
+#define DEFAULT_MAX_BLOCKING_RECONNECT 0
+
+
/*
* Default number of credits to keep available for SMB3.
* This value is chosen somewhat arbitrarily. The Windows client
@@ -731,6 +735,8 @@ struct TCP_Server_Info {
struct delayed_work reconnect; /* reconnect workqueue job */
struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
unsigned long echo_interval;
+ unsigned long max_blocking_reconnect; /* maximum failed reconnects before file access fails without waiting */
+ unsigned long reconnect_fail_cnt; /* subsequent reconnect timeout on file access */
/*
* Number of targets available for reconnect. The more targets
@@ -1740,6 +1740,8 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
goto out_err_crypto_release;
}
}
+ tcp_ses->max_blocking_reconnect = ctx->max_blocking_reconnect;
+ tcp_ses->reconnect_fail_cnt = 0;
rc = ip_connect(tcp_ses);
if (rc < 0) {
cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
@@ -154,6 +154,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_u32("handletimeout", Opt_handletimeout),
fsparam_u64("snapshot", Opt_snapshot),
fsparam_u32("max_channels", Opt_max_channels),
+ fsparam_u32("max_blocking_recon", Opt_max_blocking_reconnect),
/* Mount options which take string value */
fsparam_string("source", Opt_source),
@@ -1166,6 +1167,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
if (result.uint_32 > 1)
ctx->multichannel = true;
break;
+ case Opt_max_blocking_reconnect:
+ ctx->max_blocking_reconnect = result.uint_32;
+ break;
case Opt_max_cached_dirs:
if (result.uint_32 < 1) {
cifs_errorf(fc, "%s: Invalid max_cached_dirs, needs to be 1 or more\n",
@@ -1615,6 +1619,8 @@ int smb3_init_fs_context(struct fs_context *fc)
ctx->multichannel = false;
ctx->max_channels = 1;
+ ctx->max_blocking_reconnect = DEFAULT_MAX_BLOCKING_RECONNECT;
+
ctx->backupuid_specified = false; /* no backup intent for a user */
ctx->backupgid_specified = false; /* no backup intent for a group */
@@ -131,6 +131,7 @@ enum cifs_param {
Opt_max_cached_dirs,
Opt_snapshot,
Opt_max_channels,
+ Opt_max_blocking_reconnect,
Opt_handletimeout,
/* Mount options which take string value */
@@ -262,6 +263,7 @@ struct smb3_fs_context {
__u32 handle_timeout; /* persistent and durable handle timeout in ms */
unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
unsigned int max_channels;
+ unsigned int max_blocking_reconnect;
unsigned int max_cached_dirs;
__u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
bool rootfs:1; /* if it's a SMB root file system */
@@ -1318,6 +1318,13 @@ int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry)
return 0;
}
timeout *= server->nr_targets;
+ /* return immediately on repeated timeouts */
+ if (server->max_blocking_reconnect &&
+ server->reconnect_fail_cnt >= server->max_blocking_reconnect) {
+ spin_unlock(&server->srv_lock);
+ cifs_dbg(FYI, "%s: not waiting for reconnect as requested\n", __func__);
+ return -EHOSTDOWN;
+ }
spin_unlock(&server->srv_lock);
/*
@@ -1341,12 +1348,18 @@ int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry)
/* are we still trying to reconnect? */
spin_lock(&server->srv_lock);
if (server->tcpStatus != CifsNeedReconnect) {
+ server->reconnect_fail_cnt = 0;
spin_unlock(&server->srv_lock);
return 0;
}
spin_unlock(&server->srv_lock);
} while (retry);
+ /* increase failed attempt counter */
+ spin_lock(&server->srv_lock);
+ server->reconnect_fail_cnt += 1;
+ spin_unlock(&server->srv_lock);
+
cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__);
return -EHOSTDOWN;
}
Hello everyone, The "soft" mount option is described in the man page as: "The program accessing a file on the cifs mounted file system will not hang when the server crashes and will return errors to the user application." In practice, this is not the case. Modern software, especially GUIs, makes multiple calls to the filesystem at once, each of which results in a reconnect attempt with a 10-second timeout. The result is frozen user interfaces and shells. It seems illogical to wait 10 seconds for every filesystem call once we know the server is inaccessible. Two years ago I wrote a patch that limits the number of blocking calls, but I failed to send it correctly. I have been using it without issues since then, and I have recently seen interest in this idea from others. The mount option "max_blocking_recon" limits the number of successive failed connection attempts, after which EHOSTDOWN will be returned immediately. This avoids locking up whole desktop environments. I recommend setting it to 1. Any comments on this idea? Thanks in advance, Lucy Kueny From 98e2e44d39f4f5172e3ce416a2e65a48b51e2de1 Mon Sep 17 00:00:00 2001 From: Lucy Kueny <lucy@kueny.fr> Date: Fri, 22 Sep 2023 11:06:20 +0200 Subject: [PATCH] Stop reconnect timeouts from freezing userspace --- fs/smb/client/cifsfs.c | 3 +++ fs/smb/client/cifsglob.h | 6 ++++++ fs/smb/client/connect.c | 2 ++ fs/smb/client/fs_context.c | 6 ++++++ fs/smb/client/fs_context.h | 2 ++ fs/smb/client/misc.c | 13 +++++++++++++ 6 files changed, 32 insertions(+) -- 2.42.0