diff mbox series

cifs: retry lookup and readdir when EAGAIN is returned.

Message ID 20210615164256.173715-1-trbecker@gmail.com
State New
Headers show
Series cifs: retry lookup and readdir when EAGAIN is returned. | expand

Commit Message

Thiago Rafael Becker June 15, 2021, 4:42 p.m. UTC
According to the investigation performed by Jacob Shivers at Red Hat,
cifs_lookup and cifs_readdir leak EAGAIN when the user session is
deleted on the server. Fix this issue by implementing a retry with
limits, as is implemented in cifs_revalidate_dentry_attr.

Reproducer based on the work by Jacob Shivers:

  ~~~
  $ cat readdir-cifs-test.sh
  #!/bin/bash

  # Install and configure powershell and sshd on the windows
  #  server as descibed in
  # https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_overview
  # This script uses expect(1)

  USER=dude
  SERVER=192.168.0.2
  RPATH=root
  PASS='password'

  function debug_funcs {
  	for line in $@ ; do
  		echo "func $line +p" > /sys/kernel/debug/dynamic_debug/control
  	done
  }

  function setup {
  	echo 1 > /proc/fs/cifs/cifsFYI
  	debug_funcs wait_for_compound_request \
                smb2_query_dir_first cifs_readdir \
                compound_send_recv cifs_reconnect_tcon \
                generic_ip_connect cifs_reconnect \
                smb2_reconnect_server smb2_reconnect \
                cifs_readv_from_socket cifs_readv_receive
  	tcpdump -i eth0 -w cifs.pcap host 192.168.2.182 & sleep 5
  	dmesg -C
  }

  function test_call {
  	if [[ $1 == 1 ]] ; then
  		tracer="strace -tt -f -s 4096 -o trace-$(date -Iseconds).txt"
  	fi
        # Change the command here to anything apropriate
  	$tracer ls $2 > /dev/null
  	res=$?
  	if [[ $1 == 1 ]] ; then
  		if [[ $res == 0 ]] ; then
  			1>&2 echo success
  		else
  			1>&2 echo "failure ($res)"
  		fi
  	fi
  }

  mountpoint /mnt > /dev/null || mount -t cifs -o username=$USER,pass=$PASS //$SERVER/$RPATH /mnt

  test_call 0 /mnt/

  /usr/bin/expect << EOF
  	set timeout 60

  	spawn ssh $USER@$SERVER

  	expect "yes/no" {
  		send "yes\r"
  		expect "*?assword" { send "$PASS\r" }
  	} "*?assword" { send "$PASS\r" }

  	expect ">" { send "powershell close-smbsession -force\r" }
  	expect ">" { send "exit\r" }
  	expect eof
  EOF

  sysctl -w vm.drop_caches=2 > /dev/null
  sysctl -w vm.drop_caches=2 > /dev/null

  setup

  test_call 1 /mnt/
  ~~~

Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
---
 fs/cifs/dir.c     | 4 ++++
 fs/cifs/smb2ops.c | 5 +++++
 2 files changed, 9 insertions(+)

Comments

ronnie sahlberg June 16, 2021, 6:09 a.m. UTC | #1
Looks good to me.

Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>

On Wed, Jun 16, 2021 at 2:44 AM Thiago Rafael Becker <trbecker@gmail.com> wrote:
>
> According to the investigation performed by Jacob Shivers at Red Hat,
> cifs_lookup and cifs_readdir leak EAGAIN when the user session is
> deleted on the server. Fix this issue by implementing a retry with
> limits, as is implemented in cifs_revalidate_dentry_attr.
>
> Reproducer based on the work by Jacob Shivers:
>
>   ~~~
>   $ cat readdir-cifs-test.sh
>   #!/bin/bash
>
>   # Install and configure powershell and sshd on the windows
>   #  server as descibed in
>   # https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_overview
>   # This script uses expect(1)
>
>   USER=dude
>   SERVER=192.168.0.2
>   RPATH=root
>   PASS='password'
>
>   function debug_funcs {
>         for line in $@ ; do
>                 echo "func $line +p" > /sys/kernel/debug/dynamic_debug/control
>         done
>   }
>
>   function setup {
>         echo 1 > /proc/fs/cifs/cifsFYI
>         debug_funcs wait_for_compound_request \
>                 smb2_query_dir_first cifs_readdir \
>                 compound_send_recv cifs_reconnect_tcon \
>                 generic_ip_connect cifs_reconnect \
>                 smb2_reconnect_server smb2_reconnect \
>                 cifs_readv_from_socket cifs_readv_receive
>         tcpdump -i eth0 -w cifs.pcap host 192.168.2.182 & sleep 5
>         dmesg -C
>   }
>
>   function test_call {
>         if [[ $1 == 1 ]] ; then
>                 tracer="strace -tt -f -s 4096 -o trace-$(date -Iseconds).txt"
>         fi
>         # Change the command here to anything apropriate
>         $tracer ls $2 > /dev/null
>         res=$?
>         if [[ $1 == 1 ]] ; then
>                 if [[ $res == 0 ]] ; then
>                         1>&2 echo success
>                 else
>                         1>&2 echo "failure ($res)"
>                 fi
>         fi
>   }
>
>   mountpoint /mnt > /dev/null || mount -t cifs -o username=$USER,pass=$PASS //$SERVER/$RPATH /mnt
>
>   test_call 0 /mnt/
>
>   /usr/bin/expect << EOF
>         set timeout 60
>
>         spawn ssh $USER@$SERVER
>
>         expect "yes/no" {
>                 send "yes\r"
>                 expect "*?assword" { send "$PASS\r" }
>         } "*?assword" { send "$PASS\r" }
>
>         expect ">" { send "powershell close-smbsession -force\r" }
>         expect ">" { send "exit\r" }
>         expect eof
>   EOF
>
>   sysctl -w vm.drop_caches=2 > /dev/null
>   sysctl -w vm.drop_caches=2 > /dev/null
>
>   setup
>
>   test_call 1 /mnt/
>   ~~~
>
> Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
> ---
>  fs/cifs/dir.c     | 4 ++++
>  fs/cifs/smb2ops.c | 5 +++++
>  2 files changed, 9 insertions(+)
>
> diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
> index 6bcd3e8f7cda..7c641f9a3dac 100644
> --- a/fs/cifs/dir.c
> +++ b/fs/cifs/dir.c
> @@ -630,6 +630,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
>         struct inode *newInode = NULL;
>         const char *full_path;
>         void *page;
> +       int retry_count = 0;
>
>         xid = get_xid();
>
> @@ -673,6 +674,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
>         cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
>                  full_path, d_inode(direntry));
>
> +again:
>         if (pTcon->posix_extensions)
>                 rc = smb311_posix_get_inode_info(&newInode, full_path, parent_dir_inode->i_sb, xid);
>         else if (pTcon->unix_ext) {
> @@ -687,6 +689,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
>                 /* since paths are not looked up by component - the parent
>                    directories are presumed to be good here */
>                 renew_parental_timestamps(direntry);
> +       } else if (rc == -EAGAIN && retry_count++ < 10) {
> +               goto again;
>         } else if (rc == -ENOENT) {
>                 cifs_set_time(direntry, jiffies);
>                 newInode = NULL;
> diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
> index 21ef51d338e0..d241e6af8fe4 100644
> --- a/fs/cifs/smb2ops.c
> +++ b/fs/cifs/smb2ops.c
> @@ -2325,6 +2325,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
>         struct smb2_query_directory_rsp *qd_rsp = NULL;
>         struct smb2_create_rsp *op_rsp = NULL;
>         struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
> +       int retry_count = 0;
>
>         utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
>         if (!utf16_path)
> @@ -2372,10 +2373,14 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
>
>         smb2_set_related(&rqst[1]);
>
> +again:
>         rc = compound_send_recv(xid, tcon->ses, server,
>                                 flags, 2, rqst,
>                                 resp_buftype, rsp_iov);
>
> +       if (rc == -EAGAIN && retry_count++ < 10)
> +               goto again;
> +
>         /* If the open failed there is nothing to do */
>         op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
>         if (op_rsp == NULL || op_rsp->sync_hdr.Status != STATUS_SUCCESS) {
> --
> 2.31.1
>
Aurélien Aptel June 16, 2021, 10:07 a.m. UTC | #2
Thanks for the patch and reproducer Thiago.

ronnie sahlberg <ronniesahlberg@gmail.com> writes:
> Looks good to me.

I guess it looks ok as a quick fix for the issue at hand but:
- should we check for more error codes (use is_retryable_error()?)
- all syscalls will need something similar, the session can be closed at
  any point
  * either we add a loop everywhere
  * or we change cifs_send_receive to retry (most?) calls
  * might be worth looking at what nfs does here
- Should this be done for both soft (default) and hard mounts? I guess
  for hard we would retry indefinitely

Cheers,
Thiago Rafael Becker June 16, 2021, 3:40 p.m. UTC | #3
Thanks for the comments, I'm working on them.

On Wed, Jun 16, 2021 at 12:07:50PM +0200, Aurélien Aptel wrote:

> I guess it looks ok as a quick fix for the issue at hand but:
> - should we check for more error codes (use is_retryable_error()?)
> - all syscalls will need something similar, the session can be closed at
>   any point
>   * either we add a loop everywhere
>   * or we change cifs_send_receive to retry (most?) calls
>   * might be worth looking at what nfs does here

Some syscall can return EAGAIN, so we would need to check which
operation is retryable and if the error is retryable. I don't know if
all Linux syscalls mappable to smb2 operations in the wire, but it may
be worth mapping.

NFS requeues the calls, and fails after a configurable timeout
for soft mounts, or issues an error message and requeues the request
for hard mounts (retrans and timeo mount options).

> - Should this be done for both soft (default) and hard mounts? I guess
>   for hard we would retry indefinitely

Good point, but the correct option is to retry on hard mounts until the
operation succeeds.

NFS hard mounts create issues like being unable to do a clean shutdown on
a server failure, because oustanding I/O blocks it. The NFS has atempted
to fix this by adding options to kill all outstanding I/O, but I'm not
current on the efforts/issues in this front.

So this would create the same issue with outstanding mounts on cifs 
shares, so a similar solution to NFS may be adapted/created in the future.

One thing that I forgot to mention in the patch is that this uncovered a
bug in compuond requests, where wait_for_compund_request will block the
readdir request with EDEADLK, if the share needs reconnect. I'm looking
into this patch, and will submmit it separatelly, as this looks like a
corner case uncovered by the patch and specific conditions in the tests.

> Cheers,
Best regards,
Thiago
Steve French June 16, 2021, 5:09 p.m. UTC | #4
tentatively merged into cifs-2.6.git for-next pending testing and any
more review feedback

On Wed, Jun 16, 2021 at 1:09 AM ronnie sahlberg
<ronniesahlberg@gmail.com> wrote:
>
> Looks good to me.
>
> Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>
>
> On Wed, Jun 16, 2021 at 2:44 AM Thiago Rafael Becker <trbecker@gmail.com> wrote:
> >
> > According to the investigation performed by Jacob Shivers at Red Hat,
> > cifs_lookup and cifs_readdir leak EAGAIN when the user session is
> > deleted on the server. Fix this issue by implementing a retry with
> > limits, as is implemented in cifs_revalidate_dentry_attr.
> >
> > Reproducer based on the work by Jacob Shivers:
> >
> >   ~~~
> >   $ cat readdir-cifs-test.sh
> >   #!/bin/bash
> >
> >   # Install and configure powershell and sshd on the windows
> >   #  server as descibed in
> >   # https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_overview
> >   # This script uses expect(1)
> >
> >   USER=dude
> >   SERVER=192.168.0.2
> >   RPATH=root
> >   PASS='password'
> >
> >   function debug_funcs {
> >         for line in $@ ; do
> >                 echo "func $line +p" > /sys/kernel/debug/dynamic_debug/control
> >         done
> >   }
> >
> >   function setup {
> >         echo 1 > /proc/fs/cifs/cifsFYI
> >         debug_funcs wait_for_compound_request \
> >                 smb2_query_dir_first cifs_readdir \
> >                 compound_send_recv cifs_reconnect_tcon \
> >                 generic_ip_connect cifs_reconnect \
> >                 smb2_reconnect_server smb2_reconnect \
> >                 cifs_readv_from_socket cifs_readv_receive
> >         tcpdump -i eth0 -w cifs.pcap host 192.168.2.182 & sleep 5
> >         dmesg -C
> >   }
> >
> >   function test_call {
> >         if [[ $1 == 1 ]] ; then
> >                 tracer="strace -tt -f -s 4096 -o trace-$(date -Iseconds).txt"
> >         fi
> >         # Change the command here to anything apropriate
> >         $tracer ls $2 > /dev/null
> >         res=$?
> >         if [[ $1 == 1 ]] ; then
> >                 if [[ $res == 0 ]] ; then
> >                         1>&2 echo success
> >                 else
> >                         1>&2 echo "failure ($res)"
> >                 fi
> >         fi
> >   }
> >
> >   mountpoint /mnt > /dev/null || mount -t cifs -o username=$USER,pass=$PASS //$SERVER/$RPATH /mnt
> >
> >   test_call 0 /mnt/
> >
> >   /usr/bin/expect << EOF
> >         set timeout 60
> >
> >         spawn ssh $USER@$SERVER
> >
> >         expect "yes/no" {
> >                 send "yes\r"
> >                 expect "*?assword" { send "$PASS\r" }
> >         } "*?assword" { send "$PASS\r" }
> >
> >         expect ">" { send "powershell close-smbsession -force\r" }
> >         expect ">" { send "exit\r" }
> >         expect eof
> >   EOF
> >
> >   sysctl -w vm.drop_caches=2 > /dev/null
> >   sysctl -w vm.drop_caches=2 > /dev/null
> >
> >   setup
> >
> >   test_call 1 /mnt/
> >   ~~~
> >
> > Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
> > ---
> >  fs/cifs/dir.c     | 4 ++++
> >  fs/cifs/smb2ops.c | 5 +++++
> >  2 files changed, 9 insertions(+)
> >
> > diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
> > index 6bcd3e8f7cda..7c641f9a3dac 100644
> > --- a/fs/cifs/dir.c
> > +++ b/fs/cifs/dir.c
> > @@ -630,6 +630,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
> >         struct inode *newInode = NULL;
> >         const char *full_path;
> >         void *page;
> > +       int retry_count = 0;
> >
> >         xid = get_xid();
> >
> > @@ -673,6 +674,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
> >         cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
> >                  full_path, d_inode(direntry));
> >
> > +again:
> >         if (pTcon->posix_extensions)
> >                 rc = smb311_posix_get_inode_info(&newInode, full_path, parent_dir_inode->i_sb, xid);
> >         else if (pTcon->unix_ext) {
> > @@ -687,6 +689,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
> >                 /* since paths are not looked up by component - the parent
> >                    directories are presumed to be good here */
> >                 renew_parental_timestamps(direntry);
> > +       } else if (rc == -EAGAIN && retry_count++ < 10) {
> > +               goto again;
> >         } else if (rc == -ENOENT) {
> >                 cifs_set_time(direntry, jiffies);
> >                 newInode = NULL;
> > diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
> > index 21ef51d338e0..d241e6af8fe4 100644
> > --- a/fs/cifs/smb2ops.c
> > +++ b/fs/cifs/smb2ops.c
> > @@ -2325,6 +2325,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
> >         struct smb2_query_directory_rsp *qd_rsp = NULL;
> >         struct smb2_create_rsp *op_rsp = NULL;
> >         struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
> > +       int retry_count = 0;
> >
> >         utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
> >         if (!utf16_path)
> > @@ -2372,10 +2373,14 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
> >
> >         smb2_set_related(&rqst[1]);
> >
> > +again:
> >         rc = compound_send_recv(xid, tcon->ses, server,
> >                                 flags, 2, rqst,
> >                                 resp_buftype, rsp_iov);
> >
> > +       if (rc == -EAGAIN && retry_count++ < 10)
> > +               goto again;
> > +
> >         /* If the open failed there is nothing to do */
> >         op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
> >         if (op_rsp == NULL || op_rsp->sync_hdr.Status != STATUS_SUCCESS) {
> > --
> > 2.31.1
> >
diff mbox series

Patch

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6bcd3e8f7cda..7c641f9a3dac 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -630,6 +630,7 @@  cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	struct inode *newInode = NULL;
 	const char *full_path;
 	void *page;
+	int retry_count = 0;
 
 	xid = get_xid();
 
@@ -673,6 +674,7 @@  cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
 		 full_path, d_inode(direntry));
 
+again:
 	if (pTcon->posix_extensions)
 		rc = smb311_posix_get_inode_info(&newInode, full_path, parent_dir_inode->i_sb, xid);
 	else if (pTcon->unix_ext) {
@@ -687,6 +689,8 @@  cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		/* since paths are not looked up by component - the parent
 		   directories are presumed to be good here */
 		renew_parental_timestamps(direntry);
+	} else if (rc == -EAGAIN && retry_count++ < 10) {
+		goto again;
 	} else if (rc == -ENOENT) {
 		cifs_set_time(direntry, jiffies);
 		newInode = NULL;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 21ef51d338e0..d241e6af8fe4 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2325,6 +2325,7 @@  smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	struct smb2_query_directory_rsp *qd_rsp = NULL;
 	struct smb2_create_rsp *op_rsp = NULL;
 	struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
+	int retry_count = 0;
 
 	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
 	if (!utf16_path)
@@ -2372,10 +2373,14 @@  smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 
 	smb2_set_related(&rqst[1]);
 
+again:
 	rc = compound_send_recv(xid, tcon->ses, server,
 				flags, 2, rqst,
 				resp_buftype, rsp_iov);
 
+	if (rc == -EAGAIN && retry_count++ < 10)
+		goto again;
+
 	/* If the open failed there is nothing to do */
 	op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
 	if (op_rsp == NULL || op_rsp->sync_hdr.Status != STATUS_SUCCESS) {