diff mbox series

[12/14] cifs: handle when server stops supporting multichannel

Message ID 20231030110020.45627-12-sprasad@microsoft.com
State New
Headers show
Series [01/14] cifs: print server capabilities in DebugData | expand

Commit Message

Shyam Prasad N Oct. 30, 2023, 11 a.m. UTC
From: Shyam Prasad N <sprasad@microsoft.com>

When a server stops supporting multichannel, we will
keep attempting reconnects to the secondary channels today.
Avoid this by freeing extra channels when negotiate
returns no multichannel support.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cifsglob.h  |  1 +
 fs/smb/client/cifsproto.h |  2 ++
 fs/smb/client/connect.c   | 10 ++++++
 fs/smb/client/sess.c      | 64 ++++++++++++++++++++++++++++++-----
 fs/smb/client/smb2pdu.c   | 70 ++++++++++++++++++++++++++++++++++++++-
 fs/smb/client/transport.c |  2 +-
 6 files changed, 139 insertions(+), 10 deletions(-)

Comments

Paulo Alcantara Nov. 8, 2023, 4:35 p.m. UTC | #1
nspmangalore@gmail.com writes:

> From: Shyam Prasad N <sprasad@microsoft.com>
>
> When a server stops supporting multichannel, we will
> keep attempting reconnects to the secondary channels today.
> Avoid this by freeing extra channels when negotiate
> returns no multichannel support.
>
> Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> ---
>  fs/smb/client/cifsglob.h  |  1 +
>  fs/smb/client/cifsproto.h |  2 ++
>  fs/smb/client/connect.c   | 10 ++++++
>  fs/smb/client/sess.c      | 64 ++++++++++++++++++++++++++++++-----
>  fs/smb/client/smb2pdu.c   | 70 ++++++++++++++++++++++++++++++++++++++-
>  fs/smb/client/transport.c |  2 +-
>  6 files changed, 139 insertions(+), 10 deletions(-)

Several reconnect tests are triggering this when running against
for-next branch

[  360.487827] dftest>> [*] mount opts: vers=3.1.1,echo_interval=10,nohandlecache,noserverino
[  361.570676] dftest>> [*] mount opts: vers=3.1.1,echo_interval=10,nohandlecache,noserverino
[  361.572133] dftest>> [*] TEST: mount //gandalf.test/dfstest3 , cd /mnt/dfs/link , ls ., expect:["target\d_file" "tsub"]
[  361.572133]       disconnect /mnt/dfs/link , ls#1 . (fail here is ok),  ls#2 (fail here NOT ok)
[  361.613964] CIFS: Attempting to mount //gandalf.test/dfstest3
[  361.637663] virtio_net virtio1 enp1s0: entered promiscuous mode
[  362.262990] CIFS: Attempting to mount //gandalf.test/dfstest3/link
[  362.483166] ls (978) used greatest stack depth: 21904 bytes left
[  398.362399] CIFS: VFS: \\w22-root2.gandalf.test has not responded in 30 seconds. Reconnecting...
[  398.364632] BUG: sleeping function called from invalid context at kernel/workqueue.c:3344
[  398.366399] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 981, name: cifsd
[  398.367318] preempt_count: 1, expected: 0
[  398.367826] RCU nest depth: 0, expected: 0
[  398.368414] 1 lock held by cifsd/981:
[  398.368905]  #0: ffffffffc028fb58 (&cifs_tcp_ses_lock){+.+.}-{2:2}, at: cifs_mark_tcp_ses_conns_for_reconnect+0x5f/0x3b0 [cifs]
[  398.371053] CPU: 3 PID: 981 Comm: cifsd Not tainted 6.6.0 #1
[  398.371720] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
[  398.373072] Call Trace:
[  398.373404]  <TASK>
[  398.373700]  dump_stack_lvl+0x64/0x80
[  398.374188]  __might_resched+0x23c/0x360
[  398.374708]  ? __pfx___might_resched+0x10/0x10
[  398.375188]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.375688]  ? __might_sleep+0x2b/0xb0
[  398.376086]  ? __flush_work+0xc5/0x640
[  398.376539]  __flush_work+0xfd/0x640
[  398.377019]  ? __flush_work+0xc5/0x640
[  398.377524]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.378155]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.378798]  ? __pfx___flush_work+0x10/0x10
[  398.379408]  ? hlock_class+0x32/0xc0
[  398.379840]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.380355]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.380959]  ? mark_held_locks+0x5d/0x90
[  398.381537]  __cancel_work_timer+0x210/0x2c0
[  398.382117]  ? __pfx___cancel_work_timer+0x10/0x10
[  398.382757]  ? do_raw_spin_trylock+0xd1/0x120
[  398.383363]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.383921]  ? do_raw_spin_unlock+0x9b/0x100
[  398.384382]  cifs_mark_tcp_ses_conns_for_reconnect+0x227/0x3b0 [cifs]
[  398.385834]  reconnect_dfs_server+0x162/0x6b0 [cifs]
[  398.387107]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.387715]  ? __mutex_unlock_slowpath+0x11e/0x400
[  398.388219]  ? __pfx___mutex_lock+0x10/0x10
[  398.388736]  ? __pfx_reconnect_dfs_server+0x10/0x10 [cifs]
[  398.390057]  ? __pfx____ratelimit+0x10/0x10
[  398.390637]  cifs_readv_from_socket+0x335/0x490 [cifs]
[  398.391898]  ? __pfx_cifs_readv_from_socket+0x10/0x10 [cifs]
[  398.393084]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.393730]  cifs_read_from_socket+0xb5/0x100 [cifs]
[  398.394985]  ? __pfx_cifs_read_from_socket+0x10/0x10 [cifs]
[  398.396176]  ? __pfx_mempool_alloc+0x10/0x10
[  398.396734]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.397386]  ? cifs_small_buf_get+0x53/0x70 [cifs]
[  398.398619]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.399245]  ? allocate_buffers+0xa7/0x1d0 [cifs]
[  398.400294]  cifs_demultiplex_thread+0x274/0x1270 [cifs]
[  398.401346]  ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs]
[  398.402680]  ? __pfx_lock_release+0x10/0x10
[  398.403053]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.403464]  ? mark_held_locks+0x1a/0x90
[  398.403813]  ? lockdep_hardirqs_on_prepare+0x136/0x210
[  398.404250]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.404663]  ? srso_alias_return_thunk+0x5/0xfbef5
[  398.405079]  ? __kthread_parkme+0xce/0xf0
[  398.405433]  ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs]
[  398.406293]  kthread+0x18d/0x1d0
[  398.406582]  ? kthread+0xdb/0x1d0
[  398.406885]  ? __pfx_kthread+0x10/0x10
[  398.407237]  ret_from_fork+0x34/0x60
[  398.407551]  ? __pfx_kthread+0x10/0x10
[  398.407881]  ret_from_fork_asm+0x1b/0x30
[  398.408258]  </TASK>
Paulo Alcantara Nov. 8, 2023, 7:13 p.m. UTC | #2
Also found memory leaks

        ...
	unreferenced object 0xffff88801c8ca600 (size 192):
	  comm "python3", pid 14002, jiffies 4300343883 (age 4393.481s)
	  hex dump (first 32 bytes):
	    f0 a2 8b 17 80 88 ff ff f0 a2 8b 17 80 88 ff ff  ................
	    01 00 00 00 00 00 00 00 00 ca 9a 3b 00 00 00 00  ...........;....
	  backtrace:
	    [<ffffffff8144e3f5>] __kmem_cache_alloc_node+0x295/0x2d0
	    [<ffffffff813ab1aa>] kmalloc_trace+0x2a/0xc0
	    [<ffffffffc016178d>] parse_server_interfaces+0x4ed/0xcc0 [cifs]
	    [<ffffffffc016ae13>] SMB3_request_interfaces+0x163/0x2b0 [cifs]
	    [<ffffffffc016b0dd>] smb3_qfs_tcon+0x16d/0x2c0 [cifs]
	    [<ffffffffc01102c1>] cifs_mount_get_tcon+0x3b1/0x550 [cifs]
	    [<ffffffffc01a2471>] dfs_mount_share+0xcc1/0xfa0 [cifs]
	    [<ffffffffc011089a>] cifs_mount+0xda/0x4c0 [cifs]
	    [<ffffffffc01006d5>] cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
	    [<ffffffffc01954cd>] smb3_get_tree+0x16d/0x380 [cifs]
	    [<ffffffff8147c1ad>] vfs_get_tree+0x4d/0x190
	    [<ffffffff814bdbc3>] fc_mount+0x13/0x60
	    [<ffffffffc019aae4>] cifs_do_automount.isra.0+0x3c4/0x4d0 [cifs]
	    [<ffffffffc019ae3d>] cifs_d_automount+0x4d/0x1b0 [cifs]
	    [<ffffffff8148b01c>] __traverse_mounts+0xcc/0x300
	    [<ffffffff8149350b>] step_into+0x54b/0xd30

and leaked server connections

	Display Internal CIFS Data Structures for Debugging
	---------------------------------------------------
	CIFS Version 2.46
	Features: DFS,FSCACHE,STATS2,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL,WITNESS
	CIFSMaxBufSize: 16384
	Active VFS Requests: 0
	
	Servers: 
	1) ConnectionId: 0x70e Hostname: w22-root2.gandalf.test 
	ClientGUID: 44DAE383-3E91-3042-85FE-87D6F17298B7
	Number of credits: 1,1,1 Dialect 0x210
	Server capabilities: 0x300007
	TCP status: 4 Instance: 77
	Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 
	In Send: 0 In MaxReq Wait: 0
	DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link10
	
	        Sessions: 
	                [NONE]
	2) ConnectionId: 0x706 Hostname: w22-root2.gandalf.test 
	ClientGUID: C8CF45E4-F70D-DF40-8821-0234A2E20DD4
	Number of credits: 1,1,1 Dialect 0x210
	Server capabilities: 0x300007
	TCP status: 4 Instance: 81
	Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 
	In Send: 0 In MaxReq Wait: 0
	DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link6
	
	        Sessions: 
	                [NONE]
	3) ConnectionId: 0x6ae Hostname: w22-root1.gandalf.test 
	ClientGUID: AB059CDD-12FF-B94D-B30C-9E1928ACBA95
	Number of credits: 1,1,1 Dialect 0x210
	Server capabilities: 0x300007
	TCP status: 4 Instance: 96
	Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 
	In Send: 0 In MaxReq Wait: 0
	DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link9
	
	        Sessions: 
	                [NONE]
        ...

while running reconnect tests.
Paulo Alcantara Nov. 8, 2023, 7:41 p.m. UTC | #3
> and leaked server connections
>
> 	Display Internal CIFS Data Structures for Debugging
> 	---------------------------------------------------
> 	CIFS Version 2.46
> 	Features: DFS,FSCACHE,STATS2,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL,WITNESS
> 	CIFSMaxBufSize: 16384
> 	Active VFS Requests: 0
> 	
> 	Servers: 
> 	1) ConnectionId: 0x70e Hostname: w22-root2.gandalf.test 
> 	ClientGUID: 44DAE383-3E91-3042-85FE-87D6F17298B7
> 	Number of credits: 1,1,1 Dialect 0x210
> 	Server capabilities: 0x300007
> 	TCP status: 4 Instance: 77
> 	Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 
> 	In Send: 0 In MaxReq Wait: 0
> 	DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link10
> 	
> 	        Sessions: 
> 	                [NONE]
> 	2) ConnectionId: 0x706 Hostname: w22-root2.gandalf.test 
> 	ClientGUID: C8CF45E4-F70D-DF40-8821-0234A2E20DD4
> 	Number of credits: 1,1,1 Dialect 0x210
> 	Server capabilities: 0x300007
> 	TCP status: 4 Instance: 81
> 	Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 
> 	In Send: 0 In MaxReq Wait: 0
> 	DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link6
> 	
> 	        Sessions: 
> 	                [NONE]
> 	3) ConnectionId: 0x6ae Hostname: w22-root1.gandalf.test 
> 	ClientGUID: AB059CDD-12FF-B94D-B30C-9E1928ACBA95
> 	Number of credits: 1,1,1 Dialect 0x210
> 	Server capabilities: 0x300007
> 	TCP status: 4 Instance: 96
> 	Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 
> 	In Send: 0 In MaxReq Wait: 0
> 	DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link9
> 	
> 	        Sessions: 
> 	                [NONE]
>         ...

I ended up with a simple reproducer for the above

        mount.cifs //srv/share /mnt/1 -o ...,echo_interval=10
        iptables -I INPUT -s $srvaddr -j DROP
        stat -f /mnt/1
        # check dmesg for "BUG: sleeping function called from invalid context"
        iptables -I INPUT -s $srvaddr -j ACCEPT
        stat -f /mnt/1
        umount /mnt/1
        # check /proc/fs/cifs/DebugData for leaked server connection
Shyam Prasad N Nov. 9, 2023, 11:44 a.m. UTC | #4
On Thu, Nov 9, 2023 at 1:11 AM Paulo Alcantara <pc@manguebit.com> wrote:
>
> > and leaked server connections
> >
> >       Display Internal CIFS Data Structures for Debugging
> >       ---------------------------------------------------
> >       CIFS Version 2.46
> >       Features: DFS,FSCACHE,STATS2,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL,WITNESS
> >       CIFSMaxBufSize: 16384
> >       Active VFS Requests: 0
> >
> >       Servers:
> >       1) ConnectionId: 0x70e Hostname: w22-root2.gandalf.test
> >       ClientGUID: 44DAE383-3E91-3042-85FE-87D6F17298B7
> >       Number of credits: 1,1,1 Dialect 0x210
> >       Server capabilities: 0x300007
> >       TCP status: 4 Instance: 77
> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> >       In Send: 0 In MaxReq Wait: 0
> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link10
> >
> >               Sessions:
> >                       [NONE]
> >       2) ConnectionId: 0x706 Hostname: w22-root2.gandalf.test
> >       ClientGUID: C8CF45E4-F70D-DF40-8821-0234A2E20DD4
> >       Number of credits: 1,1,1 Dialect 0x210
> >       Server capabilities: 0x300007
> >       TCP status: 4 Instance: 81
> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> >       In Send: 0 In MaxReq Wait: 0
> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link6
> >
> >               Sessions:
> >                       [NONE]
> >       3) ConnectionId: 0x6ae Hostname: w22-root1.gandalf.test
> >       ClientGUID: AB059CDD-12FF-B94D-B30C-9E1928ACBA95
> >       Number of credits: 1,1,1 Dialect 0x210
> >       Server capabilities: 0x300007
> >       TCP status: 4 Instance: 96
> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> >       In Send: 0 In MaxReq Wait: 0
> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link9
> >
> >               Sessions:
> >                       [NONE]
> >         ...
>
> I ended up with a simple reproducer for the above
>
>         mount.cifs //srv/share /mnt/1 -o ...,echo_interval=10
>         iptables -I INPUT -s $srvaddr -j DROP
>         stat -f /mnt/1
>         # check dmesg for "BUG: sleeping function called from invalid context"
>         iptables -I INPUT -s $srvaddr -j ACCEPT
>         stat -f /mnt/1
>         umount /mnt/1
>         # check /proc/fs/cifs/DebugData for leaked server connection

Ack. I'll try and get a repro locally and debug this one.
Paulo Alcantara Nov. 9, 2023, 1:28 p.m. UTC | #5
Shyam Prasad N <nspmangalore@gmail.com> writes:

> On Thu, Nov 9, 2023 at 1:11 AM Paulo Alcantara <pc@manguebit.com> wrote:
>>
>> > and leaked server connections
>> >
>> >       Display Internal CIFS Data Structures for Debugging
>> >       ---------------------------------------------------
>> >       CIFS Version 2.46
>> >       Features: DFS,FSCACHE,STATS2,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL,WITNESS
>> >       CIFSMaxBufSize: 16384
>> >       Active VFS Requests: 0
>> >
>> >       Servers:
>> >       1) ConnectionId: 0x70e Hostname: w22-root2.gandalf.test
>> >       ClientGUID: 44DAE383-3E91-3042-85FE-87D6F17298B7
>> >       Number of credits: 1,1,1 Dialect 0x210
>> >       Server capabilities: 0x300007
>> >       TCP status: 4 Instance: 77
>> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
>> >       In Send: 0 In MaxReq Wait: 0
>> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link10
>> >
>> >               Sessions:
>> >                       [NONE]
>> >       2) ConnectionId: 0x706 Hostname: w22-root2.gandalf.test
>> >       ClientGUID: C8CF45E4-F70D-DF40-8821-0234A2E20DD4
>> >       Number of credits: 1,1,1 Dialect 0x210
>> >       Server capabilities: 0x300007
>> >       TCP status: 4 Instance: 81
>> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
>> >       In Send: 0 In MaxReq Wait: 0
>> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link6
>> >
>> >               Sessions:
>> >                       [NONE]
>> >       3) ConnectionId: 0x6ae Hostname: w22-root1.gandalf.test
>> >       ClientGUID: AB059CDD-12FF-B94D-B30C-9E1928ACBA95
>> >       Number of credits: 1,1,1 Dialect 0x210
>> >       Server capabilities: 0x300007
>> >       TCP status: 4 Instance: 96
>> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
>> >       In Send: 0 In MaxReq Wait: 0
>> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link9
>> >
>> >               Sessions:
>> >                       [NONE]
>> >         ...
>>
>> I ended up with a simple reproducer for the above
>>
>>         mount.cifs //srv/share /mnt/1 -o ...,echo_interval=10
>>         iptables -I INPUT -s $srvaddr -j DROP
>>         stat -f /mnt/1
>>         # check dmesg for "BUG: sleeping function called from invalid context"
>>         iptables -I INPUT -s $srvaddr -j ACCEPT
>>         stat -f /mnt/1
>>         umount /mnt/1
>>         # check /proc/fs/cifs/DebugData for leaked server connection
>
> Ack. I'll try and get a repro locally and debug this one.

This should be related to patch 10/14 as you're taking an extra
reference of @server over reconnect, and when the client reconnects and
umount, that reference don't seem to be put afterwards.
Shyam Prasad N Nov. 9, 2023, 1:49 p.m. UTC | #6
On Thu, Nov 9, 2023 at 6:58 PM Paulo Alcantara <pc@manguebit.com> wrote:
>
> Shyam Prasad N <nspmangalore@gmail.com> writes:
>
> > On Thu, Nov 9, 2023 at 1:11 AM Paulo Alcantara <pc@manguebit.com> wrote:
> >>
> >> > and leaked server connections
> >> >
> >> >       Display Internal CIFS Data Structures for Debugging
> >> >       ---------------------------------------------------
> >> >       CIFS Version 2.46
> >> >       Features: DFS,FSCACHE,STATS2,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL,WITNESS
> >> >       CIFSMaxBufSize: 16384
> >> >       Active VFS Requests: 0
> >> >
> >> >       Servers:
> >> >       1) ConnectionId: 0x70e Hostname: w22-root2.gandalf.test
> >> >       ClientGUID: 44DAE383-3E91-3042-85FE-87D6F17298B7
> >> >       Number of credits: 1,1,1 Dialect 0x210
> >> >       Server capabilities: 0x300007
> >> >       TCP status: 4 Instance: 77
> >> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> >> >       In Send: 0 In MaxReq Wait: 0
> >> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link10
> >> >
> >> >               Sessions:
> >> >                       [NONE]
> >> >       2) ConnectionId: 0x706 Hostname: w22-root2.gandalf.test
> >> >       ClientGUID: C8CF45E4-F70D-DF40-8821-0234A2E20DD4
> >> >       Number of credits: 1,1,1 Dialect 0x210
> >> >       Server capabilities: 0x300007
> >> >       TCP status: 4 Instance: 81
> >> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> >> >       In Send: 0 In MaxReq Wait: 0
> >> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link6
> >> >
> >> >               Sessions:
> >> >                       [NONE]
> >> >       3) ConnectionId: 0x6ae Hostname: w22-root1.gandalf.test
> >> >       ClientGUID: AB059CDD-12FF-B94D-B30C-9E1928ACBA95
> >> >       Number of credits: 1,1,1 Dialect 0x210
> >> >       Server capabilities: 0x300007
> >> >       TCP status: 4 Instance: 96
> >> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> >> >       In Send: 0 In MaxReq Wait: 0
> >> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link9
> >> >
> >> >               Sessions:
> >> >                       [NONE]
> >> >         ...
> >>
> >> I ended up with a simple reproducer for the above
> >>
> >>         mount.cifs //srv/share /mnt/1 -o ...,echo_interval=10
> >>         iptables -I INPUT -s $srvaddr -j DROP
> >>         stat -f /mnt/1
> >>         # check dmesg for "BUG: sleeping function called from invalid context"
> >>         iptables -I INPUT -s $srvaddr -j ACCEPT
> >>         stat -f /mnt/1
> >>         umount /mnt/1
> >>         # check /proc/fs/cifs/DebugData for leaked server connection
> >
> > Ack. I'll try and get a repro locally and debug this one.
>
> This should be related to patch 10/14 as you're taking an extra
> reference of @server over reconnect, and when the client reconnects and
> umount, that reference don't seem to be put afterwards.

The idea is that the reference is put in the reconnect worker.
I think the issue is that I don't drop the final reference if
cancel_delayed_work_sync was able to cancel a work.
Let me work on the updated patch.

Also, I found the reason for "BUG: sleeping function called from
invalid context". I've enabled CONFIG_DEBUG_ATOMIC_SLEEP in my test
config now.
Shyam Prasad N Nov. 10, 2023, 4:09 a.m. UTC | #7
On Thu, Nov 9, 2023 at 7:19 PM Shyam Prasad N <nspmangalore@gmail.com> wrote:
>
> On Thu, Nov 9, 2023 at 6:58 PM Paulo Alcantara <pc@manguebit.com> wrote:
> >
> > Shyam Prasad N <nspmangalore@gmail.com> writes:
> >
> > > On Thu, Nov 9, 2023 at 1:11 AM Paulo Alcantara <pc@manguebit.com> wrote:
> > >>
> > >> > and leaked server connections
> > >> >
> > >> >       Display Internal CIFS Data Structures for Debugging
> > >> >       ---------------------------------------------------
> > >> >       CIFS Version 2.46
> > >> >       Features: DFS,FSCACHE,STATS2,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL,WITNESS
> > >> >       CIFSMaxBufSize: 16384
> > >> >       Active VFS Requests: 0
> > >> >
> > >> >       Servers:
> > >> >       1) ConnectionId: 0x70e Hostname: w22-root2.gandalf.test
> > >> >       ClientGUID: 44DAE383-3E91-3042-85FE-87D6F17298B7
> > >> >       Number of credits: 1,1,1 Dialect 0x210
> > >> >       Server capabilities: 0x300007
> > >> >       TCP status: 4 Instance: 77
> > >> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> > >> >       In Send: 0 In MaxReq Wait: 0
> > >> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link10
> > >> >
> > >> >               Sessions:
> > >> >                       [NONE]
> > >> >       2) ConnectionId: 0x706 Hostname: w22-root2.gandalf.test
> > >> >       ClientGUID: C8CF45E4-F70D-DF40-8821-0234A2E20DD4
> > >> >       Number of credits: 1,1,1 Dialect 0x210
> > >> >       Server capabilities: 0x300007
> > >> >       TCP status: 4 Instance: 81
> > >> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> > >> >       In Send: 0 In MaxReq Wait: 0
> > >> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link6
> > >> >
> > >> >               Sessions:
> > >> >                       [NONE]
> > >> >       3) ConnectionId: 0x6ae Hostname: w22-root1.gandalf.test
> > >> >       ClientGUID: AB059CDD-12FF-B94D-B30C-9E1928ACBA95
> > >> >       Number of credits: 1,1,1 Dialect 0x210
> > >> >       Server capabilities: 0x300007
> > >> >       TCP status: 4 Instance: 96
> > >> >       Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840
> > >> >       In Send: 0 In MaxReq Wait: 0
> > >> >       DFS leaf full path: \\W22-ROOT1.GANDALF.TEST\dfstest3\link9
> > >> >
> > >> >               Sessions:
> > >> >                       [NONE]
> > >> >         ...
> > >>
> > >> I ended up with a simple reproducer for the above
> > >>
> > >>         mount.cifs //srv/share /mnt/1 -o ...,echo_interval=10
> > >>         iptables -I INPUT -s $srvaddr -j DROP
> > >>         stat -f /mnt/1
> > >>         # check dmesg for "BUG: sleeping function called from invalid context"
> > >>         iptables -I INPUT -s $srvaddr -j ACCEPT
> > >>         stat -f /mnt/1
> > >>         umount /mnt/1
> > >>         # check /proc/fs/cifs/DebugData for leaked server connection
> > >
> > > Ack. I'll try and get a repro locally and debug this one.
> >
> > This should be related to patch 10/14 as you're taking an extra
> > reference of @server over reconnect, and when the client reconnects and
> > umount, that reference don't seem to be put afterwards.
>
> The idea is that the reference is put in the reconnect worker.
> I think the issue is that I don't drop the final reference if
> cancel_delayed_work_sync was able to cancel a work.
This was already taken care of.

> Let me work on the updated patch.
>
> Also, I found the reason for "BUG: sleeping function called from
> invalid context". I've enabled CONFIG_DEBUG_ATOMIC_SLEEP in my test
> config now.
>
> --
> Regards,
> Shyam

Hi Paulo,

Can you please check if the problem is still seen with these updated patches?
I was unable to reproduce the issue with the steps you provided.
Paulo Alcantara Nov. 11, 2023, 5:23 p.m. UTC | #8
Shyam Prasad N <nspmangalore@gmail.com> writes:

> Can you please check if the problem is still seen with these updated patches?
> I was unable to reproduce the issue with the steps you provided.

I couldn't reproduce that reconnect issue anymore.

However, some other problems as shown below with lockdep, kmemleak and
kasan enabled.

$ mount.cifs //w22-root1.gandalf.test/test /mnt/1 -o username=administrator,password=***,multichannel,echo_interval=10
# disable multichannel on w22-root1.gandalf.test server
#   Set-SmbServerConfiguration -EnableMultichannel $false
$ iptables -I INPUT -s 192.168.1.11 -j DROP
$ stat -f /mnt/1
stat: cannot read file system information for '/mnt/1': Host is down
$ iptables -I INPUT -s 192.168.1.11 -j ACCEPT
$ stat -f /mnt/1
stat: cannot read file system information for '/mnt/1': Resource temporarily unavailable

...
[ 1465.290096] CIFS: VFS: \\w22-root1.gandalf.test does not support multichannel anymore. disabling all other channels
[ 1465.292488] 
[ 1465.292863] ======================================================
[ 1465.294183] WARNING: possible circular locking dependency detected
[ 1465.295309] 6.6.0 #2 Not tainted
[ 1465.295863] ------------------------------------------------------
[ 1465.296876] kworker/2:1/1127 is trying to acquire lock:
[ 1465.297745] ffff8880129482c0 (&ret_buf->iface_lock){+.+.}-{2:2}, at: cifs_disable_secondary_channels+0x117/0x280 [cifs]
[ 1465.300314] 
[ 1465.300314] but task is already holding lock:
[ 1465.301267] ffff888012948328 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_disable_secondary_channels+0x29/0x280 [cifs]
[ 1465.303804] 
[ 1465.303804] which lock already depends on the new lock.
[ 1465.303804] 
[ 1465.305107] 
[ 1465.305107] the existing dependency chain (in reverse order) is:
[ 1465.306309] 
[ 1465.306309] -> #1 (&ret_buf->chan_lock){+.+.}-{2:2}:
[ 1465.307391]        _raw_spin_lock+0x34/0x80
[ 1465.308093]        cifs_try_adding_channels+0x205/0x1290 [cifs]
[ 1465.309865]        cifs_mount+0xfb/0x4c0 [cifs]
[ 1465.311423]        cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
[ 1465.313094]        smb3_get_tree+0x16d/0x380 [cifs]
[ 1465.314702]        vfs_get_tree+0x4d/0x190
[ 1465.315275]        path_mount+0x3c4/0xf90
[ 1465.315811]        __x64_sys_mount+0x1aa/0x1f0
[ 1465.316392]        do_syscall_64+0x47/0xf0
[ 1465.316950]        entry_SYSCALL_64_after_hwframe+0x6f/0x77
[ 1465.317681] 
[ 1465.317681] -> #0 (&ret_buf->iface_lock){+.+.}-{2:2}:
[ 1465.318548]        __lock_acquire+0x1793/0x2110
[ 1465.319156]        lock_acquire+0x14a/0x3a0
[ 1465.319714]        _raw_spin_lock+0x34/0x80
[ 1465.320266]        cifs_disable_secondary_channels+0x117/0x280 [cifs]
[ 1465.321729]        smb2_reconnect+0x520/0xcb0 [cifs]
[ 1465.323001]        smb2_reconnect_server+0x771/0xb00 [cifs]
[ 1465.324352]        process_one_work+0x43c/0x8e0
[ 1465.324959]        worker_thread+0x397/0x690
[ 1465.325522]        kthread+0x18d/0x1d0
[ 1465.326028]        ret_from_fork+0x34/0x60
[ 1465.326564]        ret_from_fork_asm+0x1b/0x30
[ 1465.327164] 
[ 1465.327164] other info that might help us debug this:
[ 1465.327164] 
[ 1465.328180]  Possible unsafe locking scenario:
[ 1465.328180] 
[ 1465.328943]        CPU0                    CPU1
[ 1465.329523]        ----                    ----
[ 1465.330119]   lock(&ret_buf->chan_lock);
[ 1465.330649]                                lock(&ret_buf->iface_lock);
[ 1465.331493]                                lock(&ret_buf->chan_lock);
[ 1465.332326]   lock(&ret_buf->iface_lock);
[ 1465.332865] 
[ 1465.332865]  *** DEADLOCK ***
[ 1465.332865] 
[ 1465.333623] 5 locks held by kworker/2:1/1127:
[ 1465.334199]  #0: ffff88800dd09d48 ((wq_completion)cifsiod){+.+.}-{0:0}, at: process_one_work+0x39a/0x8e0
[ 1465.335430]  #1: ffff88800de97dc0 ((work_completion)(&(&tcp_ses->reconnect)->work)){+.+.}-{0:0}, at: process_one_work+0x39a/0x8e0
[ 1465.336947]  #2: ffff88800dd7a8c8 (&tcp_ses->reconnect_mutex){+.+.}-{3:3}, at: smb2_reconnect_server+0xde/0xb00 [cifs]
[ 1465.338953]  #3: ffff8880129480f0 (&ret_buf->session_mutex){+.+.}-{3:3}, at: smb2_reconnect+0x234/0xcb0 [cifs]
[ 1465.340838]  #4: ffff888012948328 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_disable_secondary_channels+0x29/0x280 [cifs]
[ 1465.342073] 
[ 1465.342073] stack backtrace:
[ 1465.342421] CPU: 2 PID: 1127 Comm: kworker/2:1 Not tainted 6.6.0 #2
[ 1465.342917] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
[ 1465.343782] Workqueue: cifsiod smb2_reconnect_server [cifs]
[ 1465.344595] Call Trace:
[ 1465.344845]  <TASK>
[ 1465.345025]  dump_stack_lvl+0x4a/0x80
[ 1465.345329]  check_noncircular+0x269/0x2b0
[ 1465.345672]  ? __pfx_check_noncircular+0x10/0x10
[ 1465.346047]  ? __pfx_stack_trace_save+0x10/0x10
[ 1465.346420]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.346814]  ? lockdep_lock+0xa3/0x160
[ 1465.347128]  ? __pfx_lockdep_lock+0x10/0x10
[ 1465.347465]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.347858]  ? add_chain_block+0x1d8/0x280
[ 1465.348191]  __lock_acquire+0x1793/0x2110
[ 1465.348524]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.348918]  ? __pfx___lock_acquire+0x10/0x10
[ 1465.349271]  ? __pfx_prb_read_valid+0x10/0x10
[ 1465.349624]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.350018]  ? llist_add_batch+0x9d/0xf0
[ 1465.350344]  ? __pfx_llist_add_batch+0x10/0x10
[ 1465.350712]  lock_acquire+0x14a/0x3a0
[ 1465.351014]  ? cifs_disable_secondary_channels+0x117/0x280 [cifs]
[ 1465.351874]  ? __pfx_lock_acquire+0x10/0x10
[ 1465.352214]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.352600]  ? hlock_class+0x32/0xc0
[ 1465.352900]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.353286]  ? lock_acquired+0x2df/0x510
[ 1465.353612]  ? do_raw_spin_trylock+0xd1/0x120
[ 1465.353973]  ? __pfx_lock_acquired+0x10/0x10
[ 1465.354323]  _raw_spin_lock+0x34/0x80
[ 1465.354622]  ? cifs_disable_secondary_channels+0x117/0x280 [cifs]
[ 1465.355484]  cifs_disable_secondary_channels+0x117/0x280 [cifs]
[ 1465.356332]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.356730]  smb2_reconnect+0x520/0xcb0 [cifs]
[ 1465.357477]  smb2_reconnect_server+0x771/0xb00 [cifs]
[ 1465.358269]  ? __pfx_smb2_reconnect_server+0x10/0x10 [cifs]
[ 1465.359089]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.359478]  ? lock_is_held_type+0x90/0x100
[ 1465.359830]  ? mark_held_locks+0x1a/0x90
[ 1465.360155]  process_one_work+0x43c/0x8e0
[ 1465.360493]  ? __pfx_process_one_work+0x10/0x10
[ 1465.360912]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.361359]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.361754]  ? __list_add_valid_or_report+0x37/0xf0
[ 1465.362155]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.362544]  worker_thread+0x397/0x690
[ 1465.362863]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.363250]  ? __kthread_parkme+0xce/0xf0
[ 1465.363583]  ? __pfx_worker_thread+0x10/0x10
[ 1465.363942]  kthread+0x18d/0x1d0
[ 1465.364210]  ? kthread+0xdb/0x1d0
[ 1465.364483]  ? __pfx_kthread+0x10/0x10
[ 1465.364798]  ret_from_fork+0x34/0x60
[ 1465.365093]  ? __pfx_kthread+0x10/0x10
[ 1465.365400]  ret_from_fork_asm+0x1b/0x30
[ 1465.365733]  </TASK>
[ 1465.365954] BUG: sleeping function called from invalid context at kernel/workqueue.c:3344
[ 1465.366821] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1127, name: kworker/2:1
[ 1465.367690] preempt_count: 1, expected: 0
[ 1465.368115] RCU nest depth: 0, expected: 0
[ 1465.368547] INFO: lockdep is turned off.
[ 1465.368984] CPU: 2 PID: 1127 Comm: kworker/2:1 Not tainted 6.6.0 #2
[ 1465.369642] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
[ 1465.370788] Workqueue: cifsiod smb2_reconnect_server [cifs]
[ 1465.371888] Call Trace:
[ 1465.372164]  <TASK>
[ 1465.372401]  dump_stack_lvl+0x64/0x80
[ 1465.372809]  __might_resched+0x23c/0x360
[ 1465.373236]  ? __pfx___might_resched+0x10/0x10
[ 1465.373722]  ? rcu_is_watching+0x23/0x50
[ 1465.374149]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.374661]  ? __might_sleep+0x2b/0xb0
[ 1465.375119]  ? __flush_work+0xc5/0x640
[ 1465.375524]  __flush_work+0xfd/0x640
[ 1465.375914]  ? __flush_work+0xc5/0x640
[ 1465.376320]  ? add_chain_block+0x1d8/0x280
[ 1465.376771]  ? __pfx___flush_work+0x10/0x10
[ 1465.377240]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.377686]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.378072]  ? try_to_grab_pending+0x47/0x3a0
[ 1465.378430]  __cancel_work_timer+0x210/0x2c0
[ 1465.378785]  ? __pfx___cancel_work_timer+0x10/0x10
[ 1465.379176]  ? do_raw_spin_trylock+0xd1/0x120
[ 1465.379537]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.379933]  ? do_raw_spin_unlock+0x9b/0x100
[ 1465.380293]  cifs_put_tcp_session+0x118/0x290 [cifs]
[ 1465.381076]  cifs_disable_secondary_channels+0xdb/0x280 [cifs]
[ 1465.381930]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.382323]  smb2_reconnect+0x520/0xcb0 [cifs]
[ 1465.383072]  smb2_reconnect_server+0x771/0xb00 [cifs]
[ 1465.383870]  ? __pfx_smb2_reconnect_server+0x10/0x10 [cifs]
[ 1465.384694]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.385084]  ? lock_is_held_type+0x90/0x100
[ 1465.385424]  ? mark_held_locks+0x1a/0x90
[ 1465.385708] CIFS: fs/smb/client/cifsfs.c: VFS: in cifs_statfs as Xid: 9 with uid: 0
[ 1465.385754]  process_one_work+0x43c/0x8e0
[ 1465.387019]  ? __pfx_process_one_work+0x10/0x10
[ 1465.387387]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.387821]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.388334]  ? __list_add_valid_or_report+0x37/0xf0
[ 1465.388870]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.389385]  worker_thread+0x397/0x690
[ 1465.389753]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.390144]  ? __kthread_parkme+0xce/0xf0
[ 1465.390476]  ? __pfx_worker_thread+0x10/0x10
[ 1465.390833]  kthread+0x18d/0x1d0
[ 1465.391103]  ? kthread+0xdb/0x1d0
[ 1465.391378]  ? __pfx_kthread+0x10/0x10
[ 1465.391695]  ret_from_fork+0x34/0x60
[ 1465.391991]  ? __pfx_kthread+0x10/0x10
[ 1465.392302]  ret_from_fork_asm+0x1b/0x30
[ 1465.392632]  </TASK>
[ 1465.393120] CIFS: fs/smb/client/connect.c: cifs_setup_session: channel connect bitmap: 0x1
[ 1465.393959] CIFS: fs/smb/client/connect.c: Free previous auth_key.response = 0000000018de71c6
[ 1465.393960] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x1, tcon reconnect: 0
[ 1465.394666] CIFS: fs/smb/client/connect.c: Security Mode: 0x1 Capabilities: 0x300067 TimeAdjust: 0
[ 1465.396191] CIFS: fs/smb/client/smb2pdu.c: Session Setup
[ 1465.396675] CIFS: fs/smb/client/smb2pdu.c: sess setup type 2
[ 1465.397269] CIFS: fs/smb/client/smb2pdu.c: Fresh session. Previous: 1e800d8000049
[ 1465.399253] CIFS: fs/smb/client/smb2maperror.c: Mapping SMB2 status code 0xc0000016 to POSIX err -5
[ 1465.400409] CIFS: fs/smb/client/sess.c: decode_ntlmssp_challenge: negotiate=0xe2088235 challenge=0xe2898235
[ 1465.401245] CIFS: fs/smb/client/smb2pdu.c: rawntlmssp session setup challenge phase
[ 1465.402058] CIFS: fs/smb/client/smb2pdu.c: Fresh session. Previous: 1e800d8000049
[ 1465.404585] CIFS: fs/smb/client/smb2pdu.c: SMB2/3 session established successfully
[ 1465.405543] CIFS: fs/smb/client/sess.c: Cleared reconnect bitmask for chan 0; now 0x0
[ 1465.406375] CIFS: fs/smb/client/connect.c: __cifs_put_smb_ses: ses_count=2
[ 1465.407092] CIFS: fs/smb/client/connect.c: __cifs_put_smb_ses: ses ipc: \\w22-root1.gandalf.test\IPC$
[ 1465.408048] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels finished
[ 1465.408794] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels
[ 1465.409349] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels finished
[ 1465.409949] CIFS: Server share \\w22-root1.gandalf.test\test deleted.
[ 1465.410961] CIFS: fs/smb/client/smb2maperror.c: Mapping SMB2 status code 0xc00000c9 to POSIX err -78
[ 1465.412204] CIFS: server share \\w22-root1.gandalf.test\test deleted
[ 1465.413298] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x0, tcon reconnect: 1
[ 1465.413311] CIFS: fs/smb/client/smb2pdu.c: reconnect tcon rc = 0
[ 1465.415272] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x0, tcon reconnect: 1
[ 1465.415285] CIFS: fs/smb/client/smb2pdu.c: reconnect tcon rc = 0
[ 1465.417047] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels
[ 1465.417911] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x0, tcon reconnect: 1
[ 1465.417924] CIFS: fs/smb/client/smb2pdu.c: reconnect tcon rc = 0
[ 1465.421431] CIFS: fs/smb/client/connect.c: cifs_put_tcon: tc_count=2
[ 1465.422397] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels finished
[ 1465.423725] ------------[ cut here ]------------
[ 1465.424158] WARNING: CPU: 3 PID: 85 at fs/smb/client/connect.c:1616 cifs_put_tcp_session+0x27a/0x290 [cifs]
[ 1465.425407] Modules linked in: cifs cifs_arc4 nls_ucs2_utils fscache cifs_md4 [last unloaded: cifs]
[ 1465.426242] CPU: 3 PID: 85 Comm: kworker/3:3 Tainted: G        W          6.6.0 #2
[ 1465.426929] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
[ 1465.427915] Workqueue: cifsiod smb2_reconnect_server [cifs]
[ 1465.428828] RIP: 0010:cifs_put_tcp_session+0x27a/0x290 [cifs]
[ 1465.429754] Code: b1 15 c2 e9 32 fe ff ff 89 ee 48 89 df e8 9e fd ff ff e9 cd fe ff ff be 03 00 00 00 4c 89 ef e8 ac 6d ad c1 e9 11 fe ff ff 90 <0f> 0b 90 e9 c0 fd ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00
[ 1465.431382] RSP: 0018:ffff88800b8afc10 EFLAGS: 00010286
[ 1465.431872] RAX: 00000000ffffffff RBX: ffff88800dd7a000 RCX: dffffc0000000000
[ 1465.432498] RDX: 0000000000000003 RSI: ffffffffc00e901b RDI: ffff88800dd7a070
[ 1465.433134] RBP: 0000000000000001 R08: 0000000000000000 R09: fffffbfff082290a
[ 1465.433788] R10: ffffffff84114857 R11: 0000000000000000 R12: 1ffff11001715f90
[ 1465.434416] R13: ffff88800b8afca0 R14: ffff88800dd7a7a0 R15: ffff88800b8afc88
[ 1465.435086] FS:  0000000000000000(0000) GS:ffff88805b000000(0000) knlGS:0000000000000000
[ 1465.435818] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1465.436330] CR2: 00007f00682affae CR3: 0000000009fe6000 CR4: 0000000000750ef0
[ 1465.436981] PKRU: 55555554
[ 1465.437227] Call Trace:
[ 1465.437458]  <TASK>
[ 1465.437676]  ? __warn+0xa5/0x200
[ 1465.437979]  ? cifs_put_tcp_session+0x27a/0x290 [cifs]
[ 1465.438833]  ? report_bug+0x1b2/0x1e0
[ 1465.439168]  ? handle_bug+0x6f/0x90
[ 1465.439484]  ? exc_invalid_op+0x17/0x50
[ 1465.439846]  ? asm_exc_invalid_op+0x1a/0x20
[ 1465.440247]  ? cifs_put_tcp_session+0x2b/0x290 [cifs]
[ 1465.441102]  ? cifs_put_tcp_session+0x27a/0x290 [cifs]
[ 1465.441970]  smb2_reconnect_server+0x646/0xb00 [cifs]
[ 1465.442823]  ? lock_sync+0xd0/0xe0
[ 1465.443138]  ? __pfx_smb2_reconnect_server+0x10/0x10 [cifs]
[ 1465.444042]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.444467]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.444912]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.445342]  ? read_word_at_a_time+0xe/0x20
[ 1465.445727]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.446150]  process_one_work+0x43c/0x8e0
[ 1465.446513]  ? __pfx_process_one_work+0x10/0x10
[ 1465.446946]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.447376]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.447826]  ? __list_add_valid_or_report+0x37/0xf0
[ 1465.448278]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.448731]  worker_thread+0x397/0x690
[ 1465.449080]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1465.449515]  ? __kthread_parkme+0xce/0xf0
[ 1465.449991]  ? __pfx_worker_thread+0x10/0x10
[ 1465.450390]  kthread+0x18d/0x1d0
[ 1465.450712]  ? kthread+0xdb/0x1d0
[ 1465.451022]  ? __pfx_kthread+0x10/0x10
[ 1465.451362]  ret_from_fork+0x34/0x60
[ 1465.451699]  ? __pfx_kthread+0x10/0x10
[ 1465.452038]  ret_from_fork_asm+0x1b/0x30
[ 1465.452405]  </TASK>
[ 1465.452605] irq event stamp: 87714
[ 1465.452927] hardirqs last  enabled at (87713): [<ffffffff82864168>] _raw_spin_unlock_irq+0x28/0x50
[ 1465.453732] hardirqs last disabled at (87714): [<ffffffff82856f8d>] __schedule+0xc0d/0x1560
[ 1465.454468] softirqs last  enabled at (87624): [<ffffffff8110918c>] process_one_work+0x43c/0x8e0
[ 1465.455257] softirqs last disabled at (87620): [<ffffffff8227f67a>] neigh_managed_work+0x2a/0x110
[ 1465.456055] ---[ end trace 0000000000000000 ]---
[ 1465.456937] CIFS: fs/smb/client/cifsfs.c: VFS: leaving cifs_statfs (xid = 9) rc = -11
[ 1528.811802] CIFS: fs/smb/client/connect.c: VFS: in smb2_query_server_interfaces as Xid: 10 with uid: 0
[ 1528.813395] CIFS: fs/smb/client/smb2pdu.c: SMB2 IOCTL
[ 1528.814246] ==================================================================
[ 1528.815401] BUG: KASAN: slab-use-after-free in cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.817404] Read of size 1 at addr ffff88800dd7a2c4 by task kworker/0:2/1114
[ 1528.818539] 
[ 1528.818810] CPU: 0 PID: 1114 Comm: kworker/0:2 Tainted: G        W          6.6.0 #2
[ 1528.820040] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
[ 1528.821789] Workqueue: cifsiod smb2_query_server_interfaces [cifs]
[ 1528.823573] Call Trace:
[ 1528.823991]  <TASK>
[ 1528.824360]  dump_stack_lvl+0x4a/0x80
[ 1528.824976]  print_report+0xcf/0x650
[ 1528.825577]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.826361]  ? rcu_is_watching+0x23/0x50
[ 1528.827011]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.827804]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.828590]  ? __phys_addr+0x46/0x90
[ 1528.829193]  kasan_report+0xd8/0x110
[ 1528.829794]  ? cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.831379]  ? cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.832952]  cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.834506]  SMB2_ioctl+0x1b5/0x6f0 [cifs]
[ 1528.835969]  ? __pfx_console_unlock+0x10/0x10
[ 1528.836692]  ? tick_nohz_tick_stopped+0x21/0x30
[ 1528.837441]  ? __pfx_SMB2_ioctl+0x10/0x10 [cifs]
[ 1528.838992]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.839779]  ? irq_work_queue+0x2c/0x40
[ 1528.840416]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.841210]  ? lock_acquire+0xc1/0x3a0
[ 1528.841840]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.842626]  ? _printk+0xc0/0xf0
[ 1528.843191]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.843977]  ? __dynamic_pr_debug+0x1f5/0x260
[ 1528.844705]  ? __pfx___dynamic_pr_debug+0x10/0x10
[ 1528.845476]  ? lock_release+0xb6/0x5a0
[ 1528.846104]  ? __pfx_lock_acquire+0x10/0x10
[ 1528.846802]  ? SMB3_request_interfaces+0x137/0x2b0 [cifs]
[ 1528.848468]  SMB3_request_interfaces+0x137/0x2b0 [cifs]
[ 1528.850103]  ? __pfx_SMB3_request_interfaces+0x10/0x10 [cifs]
[ 1528.851828]  ? ___ratelimit+0x133/0x210
[ 1528.852535]  ? __pfx____ratelimit+0x10/0x10
[ 1528.853207]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.853839]  smb2_query_server_interfaces+0x54/0x1f0 [cifs]
[ 1528.855179]  process_one_work+0x43c/0x8e0
[ 1528.855848]  ? __pfx_process_one_work+0x10/0x10
[ 1528.856595]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.857380]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.858170]  ? __list_add_valid_or_report+0x37/0xf0
[ 1528.858985]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.859773]  worker_thread+0x397/0x690
[ 1528.860410]  ? __pfx_worker_thread+0x10/0x10
[ 1528.861124]  kthread+0x18d/0x1d0
[ 1528.861668]  ? kthread+0xdb/0x1d0
[ 1528.862226]  ? __pfx_kthread+0x10/0x10
[ 1528.862851]  ret_from_fork+0x34/0x60
[ 1528.863449]  ? __pfx_kthread+0x10/0x10
[ 1528.864074]  ret_from_fork_asm+0x1b/0x30
[ 1528.864745]  </TASK>
[ 1528.865120] 
[ 1528.865389] Allocated by task 1253:
[ 1528.865965]  kasan_save_stack+0x33/0x60
[ 1528.866602]  kasan_set_track+0x25/0x30
[ 1528.867226]  __kasan_kmalloc+0x8f/0xa0
[ 1528.867856]  cifs_get_tcp_session+0xbc/0xc70 [cifs]
[ 1528.869161]  cifs_mount_get_session+0x70/0x220 [cifs]
[ 1528.870425]  dfs_mount_share+0x249/0x1060 [cifs]
[ 1528.871650]  cifs_mount+0xda/0x4c0 [cifs]
[ 1528.872785]  cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
[ 1528.874016]  smb3_get_tree+0x16d/0x380 [cifs]
[ 1528.875208]  vfs_get_tree+0x4d/0x190
[ 1528.875684]  path_mount+0x3c4/0xf90
[ 1528.876149]  __x64_sys_mount+0x1aa/0x1f0
[ 1528.876664]  do_syscall_64+0x47/0xf0
[ 1528.877140]  entry_SYSCALL_64_after_hwframe+0x6f/0x77
[ 1528.877800] 
[ 1528.878013] Freed by task 1255:
[ 1528.878428]  kasan_save_stack+0x33/0x60
[ 1528.878934]  kasan_set_track+0x25/0x30
[ 1528.879430]  kasan_save_free_info+0x2b/0x50
[ 1528.879975]  ____kasan_slab_free+0x126/0x170
[ 1528.880536]  slab_free_freelist_hook+0x9d/0x1e0
[ 1528.881131]  __kmem_cache_free+0x9d/0x190
[ 1528.881506]  clean_demultiplex_info+0x3bb/0x640 [cifs]
[ 1528.882306]  cifs_demultiplex_thread+0x3de/0x1270 [cifs]
[ 1528.883086]  kthread+0x18d/0x1d0
[ 1528.883351]  ret_from_fork+0x34/0x60
[ 1528.883643]  ret_from_fork_asm+0x1b/0x30
[ 1528.883960] 
[ 1528.884092] Last potentially related work creation:
[ 1528.884481]  kasan_save_stack+0x33/0x60
[ 1528.884792]  __kasan_record_aux_stack+0x94/0xa0
[ 1528.885155]  __queue_work+0x334/0x8a0
[ 1528.885454]  mod_delayed_work_on+0xa5/0x100
[ 1528.885792]  smb2_reconnect+0x735/0xcb0 [cifs]
[ 1528.886518]  SMB2_query_info_init+0xca/0x250 [cifs]
[ 1528.887273]  smb2_query_info_compound+0x473/0x6d0 [cifs]
[ 1528.888065]  smb2_queryfs+0xc2/0x2c0 [cifs]
[ 1528.888763]  smb311_queryfs+0x210/0x220 [cifs]
[ 1528.889485]  cifs_statfs+0x164/0x290 [cifs]
[ 1528.890185]  statfs_by_dentry+0x9b/0xf0
[ 1528.890497]  user_statfs+0xab/0x130
[ 1528.890783]  __do_sys_statfs+0x81/0xe0
[ 1528.891090]  do_syscall_64+0x47/0xf0
[ 1528.891383]  entry_SYSCALL_64_after_hwframe+0x6f/0x77
[ 1528.891786] 
[ 1528.891918] Second to last potentially related work creation:
[ 1528.892368]  kasan_save_stack+0x33/0x60
[ 1528.892679]  __kasan_record_aux_stack+0x94/0xa0
[ 1528.893043]  __queue_work+0x334/0x8a0
[ 1528.893342]  mod_delayed_work_on+0xa5/0x100
[ 1528.893680]  smb2_reconnect+0x735/0xcb0 [cifs]
[ 1528.894409]  SMB2_open_init+0xf8/0x13f0 [cifs]
[ 1528.895135]  smb2_query_info_compound+0x2b6/0x6d0 [cifs]
[ 1528.895925]  smb2_queryfs+0xc2/0x2c0 [cifs]
[ 1528.896628]  smb311_queryfs+0x210/0x220 [cifs]
[ 1528.897353]  cifs_statfs+0x164/0x290 [cifs]
[ 1528.898060]  statfs_by_dentry+0x9b/0xf0
[ 1528.898371]  user_statfs+0xab/0x130
[ 1528.898656]  __do_sys_statfs+0x81/0xe0
[ 1528.898961]  do_syscall_64+0x47/0xf0
[ 1528.899255]  entry_SYSCALL_64_after_hwframe+0x6f/0x77
[ 1528.899658] 
[ 1528.899790] The buggy address belongs to the object at ffff88800dd7a000
[ 1528.899790]  which belongs to the cache kmalloc-4k of size 4096
[ 1528.900751] The buggy address is located 708 bytes inside of
[ 1528.900751]  freed 4096-byte region [ffff88800dd7a000, ffff88800dd7b000)
[ 1528.901699] 
[ 1528.901830] The buggy address belongs to the physical page:
[ 1528.902268] page:00000000b974ece7 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xdd78
[ 1528.902981] head:00000000b974ece7 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[ 1528.903609] anon flags: 0x100000000000840(slab|head|node=0|zone=1)
[ 1528.904098] page_type: 0xffffffff()
[ 1528.904386] raw: 0100000000000840 ffff888006443040 0000000000000000 dead000000000001
[ 1528.904989] raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000
[ 1528.905591] page dumped because: kasan: bad access detected
[ 1528.906030] 
[ 1528.906161] Memory state around the buggy address:
[ 1528.906542]  ffff88800dd7a180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1528.907107]  ffff88800dd7a200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1528.907675] >ffff88800dd7a280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1528.908240]                                            ^
[ 1528.908660]  ffff88800dd7a300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1528.909225]  ffff88800dd7a380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1528.909787] ==================================================================
[ 1528.910379] Kernel panic - not syncing: kasan.fault=panic set ...
[ 1528.910867] CPU: 0 PID: 1114 Comm: kworker/0:2 Tainted: G        W          6.6.0 #2
[ 1528.911475] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
[ 1528.912368] Workqueue: cifsiod smb2_query_server_interfaces [cifs]
[ 1528.913235] Call Trace:
[ 1528.913439]  <TASK>
[ 1528.913616]  dump_stack_lvl+0x4a/0x80
[ 1528.913917]  panic+0x41f/0x460
[ 1528.914177]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.914563]  ? __pfx_panic+0x10/0x10
[ 1528.914855]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.915248]  ? check_panic_on_warn+0x2f/0x80
[ 1528.915594]  end_report+0x125/0x130
[ 1528.915882]  kasan_report+0xe8/0x110
[ 1528.916176]  ? cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.916935]  ? cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.917695]  cifs_pick_channel+0xa2/0x170 [cifs]
[ 1528.918437]  SMB2_ioctl+0x1b5/0x6f0 [cifs]
[ 1528.919138]  ? __pfx_console_unlock+0x10/0x10
[ 1528.919490]  ? tick_nohz_tick_stopped+0x21/0x30
[ 1528.919857]  ? __pfx_SMB2_ioctl+0x10/0x10 [cifs]
[ 1528.920596]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.920985]  ? irq_work_queue+0x2c/0x40
[ 1528.921298]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.921685]  ? lock_acquire+0xc1/0x3a0
[ 1528.921998]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.922416]  ? _printk+0xc0/0xf0
[ 1528.922726]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.923157]  ? __dynamic_pr_debug+0x1f5/0x260
[ 1528.923546]  ? __pfx___dynamic_pr_debug+0x10/0x10
[ 1528.924097]  ? lock_release+0xb6/0x5a0
[ 1528.924707]  ? __pfx_lock_acquire+0x10/0x10
[ 1528.925558]  ? SMB3_request_interfaces+0x137/0x2b0 [cifs]
[ 1528.926794]  SMB3_request_interfaces+0x137/0x2b0 [cifs]
[ 1528.927962]  ? __pfx_SMB3_request_interfaces+0x10/0x10 [cifs]
[ 1528.929230]  ? ___ratelimit+0x133/0x210
[ 1528.929847]  ? __pfx____ratelimit+0x10/0x10
[ 1528.930217]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.930730]  smb2_query_server_interfaces+0x54/0x1f0 [cifs]
[ 1528.931564]  process_one_work+0x43c/0x8e0
[ 1528.931900]  ? __pfx_process_one_work+0x10/0x10
[ 1528.932273]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.932663]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.933055]  ? __list_add_valid_or_report+0x37/0xf0
[ 1528.933459]  ? srso_alias_return_thunk+0x5/0xfbef5
[ 1528.933852]  worker_thread+0x397/0x690
[ 1528.934173]  ? __pfx_worker_thread+0x10/0x10
[ 1528.934526]  kthread+0x18d/0x1d0
[ 1528.934799]  ? kthread+0xdb/0x1d0
[ 1528.935077]  ? __pfx_kthread+0x10/0x10
[ 1528.935388]  ret_from_fork+0x34/0x60
[ 1528.935686]  ? __pfx_kthread+0x10/0x10
[ 1528.935997]  ret_from_fork_asm+0x1b/0x30
[ 1528.936328]  </TASK>
[ 1528.937856] Kernel Offset: disabled
[ 1528.938146] ---[ end Kernel panic - not syncing: kasan.fault=panic set ... ]---

$ mount.cifs //w22-root1.gandalf.test/test /mnt/1 -o username=administrator,password=***
# ...wait until smb2_query_server_interfaces() is executed at least once...
$ cat /sys/kernel/debug/kmemleak
unreferenced object 0xffff888007821c00 (size 192):
  comm "mount.cifs", pid 885, jiffies 4294765292 (age 656.515s)
  hex dump (first 32 bytes):
    f0 52 da 16 80 88 ff ff f0 52 da 16 80 88 ff ff  .R.......R......
    01 00 00 00 00 00 00 00 00 ca 9a 3b 00 00 00 00  ...........;....
  backtrace:
    [<ffffffff8144e3f5>] __kmem_cache_alloc_node+0x295/0x2d0
    [<ffffffff813ab1aa>] kmalloc_trace+0x2a/0xc0
    [<ffffffffc0141c9d>] parse_server_interfaces+0x4ed/0xcc0 [cifs]
    [<ffffffffc014b323>] SMB3_request_interfaces+0x163/0x2b0 [cifs]
    [<ffffffffc014b5ed>] smb3_qfs_tcon+0x16d/0x2c0 [cifs]
    [<ffffffffc00f02c1>] cifs_mount_get_tcon+0x3b1/0x550 [cifs]
    [<ffffffffc0181f7a>] dfs_mount_share+0x1da/0x1060 [cifs]
    [<ffffffffc00f089a>] cifs_mount+0xda/0x4c0 [cifs]
    [<ffffffffc00e06d5>] cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
    [<ffffffffc0175a1d>] smb3_get_tree+0x16d/0x380 [cifs]
    [<ffffffff8147c1ad>] vfs_get_tree+0x4d/0x190
    [<ffffffff814c66d4>] path_mount+0x3c4/0xf90
    [<ffffffff814c7b5a>] __x64_sys_mount+0x1aa/0x1f0
    [<ffffffff8284c3d7>] do_syscall_64+0x47/0xf0
    [<ffffffff82a000eb>] entry_SYSCALL_64_after_hwframe+0x6f/0x77
    ...

$ ./scripts/faddr2line --list fs/smb/client/cifs.o parse_server_interfaces+0x4ed
parse_server_interfaces+0x4ed/0xcc0:

kmalloc at /home/pc/g/linux/./include/linux/slab.h:600
 595 
 596                    if (size > KMALLOC_MAX_CACHE_SIZE)
 597                            return kmalloc_large(size, flags);
 598 
 599                    index = kmalloc_index(size);
>600<                   return kmalloc_trace(
 601                                    kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
 602                                    flags, size);
 603            }
 604            return __kmalloc(size, flags);
 605    }

(inlined by) parse_server_interfaces at /home/pc/g/linux/fs/smb/client/smb2ops.c:694
 689                            }
 690                    }
 691                    spin_unlock(&ses->iface_lock);
 692 
 693                    /* no match. insert the entry in the list */
>694<                   info = kmalloc(sizeof(struct cifs_server_iface),
 695                                   GFP_KERNEL);
 696                    if (!info) {
 697                            rc = -ENOMEM;
 698                            goto out;
 699                    }
Steve French Nov. 12, 2023, 6:52 p.m. UTC | #9
I did reproduce a problem (memory leak) though.  After umount in the
scenario described below (similar to Paulo's repro).  I can umount but
am unable to do an rmmod, and the memory leak detector did spot the
following:

root@smfrench-ThinkPad-P52:/home/smfrench# cat /sys/kernel/debug/kmemleak
unreferenced object 0xffff8881a1186a00 (size 192):
  comm "apparmor_parser", pid 987, jiffies 4294916984 (age 2878.840s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 07 01 9f 81 88 ff ff  ................
    00 80 40 7a 81 88 ff ff 14 00 00 00 00 00 00 00  ..@z............
  backtrace:
    [<ffffffff8ac40b4a>] kmemleak_alloc+0x4a/0x90
    [<ffffffff898a94c3>] __kmem_cache_alloc_node+0x233/0x350
    [<ffffffff897cc67a>] kmalloc_trace+0x2a/0xf0
    [<ffffffff89ce725c>] aa_alloc_pdb+0x5c/0xd0
    [<ffffffff89cece8e>] unpack_pdb+0xbe/0x1080
    [<ffffffff89cee1da>] unpack_profile+0x37a/0x1fe0
    [<ffffffff89cf0704>] aa_unpack+0x254/0xba0
    [<ffffffff89ce9681>] aa_replace_profiles+0x151/0x1e50
    [<ffffffff89cd2bef>] policy_update+0x13f/0x1b0
    [<ffffffff89cd2da9>] profile_replace+0x139/0x180
    [<ffffffff899275ae>] vfs_write+0x1de/0x720
    [<ffffffff8992812f>] ksys_write+0xdf/0x190
    [<ffffffff89928232>] __x64_sys_write+0x42/0x60
    [<ffffffff8ac2fdcc>] do_syscall_64+0x5c/0xf0
    [<ffffffff8ae000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
unreferenced object 0xffff8881a1187b00 (size 192):
  comm "apparmor_parser", pid 987, jiffies 4294916984 (age 2878.840s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 09 01 9f 81 88 ff ff  ................
    00 90 40 7a 81 88 ff ff 24 00 00 00 00 00 00 00  ..@z....$.......
  backtrace:
    [<ffffffff8ac40b4a>] kmemleak_alloc+0x4a/0x90
    [<ffffffff898a94c3>] __kmem_cache_alloc_node+0x233/0x350
    [<ffffffff897cc67a>] kmalloc_trace+0x2a/0xf0
    [<ffffffff89ce725c>] aa_alloc_pdb+0x5c/0xd0
    [<ffffffff89cece8e>] unpack_pdb+0xbe/0x1080
    [<ffffffff89cef26b>] unpack_profile+0x140b/0x1fe0
    [<ffffffff89cf0704>] aa_unpack+0x254/0xba0
    [<ffffffff89ce9681>] aa_replace_profiles+0x151/0x1e50
    [<ffffffff89cd2bef>] policy_update+0x13f/0x1b0
    [<ffffffff89cd2da9>] profile_replace+0x139/0x180
    [<ffffffff899275ae>] vfs_write+0x1de/0x720
    [<ffffffff8992812f>] ksys_write+0xdf/0x190
    [<ffffffff89928232>] __x64_sys_write+0x42/0x60
    [<ffffffff8ac2fdcc>] do_syscall_64+0x5c/0xf0
    [<ffffffff8ae000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
unreferenced object 0xffff8881824d9600 (size 192):
  comm "apparmor_parser", pid 987, jiffies 4294916984 (age 2878.844s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 0a 01 9f 81 88 ff ff  ................
    00 00 78 65 81 88 ff ff e8 0b 00 00 00 00 00 00  ..xe............
  backtrace:
    [<ffffffff8ac40b4a>] kmemleak_alloc+0x4a/0x90
    [<ffffffff898a94c3>] __kmem_cache_alloc_node+0x233/0x350
    [<ffffffff897cc67a>] kmalloc_trace+0x2a/0xf0
    [<ffffffff89ce725c>] aa_alloc_pdb+0x5c/0xd0
    [<ffffffff89cece8e>] unpack_pdb+0xbe/0x1080
    [<ffffffff89ceeaae>] unpack_profile+0xc4e/0x1fe0
    [<ffffffff89cf0704>] aa_unpack+0x254/0xba0
    [<ffffffff89ce9681>] aa_replace_profiles+0x151/0x1e50
    [<ffffffff89cd2bef>] policy_update+0x13f/0x1b0
    [<ffffffff89cd2da9>] profile_replace+0x139/0x180
    [<ffffffff899275ae>] vfs_write+0x1de/0x720
    [<ffffffff8992812f>] ksys_write+0xdf/0x190
    [<ffffffff89928232>] __x64_sys_write+0x42/0x60
    [<ffffffff8ac2fdcc>] do_syscall_64+0x5c/0xf0
    [<ffffffff8ae000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
unreferenced object 0xffff888105f81600 (size 192):
  comm "mount.cifs", pid 4420, jiffies 4295067087 (age 2279.232s)
  hex dump (first 32 bytes):
    d0 02 16 90 88 88 ff ff d0 02 16 90 88 88 ff ff  ................
    01 00 00 00 00 00 00 00 00 e4 0b 54 02 00 00 00  ...........T....
  backtrace:
    [<ffffffff8ac40b4a>] kmemleak_alloc+0x4a/0x90
    [<ffffffff898a94c3>] __kmem_cache_alloc_node+0x233/0x350
    [<ffffffff897cc67a>] kmalloc_trace+0x2a/0xf0
    [<ffffffffc4360b85>] parse_server_interfaces+0x4c5/0xc20 [cifs]
    [<ffffffffc436bbce>] SMB3_request_interfaces+0x17e/0x2e0 [cifs]
    [<ffffffffc436becb>] smb3_qfs_tcon+0x18b/0x2d0 [cifs]
    [<ffffffffc43062b8>] cifs_mount_get_tcon+0x3c8/0x570 [cifs]
    [<ffffffffc43a776d>] dfs_mount_share+0x15d/0x1190 [cifs]
    [<ffffffffc43068b2>] cifs_mount+0xe2/0x5a0 [cifs]
    [<ffffffffc42f489f>] cifs_smb3_do_mount+0x1ef/0xd60 [cifs]
    [<ffffffffc439a501>] smb3_get_tree+0x1f1/0x3e0 [cifs]
    [<ffffffff8992bccd>] vfs_get_tree+0x4d/0x190
    [<ffffffff8997bc15>] path_mount+0x675/0x1040
    [<ffffffff8997c7e8>] __x64_sys_mount+0x1f8/0x240
    [<ffffffff8ac2fdcc>] do_syscall_64+0x5c/0xf0
    [<ffffffff8ae000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
unreferenced object 0xffff888104ac2a00 (size 192):
  comm "mount.cifs", pid 4734, jiffies 4295145078 (age 1967.296s)
  hex dump (first 32 bytes):
    d0 e2 51 33 81 88 ff ff d0 e2 51 33 81 88 ff ff  ..Q3......Q3....
    01 00 00 00 00 00 00 00 00 e4 0b 54 02 00 00 00  ...........T....
  backtrace:
    [<ffffffff8ac40b4a>] kmemleak_alloc+0x4a/0x90
    [<ffffffff898a94c3>] __kmem_cache_alloc_node+0x233/0x350
    [<ffffffff897cc67a>] kmalloc_trace+0x2a/0xf0
    [<ffffffffc4360b85>] parse_server_interfaces+0x4c5/0xc20 [cifs]
    [<ffffffffc436bbce>] SMB3_request_interfaces+0x17e/0x2e0 [cifs]
    [<ffffffffc436becb>] smb3_qfs_tcon+0x18b/0x2d0 [cifs]
    [<ffffffffc43062b8>] cifs_mount_get_tcon+0x3c8/0x570 [cifs]
    [<ffffffffc43a776d>] dfs_mount_share+0x15d/0x1190 [cifs]
    [<ffffffffc43068b2>] cifs_mount+0xe2/0x5a0 [cifs]
    [<ffffffffc42f489f>] cifs_smb3_do_mount+0x1ef/0xd60 [cifs]
    [<ffffffffc439a501>] smb3_get_tree+0x1f1/0x3e0 [cifs]
    [<ffffffff8992bccd>] vfs_get_tree+0x4d/0x190
    [<ffffffff8997bc15>] path_mount+0x675/0x1040
    [<ffffffff8997c7e8>] __x64_sys_mount+0x1f8/0x240
    [<ffffffff8ac2fdcc>] do_syscall_64+0x5c/0xf0
    [<ffffffff8ae000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76
unreferenced object 0xffff8881103ece00 (size 192):
  comm "mount.cifs", pid 5653, jiffies 4295317051 (age 1279.476s)
  hex dump (first 32 bytes):
    d0 e2 b4 25 81 88 ff ff d0 e2 b4 25 81 88 ff ff  ...%.......%....
    01 00 00 00 00 00 00 00 80 5e 9b 08 00 00 00 00  .........^......
  backtrace:
    [<ffffffff8ac40b4a>] kmemleak_alloc+0x4a/0x90
    [<ffffffff898a94c3>] __kmem_cache_alloc_node+0x233/0x350
    [<ffffffff897cc67a>] kmalloc_trace+0x2a/0xf0
    [<ffffffffc4360b85>] parse_server_interfaces+0x4c5/0xc20 [cifs]
    [<ffffffffc436bbce>] SMB3_request_interfaces+0x17e/0x2e0 [cifs]
    [<ffffffffc436becb>] smb3_qfs_tcon+0x18b/0x2d0 [cifs]
    [<ffffffffc43062b8>] cifs_mount_get_tcon+0x3c8/0x570 [cifs]
    [<ffffffffc43a776d>] dfs_mount_share+0x15d/0x1190 [cifs]
    [<ffffffffc43068b2>] cifs_mount+0xe2/0x5a0 [cifs]
    [<ffffffffc42f489f>] cifs_smb3_do_mount+0x1ef/0xd60 [cifs]
    [<ffffffffc439a501>] smb3_get_tree+0x1f1/0x3e0 [cifs]
    [<ffffffff8992bccd>] vfs_get_tree+0x4d/0x190
    [<ffffffff8997bc15>] path_mount+0x675/0x1040
    [<ffffffff8997c7e8>] __x64_sys_mount+0x1f8/0x240
    [<ffffffff8ac2fdcc>] do_syscall_64+0x5c/0xf0
    [<ffffffff8ae000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76


On Sat, Nov 11, 2023 at 11:23 AM Paulo Alcantara <pc@manguebit.com> wrote:
>
> Shyam Prasad N <nspmangalore@gmail.com> writes:
>
> > Can you please check if the problem is still seen with these updated patches?
> > I was unable to reproduce the issue with the steps you provided.
>
> I couldn't reproduce that reconnect issue anymore.
>
> However, some other problems as shown below with lockdep, kmemleak and
> kasan enabled.
>
> $ mount.cifs //w22-root1.gandalf.test/test /mnt/1 -o username=administrator,password=***,multichannel,echo_interval=10
> # disable multichannel on w22-root1.gandalf.test server
> #   Set-SmbServerConfiguration -EnableMultichannel $false
> $ iptables -I INPUT -s 192.168.1.11 -j DROP
> $ stat -f /mnt/1
> stat: cannot read file system information for '/mnt/1': Host is down
> $ iptables -I INPUT -s 192.168.1.11 -j ACCEPT
> $ stat -f /mnt/1
> stat: cannot read file system information for '/mnt/1': Resource temporarily unavailable
>
> ...
> [ 1465.290096] CIFS: VFS: \\w22-root1.gandalf.test does not support multichannel anymore. disabling all other channels
> [ 1465.292488]
> [ 1465.292863] ======================================================
> [ 1465.294183] WARNING: possible circular locking dependency detected
> [ 1465.295309] 6.6.0 #2 Not tainted
> [ 1465.295863] ------------------------------------------------------
> [ 1465.296876] kworker/2:1/1127 is trying to acquire lock:
> [ 1465.297745] ffff8880129482c0 (&ret_buf->iface_lock){+.+.}-{2:2}, at: cifs_disable_secondary_channels+0x117/0x280 [cifs]
> [ 1465.300314]
> [ 1465.300314] but task is already holding lock:
> [ 1465.301267] ffff888012948328 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_disable_secondary_channels+0x29/0x280 [cifs]
> [ 1465.303804]
> [ 1465.303804] which lock already depends on the new lock.
> [ 1465.303804]
> [ 1465.305107]
> [ 1465.305107] the existing dependency chain (in reverse order) is:
> [ 1465.306309]
> [ 1465.306309] -> #1 (&ret_buf->chan_lock){+.+.}-{2:2}:
> [ 1465.307391]        _raw_spin_lock+0x34/0x80
> [ 1465.308093]        cifs_try_adding_channels+0x205/0x1290 [cifs]
> [ 1465.309865]        cifs_mount+0xfb/0x4c0 [cifs]
> [ 1465.311423]        cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
> [ 1465.313094]        smb3_get_tree+0x16d/0x380 [cifs]
> [ 1465.314702]        vfs_get_tree+0x4d/0x190
> [ 1465.315275]        path_mount+0x3c4/0xf90
> [ 1465.315811]        __x64_sys_mount+0x1aa/0x1f0
> [ 1465.316392]        do_syscall_64+0x47/0xf0
> [ 1465.316950]        entry_SYSCALL_64_after_hwframe+0x6f/0x77
> [ 1465.317681]
> [ 1465.317681] -> #0 (&ret_buf->iface_lock){+.+.}-{2:2}:
> [ 1465.318548]        __lock_acquire+0x1793/0x2110
> [ 1465.319156]        lock_acquire+0x14a/0x3a0
> [ 1465.319714]        _raw_spin_lock+0x34/0x80
> [ 1465.320266]        cifs_disable_secondary_channels+0x117/0x280 [cifs]
> [ 1465.321729]        smb2_reconnect+0x520/0xcb0 [cifs]
> [ 1465.323001]        smb2_reconnect_server+0x771/0xb00 [cifs]
> [ 1465.324352]        process_one_work+0x43c/0x8e0
> [ 1465.324959]        worker_thread+0x397/0x690
> [ 1465.325522]        kthread+0x18d/0x1d0
> [ 1465.326028]        ret_from_fork+0x34/0x60
> [ 1465.326564]        ret_from_fork_asm+0x1b/0x30
> [ 1465.327164]
> [ 1465.327164] other info that might help us debug this:
> [ 1465.327164]
> [ 1465.328180]  Possible unsafe locking scenario:
> [ 1465.328180]
> [ 1465.328943]        CPU0                    CPU1
> [ 1465.329523]        ----                    ----
> [ 1465.330119]   lock(&ret_buf->chan_lock);
> [ 1465.330649]                                lock(&ret_buf->iface_lock);
> [ 1465.331493]                                lock(&ret_buf->chan_lock);
> [ 1465.332326]   lock(&ret_buf->iface_lock);
> [ 1465.332865]
> [ 1465.332865]  *** DEADLOCK ***
> [ 1465.332865]
> [ 1465.333623] 5 locks held by kworker/2:1/1127:
> [ 1465.334199]  #0: ffff88800dd09d48 ((wq_completion)cifsiod){+.+.}-{0:0}, at: process_one_work+0x39a/0x8e0
> [ 1465.335430]  #1: ffff88800de97dc0 ((work_completion)(&(&tcp_ses->reconnect)->work)){+.+.}-{0:0}, at: process_one_work+0x39a/0x8e0
> [ 1465.336947]  #2: ffff88800dd7a8c8 (&tcp_ses->reconnect_mutex){+.+.}-{3:3}, at: smb2_reconnect_server+0xde/0xb00 [cifs]
> [ 1465.338953]  #3: ffff8880129480f0 (&ret_buf->session_mutex){+.+.}-{3:3}, at: smb2_reconnect+0x234/0xcb0 [cifs]
> [ 1465.340838]  #4: ffff888012948328 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_disable_secondary_channels+0x29/0x280 [cifs]
> [ 1465.342073]
> [ 1465.342073] stack backtrace:
> [ 1465.342421] CPU: 2 PID: 1127 Comm: kworker/2:1 Not tainted 6.6.0 #2
> [ 1465.342917] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
> [ 1465.343782] Workqueue: cifsiod smb2_reconnect_server [cifs]
> [ 1465.344595] Call Trace:
> [ 1465.344845]  <TASK>
> [ 1465.345025]  dump_stack_lvl+0x4a/0x80
> [ 1465.345329]  check_noncircular+0x269/0x2b0
> [ 1465.345672]  ? __pfx_check_noncircular+0x10/0x10
> [ 1465.346047]  ? __pfx_stack_trace_save+0x10/0x10
> [ 1465.346420]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.346814]  ? lockdep_lock+0xa3/0x160
> [ 1465.347128]  ? __pfx_lockdep_lock+0x10/0x10
> [ 1465.347465]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.347858]  ? add_chain_block+0x1d8/0x280
> [ 1465.348191]  __lock_acquire+0x1793/0x2110
> [ 1465.348524]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.348918]  ? __pfx___lock_acquire+0x10/0x10
> [ 1465.349271]  ? __pfx_prb_read_valid+0x10/0x10
> [ 1465.349624]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.350018]  ? llist_add_batch+0x9d/0xf0
> [ 1465.350344]  ? __pfx_llist_add_batch+0x10/0x10
> [ 1465.350712]  lock_acquire+0x14a/0x3a0
> [ 1465.351014]  ? cifs_disable_secondary_channels+0x117/0x280 [cifs]
> [ 1465.351874]  ? __pfx_lock_acquire+0x10/0x10
> [ 1465.352214]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.352600]  ? hlock_class+0x32/0xc0
> [ 1465.352900]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.353286]  ? lock_acquired+0x2df/0x510
> [ 1465.353612]  ? do_raw_spin_trylock+0xd1/0x120
> [ 1465.353973]  ? __pfx_lock_acquired+0x10/0x10
> [ 1465.354323]  _raw_spin_lock+0x34/0x80
> [ 1465.354622]  ? cifs_disable_secondary_channels+0x117/0x280 [cifs]
> [ 1465.355484]  cifs_disable_secondary_channels+0x117/0x280 [cifs]
> [ 1465.356332]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.356730]  smb2_reconnect+0x520/0xcb0 [cifs]
> [ 1465.357477]  smb2_reconnect_server+0x771/0xb00 [cifs]
> [ 1465.358269]  ? __pfx_smb2_reconnect_server+0x10/0x10 [cifs]
> [ 1465.359089]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.359478]  ? lock_is_held_type+0x90/0x100
> [ 1465.359830]  ? mark_held_locks+0x1a/0x90
> [ 1465.360155]  process_one_work+0x43c/0x8e0
> [ 1465.360493]  ? __pfx_process_one_work+0x10/0x10
> [ 1465.360912]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.361359]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.361754]  ? __list_add_valid_or_report+0x37/0xf0
> [ 1465.362155]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.362544]  worker_thread+0x397/0x690
> [ 1465.362863]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.363250]  ? __kthread_parkme+0xce/0xf0
> [ 1465.363583]  ? __pfx_worker_thread+0x10/0x10
> [ 1465.363942]  kthread+0x18d/0x1d0
> [ 1465.364210]  ? kthread+0xdb/0x1d0
> [ 1465.364483]  ? __pfx_kthread+0x10/0x10
> [ 1465.364798]  ret_from_fork+0x34/0x60
> [ 1465.365093]  ? __pfx_kthread+0x10/0x10
> [ 1465.365400]  ret_from_fork_asm+0x1b/0x30
> [ 1465.365733]  </TASK>
> [ 1465.365954] BUG: sleeping function called from invalid context at kernel/workqueue.c:3344
> [ 1465.366821] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1127, name: kworker/2:1
> [ 1465.367690] preempt_count: 1, expected: 0
> [ 1465.368115] RCU nest depth: 0, expected: 0
> [ 1465.368547] INFO: lockdep is turned off.
> [ 1465.368984] CPU: 2 PID: 1127 Comm: kworker/2:1 Not tainted 6.6.0 #2
> [ 1465.369642] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
> [ 1465.370788] Workqueue: cifsiod smb2_reconnect_server [cifs]
> [ 1465.371888] Call Trace:
> [ 1465.372164]  <TASK>
> [ 1465.372401]  dump_stack_lvl+0x64/0x80
> [ 1465.372809]  __might_resched+0x23c/0x360
> [ 1465.373236]  ? __pfx___might_resched+0x10/0x10
> [ 1465.373722]  ? rcu_is_watching+0x23/0x50
> [ 1465.374149]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.374661]  ? __might_sleep+0x2b/0xb0
> [ 1465.375119]  ? __flush_work+0xc5/0x640
> [ 1465.375524]  __flush_work+0xfd/0x640
> [ 1465.375914]  ? __flush_work+0xc5/0x640
> [ 1465.376320]  ? add_chain_block+0x1d8/0x280
> [ 1465.376771]  ? __pfx___flush_work+0x10/0x10
> [ 1465.377240]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.377686]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.378072]  ? try_to_grab_pending+0x47/0x3a0
> [ 1465.378430]  __cancel_work_timer+0x210/0x2c0
> [ 1465.378785]  ? __pfx___cancel_work_timer+0x10/0x10
> [ 1465.379176]  ? do_raw_spin_trylock+0xd1/0x120
> [ 1465.379537]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.379933]  ? do_raw_spin_unlock+0x9b/0x100
> [ 1465.380293]  cifs_put_tcp_session+0x118/0x290 [cifs]
> [ 1465.381076]  cifs_disable_secondary_channels+0xdb/0x280 [cifs]
> [ 1465.381930]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.382323]  smb2_reconnect+0x520/0xcb0 [cifs]
> [ 1465.383072]  smb2_reconnect_server+0x771/0xb00 [cifs]
> [ 1465.383870]  ? __pfx_smb2_reconnect_server+0x10/0x10 [cifs]
> [ 1465.384694]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.385084]  ? lock_is_held_type+0x90/0x100
> [ 1465.385424]  ? mark_held_locks+0x1a/0x90
> [ 1465.385708] CIFS: fs/smb/client/cifsfs.c: VFS: in cifs_statfs as Xid: 9 with uid: 0
> [ 1465.385754]  process_one_work+0x43c/0x8e0
> [ 1465.387019]  ? __pfx_process_one_work+0x10/0x10
> [ 1465.387387]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.387821]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.388334]  ? __list_add_valid_or_report+0x37/0xf0
> [ 1465.388870]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.389385]  worker_thread+0x397/0x690
> [ 1465.389753]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.390144]  ? __kthread_parkme+0xce/0xf0
> [ 1465.390476]  ? __pfx_worker_thread+0x10/0x10
> [ 1465.390833]  kthread+0x18d/0x1d0
> [ 1465.391103]  ? kthread+0xdb/0x1d0
> [ 1465.391378]  ? __pfx_kthread+0x10/0x10
> [ 1465.391695]  ret_from_fork+0x34/0x60
> [ 1465.391991]  ? __pfx_kthread+0x10/0x10
> [ 1465.392302]  ret_from_fork_asm+0x1b/0x30
> [ 1465.392632]  </TASK>
> [ 1465.393120] CIFS: fs/smb/client/connect.c: cifs_setup_session: channel connect bitmap: 0x1
> [ 1465.393959] CIFS: fs/smb/client/connect.c: Free previous auth_key.response = 0000000018de71c6
> [ 1465.393960] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x1, tcon reconnect: 0
> [ 1465.394666] CIFS: fs/smb/client/connect.c: Security Mode: 0x1 Capabilities: 0x300067 TimeAdjust: 0
> [ 1465.396191] CIFS: fs/smb/client/smb2pdu.c: Session Setup
> [ 1465.396675] CIFS: fs/smb/client/smb2pdu.c: sess setup type 2
> [ 1465.397269] CIFS: fs/smb/client/smb2pdu.c: Fresh session. Previous: 1e800d8000049
> [ 1465.399253] CIFS: fs/smb/client/smb2maperror.c: Mapping SMB2 status code 0xc0000016 to POSIX err -5
> [ 1465.400409] CIFS: fs/smb/client/sess.c: decode_ntlmssp_challenge: negotiate=0xe2088235 challenge=0xe2898235
> [ 1465.401245] CIFS: fs/smb/client/smb2pdu.c: rawntlmssp session setup challenge phase
> [ 1465.402058] CIFS: fs/smb/client/smb2pdu.c: Fresh session. Previous: 1e800d8000049
> [ 1465.404585] CIFS: fs/smb/client/smb2pdu.c: SMB2/3 session established successfully
> [ 1465.405543] CIFS: fs/smb/client/sess.c: Cleared reconnect bitmask for chan 0; now 0x0
> [ 1465.406375] CIFS: fs/smb/client/connect.c: __cifs_put_smb_ses: ses_count=2
> [ 1465.407092] CIFS: fs/smb/client/connect.c: __cifs_put_smb_ses: ses ipc: \\w22-root1.gandalf.test\IPC$
> [ 1465.408048] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels finished
> [ 1465.408794] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels
> [ 1465.409349] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels finished
> [ 1465.409949] CIFS: Server share \\w22-root1.gandalf.test\test deleted.
> [ 1465.410961] CIFS: fs/smb/client/smb2maperror.c: Mapping SMB2 status code 0xc00000c9 to POSIX err -78
> [ 1465.412204] CIFS: server share \\w22-root1.gandalf.test\test deleted
> [ 1465.413298] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x0, tcon reconnect: 1
> [ 1465.413311] CIFS: fs/smb/client/smb2pdu.c: reconnect tcon rc = 0
> [ 1465.415272] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x0, tcon reconnect: 1
> [ 1465.415285] CIFS: fs/smb/client/smb2pdu.c: reconnect tcon rc = 0
> [ 1465.417047] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels
> [ 1465.417911] CIFS: fs/smb/client/smb2pdu.c: sess reconnect mask: 0x0, tcon reconnect: 1
> [ 1465.417924] CIFS: fs/smb/client/smb2pdu.c: reconnect tcon rc = 0
> [ 1465.421431] CIFS: fs/smb/client/connect.c: cifs_put_tcon: tc_count=2
> [ 1465.422397] CIFS: fs/smb/client/smb2pdu.c: Reconnecting tcons and channels finished
> [ 1465.423725] ------------[ cut here ]------------
> [ 1465.424158] WARNING: CPU: 3 PID: 85 at fs/smb/client/connect.c:1616 cifs_put_tcp_session+0x27a/0x290 [cifs]
> [ 1465.425407] Modules linked in: cifs cifs_arc4 nls_ucs2_utils fscache cifs_md4 [last unloaded: cifs]
> [ 1465.426242] CPU: 3 PID: 85 Comm: kworker/3:3 Tainted: G        W          6.6.0 #2
> [ 1465.426929] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
> [ 1465.427915] Workqueue: cifsiod smb2_reconnect_server [cifs]
> [ 1465.428828] RIP: 0010:cifs_put_tcp_session+0x27a/0x290 [cifs]
> [ 1465.429754] Code: b1 15 c2 e9 32 fe ff ff 89 ee 48 89 df e8 9e fd ff ff e9 cd fe ff ff be 03 00 00 00 4c 89 ef e8 ac 6d ad c1 e9 11 fe ff ff 90 <0f> 0b 90 e9 c0 fd ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00
> [ 1465.431382] RSP: 0018:ffff88800b8afc10 EFLAGS: 00010286
> [ 1465.431872] RAX: 00000000ffffffff RBX: ffff88800dd7a000 RCX: dffffc0000000000
> [ 1465.432498] RDX: 0000000000000003 RSI: ffffffffc00e901b RDI: ffff88800dd7a070
> [ 1465.433134] RBP: 0000000000000001 R08: 0000000000000000 R09: fffffbfff082290a
> [ 1465.433788] R10: ffffffff84114857 R11: 0000000000000000 R12: 1ffff11001715f90
> [ 1465.434416] R13: ffff88800b8afca0 R14: ffff88800dd7a7a0 R15: ffff88800b8afc88
> [ 1465.435086] FS:  0000000000000000(0000) GS:ffff88805b000000(0000) knlGS:0000000000000000
> [ 1465.435818] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1465.436330] CR2: 00007f00682affae CR3: 0000000009fe6000 CR4: 0000000000750ef0
> [ 1465.436981] PKRU: 55555554
> [ 1465.437227] Call Trace:
> [ 1465.437458]  <TASK>
> [ 1465.437676]  ? __warn+0xa5/0x200
> [ 1465.437979]  ? cifs_put_tcp_session+0x27a/0x290 [cifs]
> [ 1465.438833]  ? report_bug+0x1b2/0x1e0
> [ 1465.439168]  ? handle_bug+0x6f/0x90
> [ 1465.439484]  ? exc_invalid_op+0x17/0x50
> [ 1465.439846]  ? asm_exc_invalid_op+0x1a/0x20
> [ 1465.440247]  ? cifs_put_tcp_session+0x2b/0x290 [cifs]
> [ 1465.441102]  ? cifs_put_tcp_session+0x27a/0x290 [cifs]
> [ 1465.441970]  smb2_reconnect_server+0x646/0xb00 [cifs]
> [ 1465.442823]  ? lock_sync+0xd0/0xe0
> [ 1465.443138]  ? __pfx_smb2_reconnect_server+0x10/0x10 [cifs]
> [ 1465.444042]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.444467]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.444912]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.445342]  ? read_word_at_a_time+0xe/0x20
> [ 1465.445727]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.446150]  process_one_work+0x43c/0x8e0
> [ 1465.446513]  ? __pfx_process_one_work+0x10/0x10
> [ 1465.446946]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.447376]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.447826]  ? __list_add_valid_or_report+0x37/0xf0
> [ 1465.448278]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.448731]  worker_thread+0x397/0x690
> [ 1465.449080]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1465.449515]  ? __kthread_parkme+0xce/0xf0
> [ 1465.449991]  ? __pfx_worker_thread+0x10/0x10
> [ 1465.450390]  kthread+0x18d/0x1d0
> [ 1465.450712]  ? kthread+0xdb/0x1d0
> [ 1465.451022]  ? __pfx_kthread+0x10/0x10
> [ 1465.451362]  ret_from_fork+0x34/0x60
> [ 1465.451699]  ? __pfx_kthread+0x10/0x10
> [ 1465.452038]  ret_from_fork_asm+0x1b/0x30
> [ 1465.452405]  </TASK>
> [ 1465.452605] irq event stamp: 87714
> [ 1465.452927] hardirqs last  enabled at (87713): [<ffffffff82864168>] _raw_spin_unlock_irq+0x28/0x50
> [ 1465.453732] hardirqs last disabled at (87714): [<ffffffff82856f8d>] __schedule+0xc0d/0x1560
> [ 1465.454468] softirqs last  enabled at (87624): [<ffffffff8110918c>] process_one_work+0x43c/0x8e0
> [ 1465.455257] softirqs last disabled at (87620): [<ffffffff8227f67a>] neigh_managed_work+0x2a/0x110
> [ 1465.456055] ---[ end trace 0000000000000000 ]---
> [ 1465.456937] CIFS: fs/smb/client/cifsfs.c: VFS: leaving cifs_statfs (xid = 9) rc = -11
> [ 1528.811802] CIFS: fs/smb/client/connect.c: VFS: in smb2_query_server_interfaces as Xid: 10 with uid: 0
> [ 1528.813395] CIFS: fs/smb/client/smb2pdu.c: SMB2 IOCTL
> [ 1528.814246] ==================================================================
> [ 1528.815401] BUG: KASAN: slab-use-after-free in cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.817404] Read of size 1 at addr ffff88800dd7a2c4 by task kworker/0:2/1114
> [ 1528.818539]
> [ 1528.818810] CPU: 0 PID: 1114 Comm: kworker/0:2 Tainted: G        W          6.6.0 #2
> [ 1528.820040] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
> [ 1528.821789] Workqueue: cifsiod smb2_query_server_interfaces [cifs]
> [ 1528.823573] Call Trace:
> [ 1528.823991]  <TASK>
> [ 1528.824360]  dump_stack_lvl+0x4a/0x80
> [ 1528.824976]  print_report+0xcf/0x650
> [ 1528.825577]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.826361]  ? rcu_is_watching+0x23/0x50
> [ 1528.827011]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.827804]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.828590]  ? __phys_addr+0x46/0x90
> [ 1528.829193]  kasan_report+0xd8/0x110
> [ 1528.829794]  ? cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.831379]  ? cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.832952]  cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.834506]  SMB2_ioctl+0x1b5/0x6f0 [cifs]
> [ 1528.835969]  ? __pfx_console_unlock+0x10/0x10
> [ 1528.836692]  ? tick_nohz_tick_stopped+0x21/0x30
> [ 1528.837441]  ? __pfx_SMB2_ioctl+0x10/0x10 [cifs]
> [ 1528.838992]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.839779]  ? irq_work_queue+0x2c/0x40
> [ 1528.840416]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.841210]  ? lock_acquire+0xc1/0x3a0
> [ 1528.841840]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.842626]  ? _printk+0xc0/0xf0
> [ 1528.843191]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.843977]  ? __dynamic_pr_debug+0x1f5/0x260
> [ 1528.844705]  ? __pfx___dynamic_pr_debug+0x10/0x10
> [ 1528.845476]  ? lock_release+0xb6/0x5a0
> [ 1528.846104]  ? __pfx_lock_acquire+0x10/0x10
> [ 1528.846802]  ? SMB3_request_interfaces+0x137/0x2b0 [cifs]
> [ 1528.848468]  SMB3_request_interfaces+0x137/0x2b0 [cifs]
> [ 1528.850103]  ? __pfx_SMB3_request_interfaces+0x10/0x10 [cifs]
> [ 1528.851828]  ? ___ratelimit+0x133/0x210
> [ 1528.852535]  ? __pfx____ratelimit+0x10/0x10
> [ 1528.853207]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.853839]  smb2_query_server_interfaces+0x54/0x1f0 [cifs]
> [ 1528.855179]  process_one_work+0x43c/0x8e0
> [ 1528.855848]  ? __pfx_process_one_work+0x10/0x10
> [ 1528.856595]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.857380]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.858170]  ? __list_add_valid_or_report+0x37/0xf0
> [ 1528.858985]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.859773]  worker_thread+0x397/0x690
> [ 1528.860410]  ? __pfx_worker_thread+0x10/0x10
> [ 1528.861124]  kthread+0x18d/0x1d0
> [ 1528.861668]  ? kthread+0xdb/0x1d0
> [ 1528.862226]  ? __pfx_kthread+0x10/0x10
> [ 1528.862851]  ret_from_fork+0x34/0x60
> [ 1528.863449]  ? __pfx_kthread+0x10/0x10
> [ 1528.864074]  ret_from_fork_asm+0x1b/0x30
> [ 1528.864745]  </TASK>
> [ 1528.865120]
> [ 1528.865389] Allocated by task 1253:
> [ 1528.865965]  kasan_save_stack+0x33/0x60
> [ 1528.866602]  kasan_set_track+0x25/0x30
> [ 1528.867226]  __kasan_kmalloc+0x8f/0xa0
> [ 1528.867856]  cifs_get_tcp_session+0xbc/0xc70 [cifs]
> [ 1528.869161]  cifs_mount_get_session+0x70/0x220 [cifs]
> [ 1528.870425]  dfs_mount_share+0x249/0x1060 [cifs]
> [ 1528.871650]  cifs_mount+0xda/0x4c0 [cifs]
> [ 1528.872785]  cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
> [ 1528.874016]  smb3_get_tree+0x16d/0x380 [cifs]
> [ 1528.875208]  vfs_get_tree+0x4d/0x190
> [ 1528.875684]  path_mount+0x3c4/0xf90
> [ 1528.876149]  __x64_sys_mount+0x1aa/0x1f0
> [ 1528.876664]  do_syscall_64+0x47/0xf0
> [ 1528.877140]  entry_SYSCALL_64_after_hwframe+0x6f/0x77
> [ 1528.877800]
> [ 1528.878013] Freed by task 1255:
> [ 1528.878428]  kasan_save_stack+0x33/0x60
> [ 1528.878934]  kasan_set_track+0x25/0x30
> [ 1528.879430]  kasan_save_free_info+0x2b/0x50
> [ 1528.879975]  ____kasan_slab_free+0x126/0x170
> [ 1528.880536]  slab_free_freelist_hook+0x9d/0x1e0
> [ 1528.881131]  __kmem_cache_free+0x9d/0x190
> [ 1528.881506]  clean_demultiplex_info+0x3bb/0x640 [cifs]
> [ 1528.882306]  cifs_demultiplex_thread+0x3de/0x1270 [cifs]
> [ 1528.883086]  kthread+0x18d/0x1d0
> [ 1528.883351]  ret_from_fork+0x34/0x60
> [ 1528.883643]  ret_from_fork_asm+0x1b/0x30
> [ 1528.883960]
> [ 1528.884092] Last potentially related work creation:
> [ 1528.884481]  kasan_save_stack+0x33/0x60
> [ 1528.884792]  __kasan_record_aux_stack+0x94/0xa0
> [ 1528.885155]  __queue_work+0x334/0x8a0
> [ 1528.885454]  mod_delayed_work_on+0xa5/0x100
> [ 1528.885792]  smb2_reconnect+0x735/0xcb0 [cifs]
> [ 1528.886518]  SMB2_query_info_init+0xca/0x250 [cifs]
> [ 1528.887273]  smb2_query_info_compound+0x473/0x6d0 [cifs]
> [ 1528.888065]  smb2_queryfs+0xc2/0x2c0 [cifs]
> [ 1528.888763]  smb311_queryfs+0x210/0x220 [cifs]
> [ 1528.889485]  cifs_statfs+0x164/0x290 [cifs]
> [ 1528.890185]  statfs_by_dentry+0x9b/0xf0
> [ 1528.890497]  user_statfs+0xab/0x130
> [ 1528.890783]  __do_sys_statfs+0x81/0xe0
> [ 1528.891090]  do_syscall_64+0x47/0xf0
> [ 1528.891383]  entry_SYSCALL_64_after_hwframe+0x6f/0x77
> [ 1528.891786]
> [ 1528.891918] Second to last potentially related work creation:
> [ 1528.892368]  kasan_save_stack+0x33/0x60
> [ 1528.892679]  __kasan_record_aux_stack+0x94/0xa0
> [ 1528.893043]  __queue_work+0x334/0x8a0
> [ 1528.893342]  mod_delayed_work_on+0xa5/0x100
> [ 1528.893680]  smb2_reconnect+0x735/0xcb0 [cifs]
> [ 1528.894409]  SMB2_open_init+0xf8/0x13f0 [cifs]
> [ 1528.895135]  smb2_query_info_compound+0x2b6/0x6d0 [cifs]
> [ 1528.895925]  smb2_queryfs+0xc2/0x2c0 [cifs]
> [ 1528.896628]  smb311_queryfs+0x210/0x220 [cifs]
> [ 1528.897353]  cifs_statfs+0x164/0x290 [cifs]
> [ 1528.898060]  statfs_by_dentry+0x9b/0xf0
> [ 1528.898371]  user_statfs+0xab/0x130
> [ 1528.898656]  __do_sys_statfs+0x81/0xe0
> [ 1528.898961]  do_syscall_64+0x47/0xf0
> [ 1528.899255]  entry_SYSCALL_64_after_hwframe+0x6f/0x77
> [ 1528.899658]
> [ 1528.899790] The buggy address belongs to the object at ffff88800dd7a000
> [ 1528.899790]  which belongs to the cache kmalloc-4k of size 4096
> [ 1528.900751] The buggy address is located 708 bytes inside of
> [ 1528.900751]  freed 4096-byte region [ffff88800dd7a000, ffff88800dd7b000)
> [ 1528.901699]
> [ 1528.901830] The buggy address belongs to the physical page:
> [ 1528.902268] page:00000000b974ece7 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xdd78
> [ 1528.902981] head:00000000b974ece7 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
> [ 1528.903609] anon flags: 0x100000000000840(slab|head|node=0|zone=1)
> [ 1528.904098] page_type: 0xffffffff()
> [ 1528.904386] raw: 0100000000000840 ffff888006443040 0000000000000000 dead000000000001
> [ 1528.904989] raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000
> [ 1528.905591] page dumped because: kasan: bad access detected
> [ 1528.906030]
> [ 1528.906161] Memory state around the buggy address:
> [ 1528.906542]  ffff88800dd7a180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 1528.907107]  ffff88800dd7a200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 1528.907675] >ffff88800dd7a280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 1528.908240]                                            ^
> [ 1528.908660]  ffff88800dd7a300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 1528.909225]  ffff88800dd7a380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 1528.909787] ==================================================================
> [ 1528.910379] Kernel panic - not syncing: kasan.fault=panic set ...
> [ 1528.910867] CPU: 0 PID: 1114 Comm: kworker/0:2 Tainted: G        W          6.6.0 #2
> [ 1528.911475] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014
> [ 1528.912368] Workqueue: cifsiod smb2_query_server_interfaces [cifs]
> [ 1528.913235] Call Trace:
> [ 1528.913439]  <TASK>
> [ 1528.913616]  dump_stack_lvl+0x4a/0x80
> [ 1528.913917]  panic+0x41f/0x460
> [ 1528.914177]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.914563]  ? __pfx_panic+0x10/0x10
> [ 1528.914855]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.915248]  ? check_panic_on_warn+0x2f/0x80
> [ 1528.915594]  end_report+0x125/0x130
> [ 1528.915882]  kasan_report+0xe8/0x110
> [ 1528.916176]  ? cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.916935]  ? cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.917695]  cifs_pick_channel+0xa2/0x170 [cifs]
> [ 1528.918437]  SMB2_ioctl+0x1b5/0x6f0 [cifs]
> [ 1528.919138]  ? __pfx_console_unlock+0x10/0x10
> [ 1528.919490]  ? tick_nohz_tick_stopped+0x21/0x30
> [ 1528.919857]  ? __pfx_SMB2_ioctl+0x10/0x10 [cifs]
> [ 1528.920596]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.920985]  ? irq_work_queue+0x2c/0x40
> [ 1528.921298]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.921685]  ? lock_acquire+0xc1/0x3a0
> [ 1528.921998]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.922416]  ? _printk+0xc0/0xf0
> [ 1528.922726]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.923157]  ? __dynamic_pr_debug+0x1f5/0x260
> [ 1528.923546]  ? __pfx___dynamic_pr_debug+0x10/0x10
> [ 1528.924097]  ? lock_release+0xb6/0x5a0
> [ 1528.924707]  ? __pfx_lock_acquire+0x10/0x10
> [ 1528.925558]  ? SMB3_request_interfaces+0x137/0x2b0 [cifs]
> [ 1528.926794]  SMB3_request_interfaces+0x137/0x2b0 [cifs]
> [ 1528.927962]  ? __pfx_SMB3_request_interfaces+0x10/0x10 [cifs]
> [ 1528.929230]  ? ___ratelimit+0x133/0x210
> [ 1528.929847]  ? __pfx____ratelimit+0x10/0x10
> [ 1528.930217]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.930730]  smb2_query_server_interfaces+0x54/0x1f0 [cifs]
> [ 1528.931564]  process_one_work+0x43c/0x8e0
> [ 1528.931900]  ? __pfx_process_one_work+0x10/0x10
> [ 1528.932273]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.932663]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.933055]  ? __list_add_valid_or_report+0x37/0xf0
> [ 1528.933459]  ? srso_alias_return_thunk+0x5/0xfbef5
> [ 1528.933852]  worker_thread+0x397/0x690
> [ 1528.934173]  ? __pfx_worker_thread+0x10/0x10
> [ 1528.934526]  kthread+0x18d/0x1d0
> [ 1528.934799]  ? kthread+0xdb/0x1d0
> [ 1528.935077]  ? __pfx_kthread+0x10/0x10
> [ 1528.935388]  ret_from_fork+0x34/0x60
> [ 1528.935686]  ? __pfx_kthread+0x10/0x10
> [ 1528.935997]  ret_from_fork_asm+0x1b/0x30
> [ 1528.936328]  </TASK>
> [ 1528.937856] Kernel Offset: disabled
> [ 1528.938146] ---[ end Kernel panic - not syncing: kasan.fault=panic set ... ]---
>
> $ mount.cifs //w22-root1.gandalf.test/test /mnt/1 -o username=administrator,password=***
> # ...wait until smb2_query_server_interfaces() is executed at least once...
> $ cat /sys/kernel/debug/kmemleak
> unreferenced object 0xffff888007821c00 (size 192):
>   comm "mount.cifs", pid 885, jiffies 4294765292 (age 656.515s)
>   hex dump (first 32 bytes):
>     f0 52 da 16 80 88 ff ff f0 52 da 16 80 88 ff ff  .R.......R......
>     01 00 00 00 00 00 00 00 00 ca 9a 3b 00 00 00 00  ...........;....
>   backtrace:
>     [<ffffffff8144e3f5>] __kmem_cache_alloc_node+0x295/0x2d0
>     [<ffffffff813ab1aa>] kmalloc_trace+0x2a/0xc0
>     [<ffffffffc0141c9d>] parse_server_interfaces+0x4ed/0xcc0 [cifs]
>     [<ffffffffc014b323>] SMB3_request_interfaces+0x163/0x2b0 [cifs]
>     [<ffffffffc014b5ed>] smb3_qfs_tcon+0x16d/0x2c0 [cifs]
>     [<ffffffffc00f02c1>] cifs_mount_get_tcon+0x3b1/0x550 [cifs]
>     [<ffffffffc0181f7a>] dfs_mount_share+0x1da/0x1060 [cifs]
>     [<ffffffffc00f089a>] cifs_mount+0xda/0x4c0 [cifs]
>     [<ffffffffc00e06d5>] cifs_smb3_do_mount+0x1e5/0xcc0 [cifs]
>     [<ffffffffc0175a1d>] smb3_get_tree+0x16d/0x380 [cifs]
>     [<ffffffff8147c1ad>] vfs_get_tree+0x4d/0x190
>     [<ffffffff814c66d4>] path_mount+0x3c4/0xf90
>     [<ffffffff814c7b5a>] __x64_sys_mount+0x1aa/0x1f0
>     [<ffffffff8284c3d7>] do_syscall_64+0x47/0xf0
>     [<ffffffff82a000eb>] entry_SYSCALL_64_after_hwframe+0x6f/0x77
>     ...
>
> $ ./scripts/faddr2line --list fs/smb/client/cifs.o parse_server_interfaces+0x4ed
> parse_server_interfaces+0x4ed/0xcc0:
>
> kmalloc at /home/pc/g/linux/./include/linux/slab.h:600
>  595
>  596                    if (size > KMALLOC_MAX_CACHE_SIZE)
>  597                            return kmalloc_large(size, flags);
>  598
>  599                    index = kmalloc_index(size);
> >600<                   return kmalloc_trace(
>  601                                    kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
>  602                                    flags, size);
>  603            }
>  604            return __kmalloc(size, flags);
>  605    }
>
> (inlined by) parse_server_interfaces at /home/pc/g/linux/fs/smb/client/smb2ops.c:694
>  689                            }
>  690                    }
>  691                    spin_unlock(&ses->iface_lock);
>  692
>  693                    /* no match. insert the entry in the list */
> >694<                   info = kmalloc(sizeof(struct cifs_server_iface),
>  695                                   GFP_KERNEL);
>  696                    if (!info) {
>  697                            rc = -ENOMEM;
>  698                            goto out;
>  699                    }
Paulo Alcantara Nov. 12, 2023, 7:32 p.m. UTC | #10
Steve French <smfrench@gmail.com> writes:

> I tried reproducing this to Windows (a little trickier than I expected
> because I had to block the IPv6, not just the IPv4 addresses as in your
> example).  In my example (and built with the stricter kernel config you
> suggested) I do see expected warning messages (although puzzled at first
> why server name is blank in some debug messages - although I did mount with
> IPv4 address not hostname).  After reconnect I can access the mount and it
> seems to work fine.

You could also bring the ifaces down rather than blocking the specific
IP addresses.

I'd expect to see the other warnings in case you have lockdep and
CONFIG_DEBUG_ATOMIC_SLEEP=y enabled.

The blank hostnames are due to secondary channels being reconnected as
we only set it for the primary one.
diff mbox series

Patch

diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index cdbc2cd207dc..1e1a5f3ab600 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -650,6 +650,7 @@  struct TCP_Server_Info {
 	bool noautotune;		/* do not autotune send buf sizes */
 	bool nosharesock;
 	bool tcp_nodelay;
+	bool terminate;
 	unsigned int credits;  /* send no more requests at once */
 	unsigned int max_credits; /* can override large 32000 default at mnt */
 	unsigned int in_flight;  /* number of requests on the wire to server */
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 5a4c1f1e0d91..828c3916cb88 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -644,6 +644,8 @@  cifs_chan_needs_reconnect(struct cifs_ses *ses,
 bool
 cifs_chan_is_iface_active(struct cifs_ses *ses,
 			  struct TCP_Server_Info *server);
+void
+cifs_disable_secondary_channels(struct cifs_ses *ses);
 int
 cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server);
 int
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 149cde77500e..6fe80d5d7c23 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -220,6 +220,14 @@  cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
+		/*
+		 * if channel has been marked for termination, nothing to do
+		 * for the channel. in fact, we cannot find the channel for the
+		 * server. So safe to exit here
+		 */
+		if (server->terminate)
+			break;
+
 		/* check if iface is still active */
 		if (!cifs_chan_is_iface_active(ses, server))
 			cifs_chan_update_iface(ses, server);
@@ -254,6 +262,8 @@  cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
 			spin_lock(&tcon->tc_lock);
 			tcon->status = TID_NEED_RECON;
 			spin_unlock(&tcon->tc_lock);
+
+			cancel_delayed_work_sync(&tcon->query_interfaces);
 		}
 		if (ses->tcon_ipc) {
 			ses->tcon_ipc->need_reconnect = true;
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 6843deed6119..51bcd78fc5bf 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -288,6 +288,60 @@  int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 	return new_chan_count - old_chan_count;
 }
 
+/*
+ * called when multichannel is disabled by the server.
+ * this always gets called from smb2_reconnect
+ * and cannot get called in parallel threads.
+ */
+void
+cifs_disable_secondary_channels(struct cifs_ses *ses)
+{
+	int i, chan_count;
+	struct TCP_Server_Info *server;
+	struct cifs_server_iface *iface;
+
+	spin_lock(&ses->chan_lock);
+	chan_count = ses->chan_count;
+	if (chan_count == 1)
+		goto done;
+
+	ses->chan_count = 1;
+
+	/* for all secondary channels reset the need reconnect bit */
+	ses->chans_need_reconnect &= 1;
+
+	for (i = 1; i < chan_count; i++) {
+		iface = ses->chans[i].iface;
+		server = ses->chans[i].server;
+
+		if (iface) {
+			spin_lock(&ses->iface_lock);
+			kref_put(&iface->refcount, release_iface);
+			ses->chans[i].iface = NULL;
+			iface->num_channels--;
+			if (iface->weight_fulfilled)
+				iface->weight_fulfilled--;
+			spin_unlock(&ses->iface_lock);
+		}
+
+		spin_unlock(&ses->chan_lock);
+		if (server && !server->terminate) {
+			server->terminate = true;
+			cifs_signal_cifsd_for_reconnect(server, false);
+		}
+		spin_lock(&ses->chan_lock);
+
+		if (server) {
+			ses->chans[i].server = NULL;
+			cifs_put_tcp_session(server, false);
+		}
+
+	}
+
+done:
+	spin_unlock(&ses->chan_lock);
+}
+
 /*
  * update the iface for the channel if necessary.
  * will return 0 when iface is updated, 1 if removed, 2 otherwise
@@ -580,14 +634,10 @@  cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 
 out:
 	if (rc && chan->server) {
-		/*
-		 * we should avoid race with these delayed works before we
-		 * remove this channel
-		 */
-		cancel_delayed_work_sync(&chan->server->echo);
-		cancel_delayed_work_sync(&chan->server->reconnect);
+		cifs_put_tcp_session(chan->server, 0);
 
 		spin_lock(&ses->chan_lock);
+
 		/* we rely on all bits beyond chan_count to be clear */
 		cifs_chan_clear_need_reconnect(ses, chan->server);
 		ses->chan_count--;
@@ -597,8 +647,6 @@  cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 		 */
 		WARN_ON(ses->chan_count < 1);
 		spin_unlock(&ses->chan_lock);
-
-		cifs_put_tcp_session(chan->server, 0);
 	}
 
 	kfree(ctx->UNC);
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 2617437a4627..d1d4d9100870 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -164,6 +164,8 @@  smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 	struct nls_table *nls_codepage = NULL;
 	struct cifs_ses *ses;
 	int xid;
+	struct TCP_Server_Info *pserver;
+	unsigned int chan_index;
 
 	/*
 	 * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so
@@ -224,6 +226,12 @@  smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 			return -EAGAIN;
 		}
 	}
+
+	/* if server is marked for termination, cifsd will cleanup */
+	if (server->terminate) {
+		spin_unlock(&server->srv_lock);
+		return -EHOSTDOWN;
+	}
 	spin_unlock(&server->srv_lock);
 
 again:
@@ -242,12 +250,24 @@  smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 		 tcon->need_reconnect);
 
 	mutex_lock(&ses->session_mutex);
+	/*
+	 * if this is called by delayed work, and the channel has been disabled
+	 * in parallel, the delayed work can continue to execute in parallel
+	 * there's a chance that this channel may not exist anymore
+	 */
+	spin_lock(&server->srv_lock);
+	if (server->tcpStatus == CifsExiting) {
+		spin_unlock(&server->srv_lock);
+		mutex_unlock(&ses->session_mutex);
+		rc = -EHOSTDOWN;
+		goto out;
+	}
+
 	/*
 	 * Recheck after acquire mutex. If another thread is negotiating
 	 * and the server never sends an answer the socket will be closed
 	 * and tcpStatus set to reconnect.
 	 */
-	spin_lock(&server->srv_lock);
 	if (server->tcpStatus == CifsNeedReconnect) {
 		spin_unlock(&server->srv_lock);
 		mutex_unlock(&ses->session_mutex);
@@ -284,6 +304,47 @@  smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 
 	rc = cifs_negotiate_protocol(0, ses, server);
 	if (!rc) {
+		/*
+		 * if server stopped supporting multichannel
+		 * and the first channel reconnected, disable all the others.
+		 */
+		if (ses->chan_count > 1 &&
+		    !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+			if (SERVER_IS_CHAN(server)) {
+				cifs_dbg(VFS, "server %s does not support " \
+					 "multichannel anymore. skipping secondary channel\n",
+					 ses->server->hostname);
+
+				spin_lock(&ses->chan_lock);
+				chan_index = cifs_ses_get_chan_index(ses, server);
+				if (chan_index == CIFS_INVAL_CHAN_INDEX) {
+					spin_unlock(&ses->chan_lock);
+					goto skip_terminate;
+				}
+
+				ses->chans[chan_index].server = NULL;
+				cifs_put_tcp_session(server, 1);
+				spin_unlock(&ses->chan_lock);
+
+				server->terminate = true;
+				cifs_signal_cifsd_for_reconnect(server, false);
+
+				/* mark primary server as needing reconnect */
+				pserver = server->primary_server;
+				cifs_signal_cifsd_for_reconnect(pserver, false);
+
+skip_terminate:
+				mutex_unlock(&ses->session_mutex);
+				rc = -EHOSTDOWN;
+				goto out;
+			} else {
+				cifs_dbg(VFS, "server %s does not support " \
+					 "multichannel anymore. disabling all other channels\n",
+					 ses->server->hostname);
+				cifs_disable_secondary_channels(ses);
+			}
+		}
+
 		rc = cifs_setup_session(0, ses, server, nls_codepage);
 		if ((rc == -EACCES) && !tcon->retry) {
 			mutex_unlock(&ses->session_mutex);
@@ -3833,6 +3894,13 @@  void smb2_reconnect_server(struct work_struct *work)
 	/* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
 	mutex_lock(&pserver->reconnect_mutex);
 
+	/* if the server is marked for termination, drop the ref count here */
+	if (server->terminate) {
+		cifs_put_tcp_session(server, true);
+		mutex_unlock(&pserver->reconnect_mutex);
+		return;
+	}
+
 	INIT_LIST_HEAD(&tmp_list);
 	INIT_LIST_HEAD(&tmp_ses_list);
 	cifs_dbg(FYI, "Reconnecting tcons and channels\n");
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 14710afdc2a3..c60c65926933 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1032,7 +1032,7 @@  struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
 	spin_lock(&ses->chan_lock);
 	for (i = 0; i < ses->chan_count; i++) {
 		server = ses->chans[i].server;
-		if (!server)
+		if (!server || server->terminate)
 			continue;
 
 		/*