diff mbox series

[mptcp-net] mptcp: fix spurious retransmissions

Message ID e9bf65f5e84ca731dd600ef97ca94d610a065410.1612265962.git.pabeni@redhat.com
State Superseded, archived
Headers show
Series [mptcp-net] mptcp: fix spurious retransmissions | expand

Commit Message

Paolo Abeni Feb. 2, 2021, 11:55 a.m. UTC
Syzkaller was able to trigger again the following splat:

WARNING: CPU: 1 PID: 12512 at net/mptcp/protocol.c:761 mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
Modules linked in:
CPU: 1 PID: 12512 Comm: kworker/1:6 Not tainted 5.10.0-rc6 #52
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Workqueue: events mptcp_worker
RIP: 0010:mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
Code: e8 4b 0c ad ff e8 56 21 88 fe 48 b8 00 00 00 00 00 fc ff df 48 c7 04 03 00 00 00 00 48 83 c4 40 5b 5d 41 5c c3 e8 36 21 88 fe <0f> 0b 41 bc c8 00 00 00 eb 98 e8 e7 b1 af fe e9 30 ff ff ff 48 c7
RSP: 0018:ffffc900018c7c68 EFLAGS: 00010293
RAX: ffff888108cb1c80 RBX: 1ffff92000318f8d RCX: ffffffff82ad0307
RDX: 0000000000000000 RSI: ffffffff82ad036a RDI: 0000000000000007
RBP: ffff888113e2d000 R08: ffff888108cb1c80 R09: ffffed10227c5ab7
R10: ffff888113e2d5b7 R11: ffffed10227c5ab6 R12: 0000000000000000
R13: ffff88801f100000 R14: ffff888113e2d5b0 R15: 0000000000000001
FS:  0000000000000000(0000) GS:ffff88811b500000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fd76a874ef8 CR3: 000000001689c005 CR4: 0000000000170ee0
Call Trace:
 mptcp_worker+0xaa4/0x1560 net/mptcp/protocol.c:2334
 process_one_work+0x8d3/0x1200 kernel/workqueue.c:2272
 worker_thread+0x9c/0x1090 kernel/workqueue.c:2418
 kthread+0x303/0x410 kernel/kthread.c:292
 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:296

The mptcp_worker tries to update the MPTCP retransmission timer
even if such timer is not currently scheduled.

mptcp_check_data_fin_ack() can clear the rtx timer just before
mptcp_rtx_head(), but leaving data in the rtx queue - that will
be cleared at msk sock release_cb time.

The above may additionally cause spurious, unneeded MPTCP-level
retransissions.

Fix the issue adding explicit clearing the rtx queue before
trying to retransmit and dropping the unneeded timer stop.
Additionally drop the unused mptcp_rtx_head() helper.

Reported-by: Christoph Paasch <cpaasch@apple.com>
Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
Note: I hope this should fix https://github.com/multipath-tcp/mptcp_net-next/issues/126
@Christoph could you please give this one a spin?
---
 net/mptcp/protocol.c |  3 +--
 net/mptcp/protocol.h | 10 ----------
 2 files changed, 1 insertion(+), 12 deletions(-)

Comments

Matthieu Baerts Feb. 2, 2021, 3:12 p.m. UTC | #1
Hi Paolo,

On 02/02/2021 12:55, Paolo Abeni wrote:
> Syzkaller was able to trigger again the following splat:
> 
> WARNING: CPU: 1 PID: 12512 at net/mptcp/protocol.c:761 mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
> Modules linked in:
> CPU: 1 PID: 12512 Comm: kworker/1:6 Not tainted 5.10.0-rc6 #52
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
> Workqueue: events mptcp_worker
> RIP: 0010:mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
> Code: e8 4b 0c ad ff e8 56 21 88 fe 48 b8 00 00 00 00 00 fc ff df 48 c7 04 03 00 00 00 00 48 83 c4 40 5b 5d 41 5c c3 e8 36 21 88 fe <0f> 0b 41 bc c8 00 00 00 eb 98 e8 e7 b1 af fe e9 30 ff ff ff 48 c7
> RSP: 0018:ffffc900018c7c68 EFLAGS: 00010293
> RAX: ffff888108cb1c80 RBX: 1ffff92000318f8d RCX: ffffffff82ad0307
> RDX: 0000000000000000 RSI: ffffffff82ad036a RDI: 0000000000000007
> RBP: ffff888113e2d000 R08: ffff888108cb1c80 R09: ffffed10227c5ab7
> R10: ffff888113e2d5b7 R11: ffffed10227c5ab6 R12: 0000000000000000
> R13: ffff88801f100000 R14: ffff888113e2d5b0 R15: 0000000000000001
> FS:  0000000000000000(0000) GS:ffff88811b500000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007fd76a874ef8 CR3: 000000001689c005 CR4: 0000000000170ee0
> Call Trace:
>   mptcp_worker+0xaa4/0x1560 net/mptcp/protocol.c:2334
>   process_one_work+0x8d3/0x1200 kernel/workqueue.c:2272
>   worker_thread+0x9c/0x1090 kernel/workqueue.c:2418
>   kthread+0x303/0x410 kernel/kthread.c:292
>   ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:296
> 
> The mptcp_worker tries to update the MPTCP retransmission timer
> even if such timer is not currently scheduled.
> 
> mptcp_check_data_fin_ack() can clear the rtx timer just before
> mptcp_rtx_head(), but leaving data in the rtx queue - that will
> be cleared at msk sock release_cb time.
> 
> The above may additionally cause spurious, unneeded MPTCP-level
> retransissions.
> 
> Fix the issue adding explicit clearing the rtx queue before
> trying to retransmit and dropping the unneeded timer stop.
> Additionally drop the unused mptcp_rtx_head() helper.

Small detail, it should be mptcp_rtx_tail(), not "head()".
I can fix it (with the "retrans(m)issions" typo above) when applying 
this patch if everything else is OK ;-)

Cheers,
Matt
Christoph Paasch Feb. 2, 2021, 5:11 p.m. UTC | #2
On 02/02/21 - 12:55, Paolo Abeni wrote:
> Syzkaller was able to trigger again the following splat:
> 
> WARNING: CPU: 1 PID: 12512 at net/mptcp/protocol.c:761 mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
> Modules linked in:
> CPU: 1 PID: 12512 Comm: kworker/1:6 Not tainted 5.10.0-rc6 #52
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
> Workqueue: events mptcp_worker
> RIP: 0010:mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
> Code: e8 4b 0c ad ff e8 56 21 88 fe 48 b8 00 00 00 00 00 fc ff df 48 c7 04 03 00 00 00 00 48 83 c4 40 5b 5d 41 5c c3 e8 36 21 88 fe <0f> 0b 41 bc c8 00 00 00 eb 98 e8 e7 b1 af fe e9 30 ff ff ff 48 c7
> RSP: 0018:ffffc900018c7c68 EFLAGS: 00010293
> RAX: ffff888108cb1c80 RBX: 1ffff92000318f8d RCX: ffffffff82ad0307
> RDX: 0000000000000000 RSI: ffffffff82ad036a RDI: 0000000000000007
> RBP: ffff888113e2d000 R08: ffff888108cb1c80 R09: ffffed10227c5ab7
> R10: ffff888113e2d5b7 R11: ffffed10227c5ab6 R12: 0000000000000000
> R13: ffff88801f100000 R14: ffff888113e2d5b0 R15: 0000000000000001
> FS:  0000000000000000(0000) GS:ffff88811b500000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007fd76a874ef8 CR3: 000000001689c005 CR4: 0000000000170ee0
> Call Trace:
>  mptcp_worker+0xaa4/0x1560 net/mptcp/protocol.c:2334
>  process_one_work+0x8d3/0x1200 kernel/workqueue.c:2272
>  worker_thread+0x9c/0x1090 kernel/workqueue.c:2418
>  kthread+0x303/0x410 kernel/kthread.c:292
>  ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:296
> 
> The mptcp_worker tries to update the MPTCP retransmission timer
> even if such timer is not currently scheduled.
> 
> mptcp_check_data_fin_ack() can clear the rtx timer just before
> mptcp_rtx_head(), but leaving data in the rtx queue - that will
> be cleared at msk sock release_cb time.
> 
> The above may additionally cause spurious, unneeded MPTCP-level
> retransissions.
> 
> Fix the issue adding explicit clearing the rtx queue before
> trying to retransmit and dropping the unneeded timer stop.
> Additionally drop the unused mptcp_rtx_head() helper.
> 
> Reported-by: Christoph Paasch <cpaasch@apple.com>
> Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> Note: I hope this should fix https://github.com/multipath-tcp/mptcp_net-next/issues/126
> @Christoph could you please give this one a spin?

syzkaller is running!


Christoph

> ---
>  net/mptcp/protocol.c |  3 +--
>  net/mptcp/protocol.h | 10 ----------
>  2 files changed, 1 insertion(+), 12 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 305d25cbc216a..88a6fb6f7ecc8 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -363,8 +363,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
>  
>  	/* Look for an acknowledged DATA_FIN */
>  	if (mptcp_pending_data_fin_ack(sk)) {
> -		mptcp_stop_timer(sk);
> -
>  		WRITE_ONCE(msk->snd_data_fin_enable, 0);
>  
>  		switch (sk->sk_state) {
> @@ -2270,6 +2268,7 @@ static void mptcp_worker(struct work_struct *work)
>  	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
>  		goto unlock;
>  
> +	__mptcp_clean_una(sk);
>  	dfrag = mptcp_rtx_head(sk);
>  	if (!dfrag)
>  		goto unlock;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 1024ea1512d2b..1bb44a4baf4a5 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -335,16 +335,6 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
>  	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
>  }
>  
> -static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
> -{
> -	struct mptcp_sock *msk = mptcp_sk(sk);
> -
> -	if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
> -		return NULL;
> -
> -	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
> -}
> -
>  static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
>  {
>  	struct mptcp_sock *msk = mptcp_sk(sk);
> -- 
> 2.26.2
>
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 305d25cbc216a..88a6fb6f7ecc8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -363,8 +363,6 @@  static void mptcp_check_data_fin_ack(struct sock *sk)
 
 	/* Look for an acknowledged DATA_FIN */
 	if (mptcp_pending_data_fin_ack(sk)) {
-		mptcp_stop_timer(sk);
-
 		WRITE_ONCE(msk->snd_data_fin_enable, 0);
 
 		switch (sk->sk_state) {
@@ -2270,6 +2268,7 @@  static void mptcp_worker(struct work_struct *work)
 	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		goto unlock;
 
+	__mptcp_clean_una(sk);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag)
 		goto unlock;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 1024ea1512d2b..1bb44a4baf4a5 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -335,16 +335,6 @@  static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
 	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
 
-static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-
-	if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
-		return NULL;
-
-	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
-}
-
 static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);