
[RFC,5/6] mptcp: add ack work queue

Message ID 20191108220953.30904-6-fw@strlen.de
State Superseded, archived
Series mptcp: add and use wmem accounting for rtx queue

Commit Message

Florian Westphal Nov. 8, 2019, 10:09 p.m. UTC
This is needed in the (unlikely) case that userspace is blocked in
mptcp_sendmsg because wmem is exhausted.

In that case, only the rtx work queue will clean the rtx backlog, but it
could take several milliseconds until it runs next, so add a dedicated
work queue that will run as soon as possible.

We can avoid scheduling the work if the mptcp socket is writeable, as the
next send/recv call will also clean acked dfrags.

An alternative approach is to add a new mptcp flag and re-use the
existing retrans worker; let me know if that is the preferred solution.
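
In short, the new hook only kicks the work queue when the msk is not
writeable; the work item then grabs the socket lock and frees acked
dfrags (condensed from the patch below):

void mptcp_data_acked(struct sock *sk)
{
	mptcp_reset_timer(sk);

	/* a writeable socket will clean acked dfrags on its next send/recv */
	if (!sk_stream_is_writeable(sk) &&
	    schedule_work(&mptcp_sk(sk)->ack_work))
		sock_hold(sk);	/* dropped again by mptcp_ack_work() */
}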

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/options.c  |  2 +-
 net/mptcp/protocol.c | 35 +++++++++++++++++++++++++++++++----
 net/mptcp/protocol.h |  3 ++-
 3 files changed, 34 insertions(+), 6 deletions(-)

Comments

Paolo Abeni Nov. 11, 2019, 9:19 a.m. UTC | #1
On Fri, 2019-11-08 at 23:09 +0100, Florian Westphal wrote:
> This is needed in the (unlikely) case that userspace is blocked in
> mptcp_sendmsg because wmem is exhausted.
> 
> In that case, only the rtx work queue will clean the rtx backlog, but it
> could take several milliseconds until it runs next, so add a dedicated
> work queue that will run as soon as possible.
> 
> We can avoid scheduling the work if the mptcp socket is writeable, as the
> next send/recv call will also clean acked dfrags.
> 
> An alternative approach is to add a new mptcp flag and re-use the
> existing retrans worker; let me know if that is the preferred solution.

It looks like core TCP uses both approaches for timers (overloading a
timer with multiple duties, and using multiple timers for different
activities).

Since mptcp_ack_work() closely matches the initial bits of
mptcp_retransmit(), in this specific scenario I think reusing the
existing worker (plus a flag) would be better.
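
Something along these lines, just as a rough sketch (the MPTCP_CLEAN_UNA
bit name is illustrative only, not taken from this series):

/* illustrative flag bit in msk->flags; name not from this series */
#define MPTCP_CLEAN_UNA	0

void mptcp_data_acked(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	mptcp_reset_timer(sk);

	/* re-use the existing rtx worker instead of a dedicated ack_work */
	if (!sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_CLEAN_UNA, &msk->flags);
		if (schedule_work(&msk->rtx_work))
			sock_hold(sk);
	}
}

mptcp_retransmit() already cleans acked dfrags under the socket lock in
its initial section, so it could test_and_clear_bit() the flag there to
decide whether a full retransmission pass is actually needed.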

Cheers,

Paolo
Florian Westphal Nov. 11, 2019, 3:21 p.m. UTC | #2
Paolo Abeni <pabeni@redhat.com> wrote:
> Since mptcp_ack_work() closely matches the initial bits of
> mptcp_retransmit(), in this specific scenario I think reusing the
> existing worker (plus a flag) would be better.

Done: https://git.breakpoint.cc/cgit/fw/mptcp-next.git/commit/?h=wmem_acct_05&id=fea55d959e29da51c605dad9c5e84544622d1b49

I'll resend once others had a chance to look at this series as well.
Mat Martineau Nov. 12, 2019, 1:04 a.m. UTC | #3
On Mon, 11 Nov 2019, Florian Westphal wrote:

> Paolo Abeni <pabeni@redhat.com> wrote:
>> Since mptcp_ack_work() closely matches the initial bits of
>> mptcp_retransmit(), in this specific scenario I think reusing the
>> existing worker (plus a flag) would be better.
>
> Done: https://git.breakpoint.cc/cgit/fw/mptcp-next.git/commit/?h=wmem_acct_05&id=fea55d959e29da51c605dad9c5e84544622d1b49
>
> I'll resend once others had a chance to look at this series as well.

I think the combined worker should be ok for DATA_FIN too (which Florian
had mentioned could share the ack workqueue).

--
Mat Martineau
Intel

Patch

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b488bf07ada5..81d7c3c026c8 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -590,7 +590,7 @@  static void update_una(struct mptcp_sock *msk,
 		old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una,
 					       new_snd_una);
 		if (old_snd_una == snd_una) {
-			mptcp_reset_timer((struct sock *)msk);
+			mptcp_data_acked((struct sock *)msk);
 			break;
 		}
 	}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b93050056195..5c6948085ef4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -40,7 +40,7 @@  static bool mptcp_timer_pending(struct sock *sk)
 	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
 }
 
-void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned long tout;
@@ -52,6 +52,15 @@  void mptcp_reset_timer(struct sock *sk)
 	sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
 }
 
+void mptcp_data_acked(struct sock *sk)
+{
+	mptcp_reset_timer(sk);
+
+	if (!sk_stream_is_writeable(sk) &&
+	    schedule_work(&mptcp_sk(sk)->ack_work))
+		sock_hold(sk);
+}
+
 static void mptcp_stop_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -647,6 +656,21 @@  static void mptcp_retransmit_timer(struct timer_list *t)
 	sock_put(sk);
 }
 
+static void mptcp_ack_work(struct work_struct *work)
+{
+	struct mptcp_sock *msk;
+	struct sock *sk;
+
+	msk = container_of(work, struct mptcp_sock, ack_work);
+	sk = &msk->sk.icsk_inet.sk;
+
+	lock_sock(sk);
+	mptcp_clean_una(sk);
+	release_sock(sk);
+
+	sock_put(sk);
+}
+
 static void mptcp_retransmit(struct work_struct *work)
 {
 	int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0;
@@ -716,6 +740,7 @@  static int __mptcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&msk->rtx_queue);
 
 	INIT_WORK(&msk->rtx_work, mptcp_retransmit);
+	INIT_WORK(&msk->ack_work, mptcp_ack_work);
 
 	/* re-use the csk retrans timer for MPTCP-level retrans */
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -755,12 +780,14 @@  static void __mptcp_clear_xmit(struct sock *sk)
 		dfrag_clear(sk, dfrag);
 }
 
-static void mptcp_cancel_rtx_work(struct sock *sk)
+static void mptcp_cancel_work(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	if (cancel_work_sync(&msk->rtx_work))
 		sock_put(sk);
+	if (cancel_work_sync(&msk->ack_work))
+		sock_put(sk);
 }
 
 static void mptcp_close(struct sock *sk, long timeout)
@@ -792,7 +819,7 @@  static void mptcp_close(struct sock *sk, long timeout)
 	__mptcp_clear_xmit(sk);
 	release_sock(sk);
 
-	mptcp_cancel_rtx_work(sk);
+	mptcp_cancel_work(sk);
 
 	sk_common_release(sk);
 }
@@ -802,7 +829,7 @@  static int mptcp_disconnect(struct sock *sk, int flags)
 	lock_sock(sk);
 	__mptcp_clear_xmit(sk);
 	release_sock(sk);
-	mptcp_cancel_rtx_work(sk);
+	mptcp_cancel_work(sk);
 	return tcp_disconnect(sk, flags);
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index bc7e8fa54afa..aeda56466219 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -134,6 +134,7 @@  struct mptcp_sock {
 	unsigned long	flags;
 	u16		dport;
 	struct work_struct rtx_work;
+	struct work_struct ack_work;
 	struct list_head conn_list;
 	struct list_head rtx_queue;
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
@@ -290,7 +291,7 @@  void mptcp_get_options(const struct sk_buff *skb,
 
 void mptcp_finish_connect(struct sock *sk, int mp_capable);
 void mptcp_finish_join(struct sock *sk);
-void mptcp_reset_timer(struct sock *sk);
+void mptcp_data_acked(struct sock *sk);
 
 int mptcp_token_new_request(struct request_sock *req);
 void mptcp_token_destroy_request(u32 token);