diff mbox series

[8/9] subflow: place further subflows on new 'join_list'

Message ID 20191130224900.8694-9-fw@strlen.de
State Accepted, archived
Delegated to: Matthieu Baerts
Headers show
Series None | expand

Commit Message

Florian Westphal Nov. 30, 2019, 10:48 p.m. UTC
When the pm worker queue establishes a new subflow in addition to an
existing one, we never place this socket on any list, ie. if such a request
would time out/never complete we can't clean it up.

Place them on a new join_list. When the connection completes, the
subflow is moved to the conn_list.
If it never completes, the socket gets closed at mptcp_shutdown time.

Not done yet:
1. extend mptcp worker to collect timed-out entries
2. prevent arbitrary growth of pending ssks.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/protocol.c | 14 +++++++++++++-
 net/mptcp/protocol.h |  1 +
 net/mptcp/subflow.c  | 16 +++++++++++-----
 3 files changed, 25 insertions(+), 6 deletions(-)

Comments

Paolo Abeni Dec. 2, 2019, 12:35 p.m. UTC | #1
On Sat, 2019-11-30 at 23:48 +0100, Florian Westphal wrote:
> When the pm worker queue establishes a new subflow in addition to an
> existing one, we never place this socket on any list, ie. if such a request
> would time out/never complete we can't clean it up.
> 
> Place them on a new join_list. When the connection completes, the
> subflow is moved to the conn_list.
> If it never completes, the socket gets closed at mptcp_shutdown time.
> 
> Not done yet:
> 1. extend mptcp worker to collect timed-out entries
> 2. prevent arbitrary growth of pending ssks.

Uhm... am I correct, the above will always happen if the client send
[multiple] MP_JOIN at synack reception time? what can we do to prevent
that? scheduling a cleanup workqueue on error?

> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/mptcp/protocol.c | 14 +++++++++++++-
>  net/mptcp/protocol.h |  1 +
>  net/mptcp/subflow.c  | 16 +++++++++++-----
>  3 files changed, 25 insertions(+), 6 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 0c61487a587d..36097008dd4a 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -859,6 +859,7 @@ static int __mptcp_init_sock(struct sock *sk)
>  	struct mptcp_sock *msk = mptcp_sk(sk);
>  
>  	INIT_LIST_HEAD(&msk->conn_list);
> +	INIT_LIST_HEAD(&msk->join_list);
>  	INIT_LIST_HEAD(&msk->rtx_queue);
>  
>  	INIT_WORK(&msk->rtx_work, mptcp_worker);
> @@ -948,6 +949,12 @@ static void mptcp_close(struct sock *sk, long timeout)
>  		msk->subflow = NULL;
>  	}
>  
> +	list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
> +		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> +
> +		__mptcp_close_ssk(sk, ssk, subflow, timeout);
> +	}
> +
>  	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
>  		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
>  
> @@ -1223,7 +1230,7 @@ void mptcp_finish_connect(struct sock *ssk)
>  		subflow->map_seq = ack_seq;
>  		subflow->map_subflow_seq = 1;
>  		subflow->rel_write_seq = 1;
> -		list_add(&subflow->node, &msk->conn_list);
> +		list_move(&subflow->node, &msk->conn_list);
>  		msk->subflow = NULL;
>  		bh_unlock_sock(sk);
>  		local_bh_enable();
> @@ -1260,6 +1267,11 @@ bool mptcp_finish_join(struct sock *sk)
>  	if (parent->sk_state != TCP_ESTABLISHED)
>  		goto out;
>  
> +	/* unlink from join_list; caller must free ssk if
> +	 * we return false below.
> +	 */
> +	list_del_init(&subflow->node);
> +
>  	parent_sock = READ_ONCE(parent->sk_socket);
>  	if (parent_sock) {
>  		if (!sk->sk_socket)
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 90af5a84f16c..990da61fa51b 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -137,6 +137,7 @@ struct mptcp_sock {
>  	struct work_struct rtx_work;
>  	struct list_head conn_list;
>  	struct list_head rtx_queue;
> +	struct list_head join_list;
>  	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
>  	struct mptcp_pm_data	pm;
>  	u8		addr_signal;
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index a89513210ea1..8c13e9a145d5 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -700,6 +700,11 @@ int mptcp_subflow_connect(struct sock *sk, struct sockaddr *local,
>  	int err;
>  
>  	lock_sock(sk);
> +	if (sk->sk_state != TCP_ESTABLISHED) {
> +		release_sock(sk);
> +		return -ENOTCONN;
> +	}
> +
>  	err = mptcp_subflow_create_socket(sk, &sf);
>  	if (err) {
>  		release_sock(sk);
> @@ -711,9 +716,6 @@ int mptcp_subflow_connect(struct sock *sk, struct sockaddr *local,
>  	subflow->local_key = msk->local_key;
>  	subflow->token = msk->token;
>  
> -	sock_hold(sf->sk);
> -	release_sock(sk);
> -
>  	addrlen = sizeof(struct sockaddr_in);
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>  	if (local->sa_family == AF_INET6)
> @@ -730,15 +732,18 @@ int mptcp_subflow_connect(struct sock *sk, struct sockaddr *local,
>  	subflow->request_join = 1;
>  	subflow->request_bkup = 1;
>  
> +	list_add(&subflow->node, &msk->join_list);
> +
>  	err = kernel_connect(sf, remote, addrlen, O_NONBLOCK);
>  	if (err && err != -EINPROGRESS)
>  		goto failed;
>  
> -	sock_put(sf->sk);
> +	release_sock(sk);
>  	return err;
>  
>  failed:
> -	sock_put(sf->sk);
> +	list_del_init(&subflow->node);

Can we release the subflow socket immediatelly here?

Thanks!

Paolo
Florian Westphal Dec. 2, 2019, 4:56 p.m. UTC | #2
Paolo Abeni <pabeni@redhat.com> wrote:
> > Not done yet:
> > 1. extend mptcp worker to collect timed-out entries
> > 2. prevent arbitrary growth of pending ssks.
> 
> Uhm... am I correct, the above will always happen if the client send
> [multiple] MP_JOIN at synack reception time? what can we do to prevent
> that? scheduling a cleanup workqueue on error?

We can't prevent it, client could flood us with hundreds of joins.

I think its best if we just add a upper ceiling on both
join and conn lists.

As for cleanup, I think we could have the rtx worker scan the
list for timed-out/"dead" joins.
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0c61487a587d..36097008dd4a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -859,6 +859,7 @@  static int __mptcp_init_sock(struct sock *sk)
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	INIT_LIST_HEAD(&msk->conn_list);
+	INIT_LIST_HEAD(&msk->join_list);
 	INIT_LIST_HEAD(&msk->rtx_queue);
 
 	INIT_WORK(&msk->rtx_work, mptcp_worker);
@@ -948,6 +949,12 @@  static void mptcp_close(struct sock *sk, long timeout)
 		msk->subflow = NULL;
 	}
 
+	list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		__mptcp_close_ssk(sk, ssk, subflow, timeout);
+	}
+
 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
@@ -1223,7 +1230,7 @@  void mptcp_finish_connect(struct sock *ssk)
 		subflow->map_seq = ack_seq;
 		subflow->map_subflow_seq = 1;
 		subflow->rel_write_seq = 1;
-		list_add(&subflow->node, &msk->conn_list);
+		list_move(&subflow->node, &msk->conn_list);
 		msk->subflow = NULL;
 		bh_unlock_sock(sk);
 		local_bh_enable();
@@ -1260,6 +1267,11 @@  bool mptcp_finish_join(struct sock *sk)
 	if (parent->sk_state != TCP_ESTABLISHED)
 		goto out;
 
+	/* unlink from join_list; caller must free ssk if
+	 * we return false below.
+	 */
+	list_del_init(&subflow->node);
+
 	parent_sock = READ_ONCE(parent->sk_socket);
 	if (parent_sock) {
 		if (!sk->sk_socket)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 90af5a84f16c..990da61fa51b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -137,6 +137,7 @@  struct mptcp_sock {
 	struct work_struct rtx_work;
 	struct list_head conn_list;
 	struct list_head rtx_queue;
+	struct list_head join_list;
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
 	struct mptcp_pm_data	pm;
 	u8		addr_signal;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a89513210ea1..8c13e9a145d5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -700,6 +700,11 @@  int mptcp_subflow_connect(struct sock *sk, struct sockaddr *local,
 	int err;
 
 	lock_sock(sk);
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		release_sock(sk);
+		return -ENOTCONN;
+	}
+
 	err = mptcp_subflow_create_socket(sk, &sf);
 	if (err) {
 		release_sock(sk);
@@ -711,9 +716,6 @@  int mptcp_subflow_connect(struct sock *sk, struct sockaddr *local,
 	subflow->local_key = msk->local_key;
 	subflow->token = msk->token;
 
-	sock_hold(sf->sk);
-	release_sock(sk);
-
 	addrlen = sizeof(struct sockaddr_in);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 	if (local->sa_family == AF_INET6)
@@ -730,15 +732,18 @@  int mptcp_subflow_connect(struct sock *sk, struct sockaddr *local,
 	subflow->request_join = 1;
 	subflow->request_bkup = 1;
 
+	list_add(&subflow->node, &msk->join_list);
+
 	err = kernel_connect(sf, remote, addrlen, O_NONBLOCK);
 	if (err && err != -EINPROGRESS)
 		goto failed;
 
-	sock_put(sf->sk);
+	release_sock(sk);
 	return err;
 
 failed:
-	sock_put(sf->sk);
+	list_del_init(&subflow->node);
+	release_sock(sk);
 	sock_release(sf);
 	return err;
 }
@@ -782,6 +787,7 @@  static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
 	if (!ctx)
 		return NULL;
 	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
+	INIT_LIST_HEAD(&ctx->node);
 
 	pr_debug("subflow=%p", ctx);