diff mbox series

[RFC,5/5] subflow: do not create child subflow for fallback MP_JOIN

Message ID 63292184bb1aed7c8f64d66fe39ef6089266f873.1593515429.git.pabeni@redhat.com
State Rejected, archived
Delegated to: Mat Martineau
Headers show
Series mptcp: cope better with mp_join storm | expand

Commit Message

Paolo Abeni June 30, 2020, 11:12 a.m. UTC
On MP_JOIN fallback, the child socket was used just to send out
the due reset packet. As we don't use the child sock anymore
for such a role, we can avoid creating the fallback MP_JOIN
sock entirely.

This saves a busy server from quite a bit of useless work, if
clients are storming with MP_JOIN reqs above the server limits

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
Note: to avoid changing the hook into tcp_check_req(), I had
to introduce a quite ugly const cast in subflow_syn_recv_sock().
I *think* it should be safe, but I'm not 110% sure about unintended
implications - e.g. the compiler optimizing the calling code assuming sk
never changes, and the generated code being buggy due to the above
cast.

I think it should be safe, as the struct sock is not accessed
by tcp_check_req() when the relevant conditions hold after
syn_recv_sock(); still, this requires additional eyeballs.
---
 net/mptcp/subflow.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

Comments

Mat Martineau July 10, 2020, 12:39 a.m. UTC | #1
On Tue, 30 Jun 2020, Paolo Abeni wrote:

> On MP_JOIN fallback, the child socket was used just to send-out
> the due reset packet. As we don't use the child sock anymore
> for such role, when can avoid creating the fallback MP_JOIN
> sock entirely.
>
> This saves a busy server from quite a bit of useless work, if
> clients are storming with MP_JOIN reqs above the server limits
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> Note: to avoid changing the hook into tcp_check_req(), I had
> to introduce a quite ugly const cast in subflow_syn_recv_sock().
> I *think* it should be safe, but I'm 110% sure about unintended
> implications - alike cc optimizing the calling code assuming sk
> never changes, and generated code being bugged due to the above
> cast.
>
> I think should be safe, as the struct sock is not accessed
> by tcp_check_req() when the relevant conditions held after
> syn_recv_sock(), still something require additional eyeballs.
> ---
> net/mptcp/subflow.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 3077e07375b5..9f4f3e29587d 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -405,8 +405,11 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> 	subflow_req = mptcp_subflow_rsk(req);
> 	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
> 	fallback = !tcp_rsk(req)->is_mptcp;
> -	if (fallback)
> +	if (fallback) {
> +		if (fallback_is_fatal)
> +			goto dispose_req;
> 		goto create_child;
> +	}
>
> 	/* if the sk is MP_CAPABLE, we try to fetch the client key */
> 	if (subflow_req->mp_capable) {
> @@ -434,7 +437,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> 		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
> 		    !subflow_hmac_valid(req, &mp_opt)) {
> 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
> -			fallback = true;
> +			goto dispose_req;
> 		}
> 	}
>
> @@ -506,15 +509,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> 		      !mptcp_subflow_ctx(child)->conn));
> 	return child;
>
> +dispose_req:
> +	/* The last req reference will be released by the caller */
> +	child = NULL;
> +	*own_req = false;
> +	reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
> +	inet_csk_reqsk_queue_drop((struct sock *)sk, req);

I'm not sure about unintended consequences of this cast either. Even if it 
looks ok now, the calling code could be changed.

Eric constified this parameter in 0c27171e66d9, saying "We'll soon no 
longer hold listener socket lock, these functions do not modify the socket 
in any way". It looks like the only modification to the socket by calling 
inet_csk_reqsk_queue_drop() is an atomic_dec() or two, which should be an 
ok thing to do. Do you think that would justify reverting that change to 
the sk parameter?

Looking at the call from tcp_check_req(), it looks like 
inet_csk_reqsk_queue_drop() and req->rsk_ops->send_reset() can get called 
again. This function will return NULL on this code path, and if the 
tcp_abort_on_overflow sysctl is true, the drop/reset functions can get 
called again. Is that not possible for not-obvious-to-me reasons?

> +	goto reset;
> +
> dispose_child:
> +	/* The last child reference will be released by the caller */
> 	subflow_drop_ctx(child);
> 	tcp_rsk(req)->drop_req = true;
> 	inet_csk_prepare_for_destroy_sock(child);
> 	tcp_done(child);
> -	req->rsk_ops->send_reset(sk, skb);
>
> -	/* The last child reference will be released by the caller */
> +reset:
> +	req->rsk_ops->send_reset(sk, skb);
> 	return child;
> +
> }
>
> static struct inet_connection_sock_af_ops subflow_specific;
> -- 
> 2.26.2

--
Mat Martineau
Intel
Paolo Abeni July 10, 2020, 4:21 p.m. UTC | #2
On Thu, 2020-07-09 at 17:39 -0700, Mat Martineau wrote:
> On Tue, 30 Jun 2020, Paolo Abeni wrote:
> 
> > On MP_JOIN fallback, the child socket was used just to send-out
> > the due reset packet. As we don't use the child sock anymore
> > for such role, when can avoid creating the fallback MP_JOIN
> > sock entirely.
> > 
> > This saves a busy server from quite a bit of useless work, if
> > clients are storming with MP_JOIN reqs above the server limits
> > 
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > Note: to avoid changing the hook into tcp_check_req(), I had
> > to introduce a quite ugly const cast in subflow_syn_recv_sock().
> > I *think* it should be safe, but I'm 110% sure about unintended
> > implications - alike cc optimizing the calling code assuming sk
> > never changes, and generated code being bugged due to the above
> > cast.
> > 
> > I think should be safe, as the struct sock is not accessed
> > by tcp_check_req() when the relevant conditions held after
> > syn_recv_sock(), still something require additional eyeballs.
> > ---
> > net/mptcp/subflow.c | 21 +++++++++++++++++----
> > 1 file changed, 17 insertions(+), 4 deletions(-)
> > 
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 3077e07375b5..9f4f3e29587d 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -405,8 +405,11 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > 	subflow_req = mptcp_subflow_rsk(req);
> > 	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
> > 	fallback = !tcp_rsk(req)->is_mptcp;
> > -	if (fallback)
> > +	if (fallback) {
> > +		if (fallback_is_fatal)
> > +			goto dispose_req;
> > 		goto create_child;
> > +	}
> > 
> > 	/* if the sk is MP_CAPABLE, we try to fetch the client key */
> > 	if (subflow_req->mp_capable) {
> > @@ -434,7 +437,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > 		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
> > 		    !subflow_hmac_valid(req, &mp_opt)) {
> > 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
> > -			fallback = true;
> > +			goto dispose_req;
> > 		}
> > 	}
> > 
> > @@ -506,15 +509,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > 		      !mptcp_subflow_ctx(child)->conn));
> > 	return child;
> > 
> > +dispose_req:
> > +	/* The last req reference will be released by the caller */
> > +	child = NULL;
> > +	*own_req = false;
> > +	reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
> > +	inet_csk_reqsk_queue_drop((struct sock *)sk, req);
> 
> I'm not sure about unintended consequences of this cast either. Even if it 
> looks ok now, the calling code could be changed.
> 
> Eric constified this parameter in 0c27171e66d9, saying "We'll soon no 
> longer hold listener socket lock, these functions do not modify the socket 
> in any way". It looks like the only modification to the socket by calling 
> inet_csk_reqsk_queue_drop() is an atomic_dec() or two, which should be an 
> ok thing to do. Do you think that would justify reverting that change to 
> the sk parameter?

Thank you for the feedback!

No, I think removing the 'const' from 'syn_recv_sock()' for this change
is a no-go.

> Looking at the call from tcp_check_req(), it looks like 
> ient_csk_reqsk_queue_drop() and req->rsk_ops->send_reset() can get called 
> again. This function will return NULL on this code path, and if the 
> tcp_abort_on_overflow sysctl is true, the drop/reset functions can get 
> called again. Is that not possible for not-obvious-to-me reasons?

No, nothing prevents that code path from being executed - even if it
requires a non-default sysctl. Anyhow, a duplicated rst packet should not
be too problematic, I hope, and the 2nd execution of
inet_csk_reqsk_queue_drop() will be a no-op, as the sk will not be
hashed anymore.

Perhaps we can avoid the const cast by changing a bit the hooking
in tcp_check_req(), but I'm reluctant to do that, as I changed the code
there recently. 

@Florian: do you foresee having to touch tcp_check_req() for syncookie
handling? Perhaps we can unify the change there?

For the short term I'll send a v1 without 5/5 and including some other
fixes I'm assembling while working on issues/17.

Thanks!

Paolo
diff mbox series

Patch

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 3077e07375b5..9f4f3e29587d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -405,8 +405,11 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	subflow_req = mptcp_subflow_rsk(req);
 	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
 	fallback = !tcp_rsk(req)->is_mptcp;
-	if (fallback)
+	if (fallback) {
+		if (fallback_is_fatal)
+			goto dispose_req;
 		goto create_child;
+	}
 
 	/* if the sk is MP_CAPABLE, we try to fetch the client key */
 	if (subflow_req->mp_capable) {
@@ -434,7 +437,7 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
 		    !subflow_hmac_valid(req, &mp_opt)) {
 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
-			fallback = true;
+			goto dispose_req;
 		}
 	}
 
@@ -506,15 +509,25 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 		      !mptcp_subflow_ctx(child)->conn));
 	return child;
 
+dispose_req:
+	/* The last req reference will be released by the caller */
+	child = NULL;
+	*own_req = false;
+	reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+	inet_csk_reqsk_queue_drop((struct sock *)sk, req);
+	goto reset;
+
 dispose_child:
+	/* The last child reference will be released by the caller */
 	subflow_drop_ctx(child);
 	tcp_rsk(req)->drop_req = true;
 	inet_csk_prepare_for_destroy_sock(child);
 	tcp_done(child);
-	req->rsk_ops->send_reset(sk, skb);
 
-	/* The last child reference will be released by the caller */
+reset:
+	req->rsk_ops->send_reset(sk, skb);
 	return child;
+
 }
 
 static struct inet_connection_sock_af_ops subflow_specific;