diff mbox series

[RFC] net: mptcp: improve fallback after successful 3-way-handshake

Message ID 8e5fcbd2e3c6fbf90f6d836fa82de2ceed3298e2.1587490709.git.dcaratti@redhat.com
State Superseded, archived
Delegated to: Mat Martineau
Headers show
Series [RFC] net: mptcp: improve fallback after successful 3-way-handshake | expand

Commit Message

Davide Caratti April 21, 2020, 5:42 p.m. UTC
From: Davide Caratti <dcaratti@nst.lab.eng.brq.redhat.com>

a socket can receive data without a DSS option after MP_CAPABLE exchange
in the three way handshake. In this case, it must fall-back to normal
TCP in accordance to RFC8684 §3.7. In addition, fall-back to TCP must
occur if the peer sends data with a DSS option with zero DLL (so-called
"infinite mapping"). In case a server needs to fallback and msk->subflow
is NULL, msk->first can be used for send/receive operations.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 net/mptcp/protocol.c | 20 +++++++++++++++++++-
 net/mptcp/subflow.c  | 18 ++++++++++++++----
 2 files changed, 33 insertions(+), 5 deletions(-)

Comments

Mat Martineau April 21, 2020, 9:57 p.m. UTC | #1
On Tue, 21 Apr 2020, Davide Caratti wrote:

> From: Davide Caratti <dcaratti@nst.lab.eng.brq.redhat.com>

I'm guessing this isn't the address you want in the git "Author" field!

>
> a socket can receive data without a DSS option after MP_CAPABLE exchange
> in the three way handshake. In this case, it must fall-back to normal
> TCP in accordance to RFC8684 §3.7. In addition, fall-back to TCP must
> occur if the peer sends data with a DSS option with zero DLL (so-called
> "infinite mapping"). In case a server needs to fallback and msk->subflow
> is NULL, msk->first can be used for send/receive operations.
>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
> ---
> net/mptcp/protocol.c | 20 +++++++++++++++++++-
> net/mptcp/subflow.c  | 18 ++++++++++++++----
> 2 files changed, 33 insertions(+), 5 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 457655ea875b..045aacb68e13 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -736,6 +736,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 		pr_debug("fallback passthrough");
> 		ret = sock_sendmsg(ssock, msg);
> 		return ret >= 0 ? ret + copied : (copied ? copied : ret);
> +	} else if (__mptcp_needs_tcp_fallback(msk)) {
> +		ssk = msk->first;
> +		release_sock((struct sock *)msk);
> +		pr_debug("msk->first passthrough");
> +		ret = ssk->sk_prot->sendmsg(ssk, msg, len);
> +		return ret >= 0 ? ret + copied : (copied ? copied : ret);
> 	}
>
> 	mptcp_clean_una(sk);
> @@ -899,6 +905,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> 			 mptcp_subflow_ctx(ssock->sk));
> 		copied = sock_recvmsg(ssock, msg, flags);
> 		return copied;
> +	} else if (__mptcp_needs_tcp_fallback(msk)) {
> +use_mskfirst:
> +		pr_debug("fallback-read using msk->first");
> +		sk = msk->first;
> +		release_sock((struct sock *)msk);

sk is one of the function arguments referring to the MPTCP-level socket, 
so a "struct sock *ssk" to use in this block of code would be clearer and 
would remove the cast in the release_sock().

I recognize that in this fallback scenario there is no struct sock 
available, so sock_recvmsg() can't be used, but is there any way to 
handle sendmsg/recvmsg the same after either type of fallback?


> +		copied = sk->sk_prot->recvmsg(sk, msg, len, nonblock, flags,
> +					      addr_len);
> +		return copied;
> 	}
>
> 	timeo = sock_rcvtimeo(sk, nonblock);
> @@ -963,8 +977,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>
> 		pr_debug("block timeout %ld", timeo);
> 		mptcp_wait_data(sk, &timeo);
> -		if (unlikely(__mptcp_tcp_fallback(msk)))
> +		if (unlikely(__mptcp_tcp_fallback(msk))) {
> +			ssock = __mptcp_tcp_fallback(msk);
> +			if (!ssock)
> +				goto use_mskfirst;

This goto doesn't seem reachable - __mptcp_tcp_fallback(msk) should have 
the same return value both times.

> 			goto fallback;
> +		}
> 	}
>
> 	if (skb_queue_empty(&sk->sk_receive_queue)) {
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 47f901b712f9..460288c1c8ff 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -460,7 +460,8 @@ enum mapping_status {
> 	MAPPING_OK,
> 	MAPPING_INVALID,
> 	MAPPING_EMPTY,
> -	MAPPING_DATA_FIN
> +	MAPPING_DATA_FIN,
> +	MAPPING_INFINITE
> };
>
> static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
> @@ -540,8 +541,14 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
> 			return MAPPING_EMPTY;
> 		}
>
> -		if (!subflow->map_valid)
> +		if (!subflow->map_valid) {
> +			if (!mpext && !subflow->local_id) {
> +				pr_debug("no mpext and 0 local id");
> +				skb_ext_del(skb, SKB_EXT_MPTCP);

skb_ext_del() wouldn't be required, mptcp_get_ext() already returned NULL 
indicating that mpext isn't there.

> +				return MAPPING_INFINITE;
> +			}

This will treat any skb without a DSS option on the initial subflow as an 
implicit infinite mapping, if there is not a current valid mapping. It's 
possible to encounter this after valid mappings were found and DATA_ACKed.

Section 3.7 is specific about what to do when an ACK (not a data packet) 
without DSS is received on the initial subflow before any other subflows 
are added: this is treated as an implicit infinite mapping, and the next 
data packet that is sent should include an infinite mapping to signal the 
fallback to the peer.

While section 3.7 says a sender "MUST" include a DSS option in every 
segment until a DATA_ACK is received, I don't see a description of what 
happens if the DSS option is missing when the receiver processes it - is 
it supposed to also be treated as a fallback? If so, the code above must 
only return MAPPING_INFINITE if DATA_ACK has never been sent and no other 
subflows have been created.

> 			return MAPPING_INVALID;
> +		}
>
> 		goto validate_seq;
> 	}
> @@ -552,9 +559,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
>
> 	data_len = mpext->data_len;
> 	if (data_len == 0) {
> -		pr_err("Infinite mapping not handled");
> 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
> -		return MAPPING_INVALID;
> +		return subflow->local_id ? MAPPING_INVALID : MAPPING_INFINITE;

Like above, I think this requires different logic to determine that there 
are no other subflows and that DATA_ACK has not been sent -- if that's the 
narrow case the code is trying to detect.

The RFC seems to allow infinite mappings on any single subflow connection. 
If a second subflow was established and then the initial subflow (local_id 
== 0) was closed, then there could be an infinite mapping on a subflow 
with a different local_id. (If I'm misreading the spec, please let me 
know!). This may be outside the scope of this patch anyway.

> 	}
>
> 	if (mpext->data_fin == 1) {
> @@ -663,6 +669,10 @@ static bool subflow_check_data_avail(struct sock *ssk)
> 			ssk->sk_err = EBADMSG;
> 			goto fatal;
> 		}
> +		if (status == MAPPING_INFINITE) {
> +			tcp_sk(ssk)->is_mptcp = 0;
> +			return false;
> +		}
>
> 		if (status != MAPPING_OK)
> 			return false;
> -- 
> 2.25.2

--
Mat Martineau
Intel
Paolo Abeni April 22, 2020, 11:45 a.m. UTC | #2
On Tue, 2020-04-21 at 14:57 -0700, Mat Martineau wrote:
> On Tue, 21 Apr 2020, Davide Caratti wrote:
> 
> > From: Davide Caratti <dcaratti@nst.lab.eng.brq.redhat.com>
> 
> I'm guessing this isn't the address you want in the git "Author" field!
> 
> > a socket can receive data without a DSS option after MP_CAPABLE exchange
> > in the three way handshake. In this case, it must fall-back to normal
> > TCP in accordance to RFC8684 §3.7. In addition, fall-back to TCP must
> > occur if the peer sends data with a DSS option with zero DLL (so-called
> > "infinite mapping"). In case a server needs to fallback and msk->subflow
> > is NULL, msk->first can be used for send/receive operations.
> > 
> > Signed-off-by: Davide Caratti <dcaratti@redhat.com>
> > ---
> > net/mptcp/protocol.c | 20 +++++++++++++++++++-
> > net/mptcp/subflow.c  | 18 ++++++++++++++----
> > 2 files changed, 33 insertions(+), 5 deletions(-)
> > 
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 457655ea875b..045aacb68e13 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -736,6 +736,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> > 		pr_debug("fallback passthrough");
> > 		ret = sock_sendmsg(ssock, msg);
> > 		return ret >= 0 ? ret + copied : (copied ? copied : ret);
> > +	} else if (__mptcp_needs_tcp_fallback(msk)) {
> > +		ssk = msk->first;
> > +		release_sock((struct sock *)msk);
> > +		pr_debug("msk->first passthrough");
> > +		ret = ssk->sk_prot->sendmsg(ssk, msg, len);
> > +		return ret >= 0 ? ret + copied : (copied ? copied : ret);
> > 	}
> > 
> > 	mptcp_clean_una(sk);
> > @@ -899,6 +905,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> > 			 mptcp_subflow_ctx(ssock->sk));
> > 		copied = sock_recvmsg(ssock, msg, flags);
> > 		return copied;
> > +	} else if (__mptcp_needs_tcp_fallback(msk)) {
> > +use_mskfirst:
> > +		pr_debug("fallback-read using msk->first");
> > +		sk = msk->first;
> > +		release_sock((struct sock *)msk);
> 
> sk is one of the function arguments referring to the MPTCP-level socket, 
> so a "struct sock *ssk" to use in this block of code would be clearer and 
> would remove the cast in the release_sock().
> 
> I recognize that in this fallback scenario there is no struct sock 
> available, so sock_recvmsg() can't be used, but is there any way to 
> handle sendmsg/recvmsg the same after either type of fallback?

I *think* the above 2 conditionals can be replaced with only the second
one, e.g. always use 'ssk->sk_prot->{sendmsg,recvmsg}(ssk, msg, len);' 

> > +				return MAPPING_INFINITE;
> > +			}
> 
> This will treat any skb without a DSS option on the initial subflow as an 
> implicit infinite mapping, if there is not a current valid mapping. It's 
> possible to encounter this after valid mappings were found and DATA_ACKed.
> 
> Section 3.7 is specific about what to do when an ACK (not a data packet) 
> without DSS is received on the initial subflow before any other subflows 
> are added: this is treated as an implicit infinite mapping, and the next 
> data packet that is sent should include an infinite mapping to signal the 
> fallback to the peer.
> 
> While section 3.7 says a sender "MUST" include a DSS option in every 
> segment until a DATA_ACK is received, I don't see a description of what 
> happens if the DSS option is missing when the receiver processes it - is 
> it supposed to also be treated as a fallback? If so, the code above must 
> only return MAPPING_INFINITE if DATA_ACK has never been sent and no other 
                                                          ^^^^
received ?!?
                                                          
> subflows have been created.

I *think* we can add an additional per subflow flag to track the 'first
data_ack received' event, and check READ_ONCE(msk->pm.subflows) == 0 to
verify no additional subflows exits.

> > 			return MAPPING_INVALID;
> > +		}
> > 
> > 		goto validate_seq;
> > 	}
> > @@ -552,9 +559,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
> > 
> > 	data_len = mpext->data_len;
> > 	if (data_len == 0) {
> > -		pr_err("Infinite mapping not handled");
> > 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
> > -		return MAPPING_INVALID;
> > +		return subflow->local_id ? MAPPING_INVALID : MAPPING_INFINITE;
> 
> Like above, I think this requires different logic to determine that there 
> are no other subflows and that DATA_ACK has not been sent -- if that's the 
> narrow case the code is trying to detect.
> 
> The RFC seems to allow infinite mappings on any single subflow connection. 
> If a second subflow was established and then the initial subflow (local_id 
> == 0) was closed, then there could be an infinite mapping on a subflow 
> with a different local_id. (If I'm misreading the spec, please let me 
> know!). This may be outside the scope of this patch anyway.

from 
https://datatracker.ietf.org/doc/rfc8684/?include_text=1, section 3.7.:

"""
      In the case of such an ACK being received on the first subflow
      (i.e., that started with MP_CAPABLE), before any additional
      subflows are added, the implementation MUST drop out of MPTCP mode
      and fall back to regular TCP.
"""

and:

"""
      In the case of this occurring
      on an additional subflow (i.e., one started with MP_JOIN), the
      host MUST close the subflow with a RST, which SHOULD contain an
      MP_TCPRST option (Section 3.6) with a "Middlebox interference"
      reason code.
"""

I think we should cope only with local_id == 0

thanks!

/P
Paolo Abeni April 22, 2020, 1:53 p.m. UTC | #3
[off-list, perche' vado di idee pazze per platea ristretta ;)]

On Tue, 2020-04-21 at 19:42 +0200, Davide Caratti wrote:
> @@ -663,6 +669,10 @@ static bool subflow_check_data_avail(struct sock *ssk)
>  			ssk->sk_err = EBADMSG;
>  			goto fatal;
>  		}
> +		if (status == MAPPING_INFINITE) {
> +			tcp_sk(ssk)->is_mptcp = 0;
> +			return false;
> +		}

Mi sovviene che bisogna anche impedire al PM di creare subflow
addizionali.

Idea pazza... ma se aggiungiano un flag a msk, 'fallback', che settiamo
qui ed in generale devo facciamo fallback (ma non subflow_syn_recv_sock
() perche' li' finiamo per restituire un socket tcp) e:

in 
mptcp_established_options() ->
	do nothing if (unlikely(msk->fallback))

in subflow_check_data_avail() -> 
	if (unlikely(msk->fallback)) {
		setta mappa valida per skb corrente
		return ok

possiamo rimuovere i casi speciali in recvmsg(),sendmsg(),poll()! -
spero :)

WDYT?!?

A prescindere c'e' un punto da gestire. In caso di fallback sul 4 ack,
adesso becchiamo:
	WARN_ON_ONCE(subflow->can_ack);
in check_fully_established() che probabilmente andrebbe tolto.

Lo raggiungiamo quando riceviamo come primo ack ai dati un paccketto
senza DATA_ACK - quindi forse possiamo usarlo per identificare il
fallback a TCP.

/P
Davide Caratti April 22, 2020, 1:56 p.m. UTC | #4
On Wed, 2020-04-22 at 15:53 +0200, Paolo Abeni wrote:
>  prescindere c'e' un punto da gestire. In caso di fallback sul 4 ack,
> adesso becchiamo:
>         WARN_ON_ONCE(subflow->can_ack);
> in check_fully_established() che probabilmente andrebbe tolto.

fammelo provare, a me non succede
Paolo Abeni April 22, 2020, 1:56 p.m. UTC | #5
On Wed, 2020-04-22 at 15:53 +0200, Paolo Abeni wrote:
> [off-list, perche' vado di idee pazze per platea ristretta ;)]

Oks, as you can guess the previous message was not supposed to land on
the ML ;)

Luckly gooogle translator should be able to prove it does not contain
offense to anyone ;)

Sorry for the noise!

Paolo
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 457655ea875b..045aacb68e13 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -736,6 +736,12 @@  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		pr_debug("fallback passthrough");
 		ret = sock_sendmsg(ssock, msg);
 		return ret >= 0 ? ret + copied : (copied ? copied : ret);
+	} else if (__mptcp_needs_tcp_fallback(msk)) {
+		ssk = msk->first;
+		release_sock((struct sock *)msk);
+		pr_debug("msk->first passthrough");
+		ret = ssk->sk_prot->sendmsg(ssk, msg, len);
+		return ret >= 0 ? ret + copied : (copied ? copied : ret);
 	}
 
 	mptcp_clean_una(sk);
@@ -899,6 +905,14 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			 mptcp_subflow_ctx(ssock->sk));
 		copied = sock_recvmsg(ssock, msg, flags);
 		return copied;
+	} else if (__mptcp_needs_tcp_fallback(msk)) {
+use_mskfirst:
+		pr_debug("fallback-read using msk->first");
+		sk = msk->first;
+		release_sock((struct sock *)msk);
+		copied = sk->sk_prot->recvmsg(sk, msg, len, nonblock, flags,
+					      addr_len);
+		return copied;
 	}
 
 	timeo = sock_rcvtimeo(sk, nonblock);
@@ -963,8 +977,12 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		pr_debug("block timeout %ld", timeo);
 		mptcp_wait_data(sk, &timeo);
-		if (unlikely(__mptcp_tcp_fallback(msk)))
+		if (unlikely(__mptcp_tcp_fallback(msk))) {
+			ssock = __mptcp_tcp_fallback(msk);
+			if (!ssock)
+				goto use_mskfirst;
 			goto fallback;
+		}
 	}
 
 	if (skb_queue_empty(&sk->sk_receive_queue)) {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 47f901b712f9..460288c1c8ff 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -460,7 +460,8 @@  enum mapping_status {
 	MAPPING_OK,
 	MAPPING_INVALID,
 	MAPPING_EMPTY,
-	MAPPING_DATA_FIN
+	MAPPING_DATA_FIN,
+	MAPPING_INFINITE
 };
 
 static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
@@ -540,8 +541,14 @@  static enum mapping_status get_mapping_status(struct sock *ssk)
 			return MAPPING_EMPTY;
 		}
 
-		if (!subflow->map_valid)
+		if (!subflow->map_valid) {
+			if (!mpext && !subflow->local_id) {
+				pr_debug("no mpext and 0 local id");
+				skb_ext_del(skb, SKB_EXT_MPTCP);
+				return MAPPING_INFINITE;
+			}
 			return MAPPING_INVALID;
+		}
 
 		goto validate_seq;
 	}
@@ -552,9 +559,8 @@  static enum mapping_status get_mapping_status(struct sock *ssk)
 
 	data_len = mpext->data_len;
 	if (data_len == 0) {
-		pr_err("Infinite mapping not handled");
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
-		return MAPPING_INVALID;
+		return subflow->local_id ? MAPPING_INVALID : MAPPING_INFINITE;
 	}
 
 	if (mpext->data_fin == 1) {
@@ -663,6 +669,10 @@  static bool subflow_check_data_avail(struct sock *ssk)
 			ssk->sk_err = EBADMSG;
 			goto fatal;
 		}
+		if (status == MAPPING_INFINITE) {
+			tcp_sk(ssk)->is_mptcp = 0;
+			return false;
+		}
 
 		if (status != MAPPING_OK)
 			return false;