| Message ID | 8e5fcbd2e3c6fbf90f6d836fa82de2ceed3298e2.1587490709.git.dcaratti@redhat.com |
|---|---|
| State | Superseded, archived |
| Delegated to: | Mat Martineau |
| Headers | show |
| Series | [RFC] net: mptcp: improve fallback after successful 3-way-handshake | expand |
On Tue, 21 Apr 2020, Davide Caratti wrote: > From: Davide Caratti <dcaratti@nst.lab.eng.brq.redhat.com> I'm guessing this isn't the address you want in the git "Author" field! > > a socket can receive data without a DSS option after MP_CAPABLE exchange > in the three way handshake. In this case, it must fall-back to normal > TCP in accordance to RFC8684 §3.7. In addition, fall-back to TCP must > occur if the peer sends data with a DSS option with zero DLL (so-called > "infinite mapping"). In case a server needs to fallback and msk->subflow > is NULL, msk->first can be used for send/receive operations. > > Signed-off-by: Davide Caratti <dcaratti@redhat.com> > --- > net/mptcp/protocol.c | 20 +++++++++++++++++++- > net/mptcp/subflow.c | 18 ++++++++++++++---- > 2 files changed, 33 insertions(+), 5 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 457655ea875b..045aacb68e13 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -736,6 +736,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > pr_debug("fallback passthrough"); > ret = sock_sendmsg(ssock, msg); > return ret >= 0 ? ret + copied : (copied ? copied : ret); > + } else if (__mptcp_needs_tcp_fallback(msk)) { > + ssk = msk->first; > + release_sock((struct sock *)msk); > + pr_debug("msk->first passthrough"); > + ret = ssk->sk_prot->sendmsg(ssk, msg, len); > + return ret >= 0 ? ret + copied : (copied ? copied : ret); > } > > mptcp_clean_una(sk); > @@ -899,6 +905,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, > mptcp_subflow_ctx(ssock->sk)); > copied = sock_recvmsg(ssock, msg, flags); > return copied; > + } else if (__mptcp_needs_tcp_fallback(msk)) { > +use_mskfirst: > + pr_debug("fallback-read using msk->first"); > + sk = msk->first; > + release_sock((struct sock *)msk); sk is one of the function arguments referring to the MPTCP-level socket, so a "struct sock *ssk" to use in this block of code would be clearer and would remove the cast in the release_sock(). I recognize that in this fallback scenario there is no struct sock available, so sock_recvmsg() can't be used, but is there any way to handle sendmsg/recvmsg the same after either type of fallback? > + copied = sk->sk_prot->recvmsg(sk, msg, len, nonblock, flags, > + addr_len); > + return copied; > } > > timeo = sock_rcvtimeo(sk, nonblock); > @@ -963,8 +977,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, > > pr_debug("block timeout %ld", timeo); > mptcp_wait_data(sk, &timeo); > - if (unlikely(__mptcp_tcp_fallback(msk))) > + if (unlikely(__mptcp_tcp_fallback(msk))) { > + ssock = __mptcp_tcp_fallback(msk); > + if (!ssock) > + goto use_mskfirst; This goto doesn't seem reachable - __mptcp_tcp_fallback(msk) should have the same return value both times. > goto fallback; > + } > } > > if (skb_queue_empty(&sk->sk_receive_queue)) { > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c > index 47f901b712f9..460288c1c8ff 100644 > --- a/net/mptcp/subflow.c > +++ b/net/mptcp/subflow.c > @@ -460,7 +460,8 @@ enum mapping_status { > MAPPING_OK, > MAPPING_INVALID, > MAPPING_EMPTY, > - MAPPING_DATA_FIN > + MAPPING_DATA_FIN, > + MAPPING_INFINITE > }; > > static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) > @@ -540,8 +541,14 @@ static enum mapping_status get_mapping_status(struct sock *ssk) > return MAPPING_EMPTY; > } > > - if (!subflow->map_valid) > + if (!subflow->map_valid) { > + if (!mpext && !subflow->local_id) { > + pr_debug("no mpext and 0 local id"); > + skb_ext_del(skb, SKB_EXT_MPTCP); skb_ext_del() wouldn't be required, mptcp_get_ext() already returned NULL indicating that mpext isn't there. > + return MAPPING_INFINITE; > + } This will treat any skb without a DSS option on the initial subflow as an implicit infinite mapping, if there is not a current valid mapping. It's possible to encounter this after valid mappings were found and DATA_ACKed. Section 3.7 is specific about what to do when an ACK (not a data packet) without DSS is received on the initial subflow before any other subflows are added: this is treated as an implicit infinite mapping, and the next data packet that is sent should include an infinite mapping to signal the fallback to the peer. While section 3.7 says a sender "MUST" include a DSS option in every segment until a DATA_ACK is received, I don't see a description of what happens if the DSS option is missing when the receiver processes it - is it supposed to also be treated as a fallback? If so, the code above must only return MAPPING_INFINITE if DATA_ACK has never been sent and no other subflows have been created. > return MAPPING_INVALID; > + } > > goto validate_seq; > } > @@ -552,9 +559,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk) > > data_len = mpext->data_len; > if (data_len == 0) { > - pr_err("Infinite mapping not handled"); > MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); > - return MAPPING_INVALID; > + return subflow->local_id ? MAPPING_INVALID : MAPPING_INFINITE; Like above, I think this requires different logic to determine that there are no other subflows and that DATA_ACK has not been sent -- if that's the narrow case the code is trying to detect. The RFC seems to allow infinite mappings on any single subflow connection. If a second subflow was established and then the initial subflow (local_id == 0) was closed, then there could be an infinite mapping on a subflow with a different local_id. (If I'm misreading the spec, please let me know!). This may be outside the scope of this patch anyway. > } > > if (mpext->data_fin == 1) { > @@ -663,6 +669,10 @@ static bool subflow_check_data_avail(struct sock *ssk) > ssk->sk_err = EBADMSG; > goto fatal; > } > + if (status == MAPPING_INFINITE) { > + tcp_sk(ssk)->is_mptcp = 0; > + return false; > + } > > if (status != MAPPING_OK) > return false; > -- > 2.25.2 -- Mat Martineau Intel
On Tue, 2020-04-21 at 14:57 -0700, Mat Martineau wrote: > On Tue, 21 Apr 2020, Davide Caratti wrote: > > > From: Davide Caratti <dcaratti@nst.lab.eng.brq.redhat.com> > > I'm guessing this isn't the address you want in the git "Author" field! > > > a socket can receive data without a DSS option after MP_CAPABLE exchange > > in the three way handshake. In this case, it must fall-back to normal > > TCP in accordance to RFC8684 §3.7. In addition, fall-back to TCP must > > occur if the peer sends data with a DSS option with zero DLL (so-called > > "infinite mapping"). In case a server needs to fallback and msk->subflow > > is NULL, msk->first can be used for send/receive operations. > > > > Signed-off-by: Davide Caratti <dcaratti@redhat.com> > > --- > > net/mptcp/protocol.c | 20 +++++++++++++++++++- > > net/mptcp/subflow.c | 18 ++++++++++++++---- > > 2 files changed, 33 insertions(+), 5 deletions(-) > > > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > > index 457655ea875b..045aacb68e13 100644 > > --- a/net/mptcp/protocol.c > > +++ b/net/mptcp/protocol.c > > @@ -736,6 +736,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > pr_debug("fallback passthrough"); > > ret = sock_sendmsg(ssock, msg); > > return ret >= 0 ? ret + copied : (copied ? copied : ret); > > + } else if (__mptcp_needs_tcp_fallback(msk)) { > > + ssk = msk->first; > > + release_sock((struct sock *)msk); > > + pr_debug("msk->first passthrough"); > > + ret = ssk->sk_prot->sendmsg(ssk, msg, len); > > + return ret >= 0 ? ret + copied : (copied ? copied : ret); > > } > > > > mptcp_clean_una(sk); > > @@ -899,6 +905,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, > > mptcp_subflow_ctx(ssock->sk)); > > copied = sock_recvmsg(ssock, msg, flags); > > return copied; > > + } else if (__mptcp_needs_tcp_fallback(msk)) { > > +use_mskfirst: > > + pr_debug("fallback-read using msk->first"); > > + sk = msk->first; > > + release_sock((struct sock *)msk); > > sk is one of the function arguments referring to the MPTCP-level socket, > so a "struct sock *ssk" to use in this block of code would be clearer and > would remove the cast in the release_sock(). > > I recognize that in this fallback scenario there is no struct sock > available, so sock_recvmsg() can't be used, but is there any way to > handle sendmsg/recvmsg the same after either type of fallback? I *think* the above 2 conditionals can be replaced with only the second one, e.g. always use 'ssk->sk_prot->{sendmsg,recvmsg}(ssk, msg, len);' > > + return MAPPING_INFINITE; > > + } > > This will treat any skb without a DSS option on the initial subflow as an > implicit infinite mapping, if there is not a current valid mapping. It's > possible to encounter this after valid mappings were found and DATA_ACKed. > > Section 3.7 is specific about what to do when an ACK (not a data packet) > without DSS is received on the initial subflow before any other subflows > are added: this is treated as an implicit infinite mapping, and the next > data packet that is sent should include an infinite mapping to signal the > fallback to the peer. > > While section 3.7 says a sender "MUST" include a DSS option in every > segment until a DATA_ACK is received, I don't see a description of what > happens if the DSS option is missing when the receiver processes it - is > it supposed to also be treated as a fallback? If so, the code above must > only return MAPPING_INFINITE if DATA_ACK has never been sent and no other ^^^^ received ?!? > subflows have been created. I *think* we can add an additional per subflow flag to track the 'first data_ack received' event, and check READ_ONCE(msk->pm.subflows) == 0 to verify no additional subflows exits. > > return MAPPING_INVALID; > > + } > > > > goto validate_seq; > > } > > @@ -552,9 +559,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk) > > > > data_len = mpext->data_len; > > if (data_len == 0) { > > - pr_err("Infinite mapping not handled"); > > MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); > > - return MAPPING_INVALID; > > + return subflow->local_id ? MAPPING_INVALID : MAPPING_INFINITE; > > Like above, I think this requires different logic to determine that there > are no other subflows and that DATA_ACK has not been sent -- if that's the > narrow case the code is trying to detect. > > The RFC seems to allow infinite mappings on any single subflow connection. > If a second subflow was established and then the initial subflow (local_id > == 0) was closed, then there could be an infinite mapping on a subflow > with a different local_id. (If I'm misreading the spec, please let me > know!). This may be outside the scope of this patch anyway. from https://datatracker.ietf.org/doc/rfc8684/?include_text=1, section 3.7.: """ In the case of such an ACK being received on the first subflow (i.e., that started with MP_CAPABLE), before any additional subflows are added, the implementation MUST drop out of MPTCP mode and fall back to regular TCP. """ and: """ In the case of this occurring on an additional subflow (i.e., one started with MP_JOIN), the host MUST close the subflow with a RST, which SHOULD contain an MP_TCPRST option (Section 3.6) with a "Middlebox interference" reason code. """ I think we should cope only with local_id == 0 thanks! /P
[off-list, perche' vado di idee pazze per platea ristretta ;)] On Tue, 2020-04-21 at 19:42 +0200, Davide Caratti wrote: > @@ -663,6 +669,10 @@ static bool subflow_check_data_avail(struct sock *ssk) > ssk->sk_err = EBADMSG; > goto fatal; > } > + if (status == MAPPING_INFINITE) { > + tcp_sk(ssk)->is_mptcp = 0; > + return false; > + } Mi sovviene che bisogna anche impedire al PM di creare subflow addizionali. Idea pazza... ma se aggiungiano un flag a msk, 'fallback', che settiamo qui ed in generale devo facciamo fallback (ma non subflow_syn_recv_sock () perche' li' finiamo per restituire un socket tcp) e: in mptcp_established_options() -> do nothing if (unlikely(msk->fallback)) in subflow_check_data_avail() -> if (unlikely(msk->fallback)) { setta mappa valida per skb corrente return ok possiamo rimuovere i casi speciali in recvmsg(),sendmsg(),poll()! - spero :) WDYT?!? A prescindere c'e' un punto da gestire. In caso di fallback sul 4 ack, adesso becchiamo: WARN_ON_ONCE(subflow->can_ack); in check_fully_established() che probabilmente andrebbe tolto. Lo raggiungiamo quando riceviamo come primo ack ai dati un paccketto senza DATA_ACK - quindi forse possiamo usarlo per identificare il fallback a TCP. /P
On Wed, 2020-04-22 at 15:53 +0200, Paolo Abeni wrote: > prescindere c'e' un punto da gestire. In caso di fallback sul 4 ack, > adesso becchiamo: > WARN_ON_ONCE(subflow->can_ack); > in check_fully_established() che probabilmente andrebbe tolto. fammelo provare, a me non succede
On Wed, 2020-04-22 at 15:53 +0200, Paolo Abeni wrote:
> [off-list, perche' vado di idee pazze per platea ristretta ;)]
Oks, as you can guess the previous message was not supposed to land on
the ML ;)
Luckly gooogle translator should be able to prove it does not contain
offense to anyone ;)
Sorry for the noise!
Paolo
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 457655ea875b..045aacb68e13 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -736,6 +736,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) pr_debug("fallback passthrough"); ret = sock_sendmsg(ssock, msg); return ret >= 0 ? ret + copied : (copied ? copied : ret); + } else if (__mptcp_needs_tcp_fallback(msk)) { + ssk = msk->first; + release_sock((struct sock *)msk); + pr_debug("msk->first passthrough"); + ret = ssk->sk_prot->sendmsg(ssk, msg, len); + return ret >= 0 ? ret + copied : (copied ? copied : ret); } mptcp_clean_una(sk); @@ -899,6 +905,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, mptcp_subflow_ctx(ssock->sk)); copied = sock_recvmsg(ssock, msg, flags); return copied; + } else if (__mptcp_needs_tcp_fallback(msk)) { +use_mskfirst: + pr_debug("fallback-read using msk->first"); + sk = msk->first; + release_sock((struct sock *)msk); + copied = sk->sk_prot->recvmsg(sk, msg, len, nonblock, flags, + addr_len); + return copied; } timeo = sock_rcvtimeo(sk, nonblock); @@ -963,8 +977,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, pr_debug("block timeout %ld", timeo); mptcp_wait_data(sk, &timeo); - if (unlikely(__mptcp_tcp_fallback(msk))) + if (unlikely(__mptcp_tcp_fallback(msk))) { + ssock = __mptcp_tcp_fallback(msk); + if (!ssock) + goto use_mskfirst; goto fallback; + } } if (skb_queue_empty(&sk->sk_receive_queue)) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 47f901b712f9..460288c1c8ff 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -460,7 +460,8 @@ enum mapping_status { MAPPING_OK, MAPPING_INVALID, MAPPING_EMPTY, - MAPPING_DATA_FIN + MAPPING_DATA_FIN, + MAPPING_INFINITE }; static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) @@ -540,8 +541,14 @@ static enum mapping_status get_mapping_status(struct sock *ssk) return MAPPING_EMPTY; } - if (!subflow->map_valid) + if (!subflow->map_valid) { + if (!mpext && !subflow->local_id) { + pr_debug("no mpext and 0 local id"); + skb_ext_del(skb, SKB_EXT_MPTCP); + return MAPPING_INFINITE; + } return MAPPING_INVALID; + } goto validate_seq; } @@ -552,9 +559,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk) data_len = mpext->data_len; if (data_len == 0) { - pr_err("Infinite mapping not handled"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); - return MAPPING_INVALID; + return subflow->local_id ? MAPPING_INVALID : MAPPING_INFINITE; } if (mpext->data_fin == 1) { @@ -663,6 +669,10 @@ static bool subflow_check_data_avail(struct sock *ssk) ssk->sk_err = EBADMSG; goto fatal; } + if (status == MAPPING_INFINITE) { + tcp_sk(ssk)->is_mptcp = 0; + return false; + } if (status != MAPPING_OK) return false;