diff mbox series

[1/2] mptcp: parse and emit MP_CAPABLE option according to v1 spec.

Message ID 20191107152827.54549-2-cpaasch@apple.com
State Superseded, archived
Headers show
Series [1/2] mptcp: parse and emit MP_CAPABLE option according to v1 spec. | expand

Commit Message

Christoph Paasch Nov. 7, 2019, 3:28 p.m. UTC
Local key is sent on syn/ack, and both keys are sent on 3rd ack.
MP_CAPABLE messages len are updated accordingly.
We need the skbuff to emit correctly the above, so we push the skbuff
struct as an argument all the way from tcp code to the relevant mptcp
callbacks.

When processing incoming MP_CAPABLE + data, build a full blown DSS-like
map info, to simplify later processing.
On child socket creation, we need to record the remote key, if available.

Signed-off-by: Christoph Paasch <cpaasch@apple.com>
---
 include/linux/tcp.h   |   3 +-
 include/net/mptcp.h   |  11 +--
 net/ipv4/tcp_input.c  |   2 +-
 net/ipv4/tcp_output.c |   2 +-
 net/mptcp/options.c   | 186 +++++++++++++++++++++++++++++++-----------
 net/mptcp/protocol.c  |   2 +-
 net/mptcp/protocol.h  |   6 +-
 net/mptcp/subflow.c   |  12 ++-
 8 files changed, 163 insertions(+), 61 deletions(-)

Comments

Paolo Abeni Nov. 7, 2019, 3:54 p.m. UTC | #1
Hi,

On Thu, 2019-11-07 at 07:28 -0800, Christoph Paasch wrote:
> @@ -356,28 +387,66 @@ void mptcp_rcv_synsent(struct sock *sk)
>  	}
>  }
>  
> -static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size,
> +static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
> +					 unsigned int *size,
>  					 unsigned int remaining,
>  					 struct mptcp_out_options *opts)
>  {
>  	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +	struct mptcp_ext *mpext;
> +	unsigned int data_len;
> +
> +	pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow,
> +		 subflow->fourth_ack, subflow->snd_isn,
> +		 skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
> +
> +	if (subflow->mp_capable && !subflow->fourth_ack && skb &&
> +	    subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
> +		/* When skb is not available, we better over-estimate the
> +		 * emitted options len. A full DSS option is longer than
> +		 * TCPOLEN_MPTCP_MPC_ACK_DATA, so let's the caller try to fit
> +		 * that.
> +		 */
>  
> -	if (subflow->mp_capable && !subflow->fourth_ack &&
> -	    remaining >= TCPOLEN_MPTCP_MPC_ACK) {
> -		opts->suboptions = OPTION_MPTCP_MPC_ACK;
> -		opts->sndr_key = subflow->local_key;
> -		opts->rcvr_key = subflow->remote_key;
> -		*size = TCPOLEN_MPTCP_MPC_ACK;
> -		subflow->fourth_ack = 1;
> -		pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
> -			 subflow, subflow->local_key, subflow->remote_key);
> -		return true;
> +		mpext = mptcp_get_ext(skb);
> +		data_len = mpext ? mpext->data_len : 0;
> +		/* Section 3.1.
> +		 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
> +		 * packets that start the first subflow of an MPTCP connection,
> +		 * as well as the first packet that carries data
> +		 */
> +		if (data_len > 0 && remaining >= TCPOLEN_MPTCP_MPC_ACK_DATA) {

It's not clear to me which is the expected behavior when we can't fit
MPTCP options inside the TCP header. As the maximum MP_CAPABLE/MP_JOIN
opt len is 24 bytes, and we have at most 16 bytes in the previous opt
(tstamp, md5) it looks like this can't ever happen, so could emit a
WARN_ON_ONCE() here?

BTW we currently emit a warn if DSS exceeds the TCP option space, and
that can actually happen (max DSS without csum is 26 bytes, with csum
28). Perhaps we could switch to 32 bits seq if we hit such condition?

Cheers,

Paolo
Matthieu Baerts Nov. 20, 2019, 2:56 p.m. UTC | #2
Hi Christoph,

On 07/11/2019 16:28, Christoph Paasch wrote:
> Local key is sent on syn/ack, and both keys are sent on 3rd ack.
> MP_CAPABLE messages len are updated accordingly.
> We need the skbuff to emit correctly the above, so we push the skbuff
> struct as an argument all the way from tcp code to the relevant mptcp
> callbacks.
> 
> When processing incoming MP_CAPABLE + data, build a full blown DSS-like
> map info, to simplify later processing.
> On child socket creation, we need to record the remote key, if available.
> 
> Signed-off-by: Christoph Paasch <cpaasch@apple.com>

Thank you for sharing this! It looks very nice!

I just have a small question, please see below:

[...]

> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 1b08e1193991..9cdbd617c49a 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c

[...]

> @@ -24,12 +24,24 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
>   	 * 10-17: Receiver key (optional)
>   	 */
>   	case MPTCPOPT_MP_CAPABLE:
> -		if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
> -		    opsize != TCPOLEN_MPTCP_MPC_SYNACK)
> +		/* strict size checking */
> +		if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
> +			if (skb->len > tcp_hdr(skb)->doff << 2)
> +				expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
> +			else
> +				expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
> +		} else {
> +			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
> +				expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
> +			else
> +				expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
> +		}
> +		if (opsize != expected_opsize)
>   			break;
>   
> +		/* only support V1 of the protocol */
>   		mp_opt->version = *ptr++ & MPTCP_VERSION_MASK;
> -		if (mp_opt->version != 0)
> +		if (mp_opt->version != 1)

Should we not only "break" here is the version is < 1? To be compatible 
with possible newer versions.

It seems confirmed by the RFC, section 3.1:

    If a host supports multiple versions of MPTCP,
    the sender of the MP_CAPABLE option SHOULD signal the highest version
    number it supports.  In return, in its MP_CAPABLE option, the
    receiver will signal the version number it wishes to use, which MUST
    be equal to or lower than the version number indicated in the initial
    MP_CAPABLE.

detail: maybe we need a #define for this minimal version that we support?

Cheers,
Matt
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 41730d9bcb32..8c8d51d65adf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -120,7 +120,8 @@  struct tcp_options_received {
 			data_fin:1,
 			use_ack:1,
 			ack64:1,
-			__unused:3;
+			mp_capable_map:1,
+			__unused:2;
 		u8	add_addr : 1,
 			rm_addr : 1,
 			family : 4;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index eba39a881767..babff18e4ee8 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -25,7 +25,8 @@  struct mptcp_ext {
 			data_fin:1,
 			use_ack:1,
 			ack64:1,
-			__unused:2;
+			mp_capable_map:1,
+			__unused:1;
 };
 
 struct mptcp_out_options {
@@ -64,10 +65,10 @@  static inline bool rsk_is_mptcp(const struct request_sock *req)
 	return tcp_rsk(req)->is_mptcp;
 }
 
-void mptcp_parse_option(const unsigned char *ptr, int opsize,
-			struct tcp_options_received *opt_rx);
-bool mptcp_syn_options(struct sock *sk, unsigned int *size,
-		       struct mptcp_out_options *opts);
+void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
+			int opsize, struct tcp_options_received *opt_rx);
+bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
+		       unsigned int *size, struct mptcp_out_options *opts);
 void mptcp_rcv_synsent(struct sock *sk);
 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
 			  struct mptcp_out_options *opts);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d1ab9edfb764..16bd46a097d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3922,7 +3922,7 @@  void tcp_parse_options(const struct net *net,
 				break;
 #endif
 			case TCPOPT_MPTCP:
-				mptcp_parse_option(ptr, opsize, opt_rx);
+				mptcp_parse_option(skb, ptr, opsize, opt_rx);
 				break;
 
 			case TCPOPT_FASTOPEN:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f864749a1ec1..a9a0e61285b1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -682,7 +682,7 @@  static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	if (sk_is_mptcp(sk)) {
 		unsigned int size;
 
-		if (mptcp_syn_options(sk, &size, &opts->mptcp)) {
+		if (mptcp_syn_options(sk, skb, &size, &opts->mptcp)) {
 			opts->options |= OPTION_MPTCP;
 			remaining -= size;
 		}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 1b08e1193991..9cdbd617c49a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -9,8 +9,8 @@ 
 #include <net/mptcp.h>
 #include "protocol.h"
 
-void mptcp_parse_option(const unsigned char *ptr, int opsize,
-			struct tcp_options_received *opt_rx)
+void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
+			int opsize, struct tcp_options_received *opt_rx)
 {
 	struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
 	u8 subtype = *ptr >> 4;
@@ -24,12 +24,24 @@  void mptcp_parse_option(const unsigned char *ptr, int opsize,
 	 * 10-17: Receiver key (optional)
 	 */
 	case MPTCPOPT_MP_CAPABLE:
-		if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
-		    opsize != TCPOLEN_MPTCP_MPC_SYNACK)
+		/* strict size checking */
+		if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+			if (skb->len > tcp_hdr(skb)->doff << 2)
+				expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
+			else
+				expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
+		} else {
+			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
+				expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
+			else
+				expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
+		}
+		if (opsize != expected_opsize)
 			break;
 
+		/* only support V1 of the protocol */
 		mp_opt->version = *ptr++ & MPTCP_VERSION_MASK;
-		if (mp_opt->version != 0)
+		if (mp_opt->version != 1)
 			break;
 
 		mp_opt->flags = *ptr++;
@@ -54,19 +66,29 @@  void mptcp_parse_option(const unsigned char *ptr, int opsize,
 			break;
 
 		mp_opt->mp_capable = 1;
-		mp_opt->sndr_key = get_unaligned_be64(ptr);
-		ptr += 8;
-
-		if (opsize == TCPOLEN_MPTCP_MPC_SYNACK) {
+		if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
+			mp_opt->sndr_key = get_unaligned_be64(ptr);
+			ptr += 8;
+		}
+		if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
 			mp_opt->rcvr_key = get_unaligned_be64(ptr);
 			ptr += 8;
-			pr_debug("MP_CAPABLE flags=%x, sndr=%llu, rcvr=%llu",
-				 mp_opt->flags, mp_opt->sndr_key,
-				 mp_opt->rcvr_key);
-		} else {
-			pr_debug("MP_CAPABLE flags=%x, sndr=%llu",
-				 mp_opt->flags, mp_opt->sndr_key);
 		}
+		if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+			/* Section 3.1.:
+			 * "the data parameters in a MP_CAPABLE are semantically
+			 * equivalent to those in a DSS option and can be used
+			 * interchangeably."
+			 */
+			mp_opt->dss = 1;
+			mp_opt->use_map = 1;
+			mp_opt->mp_capable_map = 1;
+			mp_opt->data_len = get_unaligned_be16(ptr);
+			ptr += 2;
+		}
+		pr_debug("MP_CAPABLE flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+			 mp_opt->flags, opsize, mp_opt->sndr_key,
+			 mp_opt->rcvr_key, mp_opt->data_len);
 		break;
 
 	/* MPTCPOPT_MP_JOIN
@@ -132,6 +154,11 @@  void mptcp_parse_option(const unsigned char *ptr, int opsize,
 		pr_debug("DSS");
 		ptr++;
 
+		/* we must clear 'mp_capable_map' be able to detect MP_CAPABLE
+		 * map vs DSS map in mptcp_incoming_options(), and reconstruct
+		 * map info accordingly
+		 */
+		mp_opt->mp_capable_map = 0;
 		mp_opt->dss_flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
 		mp_opt->data_fin = (mp_opt->dss_flags & MPTCP_DSS_DATA_FIN) != 0;
 		mp_opt->dsn64 = (mp_opt->dss_flags & MPTCP_DSS_DSN64) != 0;
@@ -305,18 +332,22 @@  void mptcp_get_options(const struct sk_buff *skb,
 			if (opsize > length)
 				return;	/* don't parse partial options */
 			if (opcode == TCPOPT_MPTCP)
-				mptcp_parse_option(ptr, opsize, opt_rx);
+				mptcp_parse_option(skb, ptr, opsize, opt_rx);
 			ptr += opsize - 2;
 			length -= opsize;
 		}
 	}
 }
 
-bool mptcp_syn_options(struct sock *sk, unsigned int *size,
-		       struct mptcp_out_options *opts)
+bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
+		       unsigned int *size, struct mptcp_out_options *opts)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 
+	/* we will use snd_isn to detect first pkt [re]transmission
+	 * in mptcp_established_options_mp()
+	 */
+	subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
 	if (subflow->request_mptcp) {
 		pr_debug("local_key=%llu", subflow->local_key);
 		opts->suboptions = OPTION_MPTCP_MPC_SYN;
@@ -356,28 +387,66 @@  void mptcp_rcv_synsent(struct sock *sk)
 	}
 }
 
-static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size,
+static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+					 unsigned int *size,
 					 unsigned int remaining,
 					 struct mptcp_out_options *opts)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_ext *mpext;
+	unsigned int data_len;
+
+	pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow,
+		 subflow->fourth_ack, subflow->snd_isn,
+		 skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
+
+	if (subflow->mp_capable && !subflow->fourth_ack && skb &&
+	    subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
+		/* When skb is not available, we better over-estimate the
+		 * emitted options len. A full DSS option is longer than
+		 * TCPOLEN_MPTCP_MPC_ACK_DATA, so let's the caller try to fit
+		 * that.
+		 */
 
-	if (subflow->mp_capable && !subflow->fourth_ack &&
-	    remaining >= TCPOLEN_MPTCP_MPC_ACK) {
-		opts->suboptions = OPTION_MPTCP_MPC_ACK;
-		opts->sndr_key = subflow->local_key;
-		opts->rcvr_key = subflow->remote_key;
-		*size = TCPOLEN_MPTCP_MPC_ACK;
-		subflow->fourth_ack = 1;
-		pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
-			 subflow, subflow->local_key, subflow->remote_key);
-		return true;
+		mpext = mptcp_get_ext(skb);
+		data_len = mpext ? mpext->data_len : 0;
+		/* Section 3.1.
+		 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
+		 * packets that start the first subflow of an MPTCP connection,
+		 * as well as the first packet that carries data
+		 */
+		if (data_len > 0 && remaining >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
+			opts->suboptions = OPTION_MPTCP_MPC_ACK;
+			opts->sndr_key = subflow->local_key;
+			opts->rcvr_key = subflow->remote_key;
+
+			/* we will check ext_copy.data_len in
+			 * mptcp_write_options() to discriminate between
+			 * TCPOLEN_MPTCP_MPC_ACK_DATA and TCPOLEN_MPTCP_MPC_ACK
+			 */
+			opts->ext_copy.data_len = data_len;
+			*size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
+			pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
+				 subflow, subflow->local_key,
+				 subflow->remote_key, opts->ext_copy.data_len);
+			return true;
+		}
+		if (remaining >= TCPOLEN_MPTCP_MPC_ACK) {
+			opts->suboptions = OPTION_MPTCP_MPC_ACK;
+			opts->sndr_key = subflow->local_key;
+			opts->rcvr_key = subflow->remote_key;
+			opts->ext_copy.data_len = 0;
+			*size = TCPOLEN_MPTCP_MPC_ACK;
+			pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
+				 subflow, subflow->local_key,
+				 subflow->remote_key);
+			return true;
+		}
 	} else if (subflow->mp_join && !subflow->fourth_ack &&
 		   remaining >= TCPOLEN_MPTCP_MPJ_ACK) {
 		opts->suboptions = OPTION_MPTCP_MPJ_ACK;
 		memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
 		*size = TCPOLEN_MPTCP_MPJ_ACK;
-		subflow->fourth_ack = 1;
 		pr_debug("subflow=%p", subflow);
 		return true;
 	}
@@ -500,7 +569,7 @@  bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 		return false;
 
 	opts->suboptions = 0;
-	if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) {
+	if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) {
 		*size += opt_size;
 		remaining -= opt_size;
 		ret = true;
@@ -526,11 +595,9 @@  bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
 	if (subflow_req->mp_capable) {
 		opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
 		opts->sndr_key = subflow_req->local_key;
-		opts->rcvr_key = subflow_req->remote_key;
 		*size = TCPOLEN_MPTCP_MPC_SYNACK;
-		pr_debug("req=%p, local_key=%llu, remote_key=%llu",
-			 subflow_req, subflow_req->local_key,
-			 subflow_req->remote_key);
+		pr_debug("req=%p, local_key=%llu",
+			 subflow_req, subflow_req->local_key);
 		return true;
 	} else if (subflow_req->mp_join) {
 		opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
@@ -623,11 +690,23 @@  void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 	memset(mpext, 0, sizeof(*mpext));
 
 	if (mp_opt->use_map) {
-		mpext->data_seq = mp_opt->data_seq;
-		mpext->subflow_seq = mp_opt->subflow_seq;
+		if (mp_opt->mp_capable_map) {
+			/* this is an MP_CAPABLE carrying MPTCP data
+			 * we know this map the first chunk of data
+			 */
+			mptcp_crypto_key_sha1(subflow->remote_key, NULL,
+					      &mpext->data_seq);
+			mpext->data_seq++;
+			mpext->subflow_seq = 1;
+			mpext->dsn64 = 1;
+			mpext->mp_capable_map = 1;
+		} else {
+			mpext->data_seq = mp_opt->data_seq;
+			mpext->subflow_seq = mp_opt->subflow_seq;
+			mpext->dsn64 = mp_opt->dsn64;
+		}
 		mpext->data_len = mp_opt->data_len;
 		mpext->use_map = 1;
-		mpext->dsn64 = mp_opt->dsn64;
 	}
 
 	mpext->data_fin = mp_opt->data_fin;
@@ -638,8 +717,7 @@  void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 
 void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
 {
-	if ((OPTION_MPTCP_MPC_SYN |
-	     OPTION_MPTCP_MPC_SYNACK |
+	if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
 	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
 		u8 len;
 
@@ -647,20 +725,34 @@  void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
 			len = TCPOLEN_MPTCP_MPC_SYN;
 		else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
 			len = TCPOLEN_MPTCP_MPC_SYNACK;
+		else if (opts->ext_copy.data_len)
+			len = TCPOLEN_MPTCP_MPC_ACK_DATA;
 		else
 			len = TCPOLEN_MPTCP_MPC_ACK;
 
-		*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, 0,
+		*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, 1,
 				      MPTCP_CAP_HMAC_SHA1);
+
+		if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
+		    opts->suboptions))
+			goto mp_capable_done;
+
 		put_unaligned_be64(opts->sndr_key, ptr);
 		ptr += 2;
-		if ((OPTION_MPTCP_MPC_SYNACK |
-		     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
-			put_unaligned_be64(opts->rcvr_key, ptr);
-			ptr += 2;
-		}
+		if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
+			goto mp_capable_done;
+
+		put_unaligned_be64(opts->rcvr_key, ptr);
+		ptr += 2;
+		if (!opts->ext_copy.data_len)
+			goto mp_capable_done;
+
+		put_unaligned_be32(opts->ext_copy.data_len << 16 |
+				   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+		ptr += 1;
 	}
 
+mp_capable_done:
 	if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
 		*ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, TCPOLEN_MPTCP_ADD_ADDR,
 				      MPTCP_ADDR_IPVERSION_4, opts->addr_id);
@@ -731,7 +823,7 @@  void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
 				flags |= MPTCP_DSS_DATA_FIN;
 		}
 
-		*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
+		*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 1, flags);
 
 		if (mpext->use_ack) {
 			put_unaligned_be64(mpext->data_ack, ptr);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 656e2fda0c7e..9ad735a62330 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1093,7 +1093,7 @@  static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk)
 	msk->subflow = ssock;
 	subflow = mptcp_subflow_ctx(msk->subflow->sk);
 	subflow->request_mptcp = 1; /* @@ if MPTCP enabled */
-	subflow->request_version = 0; /* currently only v0 supported */
+	subflow->request_version = 1; /* only v1 supported */
 
 	sock_hold(ssock->sk);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 791f2c19cfb8..5c96777f9b3c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -33,9 +33,10 @@ 
 #define MPTCPOPT_MP_FASTCLOSE	7
 
 /* MPTCP suboption lengths */
-#define TCPOLEN_MPTCP_MPC_SYN		12
-#define TCPOLEN_MPTCP_MPC_SYNACK	20
+#define TCPOLEN_MPTCP_MPC_SYN		4
+#define TCPOLEN_MPTCP_MPC_SYNACK	12
 #define TCPOLEN_MPTCP_MPC_ACK		20
+#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
 #define TCPOLEN_MPTCP_MPJ_SYN		12
 #define TCPOLEN_MPTCP_MPJ_SYNACK	16
 #define TCPOLEN_MPTCP_MPJ_ACK		24
@@ -200,6 +201,7 @@  struct mptcp_subflow_context {
 	u64	remote_key;
 	u64	idsn;
 	u64	map_seq;
+	u32	snd_isn;
 	u32	token;
 	u32	rel_write_seq;
 	u32	map_subflow_seq;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index f08c4b713978..dce5bf137eea 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -129,7 +129,6 @@  static void subflow_v4_init_req(struct request_sock *req,
 			subflow_req->version = listener->request_version;
 		else
 			subflow_req->version = rx_opt.mptcp.version;
-		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
 		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
 	} else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
 		subflow_req->mp_join = 1;
@@ -266,9 +265,16 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
 
-	/* if the sk is MP_CAPABLE, we already received the client key */
+	/* if the sk is MP_CAPABLE, we need to fetch the client key */
 	subflow_req = mptcp_subflow_rsk(req);
-	if (!subflow_req->mp_capable && subflow_req->mp_join) {
+	if (subflow_req->mp_capable) {
+		opt_rx.mptcp.mp_capable = 0;
+		mptcp_get_options(skb, &opt_rx);
+		if (!opt_rx.mptcp.mp_capable)
+			subflow_req->mp_capable = 0;
+		else
+			subflow_req->remote_key = opt_rx.mptcp.sndr_key;
+	} else if (subflow_req->mp_join) {
 		opt_rx.mptcp.mp_join = 0;
 		mptcp_get_options(skb, &opt_rx);
 		if (!opt_rx.mptcp.mp_join ||