diff mbox series

[02/15] mptcp: Handle MPTCP TCP options

Message ID 20191213205106.2467313-3-matthieu.baerts@tessares.net
State Deferred, archived
Headers show
Series Multipath TCP part 2: Single subflow | expand

Commit Message

Matthieu Baerts Dec. 13, 2019, 8:50 p.m. UTC
From: Peter Krystad <peter.krystad@linux.intel.com>

These options are handled according to MPTCPv0 (RFC6824).
RFC6824bis/RFC8684 MPTCPv1 MP_CAPABLE is added later in coordination
with related code changes.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Co-developed-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 include/linux/tcp.h   |  18 +++++
 include/net/mptcp.h   |  18 +++++
 net/ipv4/tcp_input.c  |   5 ++
 net/ipv4/tcp_output.c |  13 ++++
 net/mptcp/Makefile    |   2 +-
 net/mptcp/options.c   | 159 ++++++++++++++++++++++++++++++++++++++++++
 net/mptcp/protocol.h  |  29 ++++++++
 7 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100644 net/mptcp/options.c

Comments

Peter Krystad Dec. 14, 2019, 5:06 a.m. UTC | #1
On Fri, 2019-12-13 at 21:50 +0100, Matthieu Baerts wrote:
> From: Peter Krystad <peter.krystad@linux.intel.com>

Here add

"Add routines to parse and format the MP_CAPABLE option."

to the commit text; something needs to state what the patch does.

> These options are handled according to MPTCPv0 (RFC6824).
> RFC6824bis/RFC8684 MPTCPv1 MP_CAPABLE is added later in coordination
> with related code changes.
> 
> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Co-developed-by: Florian Westphal <fw@strlen.de>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> Co-developed-by: Davide Caratti <dcaratti@redhat.com>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
> Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
> ---
>  include/linux/tcp.h   |  18 +++++
>  include/net/mptcp.h   |  18 +++++
>  net/ipv4/tcp_input.c  |   5 ++
>  net/ipv4/tcp_output.c |  13 ++++
>  net/mptcp/Makefile    |   2 +-
>  net/mptcp/options.c   | 159 ++++++++++++++++++++++++++++++++++++++++++
>  net/mptcp/protocol.h  |  29 ++++++++
>  7 files changed, 243 insertions(+), 1 deletion(-)
>  create mode 100644 net/mptcp/options.c
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index ca6f01531e64..52798ab00394 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -78,6 +78,16 @@ struct tcp_sack_block {
>  #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
>  
> +#if IS_ENABLED(CONFIG_MPTCP)
> +struct mptcp_options_received {
> +	u64	sndr_key;
> +	u64	rcvr_key;
> +	u8	mp_capable : 1,
> +		mp_join : 1,
> +		dss : 1;
> +};
> +#endif
> +
>  struct tcp_options_received {
>  /*	PAWS/RTTM data	*/
>  	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
> @@ -95,6 +105,9 @@ struct tcp_options_received {
>  	u8	num_sacks;	/* Number of SACK blocks		*/
>  	u16	user_mss;	/* mss requested by user in ioctl	*/
>  	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
> +#if IS_ENABLED(CONFIG_MPTCP)
> +	struct mptcp_options_received	mptcp;
> +#endif
>  };
>  
>  static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
> @@ -104,6 +117,11 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
>  #if IS_ENABLED(CONFIG_SMC)
>  	rx_opt->smc_ok = 0;
>  #endif
> +#if IS_ENABLED(CONFIG_MPTCP)
> +	rx_opt->mptcp.mp_capable = 0;
> +	rx_opt->mptcp.mp_join = 0;
> +	rx_opt->mptcp.dss = 0;
> +#endif
>  }
>  
>  /* This is the max number of SACKS that we'll generate and process. It's safe
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 4113e063f728..ea96308ae546 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -9,6 +9,7 @@
>  #define __NET_MPTCP_H
>  
>  #include <linux/skbuff.h>
> +#include <linux/tcp.h>
>  #include <linux/types.h>
>  
>  /* MPTCP sk_buff extension data */
> @@ -25,10 +26,22 @@ struct mptcp_ext {
>  			__unused:2;
>  };
>  
> +struct mptcp_out_options {
> +#if IS_ENABLED(CONFIG_MPTCP)
> +	u16 suboptions;
> +	u64 sndr_key;
> +	u64 rcvr_key;
> +#endif
> +};
> +
>  #ifdef CONFIG_MPTCP
>  
>  void mptcp_init(void);
>  
> +void mptcp_parse_option(const unsigned char *ptr, int opsize,
> +			struct tcp_options_received *opt_rx);
> +void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
> +
>  static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb)
>  {
>  	return skb_ext_exist(skb, SKB_EXT_MPTCP);
> @@ -40,6 +53,11 @@ static inline void mptcp_init(void)
>  {
>  }
>  
> +static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
> +				      struct tcp_options_received *opt_rx)
> +{
> +}
> +
>  static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb)
>  {
>  	return false;
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 55b460a2ece2..4fc649b72ae4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -79,6 +79,7 @@
>  #include <trace/events/tcp.h>
>  #include <linux/jump_label_ratelimit.h>
>  #include <net/busy_poll.h>
> +#include <net/mptcp.h>
>  
>  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>  
> @@ -3920,6 +3921,10 @@ void tcp_parse_options(const struct net *net,
>  				 */
>  				break;
>  #endif
> +			case TCPOPT_MPTCP:
> +				mptcp_parse_option(ptr, opsize, opt_rx);
> +				break;
> +
>  			case TCPOPT_FASTOPEN:
>  				tcp_parse_fastopen_option(
>  					opsize - TCPOLEN_FASTOPEN_BASE,
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 710ab45badfa..5c91fc3b126b 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -38,6 +38,7 @@
>  #define pr_fmt(fmt) "TCP: " fmt
>  
>  #include <net/tcp.h>
> +#include <net/mptcp.h>
>  
>  #include <linux/compiler.h>
>  #include <linux/gfp.h>
> @@ -411,6 +412,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
>  #define OPTION_WSCALE		(1 << 3)
>  #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
>  #define OPTION_SMC		(1 << 9)
> +#define OPTION_MPTCP		(1 << 10)
>  
>  static void smc_options_write(__be32 *ptr, u16 *options)
>  {
> @@ -436,8 +438,17 @@ struct tcp_out_options {
>  	__u8 *hash_location;	/* temporary pointer, overloaded */
>  	__u32 tsval, tsecr;	/* need to include OPTION_TS */
>  	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
> +	struct mptcp_out_options mptcp;
>  };
>  
> +static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
> +{
> +#if IS_ENABLED(CONFIG_MPTCP)
> +	if (unlikely(OPTION_MPTCP & opts->options))
> +		mptcp_write_options(ptr, &opts->mptcp);
> +#endif
> +}
> +
>  /* Write previously computed TCP options to the packet.
>   *
>   * Beware: Something in the Internet is very sensitive to the ordering of
> @@ -546,6 +557,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
>  	}
>  
>  	smc_options_write(ptr, &options);
> +
> +	mptcp_options_write(ptr, opts);
>  }
>  
>  static void smc_set_option(const struct tcp_sock *tp,
> diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
> index 659129d1fcbf..27a846263f08 100644
> --- a/net/mptcp/Makefile
> +++ b/net/mptcp/Makefile
> @@ -1,4 +1,4 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-$(CONFIG_MPTCP) += mptcp.o
>  
> -mptcp-y := protocol.o
> +mptcp-y := protocol.o options.o
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> new file mode 100644
> index 000000000000..cd4c0c8de6e0
> --- /dev/null
> +++ b/net/mptcp/options.c
> @@ -0,0 +1,159 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Multipath TCP
> + *
> + * Copyright (c) 2017 - 2019, Intel Corporation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <net/tcp.h>
> +#include <net/mptcp.h>
> +#include "protocol.h"
> +
> +void mptcp_parse_option(const unsigned char *ptr, int opsize,
> +			struct tcp_options_received *opt_rx)
> +{
> +	struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
> +	u8 subtype = *ptr >> 4;
> +	u8 version;
> +	u8 flags;
> +
> +	switch (subtype) {
> +	/* MPTCPOPT_MP_CAPABLE
> +	 * 0: 4MSB=subtype, 4LSB=version
> +	 * 1: Handshake flags
> +	 * 2-9: Sender key
> +	 * 10-17: Receiver key (optional)
> +	 */
> +	case MPTCPOPT_MP_CAPABLE:
> +		if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
> +		    opsize != TCPOLEN_MPTCP_MPC_ACK)
> +			break;
> +
> +		version = *ptr++ & MPTCP_VERSION_MASK;
> +		if (version != MPTCP_SUPPORTED_VERSION)
> +			break;
> +
> +		flags = *ptr++;
> +		if (!((flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA1) ||
> +		    (flags & MPTCP_CAP_EXTENSIBILITY))
> +			break;
> +
> +		/* RFC 6824, Section 3.1:
> +		 * "For the Checksum Required bit (labeled "A"), if either
> +		 * host requires the use of checksums, checksums MUST be used.
> +		 * In other words, the only way for checksums not to be used
> +		 * is if both hosts in their SYNs set A=0."
> +		 *
> +		 * Section 3.3.0:
> +		 * "If a checksum is not present when its use has been
> +		 * negotiated, the receiver MUST close the subflow with a RST as
> +		 * it is considered broken."
> +		 *
> +		 * We don't implement DSS checksum - fall back to TCP.
> +		 */
> +		if (flags & MPTCP_CAP_CHECKSUM_REQD)
> +			break;
> +
> +		mp_opt->mp_capable = 1;
> +		mp_opt->sndr_key = get_unaligned_be64(ptr);
> +		ptr += 8;
> +
> +		if (opsize == TCPOLEN_MPTCP_MPC_ACK) {
> +			mp_opt->rcvr_key = get_unaligned_be64(ptr);
> +			ptr += 8;
> +			pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu",
> +				 mp_opt->sndr_key, mp_opt->rcvr_key);
> +		} else {
> +			pr_debug("MP_CAPABLE sndr=%llu", mp_opt->sndr_key);
> +		}
> +		break;
> +
> +	/* MPTCPOPT_MP_JOIN
> +	 * Initial SYN
> +	 * 0: 4MSB=subtype, 000, 1LSB=Backup
> +	 * 1: Address ID
> +	 * 2-5: Receiver token
> +	 * 6-9: Sender random number
> +	 * SYN/ACK response
> +	 * 0: 4MSB=subtype, 000, 1LSB=Backup
> +	 * 1: Address ID
> +	 * 2-9: Sender truncated HMAC
> +	 * 10-13: Sender random number
> +	 * Third ACK
> +	 * 0: 4MSB=subtype, 0000
> +	 * 1: 0 (Reserved)
> +	 * 2-21: Sender HMAC
> +	 */
> +
> +	/* MPTCPOPT_DSS
> +	 * 0: 4MSB=subtype, 0000
> +	 * 1: 3MSB=0, F=Data FIN, m=DSN length, M=has DSN/SSN/DLL/checksum,
> +	 *    a=DACK length, A=has DACK
> +	 * 0, 4, or 8 bytes of DACK (depending on A/a)
> +	 * 0, 4, or 8 bytes of DSN (depending on M/m)
> +	 * 0 or 4 bytes of SSN (depending on M)
> +	 * 0 or 2 bytes of DLL (depending on M)
> +	 * 0 or 2 bytes of checksum (depending on M)
> +	 */
> +	case MPTCPOPT_DSS:
> +		pr_debug("DSS");
> +		mp_opt->dss = 1;
> +		break;
> +
> +	/* MPTCPOPT_ADD_ADDR
> +	 * 0: 4MSB=subtype, 4LSB=IP version (4 or 6)
> +	 * 1: Address ID
> +	 * 4 or 16 bytes of address (depending on ip version)
> +	 * 0 or 2 bytes of port (depending on length)
> +	 */
> +
> +	/* MPTCPOPT_RM_ADDR
> +	 * 0: 4MSB=subtype, 0000
> +	 * 1: Address ID
> +	 * Additional bytes: More address IDs (depending on length)
> +	 */
> +
> +	/* MPTCPOPT_MP_PRIO
> +	 * 0: 4MSB=subtype, 000, 1LSB=Backup
> +	 * 1: Address ID (optional, current addr implied if not present)
> +	 */
> +
> +	/* MPTCPOPT_MP_FAIL
> +	 * 0: 4MSB=subtype, 0000
> +	 * 1: 0 (Reserved)
> +	 * 2-9: DSN
> +	 */
> +
> +	/* MPTCPOPT_MP_FASTCLOSE
> +	 * 0: 4MSB=subtype, 0000
> +	 * 1: 0 (Reserved)
> +	 * 2-9: Receiver key
> +	 */
> +	default:
> +		break;
> +	}
> +}
> +
> +void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
> +{
> +	if ((OPTION_MPTCP_MPC_SYN |
> +	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
> +		u8 len;
> +
> +		if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
> +			len = TCPOLEN_MPTCP_MPC_SYN;
> +		else
> +			len = TCPOLEN_MPTCP_MPC_ACK;
> +
> +		*ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) |
> +			       (MPTCPOPT_MP_CAPABLE << 12) |
> +			       (MPTCP_SUPPORTED_VERSION << 8) |
> +			       MPTCP_CAP_HMAC_SHA1);
> +		put_unaligned_be64(opts->sndr_key, ptr);
> +		ptr += 2;
> +		if (OPTION_MPTCP_MPC_ACK & opts->suboptions) {
> +			put_unaligned_be64(opts->rcvr_key, ptr);
> +			ptr += 2;
> +		}
> +	}
> +}
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index ee04a01bffd3..c59cf8b220b0 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -7,6 +7,35 @@
>  #ifndef __MPTCP_PROTOCOL_H
>  #define __MPTCP_PROTOCOL_H
>  
> +#define MPTCP_SUPPORTED_VERSION	0
> +
> +/* MPTCP option bits */
> +#define OPTION_MPTCP_MPC_SYN	BIT(0)
> +#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
> +#define OPTION_MPTCP_MPC_ACK	BIT(2)
> +
> +/* MPTCP option subtypes */
> +#define MPTCPOPT_MP_CAPABLE	0
> +#define MPTCPOPT_MP_JOIN	1
> +#define MPTCPOPT_DSS		2
> +#define MPTCPOPT_ADD_ADDR	3
> +#define MPTCPOPT_RM_ADDR	4
> +#define MPTCPOPT_MP_PRIO	5
> +#define MPTCPOPT_MP_FAIL	6
> +#define MPTCPOPT_MP_FASTCLOSE	7
> +
> +/* MPTCP suboption lengths */
> +#define TCPOLEN_MPTCP_MPC_SYN		12
> +#define TCPOLEN_MPTCP_MPC_SYNACK	12
> +#define TCPOLEN_MPTCP_MPC_ACK		20
> +
> +/* MPTCP MP_CAPABLE flags */
> +#define MPTCP_VERSION_MASK	(0x0F)
> +#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
> +#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
> +#define MPTCP_CAP_HMAC_SHA1	BIT(0)
> +#define MPTCP_CAP_FLAG_MASK	(0x3F)
> +
>  /* MPTCP connection sock */
>  struct mptcp_sock {
>  	/* inet_connection_sock must be the first member */
Mat Martineau Dec. 14, 2019, 5:17 a.m. UTC | #2
On Fri, 13 Dec 2019, Peter Krystad wrote:

> On Fri, 2019-12-13 at 21:50 +0100, Matthieu Baerts wrote:
>> From: Peter Krystad <peter.krystad@linux.intel.com>
>
> Here add
>
> "Add routines to parse and format the MP_CAPABLE option."
>
> to the commit text; something needs to state what the patch does.
>

Done.

I'm editing these commits locally and will push updated tags to github 
when I have all the part 2 updates. The netdev-v1-part1 tag will remain 
the same since that's already sent to netdev.

I'll send a summary of part 2 commit message changes to Matthieu, who will 
update the topgit tree and export branch later.


Thanks,

Mat


>> These options are handled according to MPTCPv0 (RFC6824).
>> RFC6824bis/RFC8684 MPTCPv1 MP_CAPABLE is added later in coordination
>> with related code changes.
>>
>> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
>> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
>> Co-developed-by: Florian Westphal <fw@strlen.de>
>> Signed-off-by: Florian Westphal <fw@strlen.de>
>> Co-developed-by: Davide Caratti <dcaratti@redhat.com>
>> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
>> Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
>> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
>> ---
>>  include/linux/tcp.h   |  18 +++++
>>  include/net/mptcp.h   |  18 +++++
>>  net/ipv4/tcp_input.c  |   5 ++
>>  net/ipv4/tcp_output.c |  13 ++++
>>  net/mptcp/Makefile    |   2 +-
>>  net/mptcp/options.c   | 159 ++++++++++++++++++++++++++++++++++++++++++
>>  net/mptcp/protocol.h  |  29 ++++++++
>>  7 files changed, 243 insertions(+), 1 deletion(-)
>>  create mode 100644 net/mptcp/options.c
>>
>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
>> index ca6f01531e64..52798ab00394 100644
>> --- a/include/linux/tcp.h
>> +++ b/include/linux/tcp.h
>> @@ -78,6 +78,16 @@ struct tcp_sack_block {
>>  #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
>>
>> +#if IS_ENABLED(CONFIG_MPTCP)
>> +struct mptcp_options_received {
>> +	u64	sndr_key;
>> +	u64	rcvr_key;
>> +	u8	mp_capable : 1,
>> +		mp_join : 1,
>> +		dss : 1;
>> +};
>> +#endif
>> +
>>  struct tcp_options_received {
>>  /*	PAWS/RTTM data	*/
>>  	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
>> @@ -95,6 +105,9 @@ struct tcp_options_received {
>>  	u8	num_sacks;	/* Number of SACK blocks		*/
>>  	u16	user_mss;	/* mss requested by user in ioctl	*/
>>  	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
>> +#if IS_ENABLED(CONFIG_MPTCP)
>> +	struct mptcp_options_received	mptcp;
>> +#endif
>>  };
>>
>>  static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
>> @@ -104,6 +117,11 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
>>  #if IS_ENABLED(CONFIG_SMC)
>>  	rx_opt->smc_ok = 0;
>>  #endif
>> +#if IS_ENABLED(CONFIG_MPTCP)
>> +	rx_opt->mptcp.mp_capable = 0;
>> +	rx_opt->mptcp.mp_join = 0;
>> +	rx_opt->mptcp.dss = 0;
>> +#endif
>>  }
>>
>>  /* This is the max number of SACKS that we'll generate and process. It's safe
>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
>> index 4113e063f728..ea96308ae546 100644
>> --- a/include/net/mptcp.h
>> +++ b/include/net/mptcp.h
>> @@ -9,6 +9,7 @@
>>  #define __NET_MPTCP_H
>>
>>  #include <linux/skbuff.h>
>> +#include <linux/tcp.h>
>>  #include <linux/types.h>
>>
>>  /* MPTCP sk_buff extension data */
>> @@ -25,10 +26,22 @@ struct mptcp_ext {
>>  			__unused:2;
>>  };
>>
>> +struct mptcp_out_options {
>> +#if IS_ENABLED(CONFIG_MPTCP)
>> +	u16 suboptions;
>> +	u64 sndr_key;
>> +	u64 rcvr_key;
>> +#endif
>> +};
>> +
>>  #ifdef CONFIG_MPTCP
>>
>>  void mptcp_init(void);
>>
>> +void mptcp_parse_option(const unsigned char *ptr, int opsize,
>> +			struct tcp_options_received *opt_rx);
>> +void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
>> +
>>  static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb)
>>  {
>>  	return skb_ext_exist(skb, SKB_EXT_MPTCP);
>> @@ -40,6 +53,11 @@ static inline void mptcp_init(void)
>>  {
>>  }
>>
>> +static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
>> +				      struct tcp_options_received *opt_rx)
>> +{
>> +}
>> +
>>  static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb)
>>  {
>>  	return false;
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index 55b460a2ece2..4fc649b72ae4 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -79,6 +79,7 @@
>>  #include <trace/events/tcp.h>
>>  #include <linux/jump_label_ratelimit.h>
>>  #include <net/busy_poll.h>
>> +#include <net/mptcp.h>
>>
>>  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>>
>> @@ -3920,6 +3921,10 @@ void tcp_parse_options(const struct net *net,
>>  				 */
>>  				break;
>>  #endif
>> +			case TCPOPT_MPTCP:
>> +				mptcp_parse_option(ptr, opsize, opt_rx);
>> +				break;
>> +
>>  			case TCPOPT_FASTOPEN:
>>  				tcp_parse_fastopen_option(
>>  					opsize - TCPOLEN_FASTOPEN_BASE,
>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>> index 710ab45badfa..5c91fc3b126b 100644
>> --- a/net/ipv4/tcp_output.c
>> +++ b/net/ipv4/tcp_output.c
>> @@ -38,6 +38,7 @@
>>  #define pr_fmt(fmt) "TCP: " fmt
>>
>>  #include <net/tcp.h>
>> +#include <net/mptcp.h>
>>
>>  #include <linux/compiler.h>
>>  #include <linux/gfp.h>
>> @@ -411,6 +412,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
>>  #define OPTION_WSCALE		(1 << 3)
>>  #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
>>  #define OPTION_SMC		(1 << 9)
>> +#define OPTION_MPTCP		(1 << 10)
>>
>>  static void smc_options_write(__be32 *ptr, u16 *options)
>>  {
>> @@ -436,8 +438,17 @@ struct tcp_out_options {
>>  	__u8 *hash_location;	/* temporary pointer, overloaded */
>>  	__u32 tsval, tsecr;	/* need to include OPTION_TS */
>>  	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
>> +	struct mptcp_out_options mptcp;
>>  };
>>
>> +static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
>> +{
>> +#if IS_ENABLED(CONFIG_MPTCP)
>> +	if (unlikely(OPTION_MPTCP & opts->options))
>> +		mptcp_write_options(ptr, &opts->mptcp);
>> +#endif
>> +}
>> +
>>  /* Write previously computed TCP options to the packet.
>>   *
>>   * Beware: Something in the Internet is very sensitive to the ordering of
>> @@ -546,6 +557,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
>>  	}
>>
>>  	smc_options_write(ptr, &options);
>> +
>> +	mptcp_options_write(ptr, opts);
>>  }
>>
>>  static void smc_set_option(const struct tcp_sock *tp,
>> diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
>> index 659129d1fcbf..27a846263f08 100644
>> --- a/net/mptcp/Makefile
>> +++ b/net/mptcp/Makefile
>> @@ -1,4 +1,4 @@
>>  # SPDX-License-Identifier: GPL-2.0
>>  obj-$(CONFIG_MPTCP) += mptcp.o
>>
>> -mptcp-y := protocol.o
>> +mptcp-y := protocol.o options.o
>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>> new file mode 100644
>> index 000000000000..cd4c0c8de6e0
>> --- /dev/null
>> +++ b/net/mptcp/options.c
>> @@ -0,0 +1,159 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Multipath TCP
>> + *
>> + * Copyright (c) 2017 - 2019, Intel Corporation.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <net/tcp.h>
>> +#include <net/mptcp.h>
>> +#include "protocol.h"
>> +
>> +void mptcp_parse_option(const unsigned char *ptr, int opsize,
>> +			struct tcp_options_received *opt_rx)
>> +{
>> +	struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
>> +	u8 subtype = *ptr >> 4;
>> +	u8 version;
>> +	u8 flags;
>> +
>> +	switch (subtype) {
>> +	/* MPTCPOPT_MP_CAPABLE
>> +	 * 0: 4MSB=subtype, 4LSB=version
>> +	 * 1: Handshake flags
>> +	 * 2-9: Sender key
>> +	 * 10-17: Receiver key (optional)
>> +	 */
>> +	case MPTCPOPT_MP_CAPABLE:
>> +		if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
>> +		    opsize != TCPOLEN_MPTCP_MPC_ACK)
>> +			break;
>> +
>> +		version = *ptr++ & MPTCP_VERSION_MASK;
>> +		if (version != MPTCP_SUPPORTED_VERSION)
>> +			break;
>> +
>> +		flags = *ptr++;
>> +		if (!((flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA1) ||
>> +		    (flags & MPTCP_CAP_EXTENSIBILITY))
>> +			break;
>> +
>> +		/* RFC 6824, Section 3.1:
>> +		 * "For the Checksum Required bit (labeled "A"), if either
>> +		 * host requires the use of checksums, checksums MUST be used.
>> +		 * In other words, the only way for checksums not to be used
>> +		 * is if both hosts in their SYNs set A=0."
>> +		 *
>> +		 * Section 3.3.0:
>> +		 * "If a checksum is not present when its use has been
>> +		 * negotiated, the receiver MUST close the subflow with a RST as
>> +		 * it is considered broken."
>> +		 *
>> +		 * We don't implement DSS checksum - fall back to TCP.
>> +		 */
>> +		if (flags & MPTCP_CAP_CHECKSUM_REQD)
>> +			break;
>> +
>> +		mp_opt->mp_capable = 1;
>> +		mp_opt->sndr_key = get_unaligned_be64(ptr);
>> +		ptr += 8;
>> +
>> +		if (opsize == TCPOLEN_MPTCP_MPC_ACK) {
>> +			mp_opt->rcvr_key = get_unaligned_be64(ptr);
>> +			ptr += 8;
>> +			pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu",
>> +				 mp_opt->sndr_key, mp_opt->rcvr_key);
>> +		} else {
>> +			pr_debug("MP_CAPABLE sndr=%llu", mp_opt->sndr_key);
>> +		}
>> +		break;
>> +
>> +	/* MPTCPOPT_MP_JOIN
>> +	 * Initial SYN
>> +	 * 0: 4MSB=subtype, 000, 1LSB=Backup
>> +	 * 1: Address ID
>> +	 * 2-5: Receiver token
>> +	 * 6-9: Sender random number
>> +	 * SYN/ACK response
>> +	 * 0: 4MSB=subtype, 000, 1LSB=Backup
>> +	 * 1: Address ID
>> +	 * 2-9: Sender truncated HMAC
>> +	 * 10-13: Sender random number
>> +	 * Third ACK
>> +	 * 0: 4MSB=subtype, 0000
>> +	 * 1: 0 (Reserved)
>> +	 * 2-21: Sender HMAC
>> +	 */
>> +
>> +	/* MPTCPOPT_DSS
>> +	 * 0: 4MSB=subtype, 0000
>> +	 * 1: 3MSB=0, F=Data FIN, m=DSN length, M=has DSN/SSN/DLL/checksum,
>> +	 *    a=DACK length, A=has DACK
>> +	 * 0, 4, or 8 bytes of DACK (depending on A/a)
>> +	 * 0, 4, or 8 bytes of DSN (depending on M/m)
>> +	 * 0 or 4 bytes of SSN (depending on M)
>> +	 * 0 or 2 bytes of DLL (depending on M)
>> +	 * 0 or 2 bytes of checksum (depending on M)
>> +	 */
>> +	case MPTCPOPT_DSS:
>> +		pr_debug("DSS");
>> +		mp_opt->dss = 1;
>> +		break;
>> +
>> +	/* MPTCPOPT_ADD_ADDR
>> +	 * 0: 4MSB=subtype, 4LSB=IP version (4 or 6)
>> +	 * 1: Address ID
>> +	 * 4 or 16 bytes of address (depending on ip version)
>> +	 * 0 or 2 bytes of port (depending on length)
>> +	 */
>> +
>> +	/* MPTCPOPT_RM_ADDR
>> +	 * 0: 4MSB=subtype, 0000
>> +	 * 1: Address ID
>> +	 * Additional bytes: More address IDs (depending on length)
>> +	 */
>> +
>> +	/* MPTCPOPT_MP_PRIO
>> +	 * 0: 4MSB=subtype, 000, 1LSB=Backup
>> +	 * 1: Address ID (optional, current addr implied if not present)
>> +	 */
>> +
>> +	/* MPTCPOPT_MP_FAIL
>> +	 * 0: 4MSB=subtype, 0000
>> +	 * 1: 0 (Reserved)
>> +	 * 2-9: DSN
>> +	 */
>> +
>> +	/* MPTCPOPT_MP_FASTCLOSE
>> +	 * 0: 4MSB=subtype, 0000
>> +	 * 1: 0 (Reserved)
>> +	 * 2-9: Receiver key
>> +	 */
>> +	default:
>> +		break;
>> +	}
>> +}
>> +
>> +void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
>> +{
>> +	if ((OPTION_MPTCP_MPC_SYN |
>> +	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
>> +		u8 len;
>> +
>> +		if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
>> +			len = TCPOLEN_MPTCP_MPC_SYN;
>> +		else
>> +			len = TCPOLEN_MPTCP_MPC_ACK;
>> +
>> +		*ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) |
>> +			       (MPTCPOPT_MP_CAPABLE << 12) |
>> +			       (MPTCP_SUPPORTED_VERSION << 8) |
>> +			       MPTCP_CAP_HMAC_SHA1);
>> +		put_unaligned_be64(opts->sndr_key, ptr);
>> +		ptr += 2;
>> +		if (OPTION_MPTCP_MPC_ACK & opts->suboptions) {
>> +			put_unaligned_be64(opts->rcvr_key, ptr);
>> +			ptr += 2;
>> +		}
>> +	}
>> +}
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index ee04a01bffd3..c59cf8b220b0 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -7,6 +7,35 @@
>>  #ifndef __MPTCP_PROTOCOL_H
>>  #define __MPTCP_PROTOCOL_H
>>
>> +#define MPTCP_SUPPORTED_VERSION	0
>> +
>> +/* MPTCP option bits */
>> +#define OPTION_MPTCP_MPC_SYN	BIT(0)
>> +#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
>> +#define OPTION_MPTCP_MPC_ACK	BIT(2)
>> +
>> +/* MPTCP option subtypes */
>> +#define MPTCPOPT_MP_CAPABLE	0
>> +#define MPTCPOPT_MP_JOIN	1
>> +#define MPTCPOPT_DSS		2
>> +#define MPTCPOPT_ADD_ADDR	3
>> +#define MPTCPOPT_RM_ADDR	4
>> +#define MPTCPOPT_MP_PRIO	5
>> +#define MPTCPOPT_MP_FAIL	6
>> +#define MPTCPOPT_MP_FASTCLOSE	7
>> +
>> +/* MPTCP suboption lengths */
>> +#define TCPOLEN_MPTCP_MPC_SYN		12
>> +#define TCPOLEN_MPTCP_MPC_SYNACK	12
>> +#define TCPOLEN_MPTCP_MPC_ACK		20
>> +
>> +/* MPTCP MP_CAPABLE flags */
>> +#define MPTCP_VERSION_MASK	(0x0F)
>> +#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
>> +#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
>> +#define MPTCP_CAP_HMAC_SHA1	BIT(0)
>> +#define MPTCP_CAP_FLAG_MASK	(0x3F)
>> +
>>  /* MPTCP connection sock */
>>  struct mptcp_sock {
>>  	/* inet_connection_sock must be the first member */
>
>

--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ca6f01531e64..52798ab00394 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -78,6 +78,16 @@  struct tcp_sack_block {
 #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
 #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
 
+#if IS_ENABLED(CONFIG_MPTCP)
+struct mptcp_options_received {
+	u64	sndr_key;
+	u64	rcvr_key;
+	u8	mp_capable : 1,
+		mp_join : 1,
+		dss : 1;
+};
+#endif
+
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
 	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
@@ -95,6 +105,9 @@  struct tcp_options_received {
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+#if IS_ENABLED(CONFIG_MPTCP)
+	struct mptcp_options_received	mptcp;
+#endif
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
@@ -104,6 +117,11 @@  static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 #if IS_ENABLED(CONFIG_SMC)
 	rx_opt->smc_ok = 0;
 #endif
+#if IS_ENABLED(CONFIG_MPTCP)
+	rx_opt->mptcp.mp_capable = 0;
+	rx_opt->mptcp.mp_join = 0;
+	rx_opt->mptcp.dss = 0;
+#endif
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 4113e063f728..ea96308ae546 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -9,6 +9,7 @@ 
 #define __NET_MPTCP_H
 
 #include <linux/skbuff.h>
+#include <linux/tcp.h>
 #include <linux/types.h>
 
 /* MPTCP sk_buff extension data */
@@ -25,10 +26,22 @@  struct mptcp_ext {
 			__unused:2;
 };
 
+struct mptcp_out_options {
+#if IS_ENABLED(CONFIG_MPTCP)
+	u16 suboptions;
+	u64 sndr_key;
+	u64 rcvr_key;
+#endif
+};
+
 #ifdef CONFIG_MPTCP
 
 void mptcp_init(void);
 
+void mptcp_parse_option(const unsigned char *ptr, int opsize,
+			struct tcp_options_received *opt_rx);
+void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
+
 static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb)
 {
 	return skb_ext_exist(skb, SKB_EXT_MPTCP);
@@ -40,6 +53,11 @@  static inline void mptcp_init(void)
 {
 }
 
+static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
+				      struct tcp_options_received *opt_rx)
+{
+}
+
 static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb)
 {
 	return false;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 55b460a2ece2..4fc649b72ae4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,6 +79,7 @@ 
 #include <trace/events/tcp.h>
 #include <linux/jump_label_ratelimit.h>
 #include <net/busy_poll.h>
+#include <net/mptcp.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -3920,6 +3921,10 @@  void tcp_parse_options(const struct net *net,
 				 */
 				break;
 #endif
+			case TCPOPT_MPTCP:
+				mptcp_parse_option(ptr, opsize, opt_rx);
+				break;
+
 			case TCPOPT_FASTOPEN:
 				tcp_parse_fastopen_option(
 					opsize - TCPOLEN_FASTOPEN_BASE,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 710ab45badfa..5c91fc3b126b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -38,6 +38,7 @@ 
 #define pr_fmt(fmt) "TCP: " fmt
 
 #include <net/tcp.h>
+#include <net/mptcp.h>
 
 #include <linux/compiler.h>
 #include <linux/gfp.h>
@@ -411,6 +412,7 @@  static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_WSCALE		(1 << 3)
 #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
 #define OPTION_SMC		(1 << 9)
+#define OPTION_MPTCP		(1 << 10)
 
 static void smc_options_write(__be32 *ptr, u16 *options)
 {
@@ -436,8 +438,17 @@  struct tcp_out_options {
 	__u8 *hash_location;	/* temporary pointer, overloaded */
 	__u32 tsval, tsecr;	/* need to include OPTION_TS */
 	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
+	struct mptcp_out_options mptcp;
 };
 
+static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
+{
+#if IS_ENABLED(CONFIG_MPTCP)
+	if (unlikely(OPTION_MPTCP & opts->options))
+		mptcp_write_options(ptr, &opts->mptcp);
+#endif
+}
+
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -546,6 +557,8 @@  static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 	}
 
 	smc_options_write(ptr, &options);
+
+	mptcp_options_write(ptr, opts);
 }
 
 static void smc_set_option(const struct tcp_sock *tp,
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 659129d1fcbf..27a846263f08 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -1,4 +1,4 @@ 
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_MPTCP) += mptcp.o
 
-mptcp-y := protocol.o
+mptcp-y := protocol.o options.o
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
new file mode 100644
index 000000000000..cd4c0c8de6e0
--- /dev/null
+++ b/net/mptcp/options.c
@@ -0,0 +1,159 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2017 - 2019, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <net/tcp.h>
+#include <net/mptcp.h>
+#include "protocol.h"
+
+/* Parse one MPTCP TCP option into opt_rx->mptcp.
+ *
+ * @ptr:    first payload byte of the option, i.e. the subtype byte; the
+ *          per-subtype layouts documented below are numbered from here
+ * @opsize: option length as compared against the TCPOLEN_MPTCP_* values.
+ *          Those are two bytes larger than the payload layouts (12 for the
+ *          10-byte MP_CAPABLE SYN form), i.e. they count the kind and
+ *          length bytes that precede @ptr
+ * @opt_rx: received-options state; only its mptcp member is written
+ *
+ * Only MP_CAPABLE and DSS are acted upon. Any other subtype, a truncated
+ * option, or an MP_CAPABLE that fails validation leaves @opt_rx untouched.
+ */
+void mptcp_parse_option(const unsigned char *ptr, int opsize,
+			struct tcp_options_received *opt_rx)
+{
+	struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
+	u8 subtype;
+	u8 version;
+	u8 flags;
+
+	/* opsize includes the two-byte kind/length header, so an option
+	 * shorter than 3 bytes has no subtype byte: reading *ptr would go
+	 * past the end of the option.
+	 */
+	if (opsize < 3)
+		return;
+
+	subtype = *ptr >> 4;
+
+	switch (subtype) {
+	/* MPTCPOPT_MP_CAPABLE
+	 * 0: 4MSB=subtype, 4LSB=version
+	 * 1: Handshake flags
+	 * 2-9: Sender key
+	 * 10-17: Receiver key (optional)
+	 */
+	case MPTCPOPT_MP_CAPABLE:
+		if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
+		    opsize != TCPOLEN_MPTCP_MPC_ACK)
+			break;
+
+		version = *ptr++ & MPTCP_VERSION_MASK;
+		if (version != MPTCP_SUPPORTED_VERSION)
+			break;
+
+		/* Accept only HMAC-SHA1 as crypto algorithm, and reject the
+		 * extensibility bit.
+		 */
+		flags = *ptr++;
+		if ((flags & MPTCP_CAP_FLAG_MASK) != MPTCP_CAP_HMAC_SHA1 ||
+		    (flags & MPTCP_CAP_EXTENSIBILITY))
+			break;
+
+		/* RFC 6824, Section 3.1:
+		 * "For the Checksum Required bit (labeled "A"), if either
+		 * host requires the use of checksums, checksums MUST be used.
+		 * In other words, the only way for checksums not to be used
+		 * is if both hosts in their SYNs set A=0."
+		 *
+		 * Section 3.3.0:
+		 * "If a checksum is not present when its use has been
+		 * negotiated, the receiver MUST close the subflow with a RST as
+		 * it is considered broken."
+		 *
+		 * We don't implement DSS checksum - fall back to TCP.
+		 */
+		if (flags & MPTCP_CAP_CHECKSUM_REQD)
+			break;
+
+		mp_opt->mp_capable = 1;
+		mp_opt->sndr_key = get_unaligned_be64(ptr);
+		ptr += 8;
+
+		/* Only the longer (ACK) form carries the receiver key */
+		if (opsize == TCPOLEN_MPTCP_MPC_ACK) {
+			mp_opt->rcvr_key = get_unaligned_be64(ptr);
+			ptr += 8;
+			pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu\n",
+				 mp_opt->sndr_key, mp_opt->rcvr_key);
+		} else {
+			pr_debug("MP_CAPABLE sndr=%llu\n", mp_opt->sndr_key);
+		}
+		break;
+
+	/* MPTCPOPT_MP_JOIN
+	 * Initial SYN
+	 * 0: 4MSB=subtype, 000, 1LSB=Backup
+	 * 1: Address ID
+	 * 2-5: Receiver token
+	 * 6-9: Sender random number
+	 * SYN/ACK response
+	 * 0: 4MSB=subtype, 000, 1LSB=Backup
+	 * 1: Address ID
+	 * 2-9: Sender truncated HMAC
+	 * 10-13: Sender random number
+	 * Third ACK
+	 * 0: 4MSB=subtype, 0000
+	 * 1: 0 (Reserved)
+	 * 2-21: Sender HMAC
+	 */
+
+	/* MPTCPOPT_DSS
+	 * 0: 4MSB=subtype, 0000
+	 * 1: 3MSB=0, F=Data FIN, m=DSN length, M=has DSN/SSN/DLL/checksum,
+	 *    a=DACK length, A=has DACK
+	 * 0, 4, or 8 bytes of DACK (depending on A/a)
+	 * 0, 4, or 8 bytes of DSN (depending on M/m)
+	 * 0 or 4 bytes of SSN (depending on M)
+	 * 0 or 2 bytes of DLL (depending on M)
+	 * 0 or 2 bytes of checksum (depending on M)
+	 */
+	case MPTCPOPT_DSS:
+		/* Presence is recorded only; the DSS payload is not parsed */
+		pr_debug("DSS\n");
+		mp_opt->dss = 1;
+		break;
+
+	/* MPTCPOPT_ADD_ADDR
+	 * 0: 4MSB=subtype, 4LSB=IP version (4 or 6)
+	 * 1: Address ID
+	 * 4 or 16 bytes of address (depending on ip version)
+	 * 0 or 2 bytes of port (depending on length)
+	 */
+
+	/* MPTCPOPT_RM_ADDR
+	 * 0: 4MSB=subtype, 0000
+	 * 1: Address ID
+	 * Additional bytes: More address IDs (depending on length)
+	 */
+
+	/* MPTCPOPT_MP_PRIO
+	 * 0: 4MSB=subtype, 000, 1LSB=Backup
+	 * 1: Address ID (optional, current addr implied if not present)
+	 */
+
+	/* MPTCPOPT_MP_FAIL
+	 * 0: 4MSB=subtype, 0000
+	 * 1: 0 (Reserved)
+	 * 2-9: DSN
+	 */
+
+	/* MPTCPOPT_MP_FASTCLOSE
+	 * 0: 4MSB=subtype, 0000
+	 * 1: 0 (Reserved)
+	 * 2-9: Receiver key
+	 */
+	default:
+		break;
+	}
+}
+
+/* Write the MP_CAPABLE option selected in opts->suboptions at @ptr.
+ *
+ * Nothing is written unless OPTION_MPTCP_MPC_SYN or OPTION_MPTCP_MPC_ACK is
+ * set. Otherwise one 32-bit word (kind, length, subtype, version, flags) is
+ * emitted, followed by the 64-bit sender key and, when the ACK bit is set,
+ * the 64-bit receiver key, all in big-endian order.
+ */
+void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
+{
+	const bool mpc_syn = opts->suboptions & OPTION_MPTCP_MPC_SYN;
+	const bool mpc_ack = opts->suboptions & OPTION_MPTCP_MPC_ACK;
+	u8 len;
+
+	if (!mpc_syn && !mpc_ack)
+		return;
+
+	/* The SYN length takes precedence should both bits be set */
+	len = mpc_syn ? TCPOLEN_MPTCP_MPC_SYN : TCPOLEN_MPTCP_MPC_ACK;
+
+	*ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) |
+		       (MPTCPOPT_MP_CAPABLE << 12) |
+		       (MPTCP_SUPPORTED_VERSION << 8) |
+		       MPTCP_CAP_HMAC_SHA1);
+
+	/* ptr is a __be32 pointer: each 64-bit key spans two elements */
+	put_unaligned_be64(opts->sndr_key, ptr);
+	ptr += 2;
+
+	if (mpc_ack)
+		put_unaligned_be64(opts->rcvr_key, ptr);
+}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ee04a01bffd3..c59cf8b220b0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -7,6 +7,35 @@ 
 #ifndef __MPTCP_PROTOCOL_H
 #define __MPTCP_PROTOCOL_H
 
+#define MPTCP_SUPPORTED_VERSION	0	/* only version accepted on rx and sent on tx */
+
+/* MPTCP option bits (tested against mptcp_out_options.suboptions) */
+#define OPTION_MPTCP_MPC_SYN	BIT(0)
+#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
+#define OPTION_MPTCP_MPC_ACK	BIT(2)
+
+/* MPTCP option subtypes (upper 4 bits of the first option payload byte) */
+#define MPTCPOPT_MP_CAPABLE	0
+#define MPTCPOPT_MP_JOIN	1
+#define MPTCPOPT_DSS		2
+#define MPTCPOPT_ADD_ADDR	3
+#define MPTCPOPT_RM_ADDR	4
+#define MPTCPOPT_MP_PRIO	5
+#define MPTCPOPT_MP_FAIL	6
+#define MPTCPOPT_MP_FASTCLOSE	7
+
+/* MPTCP suboption lengths in bytes, counting the 2-byte kind/length header */
+#define TCPOLEN_MPTCP_MPC_SYN		12	/* header, subtype/version, flags, sender key */
+#define TCPOLEN_MPTCP_MPC_SYNACK	12
+#define TCPOLEN_MPTCP_MPC_ACK		20	/* as the SYN form plus the 8-byte receiver key */
+
+/* MPTCP MP_CAPABLE flags */
+#define MPTCP_VERSION_MASK	(0x0F)	/* low nibble of the subtype/version byte */
+#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)	/* "A" bit of RFC 6824: checksum required */
+#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
+#define MPTCP_CAP_HMAC_SHA1	BIT(0)
+#define MPTCP_CAP_FLAG_MASK	(0x3F)	/* flag bits that must equal MPTCP_CAP_HMAC_SHA1 on rx */
+
 /* MPTCP connection sock */
 struct mptcp_sock {
 	/* inet_connection_sock must be the first member */