diff mbox series

[RFC,2/2] mptcp: add MP_FAIL support

Message ID dfa64550c8ae54a3d831998f048f7b6b74db8d16.1615378283.git.geliangtang@gmail.com
State Superseded, archived
Delegated to: Mat Martineau
Headers show
Series DSS checksum and MP_FAIL support | expand

Commit Message

Geliang Tang March 10, 2021, 12:39 p.m. UTC
Add handling for sending and receiving MP_FAIL suboption.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 61 +++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  5 ++++
 net/mptcp/subflow.c  |  4 +++
 4 files changed, 70 insertions(+), 1 deletion(-)

Comments

Mat Martineau March 20, 2021, 1:35 a.m. UTC | #1
On Wed, 10 Mar 2021, Geliang Tang wrote:

> Add handling for sending and receiving MP_FAIL suboption.
>
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> include/net/mptcp.h  |  1 +
> net/mptcp/options.c  | 61 +++++++++++++++++++++++++++++++++++++++++++-
> net/mptcp/protocol.h |  5 ++++
> net/mptcp/subflow.c  |  4 +++
> 4 files changed, 70 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index de88f38e60b1..6635689ce03f 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -64,6 +64,7 @@ struct mptcp_out_options {
> 	u32 nonce;
> 	u64 thmac;
> 	u32 token;
> +	u64 fail_seq;
> 	u8 hmac[20];
> 	struct mptcp_ext ext_copy;
> #endif
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 9df26291cf9a..1b5aaab80ba0 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
> 		flags = *ptr++;
> 		mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
> 		mp_opt->reset_reason = *ptr;
> +		pr_debug("RST: reset_reason=%u", mp_opt->reset_reason);
> +		break;
> +
> +	case MPTCPOPT_MP_FAIL:
> +		if (opsize != TCPOLEN_MPTCP_FAIL)
> +			break;
> +
> +		ptr += 2;
> +		mp_opt->mp_fail = 1;
> +		mp_opt->fail_seq = get_unaligned_be64(ptr);
> +		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
> 		break;
>
> 	default:
> @@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb,
> 	mp_opt->mp_prio = 0;
> 	mp_opt->reset = 0;
> 	mp_opt->csum = 0;
> +	mp_opt->mp_fail = 0;
>
> 	length = (th->doff * 4) - sizeof(struct tcphdr);
> 	ptr = (const unsigned char *)(th + 1);
> @@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
> 	opts->reset_reason = subflow->reset_reason;
> }
>
> +static bool mptcp_established_options_mp_fail(struct sock *sk,
> +					      unsigned int *size,
> +					      unsigned int remaining,
> +					      struct mptcp_out_options *opts)
> +{
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +
> +	if (!subflow->send_mp_fail)
> +		return false;
> +
> +	if (remaining < TCPOLEN_MPTCP_FAIL)
> +		return false;
> +
> +	*size = TCPOLEN_MPTCP_FAIL;
> +	opts->suboptions |= OPTION_MPTCP_FAIL;
> +	opts->fail_seq = subflow->map_seq;
> +
> +	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> +
> +	return true;
> +}
> +
> bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> 			       unsigned int *size, unsigned int remaining,
> 			       struct mptcp_out_options *opts)
> @@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> 		return false;
>
> 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> -		mptcp_established_options_rst(sk, skb, size, remaining, opts);
> +		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> +			*size += opt_size;
> +			remaining -= opt_size;
> +		}
> +		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> +		*size += opt_size;
> +		remaining -= opt_size;
> 		return true;
> 	}
>
> @@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
> 		mp_opt.mp_prio = 0;
> 	}
>
> +	if (mp_opt.mp_fail) {
> +		/* TODO */

It's not clear to me what RFC 8684 requires here.

If Peer A detects the bad checksum, it sends MP_FAIL with fail_seq 
matching subflow->map_seq for the mapping being received (which the code 
above does).

The RFC says the recipient of the option (Peer B) must discard data after 
fail_seq - but it's the transmitter of the data? It's very confusingly 
worded. I looked at the multipath-tcp.org kernel and the MP_FAIL sequence 
number appears to be ignored by the receiver.

So, maybe this is the right thing to do:

  * When checksums are enabled, do not send DATA_ACK for received data 
until the full mapping is received and the checksum is verified. This 
allows retransmission of any data that's part of the bad checksum.

  * When a bad checksum is detected, send the MP_FAIL and watch for the 
MP_FAIL reply. I'm not sure of the conditions for resending MP_FAIL.

  * When MP_FAIL is received, send MP_FAIL+RST for the mapping currently 
being received on that subflow and fallback if the in-order-single-subflow 
conditions in the RFC are met.

Christoph, is that on the right track? (If so, is it time for an errata 
submission? :) )


- Mat



> +		mp_opt.mp_fail = 0;
> +	}
> +
> 	if (mp_opt.reset) {
> 		subflow->reset_seen = 1;
> 		subflow->reset_reason = mp_opt.reset_reason;
> @@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> 				      opts->backup, TCPOPT_NOP);
> 	}
>
> +	if (OPTION_MPTCP_FAIL & opts->suboptions) {
> +		const struct sock *ssk = (const struct sock *)tp;
> +		struct mptcp_subflow_context *subflow;
> +
> +		subflow = mptcp_subflow_ctx(ssk);
> +		subflow->send_mp_fail = 0;
> +
> +		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
> +				      TCPOLEN_MPTCP_FAIL,
> +				      0, 0);
> +		put_unaligned_be64(opts->fail_seq, ptr);
> +		ptr += 2;
> +	}
> +
> 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
> 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
> 				      TCPOLEN_MPTCP_MPJ_SYN,
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 24b4e1f6d23f..ab8f92c49029 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -27,6 +27,7 @@
> #define OPTION_MPTCP_FASTCLOSE	BIT(9)
> #define OPTION_MPTCP_PRIO	BIT(10)
> #define OPTION_MPTCP_RST	BIT(11)
> +#define OPTION_MPTCP_FAIL	BIT(12)
>
> /* MPTCP option subtypes */
> #define MPTCPOPT_MP_CAPABLE	0
> @@ -68,6 +69,7 @@
> #define TCPOLEN_MPTCP_PRIO_ALIGN	4
> #define TCPOLEN_MPTCP_FASTCLOSE		12
> #define TCPOLEN_MPTCP_RST		4
> +#define TCPOLEN_MPTCP_FAIL		12
>
> /* MPTCP MP_JOIN flags */
> #define MPTCPOPT_BACKUP		BIT(0)
> @@ -135,6 +137,7 @@ struct mptcp_options_received {
> 		add_addr : 1,
> 		rm_addr : 1,
> 		mp_prio : 1,
> +		mp_fail : 1,
> 		family : 4,
> 		echo : 1,
> 		backup : 1;
> @@ -162,6 +165,7 @@ struct mptcp_options_received {
> 	u16	port;
> 	u8	reset_reason:4;
> 	u8	reset_transient:1;
> +	u64	fail_seq;
> };
>
> static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
> @@ -428,6 +432,7 @@ struct mptcp_subflow_context {
> 		mpc_map : 1,
> 		backup : 1,
> 		send_mp_prio : 1,
> +		send_mp_fail : 1,
> 		rx_eof : 1,
> 		can_ack : 1,        /* only after processing the remote a key */
> 		disposable : 1;	    /* ctx can be free at ulp release time */
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index b597811a2f8d..059c1a0ef25b 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
>
> 		if (csum_fold(csum)) {
> 			pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
> +			subflow->send_mp_fail = 1;
> +			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;

This is sending a MP_FAIL not MP_TCPRST, so a reason isn't used.

> +			tcp_send_active_reset(ssk, GFP_ATOMIC);

The subflow socket must be closed to. Does it work to call 
mptcp_subflow_reset() instead?

I don't think there should be data cleanup to do because the data can't be 
pushed up to the msk until the entire mapping is received.


Mat

> 			return true; //false;
> 		}
> 		pr_debug("%s DSS checksum done", __func__);
> 	}
>
> out:
> +	subflow->send_mp_fail = 0;
> 	return true;
> }
>
> -- 
> 2.29.2

--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index de88f38e60b1..6635689ce03f 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -64,6 +64,7 @@  struct mptcp_out_options {
 	u32 nonce;
 	u64 thmac;
 	u32 token;
+	u64 fail_seq;
 	u8 hmac[20];
 	struct mptcp_ext ext_copy;
 #endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9df26291cf9a..1b5aaab80ba0 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -318,6 +318,17 @@  static void mptcp_parse_option(const struct sk_buff *skb,
 		flags = *ptr++;
 		mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
 		mp_opt->reset_reason = *ptr;
+		pr_debug("RST: reset_reason=%u", mp_opt->reset_reason);
+		break;
+
+	case MPTCPOPT_MP_FAIL:
+		if (opsize != TCPOLEN_MPTCP_FAIL)
+			break;
+
+		ptr += 2;
+		mp_opt->mp_fail = 1;
+		mp_opt->fail_seq = get_unaligned_be64(ptr);
+		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
 		break;
 
 	default:
@@ -344,6 +355,7 @@  void mptcp_get_options(const struct sk_buff *skb,
 	mp_opt->mp_prio = 0;
 	mp_opt->reset = 0;
 	mp_opt->csum = 0;
+	mp_opt->mp_fail = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -787,6 +799,28 @@  static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	opts->reset_reason = subflow->reset_reason;
 }
 
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+					      unsigned int *size,
+					      unsigned int remaining,
+					      struct mptcp_out_options *opts)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (!subflow->send_mp_fail)
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FAIL)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FAIL;
+	opts->suboptions |= OPTION_MPTCP_FAIL;
+	opts->fail_seq = subflow->map_seq;
+
+	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+	return true;
+}
+
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts)
@@ -803,7 +837,13 @@  bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 		return false;
 
 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-		mptcp_established_options_rst(sk, skb, size, remaining, opts);
+		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+			*size += opt_size;
+			remaining -= opt_size;
+		}
+		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
+		*size += opt_size;
+		remaining -= opt_size;
 		return true;
 	}
 
@@ -1120,6 +1160,11 @@  void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		mp_opt.mp_prio = 0;
 	}
 
+	if (mp_opt.mp_fail) {
+		/* TODO */
+		mp_opt.mp_fail = 0;
+	}
+
 	if (mp_opt.reset) {
 		subflow->reset_seen = 1;
 		subflow->reset_reason = mp_opt.reset_reason;
@@ -1329,6 +1374,20 @@  void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 				      opts->backup, TCPOPT_NOP);
 	}
 
+	if (OPTION_MPTCP_FAIL & opts->suboptions) {
+		const struct sock *ssk = (const struct sock *)tp;
+		struct mptcp_subflow_context *subflow;
+
+		subflow = mptcp_subflow_ctx(ssk);
+		subflow->send_mp_fail = 0;
+
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+				      TCPOLEN_MPTCP_FAIL,
+				      0, 0);
+		put_unaligned_be64(opts->fail_seq, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
 				      TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 24b4e1f6d23f..ab8f92c49029 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -27,6 +27,7 @@ 
 #define OPTION_MPTCP_FASTCLOSE	BIT(9)
 #define OPTION_MPTCP_PRIO	BIT(10)
 #define OPTION_MPTCP_RST	BIT(11)
+#define OPTION_MPTCP_FAIL	BIT(12)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -68,6 +69,7 @@ 
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
 #define TCPOLEN_MPTCP_RST		4
+#define TCPOLEN_MPTCP_FAIL		12
 
 /* MPTCP MP_JOIN flags */
 #define MPTCPOPT_BACKUP		BIT(0)
@@ -135,6 +137,7 @@  struct mptcp_options_received {
 		add_addr : 1,
 		rm_addr : 1,
 		mp_prio : 1,
+		mp_fail : 1,
 		family : 4,
 		echo : 1,
 		backup : 1;
@@ -162,6 +165,7 @@  struct mptcp_options_received {
 	u16	port;
 	u8	reset_reason:4;
 	u8	reset_transient:1;
+	u64	fail_seq;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -428,6 +432,7 @@  struct mptcp_subflow_context {
 		mpc_map : 1,
 		backup : 1,
 		send_mp_prio : 1,
+		send_mp_fail : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b597811a2f8d..059c1a0ef25b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -823,12 +823,16 @@  static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
 
 		if (csum_fold(csum)) {
 			pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
+			subflow->send_mp_fail = 1;
+			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+			tcp_send_active_reset(ssk, GFP_ATOMIC);
 			return true; //false;
 		}
 		pr_debug("%s DSS checksum done", __func__);
 	}
 
 out:
+	subflow->send_mp_fail = 0;
 	return true;
 }