Message ID | dfa64550c8ae54a3d831998f048f7b6b74db8d16.1615378283.git.geliangtang@gmail.com |
---|---|
State | Superseded, archived |
Delegated to: | Mat Martineau |
Headers | show |
Series | DSS checksum and MP_FAIL support | expand |
On Wed, 10 Mar 2021, Geliang Tang wrote: > Add handling for sending and receiving MP_FAIL suboption. > > Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52 > > Signed-off-by: Geliang Tang <geliangtang@gmail.com> > --- > include/net/mptcp.h | 1 + > net/mptcp/options.c | 61 +++++++++++++++++++++++++++++++++++++++++++- > net/mptcp/protocol.h | 5 ++++ > net/mptcp/subflow.c | 4 +++ > 4 files changed, 70 insertions(+), 1 deletion(-) > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > index de88f38e60b1..6635689ce03f 100644 > --- a/include/net/mptcp.h > +++ b/include/net/mptcp.h > @@ -64,6 +64,7 @@ struct mptcp_out_options { > u32 nonce; > u64 thmac; > u32 token; > + u64 fail_seq; > u8 hmac[20]; > struct mptcp_ext ext_copy; > #endif > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > index 9df26291cf9a..1b5aaab80ba0 100644 > --- a/net/mptcp/options.c > +++ b/net/mptcp/options.c > @@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb, > flags = *ptr++; > mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; > mp_opt->reset_reason = *ptr; > + pr_debug("RST: reset_reason=%u", mp_opt->reset_reason); > + break; > + > + case MPTCPOPT_MP_FAIL: > + if (opsize != TCPOLEN_MPTCP_FAIL) > + break; > + > + ptr += 2; > + mp_opt->mp_fail = 1; > + mp_opt->fail_seq = get_unaligned_be64(ptr); > + pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq); > break; > > default: > @@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb, > mp_opt->mp_prio = 0; > mp_opt->reset = 0; > mp_opt->csum = 0; > + mp_opt->mp_fail = 0; > > length = (th->doff * 4) - sizeof(struct tcphdr); > ptr = (const unsigned char *)(th + 1); > @@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu > opts->reset_reason = subflow->reset_reason; > } > > +static bool mptcp_established_options_mp_fail(struct sock *sk, > + unsigned int *size, > + unsigned int remaining, > + struct mptcp_out_options *opts) > +{ > + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); > + > + if (!subflow->send_mp_fail) > + return false; > + > + if (remaining < TCPOLEN_MPTCP_FAIL) > + return false; > + > + *size = TCPOLEN_MPTCP_FAIL; > + opts->suboptions |= OPTION_MPTCP_FAIL; > + opts->fail_seq = subflow->map_seq; > + > + pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); > + > + return true; > +} > + > bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, > unsigned int *size, unsigned int remaining, > struct mptcp_out_options *opts) > @@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, > return false; > > if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { > - mptcp_established_options_rst(sk, skb, size, remaining, opts); > + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { > + *size += opt_size; > + remaining -= opt_size; > + } > + mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts); > + *size += opt_size; > + remaining -= opt_size; > return true; > } > > @@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > mp_opt.mp_prio = 0; > } > > + if (mp_opt.mp_fail) { > + /* TODO */ It's not clear to me what RFC 8684 requires here. If Peer A detects the bad checksum, it sends MP_FAIL with fail_seq matching subflow->map_seq for the mapping being received (which the code above does). The RFC says the recipient of the option (Peer B) must discard data after fail_seq - but it's the transmitter of the data? It's very confusingly worded. I looked at the multipath-tcp.org kernel and the MP_FAIL sequence number appears to be ignored by the receiver. So, maybe this is the right thing to do: * When checksums are enabled, do not send DATA_ACK for received data until the full mapping is received and the checksum is verified. This allows retransmission of any data that's part of the bad checksum. * When a bad checksum is detected, send the MP_FAIL and watch for the MP_FAIL reply. I'm not sure of the conditions for resending MP_FAIL. * When MP_FAIL is received, send MP_FAIL+RST for the mapping currently being received on that subflow and fallback if the in-order-single-subflow conditions in the RFC are met. Christoph, is that on the right track? (If so, is it time for an errata submission? :) ) - Mat > + mp_opt.mp_fail = 0; > + } > + > if (mp_opt.reset) { > subflow->reset_seen = 1; > subflow->reset_reason = mp_opt.reset_reason; > @@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, > opts->backup, TCPOPT_NOP); > } > > + if (OPTION_MPTCP_FAIL & opts->suboptions) { > + const struct sock *ssk = (const struct sock *)tp; > + struct mptcp_subflow_context *subflow; > + > + subflow = mptcp_subflow_ctx(ssk); > + subflow->send_mp_fail = 0; > + > + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, > + TCPOLEN_MPTCP_FAIL, > + 0, 0); > + put_unaligned_be64(opts->fail_seq, ptr); > + ptr += 2; > + } > + > if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { > *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, > TCPOLEN_MPTCP_MPJ_SYN, > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index 24b4e1f6d23f..ab8f92c49029 100644 > --- a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -27,6 +27,7 @@ > #define OPTION_MPTCP_FASTCLOSE BIT(9) > #define OPTION_MPTCP_PRIO BIT(10) > #define OPTION_MPTCP_RST BIT(11) > +#define OPTION_MPTCP_FAIL BIT(12) > > /* MPTCP option subtypes */ > #define MPTCPOPT_MP_CAPABLE 0 > @@ -68,6 +69,7 @@ > #define TCPOLEN_MPTCP_PRIO_ALIGN 4 > #define TCPOLEN_MPTCP_FASTCLOSE 12 > #define TCPOLEN_MPTCP_RST 4 > +#define TCPOLEN_MPTCP_FAIL 12 > > /* MPTCP MP_JOIN flags */ > #define MPTCPOPT_BACKUP BIT(0) > @@ -135,6 +137,7 @@ struct mptcp_options_received { > add_addr : 1, > rm_addr : 1, > mp_prio : 1, > + mp_fail : 1, > family : 4, > echo : 1, > backup : 1; > @@ -162,6 +165,7 @@ struct mptcp_options_received { > u16 port; > u8 reset_reason:4; > u8 reset_transient:1; > + u64 fail_seq; > }; > > static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) > @@ -428,6 +432,7 @@ struct mptcp_subflow_context { > mpc_map : 1, > backup : 1, > send_mp_prio : 1, > + send_mp_fail : 1, > rx_eof : 1, > can_ack : 1, /* only after processing the remote a key */ > disposable : 1; /* ctx can be free at ulp release time */ > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c > index b597811a2f8d..059c1a0ef25b 100644 > --- a/net/mptcp/subflow.c > +++ b/net/mptcp/subflow.c > @@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb) > > if (csum_fold(csum)) { > pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum)); > + subflow->send_mp_fail = 1; > + subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; This is sending a MP_FAIL not MP_TCPRST, so a reason isn't used. > + tcp_send_active_reset(ssk, GFP_ATOMIC); The subflow socket must be closed to. Does it work to call mptcp_subflow_reset() instead? I don't think there should be data cleanup to do because the data can't be pushed up to the msk until the entire mapping is received. Mat > return true; //false; > } > pr_debug("%s DSS checksum done", __func__); > } > > out: > + subflow->send_mp_fail = 0; > return true; > } > > -- > 2.29.2 -- Mat Martineau Intel
diff --git a/include/net/mptcp.h b/include/net/mptcp.h index de88f38e60b1..6635689ce03f 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -64,6 +64,7 @@ struct mptcp_out_options { u32 nonce; u64 thmac; u32 token; + u64 fail_seq; u8 hmac[20]; struct mptcp_ext ext_copy; #endif diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 9df26291cf9a..1b5aaab80ba0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; + pr_debug("RST: reset_reason=%u", mp_opt->reset_reason); + break; + + case MPTCPOPT_MP_FAIL: + if (opsize != TCPOLEN_MPTCP_FAIL) + break; + + ptr += 2; + mp_opt->mp_fail = 1; + mp_opt->fail_seq = get_unaligned_be64(ptr); + pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq); break; default: @@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->mp_prio = 0; mp_opt->reset = 0; mp_opt->csum = 0; + mp_opt->mp_fail = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu opts->reset_reason = subflow->reset_reason; } +static bool mptcp_established_options_mp_fail(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (!subflow->send_mp_fail) + return false; + + if (remaining < TCPOLEN_MPTCP_FAIL) + return false; + + *size = TCPOLEN_MPTCP_FAIL; + opts->suboptions |= OPTION_MPTCP_FAIL; + opts->fail_seq = subflow->map_seq; + + pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + + return true; +} + bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, return false; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { - mptcp_established_options_rst(sk, skb, size, remaining, opts); + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + } + mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts); + *size += opt_size; + remaining -= opt_size; return true; } @@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mp_opt.mp_prio = 0; } + if (mp_opt.mp_fail) { + /* TODO */ + mp_opt.mp_fail = 0; + } + if (mp_opt.reset) { subflow->reset_seen = 1; subflow->reset_reason = mp_opt.reset_reason; @@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, opts->backup, TCPOPT_NOP); } + if (OPTION_MPTCP_FAIL & opts->suboptions) { + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_fail = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, + TCPOLEN_MPTCP_FAIL, + 0, 0); + put_unaligned_be64(opts->fail_seq, ptr); + ptr += 2; + } + if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYN, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 24b4e1f6d23f..ab8f92c49029 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -27,6 +27,7 @@ #define OPTION_MPTCP_FASTCLOSE BIT(9) #define OPTION_MPTCP_PRIO BIT(10) #define OPTION_MPTCP_RST BIT(11) +#define OPTION_MPTCP_FAIL BIT(12) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -68,6 +69,7 @@ #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 +#define TCPOLEN_MPTCP_FAIL 12 /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) @@ -135,6 +137,7 @@ struct mptcp_options_received { add_addr : 1, rm_addr : 1, mp_prio : 1, + mp_fail : 1, family : 4, echo : 1, backup : 1; @@ -162,6 +165,7 @@ struct mptcp_options_received { u16 port; u8 reset_reason:4; u8 reset_transient:1; + u64 fail_seq; }; static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) @@ -428,6 +432,7 @@ struct mptcp_subflow_context { mpc_map : 1, backup : 1, send_mp_prio : 1, + send_mp_fail : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1; /* ctx can be free at ulp release time */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b597811a2f8d..059c1a0ef25b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb) if (csum_fold(csum)) { pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum)); + subflow->send_mp_fail = 1; + subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; + tcp_send_active_reset(ssk, GFP_ATOMIC); return true; //false; } pr_debug("%s DSS checksum done", __func__); } out: + subflow->send_mp_fail = 0; return true; }
Add handling for sending and receiving MP_FAIL suboption. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52 Signed-off-by: Geliang Tang <geliangtang@gmail.com> --- include/net/mptcp.h | 1 + net/mptcp/options.c | 61 +++++++++++++++++++++++++++++++++++++++++++- net/mptcp/protocol.h | 5 ++++ net/mptcp/subflow.c | 4 +++ 4 files changed, 70 insertions(+), 1 deletion(-)