Message ID | 20200513053257.8836-1-cpaasch@apple.com |
---|---|
State | Superseded, archived |
Delegated to: | Paolo Abeni |
Headers | show |
Series | [RFC,netnext] mptcp: Use 32-bit DATA_ACK when possible | expand |
On Tue, 2020-05-12 at 22:32 -0700, Christoph Paasch wrote: > RFC8684 allows to send 32-bit DATA_ACKs as long as the peer is not > sending 64-bit data-sequence numbers. The 64-bit DSN is only there for > extreme scenarios when a very high throughput subflow is combined with a > long-RTT subflow such that the high-throughput subflow wraps around the > 32-bit sequence number space within an RTT of the high-RTT subflow. > > It is thus a rare scenario and we should try to use the 32-bit DATA_ACK > instead as long as possible. It allows to reduce the TCP-option overhead > by 4 bytes, thus makes space for an additional SACK-block. It also makes > tcpdumps much easier to read when the DSN and DATA_ACK are both either > 32 or 64-bit. > > Signed-off-by: Christoph Paasch <cpaasch@apple.com> > --- > include/net/mptcp.h | 1 + > net/mptcp/options.c | 33 ++++++++++++++++++++++++--------- > net/mptcp/protocol.h | 1 + > net/mptcp/subflow.c | 2 ++ > 4 files changed, 28 insertions(+), 9 deletions(-) > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > index e60275659de6..25d8b36ed94f 100644 > --- a/include/net/mptcp.h > +++ b/include/net/mptcp.h > @@ -17,6 +17,7 @@ struct seq_file; > /* MPTCP sk_buff extension data */ > struct mptcp_ext { > u64 data_ack; > + u32 data_ack32; I think the above 2 fields could be "union-ed". I think it's preferrable to avoid growing mptcp_ext size. > u64 data_seq; > u32 subflow_seq; > u16 data_len; > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > index 45497af23906..ece6f92cf7d1 100644 > --- a/net/mptcp/options.c > +++ b/net/mptcp/options.c > @@ -516,7 +516,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > return ret; > } > > - ack_size = TCPOLEN_MPTCP_DSS_ACK64; > + if (subflow->use_64bit_ack) { > + ack_size = TCPOLEN_MPTCP_DSS_ACK64; > + opts->ext_copy.data_ack = msk->ack_seq; > + opts->ext_copy.ack64 = 1; > + } else { > + ack_size = TCPOLEN_MPTCP_DSS_ACK32; > + opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); > + opts->ext_copy.ack64 = 0; > + } > + opts->ext_copy.use_ack = 1; If I read correctly, 'use_64bit_ack' is initialized with 0 and only set if the peer sends 64bits sequence number. When interacting with impl using 32bits DSN, our stack will flip the behavior at least once for passive socket: MPC+data is internally translated into DSS with 64bit DSN, so first passive socket ack will use 64 bits. Later, if the client uses 32 bits acks, it will switch to 32 bit acks. Is that ok? > /* Add kind/length/subtype/flag overhead if mapping is not populated */ > if (dss_size == 0) > @@ -524,10 +533,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > > dss_size += ack_size; > > - opts->ext_copy.data_ack = msk->ack_seq; > - opts->ext_copy.ack64 = 1; > - opts->ext_copy.use_ack = 1; > - > *size = ALIGN(dss_size, 4); > return true; > } > @@ -986,8 +991,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) > u8 flags = 0; > > if (mpext->use_ack) { > - len += TCPOLEN_MPTCP_DSS_ACK64; > - flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; > + flags = MPTCP_DSS_HAS_ACK; > + if (mpext->ack64) { > + len += TCPOLEN_MPTCP_DSS_ACK64; > + flags |= MPTCP_DSS_ACK64; > + } else { > + len += TCPOLEN_MPTCP_DSS_ACK32; > + } > } > > if (mpext->use_map) { > @@ -1004,8 +1014,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) > *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); > > if (mpext->use_ack) { > - put_unaligned_be64(mpext->data_ack, ptr); > - ptr += 2; > + if (mpext->ack64) { > + put_unaligned_be64(mpext->data_ack, ptr); > + ptr += 2; > + } else { > + put_unaligned_be32(mpext->data_ack32, ptr); > + ptr += 1; > + } > } > > if (mpext->use_map) { > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index e4ca6320ce76..f5adca93e8fb 100644 > --- a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -290,6 +290,7 @@ struct mptcp_subflow_context { > data_avail : 1, > rx_eof : 1, > data_fin_tx_enable : 1, > + use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ > can_ack : 1; /* only after processing the remote a key */ > u64 data_fin_tx_seq; > u32 remote_nonce; Possibly with a wider scope than this patch, but the above bitfields contains both fields that are rarely changed (data_fin_tx_enable, can_ack, request_mptcp, etc...) and fields that are set on every packet /every DSS (map_vaild, data_avail, the newly added use_64bit_ack). At least 'data_avail' is accessed without holding the ssk socket lock by mptcp_subflow_recv_lookup(). Perhaps we could revisit the binary layout of this struct? Other than the above LGTM! Cheers, Paolo
On 13/05/20 - 09:16:54, Paolo Abeni wrote: > On Tue, 2020-05-12 at 22:32 -0700, Christoph Paasch wrote: > > RFC8684 allows to send 32-bit DATA_ACKs as long as the peer is not > > sending 64-bit data-sequence numbers. The 64-bit DSN is only there for > > extreme scenarios when a very high throughput subflow is combined with a > > long-RTT subflow such that the high-throughput subflow wraps around the > > 32-bit sequence number space within an RTT of the high-RTT subflow. > > > > It is thus a rare scenario and we should try to use the 32-bit DATA_ACK > > instead as long as possible. It allows to reduce the TCP-option overhead > > by 4 bytes, thus makes space for an additional SACK-block. It also makes > > tcpdumps much easier to read when the DSN and DATA_ACK are both either > > 32 or 64-bit. > > > > Signed-off-by: Christoph Paasch <cpaasch@apple.com> > > --- > > include/net/mptcp.h | 1 + > > net/mptcp/options.c | 33 ++++++++++++++++++++++++--------- > > net/mptcp/protocol.h | 1 + > > net/mptcp/subflow.c | 2 ++ > > 4 files changed, 28 insertions(+), 9 deletions(-) > > > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > > index e60275659de6..25d8b36ed94f 100644 > > --- a/include/net/mptcp.h > > +++ b/include/net/mptcp.h > > @@ -17,6 +17,7 @@ struct seq_file; > > /* MPTCP sk_buff extension data */ > > struct mptcp_ext { > > u64 data_ack; > > + u32 data_ack32; > > I think the above 2 fields could be "union-ed". > I think it's preferrable to avoid growing mptcp_ext size. Sounds good! > > > u64 data_seq; > > u32 subflow_seq; > > u16 data_len; > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > > index 45497af23906..ece6f92cf7d1 100644 > > --- a/net/mptcp/options.c > > +++ b/net/mptcp/options.c > > @@ -516,7 +516,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > > return ret; > > } > > > > - ack_size = TCPOLEN_MPTCP_DSS_ACK64; > > + if (subflow->use_64bit_ack) { > > + ack_size = TCPOLEN_MPTCP_DSS_ACK64; > > + opts->ext_copy.data_ack = msk->ack_seq; > > + opts->ext_copy.ack64 = 1; > > + } else { > > + ack_size = TCPOLEN_MPTCP_DSS_ACK32; > > + opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); > > + opts->ext_copy.ack64 = 0; > > + } > > + opts->ext_copy.use_ack = 1; > > If I read correctly, 'use_64bit_ack' is initialized with 0 and only set > if the peer sends 64bits sequence number. When interacting with impl > using 32bits DSN, our stack will flip the behavior at least once for > passive socket: > > MPC+data is internally translated into DSS with 64bit DSN, so first > passive socket ack will use 64 bits. > > Later, if the client uses 32 bits acks, it will switch to 32 bit acks. > > Is that ok? Yes, I saw that behavior in my tests. I think it is ok. For the initial data there won't be a SACK-block, so nothing is lost there. > > /* Add kind/length/subtype/flag overhead if mapping is not populated */ > > if (dss_size == 0) > > @@ -524,10 +533,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > > > > dss_size += ack_size; > > > > - opts->ext_copy.data_ack = msk->ack_seq; > > - opts->ext_copy.ack64 = 1; > > - opts->ext_copy.use_ack = 1; > > - > > *size = ALIGN(dss_size, 4); > > return true; > > } > > @@ -986,8 +991,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) > > u8 flags = 0; > > > > if (mpext->use_ack) { > > - len += TCPOLEN_MPTCP_DSS_ACK64; > > - flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; > > + flags = MPTCP_DSS_HAS_ACK; > > + if (mpext->ack64) { > > + len += TCPOLEN_MPTCP_DSS_ACK64; > > + flags |= MPTCP_DSS_ACK64; > > + } else { > > + len += TCPOLEN_MPTCP_DSS_ACK32; > > + } > > } > > > > if (mpext->use_map) { > > @@ -1004,8 +1014,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) > > *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); > > > > if (mpext->use_ack) { > > - put_unaligned_be64(mpext->data_ack, ptr); > > - ptr += 2; > > + if (mpext->ack64) { > > + put_unaligned_be64(mpext->data_ack, ptr); > > + ptr += 2; > > + } else { > > + put_unaligned_be32(mpext->data_ack32, ptr); > > + ptr += 1; > > + } > > } > > > > if (mpext->use_map) { > > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > > index e4ca6320ce76..f5adca93e8fb 100644 > > --- a/net/mptcp/protocol.h > > +++ b/net/mptcp/protocol.h > > @@ -290,6 +290,7 @@ struct mptcp_subflow_context { > > data_avail : 1, > > rx_eof : 1, > > data_fin_tx_enable : 1, > > + use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ > > can_ack : 1; /* only after processing the remote a key */ > > u64 data_fin_tx_seq; > > u32 remote_nonce; > > Possibly with a wider scope than this patch, but the above bitfields > contains both fields that are rarely changed (data_fin_tx_enable, > can_ack, request_mptcp, etc...) and fields that are set on every packet > /every DSS (map_vaild, data_avail, the newly added use_64bit_ack). > > At least 'data_avail' is accessed without holding the ssk socket lock > by mptcp_subflow_recv_lookup(). Perhaps we could revisit the binary > layout of this struct? Yes, that might be good: https://github.com/multipath-tcp/mptcp_net-next/issues/24 > Other than the above LGTM! Cool! I will make the union and send to netnext if there are no other comments here on the list. Thanks, Christoph
diff --git a/include/net/mptcp.h b/include/net/mptcp.h index e60275659de6..25d8b36ed94f 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -17,6 +17,7 @@ struct seq_file; /* MPTCP sk_buff extension data */ struct mptcp_ext { u64 data_ack; + u32 data_ack32; u64 data_seq; u32 subflow_seq; u16 data_len; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 45497af23906..ece6f92cf7d1 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -516,7 +516,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return ret; } - ack_size = TCPOLEN_MPTCP_DSS_ACK64; + if (subflow->use_64bit_ack) { + ack_size = TCPOLEN_MPTCP_DSS_ACK64; + opts->ext_copy.data_ack = msk->ack_seq; + opts->ext_copy.ack64 = 1; + } else { + ack_size = TCPOLEN_MPTCP_DSS_ACK32; + opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); + opts->ext_copy.ack64 = 0; + } + opts->ext_copy.use_ack = 1; /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -524,10 +533,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, dss_size += ack_size; - opts->ext_copy.data_ack = msk->ack_seq; - opts->ext_copy.ack64 = 1; - opts->ext_copy.use_ack = 1; - *size = ALIGN(dss_size, 4); return true; } @@ -986,8 +991,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) u8 flags = 0; if (mpext->use_ack) { - len += TCPOLEN_MPTCP_DSS_ACK64; - flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; + flags = MPTCP_DSS_HAS_ACK; + if (mpext->ack64) { + len += TCPOLEN_MPTCP_DSS_ACK64; + flags |= MPTCP_DSS_ACK64; + } else { + len += TCPOLEN_MPTCP_DSS_ACK32; + } } if (mpext->use_map) { @@ -1004,8 +1014,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { - put_unaligned_be64(mpext->data_ack, ptr); - ptr += 2; + if (mpext->ack64) { + put_unaligned_be64(mpext->data_ack, ptr); + ptr += 2; + } else { + put_unaligned_be32(mpext->data_ack32, ptr); + ptr += 1; + } } if (mpext->use_map) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e4ca6320ce76..f5adca93e8fb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -290,6 +290,7 @@ struct mptcp_subflow_context { data_avail : 1, rx_eof : 1, data_fin_tx_enable : 1, + use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ can_ack : 1; /* only after processing the remote a key */ u64 data_fin_tx_seq; u32 remote_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 009d5c478062..f22dad482cd4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -661,9 +661,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk) if (!mpext->dsn64) { map_seq = expand_seq(subflow->map_seq, subflow->map_data_len, mpext->data_seq); + subflow->use_64bit_ack = 0; pr_debug("expanded seq=%llu", subflow->map_seq); } else { map_seq = mpext->data_seq; + subflow->use_64bit_ack = 1; } if (subflow->map_valid) {
RFC8684 allows to send 32-bit DATA_ACKs as long as the peer is not sending 64-bit data-sequence numbers. The 64-bit DSN is only there for extreme scenarios when a very high throughput subflow is combined with a long-RTT subflow such that the high-throughput subflow wraps around the 32-bit sequence number space within an RTT of the high-RTT subflow. It is thus a rare scenario and we should try to use the 32-bit DATA_ACK instead as long as possible. It allows to reduce the TCP-option overhead by 4 bytes, thus makes space for an additional SACK-block. It also makes tcpdumps much easier to read when the DSN and DATA_ACK are both either 32 or 64-bit. Signed-off-by: Christoph Paasch <cpaasch@apple.com> --- include/net/mptcp.h | 1 + net/mptcp/options.c | 33 ++++++++++++++++++++++++--------- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 2 ++ 4 files changed, 28 insertions(+), 9 deletions(-)