Message ID | 5a6b5f4f58d6bf3bd425ed0162ac177643d734f3.1573488751.git.pabeni@redhat.com |
---|---|
State | Superseded, archived |
Headers | show |
Series | [1/4] mptcp: move mp_capable initialization at subflow_init_req() start | expand |
Hi Paolo - On Mon, 2019-11-11 at 17:53 +0100, Paolo Abeni wrote: > since such option will always fit the tcp header, module bugs. "modulo" How is it guaranteed that it will always fit, now and in the future? Peter. > Note that this patch preserve a bool ret value for > mptcp_established_options_dss(), > even if it now always returns true. That will simplify a bit later patches > for v1 support. > > Squash-to: "mptcp: Write MPTCP DSS headers to outgoing data packets" > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > --- > net/mptcp/options.c | 71 ++++++++++++++++++++------------------------- > 1 file changed, 31 insertions(+), 40 deletions(-) > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > index fa10086b5b74..cd16baea0666 100644 > --- a/net/mptcp/options.c > +++ b/net/mptcp/options.c > @@ -200,7 +200,7 @@ static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size, > { > struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); > > - if (!subflow->fourth_ack && remaining >= TCPOLEN_MPTCP_MPC_ACK) { > + if (!subflow->fourth_ack) { > opts->suboptions = OPTION_MPTCP_MPC_ACK; > opts->sndr_key = subflow->local_key; > opts->rcvr_key = subflow->remote_key; > @@ -220,6 +220,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > { > unsigned int dss_size = 0; > struct mptcp_ext *mpext; > + struct mptcp_sock *msk; > unsigned int ack_size; > > mpext = skb ? mptcp_get_ext(skb) : NULL; > @@ -229,15 +230,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > > map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; > > - if (map_size <= remaining) { > - remaining -= map_size; > - dss_size = map_size; > - if (mpext) > - opts->ext_copy = *mpext; > - } else { > - opts->ext_copy.use_map = 0; > - WARN_ONCE(1, "MPTCP: Map dropped"); > - } > + remaining -= map_size; > + dss_size = map_size; > + if (mpext) > + opts->ext_copy = *mpext; > } > > ack_size = TCPOLEN_MPTCP_DSS_ACK64; > @@ -246,29 +242,19 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, > if (dss_size == 0) > ack_size += TCPOLEN_MPTCP_DSS_BASE; > > - if (ack_size <= remaining) { > - struct mptcp_sock *msk; > - > - dss_size += ack_size; > - > - msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); > - if (msk) { > - opts->ext_copy.data_ack = msk->ack_seq; > - } else { > - mptcp_crypto_key_sha1(mptcp_subflow_ctx(sk)->remote_key, > - NULL, &opts->ext_copy.data_ack); > - opts->ext_copy.data_ack++; > - } > + dss_size += ack_size; > > - opts->ext_copy.ack64 = 1; > - opts->ext_copy.use_ack = 1; > + msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); > + if (msk) { > + opts->ext_copy.data_ack = msk->ack_seq; > } else { > - opts->ext_copy.use_ack = 0; > - WARN(1, "MPTCP: Ack dropped"); > + mptcp_crypto_key_sha1(mptcp_subflow_ctx(sk)->remote_key, > + NULL, &opts->ext_copy.data_ack); > + opts->ext_copy.data_ack++; > } > > - if (!dss_size) > - return false; > + opts->ext_copy.ack64 = 1; > + opts->ext_copy.use_ack = 1; > > *size = ALIGN(dss_size, 4); > return true; > @@ -279,22 +265,27 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, > struct mptcp_out_options *opts) > { > unsigned int opt_size = 0; > + bool ret = true; > > if (!mptcp_subflow_ctx(sk)->mp_capable) > return false; > > - if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) { > - *size += opt_size; > - remaining -= opt_size; > - return true; > - } else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, > - opts)) { > - *size += opt_size; > - remaining -= opt_size; > - return true; > - } > + if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) > + ret = true; > + else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, > + opts)) > + ret = true; > > - return false; > + /* we reserved enough space for the above options, and exceeding the > + * TCP option space would be fatal > + */ > + if (WARN_ON_ONCE(opt_size > remaining)) > + return false; > + > + *size += opt_size; > + remaining -= opt_size; > + > + return ret; > } > > bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
On Mon, 2019-11-11 at 13:50 -0800, Peter Krystad wrote: > Hi Paolo - > > On Mon, 2019-11-11 at 17:53 +0100, Paolo Abeni wrote: > > since such option will always fit the tcp header, module bugs. > > "modulo" thnx! Hopefully comment should be dropped when squashing ;) > How is it guaranteed that it will always fit, now and in the future? TCP option space must accomodate tstamp (12 bytes) and MPTCP options. AFAICS DSS, MP_CAPABLE and MP_JOIN are mutually esclusive, and the bigger is DSS with 64 bytes ack/seq and csum with a total of 28 bytes - which fit the TCP option space. Additional MPTCP options (ADD/DEL_ADDR/PRIO/FAIL/FASTCLOSE/...) can indeed run out of space, but that failure is not fatal (do not break the connection nor causes fallback) and is handled gracefully - no WARN, just omit the option. Additionally we could increment some MIB counter. Thanks, Paolo
Hi Paolo, On 12/11/2019 09:21, Paolo Abeni wrote: > On Mon, 2019-11-11 at 13:50 -0800, Peter Krystad wrote: >> Hi Paolo - >> >> On Mon, 2019-11-11 at 17:53 +0100, Paolo Abeni wrote: >>> since such option will always fit the tcp header, module bugs. >> >> "modulo" > > thnx! Hopefully comment should be dropped when squashing ;) > >> How is it guaranteed that it will always fit, now and in the future? > > TCP option space must accomodate tstamp (12 bytes) and MPTCP options. > > AFAICS DSS, MP_CAPABLE and MP_JOIN are mutually esclusive, and the > bigger is DSS with 64 bytes ack/seq and csum with a total of 28 bytes - > which fit the TCP option space. > > Additional MPTCP options (ADD/DEL_ADDR/PRIO/FAIL/FASTCLOSE/...) can > indeed run out of space, but that failure is not fatal (do not break > the connection nor causes fallback) and is handled gracefully - no > WARN, just omit the option. Additionally we could increment some MIB > counter. When you say that the option is omitted, do you mean that the option is sent in the new (dedicated?) ACK packet or dropped/never sent? :) Cheers, Matt
On Wed, 2019-11-13 at 16:48 +0100, Matthieu Baerts wrote: > <On 12/11/2019 09:21, Paolo Abeni wrote: > > AFAICS DSS, MP_CAPABLE and MP_JOIN are mutually esclusive, and the > > bigger is DSS with 64 bytes ack/seq and csum with a total of 28 bytes - > > which fit the TCP option space. > > > > Additional MPTCP options (ADD/DEL_ADDR/PRIO/FAIL/FASTCLOSE/...) can > > indeed run out of space, but that failure is not fatal (do not break > > the connection nor causes fallback) and is handled gracefully - no > > WARN, just omit the option. Additionally we could increment some MIB > > counter. > > When you say that the option is omitted, do you mean that the option is > sent in the new (dedicated?) ACK packet or dropped/never sent? :) With the current code, prior to this series, the ADD/DEL_ADDR option exceeding the TCP option space is dropped/never sent. This series do not change such behavior. Inserting a (duplicate) ack to try to fit such option looks doable, but also out-of-scope for this change. Cheers, Paolo
Hi Paolo, On 13/11/2019 17:18, Paolo Abeni wrote: > On Wed, 2019-11-13 at 16:48 +0100, Matthieu Baerts wrote: >> <On 12/11/2019 09:21, Paolo Abeni wrote: >>> AFAICS DSS, MP_CAPABLE and MP_JOIN are mutually esclusive, and the >>> bigger is DSS with 64 bytes ack/seq and csum with a total of 28 bytes - >>> which fit the TCP option space. >>> >>> Additional MPTCP options (ADD/DEL_ADDR/PRIO/FAIL/FASTCLOSE/...) can >>> indeed run out of space, but that failure is not fatal (do not break >>> the connection nor causes fallback) and is handled gracefully - no >>> WARN, just omit the option. Additionally we could increment some MIB >>> counter. >> >> When you say that the option is omitted, do you mean that the option is >> sent in the new (dedicated?) ACK packet or dropped/never sent? :) > > With the current code, prior to this series, the ADD/DEL_ADDR option > exceeding the TCP option space is dropped/never sent. This series do > not change such behavior. Indeed, it was already the case. Thank you for your quick reply! Should we already add the MIB counter? And/or add a TODO in the code? We are not there yet but missing an ADD_ADDR can have bad consequences. And it would be unexpected for the userspace to drop it, especially with MPTCPv1 which should make it reliable :) > Inserting a (duplicate) ack to try to fit such option looks doable, but > also out-of-scope for this change. Agreed. We should add this in a TODO list of "things for later" :) (or a TODO in the code?) Cheers, Matt
On Wed, 2019-11-13 at 17:35 +0100, Matthieu Baerts wrote: > Should we already add the MIB counter? And/or add a TODO in the code? I think adding more MIB counter is a later tasks. > We are not there yet but missing an ADD_ADDR can have bad consequences. > And it would be unexpected for the userspace to drop it, especially with > MPTCPv1 which should make it reliable :) whoops... I missed that difference between v0 and v1. That opens a full can of worms, I think/fear: ADD_ADDR6 echo (30 bytes) can't fit TCP option space easily, without dropping also the tstamp option. Why does echo includes the IP address itself ?!? why is the address ID not enough? bold/stupid question: is still possible change v1 specs in that regard? (e.g. removing the address from ADD_ADDR echo option) > > Inserting a (duplicate) ack to try to fit such option looks doable, but > > also out-of-scope for this change. Here I missed the side effects of duplicate acks on CC. We can't easily plug there MPTCP specific behavior and if we start sending TCP dup ack to carry MPTCP info we may end-up needing hook to avoid interference on CC. > Agreed. We should add this in a TODO list of "things for later" :) (or a > TODO in the code?) I think tracking this on the framepad is the better option. Cheers, Paolo
Hi Paolo, Christoph, On 13/11/2019 18:37, Paolo Abeni wrote: > On Wed, 2019-11-13 at 17:35 +0100, Matthieu Baerts wrote: >> Should we already add the MIB counter? And/or add a TODO in the code? > > I think adding more MIB counter is a later tasks. Should we add a comment in the code is it useless when there are thousands of lines around? :) >> We are not there yet but missing an ADD_ADDR can have bad consequences. >> And it would be unexpected for the userspace to drop it, especially with >> MPTCPv1 which should make it reliable :) > > whoops... I missed that difference between v0 and v1. That opens a full > can of worms, I think/fear: > > ADD_ADDR6 echo (30 bytes) can't fit TCP option space easily, without > dropping also the tstamp option. Why does echo includes the IP address > itself ?!? why is the address ID not enough? > > bold/stupid question: is still possible change v1 specs in that regard? > (e.g. removing the address from ADD_ADDR echo option) Good question. I just added a new topic in the list for the meeting tomorrow just in case Christoph misses the question :) >>> Inserting a (duplicate) ack to try to fit such option looks doable, but >>> also out-of-scope for this change. > > Here I missed the side effects of duplicate acks on CC. We can't easily > plug there MPTCP specific behavior and if we start sending TCP dup ack > to carry MPTCP info we may end-up needing hook to avoid interference on > CC. I guess we will have to support that at some points. >> Agreed. We should add this in a TODO list of "things for later" :) (or a >> TODO in the code?) > > I think tracking this on the framepad is the better option. Thanks! I added a topic for tomorrow's meeting. Cheers, Matt
On Wed, 2019-11-13 at 19:13 +0100, Matthieu Baerts wrote: > Hi Paolo, Christoph, > > On 13/11/2019 18:37, Paolo Abeni wrote: > > On Wed, 2019-11-13 at 17:35 +0100, Matthieu Baerts wrote: > > > Should we already add the MIB counter? And/or add a TODO in the code? > > > > I think adding more MIB counter is a later tasks. > > Should we add a comment in the code is it useless when there are > thousands of lines around? :) At this point, I would like to avoid adding todos inside comments for the first 2 chunks patches - to avoid additional complex rebases squashing even later. Cheers, Paolo
On Tue, 2019-11-12 at 09:21 +0100, Paolo Abeni wrote: > On Mon, 2019-11-11 at 13:50 -0800, Peter Krystad wrote: > > Hi Paolo - > > > > On Mon, 2019-11-11 at 17:53 +0100, Paolo Abeni wrote: > > > since such option will always fit the tcp header, module bugs. > > > > "modulo" > > thnx! Hopefully comment should be dropped when squashing ;) > > > How is it guaranteed that it will always fit, now and in the future? > > TCP option space must accomodate tstamp (12 bytes) and MPTCP options. > > AFAICS DSS, MP_CAPABLE and MP_JOIN are mutually esclusive, and the > bigger is DSS with 64 bytes ack/seq and csum with a total of 28 bytes - > which fit the TCP option space. Just FYI, I should have mentioned this earlier, I have observed the mptcp.org kernel sending 32-bit DSS ack-only in the same header as final MP_CAPABLE ACK so this statement isn't completely true. Peter. > > Additional MPTCP options (ADD/DEL_ADDR/PRIO/FAIL/FASTCLOSE/...) can > indeed run out of space, but that failure is not fatal (do not break > the connection nor causes fallback) and is handled gracefully - no > WARN, just omit the option. Additionally we could increment some MIB > counter. > > Thanks, > > Paolo >
On Wed, 2019-11-13 at 15:43 -0800, Peter Krystad wrote: > On Tue, 2019-11-12 at 09:21 +0100, Paolo Abeni wrote: > > AFAICS DSS, MP_CAPABLE and MP_JOIN are mutually esclusive, and the > > bigger is DSS with 64 bytes ack/seq and csum with a total of 28 bytes - > > which fit the TCP option space. > > Just FYI, I should have mentioned this earlier, I have observed the mptcp.org > kernel sending 32-bit DSS ack-only in the same header as final MP_CAPABLE ACK > so this statement isn't completely true. Thank you for the info. It looks like a .org bug to me ?!? the sender should not send DSS before receiving the MP_CAPABLE ack, and the client should not send ack before receving a DSS ?!? Back to this series, do you prefer I'll revert to the existing checks? Thanks, Paolo
On Mon, 2019-11-11 at 17:53 +0100, Paolo Abeni wrote: > @@ -279,22 +265,27 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, > struct mptcp_out_options *opts) > { > unsigned int opt_size = 0; > + bool ret = true; ^^^^ this is wrong (should be false). I'll send a v3 rebased, including that change Cheers, Paolo
> On Nov 13, 2019, at 9:37 AM, Paolo Abeni <pabeni@redhat.com> wrote: > > On Wed, 2019-11-13 at 17:35 +0100, Matthieu Baerts wrote: >> Should we already add the MIB counter? And/or add a TODO in the code? > > I think adding more MIB counter is a later tasks. > >> We are not there yet but missing an ADD_ADDR can have bad consequences. >> And it would be unexpected for the userspace to drop it, especially with >> MPTCPv1 which should make it reliable :) > > whoops... I missed that difference between v0 and v1. That opens a full > can of worms, I think/fear: > > ADD_ADDR6 echo (30 bytes) can't fit TCP option space easily, without > dropping also the tstamp option. Why does echo includes the IP address > itself ?!? why is the address ID not enough? > > bold/stupid question: is still possible change v1 specs in that regard? > (e.g. removing the address from ADD_ADDR echo option) To clarify: When the echo-bit is set the HMAC is not present int the option. That means the ADD_ADDR option is at most 4 + 16 + 2 = 22 bytes on the echo. However, on the initial ADD_ADDR (the one with echo == 0), the maximum is indeed 30 bytes. That is only the case when it includes a port as well. And in that scenario I don't see another solution than to remove the TCP Timestamp option on that packet (it is not necessary to have the Timestamp option on each packet). Does that work? Christopbh > >>> Inserting a (duplicate) ack to try to fit such option looks doable, but >>> also out-of-scope for this change. > > Here I missed the side effects of duplicate acks on CC. We can't easily > plug there MPTCP specific behavior and if we start sending TCP dup ack > to carry MPTCP info we may end-up needing hook to avoid interference on > CC. > >> Agreed. We should add this in a TODO list of "things for later" :) (or a >> TODO in the code?) > > I think tracking this on the framepad is the better option. > > Cheers, > > Paolo >
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index fa10086b5b74..cd16baea0666 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -200,7 +200,7 @@ static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - if (!subflow->fourth_ack && remaining >= TCPOLEN_MPTCP_MPC_ACK) { + if (!subflow->fourth_ack) { opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; @@ -220,6 +220,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, { unsigned int dss_size = 0; struct mptcp_ext *mpext; + struct mptcp_sock *msk; unsigned int ack_size; mpext = skb ? mptcp_get_ext(skb) : NULL; @@ -229,15 +230,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; - if (map_size <= remaining) { - remaining -= map_size; - dss_size = map_size; - if (mpext) - opts->ext_copy = *mpext; - } else { - opts->ext_copy.use_map = 0; - WARN_ONCE(1, "MPTCP: Map dropped"); - } + remaining -= map_size; + dss_size = map_size; + if (mpext) + opts->ext_copy = *mpext; } ack_size = TCPOLEN_MPTCP_DSS_ACK64; @@ -246,29 +242,19 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (dss_size == 0) ack_size += TCPOLEN_MPTCP_DSS_BASE; - if (ack_size <= remaining) { - struct mptcp_sock *msk; - - dss_size += ack_size; - - msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); - if (msk) { - opts->ext_copy.data_ack = msk->ack_seq; - } else { - mptcp_crypto_key_sha1(mptcp_subflow_ctx(sk)->remote_key, - NULL, &opts->ext_copy.data_ack); - opts->ext_copy.data_ack++; - } + dss_size += ack_size; - opts->ext_copy.ack64 = 1; - opts->ext_copy.use_ack = 1; + msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); + if (msk) { + opts->ext_copy.data_ack = msk->ack_seq; } else { - opts->ext_copy.use_ack = 0; - WARN(1, "MPTCP: Ack dropped"); + mptcp_crypto_key_sha1(mptcp_subflow_ctx(sk)->remote_key, + NULL, &opts->ext_copy.data_ack); + opts->ext_copy.data_ack++; } - if (!dss_size) - return false; + opts->ext_copy.ack64 = 1; + opts->ext_copy.use_ack = 1; *size = ALIGN(dss_size, 4); return true; @@ -279,22 +265,27 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts) { unsigned int opt_size = 0; + bool ret = true; if (!mptcp_subflow_ctx(sk)->mp_capable) return false; - if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) { - *size += opt_size; - remaining -= opt_size; - return true; - } else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, - opts)) { - *size += opt_size; - remaining -= opt_size; - return true; - } + if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) + ret = true; + else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, + opts)) + ret = true; - return false; + /* we reserved enough space for the above options, and exceeding the + * TCP option space would be fatal + */ + if (WARN_ON_ONCE(opt_size > remaining)) + return false; + + *size += opt_size; + remaining -= opt_size; + + return ret; } bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
since such option will always fit the tcp header, module bugs. Note that this patch preserve a bool ret value for mptcp_established_options_dss(), even if it now always returns true. That will simplify a bit later patches for v1 support. Squash-to: "mptcp: Write MPTCP DSS headers to outgoing data packets" Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 71 ++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 40 deletions(-)