Message ID | a61b61ef9ba9df2c47f91bcfd668cb33afc5dc43.1619035356.git.pabeni@redhat.com |
---|---|
State | Superseded, archived |
Delegated to: | Paolo Abeni |
Headers | show |
Series | mptcp: data checksum support | expand |
Hi Paolo, Paolo Abeni <pabeni@redhat.com> 于2021年4月22日周四 上午4:21写道: > > Move the RX csum validation into get_mapping_status(). > If the current mapping is valid and csum is enabled traverse the > later pending skbs and compute csum incrementally till the whole > mapping has been covered. If not enough data is available in the > rx queue, return MAPPING_EMPTY - that is, no data. > > Next subflow_data_ready invocation will trigger again csum computation. > > When the full DSS is available, validate the csum and return to the > caller an appropriate error code, to trigger subflow reset of fallback > as required by the RFC. > > Additionally: > - if the csum prevence in the DSS don't match the negotiated value > e.g. csum present, but not requested, return invalid mapping to trigger > subflow reset. > - keep some csum state, to avoid re-compute the csum on the > same data when multiple rx queue traversal are required. > - clean-up the uncompleted mapping from the receive queue on close, > to allow proper subflow disposal > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> I want to sign your name in the patch "mptcp: validate the data checksum" as the author, and sign my name as Co-developed-by if you don't mind. 
> --- > net/mptcp/protocol.c | 35 ------------ > net/mptcp/protocol.h | 6 +-- > net/mptcp/subflow.c | 124 ++++++++++++++++++++++++++++++++++++------- > 3 files changed, 107 insertions(+), 58 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 5160256de731..0d8005b480ab 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -520,35 +520,6 @@ static bool mptcp_check_data_fin(struct sock *sk) > return ret; > } > > -static bool mptcp_validate_data_checksum(struct sock *ssk) > -{ > - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); > - struct mptcp_sock *msk = mptcp_sk(subflow->conn); > - struct csum_pseudo_header header; > - __wsum csum; > - > - if (__mptcp_check_fallback(msk)) > - goto out; > - > - if (subflow->csum_len < subflow->map_data_len) > - goto out; > - > - header.data_seq = subflow->map_seq; > - header.subflow_seq = subflow->map_subflow_seq; > - header.data_len = subflow->map_data_len; > - header.csum = subflow->map_csum; > - > - csum = csum_partial(&header, sizeof(header), subflow->data_csum); > - > - if (csum_fold(csum)) > - return false; > - subflow->data_csum = 0; > - subflow->csum_len = 0; > - > -out: > - return true; > -} > - > static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, > struct sock *ssk, > unsigned int *bytes) > @@ -617,12 +588,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, > if (tp->urg_data) > done = true; > > - if (READ_ONCE(msk->csum_enabled)) { > - subflow->data_csum = skb_checksum(skb, offset, len, > - subflow->data_csum); > - subflow->csum_len += len; > - mptcp_validate_data_checksum(ssk); > - } > if (__mptcp_move_skb(msk, ssk, skb, offset, len)) > moved += len; > seq += len; > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index 176f175a00bd..766412e50e73 100644 > --- a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -399,9 +399,8 @@ struct mptcp_subflow_context { > u32 map_subflow_seq; > u32 ssn_offset; > u32 
map_data_len; > - __wsum data_csum; > - u32 csum_len; > - __sum16 map_csum; > + __wsum map_data_csum; > + u32 map_csum_len; > u32 request_mptcp : 1, /* send MP_CAPABLE */ > request_join : 1, /* send MP_JOIN */ > request_bkup : 1, > @@ -411,6 +410,7 @@ struct mptcp_subflow_context { > pm_notified : 1, /* PM hook called for established status */ > conn_finished : 1, > map_valid : 1, > + map_csum_reqd : 1, > mpc_map : 1, > backup : 1, > send_mp_prio : 1, > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c > index 68efc81eaf2c..6a3c161ebe34 100644 > --- a/net/mptcp/subflow.c > +++ b/net/mptcp/subflow.c > @@ -829,10 +829,81 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) > return true; > } > > +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, > + bool csum_reqd) > +{ > + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); > + struct csum_pseudo_header header; > + u32 offset, seq, delta; > + __wsum csum; > + int len; > + > + if (!csum_reqd) > + return MAPPING_OK; > + > + /* mapping already validated on previous traversal */ > + if (subflow->map_csum_len == subflow->map_data_len) > + return MAPPING_OK; > + > + /* traverse the receive queue, ensuring it contains a full > + * DSS mapping and accumulating the related csum. 
> + * Preserve the accoumlate csum across multiple calls, to compute > + * the csum only once > + */ > + delta = subflow->map_data_len - subflow->map_csum_len; > + for (;;) { > + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; > + offset = seq - TCP_SKB_CB(skb)->seq; > + > + /* if the current skb has not been accounted yet, csum its contents > + * up to the amount covered by the current DSS > + */ > + if (offset < skb->len) { > + len = min(skb->len - offset, delta); > + delta -= len; > + subflow->map_csum_len += len; > + subflow->map_data_csum = skb_checksum(skb, offset, len, > + subflow->map_data_csum); > + } > + if (delta == 0) > + break; > + > + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { > + /* if this subflow is closed, the partial mapping > + * will be never completed; flush the pending skbs, so > + * that subflow_sched_work_if_closed() can kick in > + */ > + if (unlikely(ssk->sk_state == TCP_CLOSE)) > + while ((skb = skb_peek(&ssk->sk_receive_queue))) > + sk_eat_skb(ssk, skb); > + > + /* not enough data to validate the csum */ > + return MAPPING_EMPTY; > + } > + > + /* the DSS mapping for next skbs will be validated later, > + * when a get_mapping_status call will process such skb > + */ > + skb = skb->next; > + } > + > + header.data_seq = subflow->map_seq; > + header.subflow_seq = subflow->map_subflow_seq; > + header.data_len = subflow->map_data_len; > + header.csum = 0; > + > + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); > + if (unlikely(csum_fold(csum))) > + return subflow->mp_join ? 
MAPPING_INVALID : MAPPING_DUMMY; > + > + return MAPPING_OK; > +} > + > static enum mapping_status get_mapping_status(struct sock *ssk, > struct mptcp_sock *msk) > { > struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); > + bool csum_reqd = READ_ONCE(msk->csum_enabled); > struct mptcp_ext *mpext; > struct sk_buff *skb; > u16 data_len; > @@ -926,9 +997,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > /* Allow replacing only with an identical map */ > if (subflow->map_seq == map_seq && > subflow->map_subflow_seq == mpext->subflow_seq && > - subflow->map_data_len == data_len) { > + subflow->map_data_len == data_len && > + subflow->map_csum_reqd == mpext->csum_reqd) { > skb_ext_del(skb, SKB_EXT_MPTCP); > - return MAPPING_OK; > + goto validate_csum; > } > > /* If this skb data are fully covered by the current mapping, > @@ -940,7 +1012,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > } > > /* will validate the next map after consuming the current one */ > - return MAPPING_OK; > + goto validate_csum; > } > > subflow->map_seq = map_seq; > @@ -948,12 +1020,18 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > subflow->map_data_len = data_len; > subflow->map_valid = 1; > subflow->mpc_map = mpext->mpc_map; > - subflow->data_csum = 0; > - subflow->csum_len = 0; > - subflow->map_csum = mpext->csum; > - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%u", > + subflow->map_csum_reqd = mpext->csum_reqd; > + subflow->map_csum_len = 0; > + subflow->map_data_csum = csum_unfold(mpext->csum); > + > + /* Cfr RFC 8684 Section 3.3.0 */ > + if (unlikely(subflow->map_csum_reqd != csum_reqd)) > + return MAPPING_INVALID; > + > + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", > subflow->map_seq, subflow->map_subflow_seq, > - subflow->map_data_len, subflow->map_csum); > + subflow->map_data_len, subflow->map_csum_reqd, > + subflow->map_data_csum); > > validate_seq: > /* we revalidate valid 
mapping on new skb, because we must ensure > @@ -963,7 +1041,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > return MAPPING_INVALID; > > skb_ext_del(skb, SKB_EXT_MPTCP); > - return MAPPING_OK; > + > +validate_csum: > + return validate_data_csum(ssk, skb, csum_reqd); > } > > static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, > @@ -1024,17 +1104,8 @@ static bool subflow_check_data_avail(struct sock *ssk) > ssk->sk_err = EBADMSG; > goto fatal; > } > - if (status == MAPPING_DUMMY) { > - __mptcp_do_fallback(msk); > - skb = skb_peek(&ssk->sk_receive_queue); > - subflow->map_valid = 1; > - subflow->map_seq = READ_ONCE(msk->ack_seq); > - subflow->map_data_len = skb->len; > - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - > - subflow->ssn_offset; > - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; > - return true; > - } > + if (status == MAPPING_DUMMY) > + goto fallback; > > if (status != MAPPING_OK) > goto no_data; > @@ -1089,6 +1160,19 @@ static bool subflow_check_data_avail(struct sock *ssk) > tcp_send_active_reset(ssk, GFP_ATOMIC); > subflow->data_avail = 0; > return false; > + > +fallback: > + __mptcp_do_fallback(msk); > + skb = skb_peek(&ssk->sk_receive_queue); > + subflow->map_csum_reqd = 0; > + subflow->map_valid = 1; > + subflow->map_seq = READ_ONCE(msk->ack_seq); > + subflow->map_data_len = skb->len; > + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - > + subflow->ssn_offset; > + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; > + return true; > + I'll drop this blank line here. Furthermore, move this fallback trunk as a separated patch if you don't mind. Thanks. -Geliang > } > > bool mptcp_subflow_data_available(struct sock *sk) > -- > 2.26.2 >
On Fri, 2021-04-23 at 11:31 +0800, Geliang Tang wrote: > Hi Paolo, > > Paolo Abeni <pabeni@redhat.com> 于2021年4月22日周四 上午4:21写道: > > Move the RX csum validation into get_mapping_status(). > > If the current mapping is valid and csum is enabled traverse the > > later pending skbs and compute csum incrementally till the whole > > mapping has been covered. If not enough data is available in the > > rx queue, return MAPPING_EMPTY - that is, no data. > > > > Next subflow_data_ready invocation will trigger again csum computation. > > > > When the full DSS is available, validate the csum and return to the > > caller an appropriate error code, to trigger subflow reset of fallback > > as required by the RFC. > > > > Additionally: > > - if the csum prevence in the DSS don't match the negotiated value > > e.g. csum present, but not requested, return invalid mapping to trigger > > subflow reset. > > - keep some csum state, to avoid re-compute the csum on the > > same data when multiple rx queue traversal are required. > > - clean-up the uncompleted mapping from the receive queue on close, > > to allow proper subflow disposal > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > I want to sign your name in the patch "mptcp: validate the data checksum" > as the author, and sign my name as Co-developed-by if you don't mind. NP to me either way. 
> > --- > > net/mptcp/protocol.c | 35 ------------ > > net/mptcp/protocol.h | 6 +-- > > net/mptcp/subflow.c | 124 ++++++++++++++++++++++++++++++++++++------- > > 3 files changed, 107 insertions(+), 58 deletions(-) > > > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > > index 5160256de731..0d8005b480ab 100644 > > --- a/net/mptcp/protocol.c > > +++ b/net/mptcp/protocol.c > > @@ -520,35 +520,6 @@ static bool mptcp_check_data_fin(struct sock *sk) > > return ret; > > } > > > > -static bool mptcp_validate_data_checksum(struct sock *ssk) > > -{ > > - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); > > - struct mptcp_sock *msk = mptcp_sk(subflow->conn); > > - struct csum_pseudo_header header; > > - __wsum csum; > > - > > - if (__mptcp_check_fallback(msk)) > > - goto out; > > - > > - if (subflow->csum_len < subflow->map_data_len) > > - goto out; > > - > > - header.data_seq = subflow->map_seq; > > - header.subflow_seq = subflow->map_subflow_seq; > > - header.data_len = subflow->map_data_len; > > - header.csum = subflow->map_csum; > > - > > - csum = csum_partial(&header, sizeof(header), subflow->data_csum); > > - > > - if (csum_fold(csum)) > > - return false; > > - subflow->data_csum = 0; > > - subflow->csum_len = 0; > > - > > -out: > > - return true; > > -} > > - > > static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, > > struct sock *ssk, > > unsigned int *bytes) > > @@ -617,12 +588,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, > > if (tp->urg_data) > > done = true; > > > > - if (READ_ONCE(msk->csum_enabled)) { > > - subflow->data_csum = skb_checksum(skb, offset, len, > > - subflow->data_csum); > > - subflow->csum_len += len; > > - mptcp_validate_data_checksum(ssk); > > - } > > if (__mptcp_move_skb(msk, ssk, skb, offset, len)) > > moved += len; > > seq += len; > > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > > index 176f175a00bd..766412e50e73 100644 > > --- a/net/mptcp/protocol.h > > +++ 
b/net/mptcp/protocol.h > > @@ -399,9 +399,8 @@ struct mptcp_subflow_context { > > u32 map_subflow_seq; > > u32 ssn_offset; > > u32 map_data_len; > > - __wsum data_csum; > > - u32 csum_len; > > - __sum16 map_csum; > > + __wsum map_data_csum; > > + u32 map_csum_len; > > u32 request_mptcp : 1, /* send MP_CAPABLE */ > > request_join : 1, /* send MP_JOIN */ > > request_bkup : 1, > > @@ -411,6 +410,7 @@ struct mptcp_subflow_context { > > pm_notified : 1, /* PM hook called for established status */ > > conn_finished : 1, > > map_valid : 1, > > + map_csum_reqd : 1, > > mpc_map : 1, > > backup : 1, > > send_mp_prio : 1, > > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c > > index 68efc81eaf2c..6a3c161ebe34 100644 > > --- a/net/mptcp/subflow.c > > +++ b/net/mptcp/subflow.c > > @@ -829,10 +829,81 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) > > return true; > > } > > > > +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, > > + bool csum_reqd) > > +{ > > + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); > > + struct csum_pseudo_header header; > > + u32 offset, seq, delta; > > + __wsum csum; > > + int len; > > + > > + if (!csum_reqd) > > + return MAPPING_OK; > > + > > + /* mapping already validated on previous traversal */ > > + if (subflow->map_csum_len == subflow->map_data_len) > > + return MAPPING_OK; > > + > > + /* traverse the receive queue, ensuring it contains a full > > + * DSS mapping and accumulating the related csum. 
> > + * Preserve the accoumlate csum across multiple calls, to compute > > + * the csum only once > > + */ > > + delta = subflow->map_data_len - subflow->map_csum_len; > > + for (;;) { > > + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; > > + offset = seq - TCP_SKB_CB(skb)->seq; > > + > > + /* if the current skb has not been accounted yet, csum its contents > > + * up to the amount covered by the current DSS > > + */ > > + if (offset < skb->len) { > > + len = min(skb->len - offset, delta); > > + delta -= len; > > + subflow->map_csum_len += len; > > + subflow->map_data_csum = skb_checksum(skb, offset, len, > > + subflow->map_data_csum); > > + } > > + if (delta == 0) > > + break; > > + > > + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { > > + /* if this subflow is closed, the partial mapping > > + * will be never completed; flush the pending skbs, so > > + * that subflow_sched_work_if_closed() can kick in > > + */ > > + if (unlikely(ssk->sk_state == TCP_CLOSE)) > > + while ((skb = skb_peek(&ssk->sk_receive_queue))) > > + sk_eat_skb(ssk, skb); > > + > > + /* not enough data to validate the csum */ > > + return MAPPING_EMPTY; > > + } > > + > > + /* the DSS mapping for next skbs will be validated later, > > + * when a get_mapping_status call will process such skb > > + */ > > + skb = skb->next; > > + } > > + > > + header.data_seq = subflow->map_seq; > > + header.subflow_seq = subflow->map_subflow_seq; > > + header.data_len = subflow->map_data_len; > > + header.csum = 0; > > + > > + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); > > + if (unlikely(csum_fold(csum))) > > + return subflow->mp_join ? 
MAPPING_INVALID : MAPPING_DUMMY; > > + > > + return MAPPING_OK; > > +} > > + > > static enum mapping_status get_mapping_status(struct sock *ssk, > > struct mptcp_sock *msk) > > { > > struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); > > + bool csum_reqd = READ_ONCE(msk->csum_enabled); > > struct mptcp_ext *mpext; > > struct sk_buff *skb; > > u16 data_len; > > @@ -926,9 +997,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > > /* Allow replacing only with an identical map */ > > if (subflow->map_seq == map_seq && > > subflow->map_subflow_seq == mpext->subflow_seq && > > - subflow->map_data_len == data_len) { > > + subflow->map_data_len == data_len && > > + subflow->map_csum_reqd == mpext->csum_reqd) { > > skb_ext_del(skb, SKB_EXT_MPTCP); > > - return MAPPING_OK; > > + goto validate_csum; > > } > > > > /* If this skb data are fully covered by the current mapping, > > @@ -940,7 +1012,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > > } > > > > /* will validate the next map after consuming the current one */ > > - return MAPPING_OK; > > + goto validate_csum; > > } > > > > subflow->map_seq = map_seq; > > @@ -948,12 +1020,18 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > > subflow->map_data_len = data_len; > > subflow->map_valid = 1; > > subflow->mpc_map = mpext->mpc_map; > > - subflow->data_csum = 0; > > - subflow->csum_len = 0; > > - subflow->map_csum = mpext->csum; > > - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%u", > > + subflow->map_csum_reqd = mpext->csum_reqd; > > + subflow->map_csum_len = 0; > > + subflow->map_data_csum = csum_unfold(mpext->csum); > > + > > + /* Cfr RFC 8684 Section 3.3.0 */ > > + if (unlikely(subflow->map_csum_reqd != csum_reqd)) > > + return MAPPING_INVALID; > > + > > + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", > > subflow->map_seq, subflow->map_subflow_seq, > > - subflow->map_data_len, subflow->map_csum); > > + 
subflow->map_data_len, subflow->map_csum_reqd, > > + subflow->map_data_csum); > > > > validate_seq: > > /* we revalidate valid mapping on new skb, because we must ensure > > @@ -963,7 +1041,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > > return MAPPING_INVALID; > > > > skb_ext_del(skb, SKB_EXT_MPTCP); > > - return MAPPING_OK; > > + > > +validate_csum: > > + return validate_data_csum(ssk, skb, csum_reqd); > > } > > > > static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, > > @@ -1024,17 +1104,8 @@ static bool subflow_check_data_avail(struct sock *ssk) > > ssk->sk_err = EBADMSG; > > goto fatal; > > } > > - if (status == MAPPING_DUMMY) { > > - __mptcp_do_fallback(msk); > > - skb = skb_peek(&ssk->sk_receive_queue); > > - subflow->map_valid = 1; > > - subflow->map_seq = READ_ONCE(msk->ack_seq); > > - subflow->map_data_len = skb->len; > > - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - > > - subflow->ssn_offset; > > - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; > > - return true; > > - } > > + if (status == MAPPING_DUMMY) > > + goto fallback; > > > > if (status != MAPPING_OK) > > goto no_data; > > @@ -1089,6 +1160,19 @@ static bool subflow_check_data_avail(struct sock *ssk) > > tcp_send_active_reset(ssk, GFP_ATOMIC); > > subflow->data_avail = 0; > > return false; > > + > > +fallback: > > + __mptcp_do_fallback(msk); > > + skb = skb_peek(&ssk->sk_receive_queue); > > + subflow->map_csum_reqd = 0; > > + subflow->map_valid = 1; > > + subflow->map_seq = READ_ONCE(msk->ack_seq); > > + subflow->map_data_len = skb->len; > > + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - > > + subflow->ssn_offset; > > + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; > > + return true; > > + > > I'll drop this blank line here. > > Furthermore, move this fallback trunk as a separated patch if you don't > mind. Agreed. Thanks! Paolo
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5160256de731..0d8005b480ab 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -520,35 +520,6 @@ static bool mptcp_check_data_fin(struct sock *sk) return ret; } -static bool mptcp_validate_data_checksum(struct sock *ssk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct csum_pseudo_header header; - __wsum csum; - - if (__mptcp_check_fallback(msk)) - goto out; - - if (subflow->csum_len < subflow->map_data_len) - goto out; - - header.data_seq = subflow->map_seq; - header.subflow_seq = subflow->map_subflow_seq; - header.data_len = subflow->map_data_len; - header.csum = subflow->map_csum; - - csum = csum_partial(&header, sizeof(header), subflow->data_csum); - - if (csum_fold(csum)) - return false; - subflow->data_csum = 0; - subflow->csum_len = 0; - -out: - return true; -} - static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sock *ssk, unsigned int *bytes) @@ -617,12 +588,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, if (tp->urg_data) done = true; - if (READ_ONCE(msk->csum_enabled)) { - subflow->data_csum = skb_checksum(skb, offset, len, - subflow->data_csum); - subflow->csum_len += len; - mptcp_validate_data_checksum(ssk); - } if (__mptcp_move_skb(msk, ssk, skb, offset, len)) moved += len; seq += len; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 176f175a00bd..766412e50e73 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -399,9 +399,8 @@ struct mptcp_subflow_context { u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; - __wsum data_csum; - u32 csum_len; - __sum16 map_csum; + __wsum map_data_csum; + u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, @@ -411,6 +410,7 @@ struct mptcp_subflow_context { pm_notified : 1, /* PM hook called for established status */ 
conn_finished : 1, map_valid : 1, + map_csum_reqd : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 68efc81eaf2c..6a3c161ebe34 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -829,10 +829,81 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) return true; } +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + len = min(skb->len - offset, delta); + delta -= len; + subflow->map_csum_len += len; + subflow->map_data_csum = skb_checksum(skb, offset, len, + subflow->map_data_csum); + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * 
when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + header.data_seq = subflow->map_seq; + header.subflow_seq = subflow->map_subflow_seq; + header.data_len = subflow->map_data_len; + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -926,9 +997,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; } /* If this skb data are fully covered by the current mapping, @@ -940,7 +1012,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } /* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; } subflow->map_seq = map_seq; @@ -948,12 +1020,18 @@ static enum mapping_status get_mapping_status(struct sock *ssk, subflow->map_data_len = data_len; subflow->map_valid = 1; subflow->mpc_map = mpext->mpc_map; - subflow->data_csum = 0; - subflow->csum_len = 0; - subflow->map_csum = mpext->csum; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map 
seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len, subflow->map_csum); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure @@ -963,7 +1041,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, return MAPPING_INVALID; skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, @@ -1024,17 +1104,8 @@ static bool subflow_check_data_avail(struct sock *ssk) ssk->sk_err = EBADMSG; goto fatal; } - if (status == MAPPING_DUMMY) { - __mptcp_do_fallback(msk); - skb = skb_peek(&ssk->sk_receive_queue); - subflow->map_valid = 1; - subflow->map_seq = READ_ONCE(msk->ack_seq); - subflow->map_data_len = skb->len; - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - return true; - } + if (status == MAPPING_DUMMY) + goto fallback; if (status != MAPPING_OK) goto no_data; @@ -1089,6 +1160,19 @@ static bool subflow_check_data_avail(struct sock *ssk) tcp_send_active_reset(ssk, GFP_ATOMIC); subflow->data_avail = 0; return false; + +fallback: + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_csum_reqd = 0; + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - + subflow->ssn_offset; + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + return true; + } bool mptcp_subflow_data_available(struct sock *sk)
Move the RX csum validation into get_mapping_status(). If the current mapping is valid and csum is enabled traverse the later pending skbs and compute csum incrementally till the whole mapping has been covered. If not enough data is available in the rx queue, return MAPPING_EMPTY - that is, no data. Next subflow_data_ready invocation will trigger again csum computation. When the full DSS is available, validate the csum and return to the caller an appropriate error code, to trigger subflow reset or fallback as required by the RFC. Additionally: - if the csum presence in the DSS doesn't match the negotiated value e.g. csum present, but not requested, return invalid mapping to trigger subflow reset. - keep some csum state, to avoid re-computing the csum on the same data when multiple rx queue traversals are required. - clean-up the uncompleted mapping from the receive queue on close, to allow proper subflow disposal Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 35 ------------ net/mptcp/protocol.h | 6 +-- net/mptcp/subflow.c | 124 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 107 insertions(+), 58 deletions(-)