Message ID | eb929eeebdf95c04a0bc0414cfd2310b5e554d36.1522136948.git.shahafs@mellanox.com |
---|---|
State | Superseded |
Headers | show |
Series | OVS-DPDK flow offload with rte_flow | expand |
> Subject: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from the flow > mark > > From: Yuanhan Liu <yliu@fridaylinux.org> > > So that we could skip some very costly CPU operations, including but not > limiting to miniflow_extract, emc lookup, dpcls lookup, etc. Thus, > performance could be greatly improved. > > A PHY-PHY forwarding with 1000 mega flows (udp,tp_src=1000-1999) and > 1 million streams (tp_src=1000-1999, tp_dst=2000-2999) show more that 260% > performance boost. > > Note that though the heavy miniflow_extract is skipped, we still have to > do per packet checking, due to we have to check the tcp_flags. > > Co-authored-by: Finn Christensen <fc@napatech.com> > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org> > Signed-off-by: Finn Christensen <fc@napatech.com> > Signed-off-by: Shahaf Shuler <shahafs@mellanox.com> > --- > lib/dp-packet.h | 13 +++++ > lib/dpif-netdev.c | 44 ++++++++++++-- > lib/flow.c | 155 +++++++++++++++++++++++++++++++++++++++---------- > lib/flow.h | 1 + > 4 files changed, 175 insertions(+), 38 deletions(-) > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 21c8ca5..dd3f17b > 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet > *p) #define reset_dp_packet_checksum_ol_flags(arg) > #endif > > +static inline bool > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > + uint32_t *mark OVS_UNUSED) { #ifdef DPDK_NETDEV > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > + *mark = p->mbuf.hash.fdir.hi; > + return true; > + } > +#endif > + return false; > +} > + > enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. 
*/ > > struct dp_packet_batch { > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2fdb6ef..7489a2f > 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -2015,6 +2015,23 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd) > } > } > > +static struct dp_netdev_flow * > +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, > + const uint32_t mark) > +{ > + struct dp_netdev_flow *flow; > + > + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0), > + &flow_mark.mark_to_flow) { > + if (flow->mark == mark && flow->pmd_id == pmd->core_id && > + flow->dead == false) { > + return flow; > + } > + } > + > + return NULL; > +} > + > static void > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, > struct dp_netdev_flow *flow) @@ -5204,10 > +5221,10 @@ struct packet_batch_per_flow { static inline void > packet_batch_per_flow_update(struct packet_batch_per_flow *batch, > struct dp_packet *packet, > - const struct miniflow *mf) > + uint16_t tcp_flags) > { > batch->byte_count += dp_packet_size(packet); > - batch->tcp_flags |= miniflow_get_tcp_flags(mf); > + batch->tcp_flags |= tcp_flags; > batch->array.packets[batch->array.count++] = packet; } > > @@ -5241,7 +5258,7 @@ packet_batch_per_flow_execute(struct > packet_batch_per_flow *batch, > > static inline void > dp_netdev_queue_batches(struct dp_packet *pkt, > - struct dp_netdev_flow *flow, const struct > miniflow *mf, > + struct dp_netdev_flow *flow, uint16_t > + tcp_flags, > struct packet_batch_per_flow *batches, > size_t *n_batches) { @@ -5252,7 +5269,7 @@ > dp_netdev_queue_batches(struct dp_packet *pkt, > packet_batch_per_flow_init(batch, flow); > } > > - packet_batch_per_flow_update(batch, pkt, mf); > + packet_batch_per_flow_update(batch, pkt, tcp_flags); > } > > /* Try to process all ('cnt') the 'packets' using only the exact match > cache @@ -5283,6 +5300,7 @@ emc_processing(struct dp_netdev_pmd_thread > *pmd, > const size_t cnt = dp_packet_batch_size(packets_); > uint32_t cur_min; 
> int i; > + uint16_t tcp_flags; > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > pmd_perf_update_counter(&pmd->perf_stats, > @@ -5291,6 +5309,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { > struct dp_netdev_flow *flow; > + uint32_t mark; > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > dp_packet_delete(packet); > @@ -5298,6 +5317,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > continue; > } > > + if (dp_packet_has_flow_mark(packet, &mark)) { > + flow = mark_to_flow_find(pmd, mark); > + if (flow) { > + tcp_flags = parse_tcp_flags(packet); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > + n_batches); > + continue; > + } > + } > + > if (i != cnt - 1) { > struct dp_packet **packets = packets_->packets; > /* Prefetch next packet data and metadata. */ @@ -5323,7 > +5352,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > flow = NULL; > } > if (OVS_LIKELY(flow)) { > - dp_netdev_queue_batches(packet, flow, &key->mf, batches, > + tcp_flags = miniflow_get_tcp_flags(&key->mf); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > n_batches); > } else { > /* Exact match cache missed. 
Group missed packets together at > @@ -5501,7 +5531,9 @@ fast_path_processing(struct dp_netdev_pmd_thread > *pmd, > flow = dp_netdev_flow_cast(rules[i]); > > emc_probabilistic_insert(pmd, &keys[i], flow); > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, > n_batches); > + dp_netdev_queue_batches(packet, flow, > + miniflow_get_tcp_flags(&keys[i].mf), > + batches, n_batches); > } > > pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT, diff - > -git a/lib/flow.c b/lib/flow.c index 38ff29c..6c74dd3 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -624,6 +624,70 @@ flow_extract(struct dp_packet *packet, struct flow > *flow) > miniflow_expand(&m.mf, flow); > } > > +static inline bool > +ipv4_sanity_check(const struct ip_header *nh, size_t size, > + int *ip_lenp, uint16_t *tot_lenp) { > + int ip_len; > + uint16_t tot_len; > + > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > + return false; > + } > + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > + > + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) { > + return false; > + } > + > + tot_len = ntohs(nh->ip_tot_len); > + if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len || > + size - tot_len > UINT8_MAX)) { > + return false; > + } > + > + *ip_lenp = ip_len; > + *tot_lenp = tot_len; > + > + return true; > +} > + > +static inline uint8_t > +ipv4_get_nw_frag(const struct ip_header *nh) { > + uint8_t nw_frag = 0; > + > + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > + nw_frag = FLOW_NW_FRAG_ANY; > + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > + nw_frag |= FLOW_NW_FRAG_LATER; > + } > + } > + > + return nw_frag; > +} > + > +static inline bool > +ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size) > +{ > + uint16_t plen; > + > + if (OVS_UNLIKELY(size < sizeof *nh)) { > + return false; > + } > + > + plen = ntohs(nh->ip6_plen); > + if (OVS_UNLIKELY(plen > size)) { > + return false; > + } > + /* Jumbo Payload option not supported yet. 
*/ > + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > + return false; > + } > + > + return true; > +} > + > /* Caller is responsible for initializing 'dst' with enough storage for > * FLOW_U64S * 8 bytes. */ > void > @@ -748,22 +812,7 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > int ip_len; > uint16_t tot_len; > > - if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > - goto out; > - } > - ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > - > - if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { > - goto out; > - } > - if (OVS_UNLIKELY(size < ip_len)) { > - goto out; > - } > - tot_len = ntohs(nh->ip_tot_len); > - if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) { > - goto out; > - } > - if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) { > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, > + &tot_len))) { You may have missed my comment in the v7 regarding this. There's no mention in the commit message of the introduction of IPv4/IPv6/fragmentation sanity checks. This affects miniflow extract regardless of HWOL usage. The refactor of miniflow extract to use these should be introduced as a separate patch in this series so it's clear that it is being modified. 
> goto out; > } > dp_packet_set_l2_pad_size(packet, size - tot_len); @@ -786,31 > +835,19 @@ miniflow_extract(struct dp_packet *packet, struct miniflow > *dst) > nw_tos = nh->ip_tos; > nw_ttl = nh->ip_ttl; > nw_proto = nh->ip_proto; > - if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > - nw_frag = FLOW_NW_FRAG_ANY; > - if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > - nw_frag |= FLOW_NW_FRAG_LATER; > - } > - } > + nw_frag = ipv4_get_nw_frag(nh); > data_pull(&data, &size, ip_len); > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > - const struct ovs_16aligned_ip6_hdr *nh; > + const struct ovs_16aligned_ip6_hdr *nh = data; > ovs_be32 tc_flow; > uint16_t plen; > > - if (OVS_UNLIKELY(size < sizeof *nh)) { > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { > goto out; > } > - nh = data_pull(&data, &size, sizeof *nh); > + data_pull(&data, &size, sizeof *nh); > > plen = ntohs(nh->ip6_plen); > - if (OVS_UNLIKELY(plen > size)) { > - goto out; > - } > - /* Jumbo Payload option not supported yet. */ > - if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > - goto out; > - } > dp_packet_set_l2_pad_size(packet, size - plen); > size = plen; /* Never pull padding. 
*/ > > @@ -982,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_, size_t > size) > return parse_ethertype(&data, &size); } > > +uint16_t > +parse_tcp_flags(struct dp_packet *packet) { > + const void *data = dp_packet_data(packet); > + size_t size = dp_packet_size(packet); > + ovs_be16 dl_type; > + uint8_t nw_frag = 0, nw_proto = 0; > + > + if (packet->packet_type != htonl(PT_ETH)) { > + return 0; > + } > + > + data_pull(&data, &size, ETH_ADDR_LEN * 2); > + dl_type = parse_ethertype(&data, &size); > + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { > + const struct ip_header *nh = data; > + int ip_len; > + uint16_t tot_len; > + > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, > &tot_len))) { > + return 0; > + } > + nw_proto = nh->ip_proto; > + nw_frag = ipv4_get_nw_frag(nh); > + > + size = tot_len; /* Never pull padding. */ > + data_pull(&data, &size, ip_len); > + } else if (dl_type == htons(ETH_TYPE_IPV6)) { > + const struct ovs_16aligned_ip6_hdr *nh = data; > + > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { > + return 0; > + } > + data_pull(&data, &size, sizeof *nh); > + > + size = ntohs(nh->ip6_plen); /* Never pull padding. */ > + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { > + return 0; > + } > + nw_proto = nh->ip6_nxt; > + } else { > + return 0; > + } > + > + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP && > + size >= TCP_HEADER_LEN) { > + const struct tcp_header *tcp = data; > + > + return TCP_FLAGS_BE32(tcp->tcp_ctl); Will cause compilation failure with sparse when compiling without DPDK. lib/flow.c:1070:16: error: incorrect type in return expression (different base types) lib/flow.c:1070:16: expected unsigned short lib/flow.c:1070:16: got restricted ovs_be32 [usertype] <noident> https://travis-ci.org/istokes/ovs/jobs/364709985 > + } > + > + return 0; > +} > + > /* For every bit of a field that is wildcarded in 'wildcards', sets the > * corresponding bit in 'flow' to zero. 
*/ void diff --git a/lib/flow.h > b/lib/flow.h index 770a07a..0adecbf 100644 > --- a/lib/flow.h > +++ b/lib/flow.h > @@ -132,6 +132,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t > *sizep, uint8_t *nw_proto, > uint8_t *nw_frag); > ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size); > bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh > *key); > +uint16_t parse_tcp_flags(struct dp_packet *packet); > > static inline uint64_t > flow_get_xreg(const struct flow *flow, int idx) > -- > 2.7.4
Tuesday, April 10, 2018 10:58 PM, Stokes, Ian: > Subject: RE: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from the flow > mark > > > Subject: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from the > > flow mark > > > > From: Yuanhan Liu <yliu@fridaylinux.org> > > > > So that we could skip some very costly CPU operations, including but > > not limiting to miniflow_extract, emc lookup, dpcls lookup, etc. Thus, > > performance could be greatly improved. > > > > A PHY-PHY forwarding with 1000 mega flows (udp,tp_src=1000-1999) and > > 1 million streams (tp_src=1000-1999, tp_dst=2000-2999) show more that > > 260% performance boost. > > > > Note that though the heavy miniflow_extract is skipped, we still have > > to do per packet checking, due to we have to check the tcp_flags. > > > > Co-authored-by: Finn Christensen <fc@napatech.com> > > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org> > > Signed-off-by: Finn Christensen <fc@napatech.com> > > Signed-off-by: Shahaf Shuler <shahafs@mellanox.com> > > --- > > lib/dp-packet.h | 13 +++++ > > lib/dpif-netdev.c | 44 ++++++++++++-- > > lib/flow.c | 155 +++++++++++++++++++++++++++++++++++++++----- > ----- > > lib/flow.h | 1 + > > 4 files changed, 175 insertions(+), 38 deletions(-) > > > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 21c8ca5..dd3f17b > > 100644 > > --- a/lib/dp-packet.h > > +++ b/lib/dp-packet.h > > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct > > dp_packet > > *p) #define reset_dp_packet_checksum_ol_flags(arg) > > #endif > > > > +static inline bool > > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > > + uint32_t *mark OVS_UNUSED) { #ifdef DPDK_NETDEV > > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > > + *mark = p->mbuf.hash.fdir.hi; > > + return true; > > + } > > +#endif > > + return false; > > +} > > + > > enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a > batch. 
> > */ > > > > struct dp_packet_batch { > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index > > 2fdb6ef..7489a2f > > 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -2015,6 +2015,23 @@ flow_mark_flush(struct > dp_netdev_pmd_thread *pmd) > > } > > } > > > > +static struct dp_netdev_flow * > > +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, > > + const uint32_t mark) { > > + struct dp_netdev_flow *flow; > > + > > + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0), > > + &flow_mark.mark_to_flow) { > > + if (flow->mark == mark && flow->pmd_id == pmd->core_id && > > + flow->dead == false) { > > + return flow; > > + } > > + } > > + > > + return NULL; > > +} > > + > > static void > > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, > > struct dp_netdev_flow *flow) @@ -5204,10 > > +5221,10 @@ struct packet_batch_per_flow { static inline void > > packet_batch_per_flow_update(struct packet_batch_per_flow *batch, > > struct dp_packet *packet, > > - const struct miniflow *mf) > > + uint16_t tcp_flags) > > { > > batch->byte_count += dp_packet_size(packet); > > - batch->tcp_flags |= miniflow_get_tcp_flags(mf); > > + batch->tcp_flags |= tcp_flags; > > batch->array.packets[batch->array.count++] = packet; } > > > > @@ -5241,7 +5258,7 @@ packet_batch_per_flow_execute(struct > > packet_batch_per_flow *batch, > > > > static inline void > > dp_netdev_queue_batches(struct dp_packet *pkt, > > - struct dp_netdev_flow *flow, const struct > > miniflow *mf, > > + struct dp_netdev_flow *flow, uint16_t > > + tcp_flags, > > struct packet_batch_per_flow *batches, > > size_t *n_batches) { @@ -5252,7 +5269,7 @@ > > dp_netdev_queue_batches(struct dp_packet *pkt, > > packet_batch_per_flow_init(batch, flow); > > } > > > > - packet_batch_per_flow_update(batch, pkt, mf); > > + packet_batch_per_flow_update(batch, pkt, tcp_flags); > > } > > > > /* Try to process all ('cnt') the 'packets' using only the exact > > match cache @@ -5283,6 
+5300,7 @@ emc_processing(struct > > dp_netdev_pmd_thread *pmd, > > const size_t cnt = dp_packet_batch_size(packets_); > > uint32_t cur_min; > > int i; > > + uint16_t tcp_flags; > > > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > > pmd_perf_update_counter(&pmd->perf_stats, > > @@ -5291,6 +5309,7 @@ emc_processing(struct dp_netdev_pmd_thread > *pmd, > > > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { > > struct dp_netdev_flow *flow; > > + uint32_t mark; > > > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > > dp_packet_delete(packet); @@ -5298,6 +5317,16 @@ > > emc_processing(struct dp_netdev_pmd_thread *pmd, > > continue; > > } > > > > + if (dp_packet_has_flow_mark(packet, &mark)) { > > + flow = mark_to_flow_find(pmd, mark); > > + if (flow) { > > + tcp_flags = parse_tcp_flags(packet); > > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > > + n_batches); > > + continue; > > + } > > + } > > + > > if (i != cnt - 1) { > > struct dp_packet **packets = packets_->packets; > > /* Prefetch next packet data and metadata. */ @@ -5323,7 > > +5352,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > > flow = NULL; > > } > > if (OVS_LIKELY(flow)) { > > - dp_netdev_queue_batches(packet, flow, &key->mf, batches, > > + tcp_flags = miniflow_get_tcp_flags(&key->mf); > > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > > n_batches); > > } else { > > /* Exact match cache missed. 
Group missed packets > > together at @@ -5501,7 +5531,9 @@ fast_path_processing(struct > > dp_netdev_pmd_thread *pmd, > > flow = dp_netdev_flow_cast(rules[i]); > > > > emc_probabilistic_insert(pmd, &keys[i], flow); > > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, > > n_batches); > > + dp_netdev_queue_batches(packet, flow, > > + miniflow_get_tcp_flags(&keys[i].mf), > > + batches, n_batches); > > } > > > > pmd_perf_update_counter(&pmd->perf_stats, > PMD_STAT_MASKED_HIT, > > diff - -git a/lib/flow.c b/lib/flow.c index 38ff29c..6c74dd3 100644 > > --- a/lib/flow.c > > +++ b/lib/flow.c > > @@ -624,6 +624,70 @@ flow_extract(struct dp_packet *packet, struct > > flow > > *flow) > > miniflow_expand(&m.mf, flow); > > } > > > > +static inline bool > > +ipv4_sanity_check(const struct ip_header *nh, size_t size, > > + int *ip_lenp, uint16_t *tot_lenp) { > > + int ip_len; > > + uint16_t tot_len; > > + > > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > > + return false; > > + } > > + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > > + > > + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) { > > + return false; > > + } > > + > > + tot_len = ntohs(nh->ip_tot_len); > > + if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len || > > + size - tot_len > UINT8_MAX)) { > > + return false; > > + } > > + > > + *ip_lenp = ip_len; > > + *tot_lenp = tot_len; > > + > > + return true; > > +} > > + > > +static inline uint8_t > > +ipv4_get_nw_frag(const struct ip_header *nh) { > > + uint8_t nw_frag = 0; > > + > > + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > > + nw_frag = FLOW_NW_FRAG_ANY; > > + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > > + nw_frag |= FLOW_NW_FRAG_LATER; > > + } > > + } > > + > > + return nw_frag; > > +} > > + > > +static inline bool > > +ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t > > +size) { > > + uint16_t plen; > > + > > + if (OVS_UNLIKELY(size < sizeof *nh)) { > > + return false; > > + } > > + > > + plen = 
ntohs(nh->ip6_plen); > > + if (OVS_UNLIKELY(plen > size)) { > > + return false; > > + } > > + /* Jumbo Payload option not supported yet. */ > > + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > > + return false; > > + } > > + > > + return true; > > +} > > + > > /* Caller is responsible for initializing 'dst' with enough storage for > > * FLOW_U64S * 8 bytes. */ > > void > > @@ -748,22 +812,7 @@ miniflow_extract(struct dp_packet *packet, struct > > miniflow *dst) > > int ip_len; > > uint16_t tot_len; > > > > - if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > > - goto out; > > - } > > - ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > > - > > - if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { > > - goto out; > > - } > > - if (OVS_UNLIKELY(size < ip_len)) { > > - goto out; > > - } > > - tot_len = ntohs(nh->ip_tot_len); > > - if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) { > > - goto out; > > - } > > - if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) { > > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, > > + &tot_len))) { > > You may have missed my comment in the v7 regarding this. > > There's no mention in commit message of the introduction of > IPv4/IPv6/fragmentation sanity checks. > > This affects minflow extract regardless of HWOL usage. The miniflow extract > refactor should to use these should be introduced as a separate patch to this > series so it's clear that it is being modified. 
> > > goto out; > > } > > dp_packet_set_l2_pad_size(packet, size - tot_len); @@ -786,31 > > +835,19 @@ miniflow_extract(struct dp_packet *packet, struct miniflow > > *dst) > > nw_tos = nh->ip_tos; > > nw_ttl = nh->ip_ttl; > > nw_proto = nh->ip_proto; > > - if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > > - nw_frag = FLOW_NW_FRAG_ANY; > > - if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > > - nw_frag |= FLOW_NW_FRAG_LATER; > > - } > > - } > > + nw_frag = ipv4_get_nw_frag(nh); > > data_pull(&data, &size, ip_len); > > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > > - const struct ovs_16aligned_ip6_hdr *nh; > > + const struct ovs_16aligned_ip6_hdr *nh = data; > > ovs_be32 tc_flow; > > uint16_t plen; > > > > - if (OVS_UNLIKELY(size < sizeof *nh)) { > > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { > > goto out; > > } > > - nh = data_pull(&data, &size, sizeof *nh); > > + data_pull(&data, &size, sizeof *nh); > > > > plen = ntohs(nh->ip6_plen); > > - if (OVS_UNLIKELY(plen > size)) { > > - goto out; > > - } > > - /* Jumbo Payload option not supported yet. */ > > - if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > > - goto out; > > - } > > dp_packet_set_l2_pad_size(packet, size - plen); > > size = plen; /* Never pull padding. 
*/ > > > > @@ -982,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_, > > size_t > > size) > > return parse_ethertype(&data, &size); } > > > > +uint16_t > > +parse_tcp_flags(struct dp_packet *packet) { > > + const void *data = dp_packet_data(packet); > > + size_t size = dp_packet_size(packet); > > + ovs_be16 dl_type; > > + uint8_t nw_frag = 0, nw_proto = 0; > > + > > + if (packet->packet_type != htonl(PT_ETH)) { > > + return 0; > > + } > > + > > + data_pull(&data, &size, ETH_ADDR_LEN * 2); > > + dl_type = parse_ethertype(&data, &size); > > + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { > > + const struct ip_header *nh = data; > > + int ip_len; > > + uint16_t tot_len; > > + > > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, > > &tot_len))) { > > + return 0; > > + } > > + nw_proto = nh->ip_proto; > > + nw_frag = ipv4_get_nw_frag(nh); > > + > > + size = tot_len; /* Never pull padding. */ > > + data_pull(&data, &size, ip_len); > > + } else if (dl_type == htons(ETH_TYPE_IPV6)) { > > + const struct ovs_16aligned_ip6_hdr *nh = data; > > + > > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { > > + return 0; > > + } > > + data_pull(&data, &size, sizeof *nh); > > + > > + size = ntohs(nh->ip6_plen); /* Never pull padding. */ > > + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { > > + return 0; > > + } > > + nw_proto = nh->ip6_nxt; > > + } else { > > + return 0; > > + } > > + > > + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == > IPPROTO_TCP && > > + size >= TCP_HEADER_LEN) { > > + const struct tcp_header *tcp = data; > > + > > + return TCP_FLAGS_BE32(tcp->tcp_ctl); > > Will cause compilation failure with sparse when compiling without DPDK. > > lib/flow.c:1070:16: error: incorrect type in return expression (different base > types) > lib/flow.c:1070:16: expected unsigned short > lib/flow.c:1070:16: got restricted ovs_be32 [usertype] <noident> Thanks, Is there a way for me to re-run travis build so I can verify it is fixed? 
> > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Ftra > vis- > ci.org%2Fistokes%2Fovs%2Fjobs%2F364709985&data=02%7C01%7Cshahafs% > 40mellanox.com%7C624a50f6dcd64230e11c08d59f1d6f66%7Ca652971c7d2e4 > d9ba6a4d149256f461b%7C0%7C0%7C636589871173668041&sdata=bZ6FwLkX > B0%2FphhOaQBb6tBjAkNaA8to%2BSWiuK6zdBew%3D&reserved=0 > > > + } > > + > > + return 0; > > +} > > + > > /* For every bit of a field that is wildcarded in 'wildcards', sets the > > * corresponding bit in 'flow' to zero. */ void diff --git > > a/lib/flow.h b/lib/flow.h index 770a07a..0adecbf 100644 > > --- a/lib/flow.h > > +++ b/lib/flow.h > > @@ -132,6 +132,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, > > size_t *sizep, uint8_t *nw_proto, > > uint8_t *nw_frag); > > ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size); > > bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh > > *key); > > +uint16_t parse_tcp_flags(struct dp_packet *packet); > > > > static inline uint64_t > > flow_get_xreg(const struct flow *flow, int idx) > > -- > > 2.7.4
> Tuesday, April 10, 2018 10:58 PM, Stokes, Ian: > > Subject: RE: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from > > the flow mark > > > > > Subject: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from the > > > flow mark > > > > > > From: Yuanhan Liu <yliu@fridaylinux.org> > > > > > > So that we could skip some very costly CPU operations, including but > > > not limiting to miniflow_extract, emc lookup, dpcls lookup, etc. > > > Thus, performance could be greatly improved. > > > > > > A PHY-PHY forwarding with 1000 mega flows (udp,tp_src=1000-1999) and > > > 1 million streams (tp_src=1000-1999, tp_dst=2000-2999) show more > > > that 260% performance boost. > > > > > > Note that though the heavy miniflow_extract is skipped, we still > > > have to do per packet checking, due to we have to check the tcp_flags. > > > > > > Co-authored-by: Finn Christensen <fc@napatech.com> > > > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org> > > > Signed-off-by: Finn Christensen <fc@napatech.com> > > > Signed-off-by: Shahaf Shuler <shahafs@mellanox.com> > > > --- > > > lib/dp-packet.h | 13 +++++ > > > lib/dpif-netdev.c | 44 ++++++++++++-- > > > lib/flow.c | 155 +++++++++++++++++++++++++++++++++++++++----- > > ----- > > > lib/flow.h | 1 + > > > 4 files changed, 175 insertions(+), 38 deletions(-) > > > > > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h index > > > 21c8ca5..dd3f17b > > > 100644 > > > --- a/lib/dp-packet.h > > > +++ b/lib/dp-packet.h > > > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct > > > dp_packet > > > *p) #define reset_dp_packet_checksum_ol_flags(arg) > > > #endif > > > > > > +static inline bool > > > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > > > + uint32_t *mark OVS_UNUSED) { #ifdef > DPDK_NETDEV > > > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > > > + *mark = p->mbuf.hash.fdir.hi; > > > + return true; > > > + } > > > +#endif > > > + return false; > > > +} > > > + > > > enum { NETDEV_MAX_BURST = 32 }; /* Maximum 
number packets in a > > batch. > > > */ > > > > > > struct dp_packet_batch { > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index > > > 2fdb6ef..7489a2f > > > 100644 > > > --- a/lib/dpif-netdev.c > > > +++ b/lib/dpif-netdev.c > > > @@ -2015,6 +2015,23 @@ flow_mark_flush(struct > > dp_netdev_pmd_thread *pmd) > > > } > > > } > > > > > > +static struct dp_netdev_flow * > > > +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, > > > + const uint32_t mark) { > > > + struct dp_netdev_flow *flow; > > > + > > > + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0), > > > + &flow_mark.mark_to_flow) { > > > + if (flow->mark == mark && flow->pmd_id == pmd->core_id && > > > + flow->dead == false) { > > > + return flow; > > > + } > > > + } > > > + > > > + return NULL; > > > +} > > > + > > > static void > > > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, > > > struct dp_netdev_flow *flow) @@ -5204,10 > > > +5221,10 @@ struct packet_batch_per_flow { static inline void > > > packet_batch_per_flow_update(struct packet_batch_per_flow *batch, > > > struct dp_packet *packet, > > > - const struct miniflow *mf) > > > + uint16_t tcp_flags) > > > { > > > batch->byte_count += dp_packet_size(packet); > > > - batch->tcp_flags |= miniflow_get_tcp_flags(mf); > > > + batch->tcp_flags |= tcp_flags; > > > batch->array.packets[batch->array.count++] = packet; } > > > > > > @@ -5241,7 +5258,7 @@ packet_batch_per_flow_execute(struct > > > packet_batch_per_flow *batch, > > > > > > static inline void > > > dp_netdev_queue_batches(struct dp_packet *pkt, > > > - struct dp_netdev_flow *flow, const struct > > > miniflow *mf, > > > + struct dp_netdev_flow *flow, uint16_t > > > + tcp_flags, > > > struct packet_batch_per_flow *batches, > > > size_t *n_batches) { @@ -5252,7 +5269,7 @@ > > > dp_netdev_queue_batches(struct dp_packet *pkt, > > > packet_batch_per_flow_init(batch, flow); > > > } > > > > > > - packet_batch_per_flow_update(batch, pkt, mf); > > > + 
packet_batch_per_flow_update(batch, pkt, tcp_flags); > > > } > > > > > > /* Try to process all ('cnt') the 'packets' using only the exact > > > match cache @@ -5283,6 +5300,7 @@ emc_processing(struct > > > dp_netdev_pmd_thread *pmd, > > > const size_t cnt = dp_packet_batch_size(packets_); > > > uint32_t cur_min; > > > int i; > > > + uint16_t tcp_flags; > > > > > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > > > pmd_perf_update_counter(&pmd->perf_stats, > > > @@ -5291,6 +5309,7 @@ emc_processing(struct dp_netdev_pmd_thread > > *pmd, > > > > > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { > > > struct dp_netdev_flow *flow; > > > + uint32_t mark; > > > > > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > > > dp_packet_delete(packet); @@ -5298,6 +5317,16 @@ > > > emc_processing(struct dp_netdev_pmd_thread *pmd, > > > continue; > > > } > > > > > > + if (dp_packet_has_flow_mark(packet, &mark)) { > > > + flow = mark_to_flow_find(pmd, mark); > > > + if (flow) { > > > + tcp_flags = parse_tcp_flags(packet); > > > + dp_netdev_queue_batches(packet, flow, tcp_flags, > batches, > > > + n_batches); > > > + continue; > > > + } > > > + } > > > + > > > if (i != cnt - 1) { > > > struct dp_packet **packets = packets_->packets; > > > /* Prefetch next packet data and metadata. */ @@ > > > -5323,7 > > > +5352,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > > > flow = NULL; > > > } > > > if (OVS_LIKELY(flow)) { > > > - dp_netdev_queue_batches(packet, flow, &key->mf, batches, > > > + tcp_flags = miniflow_get_tcp_flags(&key->mf); > > > + dp_netdev_queue_batches(packet, flow, tcp_flags, > > > + batches, > > > n_batches); > > > } else { > > > /* Exact match cache missed. 
Group missed packets > > > together at @@ -5501,7 +5531,9 @@ fast_path_processing(struct > > > dp_netdev_pmd_thread *pmd, > > > flow = dp_netdev_flow_cast(rules[i]); > > > > > > emc_probabilistic_insert(pmd, &keys[i], flow); > > > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, > > > n_batches); > > > + dp_netdev_queue_batches(packet, flow, > > > + miniflow_get_tcp_flags(&keys[i].mf), > > > + batches, n_batches); > > > } > > > > > > pmd_perf_update_counter(&pmd->perf_stats, > > PMD_STAT_MASKED_HIT, > > > diff - -git a/lib/flow.c b/lib/flow.c index 38ff29c..6c74dd3 100644 > > > --- a/lib/flow.c > > > +++ b/lib/flow.c > > > @@ -624,6 +624,70 @@ flow_extract(struct dp_packet *packet, struct > > > flow > > > *flow) > > > miniflow_expand(&m.mf, flow); > > > } > > > > > > +static inline bool > > > +ipv4_sanity_check(const struct ip_header *nh, size_t size, > > > + int *ip_lenp, uint16_t *tot_lenp) { > > > + int ip_len; > > > + uint16_t tot_len; > > > + > > > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > > > + return false; > > > + } > > > + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > > > + > > > + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) { > > > + return false; > > > + } > > > + > > > + tot_len = ntohs(nh->ip_tot_len); > > > + if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len || > > > + size - tot_len > UINT8_MAX)) { > > > + return false; > > > + } > > > + > > > + *ip_lenp = ip_len; > > > + *tot_lenp = tot_len; > > > + > > > + return true; > > > +} > > > + > > > +static inline uint8_t > > > +ipv4_get_nw_frag(const struct ip_header *nh) { > > > + uint8_t nw_frag = 0; > > > + > > > + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > > > + nw_frag = FLOW_NW_FRAG_ANY; > > > + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > > > + nw_frag |= FLOW_NW_FRAG_LATER; > > > + } > > > + } > > > + > > > + return nw_frag; > > > +} > > > + > > > +static inline bool > > > +ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t > > > +size) { 
> > > + uint16_t plen; > > > + > > > + if (OVS_UNLIKELY(size < sizeof *nh)) { > > > + return false; > > > + } > > > + > > > + plen = ntohs(nh->ip6_plen); > > > + if (OVS_UNLIKELY(plen > size)) { > > > + return false; > > > + } > > > + /* Jumbo Payload option not supported yet. */ > > > + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > > > + return false; > > > + } > > > + > > > + return true; > > > +} > > > + > > > /* Caller is responsible for initializing 'dst' with enough storage > for > > > * FLOW_U64S * 8 bytes. */ > > > void > > > @@ -748,22 +812,7 @@ miniflow_extract(struct dp_packet *packet, > > > struct miniflow *dst) > > > int ip_len; > > > uint16_t tot_len; > > > > > > - if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > > > - goto out; > > > - } > > > - ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > > > - > > > - if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { > > > - goto out; > > > - } > > > - if (OVS_UNLIKELY(size < ip_len)) { > > > - goto out; > > > - } > > > - tot_len = ntohs(nh->ip_tot_len); > > > - if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) { > > > - goto out; > > > - } > > > - if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) { > > > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, > > > + &tot_len))) { > > > > You may have missed my comment in the v7 regarding this. > > > > There's no mention in commit message of the introduction of > > IPv4/IPv6/fragmentation sanity checks. > > > > This affects miniflow extract regardless of HWOL usage. The miniflow > > extract refactor to use these should be introduced as a > > separate patch to this series so it's clear that it is being modified. 
> > > > > goto out; > > > } > > > dp_packet_set_l2_pad_size(packet, size - tot_len); @@ > > > -786,31 > > > +835,19 @@ miniflow_extract(struct dp_packet *packet, struct > > > +miniflow > > > *dst) > > > nw_tos = nh->ip_tos; > > > nw_ttl = nh->ip_ttl; > > > nw_proto = nh->ip_proto; > > > - if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > > > - nw_frag = FLOW_NW_FRAG_ANY; > > > - if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > > > - nw_frag |= FLOW_NW_FRAG_LATER; > > > - } > > > - } > > > + nw_frag = ipv4_get_nw_frag(nh); > > > data_pull(&data, &size, ip_len); > > > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > > > - const struct ovs_16aligned_ip6_hdr *nh; > > > + const struct ovs_16aligned_ip6_hdr *nh = data; > > > ovs_be32 tc_flow; > > > uint16_t plen; > > > > > > - if (OVS_UNLIKELY(size < sizeof *nh)) { > > > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { > > > goto out; > > > } > > > - nh = data_pull(&data, &size, sizeof *nh); > > > + data_pull(&data, &size, sizeof *nh); > > > > > > plen = ntohs(nh->ip6_plen); > > > - if (OVS_UNLIKELY(plen > size)) { > > > - goto out; > > > - } > > > - /* Jumbo Payload option not supported yet. */ > > > - if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > > > - goto out; > > > - } > > > dp_packet_set_l2_pad_size(packet, size - plen); > > > size = plen; /* Never pull padding. 
*/ > > > > > > @@ -982,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_, > > > size_t > > > size) > > > return parse_ethertype(&data, &size); } > > > > > > +uint16_t > > > +parse_tcp_flags(struct dp_packet *packet) { > > > + const void *data = dp_packet_data(packet); > > > + size_t size = dp_packet_size(packet); > > > + ovs_be16 dl_type; > > > + uint8_t nw_frag = 0, nw_proto = 0; > > > + > > > + if (packet->packet_type != htonl(PT_ETH)) { > > > + return 0; > > > + } > > > + > > > + data_pull(&data, &size, ETH_ADDR_LEN * 2); > > > + dl_type = parse_ethertype(&data, &size); > > > + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { > > > + const struct ip_header *nh = data; > > > + int ip_len; > > > + uint16_t tot_len; > > > + > > > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, > > > &tot_len))) { > > > + return 0; > > > + } > > > + nw_proto = nh->ip_proto; > > > + nw_frag = ipv4_get_nw_frag(nh); > > > + > > > + size = tot_len; /* Never pull padding. */ > > > + data_pull(&data, &size, ip_len); > > > + } else if (dl_type == htons(ETH_TYPE_IPV6)) { > > > + const struct ovs_16aligned_ip6_hdr *nh = data; > > > + > > > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { > > > + return 0; > > > + } > > > + data_pull(&data, &size, sizeof *nh); > > > + > > > + size = ntohs(nh->ip6_plen); /* Never pull padding. */ > > > + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, > &nw_frag)) { > > > + return 0; > > > + } > > > + nw_proto = nh->ip6_nxt; > > > + } else { > > > + return 0; > > > + } > > > + > > > + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == > > IPPROTO_TCP && > > > + size >= TCP_HEADER_LEN) { > > > + const struct tcp_header *tcp = data; > > > + > > > + return TCP_FLAGS_BE32(tcp->tcp_ctl); > > > > Will cause compilation failure with sparse when compiling without DPDK. 
> > > > lib/flow.c:1070:16: error: incorrect type in return expression > > (different base > > types) > > lib/flow.c:1070:16: expected unsigned short > > lib/flow.c:1070:16: got restricted ovs_be32 [usertype] <noident> > > Thanks, > > Is there a way for me to re-run travis build so I can verify it is fixed? Sure, Travis can link to a github account. The OVS test doc has steps you can follow to set it up: http://docs.openvswitch.org/en/latest/topics/testing/#continuous-integration-with-travis-ci Ian > > > > > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Ftra > > vis- > > ci.org%2Fistokes%2Fovs%2Fjobs%2F364709985&data=02%7C01%7Cshahafs% > > 40mellanox.com%7C624a50f6dcd64230e11c08d59f1d6f66%7Ca652971c7d2e4 > > d9ba6a4d149256f461b%7C0%7C0%7C636589871173668041&sdata=bZ6FwLkX > > B0%2FphhOaQBb6tBjAkNaA8to%2BSWiuK6zdBew%3D&reserved=0 > > > > > + } > > > + > > > + return 0; > > > +} > > > + > > > /* For every bit of a field that is wildcarded in 'wildcards', sets > the > > > * corresponding bit in 'flow' to zero. */ void diff --git > > > a/lib/flow.h b/lib/flow.h index 770a07a..0adecbf 100644 > > > --- a/lib/flow.h > > > +++ b/lib/flow.h > > > @@ -132,6 +132,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, > > > size_t *sizep, uint8_t *nw_proto, > > > uint8_t *nw_frag); > > > ovs_be16 parse_dl_type(const struct eth_header *data_, size_t > > > size); bool parse_nsh(const void **datap, size_t *sizep, struct > > > ovs_key_nsh *key); > > > +uint16_t parse_tcp_flags(struct dp_packet *packet); > > > > > > static inline uint64_t > > > flow_get_xreg(const struct flow *flow, int idx) > > > -- > > > 2.7.4
Thursday, April 12, 2018 4:39 PM, Stokes, Ian: > Subject: RE: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from the flow > mark > > > > + > > > > + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == > > > IPPROTO_TCP && > > > > + size >= TCP_HEADER_LEN) { > > > > + const struct tcp_header *tcp = data; > > > > + > > > > + return TCP_FLAGS_BE32(tcp->tcp_ctl); > > > > > > Will cause compilation failure with sparse when compiling without DPDK. > > > > > > lib/flow.c:1070:16: error: incorrect type in return expression > > > (different base > > > types) > > > lib/flow.c:1070:16: expected unsigned short > > > lib/flow.c:1070:16: got restricted ovs_be32 [usertype] <noident> > > > > Thanks, > > > > Is there a way for me to re-run travis build so I can verify it is fixed? > > Sure, Travis can link to a github account. The OVS test doc has steps you can > follow to set it up: > > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fdoc > s.openvswitch.org%2Fen%2Flatest%2Ftopics%2Ftesting%2F%23continuous- > integration-with-travis- > ci&data=02%7C01%7Cshahafs%40mellanox.com%7C4ab1d89f766f4969a93d0 > 8d5a07aea0c%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636591 > 372144565253&sdata=GnJlyU0Sob%2BQGEODZcfSo%2BOwu0R0YgESLqVd4Ks > IQyQ%3D&reserved=0 > Am getting this error[1] when enabling the sparse compiler. I thought it should be included in the Travis builder. [1] https://travis-ci.org/shahafsh/ovs/jobs/366766006
On Sun, Apr 15, 2018 at 12:40:28PM +0000, Shahaf Shuler wrote: > Thursday, April 12, 2018 4:39 PM, Stokes, Ian: > > Subject: RE: [PATCH v8 2/6] dpif-netdev: retrieve flow directly from the flow > > mark > > > > > + > > > > > + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == > > > > IPPROTO_TCP && > > > > > + size >= TCP_HEADER_LEN) { > > > > > + const struct tcp_header *tcp = data; > > > > > + > > > > > + return TCP_FLAGS_BE32(tcp->tcp_ctl); > > > > > > > > Will cause compilation failure with sparse when compiling without DPDK. > > > > > > > > lib/flow.c:1070:16: error: incorrect type in return expression > > > > (different base > > > > types) > > > > lib/flow.c:1070:16: expected unsigned short > > > > lib/flow.c:1070:16: got restricted ovs_be32 [usertype] <noident> > > > > > > Thanks, > > > > > > Is there a way for me to re-run travis build so I can verify it is fixed? > > > > Sure, Travis can link to a github account. The OVS test doc has steps you can > > follow to set it up: > > > > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fdoc > > s.openvswitch.org%2Fen%2Flatest%2Ftopics%2Ftesting%2F%23continuous- > > integration-with-travis- > > ci&data=02%7C01%7Cshahafs%40mellanox.com%7C4ab1d89f766f4969a93d0 > > 8d5a07aea0c%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636591 > > 372144565253&sdata=GnJlyU0Sob%2BQGEODZcfSo%2BOwu0R0YgESLqVd4Ks > > IQyQ%3D&reserved=0 > > > > Am getting this error[1] when enabling the sparse compiler. > > I thought it should be included in the Travis builder. > > [1] > https://travis-ci.org/shahafsh/ovs/jobs/366766006 The build enables sparse already, see .travis/linux-build.sh. You don't need it as a separate compiler.
diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 21c8ca5..dd3f17b 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p) #define reset_dp_packet_checksum_ol_flags(arg) #endif +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, + uint32_t *mark OVS_UNUSED) +{ +#ifdef DPDK_NETDEV + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { + *mark = p->mbuf.hash.fdir.hi; + return true; + } +#endif + return false; +} + enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */ struct dp_packet_batch { diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2fdb6ef..7489a2f 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2015,6 +2015,23 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd) } } +static struct dp_netdev_flow * +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, + const uint32_t mark) +{ + struct dp_netdev_flow *flow; + + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0), + &flow_mark.mark_to_flow) { + if (flow->mark == mark && flow->pmd_id == pmd->core_id && + flow->dead == false) { + return flow; + } + } + + return NULL; +} + static void dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow) @@ -5204,10 +5221,10 @@ struct packet_batch_per_flow { static inline void packet_batch_per_flow_update(struct packet_batch_per_flow *batch, struct dp_packet *packet, - const struct miniflow *mf) + uint16_t tcp_flags) { batch->byte_count += dp_packet_size(packet); - batch->tcp_flags |= miniflow_get_tcp_flags(mf); + batch->tcp_flags |= tcp_flags; batch->array.packets[batch->array.count++] = packet; } @@ -5241,7 +5258,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, static inline void dp_netdev_queue_batches(struct dp_packet *pkt, - struct dp_netdev_flow *flow, const struct miniflow *mf, + struct dp_netdev_flow *flow, uint16_t tcp_flags, struct packet_batch_per_flow *batches, size_t 
*n_batches) { @@ -5252,7 +5269,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt, packet_batch_per_flow_init(batch, flow); } - packet_batch_per_flow_update(batch, pkt, mf); + packet_batch_per_flow_update(batch, pkt, tcp_flags); } /* Try to process all ('cnt') the 'packets' using only the exact match cache @@ -5283,6 +5300,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, const size_t cnt = dp_packet_batch_size(packets_); uint32_t cur_min; int i; + uint16_t tcp_flags; atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); pmd_perf_update_counter(&pmd->perf_stats, @@ -5291,6 +5309,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { struct dp_netdev_flow *flow; + uint32_t mark; if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { dp_packet_delete(packet); @@ -5298,6 +5317,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, continue; } + if (dp_packet_has_flow_mark(packet, &mark)) { + flow = mark_to_flow_find(pmd, mark); + if (flow) { + tcp_flags = parse_tcp_flags(packet); + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, + n_batches); + continue; + } + } + if (i != cnt - 1) { struct dp_packet **packets = packets_->packets; /* Prefetch next packet data and metadata. */ @@ -5323,7 +5352,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, flow = NULL; } if (OVS_LIKELY(flow)) { - dp_netdev_queue_batches(packet, flow, &key->mf, batches, + tcp_flags = miniflow_get_tcp_flags(&key->mf); + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, n_batches); } else { /* Exact match cache missed. 
Group missed packets together at @@ -5501,7 +5531,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, flow = dp_netdev_flow_cast(rules[i]); emc_probabilistic_insert(pmd, &keys[i], flow); - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); + dp_netdev_queue_batches(packet, flow, + miniflow_get_tcp_flags(&keys[i].mf), + batches, n_batches); } pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT, diff --git a/lib/flow.c b/lib/flow.c index 38ff29c..6c74dd3 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -624,6 +624,70 @@ flow_extract(struct dp_packet *packet, struct flow *flow) miniflow_expand(&m.mf, flow); } +static inline bool +ipv4_sanity_check(const struct ip_header *nh, size_t size, + int *ip_lenp, uint16_t *tot_lenp) +{ + int ip_len; + uint16_t tot_len; + + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { + return false; + } + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; + + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) { + return false; + } + + tot_len = ntohs(nh->ip_tot_len); + if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len || + size - tot_len > UINT8_MAX)) { + return false; + } + + *ip_lenp = ip_len; + *tot_lenp = tot_len; + + return true; +} + +static inline uint8_t +ipv4_get_nw_frag(const struct ip_header *nh) +{ + uint8_t nw_frag = 0; + + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { + nw_frag = FLOW_NW_FRAG_ANY; + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { + nw_frag |= FLOW_NW_FRAG_LATER; + } + } + + return nw_frag; +} + +static inline bool +ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size) +{ + uint16_t plen; + + if (OVS_UNLIKELY(size < sizeof *nh)) { + return false; + } + + plen = ntohs(nh->ip6_plen); + if (OVS_UNLIKELY(plen > size)) { + return false; + } + /* Jumbo Payload option not supported yet. */ + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { + return false; + } + + return true; +} + /* Caller is responsible for initializing 'dst' with enough storage for * FLOW_U64S * 8 bytes. 
*/ void @@ -748,22 +812,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) int ip_len; uint16_t tot_len; - if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { - goto out; - } - ip_len = IP_IHL(nh->ip_ihl_ver) * 4; - - if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { - goto out; - } - if (OVS_UNLIKELY(size < ip_len)) { - goto out; - } - tot_len = ntohs(nh->ip_tot_len); - if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) { - goto out; - } - if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) { + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) { goto out; } dp_packet_set_l2_pad_size(packet, size - tot_len); @@ -786,31 +835,19 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) nw_tos = nh->ip_tos; nw_ttl = nh->ip_ttl; nw_proto = nh->ip_proto; - if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { - nw_frag = FLOW_NW_FRAG_ANY; - if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { - nw_frag |= FLOW_NW_FRAG_LATER; - } - } + nw_frag = ipv4_get_nw_frag(nh); data_pull(&data, &size, ip_len); } else if (dl_type == htons(ETH_TYPE_IPV6)) { - const struct ovs_16aligned_ip6_hdr *nh; + const struct ovs_16aligned_ip6_hdr *nh = data; ovs_be32 tc_flow; uint16_t plen; - if (OVS_UNLIKELY(size < sizeof *nh)) { + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { goto out; } - nh = data_pull(&data, &size, sizeof *nh); + data_pull(&data, &size, sizeof *nh); plen = ntohs(nh->ip6_plen); - if (OVS_UNLIKELY(plen > size)) { - goto out; - } - /* Jumbo Payload option not supported yet. */ - if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { - goto out; - } dp_packet_set_l2_pad_size(packet, size - plen); size = plen; /* Never pull padding. 
*/ @@ -982,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_, size_t size) return parse_ethertype(&data, &size); } +uint16_t +parse_tcp_flags(struct dp_packet *packet) +{ + const void *data = dp_packet_data(packet); + size_t size = dp_packet_size(packet); + ovs_be16 dl_type; + uint8_t nw_frag = 0, nw_proto = 0; + + if (packet->packet_type != htonl(PT_ETH)) { + return 0; + } + + data_pull(&data, &size, ETH_ADDR_LEN * 2); + dl_type = parse_ethertype(&data, &size); + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { + const struct ip_header *nh = data; + int ip_len; + uint16_t tot_len; + + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) { + return 0; + } + nw_proto = nh->ip_proto; + nw_frag = ipv4_get_nw_frag(nh); + + size = tot_len; /* Never pull padding. */ + data_pull(&data, &size, ip_len); + } else if (dl_type == htons(ETH_TYPE_IPV6)) { + const struct ovs_16aligned_ip6_hdr *nh = data; + + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { + return 0; + } + data_pull(&data, &size, sizeof *nh); + + size = ntohs(nh->ip6_plen); /* Never pull padding. */ + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { + return 0; + } + nw_proto = nh->ip6_nxt; + } else { + return 0; + } + + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP && + size >= TCP_HEADER_LEN) { + const struct tcp_header *tcp = data; + + return TCP_FLAGS_BE32(tcp->tcp_ctl); + } + + return 0; +} + /* For every bit of a field that is wildcarded in 'wildcards', sets the * corresponding bit in 'flow' to zero. 
*/ void diff --git a/lib/flow.h b/lib/flow.h index 770a07a..0adecbf 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -132,6 +132,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto, uint8_t *nw_frag); ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size); bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key); +uint16_t parse_tcp_flags(struct dp_packet *packet); static inline uint64_t flow_get_xreg(const struct flow *flow, int idx)