Message ID | 1504603381-30071-3-git-send-email-yliu@fridaylinux.org |
---|---|
State | Superseded |
Headers | show |
Series | OVS-DPDK flow offload with rte_flow | expand |
On Tue, Sep 05, 2017 at 05:22:55PM +0800, Yuanhan Liu wrote: > So that we could skip the heavy emc processing, notably, the > miniflow_extract function. A simple PHY-PHY forwarding testing > shows 53% performance improvement. > > Note that though the heavy miniflow_extract is skipped, we > still have to do per packet checking, due to we have to check > the tcp_flags. > > Co-authored-by: Finn Christensen <fc@napatech.com> > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org> > Signed-off-by: Finn Christensen <fc@napatech.com> > --- > > v2: update tcp_flags, which also fixes the build warnings > --- > lib/dp-packet.h | 13 ++++++++++ > lib/dpif-netdev.c | 27 ++++++++++++++----- > lib/flow.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > lib/flow.h | 1 + > 4 files changed, 113 insertions(+), 6 deletions(-) > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 046f3ab..a7a062f 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p) > #define reset_dp_packet_checksum_ol_flags(arg) > #endif > > +static inline bool > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > + uint32_t *mark OVS_UNUSED) > +{ > +#ifdef DPDK_NETDEV > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > + *mark = p->mbuf.hash.fdir.hi; > + return true; > + } > +#endif > + return false; > +} It seems odd that p and mark are marked as OVS_UNUSED but used in the case that DPDK_NETDEV is defined. Something like this seems cleaner to me. +#ifdef DPDK_NETDEV +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p, uint32_t *mark) +{ + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { + *mark = p->mbuf.hash.fdir.hi; + return true; + } + return false; +} +#else +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, + uint32_t *mark OVS_UNUSED) +{ + return false; +} +#endif ... 
> diff --git a/lib/flow.c b/lib/flow.c > index b2b10aa..912c538 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size) > return parse_ethertype(&data, &size); > } > > +uint16_t > +parse_tcp_flags(struct dp_packet *packet) > +{ > + const void *data = dp_packet_data(packet); > + size_t size = dp_packet_size(packet); > + ovs_be16 dl_type; > + uint8_t nw_frag = 0, nw_proto = 0; > + > + if (packet->packet_type != htonl(PT_ETH)) { > + return 0; > + } > + > + data_pull(&data, &size, ETH_ADDR_LEN * 2); > + dl_type = parse_ethertype(&data, &size); > + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { > + const struct ip_header *nh = data; > + int ip_len; > + > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > + return 0; > + } > + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > + > + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { > + return 0; > + } > + if (OVS_UNLIKELY(size < ip_len)) { > + return 0; > + } > + > + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > + nw_frag = FLOW_NW_FRAG_ANY; > + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > + nw_frag |= FLOW_NW_FRAG_LATER; > + } > + } > + nw_proto = nh->ip_proto; > + data_pull(&data, &size, ip_len); > + } else if (dl_type == htons(ETH_TYPE_IPV6)) { > + const struct ovs_16aligned_ip6_hdr *nh; > + uint16_t plen; > + > + if (OVS_UNLIKELY(size < sizeof *nh)) { > + return 0; > + } > + nh = data_pull(&data, &size, sizeof *nh); > + > + plen = ntohs(nh->ip6_plen); > + if (OVS_UNLIKELY(plen > size)) { > + return 0; > + } > + /* Jumbo Payload option not supported yet. 
*/ > + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > + return 0; > + } > + size = plen; > + > + nw_proto = nh->ip6_nxt; > + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { > + return 0; > + } > + } else { > + return 0; > + } > + > + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { > + if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { > + if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { The following might be nicer: + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER)) + && OVS_LIKELY(nw_proto == IPPROTO_TCP) + && OVS_LIKELY(size >= TCP_HEADER_LEN)) { > + const struct tcp_header *tcp = data; > + > + return TCP_FLAGS_BE32(tcp->tcp_ctl); > + } > + } > + } > + > + return 0; > +} > + > /* For every bit of a field that is wildcarded in 'wildcards', sets the > * corresponding bit in 'flow' to zero. */ > void ...
On 9/8/17, 9:44 AM, "ovs-dev-bounces@openvswitch.org on behalf of Simon Horman" <ovs-dev-bounces@openvswitch.org on behalf of simon.horman@netronome.com> wrote: On Tue, Sep 05, 2017 at 05:22:55PM +0800, Yuanhan Liu wrote: > So that we could skip the heavy emc processing, notably, the > miniflow_extract function. A simple PHY-PHY forwarding testing > shows 53% performance improvement. > > Note that though the heavy miniflow_extract is skipped, we > still have to do per packet checking, due to we have to check > the tcp_flags. > > Co-authored-by: Finn Christensen <fc@napatech.com> > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org> > Signed-off-by: Finn Christensen <fc@napatech.com> > --- > > v2: update tcp_flags, which also fixes the build warnings > --- > lib/dp-packet.h | 13 ++++++++++ > lib/dpif-netdev.c | 27 ++++++++++++++----- > lib/flow.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > lib/flow.h | 1 + > 4 files changed, 113 insertions(+), 6 deletions(-) > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 046f3ab..a7a062f 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p) > #define reset_dp_packet_checksum_ol_flags(arg) > #endif > > +static inline bool > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > + uint32_t *mark OVS_UNUSED) > +{ > +#ifdef DPDK_NETDEV > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > + *mark = p->mbuf.hash.fdir.hi; > + return true; > + } > +#endif > + return false; > +} It seems odd that p and mark are marked as OVS_UNUSED but used in the case that DPDK_NETDEV is defined. [Darrell] OVS_UNUSED means ‘’may be unused” Typically, for a trivial alternative, we would do a slight variation of the above rather than writing two versions of the functions. 
+static inline bool +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, + uint32_t *mark OVS_UNUSED) +{ +#ifdef DPDK_NETDEV + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { + *mark = p->mbuf.hash.fdir.hi; + return true; + } +#else + return false; +#endif +} ////////////////////////////// Something like this seems cleaner to me. +#ifdef DPDK_NETDEV +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p, uint32_t *mark) +{ + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { + *mark = p->mbuf.hash.fdir.hi; + return true; + } + return false; +} +#else +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, + uint32_t *mark OVS_UNUSED) +{ + return false; +} +#endif ... > diff --git a/lib/flow.c b/lib/flow.c > index b2b10aa..912c538 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size) > return parse_ethertype(&data, &size); > } > > +uint16_t > +parse_tcp_flags(struct dp_packet *packet) > +{ > + const void *data = dp_packet_data(packet); > + size_t size = dp_packet_size(packet); > + ovs_be16 dl_type; > + uint8_t nw_frag = 0, nw_proto = 0; > + > + if (packet->packet_type != htonl(PT_ETH)) { > + return 0; > + } > + > + data_pull(&data, &size, ETH_ADDR_LEN * 2); > + dl_type = parse_ethertype(&data, &size); > + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { > + const struct ip_header *nh = data; > + int ip_len; > + > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > + return 0; > + } > + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; > + > + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { > + return 0; > + } > + if (OVS_UNLIKELY(size < ip_len)) { > + return 0; > + } > + > + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { > + nw_frag = FLOW_NW_FRAG_ANY; > + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { > + nw_frag |= FLOW_NW_FRAG_LATER; > + } > + } > + nw_proto = nh->ip_proto; > + data_pull(&data, &size, ip_len); > + } else if (dl_type == htons(ETH_TYPE_IPV6)) { > + const struct 
ovs_16aligned_ip6_hdr *nh; > + uint16_t plen; > + > + if (OVS_UNLIKELY(size < sizeof *nh)) { > + return 0; > + } > + nh = data_pull(&data, &size, sizeof *nh); > + > + plen = ntohs(nh->ip6_plen); > + if (OVS_UNLIKELY(plen > size)) { > + return 0; > + } > + /* Jumbo Payload option not supported yet. */ > + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { > + return 0; > + } > + size = plen; > + > + nw_proto = nh->ip6_nxt; > + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { > + return 0; > + } > + } else { > + return 0; > + } > + > + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { > + if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { > + if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { The following might be nicer: + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER)) + && OVS_LIKELY(nw_proto == IPPROTO_TCP) + && OVS_LIKELY(size >= TCP_HEADER_LEN)) { > + const struct tcp_header *tcp = data; > + > + return TCP_FLAGS_BE32(tcp->tcp_ctl); > + } > + } > + } > + > + return 0; > +} > + > /* For every bit of a field that is wildcarded in 'wildcards', sets the > * corresponding bit in 'flow' to zero. */ > void ... _______________________________________________ dev mailing list dev@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=gPQ6bI2kxK_3KX17GsXuiCRl8YFpi7zTpZOiIkM9bgw&s=kqeotzN7bKj_MV8xg5c-r9pc5hrfgTttYknrR2c0pQs&e=
Regards _Sugesh > -----Original Message----- > From: ovs-dev-bounces@openvswitch.org [mailto:ovs-dev- > bounces@openvswitch.org] On Behalf Of Yuanhan Liu > Sent: Tuesday, September 5, 2017 10:23 AM > To: dev@openvswitch.org > Subject: [ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly from > the flow mark [snip] > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { > struct dp_netdev_flow *flow; > + uint32_t flow_mark; > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > dp_packet_delete(packet); > @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread > *pmd, > continue; > } > > + if (dp_packet_has_flow_mark(packet, &flow_mark)) { > + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); > + if (flow) { [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can be avoided? Is that true? > + tcp_flags = parse_tcp_flags(packet); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > + n_batches); > + continue; > + } > + } > + > if (i != size - 1) { > struct dp_packet **packets = packets_->packets; > /* Prefetch next packet data and metadata. */ @@ -4989,7 +5001,8 > @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > /* If EMC is disabled skip emc_lookup */ > flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key); > if (OVS_LIKELY(flow)) { > - dp_netdev_queue_batches(packet, flow, &key->mf, batches, > + tcp_flags = miniflow_get_tcp_flags(&key->mf); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > n_batches); > } else { > /* Exact match cache missed. 
Group missed packets together at @@ - > 5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread > *pmd, > flow = dp_netdev_flow_cast(rules[i]); > > emc_probabilistic_insert(pmd, &keys[i], flow); > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, > n_batches); > + dp_netdev_queue_batches(packet, flow, > + miniflow_get_tcp_flags(&keys[i].mf), > + batches, n_batches); > } > > dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt); > diff --git a/lib/flow.c b/lib/flow.c index b2b10aa..912c538 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, > size_t size) > return parse_ethertype(&data, &size); } > [Snip] > -- > 2.7.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Fri, Sep 08, 2017 at 05:38:07PM +0000, Darrell Ball wrote: > > +static inline bool > > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > > + uint32_t *mark OVS_UNUSED) > > +{ > > +#ifdef DPDK_NETDEV > > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > > + *mark = p->mbuf.hash.fdir.hi; > > + return true; > > + } > > +#endif > > + return false; > > +} > > It seems odd that p and mark are marked as OVS_UNUSED > but used in the case that DPDK_NETDEV is defined. > > [Darrell] OVS_UNUSED means ‘’may be unused” > Typically, for a trivial alternative, we would do a slight variation of the above > rather than writing two versions of the functions. Right. I also saw quite many examples like this in OVS. That's why I code in this way, though I agree with Simon, it's indeed a bit odd. > > + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { > > + if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { > > + if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { > > The following might be nicer: > > + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER)) > + && OVS_LIKELY(nw_proto == IPPROTO_TCP) > + && OVS_LIKELY(size >= TCP_HEADER_LEN)) { Indeed. Thanks! --yliu
On Sun, Sep 10, 2017 at 04:15:42PM +0000, Chandran, Sugesh wrote: > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > > > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { > > struct dp_netdev_flow *flow; > > + uint32_t flow_mark; > > > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > > dp_packet_delete(packet); > > @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread > > *pmd, > > continue; > > } > > > > + if (dp_packet_has_flow_mark(packet, &flow_mark)) { > > + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); > > + if (flow) { > [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can be avoided? > Is that true? Maybe. If so, we could get better performance, as I have shown in v1. --yliu
Regards _Sugesh > -----Original Message----- > From: Yuanhan Liu [mailto:yliu@fridaylinux.org] > Sent: Monday, September 11, 2017 7:36 AM > To: Chandran, Sugesh <sugesh.chandran@intel.com> > Cc: dev@openvswitch.org > Subject: Re: [ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly > from the flow mark > > On Sun, Sep 10, 2017 at 04:15:42PM +0000, Chandran, Sugesh wrote: > > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > > > > > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { > > > struct dp_netdev_flow *flow; > > > + uint32_t flow_mark; > > > > > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > > > dp_packet_delete(packet); @@ -4972,6 +4974,16 @@ > > > emc_processing(struct dp_netdev_pmd_thread *pmd, > > > continue; > > > } > > > > > > + if (dp_packet_has_flow_mark(packet, &flow_mark)) { > > > + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); > > > + if (flow) { > > [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can > be avoided? > > Is that true? > > Maybe. If so, we could get better performance, as I have showed in v1. [Sugesh] Then another probing feature to see if this parsing needs to be done/not.? > > --yliu
On Mon, Sep 11, 2017 at 08:56:30AM +0000, Chandran, Sugesh wrote: > > > Regards > _Sugesh > > > > -----Original Message----- > > From: Yuanhan Liu [mailto:yliu@fridaylinux.org] > > Sent: Monday, September 11, 2017 7:36 AM > > To: Chandran, Sugesh <sugesh.chandran@intel.com> > > Cc: dev@openvswitch.org > > Subject: Re: [ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly > > from the flow mark > > > > On Sun, Sep 10, 2017 at 04:15:42PM +0000, Chandran, Sugesh wrote: > > > > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); > > > > > > > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { > > > > struct dp_netdev_flow *flow; > > > > + uint32_t flow_mark; > > > > > > > > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { > > > > dp_packet_delete(packet); @@ -4972,6 +4974,16 @@ > > > > emc_processing(struct dp_netdev_pmd_thread *pmd, > > > > continue; > > > > } > > > > > > > > + if (dp_packet_has_flow_mark(packet, &flow_mark)) { > > > > + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); > > > > + if (flow) { > > > [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can > > be avoided? > > > Is that true? > > > > Maybe. If so, we could get better performance, as I have showed in v1. > [Sugesh] Then another probing feature to see if this parsing needs to be done/not.? I think so. I'd leave it as it is (only sw implementation) and will check the possibility after this patchset has been merged. --yliu
On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote: So that we could skip the heavy emc processing, notably, the miniflow_extract function. A simple PHY-PHY forwarding testing shows 53% performance improvement. Note that though the heavy miniflow_extract is skipped [Darrell] How much of the increased performance is due to skipping miniflow_extract ? , we still have to do per packet checking, due to we have to check the tcp_flags. Co-authored-by: Finn Christensen <fc@napatech.com> Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org> Signed-off-by: Finn Christensen <fc@napatech.com> --- v2: update tcp_flags, which also fixes the build warnings --- lib/dp-packet.h | 13 ++++++++++ lib/dpif-netdev.c | 27 ++++++++++++++----- lib/flow.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/flow.h | 1 + 4 files changed, 113 insertions(+), 6 deletions(-) diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 046f3ab..a7a062f 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p) #define reset_dp_packet_checksum_ol_flags(arg) #endif +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, + uint32_t *mark OVS_UNUSED) +{ +#ifdef DPDK_NETDEV + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { + *mark = p->mbuf.hash.fdir.hi; + return true; + } +#endif + return false; +} + enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. 
*/ struct dp_packet_batch { diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index f3b7f25..a95b8d4 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -4883,10 +4883,10 @@ struct packet_batch_per_flow { static inline void packet_batch_per_flow_update(struct packet_batch_per_flow *batch, struct dp_packet *packet, - const struct miniflow *mf) + uint16_t tcp_flags) { batch->byte_count += dp_packet_size(packet); - batch->tcp_flags |= miniflow_get_tcp_flags(mf); + batch->tcp_flags |= tcp_flags; batch->array.packets[batch->array.count++] = packet; } @@ -4921,7 +4921,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, static inline void dp_netdev_queue_batches(struct dp_packet *pkt, - struct dp_netdev_flow *flow, const struct miniflow *mf, + struct dp_netdev_flow *flow, uint16_t tcp_flags, struct packet_batch_per_flow *batches, size_t *n_batches) { @@ -4932,7 +4932,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt, packet_batch_per_flow_init(batch, flow); } - packet_batch_per_flow_update(batch, pkt, mf); + packet_batch_per_flow_update(batch, pkt, tcp_flags); } /* Try to process all ('cnt') the 'packets' using only the exact match cache @@ -4960,11 +4960,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, const size_t size = dp_packet_batch_size(packets_); uint32_t cur_min; int i; + uint16_t tcp_flags; atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { struct dp_netdev_flow *flow; + uint32_t flow_mark; if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { dp_packet_delete(packet); @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, continue; } + if (dp_packet_has_flow_mark(packet, &flow_mark)) { + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); + if (flow) { + tcp_flags = parse_tcp_flags(packet); + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, + n_batches); + continue; + } + } + [Darrell] As mentioned, you would move 
dp_netdev_pmd_find_flow_by_mark() to following code; also, maybe, you can get rid of parse_tcp_flags() as a result. if (i != size - 1) { struct dp_packet **packets = packets_->packets; /* Prefetch next packet data and metadata. */ @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, /* If EMC is disabled skip emc_lookup */ flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key); if (OVS_LIKELY(flow)) { - dp_netdev_queue_batches(packet, flow, &key->mf, batches, + tcp_flags = miniflow_get_tcp_flags(&key->mf); + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, n_batches); } else { /* Exact match cache missed. Group missed packets together at @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, flow = dp_netdev_flow_cast(rules[i]); emc_probabilistic_insert(pmd, &keys[i], flow); - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); + dp_netdev_queue_batches(packet, flow, + miniflow_get_tcp_flags(&keys[i].mf), + batches, n_batches); } dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt); diff --git a/lib/flow.c b/lib/flow.c index b2b10aa..912c538 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size) return parse_ethertype(&data, &size); } [Darrell] Hopefully we can get rid of the following function which removes the changes to flow.c/.h If not, we would have to splice out common code. 
+uint16_t +parse_tcp_flags(struct dp_packet *packet) +{ + const void *data = dp_packet_data(packet); + size_t size = dp_packet_size(packet); + ovs_be16 dl_type; + uint8_t nw_frag = 0, nw_proto = 0; + + if (packet->packet_type != htonl(PT_ETH)) { + return 0; + } + + data_pull(&data, &size, ETH_ADDR_LEN * 2); + dl_type = parse_ethertype(&data, &size); + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { + const struct ip_header *nh = data; + int ip_len; + + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { + return 0; + } + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; + + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { + return 0; + } + if (OVS_UNLIKELY(size < ip_len)) { + return 0; + } + + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { + nw_frag = FLOW_NW_FRAG_ANY; + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { + nw_frag |= FLOW_NW_FRAG_LATER; + } + } + nw_proto = nh->ip_proto; + data_pull(&data, &size, ip_len); + } else if (dl_type == htons(ETH_TYPE_IPV6)) { + const struct ovs_16aligned_ip6_hdr *nh; + uint16_t plen; + + if (OVS_UNLIKELY(size < sizeof *nh)) { + return 0; + } + nh = data_pull(&data, &size, sizeof *nh); + + plen = ntohs(nh->ip6_plen); + if (OVS_UNLIKELY(plen > size)) { + return 0; + } + /* Jumbo Payload option not supported yet. */ + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { + return 0; + } + size = plen; + + nw_proto = nh->ip6_nxt; + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { + return 0; + } + } else { + return 0; + } + + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { + if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { + if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { + const struct tcp_header *tcp = data; + + return TCP_FLAGS_BE32(tcp->tcp_ctl); + } + } + } + + return 0; +} + /* For every bit of a field that is wildcarded in 'wildcards', sets the * corresponding bit in 'flow' to zero. 
*/ void diff --git a/lib/flow.h b/lib/flow.h index 6ae5a67..f113ec4 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -130,6 +130,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto, uint8_t *nw_frag); ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size); bool parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key); +uint16_t parse_tcp_flags(struct dp_packet *packet); static inline uint64_t flow_get_xreg(const struct flow *flow, int idx) -- 2.7.4
On Mon, Sep 11, 2017 at 01:37:10PM +0800, Yuanhan Liu wrote: > On Fri, Sep 08, 2017 at 05:38:07PM +0000, Darrell Ball wrote: > > > +static inline bool > > > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, > > > + uint32_t *mark OVS_UNUSED) > > > +{ > > > +#ifdef DPDK_NETDEV > > > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { > > > + *mark = p->mbuf.hash.fdir.hi; > > > + return true; > > > + } > > > +#endif > > > + return false; > > > +} > > > > It seems odd that p and mark are marked as OVS_UNUSED > > but used in the case that DPDK_NETDEV is defined. > > > > [Darrell] OVS_UNUSED means ‘’may be unused” > > Typically, for a trivial alternative, we would do a slight variation of the above > > rather than writing two versions of the functions. > > Right. I also saw quite many examples like this in OVS. That's why I > code in this way, though I agree with Simon, it's indeed a bit odd. Thanks. In light of the above I have no objections to dp_packet_has_flow_mark() the way it is. > > > + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { > > > + if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { > > > + if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { > > > > The following might be nicer: > > > > + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER)) > > + && OVS_LIKELY(nw_proto == IPPROTO_TCP) > > + && OVS_LIKELY(size >= TCP_HEADER_LEN)) { > > Indeed. Thanks! > > --yliu
On Wed, Sep 13, 2017 at 05:20:58AM +0000, Darrell Ball wrote: > > > On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote: > > So that we could skip the heavy emc processing, notably, the > miniflow_extract function. A simple PHY-PHY forwarding testing > shows 53% performance improvement. > > Note that though the heavy miniflow_extract is skipped > > [Darrell] How much of the increased performance is due to skipping > miniflow_extract ? For the PHY-PHY case, following are the performance gains for each case. with_miniflow_extract 22% with_parse_tcp_flags 54% without both: 70% [...] > + if (dp_packet_has_flow_mark(packet, &flow_mark)) { > + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); > + if (flow) { > + tcp_flags = parse_tcp_flags(packet); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > + n_batches); > + continue; > + } > + } > + > > [Darrell] As mentioned, you would move dp_netdev_pmd_find_flow_by_mark() to following code; > also, maybe, you can get rid of parse_tcp_flags() as a result. I doubt it, as you see from above data, the performance impact due to miniflow_extract is huge. > > if (i != size - 1) { > struct dp_packet **packets = packets_->packets; > /* Prefetch next packet data and metadata. */ > @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > /* If EMC is disabled skip emc_lookup */ > flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key); > if (OVS_LIKELY(flow)) { > - dp_netdev_queue_batches(packet, flow, &key->mf, batches, > + tcp_flags = miniflow_get_tcp_flags(&key->mf); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > n_batches); > } else { > /* Exact match cache missed. 
Group missed packets together at > @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, > flow = dp_netdev_flow_cast(rules[i]); > > emc_probabilistic_insert(pmd, &keys[i], flow); > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); > + dp_netdev_queue_batches(packet, flow, > + miniflow_get_tcp_flags(&keys[i].mf), > + batches, n_batches); > } > > dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt); > diff --git a/lib/flow.c b/lib/flow.c > index b2b10aa..912c538 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size) > return parse_ethertype(&data, &size); > } > > > [Darrell] Hopefully we can get rid of the following function which removes the changes to flow.c/.h > If not, we would have to splice out common code. To let parse_tcp_flags() and miniflow_extract() share more common code? --yliu
On 9/13/17, 7:50 PM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote: On Wed, Sep 13, 2017 at 05:20:58AM +0000, Darrell Ball wrote: > > > On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote: > > So that we could skip the heavy emc processing, notably, the > miniflow_extract function. A simple PHY-PHY forwarding testing > shows 53% performance improvement. > > Note that though the heavy miniflow_extract is skipped > > [Darrell] How much of the increased performance is due to skipping > miniflow_extract ? For the PHY-PHY case, following are the performance gains for each case. with_miniflow_extract 22% with_parse_tcp_falgs 54% without both: 70% [...] > + if (dp_packet_has_flow_mark(packet, &flow_mark)) { > + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); > + if (flow) { > + tcp_flags = parse_tcp_flags(packet); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > + n_batches); > + continue; > + } > + } > + > > [Darrell] As mentioned, you would move dp_netdev_pmd_find_flow_by_mark() to following code; > also, maybe, you can get rid of parse_tcp_flags() as a result. I doubt it, as you see from above data, the performance impact due to miniflow_extract is huge. > > if (i != size - 1) { > struct dp_packet **packets = packets_->packets; > /* Prefetch next packet data and metadata. */ > @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > /* If EMC is disabled skip emc_lookup */ > flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key); > if (OVS_LIKELY(flow)) { > - dp_netdev_queue_batches(packet, flow, &key->mf, batches, > + tcp_flags = miniflow_get_tcp_flags(&key->mf); > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, > n_batches); > } else { > /* Exact match cache missed. 
Group missed packets together at > @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, > flow = dp_netdev_flow_cast(rules[i]); > > emc_probabilistic_insert(pmd, &keys[i], flow); > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); > + dp_netdev_queue_batches(packet, flow, > + miniflow_get_tcp_flags(&keys[i].mf), > + batches, n_batches); > } > > dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt); > diff --git a/lib/flow.c b/lib/flow.c > index b2b10aa..912c538 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size) > return parse_ethertype(&data, &size); > } > > > [Darrell] Hopefully we can get rid of the following function which removes the changes to flow.c/.h > If not, we would have to splice out common code. To let parse_tcp_flags() and miniflow_extract() share more common code? [Darrell] It looks to me parse_tcp_flags() is distilled from miniflow_extract(), so, yes, that is what I meant. --yliu
diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 046f3ab..a7a062f 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p) #define reset_dp_packet_checksum_ol_flags(arg) #endif +static inline bool +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED, + uint32_t *mark OVS_UNUSED) +{ +#ifdef DPDK_NETDEV + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { + *mark = p->mbuf.hash.fdir.hi; + return true; + } +#endif + return false; +} + enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */ struct dp_packet_batch { diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index f3b7f25..a95b8d4 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -4883,10 +4883,10 @@ struct packet_batch_per_flow { static inline void packet_batch_per_flow_update(struct packet_batch_per_flow *batch, struct dp_packet *packet, - const struct miniflow *mf) + uint16_t tcp_flags) { batch->byte_count += dp_packet_size(packet); - batch->tcp_flags |= miniflow_get_tcp_flags(mf); + batch->tcp_flags |= tcp_flags; batch->array.packets[batch->array.count++] = packet; } @@ -4921,7 +4921,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, static inline void dp_netdev_queue_batches(struct dp_packet *pkt, - struct dp_netdev_flow *flow, const struct miniflow *mf, + struct dp_netdev_flow *flow, uint16_t tcp_flags, struct packet_batch_per_flow *batches, size_t *n_batches) { @@ -4932,7 +4932,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt, packet_batch_per_flow_init(batch, flow); } - packet_batch_per_flow_update(batch, pkt, mf); + packet_batch_per_flow_update(batch, pkt, tcp_flags); } /* Try to process all ('cnt') the 'packets' using only the exact match cache @@ -4960,11 +4960,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, const size_t size = dp_packet_batch_size(packets_); uint32_t cur_min; int i; + uint16_t tcp_flags; atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); 
DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { struct dp_netdev_flow *flow; + uint32_t flow_mark; if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { dp_packet_delete(packet); @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, continue; } + if (dp_packet_has_flow_mark(packet, &flow_mark)) { + flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark); + if (flow) { + tcp_flags = parse_tcp_flags(packet); + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, + n_batches); + continue; + } + } + if (i != size - 1) { struct dp_packet **packets = packets_->packets; /* Prefetch next packet data and metadata. */ @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, /* If EMC is disabled skip emc_lookup */ flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key); if (OVS_LIKELY(flow)) { - dp_netdev_queue_batches(packet, flow, &key->mf, batches, + tcp_flags = miniflow_get_tcp_flags(&key->mf); + dp_netdev_queue_batches(packet, flow, tcp_flags, batches, n_batches); } else { /* Exact match cache missed. 
Group missed packets together at @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, flow = dp_netdev_flow_cast(rules[i]); emc_probabilistic_insert(pmd, &keys[i], flow); - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); + dp_netdev_queue_batches(packet, flow, + miniflow_get_tcp_flags(&keys[i].mf), + batches, n_batches); } dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt); diff --git a/lib/flow.c b/lib/flow.c index b2b10aa..912c538 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size) return parse_ethertype(&data, &size); } +uint16_t +parse_tcp_flags(struct dp_packet *packet) +{ + const void *data = dp_packet_data(packet); + size_t size = dp_packet_size(packet); + ovs_be16 dl_type; + uint8_t nw_frag = 0, nw_proto = 0; + + if (packet->packet_type != htonl(PT_ETH)) { + return 0; + } + + data_pull(&data, &size, ETH_ADDR_LEN * 2); + dl_type = parse_ethertype(&data, &size); + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { + const struct ip_header *nh = data; + int ip_len; + + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { + return 0; + } + ip_len = IP_IHL(nh->ip_ihl_ver) * 4; + + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) { + return 0; + } + if (OVS_UNLIKELY(size < ip_len)) { + return 0; + } + + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) { + nw_frag = FLOW_NW_FRAG_ANY; + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { + nw_frag |= FLOW_NW_FRAG_LATER; + } + } + nw_proto = nh->ip_proto; + data_pull(&data, &size, ip_len); + } else if (dl_type == htons(ETH_TYPE_IPV6)) { + const struct ovs_16aligned_ip6_hdr *nh; + uint16_t plen; + + if (OVS_UNLIKELY(size < sizeof *nh)) { + return 0; + } + nh = data_pull(&data, &size, sizeof *nh); + + plen = ntohs(nh->ip6_plen); + if (OVS_UNLIKELY(plen > size)) { + return 0; + } + /* Jumbo Payload option not supported yet. 
*/ + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { + return 0; + } + size = plen; + + nw_proto = nh->ip6_nxt; + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) { + return 0; + } + } else { + return 0; + } + + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { + if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { + if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { + const struct tcp_header *tcp = data; + + return TCP_FLAGS_BE32(tcp->tcp_ctl); + } + } + } + + return 0; +} + /* For every bit of a field that is wildcarded in 'wildcards', sets the * corresponding bit in 'flow' to zero. */ void diff --git a/lib/flow.h b/lib/flow.h index 6ae5a67..f113ec4 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -130,6 +130,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto, uint8_t *nw_frag); ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size); bool parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key); +uint16_t parse_tcp_flags(struct dp_packet *packet); static inline uint64_t flow_get_xreg(const struct flow *flow, int idx)