diff mbox series

[ovs-dev,v2,2/8] dpif-netdev: retrieve flow directly from the flow mark

Message ID 1504603381-30071-3-git-send-email-yliu@fridaylinux.org
State Superseded
Headers show
Series OVS-DPDK flow offload with rte_flow | expand

Commit Message

Yuanhan Liu Sept. 5, 2017, 9:22 a.m. UTC
So that we could skip the heavy emc processing, notably, the
miniflow_extract function. A simple PHY-PHY forwarding testing
shows 53% performance improvement.

Note that though the heavy miniflow_extract is skipped, we
still have to do per-packet checking, because we have to check
the tcp_flags.

Co-authored-by: Finn Christensen <fc@napatech.com>
Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
Signed-off-by: Finn Christensen <fc@napatech.com>
---

v2: update tcp_flags, which also fixes the build warnings
---
 lib/dp-packet.h   | 13 ++++++++++
 lib/dpif-netdev.c | 27 ++++++++++++++-----
 lib/flow.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/flow.h        |  1 +
 4 files changed, 113 insertions(+), 6 deletions(-)

Comments

Simon Horman Sept. 8, 2017, 4:44 p.m. UTC | #1
On Tue, Sep 05, 2017 at 05:22:55PM +0800, Yuanhan Liu wrote:
> So that we could skip the heavy emc processing, notably, the
> miniflow_extract function. A simple PHY-PHY forwarding testing
> shows 53% performance improvement.
> 
> Note that though the heavy miniflow_extract is skipped, we
> still have to do per packet checking, due to we have to check
> the tcp_flags.
> 
> Co-authored-by: Finn Christensen <fc@napatech.com>
> Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
> Signed-off-by: Finn Christensen <fc@napatech.com>
> ---
> 
> v2: update tcp_flags, which also fixes the build warnings
> ---
>  lib/dp-packet.h   | 13 ++++++++++
>  lib/dpif-netdev.c | 27 ++++++++++++++-----
>  lib/flow.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/flow.h        |  1 +
>  4 files changed, 113 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 046f3ab..a7a062f 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
>  #define reset_dp_packet_checksum_ol_flags(arg)
>  #endif
>  
> +static inline bool
> +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
> +                        uint32_t *mark OVS_UNUSED)
> +{
> +#ifdef DPDK_NETDEV
> +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
> +        *mark = p->mbuf.hash.fdir.hi;
> +        return true;
> +    }
> +#endif
> +    return false;
> +}

It seems odd that p and mark are marked as OVS_UNUSED
but used in the case that DPDK_NETDEV is defined.

Something like this seems cleaner to me.

+#ifdef DPDK_NETDEV
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p, uint32_t *mark)
+{
+    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
+        *mark = p->mbuf.hash.fdir.hi;
+        return true;
+    }
+    return false;
+}
+#else
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+                        uint32_t *mark OVS_UNUSED)
+{
+    return false;
+}
+#endif

...

> diff --git a/lib/flow.c b/lib/flow.c
> index b2b10aa..912c538 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size)
>      return parse_ethertype(&data, &size);
>  }
>  
> +uint16_t
> +parse_tcp_flags(struct dp_packet *packet)
> +{
> +    const void *data = dp_packet_data(packet);
> +    size_t size = dp_packet_size(packet);
> +    ovs_be16 dl_type;
> +    uint8_t nw_frag = 0, nw_proto = 0;
> +
> +    if (packet->packet_type != htonl(PT_ETH)) {
> +        return 0;
> +    }
> +
> +    data_pull(&data, &size, ETH_ADDR_LEN * 2);
> +    dl_type = parse_ethertype(&data, &size);
> +    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
> +        const struct ip_header *nh = data;
> +        int ip_len;
> +
> +        if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
> +            return 0;
> +        }
> +        ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
> +
> +        if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
> +            return 0;
> +        }
> +        if (OVS_UNLIKELY(size < ip_len)) {
> +            return 0;
> +        }
> +
> +        if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
> +            nw_frag = FLOW_NW_FRAG_ANY;
> +            if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
> +                nw_frag |= FLOW_NW_FRAG_LATER;
> +            }
> +        }
> +        nw_proto = nh->ip_proto;
> +        data_pull(&data, &size, ip_len);
> +    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
> +        const struct ovs_16aligned_ip6_hdr *nh;
> +        uint16_t plen;
> +
> +        if (OVS_UNLIKELY(size < sizeof *nh)) {
> +            return 0;
> +        }
> +        nh = data_pull(&data, &size, sizeof *nh);
> +
> +        plen = ntohs(nh->ip6_plen);
> +        if (OVS_UNLIKELY(plen > size)) {
> +            return 0;
> +        }
> +        /* Jumbo Payload option not supported yet. */
> +        if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
> +            return 0;
> +        }
> +        size = plen;
> +
> +        nw_proto = nh->ip6_nxt;
> +        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
> +            return 0;
> +        }
> +    } else {
> +        return 0;
> +    }
> +
> +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
> +        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
> +            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {

The following might be nicer:

+    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))
+        && OVS_LIKELY(nw_proto == IPPROTO_TCP)
+        && OVS_LIKELY(size >= TCP_HEADER_LEN)) {

> +                const struct tcp_header *tcp = data;
> +
> +                return TCP_FLAGS_BE32(tcp->tcp_ctl);
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  /* For every bit of a field that is wildcarded in 'wildcards', sets the
>   * corresponding bit in 'flow' to zero. */
>  void

...
Darrell Ball Sept. 8, 2017, 5:38 p.m. UTC | #2
On 9/8/17, 9:44 AM, "ovs-dev-bounces@openvswitch.org on behalf of Simon Horman" <ovs-dev-bounces@openvswitch.org on behalf of simon.horman@netronome.com> wrote:

    On Tue, Sep 05, 2017 at 05:22:55PM +0800, Yuanhan Liu wrote:
    > So that we could skip the heavy emc processing, notably, the

    > miniflow_extract function. A simple PHY-PHY forwarding testing

    > shows 53% performance improvement.

    > 

    > Note that though the heavy miniflow_extract is skipped, we

    > still have to do per packet checking, due to we have to check

    > the tcp_flags.

    > 

    > Co-authored-by: Finn Christensen <fc@napatech.com>

    > Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>

    > Signed-off-by: Finn Christensen <fc@napatech.com>

    > ---

    > 

    > v2: update tcp_flags, which also fixes the build warnings

    > ---

    >  lib/dp-packet.h   | 13 ++++++++++

    >  lib/dpif-netdev.c | 27 ++++++++++++++-----

    >  lib/flow.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++

    >  lib/flow.h        |  1 +

    >  4 files changed, 113 insertions(+), 6 deletions(-)

    > 

    > diff --git a/lib/dp-packet.h b/lib/dp-packet.h

    > index 046f3ab..a7a062f 100644

    > --- a/lib/dp-packet.h

    > +++ b/lib/dp-packet.h

    > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)

    >  #define reset_dp_packet_checksum_ol_flags(arg)

    >  #endif

    >  

    > +static inline bool

    > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,

    > +                        uint32_t *mark OVS_UNUSED)

    > +{

    > +#ifdef DPDK_NETDEV

    > +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {

    > +        *mark = p->mbuf.hash.fdir.hi;

    > +        return true;

    > +    }

    > +#endif

    > +    return false;

    > +}

    
    It seems odd that p and mark are marked as OVS_UNUSED
    but used in the case that DPDK_NETDEV is defined.

[Darrell] OVS_UNUSED means "may be unused"
                Typically, for a trivial alternative, we would do a slight variation of the above
                rather than writing two versions of the functions.

+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+                        uint32_t *mark OVS_UNUSED)
+{
 +#ifdef DPDK_NETDEV
 +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
 +        *mark = p->mbuf.hash.fdir.hi;
 +        return true;
 +    }
 +#else
 +    return false; 
 +#endif
 +}

//////////////////////////////

    
    Something like this seems cleaner to me.
    
    +#ifdef DPDK_NETDEV
    +static inline bool
    +dp_packet_has_flow_mark(struct dp_packet *p, uint32_t *mark)
    +{
    +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
    +        *mark = p->mbuf.hash.fdir.hi;
    +        return true;
    +    }
    +    return false;
    +}
    +#else
    +static inline bool
    +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
    +                        uint32_t *mark OVS_UNUSED)
    +{
    +    return false;
    +}
    +#endif
    
    ...
    
    > diff --git a/lib/flow.c b/lib/flow.c

    > index b2b10aa..912c538 100644

    > --- a/lib/flow.c

    > +++ b/lib/flow.c

    > @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size)

    >      return parse_ethertype(&data, &size);

    >  }

    >  

    > +uint16_t

    > +parse_tcp_flags(struct dp_packet *packet)

    > +{

    > +    const void *data = dp_packet_data(packet);

    > +    size_t size = dp_packet_size(packet);

    > +    ovs_be16 dl_type;

    > +    uint8_t nw_frag = 0, nw_proto = 0;

    > +

    > +    if (packet->packet_type != htonl(PT_ETH)) {

    > +        return 0;

    > +    }

    > +

    > +    data_pull(&data, &size, ETH_ADDR_LEN * 2);

    > +    dl_type = parse_ethertype(&data, &size);

    > +    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {

    > +        const struct ip_header *nh = data;

    > +        int ip_len;

    > +

    > +        if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {

    > +            return 0;

    > +        }

    > +        ip_len = IP_IHL(nh->ip_ihl_ver) * 4;

    > +

    > +        if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {

    > +            return 0;

    > +        }

    > +        if (OVS_UNLIKELY(size < ip_len)) {

    > +            return 0;

    > +        }

    > +

    > +        if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {

    > +            nw_frag = FLOW_NW_FRAG_ANY;

    > +            if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {

    > +                nw_frag |= FLOW_NW_FRAG_LATER;

    > +            }

    > +        }

    > +        nw_proto = nh->ip_proto;

    > +        data_pull(&data, &size, ip_len);

    > +    } else if (dl_type == htons(ETH_TYPE_IPV6)) {

    > +        const struct ovs_16aligned_ip6_hdr *nh;

    > +        uint16_t plen;

    > +

    > +        if (OVS_UNLIKELY(size < sizeof *nh)) {

    > +            return 0;

    > +        }

    > +        nh = data_pull(&data, &size, sizeof *nh);

    > +

    > +        plen = ntohs(nh->ip6_plen);

    > +        if (OVS_UNLIKELY(plen > size)) {

    > +            return 0;

    > +        }

    > +        /* Jumbo Payload option not supported yet. */

    > +        if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {

    > +            return 0;

    > +        }

    > +        size = plen;

    > +

    > +        nw_proto = nh->ip6_nxt;

    > +        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {

    > +            return 0;

    > +        }

    > +    } else {

    > +        return 0;

    > +    }

    > +

    > +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {

    > +        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {

    > +            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {

    
    The following might be nicer:
    
    +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))
    +        && OVS_LIKELY(nw_proto == IPPROTO_TCP)
    +        && OVS_LIKELY(size >= TCP_HEADER_LEN)) {
    
    > +                const struct tcp_header *tcp = data;

    > +

    > +                return TCP_FLAGS_BE32(tcp->tcp_ctl);

    > +            }

    > +        }

    > +    }

    > +

    > +    return 0;

    > +}

    > +

    >  /* For every bit of a field that is wildcarded in 'wildcards', sets the

    >   * corresponding bit in 'flow' to zero. */

    >  void

    
    ...
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=gPQ6bI2kxK_3KX17GsXuiCRl8YFpi7zTpZOiIkM9bgw&s=kqeotzN7bKj_MV8xg5c-r9pc5hrfgTttYknrR2c0pQs&e=
Chandran, Sugesh Sept. 10, 2017, 4:15 p.m. UTC | #3
Regards
_Sugesh

> -----Original Message-----
> From: ovs-dev-bounces@openvswitch.org [mailto:ovs-dev-
> bounces@openvswitch.org] On Behalf Of Yuanhan Liu
> Sent: Tuesday, September 5, 2017 10:23 AM
> To: dev@openvswitch.org
> Subject: [ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly from
> the flow mark

[snip]
> 
>      atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
> 
>      DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
>          struct dp_netdev_flow *flow;
> +        uint32_t flow_mark;
> 
>          if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
>              dp_packet_delete(packet);
> @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread
> *pmd,
>              continue;
>          }
> 
> +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
> +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
> +            if (flow) {
[Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can be avoided?
Is that true?
> +                tcp_flags = parse_tcp_flags(packet);
> +                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
> +                                        n_batches);
> +                continue;
> +            }
> +        }
> +
>          if (i != size - 1) {
>              struct dp_packet **packets = packets_->packets;
>              /* Prefetch next packet data and metadata. */ @@ -4989,7 +5001,8
> @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>          /* If EMC is disabled skip emc_lookup */
>          flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
>          if (OVS_LIKELY(flow)) {
> -            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
> +            tcp_flags = miniflow_get_tcp_flags(&key->mf);
> +            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
>                                      n_batches);
>          } else {
>              /* Exact match cache missed. Group missed packets together at @@ -
> 5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread
> *pmd,
>          flow = dp_netdev_flow_cast(rules[i]);
> 
>          emc_probabilistic_insert(pmd, &keys[i], flow);
> -        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches,
> n_batches);
> +        dp_netdev_queue_batches(packet, flow,
> +                                miniflow_get_tcp_flags(&keys[i].mf),
> +                                batches, n_batches);
>      }
> 
>      dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
> diff --git a/lib/flow.c b/lib/flow.c index b2b10aa..912c538 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_,
> size_t size)
>      return parse_ethertype(&data, &size);  }
> 

[Snip]
> --
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Yuanhan Liu Sept. 11, 2017, 5:37 a.m. UTC | #4
On Fri, Sep 08, 2017 at 05:38:07PM +0000, Darrell Ball wrote:
>     > +static inline bool
>     > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
>     > +                        uint32_t *mark OVS_UNUSED)
>     > +{
>     > +#ifdef DPDK_NETDEV
>     > +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
>     > +        *mark = p->mbuf.hash.fdir.hi;
>     > +        return true;
>     > +    }
>     > +#endif
>     > +    return false;
>     > +}
>     
>     It seems odd that p and mark are marked as OVS_UNUSED
>     but used in the case that DPDK_NETDEV is defined.
> 
> [Darrell] OVS_UNUSED means "may be unused"
>                 Typically, for a trivial alternative, we would do a slight variation of the above
>                 rather than writing two versions of the functions.

Right. I also saw quite many examples like this in OVS. That's why I
code in this way, though I agree with Simon, it's indeed a bit odd.

>     > +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
>     > +        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
>     > +            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
>     
>     The following might be nicer:
>     
>     +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))
>     +        && OVS_LIKELY(nw_proto == IPPROTO_TCP)
>     +        && OVS_LIKELY(size >= TCP_HEADER_LEN)) {

Indeed. Thanks!

	--yliu
Yuanhan Liu Sept. 11, 2017, 6:35 a.m. UTC | #5
On Sun, Sep 10, 2017 at 04:15:42PM +0000, Chandran, Sugesh wrote:
> >      atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
> > 
> >      DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
> >          struct dp_netdev_flow *flow;
> > +        uint32_t flow_mark;
> > 
> >          if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
> >              dp_packet_delete(packet);
> > @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread
> > *pmd,
> >              continue;
> >          }
> > 
> > +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
> > +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
> > +            if (flow) {
> [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can be avoided?
> Is that true?

Maybe. If so, we could get better performance, as I have showed in v1.

	--yliu
Chandran, Sugesh Sept. 11, 2017, 8:56 a.m. UTC | #6
Regards
_Sugesh


> -----Original Message-----
> From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> Sent: Monday, September 11, 2017 7:36 AM
> To: Chandran, Sugesh <sugesh.chandran@intel.com>
> Cc: dev@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly
> from the flow mark
> 
> On Sun, Sep 10, 2017 at 04:15:42PM +0000, Chandran, Sugesh wrote:
> > >      atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
> > >
> > >      DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
> > >          struct dp_netdev_flow *flow;
> > > +        uint32_t flow_mark;
> > >
> > >          if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
> > >              dp_packet_delete(packet); @@ -4972,6 +4974,16 @@
> > > emc_processing(struct dp_netdev_pmd_thread *pmd,
> > >              continue;
> > >          }
> > >
> > > +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
> > > +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
> > > +            if (flow) {
> > [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can
> be avoided?
> > Is that true?
> 
> Maybe. If so, we could get better performance, as I have showed in v1.
[Sugesh] Then another probing feature to see if this parsing needs to be done/not.?
> 
> 	--yliu
Yuanhan Liu Sept. 11, 2017, 9:14 a.m. UTC | #7
On Mon, Sep 11, 2017 at 08:56:30AM +0000, Chandran, Sugesh wrote:
> 
> 
> Regards
> _Sugesh
> 
> 
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yliu@fridaylinux.org]
> > Sent: Monday, September 11, 2017 7:36 AM
> > To: Chandran, Sugesh <sugesh.chandran@intel.com>
> > Cc: dev@openvswitch.org
> > Subject: Re: [ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly
> > from the flow mark
> > 
> > On Sun, Sep 10, 2017 at 04:15:42PM +0000, Chandran, Sugesh wrote:
> > > >      atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
> > > >
> > > >      DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
> > > >          struct dp_netdev_flow *flow;
> > > > +        uint32_t flow_mark;
> > > >
> > > >          if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
> > > >              dp_packet_delete(packet); @@ -4972,6 +4974,16 @@
> > > > emc_processing(struct dp_netdev_pmd_thread *pmd,
> > > >              continue;
> > > >          }
> > > >
> > > > +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
> > > > +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
> > > > +            if (flow) {
> > > [Sugesh] If the NIC/hardware can match on tcp_flags then this parsing can
> > be avoided?
> > > Is that true?
> > 
> > Maybe. If so, we could get better performance, as I have showed in v1.
> [Sugesh] Then another probing feature to see if this parsing needs to be done/not.?

I think so. I'd leave it as it is (only sw implementation) and will check
the possibility after this patchset has been merged.

	--yliu
Darrell Ball Sept. 13, 2017, 5:20 a.m. UTC | #8
On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:

    So that we could skip the heavy emc processing, notably, the
    miniflow_extract function. A simple PHY-PHY forwarding testing
    shows 53% performance improvement.
    
    Note that though the heavy miniflow_extract is skipped

[Darrell] How much of the increased performance is due to skipping 
               miniflow_extract ?



, we
    still have to do per packet checking, due to we have to check
    the tcp_flags.
    
    Co-authored-by: Finn Christensen <fc@napatech.com>
    Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
    Signed-off-by: Finn Christensen <fc@napatech.com>
    ---
    
    v2: update tcp_flags, which also fixes the build warnings
    ---
     lib/dp-packet.h   | 13 ++++++++++
     lib/dpif-netdev.c | 27 ++++++++++++++-----
     lib/flow.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
     lib/flow.h        |  1 +
     4 files changed, 113 insertions(+), 6 deletions(-)
    
    diff --git a/lib/dp-packet.h b/lib/dp-packet.h
    index 046f3ab..a7a062f 100644
    --- a/lib/dp-packet.h
    +++ b/lib/dp-packet.h
    @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
     #define reset_dp_packet_checksum_ol_flags(arg)
     #endif
     
    +static inline bool
    +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
    +                        uint32_t *mark OVS_UNUSED)
    +{
    +#ifdef DPDK_NETDEV
    +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
    +        *mark = p->mbuf.hash.fdir.hi;
    +        return true;
    +    }
    +#endif
    +    return false;
    +}
    +
     enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
     
     struct dp_packet_batch {
    diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
    index f3b7f25..a95b8d4 100644
    --- a/lib/dpif-netdev.c
    +++ b/lib/dpif-netdev.c
    @@ -4883,10 +4883,10 @@ struct packet_batch_per_flow {
     static inline void
     packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
                                  struct dp_packet *packet,
    -                             const struct miniflow *mf)
    +                             uint16_t tcp_flags)
     {
         batch->byte_count += dp_packet_size(packet);
    -    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
    +    batch->tcp_flags |= tcp_flags;
         batch->array.packets[batch->array.count++] = packet;
     }
     
    @@ -4921,7 +4921,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
     
     static inline void
     dp_netdev_queue_batches(struct dp_packet *pkt,
    -                        struct dp_netdev_flow *flow, const struct miniflow *mf,
    +                        struct dp_netdev_flow *flow, uint16_t tcp_flags,
                             struct packet_batch_per_flow *batches,
                             size_t *n_batches)
     {
    @@ -4932,7 +4932,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
             packet_batch_per_flow_init(batch, flow);
         }
     
    -    packet_batch_per_flow_update(batch, pkt, mf);
    +    packet_batch_per_flow_update(batch, pkt, tcp_flags);
     }
     
     /* Try to process all ('cnt') the 'packets' using only the exact match cache
    @@ -4960,11 +4960,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
         const size_t size = dp_packet_batch_size(packets_);
         uint32_t cur_min;
         int i;
    +    uint16_t tcp_flags;
     
         atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
     
         DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
             struct dp_netdev_flow *flow;
    +        uint32_t flow_mark;
     
             if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
                 dp_packet_delete(packet);
    @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
                 continue;
             }
     
    +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
    +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
    +            if (flow) {
    +                tcp_flags = parse_tcp_flags(packet);
    +                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
    +                                        n_batches);
    +                continue;
    +            }
    +        }
    +

[Darrell] As mentioned, you would move dp_netdev_pmd_find_flow_by_mark()  to following code; 
              also, maybe, you can get rid of parse_tcp_flags() as a result.


             if (i != size - 1) {
                 struct dp_packet **packets = packets_->packets;
                 /* Prefetch next packet data and metadata. */
    @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
             /* If EMC is disabled skip emc_lookup */
             flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
             if (OVS_LIKELY(flow)) {
    -            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
    +            tcp_flags = miniflow_get_tcp_flags(&key->mf);
    +            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
                                         n_batches);
             } else {
                 /* Exact match cache missed. Group missed packets together at
    @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
             flow = dp_netdev_flow_cast(rules[i]);
     
             emc_probabilistic_insert(pmd, &keys[i], flow);
    -        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
    +        dp_netdev_queue_batches(packet, flow,
    +                                miniflow_get_tcp_flags(&keys[i].mf),
    +                                batches, n_batches);
         }
     
         dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
    diff --git a/lib/flow.c b/lib/flow.c
    index b2b10aa..912c538 100644
    --- a/lib/flow.c
    +++ b/lib/flow.c
    @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size)
         return parse_ethertype(&data, &size);
     }
     

[Darrell] Hopefully we can get rid of the following function which removes the changes to flow.c/.h
                If not, we would have to splice out common code.


    +uint16_t
    +parse_tcp_flags(struct dp_packet *packet)
    +{
    +    const void *data = dp_packet_data(packet);
    +    size_t size = dp_packet_size(packet);
    +    ovs_be16 dl_type;
    +    uint8_t nw_frag = 0, nw_proto = 0;
    +
    +    if (packet->packet_type != htonl(PT_ETH)) {
    +        return 0;
    +    }
    +
    +    data_pull(&data, &size, ETH_ADDR_LEN * 2);
    +    dl_type = parse_ethertype(&data, &size);
    +    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
    +        const struct ip_header *nh = data;
    +        int ip_len;
    +
    +        if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
    +            return 0;
    +        }
    +        ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
    +
    +        if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
    +            return 0;
    +        }
    +        if (OVS_UNLIKELY(size < ip_len)) {
    +            return 0;
    +        }
    +
    +        if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
    +            nw_frag = FLOW_NW_FRAG_ANY;
    +            if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
    +                nw_frag |= FLOW_NW_FRAG_LATER;
    +            }
    +        }
    +        nw_proto = nh->ip_proto;
    +        data_pull(&data, &size, ip_len);
    +    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
    +        const struct ovs_16aligned_ip6_hdr *nh;
    +        uint16_t plen;
    +
    +        if (OVS_UNLIKELY(size < sizeof *nh)) {
    +            return 0;
    +        }
    +        nh = data_pull(&data, &size, sizeof *nh);
    +
    +        plen = ntohs(nh->ip6_plen);
    +        if (OVS_UNLIKELY(plen > size)) {
    +            return 0;
    +        }
    +        /* Jumbo Payload option not supported yet. */
    +        if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
    +            return 0;
    +        }
    +        size = plen;
    +
    +        nw_proto = nh->ip6_nxt;
    +        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
    +            return 0;
    +        }
    +    } else {
    +        return 0;
    +    }
    +
    +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
    +        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
    +            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
    +                const struct tcp_header *tcp = data;
    +
    +                return TCP_FLAGS_BE32(tcp->tcp_ctl);
    +            }
    +        }
    +    }
    +
    +    return 0;
    +}
    +
     /* For every bit of a field that is wildcarded in 'wildcards', sets the
      * corresponding bit in 'flow' to zero. */
     void
    diff --git a/lib/flow.h b/lib/flow.h
    index 6ae5a67..f113ec4 100644
    --- a/lib/flow.h
    +++ b/lib/flow.h
    @@ -130,6 +130,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                              uint8_t *nw_frag);
     ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
     bool parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key);
    +uint16_t parse_tcp_flags(struct dp_packet *packet);
     
     static inline uint64_t
     flow_get_xreg(const struct flow *flow, int idx)
    -- 
    2.7.4
Simon Horman Sept. 13, 2017, 9:29 a.m. UTC | #9
On Mon, Sep 11, 2017 at 01:37:10PM +0800, Yuanhan Liu wrote:
> On Fri, Sep 08, 2017 at 05:38:07PM +0000, Darrell Ball wrote:
> >     > +static inline bool
> >     > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
> >     > +                        uint32_t *mark OVS_UNUSED)
> >     > +{
> >     > +#ifdef DPDK_NETDEV
> >     > +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
> >     > +        *mark = p->mbuf.hash.fdir.hi;
> >     > +        return true;
> >     > +    }
> >     > +#endif
> >     > +    return false;
> >     > +}
> >     
> >     It seems odd that p and mark are marked as OVS_UNUSED
> >     but used in the case that DPDK_NETDEV is defined.
> > 
> > [Darrell] OVS_UNUSED means ‘’may be unused”
> >                 Typically, for a trivial alternative, we would do a slight variation of the above
> >                 rather than writing two versions of the functions.
> 
> Right. I also saw quite many examples like this in OVS. That's why I
> code in this way, though I agree with Simon, it's indeed a bit odd.

Thanks. In light of the above I have no objections to
dp_packet_has_flow_mark() the way it is.

> >     > +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
> >     > +        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
> >     > +            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
> >     
> >     The following might be nicer:
> >     
> >     +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))
> >     +        && OVS_LIKELY(nw_proto == IPPROTO_TCP)
> >     +        && OVS_LIKELY(size >= TCP_HEADER_LEN)) {
> 
> Indeed. Thanks!
> 
> 	--yliu
Yuanhan Liu Sept. 14, 2017, 2:50 a.m. UTC | #10
On Wed, Sep 13, 2017 at 05:20:58AM +0000, Darrell Ball wrote:
> 
> 
> On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:
> 
>     So that we could skip the heavy emc processing, notably, the
>     miniflow_extract function. A simple PHY-PHY forwarding testing
>     shows 53% performance improvement.
>     
>     Note that though the heavy miniflow_extract is skipped
> 
> [Darrell] How much of the increased performance is due to skipping 
>                miniflow_extract ?

For the PHY-PHY case, following are the performance gains for each case.

with_miniflow_extract 22%                       
with_parse_tcp_flags  54%
without both:         70%

[...]
>     +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
>     +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
>     +            if (flow) {
>     +                tcp_flags = parse_tcp_flags(packet);
>     +                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
>     +                                        n_batches);
>     +                continue;
>     +            }
>     +        }
>     +
> 
> [Darrell] As mentioned, you would move dp_netdev_pmd_find_flow_by_mark()  to following code; 
>               also, maybe, you can get rid of parse_tcp_flags() as a result.

I doubt it, as you see from above data, the performance impact due to
miniflow_extract is huge.

> 
>              if (i != size - 1) {
>                  struct dp_packet **packets = packets_->packets;
>                  /* Prefetch next packet data and metadata. */
>     @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>              /* If EMC is disabled skip emc_lookup */
>              flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
>              if (OVS_LIKELY(flow)) {
>     -            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
>     +            tcp_flags = miniflow_get_tcp_flags(&key->mf);
>     +            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
>                                          n_batches);
>              } else {
>                  /* Exact match cache missed. Group missed packets together at
>     @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
>              flow = dp_netdev_flow_cast(rules[i]);
>      
>              emc_probabilistic_insert(pmd, &keys[i], flow);
>     -        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
>     +        dp_netdev_queue_batches(packet, flow,
>     +                                miniflow_get_tcp_flags(&keys[i].mf),
>     +                                batches, n_batches);
>          }
>      
>          dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
>     diff --git a/lib/flow.c b/lib/flow.c
>     index b2b10aa..912c538 100644
>     --- a/lib/flow.c
>     +++ b/lib/flow.c
>     @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size)
>          return parse_ethertype(&data, &size);
>      }
>      
> 
> [Darrell] Hopefully we can get rid of the following function which removes the changes to flow.c/.h
>                 If not, we would have to splice out common code.

To let parse_tcp_flags() and miniflow_extract() share more common code?

	--yliu
Darrell Ball Sept. 14, 2017, 7:13 p.m. UTC | #11
On 9/13/17, 7:50 PM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:

    On Wed, Sep 13, 2017 at 05:20:58AM +0000, Darrell Ball wrote:
    > 
    > 
    > On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:
    > 
    >     So that we could skip the heavy emc processing, notably, the
    >     miniflow_extract function. A simple PHY-PHY forwarding testing
    >     shows 53% performance improvement.
    >     
    >     Note that though the heavy miniflow_extract is skipped
    > 
    > [Darrell] How much of the increased performance is due to skipping 
    >                miniflow_extract ?
    
    For the PHY-PHY case, following are the performance gains for each case.
    
    with_miniflow_extract 22%                       
    with_parse_tcp_flags  54%
    without both:         70%
    
    [...]
    >     +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
    >     +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
    >     +            if (flow) {
    >     +                tcp_flags = parse_tcp_flags(packet);
    >     +                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
    >     +                                        n_batches);
    >     +                continue;
    >     +            }
    >     +        }
    >     +
    > 
    > [Darrell] As mentioned, you would move dp_netdev_pmd_find_flow_by_mark()  to following code; 
    >               also, maybe, you can get rid of parse_tcp_flags() as a result.
    
    I doubt it, as you see from above data, the performance impact due to
    miniflow_extract is huge.
    
    > 
    >              if (i != size - 1) {
    >                  struct dp_packet **packets = packets_->packets;
    >                  /* Prefetch next packet data and metadata. */
    >     @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
    >              /* If EMC is disabled skip emc_lookup */
    >              flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
    >              if (OVS_LIKELY(flow)) {
    >     -            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
    >     +            tcp_flags = miniflow_get_tcp_flags(&key->mf);
    >     +            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
    >                                          n_batches);
    >              } else {
    >                  /* Exact match cache missed. Group missed packets together at
    >     @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
    >              flow = dp_netdev_flow_cast(rules[i]);
    >      
    >              emc_probabilistic_insert(pmd, &keys[i], flow);
    >     -        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
    >     +        dp_netdev_queue_batches(packet, flow,
    >     +                                miniflow_get_tcp_flags(&keys[i].mf),
    >     +                                batches, n_batches);
    >          }
    >      
    >          dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
    >     diff --git a/lib/flow.c b/lib/flow.c
    >     index b2b10aa..912c538 100644
    >     --- a/lib/flow.c
    >     +++ b/lib/flow.c
    >     @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size)
    >          return parse_ethertype(&data, &size);
    >      }
    >      
    > 
    > [Darrell] Hopefully we can get rid of the following function which removes the changes to flow.c/.h
    >                 If not, we would have to splice out common code.
    
    To let parse_tcp_flags() and miniflow_extract() share more common code?

[Darrell]
It looks to me parse_tcp_flags() is distilled from miniflow_extract(), so, yes, that is what I meant.

    
    	--yliu
diff mbox series

Patch

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 046f3ab..a7a062f 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -691,6 +691,19 @@  reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
 #define reset_dp_packet_checksum_ol_flags(arg)
 #endif
 
+/* Returns true and stores in '*mark' the flow mark that the NIC attached
+ * to packet 'p' via flow-director (DPDK PKT_RX_FDIR_ID); returns false if
+ * the packet carries no mark.  On non-DPDK builds this always returns
+ * false, which is why both parameters are tagged OVS_UNUSED ("may be
+ * unused") rather than split into two build-specific variants. */
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+                        uint32_t *mark OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
+        /* The 32-bit mark programmed into the NIC rule is reported back
+         * in the mbuf's flow-director field. */
+        *mark = p->mbuf.hash.fdir.hi;
+        return true;
+    }
+#endif
+    return false;
+}
+
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f3b7f25..a95b8d4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4883,10 +4883,10 @@  struct packet_batch_per_flow {
 static inline void
 packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
                              struct dp_packet *packet,
-                             const struct miniflow *mf)
+                             uint16_t tcp_flags)
 {
     batch->byte_count += dp_packet_size(packet);
-    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
+    batch->tcp_flags |= tcp_flags;
     batch->array.packets[batch->array.count++] = packet;
 }
 
@@ -4921,7 +4921,7 @@  packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
 
 static inline void
 dp_netdev_queue_batches(struct dp_packet *pkt,
-                        struct dp_netdev_flow *flow, const struct miniflow *mf,
+                        struct dp_netdev_flow *flow, uint16_t tcp_flags,
                         struct packet_batch_per_flow *batches,
                         size_t *n_batches)
 {
@@ -4932,7 +4932,7 @@  dp_netdev_queue_batches(struct dp_packet *pkt,
         packet_batch_per_flow_init(batch, flow);
     }
 
-    packet_batch_per_flow_update(batch, pkt, mf);
+    packet_batch_per_flow_update(batch, pkt, tcp_flags);
 }
 
 /* Try to process all ('cnt') the 'packets' using only the exact match cache
@@ -4960,11 +4960,13 @@  emc_processing(struct dp_netdev_pmd_thread *pmd,
     const size_t size = dp_packet_batch_size(packets_);
     uint32_t cur_min;
     int i;
+    uint16_t tcp_flags;
 
     atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
         struct dp_netdev_flow *flow;
+        uint32_t flow_mark;
 
         if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
             dp_packet_delete(packet);
@@ -4972,6 +4974,16 @@  emc_processing(struct dp_netdev_pmd_thread *pmd,
             continue;
         }
 
+        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
+            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
+            if (flow) {
+                tcp_flags = parse_tcp_flags(packet);
+                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
+                                        n_batches);
+                continue;
+            }
+        }
+
         if (i != size - 1) {
             struct dp_packet **packets = packets_->packets;
             /* Prefetch next packet data and metadata. */
@@ -4989,7 +5001,8 @@  emc_processing(struct dp_netdev_pmd_thread *pmd,
         /* If EMC is disabled skip emc_lookup */
         flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
         if (OVS_LIKELY(flow)) {
-            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
+            tcp_flags = miniflow_get_tcp_flags(&key->mf);
+            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
                                     n_batches);
         } else {
             /* Exact match cache missed. Group missed packets together at
@@ -5166,7 +5179,9 @@  fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         flow = dp_netdev_flow_cast(rules[i]);
 
         emc_probabilistic_insert(pmd, &keys[i], flow);
-        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
+        dp_netdev_queue_batches(packet, flow,
+                                miniflow_get_tcp_flags(&keys[i].mf),
+                                batches, n_batches);
     }
 
     dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
diff --git a/lib/flow.c b/lib/flow.c
index b2b10aa..912c538 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -991,6 +991,84 @@  parse_dl_type(const struct eth_header *data_, size_t size)
     return parse_ethertype(&data, &size);
 }
 
+/* Lightweight header walk that extracts only the TCP flags from 'packet',
+ * so callers on the flow-mark fast path can skip the full (and costly)
+ * miniflow_extract().  Parses Ethernet, then IPv4 or IPv6 (including IPv6
+ * extension headers), then TCP.  Returns the TCP flags from tcp_ctl, or 0
+ * if the packet is not Ethernet, not IPv4/IPv6, not TCP, a non-first
+ * fragment, or truncated/malformed at any layer. */
+uint16_t
+parse_tcp_flags(struct dp_packet *packet)
+{
+    const void *data = dp_packet_data(packet);
+    size_t size = dp_packet_size(packet);
+    ovs_be16 dl_type;
+    uint8_t nw_frag = 0, nw_proto = 0;
+
+    /* Only packets that start with an Ethernet header are handled. */
+    if (packet->packet_type != htonl(PT_ETH)) {
+        return 0;
+    }
+
+    /* Skip destination and source MAC addresses, then resolve the
+     * Ethertype (parse_ethertype() also advances past any VLAN tags). */
+    data_pull(&data, &size, ETH_ADDR_LEN * 2);
+    dl_type = parse_ethertype(&data, &size);
+    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
+        const struct ip_header *nh = data;
+        int ip_len;
+
+        if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
+            return 0;
+        }
+        /* IHL is in 32-bit words; convert to bytes. */
+        ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
+
+        /* Reject a bogus IHL smaller than the minimum header... */
+        if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
+            return 0;
+        }
+        /* ...and a header that claims more bytes than the packet has. */
+        if (OVS_UNLIKELY(size < ip_len)) {
+            return 0;
+        }
+
+        if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
+            nw_frag = FLOW_NW_FRAG_ANY;
+            if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
+                /* Nonzero offset: not the first fragment, so no TCP
+                 * header is present in this packet. */
+                nw_frag |= FLOW_NW_FRAG_LATER;
+            }
+        }
+        nw_proto = nh->ip_proto;
+        data_pull(&data, &size, ip_len);
+    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
+        const struct ovs_16aligned_ip6_hdr *nh;
+        uint16_t plen;
+
+        if (OVS_UNLIKELY(size < sizeof *nh)) {
+            return 0;
+        }
+        nh = data_pull(&data, &size, sizeof *nh);
+
+        plen = ntohs(nh->ip6_plen);
+        if (OVS_UNLIKELY(plen > size)) {
+            return 0;
+        }
+        /* Jumbo Payload option not supported yet. */
+        if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
+            return 0;
+        }
+        /* Trim any L2 padding beyond the IPv6 payload length. */
+        size = plen;
+
+        nw_proto = nh->ip6_nxt;
+        /* Walk extension headers to the upper-layer protocol; this also
+         * sets nw_frag for fragmented packets. */
+        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
+            return 0;
+        }
+    } else {
+        return 0;
+    }
+
+    /* Flags are only available in the first (or only) fragment, and only
+     * when a complete TCP header is present. */
+    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
+        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
+            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
+                const struct tcp_header *tcp = data;
+
+                return TCP_FLAGS_BE32(tcp->tcp_ctl);
+            }
+        }
+    }
+
+    return 0;
+}
+
 /* For every bit of a field that is wildcarded in 'wildcards', sets the
  * corresponding bit in 'flow' to zero. */
 void
diff --git a/lib/flow.h b/lib/flow.h
index 6ae5a67..f113ec4 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -130,6 +130,7 @@  bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                          uint8_t *nw_frag);
 ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
 bool parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key);
+uint16_t parse_tcp_flags(struct dp_packet *packet);
 
 static inline uint64_t
 flow_get_xreg(const struct flow *flow, int idx)