diff mbox series

[ovs-dev,v1,2/6] dpif-netdev: add tunnel_valid flag to skip ip/ipv6 address comparison

Message ID 20200602071005.29925-3-Yanqin.Wei@arm.com
State Changes Requested
Headers show
Series Memory access optimization for flow scalability of userspace datapath. | expand

Commit Message

Yanqin Wei June 2, 2020, 7:10 a.m. UTC
miniflow_extract checks the validation of tunnel metadata by comparing
tunnel destination address, including 16 bytes ipv6 address.
This patch introduces a 'tunnel_valid' flag. If it is false,
md->cacheline2 will not be touched. This improvement is beneficial to
miniflow_extract performance for all kinds of traffic.

Reviewed-by: Lijian Zhang <Lijian.Zhang@arm.com>
Reviewed-by: Malvika Gupta <Malvika.Gupta@arm.com>
Signed-off-by: Yanqin Wei <Yanqin.Wei@arm.com>
---
 lib/dpif-netdev.c | 14 +++++++++++---
 lib/flow.c        |  2 +-
 lib/packets.h     | 46 ++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 52 insertions(+), 10 deletions(-)

Comments

Ilya Maximets July 5, 2020, 2:11 p.m. UTC | #1
On 6/2/20 9:10 AM, Yanqin Wei wrote:
> miniflow_extract checks the validation of tunnel metadata by comparing
> tunnel destination address, including 16 bytes ipv6 address.
> This patch introduces a 'tunnel_valid' flag. If it is false,
> md->cacheline2 will not be touched. This improvement is beneficial to
> miniflow_extract performance for all kinds of traffic.
> 
> Reviewed-by: Lijian Zhang <Lijian.Zhang@arm.com>
> Reviewed-by: Malvika Gupta <Malvika.Gupta@arm.com>
> Signed-off-by: Yanqin Wei <Yanqin.Wei@arm.com>
> ---

Hi.
First of all, thanks for working on performance improvements!

However, this doesn't look as a clean patch.

Why we need both pkt_metadata_datapath_init() and pkt_metadata_init() ?
Why we can't just not initialize ip_dst and use tunnel_valid flag everywhere?

Current version complicates code making it less readable and prone to errors.

Best regards, Ilya Maximets.

>  lib/dpif-netdev.c | 14 +++++++++++---
>  lib/flow.c        |  2 +-
>  lib/packets.h     | 46 ++++++++++++++++++++++++++++++++++++++++------
>  3 files changed, 52 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 51c888501..c94d5e8c7 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -6625,12 +6625,16 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
>          if (i != cnt - 1) {
>              struct dp_packet **packets = packets_->packets;
>              /* Prefetch next packet data and metadata. */
> -            OVS_PREFETCH(dp_packet_data(packets[i+1]));
> -            pkt_metadata_prefetch_init(&packets[i+1]->md);
> +            OVS_PREFETCH(dp_packet_data(packets[i + 1]));
> +            if (md_is_valid) {
> +                pkt_metadata_prefetch(&packets[i + 1]->md);
> +            } else {
> +                pkt_metadata_prefetch_init(&packets[i + 1]->md);
> +            }
>          }
>  
>          if (!md_is_valid) {
> -            pkt_metadata_init(&packet->md, port_no);
> +            pkt_metadata_datapath_init(&packet->md, port_no);
>          }
>  
>          if ((*recirc_depth_get() == 0) &&
> @@ -6730,6 +6734,10 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
>      miniflow_expand(&key->mf, &match.flow);
>      memset(&match.wc, 0, sizeof match.wc);
>  
> +    if (!packet->md.tunnel_valid) {
> +        pkt_metadata_tnl_dst_init(&packet->md);
> +    }
> +
>      ofpbuf_clear(actions);
>      ofpbuf_clear(put_actions);
>  
> diff --git a/lib/flow.c b/lib/flow.c
> index cc1b3f2db..1f0b3d4dc 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -747,7 +747,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>      ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
>  
>      /* Metadata. */
> -    if (flow_tnl_dst_is_set(&md->tunnel)) {
> +    if (md->tunnel_valid && flow_tnl_dst_is_set(&md->tunnel)) {
>          miniflow_push_words(mf, tunnel, &md->tunnel,
>                              offsetof(struct flow_tnl, metadata) /
>                              sizeof(uint64_t));
> diff --git a/lib/packets.h b/lib/packets.h
> index 447e6f6fa..3b507d2a3 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -103,15 +103,16 @@ PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>                                     action. */
>      uint32_t skb_priority;      /* Packet priority for QoS. */
>      uint32_t pkt_mark;          /* Packet mark. */
> +    struct conn *conn;          /* Cached conntrack connection. */
>      uint8_t  ct_state;          /* Connection state. */
>      bool ct_orig_tuple_ipv6;
>      uint16_t ct_zone;           /* Connection zone. */
>      uint32_t ct_mark;           /* Connection mark. */
>      ovs_u128 ct_label;          /* Connection label. */
>      union flow_in_port in_port; /* Input port. */
> -    struct conn *conn;          /* Cached conntrack connection. */
>      bool reply;                 /* True if reply direction. */
>      bool icmp_related;          /* True if ICMP related. */
> +    bool tunnel_valid;
>  );
>  
>  PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
> @@ -141,6 +142,7 @@ pkt_metadata_init_tnl(struct pkt_metadata *md)
>       * are before this and as long as they are empty, the options won't
>       * be looked at. */
>      memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
> +    md->tunnel_valid = true;
>  }
>  
>  static inline void
> @@ -151,6 +153,25 @@ pkt_metadata_init_conn(struct pkt_metadata *md)
>  
>  static inline void
>  pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
> +{
> +    /* Initialize only till ct_state. Once the ct_state is zeroed out rest
> +     * of ct fields will not be looked at unless ct_state != 0.
> +     */
> +    memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
> +
> +    /* It can be expensive to zero out all of the tunnel metadata. However,
> +     * we can just zero out ip_dst and the rest of the data will never be
> +     * looked at. */
> +    md->tunnel_valid = true;
> +    md->tunnel.ip_dst = 0;
> +    md->tunnel.ipv6_dst = in6addr_any;
> +
> +    md->in_port.odp_port = port;
> +}
> +
> +/* This function initializes those members used by userspace datapath */
> +static inline void
> +pkt_metadata_datapath_init(struct pkt_metadata *md, odp_port_t port)
>  {
>      /* This is called for every packet in userspace datapath and affects
>       * performance if all the metadata is initialized. Hence, fields should
> @@ -162,12 +183,19 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
>      memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
>  
>      /* It can be expensive to zero out all of the tunnel metadata. However,
> -     * we can just zero out ip_dst and the rest of the data will never be
> -     * looked at. */
> +     * we can just clear tunnel_valid */
> +    md->tunnel_valid = false;
> +
> +    md->in_port.odp_port = port;
> +}
> +
> +/* This function initializes tunnel dst for upcall */
> +static inline void
> +pkt_metadata_tnl_dst_init(struct pkt_metadata *md)
> +{
>      md->tunnel.ip_dst = 0;
>      md->tunnel.ipv6_dst = in6addr_any;
> -    md->in_port.odp_port = port;
> -    md->conn = NULL;
> +    md->tunnel_valid = true;
>  }
>  
>  /* This function prefetches the cachelines touched by pkt_metadata_init()
> @@ -184,7 +212,13 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
>       * in pkt_metadata_init_tnl(). */
>      OVS_PREFETCH(md->cacheline1);
>  
> -    /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
> +}
> +/* This function prefetches the cachelines touched by miniflow_extract().*/
> +static inline void
> +pkt_metadata_prefetch(struct pkt_metadata *md)
> +{
> +    OVS_PREFETCH(md->cacheline0);
> +    OVS_PREFETCH(md->cacheline1);
>      OVS_PREFETCH(md->cacheline2);
>  }
>  
>
Yanqin Wei July 6, 2020, 11:27 a.m. UTC | #2
Hi Ilya,

> > ---
> 
> Hi.
> First of all, thanks for working on performance improvements!
Thanks, I saw some slides where OVS was used to compare flow scalability with other projects. It inspired me to optimize this code.

> 
> However, this doesn't look as a clean patch.
There are some trade-off for legacy code.
> 
> Why we need both pkt_metadata_datapath_init() and pkt_metadata_init() ?
> Why we can't just not initialize ip_dst and use tunnel_valid flag everywhere?

This patch wants to reduce the scope of modification( only for fastpath), because performance is not critical for slow path. So tunnel dst@ is set before leaving fast path(upcall).
Another reason is 'flow_tnl' member is defined in both ' pkt_metadata' and 'flow'.  If tunnel_valid flag is introduced into 'flow', the layout and  legacy flow API also need to be modified.

> 
> Current version complicates code making it less readable and prone to errors.
Do you prefer to use tunnel_valid in both fast path and slow path? I could send v2 for this modification.
 
> Best regards, Ilya Maximets.
> 
> >  lib/dpif-netdev.c | 14 +++++++++++---
> >  lib/flow.c        |  2 +-
> >  lib/packets.h     | 46 ++++++++++++++++++++++++++++++++++++++++------
> >  3 files changed, 52 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> > 51c888501..c94d5e8c7 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -6625,12 +6625,16 @@ dfc_processing(struct dp_netdev_pmd_thread
> *pmd,
> >          if (i != cnt - 1) {
> >              struct dp_packet **packets = packets_->packets;
> >              /* Prefetch next packet data and metadata. */
> > -            OVS_PREFETCH(dp_packet_data(packets[i+1]));
> > -            pkt_metadata_prefetch_init(&packets[i+1]->md);
> > +            OVS_PREFETCH(dp_packet_data(packets[i + 1]));
> > +            if (md_is_valid) {
> > +                pkt_metadata_prefetch(&packets[i + 1]->md);
> > +            } else {
> > +                pkt_metadata_prefetch_init(&packets[i + 1]->md);
> > +            }
> >          }
> >
> >          if (!md_is_valid) {
> > -            pkt_metadata_init(&packet->md, port_no);
> > +            pkt_metadata_datapath_init(&packet->md, port_no);
> >          }
> >
> >          if ((*recirc_depth_get() == 0) && @@ -6730,6 +6734,10 @@
> > handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
> >      miniflow_expand(&key->mf, &match.flow);
> >      memset(&match.wc, 0, sizeof match.wc);
> >
> > +    if (!packet->md.tunnel_valid) {
> > +        pkt_metadata_tnl_dst_init(&packet->md);
> > +    }
> > +
> >      ofpbuf_clear(actions);
> >      ofpbuf_clear(put_actions);
> >
> > diff --git a/lib/flow.c b/lib/flow.c
> > index cc1b3f2db..1f0b3d4dc 100644
> > --- a/lib/flow.c
> > +++ b/lib/flow.c
> > @@ -747,7 +747,7 @@ miniflow_extract(struct dp_packet *packet, struct
> miniflow *dst)
> >      ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
> >
> >      /* Metadata. */
> > -    if (flow_tnl_dst_is_set(&md->tunnel)) {
> > +    if (md->tunnel_valid && flow_tnl_dst_is_set(&md->tunnel)) {
> >          miniflow_push_words(mf, tunnel, &md->tunnel,
> >                              offsetof(struct flow_tnl, metadata) /
> >                              sizeof(uint64_t)); diff --git
> > a/lib/packets.h b/lib/packets.h index 447e6f6fa..3b507d2a3 100644
> > --- a/lib/packets.h
> > +++ b/lib/packets.h
> > @@ -103,15 +103,16 @@
> PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
> >                                     action. */
> >      uint32_t skb_priority;      /* Packet priority for QoS. */
> >      uint32_t pkt_mark;          /* Packet mark. */
> > +    struct conn *conn;          /* Cached conntrack connection. */
> >      uint8_t  ct_state;          /* Connection state. */
> >      bool ct_orig_tuple_ipv6;
> >      uint16_t ct_zone;           /* Connection zone. */
> >      uint32_t ct_mark;           /* Connection mark. */
> >      ovs_u128 ct_label;          /* Connection label. */
> >      union flow_in_port in_port; /* Input port. */
> > -    struct conn *conn;          /* Cached conntrack connection. */
> >      bool reply;                 /* True if reply direction. */
> >      bool icmp_related;          /* True if ICMP related. */
> > +    bool tunnel_valid;
> >  );
> >
> >  PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
> @@
> > -141,6 +142,7 @@ pkt_metadata_init_tnl(struct pkt_metadata *md)
> >       * are before this and as long as they are empty, the options won't
> >       * be looked at. */
> >      memset(md, 0, offsetof(struct pkt_metadata,
> > tunnel.metadata.opts));
> > +    md->tunnel_valid = true;
> >  }
> >
> >  static inline void
> > @@ -151,6 +153,25 @@ pkt_metadata_init_conn(struct pkt_metadata *md)
> >
> >  static inline void
> >  pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
> > +{
> > +    /* Initialize only till ct_state. Once the ct_state is zeroed out rest
> > +     * of ct fields will not be looked at unless ct_state != 0.
> > +     */
> > +    memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
> > +
> > +    /* It can be expensive to zero out all of the tunnel metadata. However,
> > +     * we can just zero out ip_dst and the rest of the data will never be
> > +     * looked at. */
> > +    md->tunnel_valid = true;
> > +    md->tunnel.ip_dst = 0;
> > +    md->tunnel.ipv6_dst = in6addr_any;
> > +
> > +    md->in_port.odp_port = port;
> > +}
> > +
> > +/* This function initializes those members used by userspace datapath
> > +*/ static inline void pkt_metadata_datapath_init(struct pkt_metadata
> > +*md, odp_port_t port)
> >  {
> >      /* This is called for every packet in userspace datapath and affects
> >       * performance if all the metadata is initialized. Hence, fields
> > should @@ -162,12 +183,19 @@ pkt_metadata_init(struct pkt_metadata
> *md, odp_port_t port)
> >      memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
> >
> >      /* It can be expensive to zero out all of the tunnel metadata. However,
> > -     * we can just zero out ip_dst and the rest of the data will never be
> > -     * looked at. */
> > +     * we can just clear tunnel_valid */
> > +    md->tunnel_valid = false;
> > +
> > +    md->in_port.odp_port = port;
> > +}
> > +
> > +/* This function initializes tunnel dst for upcall */ static inline
> > +void pkt_metadata_tnl_dst_init(struct pkt_metadata *md) {
> >      md->tunnel.ip_dst = 0;
> >      md->tunnel.ipv6_dst = in6addr_any;
> > -    md->in_port.odp_port = port;
> > -    md->conn = NULL;
> > +    md->tunnel_valid = true;
> >  }
> >
> >  /* This function prefetches the cachelines touched by
> > pkt_metadata_init() @@ -184,7 +212,13 @@
> pkt_metadata_prefetch_init(struct pkt_metadata *md)
> >       * in pkt_metadata_init_tnl(). */
> >      OVS_PREFETCH(md->cacheline1);
> >
> > -    /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
> > +}
> > +/* This function prefetches the cachelines touched by
> > +miniflow_extract().*/ static inline void pkt_metadata_prefetch(struct
> > +pkt_metadata *md) {
> > +    OVS_PREFETCH(md->cacheline0);
> > +    OVS_PREFETCH(md->cacheline1);
> >      OVS_PREFETCH(md->cacheline2);
> >  }
> >
> >
Ilya Maximets July 9, 2020, 6:19 p.m. UTC | #3
On 7/6/20 1:27 PM, Yanqin Wei wrote:
> Hi Ilya,
> 
>>> ---
>>
>> Hi.
>> First of all, thanks for working on performance improvements!
> Thanks, I saw some slides where OVS was used to compare flow scalability with other projects. It inspired me to optimize this code.
> 
>>
>> However, this doesn't look as a clean patch.
> There are some trade-off for legacy code.

What trade-offs?

>>
>> Why we need both pkt_metadata_datapath_init() and pkt_metadata_init() ?
>> Why we can't just not initialize ip_dst and use tunnel_valid flag everywhere?
> 
> This patch wants to reduce the scope of modification( only for fastpath), because performance is not critical for slow path. So tunnel dst@ is set before leaving fast path(upcall).
> Another reason is 'flow_tnl' member is defined in both ' pkt_metadata' and 'flow'.  If tunnel_valid flag is introduced into 'flow', the layout and  legacy flow API also need to be modified.

I understand that you didn't want to touch anything beside the
performance critical parts.  However, dp_packet_/pkt_ API is
already heavily overloaded and having a few very similar
functions that can or can not be used in some contexts makes
things even more complicated.  It's hard to read and maintain.
And it's prone to errors in case someone will try t modify
datapath code.
I'd prefer not t have different initialization functions and
only have one variant.  This will also solve the issue that
every other part of code uses tunneling metadata without checking
'tunnel_valid' flag.  This is actually a logical mistake.
And yes, 'tunnel_valid' flag really needs a comment inside the
structure definition.

> 
>>
>> Current version complicates code making it less readable and prone to errors.
> Do you prefer to use tunnel_valid in both fast path and slow path? I could send v2 for this modification.
>  
>> Best regards, Ilya Maximets.
Yanqin Wei July 10, 2020, 8:30 a.m. UTC | #4
Hi Ilya,

> >
> >>> ---
> >>
> >> Hi.
> >> First of all, thanks for working on performance improvements!
> > Thanks, I saw some slides where OVS was used to compare flow scalability
> with other projects. It inspired me to optimize this code.
> >
> >>
> >> However, this doesn't look as a clean patch.
> > There are some trade-off for legacy code.
> 
> What trade-offs?
In some function, the parameter is struct flow_tnl instead of 'pkt_metadata' or 'flow'. In these function, tunnel_valid cannot be used for valid check. So some function signatures need be modified.
> 
> >>
> >> Why we need both pkt_metadata_datapath_init() and pkt_metadata_init() ?
> >> Why we can't just not initialize ip_dst and use tunnel_valid flag everywhere?
> >
> > This patch wants to reduce the scope of modification( only for fastpath),
> because performance is not critical for slow path. So tunnel dst@ is set before
> leaving fast path(upcall).
> > Another reason is 'flow_tnl' member is defined in both ' pkt_metadata' and
> 'flow'.  If tunnel_valid flag is introduced into 'flow', the layout and  legacy flow
> API also need to be modified.
> 
> I understand that you didn't want to touch anything beside the performance
> critical parts.  However, dp_packet_/pkt_ API is already heavily overloaded
> and having a few very similar functions that can or can not be used in some
> contexts makes things even more complicated.  It's hard to read and maintain.
> And it's prone to errors in case someone will try t modify datapath code.
> I'd prefer not t have different initialization functions and only have one variant.
> This will also solve the issue that every other part of code uses tunneling
> metadata without checking 'tunnel_valid' flag.  This is actually a logical
> mistake.
OK, it makes sense. I'll check all the places where flow_tnl is used and update v2 for tunnel_valid checking. 

> And yes, 'tunnel_valid' flag really needs a comment inside the structure
> definition.
OK, will add comment in V2.
> 
> >
> >>
> >> Current version complicates code making it less readable and prone to
> errors.
> > Do you prefer to use tunnel_valid in both fast path and slow path? I could
> send v2 for this modification.
> >
> >> Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 51c888501..c94d5e8c7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6625,12 +6625,16 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
         if (i != cnt - 1) {
             struct dp_packet **packets = packets_->packets;
             /* Prefetch next packet data and metadata. */
-            OVS_PREFETCH(dp_packet_data(packets[i+1]));
-            pkt_metadata_prefetch_init(&packets[i+1]->md);
+            OVS_PREFETCH(dp_packet_data(packets[i + 1]));
+            if (md_is_valid) {
+                pkt_metadata_prefetch(&packets[i + 1]->md);
+            } else {
+                pkt_metadata_prefetch_init(&packets[i + 1]->md);
+            }
         }
 
         if (!md_is_valid) {
-            pkt_metadata_init(&packet->md, port_no);
+            pkt_metadata_datapath_init(&packet->md, port_no);
         }
 
         if ((*recirc_depth_get() == 0) &&
@@ -6730,6 +6734,10 @@  handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
     miniflow_expand(&key->mf, &match.flow);
     memset(&match.wc, 0, sizeof match.wc);
 
+    if (!packet->md.tunnel_valid) {
+        pkt_metadata_tnl_dst_init(&packet->md);
+    }
+
     ofpbuf_clear(actions);
     ofpbuf_clear(put_actions);
 
diff --git a/lib/flow.c b/lib/flow.c
index cc1b3f2db..1f0b3d4dc 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -747,7 +747,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
     ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
 
     /* Metadata. */
-    if (flow_tnl_dst_is_set(&md->tunnel)) {
+    if (md->tunnel_valid && flow_tnl_dst_is_set(&md->tunnel)) {
         miniflow_push_words(mf, tunnel, &md->tunnel,
                             offsetof(struct flow_tnl, metadata) /
                             sizeof(uint64_t));
diff --git a/lib/packets.h b/lib/packets.h
index 447e6f6fa..3b507d2a3 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -103,15 +103,16 @@  PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
                                    action. */
     uint32_t skb_priority;      /* Packet priority for QoS. */
     uint32_t pkt_mark;          /* Packet mark. */
+    struct conn *conn;          /* Cached conntrack connection. */
     uint8_t  ct_state;          /* Connection state. */
     bool ct_orig_tuple_ipv6;
     uint16_t ct_zone;           /* Connection zone. */
     uint32_t ct_mark;           /* Connection mark. */
     ovs_u128 ct_label;          /* Connection label. */
     union flow_in_port in_port; /* Input port. */
-    struct conn *conn;          /* Cached conntrack connection. */
     bool reply;                 /* True if reply direction. */
     bool icmp_related;          /* True if ICMP related. */
+    bool tunnel_valid;
 );
 
 PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
@@ -141,6 +142,7 @@  pkt_metadata_init_tnl(struct pkt_metadata *md)
      * are before this and as long as they are empty, the options won't
      * be looked at. */
     memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
+    md->tunnel_valid = true;
 }
 
 static inline void
@@ -151,6 +153,25 @@  pkt_metadata_init_conn(struct pkt_metadata *md)
 
 static inline void
 pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
+{
+    /* Initialize only till ct_state. Once the ct_state is zeroed out rest
+     * of ct fields will not be looked at unless ct_state != 0.
+     */
+    memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
+
+    /* It can be expensive to zero out all of the tunnel metadata. However,
+     * we can just zero out ip_dst and the rest of the data will never be
+     * looked at. */
+    md->tunnel_valid = true;
+    md->tunnel.ip_dst = 0;
+    md->tunnel.ipv6_dst = in6addr_any;
+
+    md->in_port.odp_port = port;
+}
+
+/* This function initializes those members used by userspace datapath */
+static inline void
+pkt_metadata_datapath_init(struct pkt_metadata *md, odp_port_t port)
 {
     /* This is called for every packet in userspace datapath and affects
      * performance if all the metadata is initialized. Hence, fields should
@@ -162,12 +183,19 @@  pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
     memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
 
     /* It can be expensive to zero out all of the tunnel metadata. However,
-     * we can just zero out ip_dst and the rest of the data will never be
-     * looked at. */
+     * we can just clear tunnel_valid */
+    md->tunnel_valid = false;
+
+    md->in_port.odp_port = port;
+}
+
+/* This function initializes tunnel dst for upcall */
+static inline void
+pkt_metadata_tnl_dst_init(struct pkt_metadata *md)
+{
     md->tunnel.ip_dst = 0;
     md->tunnel.ipv6_dst = in6addr_any;
-    md->in_port.odp_port = port;
-    md->conn = NULL;
+    md->tunnel_valid = true;
 }
 
 /* This function prefetches the cachelines touched by pkt_metadata_init()
@@ -184,7 +212,13 @@  pkt_metadata_prefetch_init(struct pkt_metadata *md)
      * in pkt_metadata_init_tnl(). */
     OVS_PREFETCH(md->cacheline1);
 
-    /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
+}
+/* This function prefetches the cachelines touched by miniflow_extract().*/
+static inline void
+pkt_metadata_prefetch(struct pkt_metadata *md)
+{
+    OVS_PREFETCH(md->cacheline0);
+    OVS_PREFETCH(md->cacheline1);
     OVS_PREFETCH(md->cacheline2);
 }