diff mbox series

[ovs-dev,v5,9/9] mfex-avx512: Add support for tunnel packets in avx512 mfex.

Message ID 20220825233058.2697002-10-kumar.amber@intel.com
State Superseded
Headers show
Series DPIF + MFEX Inner AVX512 | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Kumar Amber Aug. 25, 2022, 11:30 p.m. UTC
This patch adds the necessary support to avx512 mfex to
support handling of tunnel packet type.

Signed-off-by: Kumar Amber <kumar.amber@intel.com>

---
v5:
- check metadata IP address to find tunneling is valid or not.
  As dummy-pmd often passes garbage data to dpif.
---
---
 lib/dpif-netdev-avx512.c          |  16 +--
 lib/dpif-netdev-extract-avx512.c  | 195 ++++++++++++++++++++++++------
 lib/dpif-netdev-private-extract.c |   4 +-
 3 files changed, 170 insertions(+), 45 deletions(-)

Comments

Ferriter, Cian Sept. 29, 2022, 3:50 p.m. UTC | #1
Hi Amber,

Thanks for the patches. I've left some comments below inline.

Thanks,
Cian

> -----Original Message-----
> From: Amber, Kumar <kumar.amber@intel.com>
> Sent: Friday 26 August 2022 00:31
> To: ovs-dev@openvswitch.org
> Cc: echaudro@redhat.com; i.maximets@ovn.org; Ferriter, Cian <cian.ferriter@intel.com>; Stokes, Ian
> <ian.stokes@intel.com>; fbl@sysclose.org; Van Haaren, Harry <harry.van.haaren@intel.com>; Amber, Kumar
> <kumar.amber@intel.com>
> Subject: [PATCH v5 9/9] mfex-avx512: Add support for tunnel packets in avx512 mfex.
> 
> This patch adds the necessary support to avx512 mfex to
> support handling of tunnel packet type.
> 
> Signed-off-by: Kumar Amber <kumar.amber@intel.com>
> 
> ---
> v5:
> - check metadata IP address to find tunneling is valid or not.
>   As dummy-pmd often passes garbage data to dpif.
> ---
> ---
>  lib/dpif-netdev-avx512.c          |  16 +--
>  lib/dpif-netdev-extract-avx512.c  | 195 ++++++++++++++++++++++++------
>  lib/dpif-netdev-private-extract.c |   4 +-
>  3 files changed, 170 insertions(+), 45 deletions(-)
> 
> diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
> index 1c3b67b02..d5c61baff 100644
> --- a/lib/dpif-netdev-avx512.c
> +++ b/lib/dpif-netdev-avx512.c
> @@ -185,15 +185,17 @@ dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
>      }
> 
>      /* Do a batch minfilow extract into keys. */
> -     /* Do a batch minfilow extract into keys, but only for outer packets. */

In the earlier DPIF part of this patchset, I guess you add the above comment line that you are removing here. But when you add it, I don't think it should be duplicating the line above. Just add the ", but only for outer packets." part in the earlier patchset and remove it here, rather than adding a whole line then removing in a later patch.

>      uint32_t mf_mask = 0;
> -    if (recirc_depth == 0) {
> -        miniflow_extract_func mfex_func;
> -        atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
> -        if (mfex_func) {
> -            mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
> +    miniflow_extract_func mfex_func;
> +    atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
> +    miniflow_extract_func mfex_inner_func;
> +    atomic_read_relaxed(&pmd->miniflow_extract_inner_opt, &mfex_inner_func);
> +    if (md_is_valid && mfex_inner_func) {
> +        mf_mask = mfex_inner_func(packets, keys, batch_size, in_port, pmd,
> +                                  md_is_valid);
> +    } else if (!md_is_valid && mfex_func) {
> +        mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
>                                  md_is_valid);

Align the above line with the 'p' in 'packets' from 2 lines above.

> -        }
>      }
> 
>      uint32_t iter = lookup_pkts_bitmask;
> diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
> index 833e9bd31..4c62bd911 100644
> --- a/lib/dpif-netdev-extract-avx512.c
> +++ b/lib/dpif-netdev-extract-avx512.c
> @@ -360,6 +360,53 @@ _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, __m512i v_shuf,
>                         MF_WORD(ipv6_dst, 2) | MF_BIT(tp_src) | MF_BIT(tp_dst))
>  #define MF_IPV6_TCP   (MF_IPV6_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
> 
> +#define MF_TUNNEL     MF_WORD(tunnel, offsetof(struct flow_tnl, metadata) / 8)
> +
> +#define MF_ETH_TUNNEL (MF_TUNNEL | MF_ETH)
> +#define MF_ETH_VLAN_TUNNEL (MF_TUNNEL | MF_ETH_VLAN)
> +
> +/* Block offsets represents the offsets into the blocks array of miniflow
> + * and are derived experimentally. Scalar miniflow parses the header
> + * in a fixed order and sequentially in a dynamic fashion thus incrementing
> + * pointer and copying data is enough but in AVX512 since the headers are
> + * parsed using pre-defined masks we need these magic offsets to write
> + * some of the data items at the correct loaction in the blocks array
> + * using below magic numbers.
> + */
> +#define BLK_META_DATA_OFFS            9

We could use something like the below instead of hardcoding 9, right?
offsetof(struct flow_tnl, metadata) / sizeof(uint64_t)

That is the number of words that the scalar miniflow_extract() passes to miniflow_push_words() for the tunnel metadata.

> +#define BLK_IPv4_TCP_FLAG             6
> +#define BLK_VLAN_IPv4_TCP_FLAG        7
> +#define BLK_VLAN_PCP                  4
> +#define BLK_IPv6_HDR_OFFS             8
> +#define BLK_VLAN_IPv6_HDR_OFFS        9
> +#define BLK_IPv6_TCP_FLAG             9
> +#define BLK_VLAN_IPv6_TCP_FLAG        10
> +#define BLK_L4_UDP_OFFS               9
> +#define BLK_L4_TCP_OFFS               10
> +#define BLK_VLAN_L4_UDP_OFFS          10
> +#define BLK_VLAN_L4_TCP_OFFS          11


I spent some time thinking about these #defines and whether we can generate them in a more dynamic and robust way like with other #defines in the file. I think it's tricky since they aren't as straight forward as figuring out "sizeof()" since the scalar miniflow_extract() pushes some but maybe not all of a protocol header. miniflow_extract() also calls miniflow_pad_to_64() for some protocol headers. Maybe we could build up the values using individual header #defines, like this:
#define BLK_ETH_HEADER                2
#define BLK_IPv4_HEADER               2
#define BLK_TCP_FLAG                  2

#define BLK_IPv4_TCP_FLAG             (BLK_ETH_HEADER + BLK_IPv4_HEADER + BLK_TCP_FLAG)

This might make it a little more clear where the values are coming from.

We could make the #defines a little more related to the protocol headers with something like this:
#define BLK_ETH_HEADER    ROUND_UP(sizeof(struct eth_header), 8)/sizeof(uint64_t)

This should give the value 2 as well, but shows where it's coming from since the ROUND_UP is essentially what the miniflow_pad_to_64() ends up doing.

I'm not sure if this would work for all headers though.

Also, maybe counting the miniflow bits field which we know ahead of time for each MFEX impl could give us these values. I'll investigate this a bit more to see if there are any better solutions.

Let me know if you have any thoughts on this.

> +
> +/* Below Offsets simply shifts the offsets by 9 blocks as
> + * in the tunneling case the first 9 blocks are reserved and
> + * written with the outer tunnel data.
> + */
> +#define BLK_TUN_IPv6_HDR_OFFS         (BLK_IPv6_HDR_OFFS + BLK_META_DATA_OFFS)
> +#define BLK_TUN_VLAN_IPv6_HDR_OFFS    (BLK_VLAN_IPv6_HDR_OFFS + \
> +                                       BLK_META_DATA_OFFS)
> +#define BLK_TUN_IPv6_TCP_FLAG         (BLK_IPv6_TCP_FLAG + BLK_META_DATA_OFFS)
> +#define BLK_TUN_VLAN_IPv6_TCP_FLAG    (BLK_VLAN_IPv6_TCP_FLAG + \
> +                                       BLK_META_DATA_OFFS)
> +#define BLK_TUN_L4_UDP_OFFS           (BLK_L4_UDP_OFFS + BLK_META_DATA_OFFS)
> +#define BLK_TUN_L4_TCP_OFFS           (BLK_L4_TCP_OFFS + BLK_META_DATA_OFFS)
> +#define BLK_TUN_VLAN_L4_UDP_OFFS      (BLK_VLAN_L4_UDP_OFFS + \
> +                                       BLK_META_DATA_OFFS)
> +#define BLK_TUN_VLAN_L4_TCP_OFFS      (BLK_VLAN_L4_TCP_OFFS + \
> +                                       BLK_META_DATA_OFFS)
> +#define BLK_TUN_IPv4_TCP_FLAG         (BLK_IPv4_TCP_FLAG + BLK_META_DATA_OFFS)
> +#define BLK_TUN_VLAN_PCP              (BLK_VLAN_PCP + BLK_META_DATA_OFFS)
> +#define BLK_TUN_VLAN_IPv4_TCP_FLAG    (BLK_VLAN_IPv4_TCP_FLAG + \
> +                                       BLK_META_DATA_OFFS)
> +
>  #define PATTERN_STRIP_IPV6_MASK                                         \
>      NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,     \
>      NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,     \
> @@ -744,7 +791,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                      uint32_t keys_size OVS_UNUSED,
>                      odp_port_t in_port,
>                      void *pmd_handle OVS_UNUSED,
> -                    bool md_is_valid OVS_UNUSED,
> +                    bool md_is_valid,
>                      const enum MFEX_PROFILES profile_id,
>                      const uint32_t use_vbmi OVS_UNUSED)
>  {
> @@ -770,6 +817,15 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>      __m128i v_blocks01 = _mm_insert_epi32(v_zeros, odp_to_u32(in_port), 1);
> 
>      DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        /* Handle meta-data init in the loop. */
> +        if (!md_is_valid) {
> +            pkt_metadata_init(&packet->md, in_port);
> +        }

Please see my comments on the patch 7/9 for whether we need this md_is_valid check.

> +        const struct pkt_metadata *md = &packet->md;
> +        /* Dummy pmd dont always pass correct md_is_valid and hence
> +         * need to check the tunnel data to ensure correct behaviour.
> +         */
> +        bool tunnel = flow_tnl_dst_is_set(&md->tunnel);
>          /* If the packet is smaller than the probe size, skip it. */
>          const uint32_t size = dp_packet_size(packet);
>          if (size < dp_pkt_min_size) {
> @@ -808,7 +864,16 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>                                                                  use_vbmi);
> 
>          __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
> -        _mm512_storeu_si512(&blocks[2], v_blk0_strip);
> +        /* Handle inner meta-data if valid. */
> +        if (!tunnel) {
> +            _mm512_storeu_si512(&blocks[2], v_blk0_strip);
> +        } else {
> +            __m512i v_tun = _mm512_loadu_si512(&md->tunnel);
> +            _mm512_storeu_si512(&blocks[0], v_tun);
> +            _mm512_storeu_si512(&blocks[11], v_blk0_strip);
> +            blocks[BLK_META_DATA_OFFS] = md->dp_hash |
> +                        ((uint64_t) odp_to_u32(md->in_port.odp_port) << 32);
> +        }
> 
>          /* Perform "post-processing" per profile, handling details not easily
>           * handled in the above generic AVX512 code. Examples include TCP flag
> @@ -820,38 +885,45 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>              break;
> 
>          case PROFILE_ETH_VLAN_IPV4_TCP: {
> -                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
> -
>                  uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
>                  struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
>                  if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
>                                                TCP_HEADER_LEN)) {
>                      continue;
>                  }
> -
>                  /* Process TCP flags, and store to blocks. */
>                  const struct tcp_header *tcp = (void *)&pkt[38];
> -                mfex_handle_tcp_flags(tcp, &blocks[7]);
> +                uint32_t vlan_pcp_off = BLK_VLAN_PCP;
> +                uint32_t tcp_flag_off = BLK_VLAN_IPv4_TCP_FLAG;
> +
> +                if (tunnel) {
> +                    vlan_pcp_off = BLK_TUN_VLAN_PCP;
> +                    tcp_flag_off = BLK_TUN_VLAN_IPv4_TCP_FLAG;
> +                    mf->map.bits[0] = MF_ETH_VLAN_TUNNEL;
> +                }

I like this pattern to reuse the existing MFEX impls for the tunnel case, since we just need to conditionally adjust offsets like you are doing and it will work for tunnel and non-tunnel cases. Avoids double the number of impls, nice job.

> +                mfex_vlan_pcp(pkt[14], &keys[i].buf[vlan_pcp_off]);
> +                mfex_handle_tcp_flags(tcp, &blocks[tcp_flag_off]);
>                  dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
>              } break;
> 

<snip the rest of the MFEX impls>

> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index 12ac8ecce..5f7f1b6d3 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -362,7 +362,9 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> 
>      /* Run scalar miniflow_extract to get default result. */
>      DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> -        pkt_metadata_init(&packet->md, in_port);
> +        if (!md_is_valid) {
> +            pkt_metadata_init(&packet->md, in_port);
> +        }

Could the "        bool tunnel = flow_tnl_dst_is_set(&md->tunnel);" type check be used here too?

>          miniflow_extract(packet, &keys[i].mf);
> 
>          /* Store known good metadata to compare with optimized metadata. */
> --
> 2.25.1
Kumar Amber Oct. 3, 2022, 4:26 p.m. UTC | #2
Hi Cian,

Please find the comments inline.

> >      /* Do a batch minfilow extract into keys. */
> > -     /* Do a batch minfilow extract into keys, but only for outer packets. */
> 
> In the earlier DPIF part of this patchset, I guess you add the above comment
> line that you are removing here. But when you add it, I don't think it should
> be duplicating the line above. Just add the ", but only for outer packets." part
> in the earlier patchset and remove it here, rather than adding a whole line
> then removing in a later patch.
> 

Fixed in earlier patches.

> >      uint32_t mf_mask = 0;
> > -    if (recirc_depth == 0) {
> > -        miniflow_extract_func mfex_func;
> > -        atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
> > -        if (mfex_func) {
> > -            mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
> > +    miniflow_extract_func mfex_func;
> > +    atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
> > +    miniflow_extract_func mfex_inner_func;
> > +    atomic_read_relaxed(&pmd->miniflow_extract_inner_opt,
> &mfex_inner_func);
> > +    if (md_is_valid && mfex_inner_func) {
> > +        mf_mask = mfex_inner_func(packets, keys, batch_size, in_port, pmd,
> > +                                  md_is_valid);
> > +    } else if (!md_is_valid && mfex_func) {
> > +        mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
> >                                  md_is_valid);
> 
> Align the above line with the 'p' in 'packets' from 2 lines above.
> 
Done.

> > -        }
> >      }
> >
> >      uint32_t iter = lookup_pkts_bitmask; diff --git
> > a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
> > index 833e9bd31..4c62bd911 100644
> > --- a/lib/dpif-netdev-extract-avx512.c
> > +++ b/lib/dpif-netdev-extract-avx512.c
> > @@ -360,6 +360,53 @@
> _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, __m512i
> v_shuf,
> >                         MF_WORD(ipv6_dst, 2) | MF_BIT(tp_src) | MF_BIT(tp_dst))
> >  #define MF_IPV6_TCP   (MF_IPV6_UDP | MF_BIT(tcp_flags) |
> MF_BIT(arp_tha.ea[2]))
> >
> > +#define MF_TUNNEL     MF_WORD(tunnel, offsetof(struct flow_tnl,
> metadata) / 8)
> > +
> > +#define MF_ETH_TUNNEL (MF_TUNNEL | MF_ETH) #define
> MF_ETH_VLAN_TUNNEL
> > +(MF_TUNNEL | MF_ETH_VLAN)
> > +
> > +/* Block offsets represents the offsets into the blocks array of
> > +miniflow
> > + * and are derived experimentally. Scalar miniflow parses the header
> > + * in a fixed order and sequentially in a dynamic fashion thus
> > +incrementing
> > + * pointer and copying data is enough but in AVX512 since the headers
> > +are
> > + * parsed using pre-defined masks we need these magic offsets to
> > +write
> > + * some of the data items at the correct loaction in the blocks array
> > + * using below magic numbers.
> > + */
> > +#define BLK_META_DATA_OFFS            9
> 
> We could use something like the below instead of hardcoding 9, right?
> offsetof(struct flow_tnl, metadata) / sizeof(uint64_t)
> 
> That is the number of words that the scalar miniflow_extract() passes to
> miniflow_push_words() for the tunnel metadata.
> 
> > +#define BLK_IPv4_TCP_FLAG             6
> > +#define BLK_VLAN_IPv4_TCP_FLAG        7
> > +#define BLK_VLAN_PCP                  4
> > +#define BLK_IPv6_HDR_OFFS             8
> > +#define BLK_VLAN_IPv6_HDR_OFFS        9
> > +#define BLK_IPv6_TCP_FLAG             9
> > +#define BLK_VLAN_IPv6_TCP_FLAG        10
> > +#define BLK_L4_UDP_OFFS               9
> > +#define BLK_L4_TCP_OFFS               10
> > +#define BLK_VLAN_L4_UDP_OFFS          10
> > +#define BLK_VLAN_L4_TCP_OFFS          11
> 
> 
> I spent some time thinking about these #defines and whether we can
> generate them in a more dynamic and robust way like with other #defines in
> the file. I think it's tricky since they aren't as straight forward as figuring out
> "sizeof()" since the scalar miniflow_extract() pushes some but maybe not all
> of a protocol header. miniflow_extract() also calls miniflow_pad_to_64() for
> some protocol headers. Maybe we could build up the values using individual
> header #defines, like this:
> #define BLK_ETH_HEADER                2
> #define BLK_IPv4_HEADER               2
> #define BLK_TCP_FLAG                  2
> 
> #define BLK_IPv4_TCP_FLAG             (BLK_ETH_HEADER + BLK_IPv4_HEADER +
> BLK_TCP_FLAG)
> 
> This might make it a little more clear where the values are coming from.
> 
> We could make the #defines a little more related to the protocol headers
> with something like this:
> #define BLK_ETH_HEADER    ROUND_UP(sizeof(struct eth_header),
> 8)/sizeof(uint64_t)
> 
> This should give the value 2 as well, but shows where it's coming from since
> the ROUND_UP is essentially what the miniflow_pad_to_64() ends up doing.
> 
> I'm not sure if this would work for all headers though.
> 
> Also, maybe counting the miniflow bits field which we know ahead of time
> for each MFEX impl could give us these values. I'll investigate this a bit more
> to see if there are any better solutions.
> 
> Let me know if you have any thoughts on this.
> 

I have removed the magic bits as much as I can, and the patches now build up the offsets
using packet header lengths.

> > +
> > +/* Below Offsets simply shifts the offsets by 9 blocks as
> > + * in the tunneling case the first 9 blocks are reserved and
> > + * written with the outer tunnel data.
> > + */
> > +#define BLK_TUN_IPv6_HDR_OFFS         (BLK_IPv6_HDR_OFFS +
> BLK_META_DATA_OFFS)
> > +#define BLK_TUN_VLAN_IPv6_HDR_OFFS    (BLK_VLAN_IPv6_HDR_OFFS + \
> > +                                       BLK_META_DATA_OFFS)
> > +#define BLK_TUN_IPv6_TCP_FLAG         (BLK_IPv6_TCP_FLAG +
> BLK_META_DATA_OFFS)
> > +#define BLK_TUN_VLAN_IPv6_TCP_FLAG    (BLK_VLAN_IPv6_TCP_FLAG + \
> > +                                       BLK_META_DATA_OFFS)
> > +#define BLK_TUN_L4_UDP_OFFS           (BLK_L4_UDP_OFFS +
> BLK_META_DATA_OFFS)
> > +#define BLK_TUN_L4_TCP_OFFS           (BLK_L4_TCP_OFFS +
> BLK_META_DATA_OFFS)
> > +#define BLK_TUN_VLAN_L4_UDP_OFFS      (BLK_VLAN_L4_UDP_OFFS + \
> > +                                       BLK_META_DATA_OFFS)
> > +#define BLK_TUN_VLAN_L4_TCP_OFFS      (BLK_VLAN_L4_TCP_OFFS + \
> > +                                       BLK_META_DATA_OFFS)
> > +#define BLK_TUN_IPv4_TCP_FLAG         (BLK_IPv4_TCP_FLAG +
> BLK_META_DATA_OFFS)
> > +#define BLK_TUN_VLAN_PCP              (BLK_VLAN_PCP +
> BLK_META_DATA_OFFS)
> > +#define BLK_TUN_VLAN_IPv4_TCP_FLAG    (BLK_VLAN_IPv4_TCP_FLAG + \
> > +                                       BLK_META_DATA_OFFS)
> > +
> >  #define PATTERN_STRIP_IPV6_MASK                                         \
> >      NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,     \
> >      NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,     \
> > @@ -744,7 +791,7 @@ mfex_avx512_process(struct dp_packet_batch
> *packets,
> >                      uint32_t keys_size OVS_UNUSED,
> >                      odp_port_t in_port,
> >                      void *pmd_handle OVS_UNUSED,
> > -                    bool md_is_valid OVS_UNUSED,
> > +                    bool md_is_valid,
> >                      const enum MFEX_PROFILES profile_id,
> >                      const uint32_t use_vbmi OVS_UNUSED)  { @@ -770,6
> > +817,15 @@ mfex_avx512_process(struct dp_packet_batch *packets,
> >      __m128i v_blocks01 = _mm_insert_epi32(v_zeros,
> > odp_to_u32(in_port), 1);
> >
> >      DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > +        /* Handle meta-data init in the loop. */
> > +        if (!md_is_valid) {
> > +            pkt_metadata_init(&packet->md, in_port);
> > +        }
> 
> Please see my comments on the patch 7/9 for whether we need this
> md_is_valid check.
> 

Done, removed the tunnel check.

> > +        const struct pkt_metadata *md = &packet->md;
> > +        /* Dummy pmd dont always pass correct md_is_valid and hence
> > +         * need to check the tunnel data to ensure correct behaviour.
> > +         */
> > +        bool tunnel = flow_tnl_dst_is_set(&md->tunnel);
> >          /* If the packet is smaller than the probe size, skip it. */
> >          const uint32_t size = dp_packet_size(packet);
> >          if (size < dp_pkt_min_size) { @@ -808,7 +864,16 @@
> > mfex_avx512_process(struct dp_packet_batch *packets,
> >
> > use_vbmi);
> >
> >          __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
> > -        _mm512_storeu_si512(&blocks[2], v_blk0_strip);
> > +        /* Handle inner meta-data if valid. */
> > +        if (!tunnel) {
> > +            _mm512_storeu_si512(&blocks[2], v_blk0_strip);
> > +        } else {
> > +            __m512i v_tun = _mm512_loadu_si512(&md->tunnel);
> > +            _mm512_storeu_si512(&blocks[0], v_tun);
> > +            _mm512_storeu_si512(&blocks[11], v_blk0_strip);
> > +            blocks[BLK_META_DATA_OFFS] = md->dp_hash |
> > +                        ((uint64_t) odp_to_u32(md->in_port.odp_port) << 32);
> > +        }
> >
> >          /* Perform "post-processing" per profile, handling details not easily
> >           * handled in the above generic AVX512 code. Examples include
> > TCP flag @@ -820,38 +885,45 @@ mfex_avx512_process(struct
> dp_packet_batch *packets,
> >              break;
> >
> >          case PROFILE_ETH_VLAN_IPV4_TCP: {
> > -                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
> > -
> >                  uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
> >                  struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
> >                  if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
> >                                                TCP_HEADER_LEN)) {
> >                      continue;
> >                  }
> > -
> >                  /* Process TCP flags, and store to blocks. */
> >                  const struct tcp_header *tcp = (void *)&pkt[38];
> > -                mfex_handle_tcp_flags(tcp, &blocks[7]);
> > +                uint32_t vlan_pcp_off = BLK_VLAN_PCP;
> > +                uint32_t tcp_flag_off = BLK_VLAN_IPv4_TCP_FLAG;
> > +
> > +                if (tunnel) {
> > +                    vlan_pcp_off = BLK_TUN_VLAN_PCP;
> > +                    tcp_flag_off = BLK_TUN_VLAN_IPv4_TCP_FLAG;
> > +                    mf->map.bits[0] = MF_ETH_VLAN_TUNNEL;
> > +                }
> 
> I like this pattern to reuse the existing MFEX impls for the tunnel case, since
> we just need to conditionally adjust offsets like you are doing and it will work
> for tunnel and non-tunnel cases. Avoids double the number of impls, nice
> job.
> 

Thanks.

> > +                mfex_vlan_pcp(pkt[14], &keys[i].buf[vlan_pcp_off]);
> > +                mfex_handle_tcp_flags(tcp, &blocks[tcp_flag_off]);
> >                  dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> >              } break;
> >
> 
> <snip the rest of the MFEX impls>
> 
> > diff --git a/lib/dpif-netdev-private-extract.c
> > b/lib/dpif-netdev-private-extract.c
> > index 12ac8ecce..5f7f1b6d3 100644
> > --- a/lib/dpif-netdev-private-extract.c
> > +++ b/lib/dpif-netdev-private-extract.c
> > @@ -362,7 +362,9 @@ dpif_miniflow_extract_autovalidator(struct
> > dp_packet_batch *packets,
> >
> >      /* Run scalar miniflow_extract to get default result. */
> >      DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > -        pkt_metadata_init(&packet->md, in_port);
> > +        if (!md_is_valid) {
> > +            pkt_metadata_init(&packet->md, in_port);
> > +        }
> 
> Could the "        bool tunnel = flow_tnl_dst_is_set(&md->tunnel);" type check
> be used here too?
> 

Yes, I kept the md_is_valid check as it's the better solution.

Regards
Amber
diff mbox series

Patch

diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index 1c3b67b02..d5c61baff 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -185,15 +185,17 @@  dp_netdev_input_avx512__(struct dp_netdev_pmd_thread *pmd,
     }
 
     /* Do a batch minfilow extract into keys. */
-     /* Do a batch minfilow extract into keys, but only for outer packets. */
     uint32_t mf_mask = 0;
-    if (recirc_depth == 0) {
-        miniflow_extract_func mfex_func;
-        atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
-        if (mfex_func) {
-            mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
+    miniflow_extract_func mfex_func;
+    atomic_read_relaxed(&pmd->miniflow_extract_opt, &mfex_func);
+    miniflow_extract_func mfex_inner_func;
+    atomic_read_relaxed(&pmd->miniflow_extract_inner_opt, &mfex_inner_func);
+    if (md_is_valid && mfex_inner_func) {
+        mf_mask = mfex_inner_func(packets, keys, batch_size, in_port, pmd,
+                                  md_is_valid);
+    } else if (!md_is_valid && mfex_func) {
+        mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd,
                                 md_is_valid);
-        }
     }
 
     uint32_t iter = lookup_pkts_bitmask;
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 833e9bd31..4c62bd911 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -360,6 +360,53 @@  _mm512_maskz_permutexvar_epi8_selector(__mmask64 k_shuf, __m512i v_shuf,
                        MF_WORD(ipv6_dst, 2) | MF_BIT(tp_src) | MF_BIT(tp_dst))
 #define MF_IPV6_TCP   (MF_IPV6_UDP | MF_BIT(tcp_flags) | MF_BIT(arp_tha.ea[2]))
 
+#define MF_TUNNEL     MF_WORD(tunnel, offsetof(struct flow_tnl, metadata) / 8)
+
+#define MF_ETH_TUNNEL (MF_TUNNEL | MF_ETH)
+#define MF_ETH_VLAN_TUNNEL (MF_TUNNEL | MF_ETH_VLAN)
+
+/* Block offsets represents the offsets into the blocks array of miniflow
+ * and are derived experimentally. Scalar miniflow parses the header
+ * in a fixed order and sequentially in a dynamic fashion thus incrementing
+ * pointer and copying data is enough but in AVX512 since the headers are
+ * parsed using pre-defined masks we need these magic offsets to write
+ * some of the data items at the correct loaction in the blocks array
+ * using below magic numbers.
+ */
+#define BLK_META_DATA_OFFS            9
+#define BLK_IPv4_TCP_FLAG             6
+#define BLK_VLAN_IPv4_TCP_FLAG        7
+#define BLK_VLAN_PCP                  4
+#define BLK_IPv6_HDR_OFFS             8
+#define BLK_VLAN_IPv6_HDR_OFFS        9
+#define BLK_IPv6_TCP_FLAG             9
+#define BLK_VLAN_IPv6_TCP_FLAG        10
+#define BLK_L4_UDP_OFFS               9
+#define BLK_L4_TCP_OFFS               10
+#define BLK_VLAN_L4_UDP_OFFS          10
+#define BLK_VLAN_L4_TCP_OFFS          11
+
+/* Below Offsets simply shifts the offsets by 9 blocks as
+ * in the tunneling case the first 9 blocks are reserved and
+ * written with the outer tunnel data.
+ */
+#define BLK_TUN_IPv6_HDR_OFFS         (BLK_IPv6_HDR_OFFS + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_IPv6_HDR_OFFS    (BLK_VLAN_IPv6_HDR_OFFS + \
+                                       BLK_META_DATA_OFFS)
+#define BLK_TUN_IPv6_TCP_FLAG         (BLK_IPv6_TCP_FLAG + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_IPv6_TCP_FLAG    (BLK_VLAN_IPv6_TCP_FLAG + \
+                                       BLK_META_DATA_OFFS)
+#define BLK_TUN_L4_UDP_OFFS           (BLK_L4_UDP_OFFS + BLK_META_DATA_OFFS)
+#define BLK_TUN_L4_TCP_OFFS           (BLK_L4_TCP_OFFS + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_L4_UDP_OFFS      (BLK_VLAN_L4_UDP_OFFS + \
+                                       BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_L4_TCP_OFFS      (BLK_VLAN_L4_TCP_OFFS + \
+                                       BLK_META_DATA_OFFS)
+#define BLK_TUN_IPv4_TCP_FLAG         (BLK_IPv4_TCP_FLAG + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_PCP              (BLK_VLAN_PCP + BLK_META_DATA_OFFS)
+#define BLK_TUN_VLAN_IPv4_TCP_FLAG    (BLK_VLAN_IPv4_TCP_FLAG + \
+                                       BLK_META_DATA_OFFS)
+
 #define PATTERN_STRIP_IPV6_MASK                                         \
     NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,     \
     NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC,     \
@@ -744,7 +791,7 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                     uint32_t keys_size OVS_UNUSED,
                     odp_port_t in_port,
                     void *pmd_handle OVS_UNUSED,
-                    bool md_is_valid OVS_UNUSED,
+                    bool md_is_valid,
                     const enum MFEX_PROFILES profile_id,
                     const uint32_t use_vbmi OVS_UNUSED)
 {
@@ -770,6 +817,15 @@  mfex_avx512_process(struct dp_packet_batch *packets,
     __m128i v_blocks01 = _mm_insert_epi32(v_zeros, odp_to_u32(in_port), 1);
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+        /* Handle meta-data init in the loop. */
+        if (!md_is_valid) {
+            pkt_metadata_init(&packet->md, in_port);
+        }
+        const struct pkt_metadata *md = &packet->md;
+        /* Dummy pmd dont always pass correct md_is_valid and hence
+         * need to check the tunnel data to ensure correct behaviour.
+         */
+        bool tunnel = flow_tnl_dst_is_set(&md->tunnel);
         /* If the packet is smaller than the probe size, skip it. */
         const uint32_t size = dp_packet_size(packet);
         if (size < dp_pkt_min_size) {
@@ -808,7 +864,16 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                                                                 use_vbmi);
 
         __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
-        _mm512_storeu_si512(&blocks[2], v_blk0_strip);
+        /* Handle inner meta-data if valid. */
+        if (!tunnel) {
+            _mm512_storeu_si512(&blocks[2], v_blk0_strip);
+        } else {
+            __m512i v_tun = _mm512_loadu_si512(&md->tunnel);
+            _mm512_storeu_si512(&blocks[0], v_tun);
+            _mm512_storeu_si512(&blocks[11], v_blk0_strip);
+            blocks[BLK_META_DATA_OFFS] = md->dp_hash |
+                        ((uint64_t) odp_to_u32(md->in_port.odp_port) << 32);
+        }
 
         /* Perform "post-processing" per profile, handling details not easily
          * handled in the above generic AVX512 code. Examples include TCP flag
@@ -820,38 +885,45 @@  mfex_avx512_process(struct dp_packet_batch *packets,
             break;
 
         case PROFILE_ETH_VLAN_IPV4_TCP: {
-                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
-
                 uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
                 struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
                 if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
                                               TCP_HEADER_LEN)) {
                     continue;
                 }
-
                 /* Process TCP flags, and store to blocks. */
                 const struct tcp_header *tcp = (void *)&pkt[38];
-                mfex_handle_tcp_flags(tcp, &blocks[7]);
+                uint32_t vlan_pcp_off = BLK_VLAN_PCP;
+                uint32_t tcp_flag_off = BLK_VLAN_IPv4_TCP_FLAG;
+
+                if (tunnel) {
+                    vlan_pcp_off = BLK_TUN_VLAN_PCP;
+                    tcp_flag_off = BLK_TUN_VLAN_IPv4_TCP_FLAG;
+                    mf->map.bits[0] = MF_ETH_VLAN_TUNNEL;
+                }
+                mfex_vlan_pcp(pkt[14], &keys[i].buf[vlan_pcp_off]);
+                mfex_handle_tcp_flags(tcp, &blocks[tcp_flag_off]);
                 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
             } break;
 
         case PROFILE_ETH_VLAN_IPV4_UDP: {
-                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
-
                 uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
                 struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
                 if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
                                               UDP_HEADER_LEN)) {
                     continue;
                 }
+
+                uint32_t vlan_pcp_off = BLK_VLAN_PCP;
+                if (tunnel) {
+                    vlan_pcp_off = BLK_TUN_VLAN_PCP;
+                    mf->map.bits[0] = MF_ETH_VLAN_TUNNEL;
+                }
+                mfex_vlan_pcp(pkt[14], &keys[i].buf[vlan_pcp_off]);
                 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
             } break;
 
         case PROFILE_ETH_IPV4_TCP: {
-                /* Process TCP flags, and store to blocks. */
-                const struct tcp_header *tcp = (void *)&pkt[34];
-                mfex_handle_tcp_flags(tcp, &blocks[6]);
-
                 /* Handle dynamic l2_pad_size. */
                 uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
                 struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
@@ -859,6 +931,14 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                                               TCP_HEADER_LEN)) {
                     continue;
                 }
+                /* Process TCP flags, and store to blocks. */
+                const struct tcp_header *tcp = (void *)&pkt[34];
+                uint32_t tcp_flag_off = BLK_IPv4_TCP_FLAG;
+                if (tunnel) {
+                    tcp_flag_off = BLK_TUN_IPv4_TCP_FLAG;
+                    mf->map.bits[0] = MF_ETH_TUNNEL;
+                }
+                mfex_handle_tcp_flags(tcp, &blocks[tcp_flag_off]);
                 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
             } break;
 
@@ -870,6 +950,9 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                                               UDP_HEADER_LEN)) {
                     continue;
                 }
+                if (tunnel) {
+                    mf->map.bits[0] = MF_ETH_TUNNEL;
+                }
                 dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
             } break;
 
@@ -883,11 +966,19 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                     continue;
                 }
 
-                /* Process IPv6 header for TC, flow Label and next header. */
-                mfex_handle_ipv6_hdr_block(&pkt[ETH_HEADER_LEN], &blocks[8]);
-
+                uint32_t hdr_blk_off = BLK_IPv6_HDR_OFFS;
+                uint32_t udp_offs = BLK_L4_UDP_OFFS;
+                if (tunnel) {
+                    hdr_blk_off = BLK_TUN_IPv6_HDR_OFFS;
+                    udp_offs = BLK_TUN_L4_UDP_OFFS;
+                    mf->map.bits[0] = MF_ETH_TUNNEL;
+                }
+                /* Process IPv6 header for TC, flow Label and next
+                 * header. */
+                mfex_handle_ipv6_hdr_block(&pkt[ETH_HEADER_LEN],
+                                           &blocks[hdr_blk_off]);
                 /* Process UDP header. */
-                mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[9]);
+                mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[udp_offs]);
                 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
             } break;
 
@@ -901,22 +992,31 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                     continue;
                 }
 
-                /* Process IPv6 header for TC, flow Label and next header. */
-                mfex_handle_ipv6_hdr_block(&pkt[ETH_HEADER_LEN], &blocks[8]);
-
-                /* Process TCP header. */
-                mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[10]);
                 const struct tcp_header *tcp = (void *)&pkt[54];
                 if (!mfex_check_tcp_data_offset(tcp)) {
                     continue;
                 }
-                mfex_handle_tcp_flags(tcp, &blocks[9]);
+
+                uint32_t ipv6_hdr_off = BLK_IPv6_HDR_OFFS;
+                uint32_t tcp_offs = BLK_L4_TCP_OFFS;
+                uint32_t tcp_flag_offs = BLK_IPv6_TCP_FLAG;
+                if (tunnel) {
+                    mf->map.bits[0] = MF_ETH_TUNNEL;
+                    ipv6_hdr_off = BLK_TUN_IPv6_HDR_OFFS;
+                    tcp_offs = BLK_TUN_L4_TCP_OFFS;
+                    tcp_flag_offs = BLK_TUN_IPv6_TCP_FLAG;
+                }
+                /* Process IPv6 header for TC, flow Label and next
+                 * header. */
+                mfex_handle_ipv6_hdr_block(&pkt[ETH_HEADER_LEN],
+                                           &blocks[ipv6_hdr_off]);
+                /* Process TCP header. */
+                mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[tcp_offs]);
+                mfex_handle_tcp_flags(tcp, &blocks[tcp_flag_offs]);
                 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
             } break;
 
         case PROFILE_ETH_VLAN_IPV6_TCP: {
-                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
-
                 /* Handle dynamic l2_pad_size. */
                 uint32_t size_from_ipv6 = size - VLAN_ETH_HEADER_LEN;
                 struct ovs_16aligned_ip6_hdr *nh = (void *)&pkt
@@ -926,23 +1026,34 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                     continue;
                 }
 
-                /* Process IPv6 header for TC, flow Label and next header. */
-                mfex_handle_ipv6_hdr_block(&pkt[VLAN_ETH_HEADER_LEN],
-                                           &blocks[9]);
-
-                /* Process TCP header. */
-                mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[11]);
                 const struct tcp_header *tcp = (void *)&pkt[58];
                 if (!mfex_check_tcp_data_offset(tcp)) {
                     continue;
                 }
-                mfex_handle_tcp_flags(tcp, &blocks[10]);
+
+                uint32_t ipv6_hdr_off = BLK_VLAN_IPv6_HDR_OFFS;
+                uint32_t tcp_offs = BLK_VLAN_L4_TCP_OFFS;
+                uint32_t tcp_flag_offs = BLK_VLAN_IPv6_TCP_FLAG;
+                uint32_t vlan_pcp_offs = BLK_VLAN_PCP;
+                if (tunnel) {
+                    mf->map.bits[0] = MF_ETH_VLAN_TUNNEL;
+                    ipv6_hdr_off = BLK_TUN_VLAN_IPv6_HDR_OFFS;
+                    tcp_offs = BLK_TUN_VLAN_L4_TCP_OFFS;
+                    tcp_flag_offs = BLK_TUN_VLAN_IPv6_TCP_FLAG;
+                    vlan_pcp_offs = BLK_TUN_VLAN_PCP;
+                }
+                mfex_vlan_pcp(pkt[14], &keys[i].buf[vlan_pcp_offs]);
+                mfex_handle_tcp_flags(tcp, &blocks[tcp_flag_offs]);
+                /* Process IPv6 header for TC, flow Label and next
+                 * header. */
+                mfex_handle_ipv6_hdr_block(&pkt[VLAN_ETH_HEADER_LEN],
+                                           &blocks[ipv6_hdr_off]);
+                /* Process TCP header. */
+                mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[tcp_offs]);
                 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
             } break;
 
         case PROFILE_ETH_VLAN_IPV6_UDP: {
-                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
-
                 /* Handle dynamic l2_pad_size. */
                 uint32_t size_from_ipv6 = size - VLAN_ETH_HEADER_LEN;
                 struct ovs_16aligned_ip6_hdr *nh = (void *)&pkt
@@ -952,12 +1063,22 @@  mfex_avx512_process(struct dp_packet_batch *packets,
                     continue;
                 }
 
-                /* Process IPv6 header for TC, flow Label and next header. */
+                uint32_t ipv6_hdr_off = BLK_VLAN_IPv6_HDR_OFFS;
+                uint32_t udp_offs = BLK_VLAN_L4_UDP_OFFS;
+                uint32_t vlan_pcp_offs = BLK_VLAN_PCP;
+                if (tunnel) {
+                    mf->map.bits[0] = MF_ETH_VLAN_TUNNEL;
+                    ipv6_hdr_off = BLK_TUN_VLAN_IPv6_HDR_OFFS;
+                    udp_offs = BLK_TUN_VLAN_L4_UDP_OFFS;
+                    vlan_pcp_offs = BLK_TUN_VLAN_PCP;
+                }
+                mfex_vlan_pcp(pkt[14], &keys[i].buf[vlan_pcp_offs]);
+                /* Process IPv6 header for TC, flow Label and next
+                 * header. */
                 mfex_handle_ipv6_hdr_block(&pkt[VLAN_ETH_HEADER_LEN],
-                                           &blocks[9]);
-
+                                           &blocks[ipv6_hdr_off]);
                 /* Process UDP header. */
-                mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[10]);
+                mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[udp_offs]);
                 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
             } break;
         default:
diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index 12ac8ecce..5f7f1b6d3 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -362,7 +362,9 @@  dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
 
     /* Run scalar miniflow_extract to get default result. */
     DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
-        pkt_metadata_init(&packet->md, in_port);
+        if (!md_is_valid) {
+            pkt_metadata_init(&packet->md, in_port);
+        }
         miniflow_extract(packet, &keys[i].mf);
 
         /* Store known good metadata to compare with optimized metadata. */