diff mbox series

[ovs-dev,v4,02/12] dpif-netdev: Add auto validation function for miniflow extract

Message ID 20210617162754.2028048-3-kumar.amber@intel.com
State Superseded
Headers show
Series MFEX Infrastructure + Optimizations | expand

Commit Message

Kumar Amber June 17, 2021, 4:27 p.m. UTC
This patch introduced the auto-validation function which
allows users to compare the batch of packets obtained from
different miniflow implementations against the linear
miniflow extract and return a hitmask.

The autovaidator function can be triggered at runtime using the
following command:

$ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator

Signed-off-by: Kumar Amber <kumar.amber@intel.com>
Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com>
Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
 lib/dpif-netdev-private-extract.h |  15 ++++
 lib/dpif-netdev.c                 |   2 +-
 3 files changed, 157 insertions(+), 1 deletion(-)

Comments

Stokes, Ian June 24, 2021, 10:46 a.m. UTC | #1
> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
> 
> The autovaidator function can be triggered at runtime using the
> following command:
> 
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator

Thanks for the patch Amber, seems very like the DPCLS autovalidator that has already been upstreamed so makes sense to me.

A few comments below.

> 
> Signed-off-by: Kumar Amber <kumar.amber@intel.com>
> Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
>  lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.h |  15 ++++
>  lib/dpif-netdev.c                 |   2 +-
>  3 files changed, 157 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
> 
>  /* Implementations of available extract options. */
>  static struct dpif_miniflow_extract_impl mfex_impls[] = {
> +   {
> +        .probe = NULL,
> +        .extract_func = dpif_miniflow_extract_autovalidator,
> +        .name = "autovalidator",
> +    },
>      {
>          .probe = NULL,
>          .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct
> dpif_miniflow_extract_impl **out_ptr)
>      *out_ptr = mfex_impls;
>      return ARRAY_SIZE(mfex_impls);
>  }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t in_port,
> +                                    void *pmd_handle)
> +{
> +    const size_t cnt = dp_packet_batch_size(packets);
> +    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> +    struct dp_packet *packet;
> +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> +    int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
> +    if (mfunc_count < 0) {
> +        pmd->miniflow_extract_opt = NULL;
> +        VLOG_ERR("failed to get miniflow extract function implementations\n");
> +        return 0;
> +    }
> +    ovs_assert(keys_size >= cnt);
> +    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> +    /* Run scalar miniflow_extract to get default result. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        pkt_metadata_init(&packet->md, in_port);
> +        miniflow_extract(packet, &keys[i].mf);
> +
> +        /* Store known good metadata to compare with optimized metadata. */
> +        good_l2_5_ofs[i] = packet->l2_5_ofs;
> +        good_l3_ofs[i] = packet->l3_ofs;
> +        good_l4_ofs[i] = packet->l4_ofs;
> +        good_l2_pad_size[i] = packet->l2_pad_size;
> +    }
> +
> +    /* Iterate through each version of miniflow implementations. */
> +    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> +        if (!mfex_impls[j].available) {
> +            continue;
> +        }
> +
> +        /* Reset keys and offsets before each implementation. */
> +        memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +            dp_packet_reset_offsets(packet);
> +        }
> +        /* Call optimized miniflow for each batch of packet. */
> +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> +                                            keys_size, in_port, pmd_handle);

Alignment above is out, should be aligned under first argument passed ie. packets.

> +
> +        /* Do a miniflow compare for bits, blocks and offsets for all the
> +         * classified packets in the hitmask marked by set bits. */
> +        while (hit_mask) {
> +            /* Index for the set bit. */
> +            uint32_t i = __builtin_ctz(hit_mask);
> +            /* Set the index in hitmask to Zero. */
> +            hit_mask &= (hit_mask - 1);
> +
> +            uint32_t failed = 0;
> +
> +            /* Check miniflow bits are equal. */
> +            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> +                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> +                         keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> +                         test_keys[i].mf.map.bits[0],
> +                         test_keys[i].mf.map.bits[1]);
> +                failed = 1;
> +            }
> +
> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good hexdump:\n");
> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);

For this and other VLOG Errs  rather than using spaces to have you thought of using pad left?
> +                }
> +                VLOG_ERR("  Test hexdump:\n");
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> +                }
> +                failed = 1;
> +            }
> +
> +            if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> +                    (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> +                VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> +                         good_l3_ofs[i], good_l4_ofs[i]);
> +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         packets->packets[i]->l2_pad_size,
> +                         packets->packets[i]->l2_5_ofs,
> +                         packets->packets[i]->l3_ofs,
> +                         packets->packets[i]->l4_ofs);
> +                failed = 1;
> +            }
> +
> +            if (failed) {
> +                /* Having dumped the debug info, disable autovalidator. */
> +                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> +                         mfex_impls[j].name, i);
> +                /* Halt OVS here on debug builds. */
> +                ovs_assert(0);
> +                pmd->miniflow_extract_opt = NULL;
> +                break;
> +            }
> +        }
> +    }
> +
> +    /* preserve packet correctness by storing back the good offsets in
> +     * packets back. */

Minor, capitalize Preserve above.

> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        packet->l2_5_ofs = good_l2_5_ofs[i];
> +        packet->l3_ofs = good_l3_ofs[i];
> +        packet->l4_ofs = good_l4_ofs[i];
> +        packet->l2_pad_size = good_l2_pad_size[i];
> +    }
> +
> +    /* Returning zero implies no packets were hit by autovalidation. This
> +     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> +     * would pass/fail. By always returning zero, autovalidator is a little
> +     * slower, but we gain consistency in testing.

Can you explain this in a little more detail?

Is the expectation here that no packets get hit but just simply that the test and comparisons match for each mfex?
 
> +     */
> +    return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
>  /* Max size of dpif_miniflow_extract_impl array. */
>  #define MFEX_IMPLS_MAX_SIZE (16)
> 
> +/* Skip the autovalidator study and null when iterating all available

Is study the right word above for the autovalidator? Seems a bit odd.
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
>  /* Forward declarations. */
>  struct dp_packet;
>  struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
>  int32_t
>  dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);
> 
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t in_port,
> +                                    void *pmd_handle);
> 
>  #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn
> *conn, int argc,
>      struct ds reply = DS_EMPTY_INITIALIZER;
>      ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
>      const char *reply_str = ds_cstr(&reply);
> -    unixctl_command_reply(conn, reply_str);
>      VLOG_INFO("%s", reply_str);
> +    unixctl_command_reply(conn, reply_str);

Is there a reason for swapping the order above?


BR
Ian
>      ds_destroy(&reply);
>  }
> 
> --
> 2.25.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ilya Maximets June 24, 2021, 10:58 a.m. UTC | #2
On 6/24/21 12:46 PM, Stokes, Ian wrote:
>> +
>> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
>> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
>> +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
>> +                         mfex_impls[j].name, i);
>> +                VLOG_ERR("  Good hexdump:\n");
>> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
>> +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
>> +                for (uint32_t b = 0; b < block_cnt; b++) {
>> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> 
> For this and other VLOG Errs  rather than using spaces to have you thought of using pad left?

FWIW, I'd prefer having a dynamic string for this kind of complex logs
constructed with ds_put_hex_dump() and printed as a single log message.
This way it will not be intermixed with other logs.

Not sure, what you meant under 'pad left', though.

Best regards, Ilya Maximets.
Ilya Maximets June 24, 2021, 11:05 a.m. UTC | #3
On 6/24/21 12:58 PM, Ilya Maximets wrote:
> On 6/24/21 12:46 PM, Stokes, Ian wrote:
>>> +
>>> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
>>> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
>>> +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
>>> +                         mfex_impls[j].name, i);
>>> +                VLOG_ERR("  Good hexdump:\n");
>>> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
>>> +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
>>> +                for (uint32_t b = 0; b < block_cnt; b++) {
>>> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
>>
>> For this and other VLOG Errs  rather than using spaces to have you thought of using pad left?
> 
> FWIW, I'd prefer having a dynamic string for this kind of complex logs
> constructed with ds_put_hex_dump() and printed as a single log message.
> This way it will not be intermixed with other logs.
> 
> Not sure, what you meant under 'pad left', though.
> 
> Best regards, Ilya Maximets.
> 

And these logs must be rate-limited, as if this log is going to be printed,
it will be printed for every single packet or for lots of them anyway.
This might grow log size very fast.
Kumar Amber June 24, 2021, 11:26 a.m. UTC | #4
Hi Ian,

Thanks for reviews, replies are inlined.


<Snipped>

> > +        /* Call optimized miniflow for each batch of packet. */
> > +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> > +                                            keys_size, in_port,
> > + pmd_handle);
> 
> Alignment above is out, should be aligned under first argument passed ie.
> packets.

Fixed in v5.
> 
> > +
> > +        /* Do a miniflow compare for bits, blocks and offsets for all the
> > +         * classified packets in the hitmask marked by set bits. */
> > +        while (hit_mask) {
> > +            /* Index for the set bit. */
> > +            uint32_t i = __builtin_ctz(hit_mask);
> > +            /* Set the index in hitmask to Zero. */
> > +            hit_mask &= (hit_mask - 1);
> > +
> > +            uint32_t failed = 0;
> > +
> > +            /* Check miniflow bits are equal. */
> > +            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> > +                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> > +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> > +                         keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> > +                         test_keys[i].mf.map.bits[0],
> > +                         test_keys[i].mf.map.bits[1]);
> > +                failed = 1;
> > +            }
> > +
> > +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> > +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> > +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> > +                         mfex_impls[j].name, i);
> > +                VLOG_ERR("  Good hexdump:\n");
> > +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> > +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> > +                for (uint32_t b = 0; b < block_cnt; b++) {
> > +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> 
> For this and other VLOG Errs  rather than using spaces to have you thought
> of using pad left?

Fixed in v5.
> > +                }
> > +                VLOG_ERR("  Test hexdump:\n");
> > +                for (uint32_t b = 0; b < block_cnt; b++) {
> > +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> > +                }
> > +                failed = 1;
> > +            }
> > +
> > +            if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> > +                    (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> > +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> > +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> > +                VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> > +                         mfex_impls[j].name, i);
> > +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> > +                         " l3_ofs %u, l4_ofs %u\n",
> > +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> > +                         good_l3_ofs[i], good_l4_ofs[i]);
> > +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> > +                         " l3_ofs %u, l4_ofs %u\n",
> > +                         packets->packets[i]->l2_pad_size,
> > +                         packets->packets[i]->l2_5_ofs,
> > +                         packets->packets[i]->l3_ofs,
> > +                         packets->packets[i]->l4_ofs);
> > +                failed = 1;
> > +            }
> > +
> > +            if (failed) {
> > +                /* Having dumped the debug info, disable autovalidator. */
> > +                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> > +                         mfex_impls[j].name, i);
> > +                /* Halt OVS here on debug builds. */
> > +                ovs_assert(0);
> > +                pmd->miniflow_extract_opt = NULL;
> > +                break;
> > +            }
> > +        }
> > +    }
> > +
> > +    /* preserve packet correctness by storing back the good offsets in
> > +     * packets back. */
> 
> Minor, capitalize Preserve above.

Fixed in v5.
> 
> > +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > +        packet->l2_5_ofs = good_l2_5_ofs[i];
> > +        packet->l3_ofs = good_l3_ofs[i];
> > +        packet->l4_ofs = good_l4_ofs[i];
> > +        packet->l2_pad_size = good_l2_pad_size[i];
> > +    }
> > +
> > +    /* Returning zero implies no packets were hit by autovalidation. This
> > +     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> > +     * would pass/fail. By always returning zero, autovalidator is a little
> > +     * slower, but we gain consistency in testing.
> 
> Can you explain this in a little more detail?
> 
> Is the expectation here that no packets get hit but just simply that the test
> and comparisons match for each mfex?
> 

Details included in v5.
> > +     */
> > +    return 0;
> > +}
> > diff --git a/lib/dpif-netdev-private-extract.h
> > b/lib/dpif-netdev-private-extract.h
> > index b7b0b2be4..455a7b590 100644
> > --- a/lib/dpif-netdev-private-extract.h
> > +++ b/lib/dpif-netdev-private-extract.h
> > @@ -24,6 +24,11 @@
> >  /* Max size of dpif_miniflow_extract_impl array. */  #define
> > MFEX_IMPLS_MAX_SIZE (16)
> >
> > +/* Skip the autovalidator study and null when iterating all available
> 
> Is study the right word above for the autovalidator? Seems a bit odd.

Fixed in v5.

> > + * miniflow implementations.
> > + */
> > +#define MFEX_IMPL_START_IDX (1)
> > +
> >  /* Forward declarations. */
> >  struct dp_packet;
> >  struct miniflow;
> > @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);  int32_t
> > dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl
> > **out_ptr);
> >
> > +/* Retrieve the hitmask of the batch of pakcets which is obtained by
> > +comparing
> > + * different miniflow implementations with linear miniflow extract.
> > + * On error, returns a zero.
> > + * On success, returns the number of packets in the batch compared.
> > + */
> > +uint32_t
> > +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> > +                                    struct netdev_flow_key *keys,
> > +                                    uint32_t keys_size, odp_port_t in_port,
> > +                                    void *pmd_handle);
> >
> >  #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git
> > a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 567ebd952..4f4ab2790
> > 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> > unixctl_conn *conn, int argc,
> >      struct ds reply = DS_EMPTY_INITIALIZER;
> >      ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> mfex_name);
> >      const char *reply_str = ds_cstr(&reply);
> > -    unixctl_command_reply(conn, reply_str);
> >      VLOG_INFO("%s", reply_str);
> > +    unixctl_command_reply(conn, reply_str);
> 
> Is there a reason for swapping the order above?
> 

This looks more logical .
> 
> BR
> Ian
> >      ds_destroy(&reply);
> >  }
> >
> > --
> > 2.25.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Van Haaren, Harry June 24, 2021, 11:39 a.m. UTC | #5
> -----Original Message-----
> From: Amber, Kumar <kumar.amber@intel.com>
> Sent: Thursday, June 24, 2021 12:27 PM
> To: Stokes, Ian <ian.stokes@intel.com>; dev@openvswitch.org; Van Haaren, Harry
> <harry.van.haaren@intel.com>
> Cc: i.maximets@ovn.org
> Subject: RE: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> miniflow extract

<snip lots of patch>

> > >  #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git
> > > a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 567ebd952..4f4ab2790
> > > 100644
> > > --- a/lib/dpif-netdev.c
> > > +++ b/lib/dpif-netdev.c
> > > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> > > unixctl_conn *conn, int argc,
> > >      struct ds reply = DS_EMPTY_INITIALIZER;
> > >      ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> > mfex_name);
> > >      const char *reply_str = ds_cstr(&reply);
> > > -    unixctl_command_reply(conn, reply_str);
> > >      VLOG_INFO("%s", reply_str);
> > > +    unixctl_command_reply(conn, reply_str);
> >
> > Is there a reason for swapping the order above?
> >
> 
> This looks more logical .

Hi All,

Actually yes there's a good reason, by logging internally in Vswitchd first,
and then sending the reply to the user, the order of prints in the logs is
easier to understand.

This is particularly true when e.g. MFEX enabling logs can come *after* the PMD log
print of study having chosen a specific MFEX impl.

(pseudo) Example of bad logging behaviour:
<ovs startup stuff>
PMD: MFEX study chose profile "eth_ip_udp" (128/128 hits)
DPIF: MFEX optimized functionality set to "study"

(pseudo) Example of good logging behaviour (with switched log/conn_reply):
<ovs startup stuff>
DPIF: MFEX optimized functionality set to "study"
PMD: MFEX study chose profile "eth_ip_udp" (128/128 hits)

Hope the helps clarify the change! -Harry
Van Haaren, Harry June 24, 2021, 12:18 p.m. UTC | #6
> -----Original Message-----
> From: Ilya Maximets <i.maximets@ovn.org>
> Sent: Thursday, June 24, 2021 12:06 PM
> To: Ilya Maximets <i.maximets@ovn.org>; Stokes, Ian <ian.stokes@intel.com>;
> Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org; Van Haaren,
> Harry <harry.van.haaren@intel.com>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> miniflow extract

Hi Ilya,

Thanks for reviewing.

> On 6/24/21 12:58 PM, Ilya Maximets wrote:
> > On 6/24/21 12:46 PM, Stokes, Ian wrote:
> >>> +
> >>> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> >>> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> >>> +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> >>> +                         mfex_impls[j].name, i);
> >>> +                VLOG_ERR("  Good hexdump:\n");
> >>> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> >>> +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> >>> +                for (uint32_t b = 0; b < block_cnt; b++) {
> >>> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> >>
> >> For this and other VLOG Errs  rather than using spaces to have you thought of
> using pad left?
> >
> > FWIW, I'd prefer having a dynamic string for this kind of complex logs
> > constructed with ds_put_hex_dump() and printed as a single log message.
> > This way it will not be intermixed with other logs.

Ah, I wasn't aware of the ds_put_hex_dump() functionality, agree this is
a good improvement to add the data into a single log entry. Done a quick
POC and this is indeed a good improvement, thanks.

<snip>

> And these logs must be rate-limited, as if this log is going to be printed,
> it will be printed for every single packet or for lots of them anyway.
> This might grow log size very fast.

If the logs here get triggered, its means that packets can have miniflows built
up that are incorrect (compared to scalar impl.). That's pretty bad, so today we
have an "ovs_assert(0)" in there to stop, but we also switch off the PMD's mfex
func pointer (setting it to NULL). This avoids spamming to the logs, logging only
the packet that caused the issue. (See the patch/code snippet I pasted below for impl.)

Once the user re-enables autovalidator via the miniflow-parser-set command,
another log-print will occur if a packet doesn't match (and again mfex opt
autovalidator will be switched off).

Hope the logic here makes sense. Will remove the ovs_assert().
I feel that "auto disable" of the MFEX opt ptr is a very nice way to
avoid spamming logs and also enable user to continue probing if required.

Regards, -Harry

> +            if (failed) {
> +                /* Having dumped the debug info, disable autovalidator. */
> +                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> +                         mfex_impls[j].name, i);
> +                /* Halt OVS here on debug builds. */
> +                ovs_assert(0);
> +                pmd->miniflow_extract_opt = NULL;
> +                break;
> +            }
Eelco Chaudron June 25, 2021, 1 p.m. UTC | #7
Hi Kumar,

I plan to review this patch set, but I need to go over the dpif AVX512 
set first to get a better understanding.

However, I did run some performance tests on old hardware (as I do not 
have an AVX512 system) and notice some degradation (and improvements). 
This was a single run for both scenarios, with the following results 
(based on ovs_perf), on a single Intel(R) Xeon(R) CPU E5-2690 v4 @ 
2.60GHz:

        Number of flows 64      128     256     512     768     1024    
1514
Delta
        10              1.48%    1.72%   1.59%  -0.12%   0.44%   6.99%   
7.31%
        1000            1.06%   -0.73%  -1.46%  -1.42%  -2.54%  -0.20%  
-0.98%
        10000          -0.93%   -1.62%  -0.32%  -1.50%  -0.30%  -0.56%   
0.19%
        100000          0.39%   -0.05%  -0.60%  -0.51%  -0.90%   1.24%  
-1.10%


Master
        10              4767168 4601575 4382381 4127125 3594158 2932787 
2400479
        100             3804956 3612716 3547054 3127117 2950324 2615856 
2133892
        1000            3251959 3257535 2985693 2869970 2549086 2286262 
1979985
        10000           2671946 2624808 2536575 2412845 2190386 1952359 
1699142


Patch
        10              4838691 4682131 4453022 4122100 3609915 3153228 
2589748
        100             3845585 3586650 3496167 3083467 2877265 2610640 
2113108
        1000            3221894 3205732 2976203 2827620 2541349 2273468 
1983794
        10000           2682461 2623585 2521419 2400627 2170751 1976909 
1680607

Zero loss for master 5.8% (3,452,306pps) vs on Patch 5.7% 
(3,392,783pps).


Did you guys do any tests like this? I think it would be good not only 
to know the improvement but also the degradation of existing systems 
without AVX512.


I see Ian is currently reviewing the v4 and was wondering if you plan to 
send the v5 soon, if so, I hold off a bit, and do the v5 rather than 
doing the v4 and verify it’s not something Ian mentioned.

Cheers,


Eelco


On 17 Jun 2021, at 18:27, Kumar Amber wrote:

> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
>
> The autovaidator function can be triggered at runtime using the
> following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
>
> Signed-off-by: Kumar Amber <kumar.amber@intel.com>
> Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
>  lib/dpif-netdev-private-extract.c | 141 
> ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.h |  15 ++++
>  lib/dpif-netdev.c                 |   2 +-
>  3 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/lib/dpif-netdev-private-extract.c 
> b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>
>  /* Implementations of available extract options. */
>  static struct dpif_miniflow_extract_impl mfex_impls[] = {
> +   {
> +        .probe = NULL,
> +        .extract_func = dpif_miniflow_extract_autovalidator,
> +        .name = "autovalidator",
> +    },
>      {
>          .probe = NULL,
>          .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct 
> dpif_miniflow_extract_impl **out_ptr)
>      *out_ptr = mfex_impls;
>      return ARRAY_SIZE(mfex_impls);
>  }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t 
> in_port,
> +                                    void *pmd_handle)
> +{
> +    const size_t cnt = dp_packet_batch_size(packets);
> +    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> +    struct dp_packet *packet;
> +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> +    int32_t mfunc_count = 
> dpif_miniflow_extract_info_get(&miniflow_funcs);
> +    if (mfunc_count < 0) {
> +        pmd->miniflow_extract_opt = NULL;
> +        VLOG_ERR("failed to get miniflow extract function 
> implementations\n");
> +        return 0;
> +    }
> +    ovs_assert(keys_size >= cnt);
> +    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> +    /* Run scalar miniflow_extract to get default result. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        pkt_metadata_init(&packet->md, in_port);
> +        miniflow_extract(packet, &keys[i].mf);
> +
> +        /* Store known good metadata to compare with optimized 
> metadata. */
> +        good_l2_5_ofs[i] = packet->l2_5_ofs;
> +        good_l3_ofs[i] = packet->l3_ofs;
> +        good_l4_ofs[i] = packet->l4_ofs;
> +        good_l2_pad_size[i] = packet->l2_pad_size;
> +    }
> +
> +    /* Iterate through each version of miniflow implementations. */
> +    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); 
> j++) {
> +        if (!mfex_impls[j].available) {
> +            continue;
> +        }
> +
> +        /* Reset keys and offsets before each implementation. */
> +        memset(test_keys, 0, keys_size * sizeof(struct 
> netdev_flow_key));
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +            dp_packet_reset_offsets(packet);
> +        }
> +        /* Call optimized miniflow for each batch of packet. */
> +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, 
> test_keys,
> +                                            keys_size, in_port, 
> pmd_handle);
> +
> +        /* Do a miniflow compare for bits, blocks and offsets for all 
> the
> +         * classified packets in the hitmask marked by set bits. */
> +        while (hit_mask) {
> +            /* Index for the set bit. */
> +            uint32_t i = __builtin_ctz(hit_mask);
> +            /* Set the index in hitmask to Zero. */
> +            hit_mask &= (hit_mask - 1);
> +
> +            uint32_t failed = 0;
> +
> +            /* Check miniflow bits are equal. */
> +            if ((keys[i].mf.map.bits[0] != 
> test_keys[i].mf.map.bits[0]) ||
> +                (keys[i].mf.map.bits[1] != 
> test_keys[i].mf.map.bits[1])) {
> +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> +                         keys[i].mf.map.bits[0], 
> keys[i].mf.map.bits[1],
> +                         test_keys[i].mf.map.bits[0],
> +                         test_keys[i].mf.map.bits[1]);
> +                failed = 1;
> +            }
> +
> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> +                VLOG_ERR("Autovalidation blocks failed for %s pkt 
> %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good hexdump:\n");
> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> +                uint64_t *test_block_ptr = (uint64_t 
> *)&test_keys[i].buf;
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> +                }
> +                VLOG_ERR("  Test hexdump:\n");
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> +                }
> +                failed = 1;
> +            }
> +
> +            if ((packets->packets[i]->l2_pad_size != 
> good_l2_pad_size[i]) ||
> +                    (packets->packets[i]->l2_5_ofs != 
> good_l2_5_ofs[i]) ||
> +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) 
> ||
> +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) 
> {
> +                VLOG_ERR("Autovalidation packet offsets failed for %s 
> pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : 
> %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> +                         good_l3_ofs[i], good_l4_ofs[i]);
> +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : 
> %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         packets->packets[i]->l2_pad_size,
> +                         packets->packets[i]->l2_5_ofs,
> +                         packets->packets[i]->l3_ofs,
> +                         packets->packets[i]->l4_ofs);
> +                failed = 1;
> +            }
> +
> +            if (failed) {
> +                /* Having dumped the debug info, disable 
> autovalidator. */
> +                VLOG_ERR("Autovalidation failed in %s pkt %d, 
> disabling.\n",
> +                         mfex_impls[j].name, i);
> +                /* Halt OVS here on debug builds. */
> +                ovs_assert(0);
> +                pmd->miniflow_extract_opt = NULL;
> +                break;
> +            }
> +        }
> +    }
> +
> +    /* preserve packet correctness by storing back the good offsets 
> in
> +     * packets back. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        packet->l2_5_ofs = good_l2_5_ofs[i];
> +        packet->l3_ofs = good_l3_ofs[i];
> +        packet->l4_ofs = good_l4_ofs[i];
> +        packet->l2_pad_size = good_l2_pad_size[i];
> +    }
> +
> +    /* Returning zero implies no packets were hit by autovalidation. 
> This
> +     * simplifies unit-tests as changing 
> --enable-mfex-default-autovalidator
> +     * would pass/fail. By always returning zero, autovalidator is a 
> little
> +     * slower, but we gain consistency in testing.
> +     */
> +    return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h 
> b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
>  /* Max size of dpif_miniflow_extract_impl array. */
>  #define MFEX_IMPLS_MAX_SIZE (16)
>
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
>  /* Forward declarations. */
>  struct dp_packet;
>  struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
>  int32_t
>  dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl 
> **out_ptr);
>
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by 
> comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t 
> in_port,
> +                                    void *pmd_handle);
>
>  #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct 
> unixctl_conn *conn, int argc,
>      struct ds reply = DS_EMPTY_INITIALIZER;
>      ds_put_format(&reply, "Miniflow implementation set to %s.\n", 
> mfex_name);
>      const char *reply_str = ds_cstr(&reply);
> -    unixctl_command_reply(conn, reply_str);
>      VLOG_INFO("%s", reply_str);
> +    unixctl_command_reply(conn, reply_str);
>      ds_destroy(&reply);
>  }
>
> -- 
> 2.25.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Flavio Leitner June 28, 2021, 2:50 a.m. UTC | #8
Hi,

I haven't tested the patch set yet.
I left some comments in line.

On Thu, Jun 17, 2021 at 09:57:44PM +0530, Kumar Amber wrote:
> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
> 
> The autovaidator function can be triggered at runtime using the
> following command:
> 
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
> 
> Signed-off-by: Kumar Amber <kumar.amber@intel.com>
> Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
>  lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.h |  15 ++++
>  lib/dpif-netdev.c                 |   2 +-
>  3 files changed, 157 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>  
>  /* Implementations of available extract options. */
>  static struct dpif_miniflow_extract_impl mfex_impls[] = {
> +   {
> +        .probe = NULL,
> +        .extract_func = dpif_miniflow_extract_autovalidator,
> +        .name = "autovalidator",
> +    },

Please define a enum for each entry. Also document that
autovalidator is required to be the first and suggest
to see the comment explaining more on MFEX_IMPL_START_IDX.

>      {
>          .probe = NULL,
>          .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
>      *out_ptr = mfex_impls;
>      return ARRAY_SIZE(mfex_impls);
>  }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t in_port,
> +                                    void *pmd_handle)
> +{
> +    const size_t cnt = dp_packet_batch_size(packets);
> +    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> +    struct dp_packet *packet;
> +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> +    int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
> +    if (mfunc_count < 0) {
> +        pmd->miniflow_extract_opt = NULL;
> +        VLOG_ERR("failed to get miniflow extract function implementations\n");

No need for terminating with \n here and other calls to VLOG_*().

> +        return 0;
> +    }

> +    ovs_assert(keys_size >= cnt);
> +    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> +    /* Run scalar miniflow_extract to get default result. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        pkt_metadata_init(&packet->md, in_port);
> +        miniflow_extract(packet, &keys[i].mf);
> +
> +        /* Store known good metadata to compare with optimized metadata. */
> +        good_l2_5_ofs[i] = packet->l2_5_ofs;
> +        good_l3_ofs[i] = packet->l3_ofs;
> +        good_l4_ofs[i] = packet->l4_ofs;
> +        good_l2_pad_size[i] = packet->l2_pad_size;
> +    }
> +
> +    /* Iterate through each version of miniflow implementations. */
> +    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> +        if (!mfex_impls[j].available) {
> +            continue;
> +        }
> +
> +        /* Reset keys and offsets before each implementation. */
> +        memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +            dp_packet_reset_offsets(packet);
> +        }
> +        /* Call optimized miniflow for each batch of packet. */
> +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> +                                            keys_size, in_port, pmd_handle);
> +
> +        /* Do a miniflow compare for bits, blocks and offsets for all the
> +         * classified packets in the hitmask marked by set bits. */
> +        while (hit_mask) {
> +            /* Index for the set bit. */
> +            uint32_t i = __builtin_ctz(hit_mask);
> +            /* Set the index in hitmask to Zero. */
> +            hit_mask &= (hit_mask - 1);
> +
> +            uint32_t failed = 0;
> +
> +            /* Check miniflow bits are equal. */
> +            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> +                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> +                         keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> +                         test_keys[i].mf.map.bits[0],
> +                         test_keys[i].mf.map.bits[1]);
> +                failed = 1;
> +            }
> +
> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good hexdump:\n");
> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> +                }
> +                VLOG_ERR("  Test hexdump:\n");
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> +                }
> +                failed = 1;
> +            }
> +

What do you think of doing this instead:
               packet = packets->packets[i];
               if ((packet->l2_pad_size != good_l2_pad_size[i]) ||
               ...

> +            if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||

> +                    (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> +                VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> +                         good_l3_ofs[i], good_l4_ofs[i]);
> +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         packets->packets[i]->l2_pad_size,
> +                         packets->packets[i]->l2_5_ofs,
> +                         packets->packets[i]->l3_ofs,
> +                         packets->packets[i]->l4_ofs);
> +                failed = 1;
> +            }
> +
> +            if (failed) {
> +                /* Having dumped the debug info, disable autovalidator. */
> +                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> +                         mfex_impls[j].name, i);
> +                /* Halt OVS here on debug builds. */
> +                ovs_assert(0);
> +                pmd->miniflow_extract_opt = NULL;
> +                break;
> +            }
> +        }
> +    }
> +
> +    /* preserve packet correctness by storing back the good offsets in
> +     * packets back. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        packet->l2_5_ofs = good_l2_5_ofs[i];
> +        packet->l3_ofs = good_l3_ofs[i];
> +        packet->l4_ofs = good_l4_ofs[i];
> +        packet->l2_pad_size = good_l2_pad_size[i];
> +    }
> +
> +    /* Returning zero implies no packets were hit by autovalidation. This
> +     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> +     * would pass/fail. By always returning zero, autovalidator is a little
> +     * slower, but we gain consistency in testing.
> +     */
> +    return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
>  /* Max size of dpif_miniflow_extract_impl array. */
>  #define MFEX_IMPLS_MAX_SIZE (16)
>  
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +

As pointed above, this should come from an enum.


>  /* Forward declarations. */
>  struct dp_packet;
>  struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
>  int32_t
>  dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);
>  
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t in_port,
> +                                    void *pmd_handle);
>  
>  #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
>      struct ds reply = DS_EMPTY_INITIALIZER;
>      ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
>      const char *reply_str = ds_cstr(&reply);
> -    unixctl_command_reply(conn, reply_str);
>      VLOG_INFO("%s", reply_str);
> +    unixctl_command_reply(conn, reply_str);
>      ds_destroy(&reply);
>  }
>  
> -- 
> 2.25.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Kumar Amber June 28, 2021, 3:04 p.m. UTC | #9
Hi Flavio,

Thanks Again replies are inlined.

> >  /* Implementations of available extract options. */  static struct
> > dpif_miniflow_extract_impl mfex_impls[] = {
> > +   {
> > +        .probe = NULL,
> > +        .extract_func = dpif_miniflow_extract_autovalidator,
> > +        .name = "autovalidator",
> > +    },
> 
> Please define a enum for each entry. Also document that autovalidator is
> required to be the first and suggest to see the comment explaining more on
> MFEX_IMPL_START_IDX.
> 

Fixed in upcoming v5. Using ENUMS directly.
> >      {
> >          .probe = NULL,
> >          .extract_func = NULL,
> > @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct
> dpif_miniflow_extract_impl **out_ptr)
> >      *out_ptr = mfex_impls;
> >      return ARRAY_SIZE(mfex_impls);
> >  }
> > +
> > +uint32_t
> > +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> > +                                    struct netdev_flow_key *keys,
> > +                                    uint32_t keys_size, odp_port_t in_port,
> > +                                    void *pmd_handle) {
> > +    const size_t cnt = dp_packet_batch_size(packets);
> > +    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> > +    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> > +    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> > +    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> > +    struct dp_packet *packet;
> > +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> > +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> > +
> > +    int32_t mfunc_count =
> dpif_miniflow_extract_info_get(&miniflow_funcs);
> > +    if (mfunc_count < 0) {
> > +        pmd->miniflow_extract_opt = NULL;
> > +        VLOG_ERR("failed to get miniflow extract function
> > + implementations\n");
> 
> No need for terminating with \n here and other calls to VLOG_*().
> 
Fixed in upcoming v5.
> > +        return 0;
> > +    }
> 
> > +    ovs_assert(keys_size >= cnt);
> > +    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> > +
> > +    /* Run scalar miniflow_extract to get default result. */
> > +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > +        pkt_metadata_init(&packet->md, in_port);
> > +        miniflow_extract(packet, &keys[i].mf);
> > +
> > +        /* Store known good metadata to compare with optimized
> metadata. */
> > +        good_l2_5_ofs[i] = packet->l2_5_ofs;
> > +        good_l3_ofs[i] = packet->l3_ofs;
> > +        good_l4_ofs[i] = packet->l4_ofs;
> > +        good_l2_pad_size[i] = packet->l2_pad_size;
> > +    }
> > +
> > +    /* Iterate through each version of miniflow implementations. */
> > +    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> > +        if (!mfex_impls[j].available) {
> > +            continue;
> > +        }
> > +
> > +        /* Reset keys and offsets before each implementation. */
> > +        memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> > +        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > +            dp_packet_reset_offsets(packet);
> > +        }
> > +        /* Call optimized miniflow for each batch of packet. */
> > +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> > +                                            keys_size, in_port,
> > + pmd_handle);
> > +
> > +        /* Do a miniflow compare for bits, blocks and offsets for all the
> > +         * classified packets in the hitmask marked by set bits. */
> > +        while (hit_mask) {
> > +            /* Index for the set bit. */
> > +            uint32_t i = __builtin_ctz(hit_mask);
> > +            /* Set the index in hitmask to Zero. */
> > +            hit_mask &= (hit_mask - 1);
> > +
> > +            uint32_t failed = 0;
> > +
> > +            /* Check miniflow bits are equal. */
> > +            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> > +                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> > +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> > +                         keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> > +                         test_keys[i].mf.map.bits[0],
> > +                         test_keys[i].mf.map.bits[1]);
> > +                failed = 1;
> > +            }
> > +
> > +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> > +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> > +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> > +                         mfex_impls[j].name, i);
> > +                VLOG_ERR("  Good hexdump:\n");
> > +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> > +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> > +                for (uint32_t b = 0; b < block_cnt; b++) {
> > +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> > +                }
> > +                VLOG_ERR("  Test hexdump:\n");
> > +                for (uint32_t b = 0; b < block_cnt; b++) {
> > +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> > +                }
> > +                failed = 1;
> > +            }
> > +
> 
> What do you think of doing this instead:
>                packet = packets->packets[i];

Does look nice taken into v5.
>                if ((packet->l2_pad_size != good_l2_pad_size[i]) ||
>                ...
> 
> > +            if ((packets->packets[i]->l2_pad_size !=
> > + good_l2_pad_size[i]) ||
> 
> > +                    (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> > +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> > +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> > +                VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> > +                         mfex_impls[j].name, i);
> > +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> > +                         " l3_ofs %u, l4_ofs %u\n",
> > +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> > +                         good_l3_ofs[i], good_l4_ofs[i]);
> > +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> > +                         " l3_ofs %u, l4_ofs %u\n",
> > +                         packets->packets[i]->l2_pad_size,
> > +                         packets->packets[i]->l2_5_ofs,
> > +                         packets->packets[i]->l3_ofs,
> > +                         packets->packets[i]->l4_ofs);
> > +                failed = 1;
> > +            }
> > +
> > +            if (failed) {
> > +                /* Having dumped the debug info, disable autovalidator. */
> > +                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> > +                         mfex_impls[j].name, i);
> > +                /* Halt OVS here on debug builds. */
> > +                ovs_assert(0);
> > +                pmd->miniflow_extract_opt = NULL;
> > +                break;
> > +            }
> > +        }
> > +    }
> > +
> > +    /* preserve packet correctness by storing back the good offsets in
> > +     * packets back. */
> > +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > +        packet->l2_5_ofs = good_l2_5_ofs[i];
> > +        packet->l3_ofs = good_l3_ofs[i];
> > +        packet->l4_ofs = good_l4_ofs[i];
> > +        packet->l2_pad_size = good_l2_pad_size[i];
> > +    }
> > +
> > +    /* Returning zero implies no packets were hit by autovalidation. This
> > +     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> > +     * would pass/fail. By always returning zero, autovalidator is a little
> > +     * slower, but we gain consistency in testing.
> > +     */
> > +    return 0;
> > +}
> > diff --git a/lib/dpif-netdev-private-extract.h
> > b/lib/dpif-netdev-private-extract.h
> > index b7b0b2be4..455a7b590 100644
> > --- a/lib/dpif-netdev-private-extract.h
> > +++ b/lib/dpif-netdev-private-extract.h
> > @@ -24,6 +24,11 @@
> >  /* Max size of dpif_miniflow_extract_impl array. */  #define
> > MFEX_IMPLS_MAX_SIZE (16)
> >
> > +/* Skip the autovalidator study and null when iterating all available
> > + * miniflow implementations.
> > + */
> > +#define MFEX_IMPL_START_IDX (1)
> > +
> 
> As pointed above, this should come from an enum.

Fixed in upcoming v5.
> 
> 
> >  /* Forward declarations. */
> >  struct dp_packet;
> >  struct miniflow;
> > @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);  int32_t
> > dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl
> > **out_ptr);
> >
> > +/* Retrieve the hitmask of the batch of pakcets which is obtained by
> > +comparing
> > + * different miniflow implementations with linear miniflow extract.
> > + * On error, returns a zero.
> > + * On success, returns the number of packets in the batch compared.
> > + */
> > +uint32_t
> > +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> > +                                    struct netdev_flow_key *keys,
> > +                                    uint32_t keys_size, odp_port_t in_port,
> > +                                    void *pmd_handle);
> >
> >  #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git
> > a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 567ebd952..4f4ab2790
> > 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> unixctl_conn *conn, int argc,
> >      struct ds reply = DS_EMPTY_INITIALIZER;
> >      ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> mfex_name);
> >      const char *reply_str = ds_cstr(&reply);
> > -    unixctl_command_reply(conn, reply_str);
> >      VLOG_INFO("%s", reply_str);
> > +    unixctl_command_reply(conn, reply_str);
> >      ds_destroy(&reply);
> >  }
> >
> > --
> > 2.25.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
> --
> fbl
Van Haaren, Harry June 29, 2021, 11:05 a.m. UTC | #10
Hi Eelco,

Would you describe the actual test being run below?

I'm having a hard time figuring out what the actual datapath packet flow is. It seems strange
that MFEX optimizations are affected by flow-count, that doesn't really logically make sense.
Hence, some more understanding on what the test setup is may help.

To remove complexity & noise from the setup: does running a simple Phy-to-Phy test with L2 bridging
cause any perf degradation? If so, please describe that exact setup and I'll try to reproduce/replicate results here.

Regards, -Harry

PS: Apologies for top post/html email, is my mail client acting strange, or was this already a html email on list?
Changing it back to plain-text causes loss of all > previous reply indentation…

From: Eelco Chaudron <echaudro@redhat.com>
Sent: Friday, June 25, 2021 2:00 PM
To: Amber, Kumar <kumar.amber@intel.com>
Cc: dev@openvswitch.org; i.maximets@ovn.org; Van Haaren, Harry <harry.van.haaren@intel.com>; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian <ian.stokes@intel.com>
Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract


Hi Kumar,

I plan to review this patch set, but I need to go over the dpif AVX512 set first to get a better understanding.

However, I did run some performance tests on old hardware (as I do not have an AVX512 system) and notice some degradation (and improvements). This was a single run for both scenarios, with the following results (based on ovs_perf), on a single Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz:

   Number of flows 64      128     256     512     768     1024    1514

Delta
10 1.48% 1.72% 1.59% -0.12% 0.44% 6.99% 7.31%
1000 1.06% -0.73% -1.46% -1.42% -2.54% -0.20% -0.98%
10000 -0.93% -1.62% -0.32% -1.50% -0.30% -0.56% 0.19%
100000 0.39% -0.05% -0.60% -0.51% -0.90% 1.24% -1.10%

Master
10 4767168 4601575 4382381 4127125 3594158 2932787 2400479
100 3804956 3612716 3547054 3127117 2950324 2615856 2133892
1000 3251959 3257535 2985693 2869970 2549086 2286262 1979985
10000 2671946 2624808 2536575 2412845 2190386 1952359 1699142

Patch
10 4838691 4682131 4453022 4122100 3609915 3153228 2589748
100 3845585 3586650 3496167 3083467 2877265 2610640 2113108
1000 3221894 3205732 2976203 2827620 2541349 2273468 1983794
10000 2682461 2623585 2521419 2400627 2170751 1976909 1680607

Zero loss for master 5.8% (3,452,306pps) vs on Patch 5.7% (3,392,783pps).

Did you guys do any tests like this? I think it would be good not only to know the improvement but also the degradation of existing systems without AVX512.

I see Ian is currently reviewing the v4 and was wondering if you plan to send the v5 soon, if so, I hold off a bit, and do the v5 rather than doing the v4 and verify it’s not something Ian mentioned.

Cheers,

Eelco

On 17 Jun 2021, at 18:27, Kumar Amber wrote:

This patch introduced the auto-validation function which
allows users to compare the batch of packets obtained from
different miniflow implementations against the linear
miniflow extract and return a hitmask.

The autovaidator function can be triggered at runtime using the
following command:

$ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator

Signed-off-by: Kumar Amber <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
---
lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
lib/dpif-netdev-private-extract.h | 15 ++++
lib/dpif-netdev.c | 2 +-
3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index fcc56ef26..0741c19f9 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);

/* Implementations of available extract options. */
static struct dpif_miniflow_extract_impl mfex_impls[] = {
+ {
+ .probe = NULL,
+ .extract_func = dpif_miniflow_extract_autovalidator,
+ .name = "autovalidator",
+ },
{
.probe = NULL,
.extract_func = NULL,
@@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
*out_ptr = mfex_impls;
return ARRAY_SIZE(mfex_impls);
}
+
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size, odp_port_t in_port,
+ void *pmd_handle)
+{
+ const size_t cnt = dp_packet_batch_size(packets);
+ uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l3_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l4_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
+ struct dp_packet *packet;
+ struct dp_netdev_pmd_thread *pmd = pmd_handle;
+ struct dpif_miniflow_extract_impl *miniflow_funcs;
+
+ int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
+ if (mfunc_count < 0) {
+ pmd->miniflow_extract_opt = NULL;
+ VLOG_ERR("failed to get miniflow extract function implementations\n");
+ return 0;
+ }
+ ovs_assert(keys_size >= cnt);
+ struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
+
+ /* Run scalar miniflow_extract to get default result. */
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ pkt_metadata_init(&packet->md, in_port);
+ miniflow_extract(packet, &keys[i].mf);
+
+ /* Store known good metadata to compare with optimized metadata. */
+ good_l2_5_ofs[i] = packet->l2_5_ofs;
+ good_l3_ofs[i] = packet->l3_ofs;
+ good_l4_ofs[i] = packet->l4_ofs;
+ good_l2_pad_size[i] = packet->l2_pad_size;
+ }
+
+ /* Iterate through each version of miniflow implementations. */
+ for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
+ if (!mfex_impls[j].available) {
+ continue;
+ }
+
+ /* Reset keys and offsets before each implementation. */
+ memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ dp_packet_reset_offsets(packet);
+ }
+ /* Call optimized miniflow for each batch of packet. */
+ uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
+ keys_size, in_port, pmd_handle);
+
+ /* Do a miniflow compare for bits, blocks and offsets for all the
+ * classified packets in the hitmask marked by set bits. */
+ while (hit_mask) {
+ /* Index for the set bit. */
+ uint32_t i = __builtin_ctz(hit_mask);
+ /* Set the index in hitmask to Zero. */
+ hit_mask &= (hit_mask - 1);
+
+ uint32_t failed = 0;
+
+ /* Check miniflow bits are equal. */
+ if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
+ (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
+ VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
+ keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
+ test_keys[i].mf.map.bits[0],
+ test_keys[i].mf.map.bits[1]);
+ failed = 1;
+ }
+
+ if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
+ uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
+ VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
+ mfex_impls[j].name, i);
+ VLOG_ERR(" Good hexdump:\n");
+ uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
+ uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
+ for (uint32_t b = 0; b < block_cnt; b++) {
+ VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
+ }
+ VLOG_ERR(" Test hexdump:\n");
+ for (uint32_t b = 0; b < block_cnt; b++) {
+ VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
+ }
+ failed = 1;
+ }
+
+ if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
+ (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
+ (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
+ (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
+ VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
+ mfex_impls[j].name, i);
+ VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
+ " l3_ofs %u, l4_ofs %u\n",
+ good_l2_pad_size[i], good_l2_5_ofs[i],
+ good_l3_ofs[i], good_l4_ofs[i]);
+ VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
+ " l3_ofs %u, l4_ofs %u\n",
+ packets->packets[i]->l2_pad_size,
+ packets->packets[i]->l2_5_ofs,
+ packets->packets[i]->l3_ofs,
+ packets->packets[i]->l4_ofs);
+ failed = 1;
+ }
+
+ if (failed) {
+ /* Having dumped the debug info, disable autovalidator. */
+ VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
+ mfex_impls[j].name, i);
+ /* Halt OVS here on debug builds. */
+ ovs_assert(0);
+ pmd->miniflow_extract_opt = NULL;
+ break;
+ }
+ }
+ }
+
+ /* preserve packet correctness by storing back the good offsets in
+ * packets back. */
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ packet->l2_5_ofs = good_l2_5_ofs[i];
+ packet->l3_ofs = good_l3_ofs[i];
+ packet->l4_ofs = good_l4_ofs[i];
+ packet->l2_pad_size = good_l2_pad_size[i];
+ }
+
+ /* Returning zero implies no packets were hit by autovalidation. This
+ * simplifies unit-tests as changing --enable-mfex-default-autovalidator
+ * would pass/fail. By always returning zero, autovalidator is a little
+ * slower, but we gain consistency in testing.
+ */
+ return 0;
+}
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index b7b0b2be4..455a7b590 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -24,6 +24,11 @@
/* Max size of dpif_miniflow_extract_impl array. */
#define MFEX_IMPLS_MAX_SIZE (16)

+/* Skip the autovalidator study and null when iterating all available
+ * miniflow implementations.
+ */
+#define MFEX_IMPL_START_IDX (1)
+
/* Forward declarations. */
struct dp_packet;
struct miniflow;
@@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
int32_t
dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);

+/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
+ * different miniflow implementations with linear miniflow extract.
+ * On error, returns a zero.
+ * On success, returns the number of packets in the batch compared.
+ */
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size, odp_port_t in_port,
+ void *pmd_handle);

#endif /* DPIF_NETDEV_AVX512_EXTRACT */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 567ebd952..4f4ab2790 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
struct ds reply = DS_EMPTY_INITIALIZER;
ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
const char *reply_str = ds_cstr(&reply);
- unixctl_command_reply(conn, reply_str);
VLOG_INFO("%s", reply_str);
+ unixctl_command_reply(conn, reply_str);
ds_destroy(&reply);
}

--
2.25.1
Eelco Chaudron June 29, 2021, 11:12 a.m. UTC | #11
On 29 Jun 2021, at 13:05, Van Haaren, Harry wrote:

> Hi Eelco,
>
> Would you describe the actual test being run below?
>
> I'm having a hard time figuring out what the actual datapath packet 
> flow is. It seems strange
> that MFEX optimizations are affected by flow-count, that doesn't 
> really logically make sense.
> Hence, some more understanding on what the test setup is may help.


This is using the standard PVP scenario with ovs_perf as explained here:

[ovs_perf](https://github.com/chaudron/ovs_perf#automated-open-vswitch-pvp-testing)


> To remove complexity & noise from the setup: does running a simple 
> Phy-to-Phy test with L2 bridging
> cause any perf degradation? If so, please describe that exact setup 
> and I'll try to reproduce/replicate results here.

I’ll try to do some more testing later this week, and get back.

> Regards, -Harry
>
> PS: Apologies for top post/html email, is my mail client acting 
> strange, or was this already a html email on list?
> Changing it back to plain-text causes loss of all > previous reply 
> indentation…
>
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Friday, June 25, 2021 2:00 PM
> To: Amber, Kumar <kumar.amber@intel.com>
> Cc: dev@openvswitch.org; i.maximets@ovn.org; Van Haaren, Harry 
> <harry.van.haaren@intel.com>; Flavio Leitner <fbl@sysclose.org>; 
> Stokes, Ian <ian.stokes@intel.com>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation 
> function for miniflow extract
>
>
> Hi Kumar,
>
> I plan to review this patch set, but I need to go over the dpif AVX512 
> set first to get a better understanding.
>
> However, I did run some performance tests on old hardware (as I do not 
> have an AVX512 system) and notice some degradation (and improvements). 
> This was a single run for both scenarios, with the following results 
> (based on ovs_perf), on a single Intel(R) Xeon(R) CPU E5-2690 v4 @ 
> 2.60GHz:
>
>    Number of flows 64      128     256     512     768     1024    
> 1514
>
> Delta
> 10 1.48% 1.72% 1.59% -0.12% 0.44% 6.99% 7.31%
> 1000 1.06% -0.73% -1.46% -1.42% -2.54% -0.20% -0.98%
> 10000 -0.93% -1.62% -0.32% -1.50% -0.30% -0.56% 0.19%
> 100000 0.39% -0.05% -0.60% -0.51% -0.90% 1.24% -1.10%
>
> Master
> 10 4767168 4601575 4382381 4127125 3594158 2932787 2400479
> 100 3804956 3612716 3547054 3127117 2950324 2615856 2133892
> 1000 3251959 3257535 2985693 2869970 2549086 2286262 1979985
> 10000 2671946 2624808 2536575 2412845 2190386 1952359 1699142
>
> Patch
> 10 4838691 4682131 4453022 4122100 3609915 3153228 2589748
> 100 3845585 3586650 3496167 3083467 2877265 2610640 2113108
> 1000 3221894 3205732 2976203 2827620 2541349 2273468 1983794
> 10000 2682461 2623585 2521419 2400627 2170751 1976909 1680607
>
> Zero loss for master 5.8% (3,452,306pps) vs on Patch 5.7% 
> (3,392,783pps).
>
> Did you guys do any tests like this? I think it would be good not only 
> to know the improvement but also the degradation of existing systems 
> without AVX512.
>
> I see Ian is currently reviewing the v4 and was wondering if you plan 
> to send the v5 soon, if so, I hold off a bit, and do the v5 rather 
> than doing the v4 and verify it’s not something Ian mentioned.
>
> Cheers,
>
> Eelco
>
> On 17 Jun 2021, at 18:27, Kumar Amber wrote:
>
> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
>
> The autovaidator function can be triggered at runtime using the
> following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
>
> Signed-off-by: Kumar Amber 
> <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
> Co-authored-by: Harry van Haaren 
> <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> Signed-off-by: Harry van Haaren 
> <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> ---
> lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
> lib/dpif-netdev-private-extract.h | 15 ++++
> lib/dpif-netdev.c | 2 +-
> 3 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/lib/dpif-netdev-private-extract.c 
> b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>
> /* Implementations of available extract options. */
> static struct dpif_miniflow_extract_impl mfex_impls[] = {
> + {
> + .probe = NULL,
> + .extract_func = dpif_miniflow_extract_autovalidator,
> + .name = "autovalidator",
> + },
> {
> .probe = NULL,
> .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct 
> dpif_miniflow_extract_impl **out_ptr)
> *out_ptr = mfex_impls;
> return ARRAY_SIZE(mfex_impls);
> }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle)
> +{
> + const size_t cnt = dp_packet_batch_size(packets);
> + uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> + struct dp_packet *packet;
> + struct dp_netdev_pmd_thread *pmd = pmd_handle;
> + struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> + int32_t mfunc_count = 
> dpif_miniflow_extract_info_get(&miniflow_funcs);
> + if (mfunc_count < 0) {
> + pmd->miniflow_extract_opt = NULL;
> + VLOG_ERR("failed to get miniflow extract function 
> implementations\n");
> + return 0;
> + }
> + ovs_assert(keys_size >= cnt);
> + struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> + /* Run scalar miniflow_extract to get default result. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + pkt_metadata_init(&packet->md, in_port);
> + miniflow_extract(packet, &keys[i].mf);
> +
> + /* Store known good metadata to compare with optimized metadata. */
> + good_l2_5_ofs[i] = packet->l2_5_ofs;
> + good_l3_ofs[i] = packet->l3_ofs;
> + good_l4_ofs[i] = packet->l4_ofs;
> + good_l2_pad_size[i] = packet->l2_pad_size;
> + }
> +
> + /* Iterate through each version of miniflow implementations. */
> + for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> + if (!mfex_impls[j].available) {
> + continue;
> + }
> +
> + /* Reset keys and offsets before each implementation. */
> + memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + dp_packet_reset_offsets(packet);
> + }
> + /* Call optimized miniflow for each batch of packet. */
> + uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> + keys_size, in_port, pmd_handle);
> +
> + /* Do a miniflow compare for bits, blocks and offsets for all the
> + * classified packets in the hitmask marked by set bits. */
> + while (hit_mask) {
> + /* Index for the set bit. */
> + uint32_t i = __builtin_ctz(hit_mask);
> + /* Set the index in hitmask to Zero. */
> + hit_mask &= (hit_mask - 1);
> +
> + uint32_t failed = 0;
> +
> + /* Check miniflow bits are equal. */
> + if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> + (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> + VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> + keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> + test_keys[i].mf.map.bits[0],
> + test_keys[i].mf.map.bits[1]);
> + failed = 1;
> + }
> +
> + if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> + uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> + VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good hexdump:\n");
> + uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> + uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
> + }
> + VLOG_ERR(" Test hexdump:\n");
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
> + }
> + failed = 1;
> + }
> +
> + if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> + (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> + (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> + (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> + VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + good_l2_pad_size[i], good_l2_5_ofs[i],
> + good_l3_ofs[i], good_l4_ofs[i]);
> + VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + packets->packets[i]->l2_pad_size,
> + packets->packets[i]->l2_5_ofs,
> + packets->packets[i]->l3_ofs,
> + packets->packets[i]->l4_ofs);
> + failed = 1;
> + }
> +
> + if (failed) {
> + /* Having dumped the debug info, disable autovalidator. */
> + VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> + mfex_impls[j].name, i);
> + /* Halt OVS here on debug builds. */
> + ovs_assert(0);
> + pmd->miniflow_extract_opt = NULL;
> + break;
> + }
> + }
> + }
> +
> + /* preserve packet correctness by storing back the good offsets in
> + * packets back. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + packet->l2_5_ofs = good_l2_5_ofs[i];
> + packet->l3_ofs = good_l3_ofs[i];
> + packet->l4_ofs = good_l4_ofs[i];
> + packet->l2_pad_size = good_l2_pad_size[i];
> + }
> +
> + /* Returning zero implies no packets were hit by autovalidation. 
> This
> + * simplifies unit-tests as changing 
> --enable-mfex-default-autovalidator
> + * would pass/fail. By always returning zero, autovalidator is a 
> little
> + * slower, but we gain consistency in testing.
> + */
> + return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h 
> b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
> /* Max size of dpif_miniflow_extract_impl array. */
> #define MFEX_IMPLS_MAX_SIZE (16)
>
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
> /* Forward declarations. */
> struct dp_packet;
> struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
> int32_t
> dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl 
> **out_ptr);
>
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by 
> comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle);
>
> #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct 
> unixctl_conn *conn, int argc,
> struct ds reply = DS_EMPTY_INITIALIZER;
> ds_put_format(&reply, "Miniflow implementation set to %s.\n", 
> mfex_name);
> const char *reply_str = ds_cstr(&reply);
> - unixctl_command_reply(conn, reply_str);
> VLOG_INFO("%s", reply_str);
> + unixctl_command_reply(conn, reply_str);
> ds_destroy(&reply);
> }
>
> --
> 2.25.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eelco Chaudron June 29, 2021, 11:43 a.m. UTC | #12
On 17 Jun 2021, at 18:27, Kumar Amber wrote:

> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
>
> The autovaidator function can be triggered at runtime using the
> following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
>
> Signed-off-by: Kumar Amber <kumar.amber@intel.com>
> Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
>  lib/dpif-netdev-private-extract.c | 141 
> ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.h |  15 ++++
>  lib/dpif-netdev.c                 |   2 +-
>  3 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/lib/dpif-netdev-private-extract.c 
> b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>
>  /* Implementations of available extract options. */
>  static struct dpif_miniflow_extract_impl mfex_impls[] = {
> +   {
> +        .probe = NULL,
> +        .extract_func = dpif_miniflow_extract_autovalidator,
> +        .name = "autovalidator",
> +    },
>      {
>          .probe = NULL,
>          .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct 
> dpif_miniflow_extract_impl **out_ptr)
>      *out_ptr = mfex_impls;
>      return ARRAY_SIZE(mfex_impls);
>  }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t 
> in_port,
> +                                    void *pmd_handle)
> +{
> +    const size_t cnt = dp_packet_batch_size(packets);
> +    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> +    struct dp_packet *packet;
> +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> +    int32_t mfunc_count = 
> dpif_miniflow_extract_info_get(&miniflow_funcs);
> +    if (mfunc_count < 0) {

In theory 0 could not be returned, but just to cover the corner case can 
we change this to include zero.

> +        pmd->miniflow_extract_opt = NULL;

Guess the above needs to be atomic.

> +        VLOG_ERR("failed to get miniflow extract function 
> implementations\n");

Capital F to be in sync with your other error messages?

> +        return 0;
> +    }
> +    ovs_assert(keys_size >= cnt);

I don’t think we should assert here. Just return an error like above, 
so in production, we get notified, and this implementation gets 
disabled.

> +    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> +    /* Run scalar miniflow_extract to get default result. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        pkt_metadata_init(&packet->md, in_port);
> +        miniflow_extract(packet, &keys[i].mf);
> +
> +        /* Store known good metadata to compare with optimized 
> metadata. */
> +        good_l2_5_ofs[i] = packet->l2_5_ofs;
> +        good_l3_ofs[i] = packet->l3_ofs;
> +        good_l4_ofs[i] = packet->l4_ofs;
> +        good_l2_pad_size[i] = packet->l2_pad_size;
> +    }
> +
> +    /* Iterate through each version of miniflow implementations. */
> +    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); 
> j++) {
> +        if (!mfex_impls[j].available) {
> +            continue;
> +        }
> +
> +        /* Reset keys and offsets before each implementation. */
> +        memset(test_keys, 0, keys_size * sizeof(struct 
> netdev_flow_key));
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +            dp_packet_reset_offsets(packet);
> +        }
> +        /* Call optimized miniflow for each batch of packet. */
> +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, 
> test_keys,
> +                                            keys_size, in_port, 
> pmd_handle);
> +
> +        /* Do a miniflow compare for bits, blocks and offsets for all 
> the
> +         * classified packets in the hitmask marked by set bits. */
> +        while (hit_mask) {
> +            /* Index for the set bit. */
> +            uint32_t i = __builtin_ctz(hit_mask);
> +            /* Set the index in hitmask to Zero. */
> +            hit_mask &= (hit_mask - 1);
> +
> +            uint32_t failed = 0;
> +
> +            /* Check miniflow bits are equal. */
> +            if ((keys[i].mf.map.bits[0] != 
> test_keys[i].mf.map.bits[0]) ||
> +                (keys[i].mf.map.bits[1] != 
> test_keys[i].mf.map.bits[1])) {
> +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> +                         keys[i].mf.map.bits[0], 
> keys[i].mf.map.bits[1],
> +                         test_keys[i].mf.map.bits[0],
> +                         test_keys[i].mf.map.bits[1]);
> +                failed = 1;
> +            }
> +
> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> +                VLOG_ERR("Autovalidation blocks failed for %s pkt 
> %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good hexdump:\n");
> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> +                uint64_t *test_block_ptr = (uint64_t 
> *)&test_keys[i].buf;
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> +                }
> +                VLOG_ERR("  Test hexdump:\n");
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> +                }
> +                failed = 1;
> +            }
> +
> +            if ((packets->packets[i]->l2_pad_size != 
> good_l2_pad_size[i]) ||
> +                    (packets->packets[i]->l2_5_ofs != 
> good_l2_5_ofs[i]) ||
> +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) 
> ||
> +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) 
> {
> +                VLOG_ERR("Autovalidation packet offsets failed for %s 
> pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : 
> %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> +                         good_l3_ofs[i], good_l4_ofs[i]);
> +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : 
> %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         packets->packets[i]->l2_pad_size,
> +                         packets->packets[i]->l2_5_ofs,
> +                         packets->packets[i]->l3_ofs,
> +                         packets->packets[i]->l4_ofs);
> +                failed = 1;
> +            }
> +
> +            if (failed) {

Why stop now!? I think we should run all implementations, as others 
might need fixing too!

> +                /* Having dumped the debug info, disable 
> autovalidator. */
> +                VLOG_ERR("Autovalidation failed in %s pkt %d, 
> disabling.\n",
> +                         mfex_impls[j].name, i);
> +                /* Halt OVS here on debug builds. */
> +                ovs_assert(0);
> +                pmd->miniflow_extract_opt = NULL;
> +                break;
> +            }
> +        }
> +    }
> +
> +    /* preserve packet correctness by storing back the good offsets 
> in
> +     * packets back. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        packet->l2_5_ofs = good_l2_5_ofs[i];
> +        packet->l3_ofs = good_l3_ofs[i];
> +        packet->l4_ofs = good_l4_ofs[i];
> +        packet->l2_pad_size = good_l2_pad_size[i];
> +    }
> +
> +    /* Returning zero implies no packets were hit by autovalidation. 
> This
> +     * simplifies unit-tests as changing 
> --enable-mfex-default-autovalidator
> +     * would pass/fail. By always returning zero, autovalidator is a 
> little
> +     * slower, but we gain consistency in testing.
> +     */
> +    return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h 
> b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
>  /* Max size of dpif_miniflow_extract_impl array. */
>  #define MFEX_IMPLS_MAX_SIZE (16)
>
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
>  /* Forward declarations. */
>  struct dp_packet;
>  struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
>  int32_t
>  dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl 
> **out_ptr);


Forgot to comment on this in my previous patch:

/* Function pointer prototype to be implemented in the optimized 
miniflow
  * extract code.
  * returns the hitmask of the processed packets on success.
  * returns zero on failure.
  */
typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
                                           struct netdev_flow_key *keys,
                                           uint32_t keys_size,
                                           odp_port_t in_port,
                                           void *pmd_handle);


Maybe we could add some description of what the input parameters mean, 
are expected? For example that key_size need to be at least the size of 
the batch? If this is the case, do we need it, and can we just assume 
keys should be at least batch size long?

> +/* Retrieve the hitmask of the batch of pakcets which is obtained by 
> comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t 
> in_port,
> +                                    void *pmd_handle);
>
>  #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct 
> unixctl_conn *conn, int argc,
>      struct ds reply = DS_EMPTY_INITIALIZER;
>      ds_put_format(&reply, "Miniflow implementation set to %s.\n", 
> mfex_name);
>      const char *reply_str = ds_cstr(&reply);
> -    unixctl_command_reply(conn, reply_str);
>      VLOG_INFO("%s", reply_str);
> +    unixctl_command_reply(conn, reply_str);
>      ds_destroy(&reply);
>  }
>
> -- 
> 2.25.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Kumar Amber June 29, 2021, 12:27 p.m. UTC | #13
Hi Eelco,

Thanks Again for reviews , Pls find my replies inline.

From: Eelco Chaudron <echaudro@redhat.com>
Sent: Tuesday, June 29, 2021 5:14 PM
To: Van Haaren, Harry <harry.van.haaren@intel.com>; Amber, Kumar <kumar.amber@intel.com>
Cc: dev@openvswitch.org; i.maximets@ovn.org; Stokes, Ian <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>
Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract


On 17 Jun 2021, at 18:27, Kumar Amber wrote:

This patch introduced the auto-validation function which
allows users to compare the batch of packets obtained from
different miniflow implementations against the linear
miniflow extract and return a hitmask.

The autovaidator function can be triggered at runtime using the
following command:

$ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator

Signed-off-by: Kumar Amber <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
---
lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
lib/dpif-netdev-private-extract.h | 15 ++++
lib/dpif-netdev.c | 2 +-
3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index fcc56ef26..0741c19f9 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);

/* Implementations of available extract options. */
static struct dpif_miniflow_extract_impl mfex_impls[] = {
+ {
+ .probe = NULL,
+ .extract_func = dpif_miniflow_extract_autovalidator,
+ .name = "autovalidator",
+ },
{
.probe = NULL,
.extract_func = NULL,
@@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
*out_ptr = mfex_impls;
return ARRAY_SIZE(mfex_impls);
}
+
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size, odp_port_t in_port,
+ void *pmd_handle)
+{
+ const size_t cnt = dp_packet_batch_size(packets);
+ uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l3_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l4_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
+ struct dp_packet *packet;
+ struct dp_netdev_pmd_thread *pmd = pmd_handle;
+ struct dpif_miniflow_extract_impl *miniflow_funcs;
+
+ int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
+ if (mfunc_count < 0) {

In theory 0 could not be returned, but just to cover the corner case can we change this to include zero.

The  code has been adapted as per Flavio comments so will not be a concern.

+ pmd->miniflow_extract_opt = NULL;

Guess the above needs to be atomic.

Removed based on Flavio comments.

+ VLOG_ERR("failed to get miniflow extract function implementations\n");

Capital F to be in sync with your other error messages?

Removed based on Flavio comments.

+ return 0;
+ }
+ ovs_assert(keys_size >= cnt);

I don’t think we should assert here. Just return an error like above, so in production, we get notified, and this implementation gets disabled.

Actually we do else one would most likely be overwriting the assigned array space for keys and will hit a Seg fault at some point.

And hence we would like to know at the compile time if this is the case.

+ struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
+
+ /* Run scalar miniflow_extract to get default result. */
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ pkt_metadata_init(&packet->md, in_port);
+ miniflow_extract(packet, &keys[i].mf);
+
+ /* Store known good metadata to compare with optimized metadata. */
+ good_l2_5_ofs[i] = packet->l2_5_ofs;
+ good_l3_ofs[i] = packet->l3_ofs;
+ good_l4_ofs[i] = packet->l4_ofs;
+ good_l2_pad_size[i] = packet->l2_pad_size;
+ }
+
+ /* Iterate through each version of miniflow implementations. */
+ for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
+ if (!mfex_impls[j].available) {
+ continue;
+ }
+
+ /* Reset keys and offsets before each implementation. */
+ memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ dp_packet_reset_offsets(packet);
+ }
+ /* Call optimized miniflow for each batch of packet. */
+ uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
+ keys_size, in_port, pmd_handle);
+
+ /* Do a miniflow compare for bits, blocks and offsets for all the
+ * classified packets in the hitmask marked by set bits. */
+ while (hit_mask) {
+ /* Index for the set bit. */
+ uint32_t i = __builtin_ctz(hit_mask);
+ /* Set the index in hitmask to Zero. */
+ hit_mask &= (hit_mask - 1);
+
+ uint32_t failed = 0;
+
+ /* Check miniflow bits are equal. */
+ if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
+ (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
+ VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
+ keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
+ test_keys[i].mf.map.bits[0],
+ test_keys[i].mf.map.bits[1]);
+ failed = 1;
+ }
+
+ if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
+ uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
+ VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
+ mfex_impls[j].name, i);
+ VLOG_ERR(" Good hexdump:\n");
+ uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
+ uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
+ for (uint32_t b = 0; b < block_cnt; b++) {
+ VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
+ }
+ VLOG_ERR(" Test hexdump:\n");
+ for (uint32_t b = 0; b < block_cnt; b++) {
+ VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
+ }
+ failed = 1;
+ }
+
+ if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
+ (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
+ (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
+ (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
+ VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
+ mfex_impls[j].name, i);
+ VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
+ " l3_ofs %u, l4_ofs %u\n",
+ good_l2_pad_size[i], good_l2_5_ofs[i],
+ good_l3_ofs[i], good_l4_ofs[i]);
+ VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
+ " l3_ofs %u, l4_ofs %u\n",
+ packets->packets[i]->l2_pad_size,
+ packets->packets[i]->l2_5_ofs,
+ packets->packets[i]->l3_ofs,
+ packets->packets[i]->l4_ofs);
+ failed = 1;
+ }
+
+ if (failed) {

Why stop now!? I think we should run all implementations, as others might need fixing too!

We had the same model as above by you but with so many debug messages flooding made it impossible for us to

Know the root cause and ovs_assert(0); have been removes so will no longer cause a crash but will simply disable the autovalidator

+ /* Having dumped the debug info, disable autovalidator. */
+ VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
+ mfex_impls[j].name, i);
+ /* Halt OVS here on debug builds. */
+ ovs_assert(0);
+ pmd->miniflow_extract_opt = NULL;
+ break;
+ }
+ }
+ }
+
+ /* preserve packet correctness by storing back the good offsets in
+ * packets back. */
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ packet->l2_5_ofs = good_l2_5_ofs[i];
+ packet->l3_ofs = good_l3_ofs[i];
+ packet->l4_ofs = good_l4_ofs[i];
+ packet->l2_pad_size = good_l2_pad_size[i];
+ }
+
+ /* Returning zero implies no packets were hit by autovalidation. This
+ * simplifies unit-tests as changing --enable-mfex-default-autovalidator
+ * would pass/fail. By always returning zero, autovalidator is a little
+ * slower, but we gain consistency in testing.
+ */
+ return 0;
+}
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index b7b0b2be4..455a7b590 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -24,6 +24,11 @@
/* Max size of dpif_miniflow_extract_impl array. */
#define MFEX_IMPLS_MAX_SIZE (16)

+/* Skip the autovalidator study and null when iterating all available
+ * miniflow implementations.
+ */
+#define MFEX_IMPL_START_IDX (1)
+
/* Forward declarations. */
struct dp_packet;
struct miniflow;
@@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
int32_t
dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);

Forgot to comment on this in my previous patch:

/* Function pointer prototype to be implemented in the optimized miniflow

  *   extract code.
  *   returns the hitmask of the processed packets on success.
  *   returns zero on failure.

*/
typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
struct netdev_flow_key *keys,
uint32_t keys_size,
odp_port_t in_port,
void *pmd_handle);

Maybe we could add some description of what the input parameters mean, are expected? For example that key_size need to be at least the size of the batch? If this is the case, do we need it, and can we just assume keys should be at least batch size long?

Yes put a comment in there about key size in the description in v5.

But as per convention whenever we pass an array we should also put the size of the array and this also complies with various

Security checkers and also prevents potential exploitations.

+/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
+ * different miniflow implementations with linear miniflow extract.
+ * On error, returns a zero.
+ * On success, returns the number of packets in the batch compared.
+ */
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size, odp_port_t in_port,
+ void *pmd_handle);

#endif /* DPIF_NETDEV_AVX512_EXTRACT */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 567ebd952..4f4ab2790 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
struct ds reply = DS_EMPTY_INITIALIZER;
ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
const char *reply_str = ds_cstr(&reply);
- unixctl_command_reply(conn, reply_str);
VLOG_INFO("%s", reply_str);
+ unixctl_command_reply(conn, reply_str);
ds_destroy(&reply);
}

--
2.25.1
Flavio Leitner June 29, 2021, 2:03 p.m. UTC | #14
Hi,

On Tue, Jun 29, 2021 at 12:27:57PM +0000, Amber, Kumar wrote:
> Hi Eelco,
> 
> Thanks Again for reviews , Pls find my replies inline.
> 
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Tuesday, June 29, 2021 5:14 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Amber, Kumar <kumar.amber@intel.com>
> Cc: dev@openvswitch.org; i.maximets@ovn.org; Stokes, Ian <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract
> 
> 
> On 17 Jun 2021, at 18:27, Kumar Amber wrote:
> 
> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
> 
> The autovaidator function can be triggered at runtime using the
> following command:
> 
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
> 
> Signed-off-by: Kumar Amber <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
> Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> ---
> lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
> lib/dpif-netdev-private-extract.h | 15 ++++
> lib/dpif-netdev.c | 2 +-
> 3 files changed, 157 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
> 
> /* Implementations of available extract options. */
> static struct dpif_miniflow_extract_impl mfex_impls[] = {
> + {
> + .probe = NULL,
> + .extract_func = dpif_miniflow_extract_autovalidator,
> + .name = "autovalidator",
> + },
> {
> .probe = NULL,
> .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
> *out_ptr = mfex_impls;
> return ARRAY_SIZE(mfex_impls);
> }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle)
> +{
> + const size_t cnt = dp_packet_batch_size(packets);
> + uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> + struct dp_packet *packet;
> + struct dp_netdev_pmd_thread *pmd = pmd_handle;
> + struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> + int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
> + if (mfunc_count < 0) {
> 
> In theory 0 could not be returned, but just to cover the corner case can we change this to include zero.
> 
> The  code has been adapted as per Flavio comments so will not be a concern.
> 
> + pmd->miniflow_extract_opt = NULL;
> 
> Guess the above needs to be atomic.
> 
> Removed based on Flavio comments.

I asked to initialize that using an API and Eelco is asking to
set it atomically. The requests are complementary, right?

> 
> + VLOG_ERR("failed to get miniflow extract function implementations\n");
> 
> Capital F to be in sync with your other error messages?
> 
> Removed based on Flavio comments.

Not sure if I got this. I mentioned that the '\n' is not needed at
the end of all VLOG_* calls. Eelco is asking to start with capital
'F'. So the requests are complementary, unless with the refactor
the message went away.

Just make sure to follow the logging style convention in OVS.

fbl



> 
> + return 0;
> + }
> + ovs_assert(keys_size >= cnt);
> 
> I don’t think we should assert here. Just return an error like above, so in production, we get notified, and this implementation gets disabled.
> 
> Actually we do else one would most likely be overwriting the assigned array space for keys and will hit a Seg fault at some point.
> 
> And hence we would like to know at the compile time if this is the case.
> 
> + struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> + /* Run scalar miniflow_extract to get default result. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + pkt_metadata_init(&packet->md, in_port);
> + miniflow_extract(packet, &keys[i].mf);
> +
> + /* Store known good metadata to compare with optimized metadata. */
> + good_l2_5_ofs[i] = packet->l2_5_ofs;
> + good_l3_ofs[i] = packet->l3_ofs;
> + good_l4_ofs[i] = packet->l4_ofs;
> + good_l2_pad_size[i] = packet->l2_pad_size;
> + }
> +
> + /* Iterate through each version of miniflow implementations. */
> + for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> + if (!mfex_impls[j].available) {
> + continue;
> + }
> +
> + /* Reset keys and offsets before each implementation. */
> + memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + dp_packet_reset_offsets(packet);
> + }
> + /* Call optimized miniflow for each batch of packet. */
> + uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> + keys_size, in_port, pmd_handle);
> +
> + /* Do a miniflow compare for bits, blocks and offsets for all the
> + * classified packets in the hitmask marked by set bits. */
> + while (hit_mask) {
> + /* Index for the set bit. */
> + uint32_t i = __builtin_ctz(hit_mask);
> + /* Set the index in hitmask to Zero. */
> + hit_mask &= (hit_mask - 1);
> +
> + uint32_t failed = 0;
> +
> + /* Check miniflow bits are equal. */
> + if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> + (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> + VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> + keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> + test_keys[i].mf.map.bits[0],
> + test_keys[i].mf.map.bits[1]);
> + failed = 1;
> + }
> +
> + if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> + uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> + VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good hexdump:\n");
> + uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> + uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
> + }
> + VLOG_ERR(" Test hexdump:\n");
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
> + }
> + failed = 1;
> + }
> +
> + if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> + (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> + (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> + (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> + VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + good_l2_pad_size[i], good_l2_5_ofs[i],
> + good_l3_ofs[i], good_l4_ofs[i]);
> + VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + packets->packets[i]->l2_pad_size,
> + packets->packets[i]->l2_5_ofs,
> + packets->packets[i]->l3_ofs,
> + packets->packets[i]->l4_ofs);
> + failed = 1;
> + }
> +
> + if (failed) {
> 
> Why stop now!? I think we should run all implementations, as others might need fixing too!
> 
> We had the same model as above by you but with so many debug messages flooding made it impossible for us to
> 
> Know the root cause and ovs_assert(0); have been removes so will no longer cause a crash but will simply disable the autovalidator
> 
> + /* Having dumped the debug info, disable autovalidator. */
> + VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> + mfex_impls[j].name, i);
> + /* Halt OVS here on debug builds. */
> + ovs_assert(0);
> + pmd->miniflow_extract_opt = NULL;
> + break;
> + }
> + }
> + }
> +
> + /* preserve packet correctness by storing back the good offsets in
> + * packets back. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + packet->l2_5_ofs = good_l2_5_ofs[i];
> + packet->l3_ofs = good_l3_ofs[i];
> + packet->l4_ofs = good_l4_ofs[i];
> + packet->l2_pad_size = good_l2_pad_size[i];
> + }
> +
> + /* Returning zero implies no packets were hit by autovalidation. This
> + * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> + * would pass/fail. By always returning zero, autovalidator is a little
> + * slower, but we gain consistency in testing.
> + */
> + return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
> /* Max size of dpif_miniflow_extract_impl array. */
> #define MFEX_IMPLS_MAX_SIZE (16)
> 
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
> /* Forward declarations. */
> struct dp_packet;
> struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
> int32_t
> dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);
> 
> Forgot to comment on this in my previous patch:
> 
> /* Function pointer prototype to be implemented in the optimized miniflow
> 
>   *   extract code.
>   *   returns the hitmask of the processed packets on success.
>   *   returns zero on failure.
> 
> */
> typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
> struct netdev_flow_key *keys,
> uint32_t keys_size,
> odp_port_t in_port,
> void *pmd_handle);
> 
> Maybe we could add some description of what the input parameters mean, are expected? For example that key_size need to be at least the size of the batch? If this is the case, do we need it, and can we just assume keys should be at least batch size long?
> 
> Yes put a comment in there about key size in the description in v5.
> 
> But as per convention whenever we pass an array we should also put the size of the array and this also complies with various
> 
> Security checkers and also prevents potential exploitations.
> 
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle);
> 
> #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
> struct ds reply = DS_EMPTY_INITIALIZER;
> ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
> const char *reply_str = ds_cstr(&reply);
> - unixctl_command_reply(conn, reply_str);
> VLOG_INFO("%s", reply_str);
> + unixctl_command_reply(conn, reply_str);
> ds_destroy(&reply);
> }
> 
> --
> 2.25.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eelco Chaudron June 29, 2021, 2:06 p.m. UTC | #15
Not sure how you replied, but it’s hard to see which comments are 
mine, and which are yours.

On 29 Jun 2021, at 14:27, Amber, Kumar wrote:

> Hi Eelco,
>
> Thanks Again for reviews , Pls find my replies inline.
>
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Tuesday, June 29, 2021 5:14 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Amber, Kumar 
> <kumar.amber@intel.com>
> Cc: dev@openvswitch.org; i.maximets@ovn.org; Stokes, Ian 
> <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation 
> function for miniflow extract
>
>
> On 17 Jun 2021, at 18:27, Kumar Amber wrote:
>
> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
>
> The autovaidator function can be triggered at runtime using the
> following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
>
> Signed-off-by: Kumar Amber 
> <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
> Co-authored-by: Harry van Haaren 
> <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> Signed-off-by: Harry van Haaren 
> <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> ---
> lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
> lib/dpif-netdev-private-extract.h | 15 ++++
> lib/dpif-netdev.c | 2 +-
> 3 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/lib/dpif-netdev-private-extract.c 
> b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>
> /* Implementations of available extract options. */
> static struct dpif_miniflow_extract_impl mfex_impls[] = {
> + {
> + .probe = NULL,
> + .extract_func = dpif_miniflow_extract_autovalidator,
> + .name = "autovalidator",
> + },
> {
> .probe = NULL,
> .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct 
> dpif_miniflow_extract_impl **out_ptr)
> *out_ptr = mfex_impls;
> return ARRAY_SIZE(mfex_impls);
> }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle)
> +{
> + const size_t cnt = dp_packet_batch_size(packets);
> + uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> + struct dp_packet *packet;
> + struct dp_netdev_pmd_thread *pmd = pmd_handle;
> + struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> + int32_t mfunc_count = 
> dpif_miniflow_extract_info_get(&miniflow_funcs);
> + if (mfunc_count < 0) {
>
> In theory 0 could not be returned, but just to cover the corner case 
> can we change this to include zero.
>
> The  code has been adapted as per Flavio comments so will not be a 
> concern.
>
> + pmd->miniflow_extract_opt = NULL;
>
> Guess the above needs to be atomic.
>
> Removed based on Flavio comments.
>
> + VLOG_ERR("failed to get miniflow extract function 
> implementations\n");
>
> Capital F to be in sync with your other error messages?
>
> Removed based on Flavio comments.
>
> + return 0;
> + }
> + ovs_assert(keys_size >= cnt);


> I don’t think we should assert here. Just return an error like 
> above, so in production, we get notified, and this implementation gets 
> disabled.
>
> Actually we do else one would most likely be overwriting the assigned 
> array space for keys and will hit a Seg fault at some point.
>
> And hence we would like to know at the compile time if this is the 
> case.


But this is not a compile time check, it will crash OVS. You could just 
do this:

if (keys_size < cnt) {
      pmd->miniflow_extract_opt = NULL;
      VLOG_ERR(“Invalid key size supplied etc. etc.\n”);
      return 0;
}

Or you could process up to key_size packets

>
> + struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> + /* Run scalar miniflow_extract to get default result. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + pkt_metadata_init(&packet->md, in_port);
> + miniflow_extract(packet, &keys[i].mf);
> +
> + /* Store known good metadata to compare with optimized metadata. */
> + good_l2_5_ofs[i] = packet->l2_5_ofs;
> + good_l3_ofs[i] = packet->l3_ofs;
> + good_l4_ofs[i] = packet->l4_ofs;
> + good_l2_pad_size[i] = packet->l2_pad_size;
> + }
> +
> + /* Iterate through each version of miniflow implementations. */
> + for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> + if (!mfex_impls[j].available) {
> + continue;
> + }
> +
> + /* Reset keys and offsets before each implementation. */
> + memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + dp_packet_reset_offsets(packet);
> + }
> + /* Call optimized miniflow for each batch of packet. */
> + uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> + keys_size, in_port, pmd_handle);
> +
> + /* Do a miniflow compare for bits, blocks and offsets for all the
> + * classified packets in the hitmask marked by set bits. */
> + while (hit_mask) {
> + /* Index for the set bit. */
> + uint32_t i = __builtin_ctz(hit_mask);
> + /* Set the index in hitmask to Zero. */
> + hit_mask &= (hit_mask - 1);
> +
> + uint32_t failed = 0;
> +
> + /* Check miniflow bits are equal. */
> + if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> + (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> + VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> + keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> + test_keys[i].mf.map.bits[0],
> + test_keys[i].mf.map.bits[1]);
> + failed = 1;
> + }
> +
> + if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> + uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> + VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good hexdump:\n");
> + uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> + uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
> + }
> + VLOG_ERR(" Test hexdump:\n");
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
> + }
> + failed = 1;
> + }
> +
> + if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> + (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> + (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> + (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> + VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + good_l2_pad_size[i], good_l2_5_ofs[i],
> + good_l3_ofs[i], good_l4_ofs[i]);
> + VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + packets->packets[i]->l2_pad_size,
> + packets->packets[i]->l2_5_ofs,
> + packets->packets[i]->l3_ofs,
> + packets->packets[i]->l4_ofs);
> + failed = 1;
> + }
> +
> + if (failed) {
>
> Why stop now!? I think we should run all implementations, as others 
> might need fixing too!
>
> We had the same model as above by you but with so many debug messages 
> flooding made it impossible for us to
>
> Know the root cause and ovs_assert(0); have been removes so will no 
> longer cause a crash but will simply disable the autovalidator

I still would opt for getting all in the log. What is worse than getting 
a customer to crash (LOG) one failure, and with the provided fix, it 
will crash/log the next one :(
And if the logs were not clear enough, maybe they need some rewriting?


> + /* Having dumped the debug info, disable autovalidator. */
> + VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> + mfex_impls[j].name, i);
> + /* Halt OVS here on debug builds. */
> + ovs_assert(0);
> + pmd->miniflow_extract_opt = NULL;
> + break;
> + }
> + }
> + }
> +
> + /* preserve packet correctness by storing back the good offsets in
> + * packets back. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + packet->l2_5_ofs = good_l2_5_ofs[i];
> + packet->l3_ofs = good_l3_ofs[i];
> + packet->l4_ofs = good_l4_ofs[i];
> + packet->l2_pad_size = good_l2_pad_size[i];
> + }
> +
> + /* Returning zero implies no packets were hit by autovalidation. 
> This
> + * simplifies unit-tests as changing 
> --enable-mfex-default-autovalidator
> + * would pass/fail. By always returning zero, autovalidator is a 
> little
> + * slower, but we gain consistency in testing.
> + */
> + return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h 
> b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
> /* Max size of dpif_miniflow_extract_impl array. */
> #define MFEX_IMPLS_MAX_SIZE (16)
>
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
> /* Forward declarations. */
> struct dp_packet;
> struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
> int32_t
> dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl 
> **out_ptr);
>
> Forgot to comment on this in my previous patch:
>
> /* Function pointer prototype to be implemented in the optimized 
> miniflow
>
>   *   extract code.
>   *   returns the hitmask of the processed packets on success.
>   *   returns zero on failure.
>
> */
> typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch 
> *batch,
> struct netdev_flow_key *keys,
> uint32_t keys_size,
> odp_port_t in_port,
> void *pmd_handle);
>
> Maybe we could add some description of what the input parameters mean, 
> are expected? For example that key_size need to be at least the size 
> of the batch? If this is the case, do we need it, and can we just 
> assume keys should be at least batch size long?
>
> Yes put a comment in there about key size in the description in v5.
>
> But as per convention whenever we pass an array we should also put the 
> size of the array and this also complies with various
>
> Security checkers and also prevents potential exploitations.

Ack

> +/* Retrieve the hitmask of the batch of pakcets which is obtained by 
> comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle);
>
> #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct 
> unixctl_conn *conn, int argc,
> struct ds reply = DS_EMPTY_INITIALIZER;
> ds_put_format(&reply, "Miniflow implementation set to %s.\n", 
> mfex_name);
> const char *reply_str = ds_cstr(&reply);
> - unixctl_command_reply(conn, reply_str);
> VLOG_INFO("%s", reply_str);
> + unixctl_command_reply(conn, reply_str);
> ds_destroy(&reply);
> }
>
> --
> 2.25.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Kumar Amber June 29, 2021, 3:20 p.m. UTC | #16
Hi Flavio,

Replies inline.

> >
> > Guess the above needs to be atomic.
> >
> > Removed based on Flavio comments.
> 
> I asked to initialize that using an API and Eelco is asking to set it atomically.
> The requests are complementary, right?
> 

Yes True sorry for confusion so we have refactored the code a bit to use Atomic set and get along with the API 
Wherever applicable since here on any failure we would want to fall back to Scalar we would not need the API
To find default implementation.
> >
> > + VLOG_ERR("failed to get miniflow extract function
> > + implementations\n");
> >
> > Capital F to be in sync with your other error messages?
> >
> > Removed based on Flavio comments.
> 
> Not sure if I got this. I mentioned that the '\n' is not needed at the end of all
> VLOG_* calls. Eelco is asking to start with capital 'F'. So the requests are
> complementary, unless with the refactor the message went away.
> 
> Just make sure to follow the logging style convention in OVS.

Sorry for confusion I have fixed all the VLOGS with this convention.
> 
> fbl
> 
> 
> 
> >
> > + return 0;
> > + }
> > + ovs_assert(keys_size >= cnt);
> >
> > I don’t think we should assert here. Just return an error like above, so in
> production, we get notified, and this implementation gets disabled.
> >
> > Actually we do else one would most likely be overwriting the assigned
> array space for keys and will hit a Seg fault at some point.
> >
> > And hence we would like to know at the compile time if this is the case.
> >
> > + struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> > +
> > + /* Run scalar miniflow_extract to get default result. */
> > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > + pkt_metadata_init(&packet->md, in_port); miniflow_extract(packet,
> > + &keys[i].mf);
> > +
> > + /* Store known good metadata to compare with optimized metadata. */
> > + good_l2_5_ofs[i] = packet->l2_5_ofs; good_l3_ofs[i] =
> > + packet->l3_ofs; good_l4_ofs[i] = packet->l4_ofs; good_l2_pad_size[i]
> > + = packet->l2_pad_size; }
> > +
> > + /* Iterate through each version of miniflow implementations. */ for
> > + (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) { if
> > + (!mfex_impls[j].available) { continue; }
> > +
> > + /* Reset keys and offsets before each implementation. */
> > + memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > + dp_packet_reset_offsets(packet); }
> > + /* Call optimized miniflow for each batch of packet. */ uint32_t
> > + hit_mask = mfex_impls[j].extract_func(packets, test_keys, keys_size,
> > + in_port, pmd_handle);
> > +
> > + /* Do a miniflow compare for bits, blocks and offsets for all the
> > + * classified packets in the hitmask marked by set bits. */ while
> > + (hit_mask) {
> > + /* Index for the set bit. */
> > + uint32_t i = __builtin_ctz(hit_mask);
> > + /* Set the index in hitmask to Zero. */ hit_mask &= (hit_mask - 1);
> > +
> > + uint32_t failed = 0;
> > +
> > + /* Check miniflow bits are equal. */ if ((keys[i].mf.map.bits[0] !=
> > + test_keys[i].mf.map.bits[0]) || (keys[i].mf.map.bits[1] !=
> > + test_keys[i].mf.map.bits[1])) { VLOG_ERR("Good 0x%llx 0x%llx\tTest
> > + 0x%llx 0x%llx\n", keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> > + test_keys[i].mf.map.bits[0], test_keys[i].mf.map.bits[1]); failed =
> > + 1; }
> > +
> > + if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) { uint32_t
> > + block_cnt = miniflow_n_values(&keys[i].mf); VLOG_ERR("Autovalidation
> > + blocks failed for %s pkt %d", mfex_impls[j].name, i); VLOG_ERR("
> > + Good hexdump:\n"); uint64_t *good_block_ptr = (uint64_t
> > + *)&keys[i].buf; uint64_t *test_block_ptr = (uint64_t
> > + *)&test_keys[i].buf; for (uint32_t b = 0; b < block_cnt; b++) {
> > + VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]); } VLOG_ERR(" Test
> > + hexdump:\n"); for (uint32_t b = 0; b < block_cnt; b++) { VLOG_ERR("
> > + %"PRIx64"\n", test_block_ptr[b]); } failed = 1; }
> > +
> > + if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> > + (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> > + (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> > + (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> > + VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> > + mfex_impls[j].name, i); VLOG_ERR(" Good offsets: l2_pad_size %u,
> > + l2_5_ofs : %u"
> > + " l3_ofs %u, l4_ofs %u\n",
> > + good_l2_pad_size[i], good_l2_5_ofs[i], good_l3_ofs[i],
> > + good_l4_ofs[i]); VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs :
> > + %u"
> > + " l3_ofs %u, l4_ofs %u\n",
> > + packets->packets[i]->l2_pad_size,
> > + packets->packets[i]->l2_5_ofs,
> > + packets->packets[i]->l3_ofs,
> > + packets->packets[i]->l4_ofs);
> > + failed = 1;
> > + }
> > +
> > + if (failed) {
> >
> > Why stop now!? I think we should run all implementations, as others
> might need fixing too!
> >
> > We had the same model as above by you but with so many debug
> messages
> > flooding made it impossible for us to
> >
> > Know the root cause and ovs_assert(0); have been removes so will no
> > longer cause a crash but will simply disable the autovalidator
> >
> > + /* Having dumped the debug info, disable autovalidator. */
> > + VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> > + mfex_impls[j].name, i);
> > + /* Halt OVS here on debug builds. */ ovs_assert(0);
> > + pmd->miniflow_extract_opt = NULL;
> > + break;
> > + }
> > + }
> > + }
> > +
> > + /* preserve packet correctness by storing back the good offsets in
> > + * packets back. */
> > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > + packet->l2_5_ofs = good_l2_5_ofs[i]; l3_ofs = good_l3_ofs[i]; l4_ofs
> > + packet->= good_l4_ofs[i]; l2_pad_size = good_l2_pad_size[i];
> > + }
> > +
> > + /* Returning zero implies no packets were hit by autovalidation.
> > +This
> > + * simplifies unit-tests as changing
> > +--enable-mfex-default-autovalidator
> > + * would pass/fail. By always returning zero, autovalidator is a
> > +little
> > + * slower, but we gain consistency in testing.
> > + */
> > + return 0;
> > +}
> > diff --git a/lib/dpif-netdev-private-extract.h
> > b/lib/dpif-netdev-private-extract.h
> > index b7b0b2be4..455a7b590 100644
> > --- a/lib/dpif-netdev-private-extract.h
> > +++ b/lib/dpif-netdev-private-extract.h
> > @@ -24,6 +24,11 @@
> > /* Max size of dpif_miniflow_extract_impl array. */ #define
> > MFEX_IMPLS_MAX_SIZE (16)
> >
> > +/* Skip the autovalidator study and null when iterating all available
> > + * miniflow implementations.
> > + */
> > +#define MFEX_IMPL_START_IDX (1)
> > +
> > /* Forward declarations. */
> > struct dp_packet;
> > struct miniflow;
> > @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void); int32_t
> > dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl
> > **out_ptr);
> >
> > Forgot to comment on this in my previous patch:
> >
> > /* Function pointer prototype to be implemented in the optimized
> > miniflow
> >
> >   *   extract code.
> >   *   returns the hitmask of the processed packets on success.
> >   *   returns zero on failure.
> >
> > */
> > typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch
> > *batch, struct netdev_flow_key *keys, uint32_t keys_size, odp_port_t
> > in_port, void *pmd_handle);
> >
> > Maybe we could add some description of what the input parameters
> mean, are expected? For example that key_size need to be at least the size
> of the batch? If this is the case, do we need it, and can we just assume keys
> should be at least batch size long?
> >
> > Yes put a comment in there about key size in the description in v5.
> >
> > But as per convention whenever we pass an array we should also put the
> > size of the array and this also complies with various
> >
> > Security checkers and also prevents potential exploitations.
> >
> > +/* Retrieve the hitmask of the batch of pakcets which is obtained by
> > +comparing
> > + * different miniflow implementations with linear miniflow extract.
> > + * On error, returns a zero.
> > + * On success, returns the number of packets in the batch compared.
> > + */
> > +uint32_t
> > +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> > +struct netdev_flow_key *keys,  uint32_t keys_size, odp_port_t
> > +in_port,  void *pmd_handle);
> >
> > #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git a/lib/dpif-netdev.c
> > b/lib/dpif-netdev.c index 567ebd952..4f4ab2790 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> > unixctl_conn *conn, int argc, struct ds reply = DS_EMPTY_INITIALIZER;
> > ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> > mfex_name); const char *reply_str = ds_cstr(&reply);
> > - unixctl_command_reply(conn, reply_str); VLOG_INFO("%s", reply_str);
> > + unixctl_command_reply(conn, reply_str);
> > ds_destroy(&reply);
> > }
> >
> > --
> > 2.25.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org<mailto:dev@openvswitch.org>
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
> --
> fbl
Kumar Amber June 29, 2021, 4:15 p.m. UTC | #17
Hi Eelco ,

Sorry the formatting seems broken on this email thread.
Replies are inlined .

From: Eelco Chaudron <echaudro@redhat.com>
Sent: Tuesday, June 29, 2021 7:36 PM
To: Amber, Kumar <kumar.amber@intel.com>
Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; dev@openvswitch.org; i.maximets@ovn.org; Stokes, Ian <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>
Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract


Not sure how you replied, but it’s hard to see which comments are mine, and which are yours.

On 29 Jun 2021, at 14:27, Amber, Kumar wrote:

Hi Eelco,

Thanks Again for reviews , Pls find my replies inline.

From: Eelco Chaudron <echaudro@redhat.com<mailto:echaudro@redhat.com>>
Sent: Tuesday, June 29, 2021 5:14 PM
To: Van Haaren, Harry <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>; Amber, Kumar <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
Cc: dev@openvswitch.org<mailto:dev@openvswitch.org>; i.maximets@ovn.org<mailto:i.maximets@ovn.org>; Stokes, Ian <ian.stokes@intel.com<mailto:ian.stokes@intel.com>>; Flavio Leitner <fbl@sysclose.org<mailto:fbl@sysclose.org>>
Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract

On 17 Jun 2021, at 18:27, Kumar Amber wrote:

This patch introduced the auto-validation function which
allows users to compare the batch of packets obtained from
different miniflow implementations against the linear
miniflow extract and return a hitmask.

The autovaidator function can be triggered at runtime using the
following command:

$ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator

Signed-off-by: Kumar Amber <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
---
lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
lib/dpif-netdev-private-extract.h | 15 ++++
lib/dpif-netdev.c | 2 +-
3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index fcc56ef26..0741c19f9 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);

/* Implementations of available extract options. */
static struct dpif_miniflow_extract_impl mfex_impls[] = {
+ {
+ .probe = NULL,
+ .extract_func = dpif_miniflow_extract_autovalidator,
+ .name = "autovalidator",
+ },
{
.probe = NULL,
.extract_func = NULL,
@@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
*out_ptr = mfex_impls;
return ARRAY_SIZE(mfex_impls);
}
+
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size, odp_port_t in_port,
+ void *pmd_handle)
+{
+ const size_t cnt = dp_packet_batch_size(packets);
+ uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l3_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l4_ofs[NETDEV_MAX_BURST];
+ uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
+ struct dp_packet *packet;
+ struct dp_netdev_pmd_thread *pmd = pmd_handle;
+ struct dpif_miniflow_extract_impl *miniflow_funcs;
+
+ int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
+ if (mfunc_count < 0) {

In theory 0 could not be returned, but just to cover the corner case can we change this to include zero.

The code has been adapted as per Flavio comments so will not be a concern.

+ pmd->miniflow_extract_opt = NULL;

Guess the above needs to be atomic.

Removed based on Flavio comments.

+ VLOG_ERR("failed to get miniflow extract function implementations\n");

Capital F to be in sync with your other error messages?

Removed based on Flavio comments.

+ return 0;
+ }
+ ovs_assert(keys_size >= cnt);


I don’t think we should assert here. Just return an error like above, so in production, we get notified, and this implementation gets disabled.

Actually we do else one would most likely be overwriting the assigned array space for keys and will hit a Seg fault at some point.

And hence we would like to know at the compile time if this is the case.

But this is not a compile time check, it will crash OVS. You could just do this:

if (keys_size < cnt) {
pmd->miniflow_extract_opt = NULL;
VLOG_ERR(“Invalid key size supplied etc. etc.\n”);
return 0;
}

Or you could process up to key_size packets

Reply:   sure I have taken the first approach in v5 as it safe and avoid any risk of Seg fault in V5.

+ struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
+
+ /* Run scalar miniflow_extract to get default result. */
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ pkt_metadata_init(&packet->md, in_port);
+ miniflow_extract(packet, &keys[i].mf);
+
+ /* Store known good metadata to compare with optimized metadata. */
+ good_l2_5_ofs[i] = packet->l2_5_ofs;
+ good_l3_ofs[i] = packet->l3_ofs;
+ good_l4_ofs[i] = packet->l4_ofs;
+ good_l2_pad_size[i] = packet->l2_pad_size;
+ }
+
+ /* Iterate through each version of miniflow implementations. */
+ for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
+ if (!mfex_impls[j].available) {
+ continue;
+ }
+
+ /* Reset keys and offsets before each implementation. */
+ memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ dp_packet_reset_offsets(packet);
+ }
+ /* Call optimized miniflow for each batch of packet. */
+ uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
+ keys_size, in_port, pmd_handle);
+
+ /* Do a miniflow compare for bits, blocks and offsets for all the
+ * classified packets in the hitmask marked by set bits. */
+ while (hit_mask) {
+ /* Index for the set bit. */
+ uint32_t i = __builtin_ctz(hit_mask);
+ /* Set the index in hitmask to Zero. */
+ hit_mask &= (hit_mask - 1);
+
+ uint32_t failed = 0;
+
+ /* Check miniflow bits are equal. */
+ if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
+ (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
+ VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
+ keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
+ test_keys[i].mf.map.bits[0],
+ test_keys[i].mf.map.bits[1]);
+ failed = 1;
+ }
+
+ if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
+ uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
+ VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
+ mfex_impls[j].name, i);
+ VLOG_ERR(" Good hexdump:\n");
+ uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
+ uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
+ for (uint32_t b = 0; b < block_cnt; b++) {
+ VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
+ }
+ VLOG_ERR(" Test hexdump:\n");
+ for (uint32_t b = 0; b < block_cnt; b++) {
+ VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
+ }
+ failed = 1;
+ }
+
+ if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
+ (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
+ (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
+ (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
+ VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
+ mfex_impls[j].name, i);
+ VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
+ " l3_ofs %u, l4_ofs %u\n",
+ good_l2_pad_size[i], good_l2_5_ofs[i],
+ good_l3_ofs[i], good_l4_ofs[i]);
+ VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
+ " l3_ofs %u, l4_ofs %u\n",
+ packets->packets[i]->l2_pad_size,
+ packets->packets[i]->l2_5_ofs,
+ packets->packets[i]->l3_ofs,
+ packets->packets[i]->l4_ofs);
+ failed = 1;
+ }
+
+ if (failed) {

Why stop now!? I think we should run all implementations, as others might need fixing too!

We had the same model as above by you but with so many debug messages flooding made it impossible for us to

Know the root cause and ovs_assert(0); have been removes so will no longer cause a crash but will simply disable the autovalidator

I still would opt for getting all in the log. What is worse than getting a customer to crash (LOG) one failure, and with the provided fix, it will crash/log the next one :(
And if the logs were not clear enough, maybe they need some rewriting?

Reply:  As pointed out by Flavio in patch 07/12 , I guess we can do the following here as:

           - Remove the ovs_assert()

           - VLOG_ERR() any mismatches

           - Continue processing rest of burst with MFEX Autovalidator

On failures, we disable autovalidator *after* the burst, to avoid spamming output and filling logs.

+ /* Having dumped the debug info, disable autovalidator. */
+ VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
+ mfex_impls[j].name, i);
+ /* Halt OVS here on debug builds. */
+ ovs_assert(0);
+ pmd->miniflow_extract_opt = NULL;
+ break;
+ }
+ }
+ }
+
+ /* preserve packet correctness by storing back the good offsets in
+ * packets back. */
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+ packet->l2_5_ofs = good_l2_5_ofs[i];
+ packet->l3_ofs = good_l3_ofs[i];
+ packet->l4_ofs = good_l4_ofs[i];
+ packet->l2_pad_size = good_l2_pad_size[i];
+ }
+
+ /* Returning zero implies no packets were hit by autovalidation. This
+ * simplifies unit-tests as changing --enable-mfex-default-autovalidator
+ * would pass/fail. By always returning zero, autovalidator is a little
+ * slower, but we gain consistency in testing.
+ */
+ return 0;
+}
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index b7b0b2be4..455a7b590 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -24,6 +24,11 @@
/* Max size of dpif_miniflow_extract_impl array. */
#define MFEX_IMPLS_MAX_SIZE (16)

+/* Skip the autovalidator study and null when iterating all available
+ * miniflow implementations.
+ */
+#define MFEX_IMPL_START_IDX (1)
+
/* Forward declarations. */
struct dp_packet;
struct miniflow;
@@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
int32_t
dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);

Forgot to comment on this in my previous patch:

/* Function pointer prototype to be implemented in the optimized miniflow

* extract code.
* returns the hitmask of the processed packets on success.
* returns zero on failure.

*/
typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
struct netdev_flow_key *keys,
uint32_t keys_size,
odp_port_t in_port,
void *pmd_handle);

Maybe we could add some description of what the input parameters mean, are expected? For example that key_size need to be at least the size of the batch? If this is the case, do we need it, and can we just assume keys should be at least batch size long?

Yes put a comment in there about key size in the description in v5.

But as per convention whenever we pass an array we should also put the size of the array and this also complies with various

Security checkers and also prevents potential exploitations.

Ack

+/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
+ * different miniflow implementations with linear miniflow extract.
+ * On error, returns a zero.
+ * On success, returns the number of packets in the batch compared.
+ */
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size, odp_port_t in_port,
+ void *pmd_handle);

#endif /* DPIF_NETDEV_AVX512_EXTRACT */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 567ebd952..4f4ab2790 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
struct ds reply = DS_EMPTY_INITIALIZER;
ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
const char *reply_str = ds_cstr(&reply);
- unixctl_command_reply(conn, reply_str);
VLOG_INFO("%s", reply_str);
+ unixctl_command_reply(conn, reply_str);
ds_destroy(&reply);
}

--
2.25.1
Stokes, Ian June 29, 2021, 4:45 p.m. UTC | #18
> Hi Ian,
> 
> Thanks for reviews, replies are inlined.
> 

Thanks Amber looking forward to the v5.

BR
Ian
> 
> <Snipped>
> 
> > > +        /* Call optimized miniflow for each batch of packet. */
> > > +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> > > +                                            keys_size, in_port,
> > > + pmd_handle);
> >
> > Alignment above is out, should be aligned under first argument passed ie.
> > packets.
> 
> Fixed in v5.
> >
> > > +
> > > +        /* Do a miniflow compare for bits, blocks and offsets for all the
> > > +         * classified packets in the hitmask marked by set bits. */
> > > +        while (hit_mask) {
> > > +            /* Index for the set bit. */
> > > +            uint32_t i = __builtin_ctz(hit_mask);
> > > +            /* Set the index in hitmask to Zero. */
> > > +            hit_mask &= (hit_mask - 1);
> > > +
> > > +            uint32_t failed = 0;
> > > +
> > > +            /* Check miniflow bits are equal. */
> > > +            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> > > +                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> > > +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> > > +                         keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> > > +                         test_keys[i].mf.map.bits[0],
> > > +                         test_keys[i].mf.map.bits[1]);
> > > +                failed = 1;
> > > +            }
> > > +
> > > +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> > > +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> > > +                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> > > +                         mfex_impls[j].name, i);
> > > +                VLOG_ERR("  Good hexdump:\n");
> > > +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> > > +                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> > > +                for (uint32_t b = 0; b < block_cnt; b++) {
> > > +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> >
> > For this and other VLOG Errs  rather than using spaces to have you thought
> > of using pad left?
> 
> Fixed in v5.
> > > +                }
> > > +                VLOG_ERR("  Test hexdump:\n");
> > > +                for (uint32_t b = 0; b < block_cnt; b++) {
> > > +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> > > +                }
> > > +                failed = 1;
> > > +            }
> > > +
> > > +            if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> > > +                    (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> > > +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> > > +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> > > +                VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> > > +                         mfex_impls[j].name, i);
> > > +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> > > +                         " l3_ofs %u, l4_ofs %u\n",
> > > +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> > > +                         good_l3_ofs[i], good_l4_ofs[i]);
> > > +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> > > +                         " l3_ofs %u, l4_ofs %u\n",
> > > +                         packets->packets[i]->l2_pad_size,
> > > +                         packets->packets[i]->l2_5_ofs,
> > > +                         packets->packets[i]->l3_ofs,
> > > +                         packets->packets[i]->l4_ofs);
> > > +                failed = 1;
> > > +            }
> > > +
> > > +            if (failed) {
> > > +                /* Having dumped the debug info, disable autovalidator. */
> > > +                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> > > +                         mfex_impls[j].name, i);
> > > +                /* Halt OVS here on debug builds. */
> > > +                ovs_assert(0);
> > > +                pmd->miniflow_extract_opt = NULL;
> > > +                break;
> > > +            }
> > > +        }
> > > +    }
> > > +
> > > +    /* preserve packet correctness by storing back the good offsets in
> > > +     * packets back. */
> >
> > Minor, capitalize Preserve above.
> 
> Fixed in v5.
> >
> > > +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > > +        packet->l2_5_ofs = good_l2_5_ofs[i];
> > > +        packet->l3_ofs = good_l3_ofs[i];
> > > +        packet->l4_ofs = good_l4_ofs[i];
> > > +        packet->l2_pad_size = good_l2_pad_size[i];
> > > +    }
> > > +
> > > +    /* Returning zero implies no packets were hit by autovalidation. This
> > > +     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> > > +     * would pass/fail. By always returning zero, autovalidator is a little
> > > +     * slower, but we gain consistency in testing.
> >
> > Can you explain this in a little more detail?
> >
> > Is the expectation here that no packets get hit but just simply that the test
> > and comparisons match for each mfex?
> >
> 
> Details included in v5.
> > > +     */
> > > +    return 0;
> > > +}
> > > diff --git a/lib/dpif-netdev-private-extract.h
> > > b/lib/dpif-netdev-private-extract.h
> > > index b7b0b2be4..455a7b590 100644
> > > --- a/lib/dpif-netdev-private-extract.h
> > > +++ b/lib/dpif-netdev-private-extract.h
> > > @@ -24,6 +24,11 @@
> > >  /* Max size of dpif_miniflow_extract_impl array. */  #define
> > > MFEX_IMPLS_MAX_SIZE (16)
> > >
> > > +/* Skip the autovalidator study and null when iterating all available
> >
> > Is study the right word above for the autovalidator? Seems a bit odd.
> 
> Fixed in v5.
> 
> > > + * miniflow implementations.
> > > + */
> > > +#define MFEX_IMPL_START_IDX (1)
> > > +
> > >  /* Forward declarations. */
> > >  struct dp_packet;
> > >  struct miniflow;
> > > @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);  int32_t
> > > dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl
> > > **out_ptr);
> > >
> > > +/* Retrieve the hitmask of the batch of pakcets which is obtained by
> > > +comparing
> > > + * different miniflow implementations with linear miniflow extract.
> > > + * On error, returns a zero.
> > > + * On success, returns the number of packets in the batch compared.
> > > + */
> > > +uint32_t
> > > +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> > > +                                    struct netdev_flow_key *keys,
> > > +                                    uint32_t keys_size, odp_port_t in_port,
> > > +                                    void *pmd_handle);
> > >
> > >  #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git
> > > a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 567ebd952..4f4ab2790
> > > 100644
> > > --- a/lib/dpif-netdev.c
> > > +++ b/lib/dpif-netdev.c
> > > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> > > unixctl_conn *conn, int argc,
> > >      struct ds reply = DS_EMPTY_INITIALIZER;
> > >      ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> > mfex_name);
> > >      const char *reply_str = ds_cstr(&reply);
> > > -    unixctl_command_reply(conn, reply_str);
> > >      VLOG_INFO("%s", reply_str);
> > > +    unixctl_command_reply(conn, reply_str);
> >
> > Is there a reason for swapping the order above?
> >
> 
> This looks more logical .
> >
> > BR
> > Ian
> > >      ds_destroy(&reply);
> > >  }
> > >
> > > --
> > > 2.25.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Stokes, Ian June 29, 2021, 4:46 p.m. UTC | #19
> > -----Original Message-----
> > From: Amber, Kumar <kumar.amber@intel.com>
> > Sent: Thursday, June 24, 2021 12:27 PM
> > To: Stokes, Ian <ian.stokes@intel.com>; dev@openvswitch.org; Van Haaren,
> Harry
> > <harry.van.haaren@intel.com>
> > Cc: i.maximets@ovn.org
> > Subject: RE: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> > miniflow extract
> 
> <snip lots of patch>
> 
> > > >  #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git
> > > > a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 567ebd952..4f4ab2790
> > > > 100644
> > > > --- a/lib/dpif-netdev.c
> > > > +++ b/lib/dpif-netdev.c
> > > > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> > > > unixctl_conn *conn, int argc,
> > > >      struct ds reply = DS_EMPTY_INITIALIZER;
> > > >      ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> > > mfex_name);
> > > >      const char *reply_str = ds_cstr(&reply);
> > > > -    unixctl_command_reply(conn, reply_str);
> > > >      VLOG_INFO("%s", reply_str);
> > > > +    unixctl_command_reply(conn, reply_str);
> > >
> > > Is there a reason for swapping the order above?
> > >
> >
> > This looks more logical .
> 
> Hi All,
> 
> Actually yes there's a good reason, by logging internally in Vswitchd first,
> and then sending the reply to the user, the order of prints in the logs is
> easier to understand.
> 
> This is particularly true when e.g. MFEX enabling logs can come *after* the PMD
> log
> print of study having chosen a specific MFEX impl.
> 
> (pseudo) Example of bad logging behaviour:
> <ovs startup stuff>
> PMD: MFEX study chose profile "eth_ip_udp" (128/128 hits)
> DPIF: MFEX optimized functionality set to "study"
> 
> (pseudo) Example of good logging behaviour (with switched log/conn_reply):
> <ovs startup stuff>
> DPIF: MFEX optimized functionality set to "study"
> PMD: MFEX study chose profile "eth_ip_udp" (128/128 hits)
> 
> Hope the helps clarify the change! -Harry

Thanks Harry, that makes it clearer for sure. Is it worth detailing the change in behaviour here that it does not get reverted in future by accident?

Regards
Ian
Flavio Leitner June 29, 2021, 5:35 p.m. UTC | #20
Hi,

On Tue, Jun 29, 2021 at 03:20:36PM +0000, Amber, Kumar wrote:
> Hi Flavio,
> 
> Replies inline.
> 
> > >
> > > Guess the above needs to be atomic.
> > >
> > > Removed based on Flavio comments.
> > 
> > I asked to initialize that using an API and Eelco is asking to set it atomically.
> > The requests are complementary, right?
> > 
> 
> Yes True sorry for confusion so we have refactored the code a bit to use Atomic set and get along with the API 
> Wherever applicable since here on any failure we would want to fall back to Scalar we would not need the API
> To find default implementation.

OK, no problem. Looking forward to the next version.

> > >
> > > + VLOG_ERR("failed to get miniflow extract function
> > > + implementations\n");
> > >
> > > Capital F to be in sync with your other error messages?
> > >
> > > Removed based on Flavio comments.
> > 
> > Not sure if I got this. I mentioned that the '\n' is not needed at the end of all
> > VLOG_* calls. Eelco is asking to start with capital 'F'. So the requests are
> > complementary, unless with the refactor the message went away.
> > 
> > Just make sure to follow the logging style convention in OVS.
> 
> Sorry for confusion I have fixed all the VLOGS with this convention.

great!

fbl

> > 
> > fbl
> > 
> > 
> > 
> > >
> > > + return 0;
> > > + }
> > > + ovs_assert(keys_size >= cnt);
> > >
> > > I don’t think we should assert here. Just return an error like above, so in
> > production, we get notified, and this implementation gets disabled.
> > >
> > > Actually we do else one would most likely be overwriting the assigned
> > array space for keys and will hit a Seg fault at some point.
> > >
> > > And hence we would like to know at the compile time if this is the case.
> > >
> > > + struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> > > +
> > > + /* Run scalar miniflow_extract to get default result. */
> > > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > > + pkt_metadata_init(&packet->md, in_port); miniflow_extract(packet,
> > > + &keys[i].mf);
> > > +
> > > + /* Store known good metadata to compare with optimized metadata. */
> > > + good_l2_5_ofs[i] = packet->l2_5_ofs; good_l3_ofs[i] =
> > > + packet->l3_ofs; good_l4_ofs[i] = packet->l4_ofs; good_l2_pad_size[i]
> > > + = packet->l2_pad_size; }
> > > +
> > > + /* Iterate through each version of miniflow implementations. */ for
> > > + (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) { if
> > > + (!mfex_impls[j].available) { continue; }
> > > +
> > > + /* Reset keys and offsets before each implementation. */
> > > + memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> > > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > > + dp_packet_reset_offsets(packet); }
> > > + /* Call optimized miniflow for each batch of packet. */ uint32_t
> > > + hit_mask = mfex_impls[j].extract_func(packets, test_keys, keys_size,
> > > + in_port, pmd_handle);
> > > +
> > > + /* Do a miniflow compare for bits, blocks and offsets for all the
> > > + * classified packets in the hitmask marked by set bits. */ while
> > > + (hit_mask) {
> > > + /* Index for the set bit. */
> > > + uint32_t i = __builtin_ctz(hit_mask);
> > > + /* Set the index in hitmask to Zero. */ hit_mask &= (hit_mask - 1);
> > > +
> > > + uint32_t failed = 0;
> > > +
> > > + /* Check miniflow bits are equal. */ if ((keys[i].mf.map.bits[0] !=
> > > + test_keys[i].mf.map.bits[0]) || (keys[i].mf.map.bits[1] !=
> > > + test_keys[i].mf.map.bits[1])) { VLOG_ERR("Good 0x%llx 0x%llx\tTest
> > > + 0x%llx 0x%llx\n", keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> > > + test_keys[i].mf.map.bits[0], test_keys[i].mf.map.bits[1]); failed =
> > > + 1; }
> > > +
> > > + if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) { uint32_t
> > > + block_cnt = miniflow_n_values(&keys[i].mf); VLOG_ERR("Autovalidation
> > > + blocks failed for %s pkt %d", mfex_impls[j].name, i); VLOG_ERR("
> > > + Good hexdump:\n"); uint64_t *good_block_ptr = (uint64_t
> > > + *)&keys[i].buf; uint64_t *test_block_ptr = (uint64_t
> > > + *)&test_keys[i].buf; for (uint32_t b = 0; b < block_cnt; b++) {
> > > + VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]); } VLOG_ERR(" Test
> > > + hexdump:\n"); for (uint32_t b = 0; b < block_cnt; b++) { VLOG_ERR("
> > > + %"PRIx64"\n", test_block_ptr[b]); } failed = 1; }
> > > +
> > > + if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> > > + (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> > > + (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> > > + (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> > > + VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> > > + mfex_impls[j].name, i); VLOG_ERR(" Good offsets: l2_pad_size %u,
> > > + l2_5_ofs : %u"
> > > + " l3_ofs %u, l4_ofs %u\n",
> > > + good_l2_pad_size[i], good_l2_5_ofs[i], good_l3_ofs[i],
> > > + good_l4_ofs[i]); VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs :
> > > + %u"
> > > + " l3_ofs %u, l4_ofs %u\n",
> > > + packets->packets[i]->l2_pad_size,
> > > + packets->packets[i]->l2_5_ofs,
> > > + packets->packets[i]->l3_ofs,
> > > + packets->packets[i]->l4_ofs);
> > > + failed = 1;
> > > + }
> > > +
> > > + if (failed) {
> > >
> > > Why stop now!? I think we should run all implementations, as others
> > might need fixing too!
> > >
> > > We had the same model as above by you but with so many debug
> > messages
> > > flooding made it impossible for us to
> > >
> > > Know the root cause and ovs_assert(0); have been removes so will no
> > > longer cause a crash but will simply disable the autovalidator
> > >
> > > + /* Having dumped the debug info, disable autovalidator. */
> > > + VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> > > + mfex_impls[j].name, i);
> > > + /* Halt OVS here on debug builds. */ ovs_assert(0);
> > > + pmd->miniflow_extract_opt = NULL;
> > > + break;
> > > + }
> > > + }
> > > + }
> > > +
> > > + /* preserve packet correctness by storing back the good offsets in
> > > + * packets back. */
> > > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> > > + packet->l2_5_ofs = good_l2_5_ofs[i]; l3_ofs = good_l3_ofs[i]; l4_ofs
> > > + packet->= good_l4_ofs[i]; l2_pad_size = good_l2_pad_size[i];
> > > + }
> > > +
> > > + /* Returning zero implies no packets were hit by autovalidation.
> > > +This
> > > + * simplifies unit-tests as changing
> > > +--enable-mfex-default-autovalidator
> > > + * would pass/fail. By always returning zero, autovalidator is a
> > > +little
> > > + * slower, but we gain consistency in testing.
> > > + */
> > > + return 0;
> > > +}
> > > diff --git a/lib/dpif-netdev-private-extract.h
> > > b/lib/dpif-netdev-private-extract.h
> > > index b7b0b2be4..455a7b590 100644
> > > --- a/lib/dpif-netdev-private-extract.h
> > > +++ b/lib/dpif-netdev-private-extract.h
> > > @@ -24,6 +24,11 @@
> > > /* Max size of dpif_miniflow_extract_impl array. */ #define
> > > MFEX_IMPLS_MAX_SIZE (16)
> > >
> > > +/* Skip the autovalidator study and null when iterating all available
> > > + * miniflow implementations.
> > > + */
> > > +#define MFEX_IMPL_START_IDX (1)
> > > +
> > > /* Forward declarations. */
> > > struct dp_packet;
> > > struct miniflow;
> > > @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void); int32_t
> > > dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl
> > > **out_ptr);
> > >
> > > Forgot to comment on this in my previous patch:
> > >
> > > /* Function pointer prototype to be implemented in the optimized
> > > miniflow
> > >
> > >   *   extract code.
> > >   *   returns the hitmask of the processed packets on success.
> > >   *   returns zero on failure.
> > >
> > > */
> > > typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch
> > > *batch, struct netdev_flow_key *keys, uint32_t keys_size, odp_port_t
> > > in_port, void *pmd_handle);
> > >
> > > Maybe we could add some description of what the input parameters
> > mean, are expected? For example that key_size need to be at least the size
> > of the batch? If this is the case, do we need it, and can we just assume keys
> > should be at least batch size long?
> > >
> > > Yes put a comment in there about key size in the description in v5.
> > >
> > > But as per convention whenever we pass an array we should also put the
> > > size of the array and this also complies with various
> > >
> > > Security checkers and also prevents potential exploitations.
> > >
> > > +/* Retrieve the hitmask of the batch of pakcets which is obtained by
> > > +comparing
> > > + * different miniflow implementations with linear miniflow extract.
> > > + * On error, returns a zero.
> > > + * On success, returns the number of packets in the batch compared.
> > > + */
> > > +uint32_t
> > > +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> > > +struct netdev_flow_key *keys,  uint32_t keys_size, odp_port_t
> > > +in_port,  void *pmd_handle);
> > >
> > > #endif /* DPIF_NETDEV_AVX512_EXTRACT */ diff --git a/lib/dpif-netdev.c
> > > b/lib/dpif-netdev.c index 567ebd952..4f4ab2790 100644
> > > --- a/lib/dpif-netdev.c
> > > +++ b/lib/dpif-netdev.c
> > > @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct
> > > unixctl_conn *conn, int argc, struct ds reply = DS_EMPTY_INITIALIZER;
> > > ds_put_format(&reply, "Miniflow implementation set to %s.\n",
> > > mfex_name); const char *reply_str = ds_cstr(&reply);
> > > - unixctl_command_reply(conn, reply_str); VLOG_INFO("%s", reply_str);
> > > + unixctl_command_reply(conn, reply_str);
> > > ds_destroy(&reply);
> > > }
> > >
> > > --
> > > 2.25.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org<mailto:dev@openvswitch.org>
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > 
> > --
> > fbl
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eelco Chaudron June 30, 2021, 9:06 a.m. UTC | #21
On 29 Jun 2021, at 18:15, Amber, Kumar wrote:

> Hi Eelco ,
>
> Sorry the formatting seems broken on this email thread.
> Replies are inlined .

Thanks, looking forward to the v5 (hopefully once I finished this version of the series. I’m currently at patch 10 ;)

> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Tuesday, June 29, 2021 7:36 PM
> To: Amber, Kumar <kumar.amber@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; dev@openvswitch.org; i.maximets@ovn.org; Stokes, Ian <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract
>
>
> Not sure how you replied, but it’s hard to see which comments are mine, and which are yours.
>
> On 29 Jun 2021, at 14:27, Amber, Kumar wrote:
>
> Hi Eelco,
>
> Thanks Again for reviews , Pls find my replies inline.
>
> From: Eelco Chaudron <echaudro@redhat.com<mailto:echaudro@redhat.com>>
> Sent: Tuesday, June 29, 2021 5:14 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>> ; Amber, Kumar <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
> Cc: dev@openvswitch.org<mailto:dev@openvswitch.org> ; i.maximets@ovn.org<mailto:i.maximets@ovn.org> ; Stokes, Ian <ian.stokes@intel.com<mailto:ian.stokes@intel.com>> ; Flavio Leitner <fbl@sysclose.org<mailto:fbl@sysclose.org>>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract
>
> On 17 Jun 2021, at 18:27, Kumar Amber wrote:
>
> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
>
> The autovaidator function can be triggered at runtime using the
> following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
>
> Signed-off-by: Kumar Amber <kumar.amber@intel.com<mailto:kumar.amber@intel.com>>
> Co-authored-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com<mailto:harry.van.haaren@intel.com>>
> ---
> lib/dpif-netdev-private-extract.c | 141 ++++++++++++++++++++++++++++++
> lib/dpif-netdev-private-extract.h | 15 ++++
> lib/dpif-netdev.c | 2 +-
> 3 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>
> /* Implementations of available extract options. */
> static struct dpif_miniflow_extract_impl mfex_impls[] = {
> + {
> + .probe = NULL,
> + .extract_func = dpif_miniflow_extract_autovalidator,
> + .name = "autovalidator",
> + },
> {
> .probe = NULL,
> .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
> *out_ptr = mfex_impls;
> return ARRAY_SIZE(mfex_impls);
> }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle)
> +{
> + const size_t cnt = dp_packet_batch_size(packets);
> + uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> + uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> + struct dp_packet *packet;
> + struct dp_netdev_pmd_thread *pmd = pmd_handle;
> + struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> + int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
> + if (mfunc_count < 0) {
>
> In theory 0 could not be returned, but just to cover the corner case can we change this to include zero.
>
> The code has been adapted as per Flavio comments so will not be a concern.
>
> + pmd->miniflow_extract_opt = NULL;
>
> Guess the above needs to be atomic.
>
> Removed based on Flavio comments.
>
> + VLOG_ERR("failed to get miniflow extract function implementations\n");
>
> Capital F to be in sync with your other error messages?
>
> Removed based on Flavio comments.
>
> + return 0;
> + }
> + ovs_assert(keys_size >= cnt);
>
>
> I don’t think we should assert here. Just return an error like above, so in production, we get notified, and this implementation gets disabled.
>
> Actually we do else one would most likely be overwriting the assigned array space for keys and will hit a Seg fault at some point.
>
> And hence we would like to know at the compile time if this is the case.
>
> But this is not a compile time check, it will crash OVS. You could just do this:
>
> if (keys_size < cnt) {
> pmd->miniflow_extract_opt = NULL;
> VLOG_ERR(“Invalid key size supplied etc. etc.\n”);
> return 0;
> }
>
> Or you could process up to key_size packets
>
> Reply:   sure I have taken the first approach in v5 as it safe and avoid any risk of Seg fault in V5.
>
> + struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> + /* Run scalar miniflow_extract to get default result. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + pkt_metadata_init(&packet->md, in_port);
> + miniflow_extract(packet, &keys[i].mf);
> +
> + /* Store known good metadata to compare with optimized metadata. */
> + good_l2_5_ofs[i] = packet->l2_5_ofs;
> + good_l3_ofs[i] = packet->l3_ofs;
> + good_l4_ofs[i] = packet->l4_ofs;
> + good_l2_pad_size[i] = packet->l2_pad_size;
> + }
> +
> + /* Iterate through each version of miniflow implementations. */
> + for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
> + if (!mfex_impls[j].available) {
> + continue;
> + }
> +
> + /* Reset keys and offsets before each implementation. */
> + memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + dp_packet_reset_offsets(packet);
> + }
> + /* Call optimized miniflow for each batch of packet. */
> + uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
> + keys_size, in_port, pmd_handle);
> +
> + /* Do a miniflow compare for bits, blocks and offsets for all the
> + * classified packets in the hitmask marked by set bits. */
> + while (hit_mask) {
> + /* Index for the set bit. */
> + uint32_t i = __builtin_ctz(hit_mask);
> + /* Set the index in hitmask to Zero. */
> + hit_mask &= (hit_mask - 1);
> +
> + uint32_t failed = 0;
> +
> + /* Check miniflow bits are equal. */
> + if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
> + (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
> + VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> + keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
> + test_keys[i].mf.map.bits[0],
> + test_keys[i].mf.map.bits[1]);
> + failed = 1;
> + }
> +
> + if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> + uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> + VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good hexdump:\n");
> + uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> + uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", good_block_ptr[b]);
> + }
> + VLOG_ERR(" Test hexdump:\n");
> + for (uint32_t b = 0; b < block_cnt; b++) {
> + VLOG_ERR(" %"PRIx64"\n", test_block_ptr[b]);
> + }
> + failed = 1;
> + }
> +
> + if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
> + (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
> + (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
> + (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
> + VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
> + mfex_impls[j].name, i);
> + VLOG_ERR(" Good offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + good_l2_pad_size[i], good_l2_5_ofs[i],
> + good_l3_ofs[i], good_l4_ofs[i]);
> + VLOG_ERR(" Test offsets: l2_pad_size %u, l2_5_ofs : %u"
> + " l3_ofs %u, l4_ofs %u\n",
> + packets->packets[i]->l2_pad_size,
> + packets->packets[i]->l2_5_ofs,
> + packets->packets[i]->l3_ofs,
> + packets->packets[i]->l4_ofs);
> + failed = 1;
> + }
> +
> + if (failed) {
>
> Why stop now!? I think we should run all implementations, as others might need fixing too!
>
> We had the same model as above by you but with so many debug messages flooding made it impossible for us to
>
> Know the root cause and ovs_assert(0); have been removes so will no longer cause a crash but will simply disable the autovalidator
>
> I still would opt for getting all in the log. What is worse than getting a customer to crash (LOG) one failure, and with the provided fix, it will crash/log the next one :(
> And if the logs were not clear enough, maybe they need some rewriting?
>
> Reply:  As pointed out by Flavio in patch 07/12 , I guess we can do the following here as:
>
>            - Remove the ovs_assert()
>
>            - VLOG_ERR() any mismatches
>
>            - Continue processing rest of burst with MFEX Autovalidator
>
> On failures, we disable autovalidator *after* the burst, to avoid spamming output and filling logs.
>
> + /* Having dumped the debug info, disable autovalidator. */
> + VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
> + mfex_impls[j].name, i);
> + /* Halt OVS here on debug builds. */
> + ovs_assert(0);
> + pmd->miniflow_extract_opt = NULL;
> + break;
> + }
> + }
> + }
> +
> + /* preserve packet correctness by storing back the good offsets in
> + * packets back. */
> + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> + packet->l2_5_ofs = good_l2_5_ofs[i];
> + packet->l3_ofs = good_l3_ofs[i];
> + packet->l4_ofs = good_l4_ofs[i];
> + packet->l2_pad_size = good_l2_pad_size[i];
> + }
> +
> + /* Returning zero implies no packets were hit by autovalidation. This
> + * simplifies unit-tests as changing --enable-mfex-default-autovalidator
> + * would pass/fail. By always returning zero, autovalidator is a little
> + * slower, but we gain consistency in testing.
> + */
> + return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
> /* Max size of dpif_miniflow_extract_impl array. */
> #define MFEX_IMPLS_MAX_SIZE (16)
>
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
> /* Forward declarations. */
> struct dp_packet;
> struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
> int32_t
> dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);
>
> Forgot to comment on this in my previous patch:
>
> /* Function pointer prototype to be implemented in the optimized miniflow
>
> * extract code.
> * returns the hitmask of the processed packets on success.
> * returns zero on failure.
>
> */
> typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
> struct netdev_flow_key *keys,
> uint32_t keys_size,
> odp_port_t in_port,
> void *pmd_handle);
>
> Maybe we could add some description of what the input parameters mean, are expected? For example that key_size need to be at least the size of the batch? If this is the case, do we need it, and can we just assume keys should be at least batch size long?
>
> Yes put a comment in there about key size in the description in v5.
>
> But as per convention whenever we pass an array we should also put the size of the array and this also complies with various
>
> Security checkers and also prevents potential exploitations.
>
> Ack
>
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> + struct netdev_flow_key *keys,
> + uint32_t keys_size, odp_port_t in_port,
> + void *pmd_handle);
>
> #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
> struct ds reply = DS_EMPTY_INITIALIZER;
> ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
> const char *reply_str = ds_cstr(&reply);
> - unixctl_command_reply(conn, reply_str);
> VLOG_INFO("%s", reply_str);
> + unixctl_command_reply(conn, reply_str);
> ds_destroy(&reply);
> }
>
> --
> 2.25.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eelco Chaudron July 1, 2021, 4:19 p.m. UTC | #22
On 29 Jun 2021, at 13:05, Van Haaren, Harry wrote:

> Hi Eelco,
>
> Would you describe the actual test being run below?
>
> I'm having a hard time figuring out what the actual datapath packet flow is. It seems strange
> that MFEX optimizations are affected by flow-count, that doesn't really logically make sense.
> Hence, some more understanding on what the test setup is may help.
>
> To remove complexity & noise from the setup: does running a simple Phy-to-Phy test with L2 bridging
> cause any perf degradation? If so, please describe that exact setup and I'll try to reproduce/replicate results here.
>

I did run some more tests both PVP as well as a physical port loopback, i.e. same port in and out (so without the VM).
Here are some results (I did 5 runs and took the average, and mention the RS deviation for all runs to make sure it not that):


+-----------------------+-----------------+-------------+--------+---------+--------+--------+--------+---------+--------+-----+-------+------+-------+------+-------+
| P (loopback)          |                 | Packet size |        |         |        |        |        |         |        |     |       |      |       |      |       |
|                       | Number of flows | 64          |        |     128 |        |    256 |        |     512 |        | 768 |       | 1024 |       | 1514 |       |
| without vs with patch | 1000            | -81863      | -0.98% | -134888 | -1.55% | -66261 | -0.80% | -110552 | -1.35% |   0 | 0.00% |    0 | 0.00% |    0 | 0.00% |
| RS Deviation          |                 |             |  0.09% |         |  0.46% |        |  0.09% |         |  0.06% |     | 0.00% |      | 0.00% |      | 0.00% |
| without vs with patch | 10000           | -58903      | -0.82% |  -52742 | -0.73% | -46875 | -0.64% |  -49871 | -0.68% |   0 | 0.00% |    0 | 0.00% |    0 | 0.00% |
| RS Deviation          |                 |             |  0.24% |         |  0.13% |        |  0.13% |         |  0.10% |     | 0.00% |      | 0.00% |      | 0.00% |
+-----------------------+-----------------+-------------+--------+---------+--------+--------+--------+---------+--------+-----+-------+------+-------+------+-------+

I’ll share the google sheet with you directly as it also has the config, and PVP results.

//Eelco

<SNIP>
Van Haaren, Harry July 1, 2021, 5:24 p.m. UTC | #23
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Thursday, July 1, 2021 5:19 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
> <ian.stokes@intel.com>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> miniflow extract
> 
> 
> 
> On 29 Jun 2021, at 13:05, Van Haaren, Harry wrote:
> 
> > Hi Eelco,
> >
> > Would you describe the actual test being run below?
> >
> > I'm having a hard time figuring out what the actual datapath packet flow is. It
> seems strange
> > that MFEX optimizations are affected by flow-count, that doesn't really logically
> make sense.
> > Hence, some more understanding on what the test setup is may help.
> >
> > To remove complexity & noise from the setup: does running a simple Phy-to-Phy
> test with L2 bridging
> > cause any perf degradation? If so, please describe that exact setup and I'll try to
> reproduce/replicate results here.
> >
> 
> I did run some more tests both PVP as well as a physical port loopback, i.e. same
> port in and out (so without the VM).
> Here are some results (I did 5 runs and took the average, and mention the RS
> deviation for all runs to make sure it not that):

Ah, thanks for checking noisiness of data, indeed that was going to be my next question!


> +-----------------------+-----------------+-------------+--------+---------+--------+--------+-----
> ---+---------+--------+-----+-------+------+-------+------+-------+
> | P (loopback)          |                 | Packet size |        |         |        |        |        |         |        |
> |       |      |       |      |       |
> |                       | Number of flows | 64          |        |     128 |        |    256 |        |     512 |
> | 768 |       | 1024 |       | 1514 |       |
> | without vs with patch | 1000            | -81863      | -0.98% | -134888 | -1.55% | -
> 66261 | -0.80% | -110552 | -1.35% |   0 | 0.00% |    0 | 0.00% |    0 | 0.00% |
> | RS Deviation          |                 |             |  0.09% |         |  0.46% |        |  0.09% |         |
> 0.06% |     | 0.00% |      | 0.00% |      | 0.00% |
> | without vs with patch | 10000           | -58903      | -0.82% |  -52742 | -0.73% | -
> 46875 | -0.64% |  -49871 | -0.68% |   0 | 0.00% |    0 | 0.00% |    0 | 0.00% |
> | RS Deviation          |                 |             |  0.24% |         |  0.13% |        |  0.13% |         |
> 0.10% |     | 0.00% |      | 0.00% |      | 0.00% |
> +-----------------------+-----------------+-------------+--------+---------+--------+--------+-----
> ---+---------+--------+-----+-------+------+-------+------+-------+

Thanks, so I'm reading that as showing 64 bytes negative 1%, 128 byte pkts -2%.
Small deltas, but in the wrong direction, thanks for reporting.

> I’ll share the google sheet with you directly as it also has the config, and PVP results.

I can't actually access that doc, sorry. Results above are enough to go by for now :)

We can investigate if there's any optimizations to be done to improve the scalar DPIF
enabling of the miniflow extract func ptr, but I'm not sure there is.

If we cannot improve the perf data from above, there is an option to not enable the scalar
DPIF with the AVX512 MFEX optimizations. (Logic being if AVX512 is present, running both
the DPIF + MFEX makes sense). What do you think?

> //Eelco

Note I'm out of office tomorrow Friday 2nd July, so expect replies early next week.
Regards, -Harry
Eelco Chaudron July 2, 2021, 7:10 a.m. UTC | #24
On 1 Jul 2021, at 19:24, Van Haaren, Harry wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Thursday, July 1, 2021 5:19 PM
>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
>> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
>> <ian.stokes@intel.com>
>> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
>> miniflow extract
>>
>>
>>
>> On 29 Jun 2021, at 13:05, Van Haaren, Harry wrote:
>>
>>> Hi Eelco,
>>>
>>> Would you describe the actual test being run below?
>>>
>>> I'm having a hard time figuring out what the actual datapath packet flow is. It
>> seems strange
>>> that MFEX optimizations are affected by flow-count, that doesn't really logically
>> make sense.
>>> Hence, some more understanding on what the test setup is may help.
>>>
>>> To remove complexity & noise from the setup: does running a simple Phy-to-Phy
>> test with L2 bridging
>>> cause any perf degradation? If so, please describe that exact setup and I'll try to
>> reproduce/replicate results here.
>>>
>>
>> I did run some more tests both PVP as well as a physical port loopback, i.e. same
>> port in and out (so without the VM).
>> Here are some results (I did 5 runs and took the average, and mention the RS
>> deviation for all runs to make sure it not that):
>
> Ah, thanks for checking noisiness of data, indeed that was going to be my next question!
>
>
>> +-----------------------+-----------------+-------------+--------+---------+--------+--------+-----
>> ---+---------+--------+-----+-------+------+-------+------+-------+
>> | P (loopback)          |                 | Packet size |        |         |        |        |        |         |        |
>> |       |      |       |      |       |
>> |                       | Number of flows | 64          |        |     128 |        |    256 |        |     512 |
>> | 768 |       | 1024 |       | 1514 |       |
>> | without vs with patch | 1000            | -81863      | -0.98% | -134888 | -1.55% | -
>> 66261 | -0.80% | -110552 | -1.35% |   0 | 0.00% |    0 | 0.00% |    0 | 0.00% |
>> | RS Deviation          |                 |             |  0.09% |         |  0.46% |        |  0.09% |         |
>> 0.06% |     | 0.00% |      | 0.00% |      | 0.00% |
>> | without vs with patch | 10000           | -58903      | -0.82% |  -52742 | -0.73% | -
>> 46875 | -0.64% |  -49871 | -0.68% |   0 | 0.00% |    0 | 0.00% |    0 | 0.00% |
>> | RS Deviation          |                 |             |  0.24% |         |  0.13% |        |  0.13% |         |
>> 0.10% |     | 0.00% |      | 0.00% |      | 0.00% |
>> +-----------------------+-----------------+-------------+--------+---------+--------+--------+-----
>> ---+---------+--------+-----+-------+------+-------+------+-------+
>
> Thanks, so I'm reading that as showing 64 bytes negative 1%, 128 byte pkts -2%.
> Small deltas, but in the wrong direction, thanks for reporting.
>
>> I’ll share the google sheet with you directly as it also has the config, and PVP results.
>
> I can't actually access that doc, sorry. Results above are enough to go by for now :)

It’s attached.

> We can investigate if there's any optimizations to be done to improve the scalar DPIF
> enabling of the miniflow extract func ptr, but I'm not sure there is.
>
> If we cannot improve the perf data from above, there is an option to not enable the scalar
> DPIF with the AVX512 MFEX optimizations. (Logic being if AVX512 is present, running both
> the DPIF + MFEX makes sense). What do you think?

This is on a system without AVX512 support, so all is disabled. The “without patch” has both the new AVX patches removed (mfex and dpif framework).

>
>> //Eelco
>
> Note I'm out of office tomorrow Friday 2nd July, so expect replies early next week.
> Regards, -Harry
Van Haaren, Harry July 6, 2021, 1:58 p.m. UTC | #25
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Friday, July 2, 2021 8:10 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
> <ian.stokes@intel.com>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> miniflow extract
> 
> 
> 
> On 1 Jul 2021, at 19:24, Van Haaren, Harry wrote:
> 
> >> -----Original Message-----
> >> From: Eelco Chaudron <echaudro@redhat.com>

<snip ascii table/data and previous conversations>

> >> I’ll share the google sheet with you directly as it also has the config, and PVP
> results.
> >
> > I can't actually access that doc, sorry. Results above are enough to go by for now :)
> 
> It’s attached.

Thanks.

> > We can investigate if there's any optimizations to be done to improve the scalar DPIF
> > enabling of the miniflow extract func ptr, but I'm not sure there is.

Note the v6 of MFEX has some minor changes/optimizations in place, as per scalar DPIF enabling in this patch:
https://patchwork.ozlabs.org/project/openvswitch/patch/20210706131150.45513-2-cian.ferriter@intel.com/


> > If we cannot improve the perf data from above, there is an option to not enable
> the scalar DPIF with the AVX512 MFEX optimizations. (Logic being if AVX512 is present,
> running both the DPIF + MFEX makes sense). What do you think?
 
If you feel it is required before merge, would you re-run the benchmark on v6?
If so, we're targeting Thursday for merge, so data ASAP, or by EOD tomorrow would be required.

As mentioned above, there is an option to remove the AVX512-Optimized MFEX enabling
from the scalar datapath, if there is measurable/significant performance reduction in this v6 code.


> This is on a system without AVX512 support, so all is disabled. The “without patch”
> has both the new AVX patches removed (mfex and dpif framework).
> 
> >
> >> //Eelco

Thanks again for testing & follow up! Regards, -Harry
Eelco Chaudron July 7, 2021, 8:58 a.m. UTC | #26
On 6 Jul 2021, at 15:58, Van Haaren, Harry wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Friday, July 2, 2021 8:10 AM
>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
>> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
>> <ian.stokes@intel.com>
>> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
>> miniflow extract
>>
>>
>>
>> On 1 Jul 2021, at 19:24, Van Haaren, Harry wrote:
>>
>>>> -----Original Message-----
>>>> From: Eelco Chaudron <echaudro@redhat.com>
>
> <snip ascii table/data and previous conversations>
>
>>>> I’ll share the google sheet with you directly as it also has the config, and PVP
>> results.
>>>
>>> I can't actually access that doc, sorry. Results above are enough to go by for now :)
>>
>> It’s attached.
>
> Thanks.
>
>>> We can investigate if there's any optimizations to be done to improve the scalar DPIF
>>> enabling of the miniflow extract func ptr, but I'm not sure there is.
>
> Note the v6 of MFEX has some minor changes/optimizations in place, as per scalar DPIF enabling in this patch:
> https://patchwork.ozlabs.org/project/openvswitch/patch/20210706131150.45513-2-cian.ferriter@intel.com/
>
>
>>> If we cannot improve the perf data from above, there is an option to not enable
>> the scalar DPIF with the AVX512 MFEX optimizations. (Logic being if AVX512 is present,
>> running both the DPIF + MFEX makes sense). What do you think?
>
> If you feel it is required before merge, would you re-run the benchmark on v6?
> If so, we're targeting Thursday for merge, so data ASAP, or by EOD tomorrow would be required.

I’m reviewing your v6 now, so I have no cycles to also do the testing before the end of the week. But the tests are simple, so maybe you guys can try it and report the difference with and without the two patchsets applied on a non AVX512 machine?

> As mentioned above, there is an option to remove the AVX512-Optimized MFEX enabling
> from the scalar datapath, if there is measurable/significant performance reduction in this v6 code.

It not clear to me what you mean by this? Can you elaborate? I’m running this on a non AVX512 machine, all with default configs.

>> This is on a system without AVX512 support, so all is disabled. The “without patch”
>> has both the new AVX patches removed (mfex and dpif framework).
>>
>>>
>>>> //Eelco
>
> Thanks again for testing & follow up! Regards, -Harry
Van Haaren, Harry July 7, 2021, 9:33 a.m. UTC | #27
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Wednesday, July 7, 2021 9:59 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
> <ian.stokes@intel.com>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> miniflow extract
> 
> 
> 
> On 6 Jul 2021, at 15:58, Van Haaren, Harry wrote:
> 
> >> -----Original Message-----
> >> From: Eelco Chaudron <echaudro@redhat.com>
> >> Sent: Friday, July 2, 2021 8:10 AM
> >> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> >> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
> >> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
> >> <ian.stokes@intel.com>
> >> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> >> miniflow extract
> >>
> >>
> >>
> >> On 1 Jul 2021, at 19:24, Van Haaren, Harry wrote:
> >>
> >>>> -----Original Message-----
> >>>> From: Eelco Chaudron <echaudro@redhat.com>
> >
> > <snip ascii table/data and previous conversations>
> >
> >>>> I’ll share the google sheet with you directly as it also has the config, and PVP
> >> results.
> >>>
> >>> I can't actually access that doc, sorry. Results above are enough to go by for
> now :)
> >>
> >> It’s attached.
> >
> > Thanks.
> >
> >>> We can investigate if there's any optimizations to be done to improve the
> scalar DPIF
> >>> enabling of the miniflow extract func ptr, but I'm not sure there is.
> >
> > Note the v6 of MFEX has some minor changes/optimizations in place, as per
> scalar DPIF enabling in this patch:
> >
> https://patchwork.ozlabs.org/project/openvswitch/patch/20210706131150.45513
> -2-cian.ferriter@intel.com/
> >
> >
> >>> If we cannot improve the perf data from above, there is an option to not
> enable
> >> the scalar DPIF with the AVX512 MFEX optimizations. (Logic being if AVX512 is
> present,
> >> running both the DPIF + MFEX makes sense). What do you think?
> >
> > If you feel it is required before merge, would you re-run the benchmark on v6?
> > If so, we're targeting Thursday for merge, so data ASAP, or by EOD tomorrow
> would be required.
> 
> I’m reviewing your v6 now, so I have no cycles to also do the testing before the
> end of the week. But the tests are simple, so maybe you guys can try it and
> report the difference with and without the two patchsets applied on a non
> AVX512 machine?

Yes, we have done scalar-only code benchmarking of master vs with DPIF patchset.
By not enabling AVX512 at runtime we get the "non AVX512 machine" behaviour.
(All the scalar code is common, no need to a specific CPU in that instance).

Testing OVS master branch vs with patchset did not show up any performance delta
on the test machines here, so there's nothing I can do.

By removing scalar DPIF enabling of MFEX opt pointer (details below) we remove any
urgency on benchmark results?


> > As mentioned above, there is an option to remove the AVX512-Optimized
> MFEX enabling
> > from the scalar datapath, if there is measurable/significant performance
> reduction in this v6 code.
> 
> It not clear to me what you mean by this? Can you elaborate? I’m running this on
> a non AVX512 machine, all with default configs.

I'm suggesting that if you're not OK with merging the ~1.x% negative performance on scalar
DPIF performance to enable MFEX, we can remove the MFEX enabling from the scalar DPIF.

Logically, if AVX512 is in use for MFEX, it is logical to use the AVX512 DPIF too, hence
this is a workable solution/workaround for scalar DPIF performance loss.

Taking this approach would ensure that scalar DPIF performance is not reduced in
this release, and we can re-visit scalar DPIF enabling of MFEX in future if desired?

Overall, this seems the pragmatic way of reducing risk around performance and getting merged.


> >> This is on a system without AVX512 support, so all is disabled. The “without
> patch”
> >> has both the new AVX patches removed (mfex and dpif framework).
> >>
> >>>
> >>>> //Eelco
> >
> > Thanks again for testing & follow up! Regards, -Harry
Eelco Chaudron July 7, 2021, 9:40 a.m. UTC | #28
On 7 Jul 2021, at 11:33, Van Haaren, Harry wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Wednesday, July 7, 2021 9:59 AM
>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
>> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
>> <ian.stokes@intel.com>
>> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
>> miniflow extract
>>
>>
>>
>> On 6 Jul 2021, at 15:58, Van Haaren, Harry wrote:
>>
>>>> -----Original Message-----
>>>> From: Eelco Chaudron <echaudro@redhat.com>
>>>> Sent: Friday, July 2, 2021 8:10 AM
>>>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>>>> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
>>>> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
>>>> <ian.stokes@intel.com>
>>>> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
>>>> miniflow extract
>>>>
>>>>
>>>>
>>>> On 1 Jul 2021, at 19:24, Van Haaren, Harry wrote:
>>>>
>>>>>> -----Original Message-----
>>>>>> From: Eelco Chaudron <echaudro@redhat.com>
>>>
>>> <snip ascii table/data and previous conversations>
>>>
>>>>>> I’ll share the google sheet with you directly as it also has the config, and PVP
>>>> results.
>>>>>
>>>>> I can't actually access that doc, sorry. Results above are enough to go by for
>> now :)
>>>>
>>>> It’s attached.
>>>
>>> Thanks.
>>>
>>>>> We can investigate if there's any optimizations to be done to improve the
>> scalar DPIF
>>>>> enabling of the miniflow extract func ptr, but I'm not sure there is.
>>>
>>> Note the v6 of MFEX has some minor changes/optimizations in place, as per
>> scalar DPIF enabling in this patch:
>>>
>> https://patchwork.ozlabs.org/project/openvswitch/patch/20210706131150.45513
>> -2-cian.ferriter@intel.com/
>>>
>>>
>>>>> If we cannot improve the perf data from above, there is an option to not
>> enable
>>>> the scalar DPIF with the AVX512 MFEX optimizations. (Logic being if AVX512 is
>> present,
>>>> running both the DPIF + MFEX makes sense). What do you think?
>>>
>>> If you feel it is required before merge, would you re-run the benchmark on v6?
>>> If so, we're targeting Thursday for merge, so data ASAP, or by EOD tomorrow
>> would be required.
>>
>> I’m reviewing your v6 now, so I have no cycles to also do the testing before the
>> end of the week. But the tests are simple, so maybe you guys can try it and
>> report the difference with and without the two patchsets applied on a non
>> AVX512 machine?
>
> Yes, we have done scalar-only code benchmarking of master vs with DPIF patchset.
> By not enabling AVX512 at runtime we get the "non AVX512 machine" behaviour.
> (All the scalar code is common, no need to a specific CPU in that instance).
>
> Testing OVS master branch vs with patchset did not show up any performance delta
> on the test machines here, so there's nothing I can do.
>
> By removing scalar DPIF enabling of MFEX opt pointer (details below) we remove any
> urgency on benchmark results?

I’ll wrap up the review first, and hopefully, when you are working on potential changes, I can run the tests and get some results.

I understand now what you meant with disabling it in the scalar part, so if I still see 1%+ deltas I’ll try it out.

>>> As mentioned above, there is an option to remove the AVX512-Optimized
>> MFEX enabling
>>> from the scalar datapath, if there is measurable/significant performance
>> reduction in this v6 code.
>>
>> It not clear to me what you mean by this? Can you elaborate? I’m running this on
>> a non AVX512 machine, all with default configs.
>
> I'm suggesting that if you're not OK with merging the ~1.x% negative performance on scalar
> DPIF performance to enable MFEX, we can remove the MFEX enabling from the scalar DPIF.
>
> Logically, if AVX512 is in use for MFEX, it is logical to use the AVX512 DPIF too, hence
> this is a workable solution/workaround for scalar DPIF performance loss.
>
> Taking this approach would ensure that scalar DPIF performance is not reduced in
> this release, and we can re-visit scalar DPIF enabling of MFEX in future if desired?
>
> Overall, this seems the pragmatic way of reducing risk around performance and getting merged.
>
>
>>>> This is on a system without AVX512 support, so all is disabled. The “without
>> patch”
>>>> has both the new AVX patches removed (mfex and dpif framework).
>>>>
>>>>>
>>>>>> //Eelco
>>>
>>> Thanks again for testing & follow up! Regards, -Harry
Van Haaren, Harry July 7, 2021, 11:20 a.m. UTC | #29
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Wednesday, July 7, 2021 10:41 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
> i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
> <ian.stokes@intel.com>
> Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> miniflow extract
> 
> 
> 
> On 7 Jul 2021, at 11:33, Van Haaren, Harry wrote:

<snip>

> > By removing scalar DPIF enabling of MFEX opt pointer (details below) we
> remove any
> > urgency on benchmark results?
> 
> I’ll wrap up the review first, and hopefully, when you are working on potential
> changes, I can run the tests and get some results.

As we're nearing the merge dates, I'd prefer to focus on getting merged.
To help review & merge, v7 will contain the following patch split change:

Scalar DPIF usage of the MFEX Optimized function is now in its own patch at
the end of the series. This allows all other MFEX patches to be merged, without
any hazard to scalar DPIF datapath performance.


> I understand now what you meant with disabling it in the scalar part, so if I still
> see 1%+ deltas I’ll try it out.

Eelco's testing results can inform the inclusion of Scalar DPIF usage of the MFEX
function pointer. As this enabling is now in a separate patch at the end of the
series, it means that the patch can be easily merged, or not merged. No rebasing
or rework required.

If the main MFEX code is ready for merge before the testing results are in, this
allows the merge of MFEX. Scalar enabling can be merged later in the 2.16
merge window if desired, or re-visited in a future release.

<snip>

Regards, -Harry
Stokes, Ian July 7, 2021, 11:38 a.m. UTC | #30
> > -----Original Message-----
> > From: Eelco Chaudron <echaudro@redhat.com>
> > Sent: Wednesday, July 7, 2021 10:41 AM
> > To: Van Haaren, Harry <harry.van.haaren@intel.com>
> > Cc: Amber, Kumar <kumar.amber@intel.com>; dev@openvswitch.org;
> > i.maximets@ovn.org; Flavio Leitner <fbl@sysclose.org>; Stokes, Ian
> > <ian.stokes@intel.com>
> > Subject: Re: [ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for
> > miniflow extract
> >
> >
> >
> > On 7 Jul 2021, at 11:33, Van Haaren, Harry wrote:
> 
> <snip>
> 
> > > By removing scalar DPIF enabling of MFEX opt pointer (details below) we
> > remove any
> > > urgency on benchmark results?
> >
> > I’ll wrap up the review first, and hopefully, when you are working on potential
> > changes, I can run the tests and get some results.
> 
> As we're nearing the merge dates, I'd prefer to focus on getting merged.
> To help review & merge, v7 will contain the following patch split change:
> 
> Scalar DPIF usage of the MFEX Optimized function is now in its own patch at
> the end of the series. This allows all other MFEX patches to be merged, without
> any hazard to scalar DPIF datapath performance.
> 
> 
> > I understand now what you meant with disabling it in the scalar part, so if I still
> > see 1%+ deltas I’ll try it out.
> 
> Eelco's testing results can inform the inclusion of Scalar DPIF usage of the MFEX
> function pointer. As this enabling is now in a separate patch at the end of the
> series, it means that the patch can be easily merged, or not merged. No rebasing
> or rework required.
> 
> If the main MFEX code is ready for merge before the testing results are in, this
> allows the merge of MFEX. Scalar enabling can be merged later in the 2.16
> merge window if desired, or re-visited in a future release.

+1 to this approach. If there is more discussion needed then lets keep it separate for the moment as described above and not block the main series.

Regards
Ian
> 
> <snip>
> 
> Regards, -Harry
diff mbox series

Patch

diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index fcc56ef26..0741c19f9 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -32,6 +32,11 @@  VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
 
 /* Implementations of available extract options. */
 static struct dpif_miniflow_extract_impl mfex_impls[] = {
+   {
+        .probe = NULL,
+        .extract_func = dpif_miniflow_extract_autovalidator,
+        .name = "autovalidator",
+    },
     {
         .probe = NULL,
         .extract_func = NULL,
@@ -84,3 +89,139 @@  dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
     *out_ptr = mfex_impls;
     return ARRAY_SIZE(mfex_impls);
 }
+
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
+                                    struct netdev_flow_key *keys,
+                                    uint32_t keys_size, odp_port_t in_port,
+                                    void *pmd_handle)
+{
+    const size_t cnt = dp_packet_batch_size(packets);
+    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
+    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
+    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
+    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
+    struct dp_packet *packet;
+    struct dp_netdev_pmd_thread *pmd = pmd_handle;
+    struct dpif_miniflow_extract_impl *miniflow_funcs;
+
+    int32_t mfunc_count = dpif_miniflow_extract_info_get(&miniflow_funcs);
+    if (mfunc_count < 0) {
+        pmd->miniflow_extract_opt = NULL;
+        VLOG_ERR("failed to get miniflow extract function implementations\n");
+        return 0;
+    }
+    ovs_assert(keys_size >= cnt);
+    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
+
+    /* Run scalar miniflow_extract to get default result. */
+    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+        pkt_metadata_init(&packet->md, in_port);
+        miniflow_extract(packet, &keys[i].mf);
+
+        /* Store known good metadata to compare with optimized metadata. */
+        good_l2_5_ofs[i] = packet->l2_5_ofs;
+        good_l3_ofs[i] = packet->l3_ofs;
+        good_l4_ofs[i] = packet->l4_ofs;
+        good_l2_pad_size[i] = packet->l2_pad_size;
+    }
+
+    /* Iterate through each version of miniflow implementations. */
+    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); j++) {
+        if (!mfex_impls[j].available) {
+            continue;
+        }
+
+        /* Reset keys and offsets before each implementation. */
+        memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
+        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+            dp_packet_reset_offsets(packet);
+        }
+        /* Call optimized miniflow for each batch of packet. */
+        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
+                                            keys_size, in_port, pmd_handle);
+
+        /* Do a miniflow compare for bits, blocks and offsets for all the
+         * classified packets in the hitmask marked by set bits. */
+        while (hit_mask) {
+            /* Index for the set bit. */
+            uint32_t i = __builtin_ctz(hit_mask);
+            /* Set the index in hitmask to Zero. */
+            hit_mask &= (hit_mask - 1);
+
+            uint32_t failed = 0;
+
+            /* Check miniflow bits are equal. */
+            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
+                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
+                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
+                         keys[i].mf.map.bits[0], keys[i].mf.map.bits[1],
+                         test_keys[i].mf.map.bits[0],
+                         test_keys[i].mf.map.bits[1]);
+                failed = 1;
+            }
+
+            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
+                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
+                VLOG_ERR("Autovalidation blocks failed for %s pkt %d",
+                         mfex_impls[j].name, i);
+                VLOG_ERR("  Good hexdump:\n");
+                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
+                uint64_t *test_block_ptr = (uint64_t *)&test_keys[i].buf;
+                for (uint32_t b = 0; b < block_cnt; b++) {
+                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
+                }
+                VLOG_ERR("  Test hexdump:\n");
+                for (uint32_t b = 0; b < block_cnt; b++) {
+                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
+                }
+                failed = 1;
+            }
+
+            if ((packets->packets[i]->l2_pad_size != good_l2_pad_size[i]) ||
+                    (packets->packets[i]->l2_5_ofs != good_l2_5_ofs[i]) ||
+                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) ||
+                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) {
+                VLOG_ERR("Autovalidation packet offsets failed for %s pkt %d",
+                         mfex_impls[j].name, i);
+                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : %u"
+                         " l3_ofs %u, l4_ofs %u\n",
+                         good_l2_pad_size[i], good_l2_5_ofs[i],
+                         good_l3_ofs[i], good_l4_ofs[i]);
+                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : %u"
+                         " l3_ofs %u, l4_ofs %u\n",
+                         packets->packets[i]->l2_pad_size,
+                         packets->packets[i]->l2_5_ofs,
+                         packets->packets[i]->l3_ofs,
+                         packets->packets[i]->l4_ofs);
+                failed = 1;
+            }
+
+            if (failed) {
+                /* Having dumped the debug info, disable autovalidator. */
+                VLOG_ERR("Autovalidation failed in %s pkt %d, disabling.\n",
+                         mfex_impls[j].name, i);
+                /* Halt OVS here on debug builds. */
+                ovs_assert(0);
+                pmd->miniflow_extract_opt = NULL;
+                break;
+            }
+        }
+    }
+
+    /* preserve packet correctness by storing back the good offsets in
+     * packets back. */
+    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+        packet->l2_5_ofs = good_l2_5_ofs[i];
+        packet->l3_ofs = good_l3_ofs[i];
+        packet->l4_ofs = good_l4_ofs[i];
+        packet->l2_pad_size = good_l2_pad_size[i];
+    }
+
+    /* Returning zero implies no packets were hit by autovalidation. This
+     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
+     * would pass/fail. By always returning zero, autovalidator is a little
+     * slower, but we gain consistency in testing.
+     */
+    return 0;
+}
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index b7b0b2be4..455a7b590 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -24,6 +24,11 @@ 
 /* Max size of dpif_miniflow_extract_impl array. */
 #define MFEX_IMPLS_MAX_SIZE (16)
 
+/* Skip the autovalidator study and null when iterating all available
+ * miniflow implementations.
+ */
+#define MFEX_IMPL_START_IDX (1)
+
 /* Forward declarations. */
 struct dp_packet;
 struct miniflow;
@@ -90,5 +95,15 @@  dpif_miniflow_extract_init(void);
 int32_t
 dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);
 
+/* Retrieve the hitmask of the batch of pakcets which is obtained by comparing
+ * different miniflow implementations with linear miniflow extract.
+ * On error, returns a zero.
+ * On success, returns the number of packets in the batch compared.
+ */
+uint32_t
+dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
+                                    struct netdev_flow_key *keys,
+                                    uint32_t keys_size, odp_port_t in_port,
+                                    void *pmd_handle);
 
 #endif /* DPIF_NETDEV_AVX512_EXTRACT */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 567ebd952..4f4ab2790 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1181,8 +1181,8 @@  dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
     struct ds reply = DS_EMPTY_INITIALIZER;
     ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
     const char *reply_str = ds_cstr(&reply);
-    unixctl_command_reply(conn, reply_str);
     VLOG_INFO("%s", reply_str);
+    unixctl_command_reply(conn, reply_str);
     ds_destroy(&reply);
 }