diff mbox series

[ovs-dev,v7,02/11] odp-execute: Add function pointers to odp-execute for different action implementations.

Message ID 20220614115743.1143341-3-emma.finn@intel.com
State Changes Requested
Headers show
Series [ovs-dev,v7,01/11] ofproto-dpif: Fix incorrect checksums in input packets | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/intel-ovs-compilation fail test: fail
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Emma Finn June 14, 2022, 11:57 a.m. UTC
This commit introduces the initial infrastructure required to allow
different implementations for OvS actions. The patch introduces action
function pointers which allows user to switch between different action
implementations available. This will allow for more performance and flexibility
so the user can choose the action implementation to best suite their use case.

Signed-off-by: Emma Finn <emma.finn@intel.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/automake.mk           |  2 +
 lib/dpif-netdev.c         |  4 ++
 lib/odp-execute-private.c | 80 +++++++++++++++++++++++++++++++++++++++
 lib/odp-execute-private.h | 74 ++++++++++++++++++++++++++++++++++++
 lib/odp-execute.c         | 41 +++++++++++++++++---
 lib/odp-execute.h         |  7 ++++
 6 files changed, 203 insertions(+), 5 deletions(-)
 create mode 100644 lib/odp-execute-private.c
 create mode 100644 lib/odp-execute-private.h

Comments

Eelco Chaudron June 23, 2022, 3:37 p.m. UTC | #1
On 14 Jun 2022, at 13:57, Emma Finn wrote:

> This commit introduces the initial infrastructure required to allow
> different implementations for OvS actions. The patch introduces action
> function pointers which allows user to switch between different action
> implementations available. This will allow for more performance and flexibility
> so the user can choose the action implementation to best suite their use case.
>
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
> ---
>  lib/automake.mk           |  2 +
>  lib/dpif-netdev.c         |  4 ++
>  lib/odp-execute-private.c | 80 +++++++++++++++++++++++++++++++++++++++
>  lib/odp-execute-private.h | 74 ++++++++++++++++++++++++++++++++++++
>  lib/odp-execute.c         | 41 +++++++++++++++++---
>  lib/odp-execute.h         |  7 ++++
>  6 files changed, 203 insertions(+), 5 deletions(-)
>  create mode 100644 lib/odp-execute-private.c
>  create mode 100644 lib/odp-execute-private.h
>
> diff --git a/lib/automake.mk b/lib/automake.mk
> index cb50578eb..1a49dd30b 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -213,6 +213,8 @@ lib_libopenvswitch_la_SOURCES = \
>  	lib/object-collection.h \
>  	lib/odp-execute.c \
>  	lib/odp-execute.h \
> +	lib/odp-execute-private.c \
> +	lib/odp-execute-private.h \
>  	lib/odp-util.c \
>  	lib/odp-util.h \
>  	lib/ofp-actions.c \
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index ff57b3961..47dd7a1a6 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1675,6 +1675,10 @@ create_dpif_netdev(struct dp_netdev *dp)
>      dpif->dp = dp;
>      dpif->last_port_seq = seq_read(dp->port_seq);
>
> +    /* Called once at initialization time. This handles setting up the state
> +     * of the actions functions at init time. */
> +    odp_execute_init();
> +
>      return &dpif->dpif;
>  }
>
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> new file mode 100644
> index 000000000..92db2386c
> --- /dev/null
> +++ b/lib/odp-execute-private.c
> @@ -0,0 +1,80 @@
> +/*
> + * Copyright (c) 2022 Intel.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <errno.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "dpdk.h"
> +#include "dp-packet.h"
> +#include "odp-execute-private.h"
> +#include "odp-netlink.h"
> +#include "odp-util.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(odp_execute_impl);
> +
> +static struct odp_execute_action_impl action_impls[] = {
> +    [ACTION_IMPL_SCALAR] = {
> +        .available = false,
> +        .name = "scalar",
> +        .init_func = NULL,
> +    },
> +};
> +
> +static void
> +action_impl_copy_funcs(struct odp_execute_action_impl *src,
> +                       const struct odp_execute_action_impl *dst)

nit: dst -> dest

> +{
> +    for (int i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
> +        atomic_store_relaxed(&src->funcs[i], dst->funcs[i]);
> +    }
> +}
> +
> +void
> +odp_execute_action_init(void)
> +{
> +    /* Each impl's function array is initialized to reflect the scalar
> +     * implementation. This simplifies adding optimized implementations,
> +     * as the autovalidator can always compare all actions.
> +     *
> +     * Below will check if impl is available and copies the scalar functions
> +     * to all other implementations.
> +     */
> +    for (int i = 0; i < ACTION_IMPL_MAX; i++) {
> +        bool avail = true;
> +
> +        if (action_impls[i].init_func) {
> +            /* Return zero is success, non-zero means error. */
> +            avail = (action_impls[i].init_func(&action_impls[i]) == 0);
> +        }
> +
> +        action_impls[i].available = avail;
> +
> +        if (i != ACTION_IMPL_SCALAR) {
> +            action_impl_copy_funcs(&action_impls[i],
> +                                   &action_impls[ACTION_IMPL_SCALAR]);
> +        }
> +
> +        if (action_impls[i].available == true) {
> +            action_impls[i].init_func(&action_impls[i]);
> +        }
> +

Not sure I understand the logic here? We call init twice, should once not be enough?
Something like:

    for (int i = 0; i < ACTION_IMPL_MAX; i++) {
        bool avail = true;

        if (i != ACTION_IMPL_SCALAR) {
            action_impl_copy_funcs(&action_impls[i],
                                   &action_impls[ACTION_IMPL_SCALAR]);
        }

        if (action_impls[i].init_func) {
            /* Return zero is success, non-zero means error. */
            avail = (action_impls[i].init_func(&action_impls[i]) == 0);
        }
        action_impls[i].available = avail;

> +        VLOG_INFO("Action implementation %s (available: %s)\n",

No "\n" for log messages is needed.

> +                  action_impls[i].name, avail ? "available" : "not available");

Make this the same as the action-impl-show output, so:

  +                  action_impls[i].name, avail ? "Yes" : "No");



> +    }
> +}
> diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
> new file mode 100644
> index 000000000..08d1faf38
> --- /dev/null
> +++ b/lib/odp-execute-private.h
> @@ -0,0 +1,74 @@
> +/*
> + * Copyright (c) 2022 Intel.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef ODP_EXTRACT_PRIVATE
> +#define ODP_EXTRACT_PRIVATE 1
> +
> +#include "dp-packet.h"
> +#include "odp-execute.h"
> +#include "odp-netlink.h"
> +#include "ovs-atomic.h"
> +
> +/* Forward declaration for typedef. */
> +struct odp_execute_action_impl;
> +
> +/* Typedef for an initialization function that can initialize each
> + * implementation, checking requirements such as CPU ISA.
> + */
> +typedef int (*odp_execute_action_init_func)
> +                    (struct odp_execute_action_impl *self);
> +
> +/* Structure represents an implementation of the odp actions. */
> +struct odp_execute_action_impl {
> +    /* When set, the CPU ISA required for this implementation is available
> +     * and the implementation can be used.
> +     */
> +    bool available;
> +
> +    /* Name of the implementation. */
> +    const char *name;
> +
> +    /* Function is used to detect if this CPU has the ISA required
> +     * to run the optimized action implementation and if available, initializes
> +     * the implementation for use.
> +     */
> +    odp_execute_action_init_func init_func;
> +
> +    /* An array of callback functions, one for each action. */
> +    ATOMIC(odp_execute_action_cb) funcs[__OVS_ACTION_ATTR_MAX];
> +};
> +
> +/* Order of Actions implementations. */
> +enum odp_execute_action_impl_idx {
> +    ACTION_IMPL_SCALAR,
> +    /* See ACTION_IMPL_BEGIN below, for "first to-be-validated" impl.
> +     * Do not change the autovalidator position in this list without updating
> +     * the define below.
> +     */
> +
> +    ACTION_IMPL_MAX,
> +};
> +
> +/* Index to start verifying implementations from. */
> +BUILD_ASSERT_DECL(ACTION_IMPL_SCALAR == 0);
> +
> +/* Odp execute init handles setting up the state of the actions functions at
> + * initialization time. It cannot return errors, as it must always succeed in
> + * initializing the scalar/generic codepath.
> + */
> +void odp_execute_action_init(void);
> +
> +#endif /* ODP_EXTRACT_PRIVATE */
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 7da56793d..67ebbe195 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -17,6 +17,7 @@
>
>  #include <config.h>
>  #include "odp-execute.h"
> +#include "odp-execute-private.h"
>  #include <sys/types.h>
>  #include <netinet/in.h>
>  #include <arpa/inet.h>
> @@ -833,6 +834,23 @@ requires_datapath_assistance(const struct nlattr *a)
>      return false;
>  }
>
> +/* The active function pointers on the datapath. ISA optimized implementations
> + * are enabled by plugging them into this static arary, which is consulted when
> + * applying actions on the datapath.
> + */
> +static struct odp_execute_action_impl actions_active_impl;

Why not a pointer to the active implementation? I tried to hint at this during v6 patch 3 review, as this would make the design way nicer?
See comment on patch 3.

> +
> +void
> +odp_execute_init(void)
> +{
> +    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> +    if (ovsthread_once_start(&once)) {
> +        odp_execute_action_init();
> +        ovsthread_once_done(&once);
> +    }
> +}
> +
> +
>  /* Executes all of the 'actions_len' bytes of datapath actions in 'actions' on
>   * the packets in 'batch'.  If 'steal' is true, possibly modifies and
>   * definitely free the packets in 'batch', otherwise leaves 'batch' unchanged.
> @@ -857,14 +875,12 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>
>      NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
>          int type = nl_attr_type(a);
> +        enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
>          bool last_action = (left <= NLA_ALIGN(a->nla_len));
> +        bool should_steal = steal && last_action;

This is no longer needed, just undo the change below.

>
>          if (requires_datapath_assistance(a)) {
>              if (dp_execute_action) {
> -                /* Allow 'dp_execute_action' to steal the packet data if we do
> -                 * not need it any more. */
> -                bool should_steal = steal && last_action;
> -

This should be kept, see comment above.

>                  dp_execute_action(dp, batch, a, should_steal);
>
>                  if (last_action || dp_packet_batch_is_empty(batch)) {
> @@ -879,8 +895,20 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>              continue;
>          }
>
> -        switch ((enum ovs_action_attr) type) {
> +        /* If type is set in the active actions implementation, call the
> +         * function-pointer and continue to the next action.
> +         */
> +        if (actions_active_impl.funcs[attr_type] &&
> +            attr_type <= OVS_ACTION_ATTR_MAX) {
> +            actions_active_impl.funcs[attr_type](batch, a);
> +            continue;
> +        }
> +
> +        /* If the action was not handled by the active function pointers above,
> +         * process them by switching on the type below.
> +         */
>
> +        switch (attr_type) {
>          case OVS_ACTION_ATTR_HASH: {
>              const struct ovs_action_hash *hash_act = nl_attr_get(a);
>
> @@ -1094,6 +1122,9 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>          case __OVS_ACTION_ATTR_MAX:
>              OVS_NOT_REACHED();
>          }
> +
> +        /* Do not add any generic processing here, as it won't be executed when
> +         * an ISA-specific action implementation exists. */
>      }
>
>      dp_packet_delete_batch(batch, steal);
> diff --git a/lib/odp-execute.h b/lib/odp-execute.h
> index a3578a575..0921ee924 100644
> --- a/lib/odp-execute.h
> +++ b/lib/odp-execute.h
> @@ -28,6 +28,13 @@ struct dp_packet;
>  struct pkt_metadata;
>  struct dp_packet_batch;
>
> +
> +/* Called once at initialization time. */
> +void odp_execute_init(void);
> +
> +typedef void (*odp_execute_action_cb)(struct dp_packet_batch *batch,
> +                                      const struct nlattr *action);
> +
>  typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
>                                 const struct nlattr *action, bool should_steal);
>
> -- 
> 2.32.0


General discussion item from v6:

EC> I have another general architecture question? What if for a specific packet (or batch of packets) the SIMD implementation is not able to process the packet/batch?
EC> How can we handle this in a uniform way? Can you give this some thought and add support?

EF> We support 100% of the action (regardless of input packet, we perform same action as scalar actions code).  Autovalidation ensures the modifications are identical.
EF> As a result there is no value in a "fallback to scalar" backup, as this must never occur.

What I'm trying to say is, that in certain cases, your SIMD specific code might not be able to optimize the action handling, and it might want to fall back to the scaler implementation.
If there is no way to handle this, it could lead to a lot of code duplication. This might not be the case in your current patch set (maybe patch 10 would need it), but could be in the future. Let's take the following "potential" example:


Someone comes along and wants to offload these actions using OpenCL. As OpenCL is designed to do things in parallel, he wants to process an entire batch of packets in one go, but there is a constraint, all packets need to be at least 128 bytes long. If this is not the case, the scalar approach would be faster.

So I'm suggesting we need an API like this example:

if (all_in_batch_ge_128(batch) {
    openCL_handle_batch(batch)
} else {
   odp_execute_scalar_action(batch, action);
}

I think you need this for patch 10, so I will introduce it there along with my other comments on that patch.



Here are my suggested changes for this patch in a diff to make life easier:

diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 92db2386c..3c2fd0512 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -38,10 +38,10 @@ static struct odp_execute_action_impl action_impls[] = {

 static void
 action_impl_copy_funcs(struct odp_execute_action_impl *src,
-                       const struct odp_execute_action_impl *dst)
+                       const struct odp_execute_action_impl *dest)
 {
     for (int i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
-        atomic_store_relaxed(&src->funcs[i], dst->funcs[i]);
+        atomic_store_relaxed(&src->funcs[i], dest->funcs[i]);
     }
 }

@@ -58,23 +58,19 @@ odp_execute_action_init(void)
     for (int i = 0; i < ACTION_IMPL_MAX; i++) {
         bool avail = true;

-        if (action_impls[i].init_func) {
-            /* Return zero is success, non-zero means error. */
-            avail = (action_impls[i].init_func(&action_impls[i]) == 0);
-        }
-
-        action_impls[i].available = avail;
-
         if (i != ACTION_IMPL_SCALAR) {
             action_impl_copy_funcs(&action_impls[i],
                                    &action_impls[ACTION_IMPL_SCALAR]);
         }

-        if (action_impls[i].available == true) {
-            action_impls[i].init_func(&action_impls[i]);
+        if (action_impls[i].init_func) {
+            /* Return zero is success, non-zero means error. */
+            avail = (action_impls[i].init_func(&action_impls[i]) == 0);
         }

-        VLOG_INFO("Action implementation %s (available: %s)\n",
-                  action_impls[i].name, avail ? "available" : "not available");
+        action_impls[i].available = avail;
+
+        VLOG_INFO("Action implementation %s (available: %s)",
+                  action_impls[i].name, avail ? "Yes" : "No");
     }
 }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 67ebbe195..69dd816f3 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -877,10 +877,12 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         int type = nl_attr_type(a);
         enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
         bool last_action = (left <= NLA_ALIGN(a->nla_len));
-        bool should_steal = steal && last_action;

         if (requires_datapath_assistance(a)) {
             if (dp_execute_action) {
+                /* Allow 'dp_execute_action' to steal the packet data if we do
+                 * not need it any more. */
+                bool should_steal = steal && last_action;
                 dp_execute_action(dp, batch, a, should_steal);

                 if (last_action || dp_packet_batch_is_empty(batch)) {
Van Haaren, Harry June 30, 2022, 8:16 a.m. UTC | #2
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Thursday, June 23, 2022 4:38 PM
> To: Finn, Emma <emma.finn@intel.com>
> Cc: Stokes, Ian <ian.stokes@intel.com>; Van Haaren, Harry
> <harry.van.haaren@intel.com>; dev@openvswitch.org
> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
> for different action implementations.
> 
> On 14 Jun 2022, at 13:57, Emma Finn wrote:

<snip patch contents>

> > --
> > 2.32.0
> 
> 
> General discussion item from v6:

Thanks Eelco for the detailed review.


> EC> I have another general architecture question? What if for a specific packet (or
> batch of packets) the SIMD implementation is not able to process the packet/batch?
> EC> How can we handle this in a uniform way? Can you give this some thought and
> add support?
> 
> EF> We support 100% of the action (regardless of input packet, we perform same
> action as scalar actions code).  Autovalidation ensures the modifications are
> identical.
> EF> As a result there is no value in a "fallback to scalar" backup, as this must never
> occur.
> 
> What I'm trying to say is, that in certain cases, your SIMD specific code might not be
> able to optimize the action handling, and it might want to fall back to the scaler
> implementation.

Today, the AVX512 implementation in this patchset will *always* be able to handle the action.
There is no requirement for scalar fallback.

> If there is no way to handle this, it could lead to a lot of code duplication. This might
> not be the case in your current patch set (maybe patch 10 would need it), but could
> be in the future. Let's take the following "potential" example:

If in future a patchset is proposed with these requirements, we can refactor based on
actual requirements of the patchset then. I don't like the idea of adding building something
now "just in case" a future patchset might want something.


> Someone comes along and wants to offload these actions using OpenCL. As OpenCL
> is designed to do things in parallel, he wants to process an entire batch of packets in
> one go, but there is a constraint, all packets need to be at least 128 bytes long. If this
> is not the case, the scalar approach would be faster.
> 
> So I'm suggesting we need an API like this example:
> 
> if (all_in_batch_ge_128(batch) {
>     openCL_handle_batch(batch)
> } else {
>    odp_execute_scalar_action(batch, action);
> }

Understood, and perhaps this would be good. I recommend we find out if/when a real
world patchset is proposed so we can discuss implementation details with confidence.

For 2.18 release, I feel the existing approach is fine because it addresses the actual needs
of the AVX512 implementation today. None of the details here are user-visible, we can
refactor if patchsets like the above (e.g. OpenCL or whatever) is posted in a future release.

<snip more patch suggestions/contents>
Eelco Chaudron June 30, 2022, 11:44 a.m. UTC | #3
On 30 Jun 2022, at 10:16, Van Haaren, Harry wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Thursday, June 23, 2022 4:38 PM
>> To: Finn, Emma <emma.finn@intel.com>
>> Cc: Stokes, Ian <ian.stokes@intel.com>; Van Haaren, Harry
>> <harry.van.haaren@intel.com>; dev@openvswitch.org
>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
>> for different action implementations.
>>
>> On 14 Jun 2022, at 13:57, Emma Finn wrote:
>
> <snip patch contents>
>
>>> --
>>> 2.32.0
>>
>>
>> General discussion item from v6:
>
> Thanks Eelco for the detailed review.
>
>
>> EC> I have another general architecture question? What if for a specific packet (or
>> batch of packets) the SIMD implementation is not able to process the packet/batch?
>> EC> How can we handle this in a uniform way? Can you give this some thought and
>> add support?
>>
>> EF> We support 100% of the action (regardless of input packet, we perform same
>> action as scalar actions code).  Autovalidation ensures the modifications are
>> identical.
>> EF> As a result there is no value in a "fallback to scalar" backup, as this must never
>> occur.
>>
>> What I'm trying to say is, that in certain cases, your SIMD specific code might not be
>> able to optimize the action handling, and it might want to fall back to the scaler
>> implementation.
>
> Today, the AVX512 implementation in this patchset will *always* be able to handle the action.
> There is no requirement for scalar fallback.

Well actually there is a need, but don’t worry I fixed/added it to my patch 10 review.

Take a look at the quick fix here, https://mail.openvswitch.org/pipermail/ovs-dev/2022-June/395068.html, where some scalar functions get called.


>> If there is no way to handle this, it could lead to a lot of code duplication. This might
>> not be the case in your current patch set (maybe patch 10 would need it), but could
>> be in the future. Let's take the following "potential" example:
>
> If in future a patchset is proposed with these requirements, we can refactor based on
> actual requirements of the patchset then. I don't like the idea of adding building something
> now "just in case" a future patchset might want something.
>
>
>> Someone comes along and wants to offload these actions using OpenCL. As OpenCL
>> is designed to do things in parallel, he wants to process an entire batch of packets in
>> one go, but there is a constraint, all packets need to be at least 128 bytes long. If this
>> is not the case, the scalar approach would be faster.
>>
>> So I'm suggesting we need an API like this example:
>>
>> if (all_in_batch_ge_128(batch) {
>>     openCL_handle_batch(batch)
>> } else {
>>    odp_execute_scalar_action(batch, action);
>> }
>
> Understood, and perhaps this would be good. I recommend we find out if/when a real
> world patchset is proposed so we can discuss implementation details with confidence.
>
> For 2.18 release, I feel the existing approach is fine because it addresses the actual needs
> of the AVX512 implementation today. None of the details here are user-visible, we can
> refactor if patchsets like the above (e.g. OpenCL or whatever) is posted in a future release.





> <snip more patch suggestions/contents>
Van Haaren, Harry June 30, 2022, 12:13 p.m. UTC | #4
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Thursday, June 30, 2022 12:44 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: Finn, Emma <emma.finn@intel.com>; Stokes, Ian <ian.stokes@intel.com>;
> dev@openvswitch.org
> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
> for different action implementations.
> 
> 
> 
> On 30 Jun 2022, at 10:16, Van Haaren, Harry wrote:
> 
> >> -----Original Message-----
> >> From: Eelco Chaudron <echaudro@redhat.com>
> >> Sent: Thursday, June 23, 2022 4:38 PM
> >> To: Finn, Emma <emma.finn@intel.com>
> >> Cc: Stokes, Ian <ian.stokes@intel.com>; Van Haaren, Harry
> >> <harry.van.haaren@intel.com>; dev@openvswitch.org
> >> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
> >> for different action implementations.
> >>
> >> On 14 Jun 2022, at 13:57, Emma Finn wrote:
> >
> > <snip patch contents>
> >
> >>> --
> >>> 2.32.0
> >>
> >>
> >> General discussion item from v6:
> >
> > Thanks Eelco for the detailed review.
> >
> >
> >> EC> I have another general architecture question? What if for a specific packet
> (or
> >> batch of packets) the SIMD implementation is not able to process the
> packet/batch?
> >> EC> How can we handle this in a uniform way? Can you give this some thought
> and
> >> add support?
> >>
> >> EF> We support 100% of the action (regardless of input packet, we perform same
> >> action as scalar actions code).  Autovalidation ensures the modifications are
> >> identical.
> >> EF> As a result there is no value in a "fallback to scalar" backup, as this must never
> >> occur.
> >>
> >> What I'm trying to say is, that in certain cases, your SIMD specific code might not
> be
> >> able to optimize the action handling, and it might want to fall back to the scaler
> >> implementation.
> >
> > Today, the AVX512 implementation in this patchset will *always* be able to handle
> the action.
> > There is no requirement for scalar fallback.
> 
> Well actually there is a need, but don’t worry I fixed/added it to my patch 10 review.
>
> Take a look at the quick fix here, https://mail.openvswitch.org/pipermail/ovs-
> dev/2022-June/395068.html, where some scalar functions get called.

Specifically, you mean this bit?

@@ -411,6 +412,13 @@ action_avx512_set_masked(struct dp_packet_batch *batch OVS_UNUSED,

     if (avx512_impl.set_masked_funcs[attr_type]) {
         avx512_impl.set_masked_funcs[attr_type](batch, a);
+    } else {
+        struct dp_packet *packet;
+        a = nl_attr_get(a);
+
+        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+            odp_execute_masked_set_action(packet, a);
+        }
     }

I don't see that as a *fallback* because something couldn't be handled,
but a "use-scalar" because the AVX512 isn't implemented (func ptr = NULL).

It's not the case that the AVX512 couldn’t handle a specific input packet here;
the case is that Action has SubActions, and all are not implemented yet.

Most Actions do NOT have SubActions (like SET_MASKED has). I think it's OK to handle
the "use-scalar" in these few cases.

My main concern was around requirements hypothetical OpenCL implementations.
To handle in a generic way at the Action level (instead of SubAction like above) seems
to over-complicate things to me...

Input would be useful to quickly find path forward and get that built & merged.
Is the above OK for 2.18, and refactor/rework if required for 2.19/onwards?

<snip>
Eelco Chaudron June 30, 2022, 12:35 p.m. UTC | #5
On 30 Jun 2022, at 14:13, Van Haaren, Harry wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Thursday, June 30, 2022 12:44 PM
>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>> Cc: Finn, Emma <emma.finn@intel.com>; Stokes, Ian <ian.stokes@intel.com>;
>> dev@openvswitch.org
>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
>> for different action implementations.
>>
>>
>>
>> On 30 Jun 2022, at 10:16, Van Haaren, Harry wrote:
>>
>>>> -----Original Message-----
>>>> From: Eelco Chaudron <echaudro@redhat.com>
>>>> Sent: Thursday, June 23, 2022 4:38 PM
>>>> To: Finn, Emma <emma.finn@intel.com>
>>>> Cc: Stokes, Ian <ian.stokes@intel.com>; Van Haaren, Harry
>>>> <harry.van.haaren@intel.com>; dev@openvswitch.org
>>>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
>>>> for different action implementations.
>>>>
>>>> On 14 Jun 2022, at 13:57, Emma Finn wrote:
>>>
>>> <snip patch contents>
>>>
>>>>> --
>>>>> 2.32.0
>>>>
>>>>
>>>> General discussion item from v6:
>>>
>>> Thanks Eelco for the detailed review.
>>>
>>>
>>>> EC> I have another general architecture question? What if for a specific packet
>> (or
>>>> batch of packets) the SIMD implementation is not able to process the
>> packet/batch?
>>>> EC> How can we handle this in a uniform way? Can you give this some thought
>> and
>>>> add support?
>>>>
>>>> EF> We support 100% of the action (regardless of input packet, we perform same
>>>> action as scalar actions code).  Autovalidation ensures the modifications are
>>>> identical.
>>>> EF> As a result there is no value in a "fallback to scalar" backup, as this must never
>>>> occur.
>>>>
>>>> What I'm trying to say is, that in certain cases, your SIMD specific code might not
>> be
>>>> able to optimize the action handling, and it might want to fall back to the scaler
>>>> implementation.
>>>
>>> Today, the AVX512 implementation in this patchset will *always* be able to handle
>> the action.
>>> There is no requirement for scalar fallback.
>>
>> Well actually there is a need, but don’t worry I fixed/added it to my patch 10 review.
>>
>> Take a look at the quick fix here, https://mail.openvswitch.org/pipermail/ovs-
>> dev/2022-June/395068.html, where some scalar functions get called.
>
> Specifically, you mean this bit?
>
> @@ -411,6 +412,13 @@ action_avx512_set_masked(struct dp_packet_batch *batch OVS_UNUSED,
>
>      if (avx512_impl.set_masked_funcs[attr_type]) {
>          avx512_impl.set_masked_funcs[attr_type](batch, a);
> +    } else {
> +        struct dp_packet *packet;
> +        a = nl_attr_get(a);
> +
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +            odp_execute_masked_set_action(packet, a);
> +        }
>      }
>
> I don't see that as a *fallback* because something couldn't be handled,
> but a "use-scalar" because the AVX512 isn't implemented (func ptr = NULL).
>
> It's not the case that the AVX512 couldn’t handle a specific input packet here;
> the case is that Action has SubActions, and all are not implemented yet.
>
> Most Actions do NOT have SubActions (like SET_MASKED has). I think it's OK to handle
> the "use-scalar" in these few cases.
>
> My main concern was around requirements hypothetical OpenCL implementations.
> To handle in a generic way at the Action level (instead of SubAction like above) seems
> to over-complicate things to me...
>
> Input would be useful to quickly find path forward and get that built & merged.
> Is the above OK for 2.18, and refactor/rework if required for 2.19/onwards?

Hi Harry/Emma,

No problem, as an exception to speed things up for this patch set, I’ve created patches on top of Emma's patches with (almost) all the changes I would deem needed for approval. Guess the exception is the checksum handling in patch 11. And some potential AVX optimisations.

If anything in these changes I proposed is not acceptable, let's discuss this before you sent out a new revision. Same for the open items I did not fix, please either ACK or NACK the suggested changes. If NACKed, let's discuss them first before sending the next revision, so we can hopefully have one final rev.


//Eelco
Ilya Maximets July 1, 2022, 12:34 p.m. UTC | #6
On 6/30/22 14:35, Eelco Chaudron wrote:
> 
> 
> On 30 Jun 2022, at 14:13, Van Haaren, Harry wrote:
> 
>>> -----Original Message-----
>>> From: Eelco Chaudron <echaudro@redhat.com>
>>> Sent: Thursday, June 30, 2022 12:44 PM
>>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>>> Cc: Finn, Emma <emma.finn@intel.com>; Stokes, Ian <ian.stokes@intel.com>;
>>> dev@openvswitch.org
>>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
>>> for different action implementations.
>>>
>>>
>>>
>>> On 30 Jun 2022, at 10:16, Van Haaren, Harry wrote:
>>>
>>>>> -----Original Message-----
>>>>> From: Eelco Chaudron <echaudro@redhat.com>
>>>>> Sent: Thursday, June 23, 2022 4:38 PM
>>>>> To: Finn, Emma <emma.finn@intel.com>
>>>>> Cc: Stokes, Ian <ian.stokes@intel.com>; Van Haaren, Harry
>>>>> <harry.van.haaren@intel.com>; dev@openvswitch.org
>>>>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to odp-execute
>>>>> for different action implementations.
>>>>>
>>>>> On 14 Jun 2022, at 13:57, Emma Finn wrote:
>>>>
>>>> <snip patch contents>
>>>>
>>>>>> --
>>>>>> 2.32.0
>>>>>
>>>>>
>>>>> General discussion item from v6:
>>>>
>>>> Thanks Eelco for the detailed review.
>>>>
>>>>
>>>>> EC> I have another general architecture question? What if for a specific packet
>>> (or
>>>>> batch of packets) the SIMD implementation is not able to process the
>>> packet/batch?
>>>>> EC> How can we handle this in a uniform way? Can you give this some thought
>>> and
>>>>> add support?
>>>>>
>>>>> EF> We support 100% of the action (regardless of input packet, we perform same
>>>>> action as scalar actions code).  Autovalidation ensures the modifications are
>>>>> identical.
>>>>> EF> As a result there is no value in a "fallback to scalar" backup, as this must never
>>>>> occur.
>>>>>
>>>>> What I'm trying to say is, that in certain cases, your SIMD specific code might not
>>> be
>>>>> able to optimize the action handling, and it might want to fall back to the scaler
>>>>> implementation.
>>>>
>>>> Today, the AVX512 implementation in this patchset will *always* be able to handle
>>> the action.
>>>> There is no requirement for scalar fallback.
>>>
>>> Well actually there is a need, but don’t worry I fixed/added it to my patch 10 review.
>>>
>>> Take a look at the quick fix here, https://mail.openvswitch.org/pipermail/ovs-
>>> dev/2022-June/395068.html, where some scalar functions get called.
>>
>> Specifically, you mean this bit?
>>
>> @@ -411,6 +412,13 @@ action_avx512_set_masked(struct dp_packet_batch *batch OVS_UNUSED,
>>
>>      if (avx512_impl.set_masked_funcs[attr_type]) {
>>          avx512_impl.set_masked_funcs[attr_type](batch, a);
>> +    } else {
>> +        struct dp_packet *packet;
>> +        a = nl_attr_get(a);
>> +
>> +        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>> +            odp_execute_masked_set_action(packet, a);
>> +        }
>>      }
>>
>> I don't see that as a *fallback* because something couldn't be handled,
>> but a "use-scalar" because the AVX512 isn't implemented (func ptr = NULL).
>>
>> It's not the case that the AVX512 couldn’t handle a specific input packet here;
>> the case is that Action has SubActions, and all are not implemented yet.
>>
>> Most Actions do NOT have SubActions (like SET_MASKED has). I think it's OK to handle
>> the "use-scalar" in these few cases.
>>
>> My main concern was around requirements hypothetical OpenCL implementations.
>> To handle in a generic way at the Action level (instead of SubAction like above) seems
>> to over-complicate things to me...
>>
>> Input would be useful to quickly find path forward and get that built & merged.
>> Is the above OK for 2.18, and refactor/rework if required for 2.19/onwards?
> 
> Hi Harry/Emma,
> 
> No problem, as an exception to speed things up for this patch set, I’ve created patches on top of Emma's patches with (almost) all the changes I would deem needed for approval. Guess the exception is the checksum handling in patch 11. And some potential AVX optimisations.
> 
> If anything in these changes I proposed is not acceptable, let's discuss this before you sent out a new revision. Same for the open items I did not fix, please either ACK or NACK the suggested changes. If NACKed, let's discuss them first before sending the next revision, so we can hopefully have one final rev.
> 
> 
> //Eelco

Amber reminded me about artificial dependencies in AVX512 code,
so I also took a look at this patch set again.

Just want to highlight that one of the main comments from v4
that I asked you to address back in January wasn't actually
addressed.

I read only a couple of relevant patches, but I see that the
optimization still depends on userspace datapath in v7.  And it
is a purely artificial restriction, because all the actual code
is placed outside of dpif-netdev and has no dependencies on the
datapath type, AFAICT.  Also if the change is enabled in the
dpif-netdev, it will also affect all other datapaths serviced by
the same daemon.  So, it can not be enabled for other datapaths,
but affects them if enabled.

Please, move all the code from dpif-netdev modules somewhere else
and re-name unixctl commands, so they don't have dpif-netdev in
their names.  I think, it's a critical thing to fix, as it will
be harder to do once the change is released.  Luckily, it seems to
be just a matter of moving and re-naming a couple of functions.

It also, probably, makes sense to clearly mark these features as
experimental in the docs, so changes to the API can be made more
easily in the future, if required.

Best regards, Ilya Maximets.
Emma Finn July 7, 2022, 1:31 p.m. UTC | #7
> -----Original Message-----
> From: Ilya Maximets <i.maximets@ovn.org>
> Sent: Friday 1 July 2022 13:35
> To: Eelco Chaudron <echaudro@redhat.com>; Van Haaren, Harry
> <harry.van.haaren@intel.com>; Finn, Emma <emma.finn@intel.com>
> Cc: dev@openvswitch.org; i.maximets@ovn.org; Stokes, Ian
> <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>; Mcnamara, John
> <john.mcnamara@intel.com>
> Subject: Re: [ovs-dev] [PATCH v7 02/11] odp-execute: Add function pointers
> to odp-execute for different action implementations.
> 
> On 6/30/22 14:35, Eelco Chaudron wrote:
> >
> >
> > On 30 Jun 2022, at 14:13, Van Haaren, Harry wrote:
> >
> >>> -----Original Message-----
> >>> From: Eelco Chaudron <echaudro@redhat.com>
> >>> Sent: Thursday, June 30, 2022 12:44 PM
> >>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> >>> Cc: Finn, Emma <emma.finn@intel.com>; Stokes, Ian
> >>> <ian.stokes@intel.com>; dev@openvswitch.org
> >>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers to
> >>> odp-execute for different action implementations.
> >>>
> >>>
> >>>
> >>> On 30 Jun 2022, at 10:16, Van Haaren, Harry wrote:
> >>>
> >>>>> -----Original Message-----
> >>>>> From: Eelco Chaudron <echaudro@redhat.com>
> >>>>> Sent: Thursday, June 23, 2022 4:38 PM
> >>>>> To: Finn, Emma <emma.finn@intel.com>
> >>>>> Cc: Stokes, Ian <ian.stokes@intel.com>; Van Haaren, Harry
> >>>>> <harry.van.haaren@intel.com>; dev@openvswitch.org
> >>>>> Subject: Re: [PATCH v7 02/11] odp-execute: Add function pointers
> >>>>> to odp-execute for different action implementations.
> >>>>>
> >>>>> On 14 Jun 2022, at 13:57, Emma Finn wrote:
> >>>>
> >>>> <snip patch contents>
> >>>>
> >>>>>> --
> >>>>>> 2.32.0
> >>>>>
> >>>>>
> >>>>> General discussion item from v6:
> >>>>
> >>>> Thanks Eelco for the detailed review.
> >>>>
> >>>>
> >>>>> EC> I have another general architecture question? What if for a
> >>>>> EC> specific packet
> >>> (or
> >>>>> batch of packets) the SIMD implementation is not able to process
> >>>>> the
> >>> packet/batch?
> >>>>> EC> How can we handle this in a uniform way? Can you give this
> >>>>> EC> some thought
> >>> and
> >>>>> add support?
> >>>>>
> >>>>> EF> We support 100% of the action (regardless of input packet, we
> >>>>> EF> perform same
> >>>>> action as scalar actions code).  Autovalidation ensures the
> >>>>> modifications are identical.
> >>>>> EF> As a result there is no value in a "fallback to scalar"
> >>>>> EF> backup, as this must never
> >>>>> occur.
> >>>>>
> >>>>> What I'm trying to say is, that in certain cases, your SIMD
> >>>>> specific code might not
> >>> be
> >>>>> able to optimize the action handling, and it might want to fall
> >>>>> back to the scaler implementation.
> >>>>
> >>>> Today, the AVX512 implementation in this patchset will *always* be
> >>>> able to handle
> >>> the action.
> >>>> There is no requirement for scalar fallback.
> >>>
> >>> Well actually there is a need, but don’t worry I fixed/added it to my
> patch 10 review.
> >>>
> >>> Take a look at the quick fix here,
> >>> https://mail.openvswitch.org/pipermail/ovs-
> >>> dev/2022-June/395068.html, where some scalar functions get called.
> >>
> >> Specifically, you mean this bit?
> >>
> >> @@ -411,6 +412,13 @@ action_avx512_set_masked(struct
> dp_packet_batch
> >> *batch OVS_UNUSED,
> >>
> >>      if (avx512_impl.set_masked_funcs[attr_type]) {
> >>          avx512_impl.set_masked_funcs[attr_type](batch, a);
> >> +    } else {
> >> +        struct dp_packet *packet;
> >> +        a = nl_attr_get(a);
> >> +
> >> +        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> >> +            odp_execute_masked_set_action(packet, a);
> >> +        }
> >>      }
> >>
> >> I don't see that as a *fallback* because something couldn't be
> >> handled, but a "use-scalar" because the AVX512 isn't implemented (func
> ptr = NULL).
> >>
> >> It's not the case that the AVX512 couldn’t handle a specific input
> >> packet here; the case is that Action has SubActions, and all are not
> implemented yet.
> >>
> >> Most Actions do NOT have SubActions (like SET_MASKED has). I think
> >> it's OK to handle the "use-scalar" in these few cases.
> >>
> >> My main concern was around requirements hypothetical OpenCL
> implementations.
> >> To handle in a generic way at the Action level (instead of SubAction
> >> like above) seems to over-complicate things to me...
> >>
> >> Input would be useful to quickly find path forward and get that built &
> merged.
> >> Is the above OK for 2.18, and refactor/rework if required for
> 2.19/onwards?
> >
> > Hi Harry/Emma,
> >
> > No problem, as an exception to speed things up for this patch set, I’ve
> created patches on top of Emma's patches with (almost) all the changes I
> would deem needed for approval. Guess the exception is the checksum
> handling in patch 11. And some potential AVX optimisations.
> >
> > If anything in these changes I proposed is not acceptable, let's discuss this
> before you sent out a new revision. Same for the open items I did not fix,
> please either ACK or NACK the suggested changes. If NACKed, let's discuss
> them first before sending the next revision, so we can hopefully have one
> final rev.
> >
> >
> > //Eelco
> 
> Amber reminded me about artificial dependencies in AVX512 code, so I also
> took a look at this patch set again.
> 
> Just want to highlight that one of the main comments from v4 that I asked
> you to address back in January wasn't actually addressed.
> 
> I read only a couple of relevant patches, but I see that the optimization still
> depends on userspace datapath in v7.  And it is a purely artificial restriction,
> because all the actual code is placed outside of dpif-netdev and has no
> dependencies on the datapath type, AFAICT.  Also if the change is enabled in
> the dpif-netdev, it will also affect all other datapaths serviced by the same
> daemon.  So, it can not be enabled for other datapaths, but affects them if
> enabled.
> 
> Please, move all the code from dpif-netdev modules somewhere else and
> re-name unixctl commands, so they don't have dpif-netdev in their names.  I
> think, it's a critical thing to fix, as it will be harder to do once the change is
> released.  Luckily, it seems to be just a matter of moving and re-naming a
> couple of functions.
> 
> It also, probably, makes sense to clearly mark these features as experimental
> in the docs, so changes to the API can be made more easily in the future, if
> required.
> 
> Best regards, Ilya Maximets.

Hi Eelco/Ilya,

Thanks for the reviews.

I have reworked this series based on all your feedback and I have a V8 ready to
push.  

Checksum handling has been reworked to update checksum and not recalculate
And now follows the same method as scalar checksum update.
The first patch in this series has been removed and all unit tests are passing.
The dependency on the userspace datapath has also been removed and feature has
been marked as experimental. Plus all other smaller review comments have been
taken on board and no open discussions on the v7 are left.

If you are okay with this, I will send v8 to the mailing list. 

Thanks, 
Emma
Eelco Chaudron July 7, 2022, 1:54 p.m. UTC | #8
On 7 Jul 2022, at 15:31, Finn, Emma wrote:

<SNIP>

>
> Hi Eelco/Ilya,
>
> Thanks for the reviews.
>
> I have reworked this series based on all your feedback and I have a V8 ready to
> push.
>
> Checksum handling has been reworked to update checksum and not recalculate
> And now follows the same method as scalar checksum update.
> The first patch in this series has been removed and all unit tests are passing.
> The dependency on the userspace datapath has also been removed and feature has
> been marked as experimental. Plus all other smaller review comments have been
> taken on board and no open discussions on the v7 are left.
>
> If you are okay with this, I will send v8 to the mailing list.

Hi Emma,

If all the items I brought up during the review are taken care of, and none of them require further discussion, please go ahead, and send out the v8.

If some things were not addressed or are implemented differently than suggested, let’s first discuss them.

Cheers,

Eelco
Emma Finn July 7, 2022, 2:20 p.m. UTC | #9
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Thursday 7 July 2022 14:54
> To: Finn, Emma <emma.finn@intel.com>
> Cc: Ilya Maximets <i.maximets@ovn.org>; Van Haaren, Harry
> <harry.van.haaren@intel.com>; dev@openvswitch.org; Stokes, Ian
> <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>; Mcnamara, John
> <john.mcnamara@intel.com>
> Subject: Re: [ovs-dev] [PATCH v7 02/11] odp-execute: Add function pointers
> to odp-execute for different action implementations.
> 
> 
> 
> On 7 Jul 2022, at 15:31, Finn, Emma wrote:
> 
> <SNIP>
> 
> >
> > Hi Eelco/Ilya,
> >
> > Thanks for the reviews.
> >
> > I have reworked this series based on all your feedback and I have a V8
> > ready to push.
> >
> > Checksum handling has been reworked to update checksum and not
> > recalculate And now follows the same method as scalar checksum update.
> > The first patch in this series has been removed and all unit tests are
> passing.
> > The dependency on the userspace datapath has also been removed and
> > feature has been marked as experimental. Plus all other smaller review
> > comments have been taken on board and no open discussions on the v7
> are left.
> >
> > If you are okay with this, I will send v8 to the mailing list.
> 
> Hi Emma,
> 
> If all the items I brought up during the review are taken care of, and none of
> them require further discussion, please go ahead, and send out the v8.
> 
> If some things were not addressed or are implemented differently than
> suggested, let’s first discuss them.
> 
> Cheers,
> 
> Eelco

There is only one comment where your suggestion has been implemented slightly differently. 
As there was an issue with mails bouncing I cannot reply inline as I have been working off 
patchwork. But I will snip the info in here.

> > diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> > index f8d0896b5..de2e4dfc4 100644
> > --- a/lib/odp-execute-private.c
> > +++ b/lib/odp-execute-private.c
> > @@ -42,6 +42,14 @@ static struct odp_execute_action_impl action_impls[] = {
> >          .name = "scalar",
> >          .init_func = odp_action_scalar_init,
> >      },
> > +
> > +    #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
> 
> From the v6 discussion:
> 
> EC> How about changing this instance to #ifdef ACTION_IMPL_AVX512? This way we only have one place where we have these compiler/arch checks.
> 
> EF> V7 will not include a fix here, but we will investigate and report back to OVS ML with results of investigation.
> 
> Not sure why you need further investigation? If ACTION_IMPL_AVX512 was not defined the above flags where false already in odp-execute-private.h.
> I think the change should simply be this:
> 
> -    #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
> +    #ifdef ACTION_IMPL_AVX512
> 
> Or am I missing something? I see one version on gcc complain about this, is this what you are figuring out?
> 
> gcc (GCC) 11.3.1 20220421 works fine, gcc (GCC) 11.2.1 20220127 seems to report an issues:
> 
>    lib/odp-execute-private.c:86:9: warning: iteration 2 invokes undefined behavior [-Waggressive-loop-optimizations]
> 
> I do not see this problem with clang. Also the github actions script compile just fine. You might just be as unlucky as I was, and you have a broken compile version?
> 
> > +    [ACTION_IMPL_AVX512] = {
> > +        .available = false,
> > +        .name = "avx512",
> > +        .init_func = action_avx512_init,
> > +    },
> > +    #endif
> >  };
> >

The #ifdef ACTION_IMPL_AVX512 is checking an enum in the C code, I didn’t think this was possibly and I tried but couldn't get this to work.
As an alternative I refactored the compile checks to a similar way done in this patch from Sunil -  
https://patchwork.ozlabs.org/project/openvswitch/patch/20220704122700.54825-1-sunil.pai.g@intel.com/
This will simplify the build time checks around the code.
Eelco Chaudron July 7, 2022, 3:10 p.m. UTC | #10
On 7 Jul 2022, at 16:20, Finn, Emma wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Thursday 7 July 2022 14:54
>> To: Finn, Emma <emma.finn@intel.com>
>> Cc: Ilya Maximets <i.maximets@ovn.org>; Van Haaren, Harry
>> <harry.van.haaren@intel.com>; dev@openvswitch.org; Stokes, Ian
>> <ian.stokes@intel.com>; Flavio Leitner <fbl@sysclose.org>; Mcnamara, John
>> <john.mcnamara@intel.com>
>> Subject: Re: [ovs-dev] [PATCH v7 02/11] odp-execute: Add function pointers
>> to odp-execute for different action implementations.
>>
>>
>>
>> On 7 Jul 2022, at 15:31, Finn, Emma wrote:
>>
>> <SNIP>
>>
>>>
>>> Hi Eelco/Ilya,
>>>
>>> Thanks for the reviews.
>>>
>>> I have reworked this series based on all your feedback and I have a V8
>>> ready to push.
>>>
>>> Checksum handling has been reworked to update checksum and not
>>> recalculate And now follows the same method as scalar checksum update.
>>> The first patch in this series has been removed and all unit tests are
>> passing.
>>> The dependency on the userspace datapath has also been removed and
>>> feature has been marked as experimental. Plus all other smaller review
>>> comments have been taken on board and no open discussions on the v7
>> are left.
>>>
>>> If you are okay with this, I will send v8 to the mailing list.
>>
>> Hi Emma,
>>
>> If all the items I brought up during the review are taken care of, and none of
>> them require further discussion, please go ahead, and send out the v8.
>>
>> If some things were not addressed or are implemented differently than
>> suggested, let’s first discuss them.
>>
>> Cheers,
>>
>> Eelco
>
> There is only one comment where your suggestion has been implemented slightly differently.
> As there was an issue with mails bouncing I cannot reply inline as I have been working off
> patchwork. But I will snip the info in here.
>
>>> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
>>> index f8d0896b5..de2e4dfc4 100644
>>> --- a/lib/odp-execute-private.c
>>> +++ b/lib/odp-execute-private.c
>>> @@ -42,6 +42,14 @@ static struct odp_execute_action_impl action_impls[] = {
>>>          .name = "scalar",
>>>          .init_func = odp_action_scalar_init,
>>>      },
>>> +
>>> +    #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
>>
>> From the v6 discussion:
>>
>> EC> How about changing this instance to #ifdef ACTION_IMPL_AVX512? This way we only have one place where we have these compiler/arch checks.
>>
>> EF> V7 will not include a fix here, but we will investigate and report back to OVS ML with results of investigation.
>>
>> Not sure why you need further investigation? If ACTION_IMPL_AVX512 was not defined the above flags where false already in odp-execute-private.h.
>> I think the change should simply be this:
>>
>> -    #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
>> +    #ifdef ACTION_IMPL_AVX512
>>
>> Or am I missing something? I see one version on gcc complain about this, is this what you are figuring out?
>>
>> gcc (GCC) 11.3.1 20220421 works fine, gcc (GCC) 11.2.1 20220127 seems to report an issues:
>>
>>    lib/odp-execute-private.c:86:9: warning: iteration 2 invokes undefined behavior [-Waggressive-loop-optimizations]
>>
>> I do not see this problem with clang. Also the github actions script compile just fine. You might just be as unlucky as I was, and you have a broken compile version?
>>
>>> +    [ACTION_IMPL_AVX512] = {
>>> +        .available = false,
>>> +        .name = "avx512",
>>> +        .init_func = action_avx512_init,
>>> +    },
>>> +    #endif
>>>  };
>>>
>
> The #ifdef ACTION_IMPL_AVX512 is checking an enum in the C code, I didn’t think this was possibly and I tried but couldn't get this to work.
> As an alternative I refactored the compile checks to a similar way done in this patch from Sunil -
> https://patchwork.ozlabs.org/project/openvswitch/patch/20220704122700.54825-1-sunil.pai.g@intel.com/
> This will simplify the build time checks around the code.

Guess this enum part might be the reason it was working with some compiler version but not with an older one.

I’ll take another look at this in your v8, as I still need to take a look at Sunil’s patch…

Looking forward to your v8.

//Eelco
diff mbox series

Patch

diff --git a/lib/automake.mk b/lib/automake.mk
index cb50578eb..1a49dd30b 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -213,6 +213,8 @@  lib_libopenvswitch_la_SOURCES = \
 	lib/object-collection.h \
 	lib/odp-execute.c \
 	lib/odp-execute.h \
+	lib/odp-execute-private.c \
+	lib/odp-execute-private.h \
 	lib/odp-util.c \
 	lib/odp-util.h \
 	lib/ofp-actions.c \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ff57b3961..47dd7a1a6 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1675,6 +1675,10 @@  create_dpif_netdev(struct dp_netdev *dp)
     dpif->dp = dp;
     dpif->last_port_seq = seq_read(dp->port_seq);
 
+    /* Called once at initialization time. This handles setting up the state
+     * of the actions functions at init time. */
+    odp_execute_init();
+
     return &dpif->dpif;
 }
 
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
new file mode 100644
index 000000000..92db2386c
--- /dev/null
+++ b/lib/odp-execute-private.c
@@ -0,0 +1,80 @@ 
+/*
+ * Copyright (c) 2022 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "dpdk.h"
+#include "dp-packet.h"
+#include "odp-execute-private.h"
+#include "odp-netlink.h"
+#include "odp-util.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(odp_execute_impl);
+
+static struct odp_execute_action_impl action_impls[] = {
+    [ACTION_IMPL_SCALAR] = {
+        .available = false,
+        .name = "scalar",
+        .init_func = NULL,
+    },
+};
+
+static void
+action_impl_copy_funcs(struct odp_execute_action_impl *src,
+                       const struct odp_execute_action_impl *dst)
+{
+    for (int i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
+        atomic_store_relaxed(&src->funcs[i], dst->funcs[i]);
+    }
+}
+
+void
+odp_execute_action_init(void)
+{
+    /* Each impl's function array is initialized to reflect the scalar
+     * implementation. This simplifies adding optimized implementations,
+     * as the autovalidator can always compare all actions.
+     *
+     * Below will check if impl is available and copies the scalar functions
+     * to all other implementations.
+     */
+    for (int i = 0; i < ACTION_IMPL_MAX; i++) {
+        bool avail = true;
+
+        if (action_impls[i].init_func) {
+            /* Return zero is success, non-zero means error. */
+            avail = (action_impls[i].init_func(&action_impls[i]) == 0);
+        }
+
+        action_impls[i].available = avail;
+
+        if (i != ACTION_IMPL_SCALAR) {
+            action_impl_copy_funcs(&action_impls[i],
+                                   &action_impls[ACTION_IMPL_SCALAR]);
+        }
+
+        if (action_impls[i].available == true) {
+            action_impls[i].init_func(&action_impls[i]);
+        }
+
+        VLOG_INFO("Action implementation %s (available: %s)\n",
+                  action_impls[i].name, avail ? "available" : "not available");
+    }
+}
diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
new file mode 100644
index 000000000..08d1faf38
--- /dev/null
+++ b/lib/odp-execute-private.h
@@ -0,0 +1,74 @@ 
+/*
+ * Copyright (c) 2022 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ODP_EXTRACT_PRIVATE
+#define ODP_EXTRACT_PRIVATE 1
+
+#include "dp-packet.h"
+#include "odp-execute.h"
+#include "odp-netlink.h"
+#include "ovs-atomic.h"
+
+/* Forward declaration for typedef. */
+struct odp_execute_action_impl;
+
+/* Typedef for an initialization function that can initialize each
+ * implementation, checking requirements such as CPU ISA.
+ */
+typedef int (*odp_execute_action_init_func)
+                    (struct odp_execute_action_impl *self);
+
+/* Structure represents an implementation of the odp actions. */
+struct odp_execute_action_impl {
+    /* When set, the CPU ISA required for this implementation is available
+     * and the implementation can be used.
+     */
+    bool available;
+
+    /* Name of the implementation. */
+    const char *name;
+
+    /* Function is used to detect if this CPU has the ISA required
+     * to run the optimized action implementation and if available, initializes
+     * the implementation for use.
+     */
+    odp_execute_action_init_func init_func;
+
+    /* An array of callback functions, one for each action. */
+    ATOMIC(odp_execute_action_cb) funcs[__OVS_ACTION_ATTR_MAX];
+};
+
+/* Order of Actions implementations. */
+enum odp_execute_action_impl_idx {
+    ACTION_IMPL_SCALAR,
+    /* See ACTION_IMPL_BEGIN below, for "first to-be-validated" impl.
+     * Do not change the autovalidator position in this list without updating
+     * the define below.
+     */
+
+    ACTION_IMPL_MAX,
+};
+
+/* Index to start verifying implementations from. */
+BUILD_ASSERT_DECL(ACTION_IMPL_SCALAR == 0);
+
+/* Odp execute init handles setting up the state of the actions functions at
+ * initialization time. It cannot return errors, as it must always succeed in
+ * initializing the scalar/generic codepath.
+ */
+void odp_execute_action_init(void);
+
+#endif /* ODP_EXTRACT_PRIVATE */
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 7da56793d..67ebbe195 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -17,6 +17,7 @@ 
 
 #include <config.h>
 #include "odp-execute.h"
+#include "odp-execute-private.h"
 #include <sys/types.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
@@ -833,6 +834,23 @@  requires_datapath_assistance(const struct nlattr *a)
     return false;
 }
 
+/* The active function pointers on the datapath. ISA optimized implementations
+ * are enabled by plugging them into this static arary, which is consulted when
+ * applying actions on the datapath.
+ */
+static struct odp_execute_action_impl actions_active_impl;
+
+void
+odp_execute_init(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+    if (ovsthread_once_start(&once)) {
+        odp_execute_action_init();
+        ovsthread_once_done(&once);
+    }
+}
+
+
 /* Executes all of the 'actions_len' bytes of datapath actions in 'actions' on
  * the packets in 'batch'.  If 'steal' is true, possibly modifies and
  * definitely free the packets in 'batch', otherwise leaves 'batch' unchanged.
@@ -857,14 +875,12 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
 
     NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
         int type = nl_attr_type(a);
+        enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
         bool last_action = (left <= NLA_ALIGN(a->nla_len));
+        bool should_steal = steal && last_action;
 
         if (requires_datapath_assistance(a)) {
             if (dp_execute_action) {
-                /* Allow 'dp_execute_action' to steal the packet data if we do
-                 * not need it any more. */
-                bool should_steal = steal && last_action;
-
                 dp_execute_action(dp, batch, a, should_steal);
 
                 if (last_action || dp_packet_batch_is_empty(batch)) {
@@ -879,8 +895,20 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             continue;
         }
 
-        switch ((enum ovs_action_attr) type) {
+        /* If type is set in the active actions implementation, call the
+         * function-pointer and continue to the next action.
+         */
+        if (actions_active_impl.funcs[attr_type] &&
+            attr_type <= OVS_ACTION_ATTR_MAX) {
+            actions_active_impl.funcs[attr_type](batch, a);
+            continue;
+        }
+
+        /* If the action was not handled by the active function pointers above,
+         * process them by switching on the type below.
+         */
 
+        switch (attr_type) {
         case OVS_ACTION_ATTR_HASH: {
             const struct ovs_action_hash *hash_act = nl_attr_get(a);
 
@@ -1094,6 +1122,9 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case __OVS_ACTION_ATTR_MAX:
             OVS_NOT_REACHED();
         }
+
+        /* Do not add any generic processing here, as it won't be executed when
+         * an ISA-specific action implementation exists. */
     }
 
     dp_packet_delete_batch(batch, steal);
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index a3578a575..0921ee924 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -28,6 +28,13 @@  struct dp_packet;
 struct pkt_metadata;
 struct dp_packet_batch;
 
+
+/* Called once at initialization time. */
+void odp_execute_init(void);
+
+typedef void (*odp_execute_action_cb)(struct dp_packet_batch *batch,
+                                      const struct nlattr *action);
+
 typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
                                const struct nlattr *action, bool should_steal);