diff mbox series

[ovs-dev,v6,10/11] odp-execute: Add ISA implementation of set_masked ETH action

Message ID 20220510142202.1087967-11-emma.finn@intel.com
State Changes Requested
Headers show
Series Actions Infrastructure + Optimizations | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Emma Finn May 10, 2022, 2:22 p.m. UTC
This commit includes infrastructure changes for enabling set_masked_X
actions and also adds support for the AVX512 implementation of the
eth_set_addrs action.

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 .../linux/compat/include/linux/openvswitch.h  |  2 +-
 lib/odp-execute-avx512.c                      | 56 ++++++++++++++-
 lib/odp-execute-private.c                     | 68 ++++++++++++++++--
 lib/odp-execute-private.h                     |  5 +-
 lib/odp-execute.c                             | 69 ++++++++++++++-----
 lib/odp-execute.h                             |  4 +-
 6 files changed, 174 insertions(+), 30 deletions(-)

Comments

Eelco Chaudron June 2, 2022, 2:40 p.m. UTC | #1
On 10 May 2022, at 16:22, Emma Finn wrote:

> This commit includes infrastructure changes for enabling set_masked_X
> actions and also adds support for the AVX512 implementation of the
> eth_set_addrs action.
>
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  .../linux/compat/include/linux/openvswitch.h  |  2 +-
>  lib/odp-execute-avx512.c                      | 56 ++++++++++++++-
>  lib/odp-execute-private.c                     | 68 ++++++++++++++++--
>  lib/odp-execute-private.h                     |  5 +-
>  lib/odp-execute.c                             | 69 ++++++++++++++-----
>  lib/odp-execute.h                             |  4 +-
>  6 files changed, 174 insertions(+), 30 deletions(-)
>
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
> index 8bb5abdc8..ccb54d6c6 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -473,8 +473,8 @@ enum ovs_frag_type {
>  #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
>
>  struct ovs_key_ethernet {
> -	__u8	 eth_src[ETH_ALEN];
>  	__u8	 eth_dst[ETH_ALEN];
> +	__u8	 eth_src[ETH_ALEN];
>  };

This is a red flag, you are changing a kernel UAPI data structure :)

I guess this is why the datapath tests are failing, can you please make sure you are not breaking any dp tests in the next patch version?

Please run the following (in addition to the normal make check) and make sure you have zero failures, and only skipped tests due to missing features (not due to missing test infrastructure):

  make check-kernel
  make check-offloads
  make check-system-userspace
  make check-dpdk
  make check-afxdp

>
>  struct ovs_key_mpls {
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index 5d095f867..ede00b750 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -38,6 +38,12 @@ BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
>                             MEMBER_SIZEOF(struct dp_packet, l3_ofs) ==
>                             offsetof(struct dp_packet, l4_ofs));
>
> +BUILD_ASSERT_DECL(offsetof(struct ovs_key_ethernet, eth_dst) +
> +                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst) ==
> +                  offsetof(struct ovs_key_ethernet, eth_src));
> +

Add some help why this build assert exists

> +static struct odp_execute_action_impl active_impl;

The name makes no sense to me, I would call this avx512_impl, as this might not be the active implementation.

> +
>  static inline void ALWAYS_INLINE
>  avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
>  {
> @@ -139,6 +145,51 @@ action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
>      }
>  }
>
> +static void
> +action_avx512_eth_set_addrs(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> +                       const struct nlattr *a,
> +                       bool should_steal OVS_UNUSED)
> +{
> +    a = nl_attr_get(a);
> +    const struct ovs_key_ethernet *key = nl_attr_get(a);
> +    const struct ovs_key_ethernet *mask = get_mask(a, struct ovs_key_ethernet);
> +    struct dp_packet *packet;
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +
> +        struct eth_header *eh = dp_packet_eth(packet);
> +
> +        if (!eh) {
> +            continue;
> +        }

Did not review this code yet (see below/above), but please add enough details for people to understand it by reading the comments.

> +        __m128i v_src = _mm_maskz_loadu_epi16(0x3F, key);
> +        __m128i v_mask = _mm_maskz_loadu_epi16(0x3F, mask);
> +        __m128i v_dst = _mm_maskz_loadu_epi16(0xFF, eh);
> +
> +        __m128i dst_masked = _mm_andnot_si128(v_mask, v_dst);
> +        __m128i res = _mm_or_si128(v_src, dst_masked);
> +
> +        __m128i res_blend = _mm_blend_epi16(v_dst, res, 0x3F);
> +        _mm_storeu_si128((void *) eh, res_blend);
> +    }
> +}
> +
> +static void
> +action_avx512_set_masked(void *dp OVS_UNUSED,
> +                         struct dp_packet_batch *batch OVS_UNUSED,
> +                         const struct nlattr *a,
> +                         bool should_steal OVS_UNUSED)
> +{
> +    a = nl_attr_get(a);
> +    enum ovs_key_attr attr_type = nl_attr_type(a);
> +
> +    if (active_impl.set_masked_funcs[attr_type]) {
> +        active_impl.set_masked_funcs[attr_type](NULL, batch, a, should_steal);
> +    }
> +
> +}
> +
>  /* Probe functions to check ISA requirements. */
>  static int32_t
>  avx512_isa_probe(uint32_t needs_vbmi)
> @@ -173,13 +224,16 @@ action_avx512_probe(void)
>      return avx512_isa_probe(needs_vbmi);
>  }
>
> -
>  int32_t
>  action_avx512_init(struct odp_execute_action_impl *self)
>  {
>      avx512_isa_probe(0);
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
>      self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
> +    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_avx512_set_masked;
> +    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] =
> +                            action_avx512_eth_set_addrs;
> +    active_impl = *self;
>
>      return 0;
>  }
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> index 6b09ad353..34f13523a 100644
> --- a/lib/odp-execute-private.c
> +++ b/lib/odp-execute-private.c
> @@ -31,6 +31,8 @@ int32_t action_autoval_init(struct odp_execute_action_impl *self);
>  VLOG_DEFINE_THIS_MODULE(odp_execute_private);
>  static uint32_t active_action_impl_index;
>  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +static struct odp_execute_action_impl active_impl;
> +static bool set_masked = false;
>
>  static struct odp_execute_action_impl action_impls[] = {
>      [ACTION_IMPL_AUTOVALIDATOR] = {
> @@ -63,6 +65,11 @@ action_impl_init_funcs(struct odp_execute_action_impl *to)
>      for (uint32_t i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
>          atomic_init(&to->funcs[i], NULL);
>      }
> +
> +    for (uint32_t i = 0; i < __OVS_KEY_ATTR_MAX; i++) {
> +        atomic_init(&to->set_masked_funcs[i], NULL);
> +    }
> +
>  }
>
>  static void
> @@ -72,6 +79,11 @@ action_impl_copy_funcs(struct odp_execute_action_impl *to,
>      for (uint32_t i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
>          atomic_store_relaxed(&to->funcs[i], from->funcs[i]);
>      }
> +
> +    for (uint32_t i = 0; i < __OVS_KEY_ATTR_MAX; i++) {
> +        atomic_store_relaxed(&to->set_masked_funcs[i],
> +                             from->set_masked_funcs[i]);
> +    }
>  }
>
>  int32_t
> @@ -155,21 +167,40 @@ action_autoval_generic(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
>      uint32_t failed = 0;
>
>      int type = nl_attr_type(a);
> -    enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
> +    enum ovs_action_attr action_attr_type = (enum ovs_action_attr) type;
> +    enum ovs_key_attr key_attr_type = (enum ovs_key_attr) type;
> +
> +    if (action_attr_type == OVS_ACTION_ATTR_SET_MASKED) {
> +        set_masked = true;
> +        const struct nlattr *key = nl_attr_get(a);
> +        key_attr_type = nl_attr_type(key);
> +    }
>
>      struct odp_execute_action_impl *scalar = &action_impls[ACTION_IMPL_SCALAR];
>
>      struct dp_packet_batch good_batch;
>      dp_packet_batch_clone(&good_batch, batch);
> -
> -    scalar->funcs[attr_type](NULL, &good_batch, a, should_steal);
> +    if (!set_masked) {
> +        scalar->funcs[action_attr_type](NULL, &good_batch, a, should_steal);
> +    } else {
> +        scalar->set_masked_funcs[key_attr_type](NULL, &good_batch, a,
> +                                                should_steal);
> +    }
>
>      for (uint32_t impl = ACTION_IMPL_BEGIN; impl < ACTION_IMPL_MAX; impl++) {
>          /* Clone original batch and execute implementation under test. */
>          struct dp_packet_batch test_batch;
>          dp_packet_batch_clone(&test_batch, batch);
> -        action_impls[impl].funcs[attr_type](NULL, &test_batch, a,
> -                                            should_steal);
> +
> +        if (!set_masked) {
> +            action_impls[impl].funcs[action_attr_type](NULL, &test_batch, a,
> +                                                       should_steal);
> +        } else {
> +            action_impls[impl].set_masked_funcs[key_attr_type](NULL,
> +                                                               &test_batch,
> +                                                               a,
> +                                                               should_steal);
> +        }
>
>          /* Loop over implementations, checking each one. */
>          for (uint32_t pidx = 0; pidx < batch->count; pidx++) {
> @@ -221,7 +252,29 @@ action_autoval_generic(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
>      dp_packet_delete_batch(&good_batch, 1);
>
>      /* Apply the action to the original batch for continued processing. */
> -    scalar->funcs[attr_type](NULL, batch, a, should_steal);
> +    if (!set_masked) {
> +        scalar->funcs[action_attr_type](NULL, batch, a, should_steal);
> +    } else {
> +        scalar->set_masked_funcs[key_attr_type](NULL, batch, a, should_steal);
> +    }
> +
> +    set_masked = false;
> +}
> +
> +static void
> +action_set_masked_init(void *dp OVS_UNUSED,
> +                       struct dp_packet_batch *batch OVS_UNUSED,
> +                       const struct nlattr *a,
> +                       bool should_steal OVS_UNUSED)
> +{
> +    a = nl_attr_get(a);
> +    enum ovs_key_attr attr_type = nl_attr_type(a);
> +
> +    if (active_impl.set_masked_funcs[attr_type]) {
> +        set_masked = true;
> +        active_impl.set_masked_funcs[attr_type](NULL, batch, a, should_steal);
> +    }
> +
>  }
>
>  int32_t
> @@ -229,6 +282,9 @@ action_autoval_init(struct odp_execute_action_impl *self)
>  {
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
>      self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
> +    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked_init;
> +    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_autoval_generic;
> +    active_impl = *self;
>
>      return 0;
>  }
> diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
> index 231d72492..2c0233f10 100644
> --- a/lib/odp-execute-private.h
> +++ b/lib/odp-execute-private.h
> @@ -60,7 +60,9 @@ struct odp_execute_action_impl {
>      odp_execute_action_init_func init_func;
>
>      /* An array of callback functions, one for each action. */
> -    ATOMIC(odp_execute_cb) funcs[__OVS_KEY_ATTR_MAX];
> +    ATOMIC(odp_execute_cb) funcs[__OVS_ACTION_ATTR_MAX];
> +    /* An array of callback functions, one for each key. */
> +    ATOMIC(odp_execute_cb) set_masked_funcs[__OVS_KEY_ATTR_MAX];
>  };
>
>  /* Order of Actions implementations. */
> @@ -93,7 +95,6 @@ void odp_execute_action_init(void);
>  void odp_execute_action_get(struct ds *name);
>  int32_t odp_execute_action_set(const char *name,
>                                 struct odp_execute_action_impl *active);
> -
>  /* Init function for the scalar implementation. Calls into the odp-execute.c
>   * file, and initializes the function pointers for optimized action types.
>   */
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 690e7e1ce..5c4dd8e33 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -561,8 +561,6 @@ odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
>      }
>  }
>
> -#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
> -
>  static void
>  odp_execute_masked_set_action(struct dp_packet *packet,
>                                const struct nlattr *a)
> @@ -582,11 +580,6 @@ odp_execute_masked_set_action(struct dp_packet *packet,
>              | (md->pkt_mark & ~*get_mask(a, uint32_t));
>          break;
>
> -    case OVS_KEY_ATTR_ETHERNET:
> -        odp_eth_set_addrs(packet, nl_attr_get(a),
> -                          get_mask(a, struct ovs_key_ethernet));
> -        break;
> -
>      case OVS_KEY_ATTR_NSH: {
>          odp_set_nsh(packet, a, true);
>          break;
> @@ -661,6 +654,7 @@ odp_execute_masked_set_action(struct dp_packet *packet,
>      case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
>      case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
>      case OVS_KEY_ATTR_ENCAP:
> +    case OVS_KEY_ATTR_ETHERNET:
>      case OVS_KEY_ATTR_ETHERTYPE:
>      case OVS_KEY_ATTR_IN_PORT:
>      case OVS_KEY_ATTR_VLAN:
> @@ -833,6 +827,11 @@ requires_datapath_assistance(const struct nlattr *a)
>
>      return false;
>  }
> +/* The active function pointers on the datapath. ISA optimized implementations
> + * are enabled by plugging them into this static arary, which is consulted when
> + * applying actions on the datapath.
> + */
> +static struct odp_execute_action_impl actions_active_impl;
>
>  static void
>  action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> @@ -858,6 +857,41 @@ action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
>      }
>  }
>
> +static void
> +action_set_masked(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> +                const struct nlattr *a OVS_UNUSED,
> +                bool should_steal OVS_UNUSED)
> +{
> +    struct dp_packet *packet;
> +
> +    const struct nlattr *key = nl_attr_get(a);
> +    enum ovs_key_attr key_type = nl_attr_type(key);
> +
> +    if (actions_active_impl.set_masked_funcs[key_type]) {
> +        actions_active_impl.set_masked_funcs[key_type](NULL, batch, a,
> +                                                       should_steal);
> +    } else {
> +        a = nl_attr_get(a);
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +            odp_execute_masked_set_action(packet, a);
> +        }
> +    }
> +}
> +
> +static void
> +action_mod_eth(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> +                const struct nlattr *a OVS_UNUSED,
> +                bool should_steal OVS_UNUSED)
> +{
> +    a = nl_attr_get(a);
> +    struct dp_packet *packet;
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        odp_eth_set_addrs(packet, nl_attr_get(a),
> +                          get_mask(a, struct ovs_key_ethernet));
> +    }
> +}
> +
>  /* Implementation of the scalar actions impl init function. Build up the
>   * array of func ptrs here.
>   */
> @@ -866,15 +900,13 @@ odp_action_scalar_init(struct odp_execute_action_impl *self)
>  {
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
>      self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
> +    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked;
> +    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_mod_eth;
> +    actions_active_impl = *self;
>
>      return 0;
>  }
>
> -/* The active function pointers on the datapath. ISA optimized implementations
> - * are enabled by plugging them into this static arary, which is consulted when
> - * applying actions on the datapath.
> - */
> -static struct odp_execute_action_impl actions_active_impl;
>
>  void
>  odp_execute_init(void)
> @@ -952,7 +984,11 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>           * function-pointer and continue to the next action.
>           */
>          enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
> -        if (actions_active_impl.funcs[attr_type]) {
> +
> +        if (attr_type == OVS_ACTION_ATTR_SET_MASKED) {
> +            action_set_masked(NULL, batch, a, should_steal);
> +            continue;


I have not reviewed this patch other than this section (and the comment above), as I do not think this is the right approach.

OVS_ACTION_ATTR_SET_MASKED should be handled the same way as any other actions, and the action_set_masked() type of handling should be hidden in the SIMD specific code.
Especially as OVS_ACTION_ATTR_* are not mask types, just data types in general.

If a mask is not supported it should be handled by the scalar implementation, see my general note in patch 2, “I have another general architecture question?...."

> +        } else if (actions_active_impl.funcs[attr_type]) {
>              actions_active_impl.funcs[attr_type](NULL, batch, a, should_steal);
>              continue;
>          }
> @@ -1029,12 +1065,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>              }
>              break;
>
> -        case OVS_ACTION_ATTR_SET_MASKED:
> -            DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
> -                odp_execute_masked_set_action(packet, nl_attr_get(a));
> -            }
> -            break;
> -
>          case OVS_ACTION_ATTR_SAMPLE:
>              DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>                  odp_execute_sample(dp, packet, steal && last_action, a,
> @@ -1153,6 +1183,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>          case OVS_ACTION_ATTR_LB_OUTPUT:
>          case OVS_ACTION_ATTR_POP_VLAN:
>          case OVS_ACTION_ATTR_PUSH_VLAN:
> +        case OVS_ACTION_ATTR_SET_MASKED:

Move to special section to be created.

>          case OVS_ACTION_ATTR_TUNNEL_PUSH:
>          case OVS_ACTION_ATTR_TUNNEL_POP:
>          case OVS_ACTION_ATTR_USERSPACE:
> diff --git a/lib/odp-execute.h b/lib/odp-execute.h
> index 4f4cdc4ac..a14af2b4e 100644
> --- a/lib/odp-execute.h
> +++ b/lib/odp-execute.h
> @@ -34,7 +34,6 @@ struct dp_packet_batch;
>  void odp_execute_init(void);
>
>  /* Runtime update get/set functionality. */
> -int32_t odp_actions_impl_get(struct ds *name);

Why is get removed here? If it was added by mistake before, it should be removed in that patch.

>  int32_t odp_actions_impl_set(const char *name);
>
>  typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
> @@ -48,4 +47,7 @@ void odp_execute_actions(void *dp, struct dp_packet_batch *batch,
>                           bool steal,
>                           const struct nlattr *actions, size_t actions_len,
>                           odp_execute_cb dp_execute_action);
> +
> +#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
> +
>  #endif
> -- 
> 2.25.1
diff mbox series

Patch

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 8bb5abdc8..ccb54d6c6 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -473,8 +473,8 @@  enum ovs_frag_type {
 #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
 
 struct ovs_key_ethernet {
-	__u8	 eth_src[ETH_ALEN];
 	__u8	 eth_dst[ETH_ALEN];
+	__u8	 eth_src[ETH_ALEN];
 };
 
 struct ovs_key_mpls {
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 5d095f867..ede00b750 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -38,6 +38,12 @@  BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
                            MEMBER_SIZEOF(struct dp_packet, l3_ofs) ==
                            offsetof(struct dp_packet, l4_ofs));
 
+BUILD_ASSERT_DECL(offsetof(struct ovs_key_ethernet, eth_dst) +
+                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst) ==
+                  offsetof(struct ovs_key_ethernet, eth_src));
+
+static struct odp_execute_action_impl active_impl;
+
 static inline void ALWAYS_INLINE
 avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
 {
@@ -139,6 +145,51 @@  action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
     }
 }
 
+static void
+action_avx512_eth_set_addrs(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+                       const struct nlattr *a,
+                       bool should_steal OVS_UNUSED)
+{
+    a = nl_attr_get(a);
+    const struct ovs_key_ethernet *key = nl_attr_get(a);
+    const struct ovs_key_ethernet *mask = get_mask(a, struct ovs_key_ethernet);
+    struct dp_packet *packet;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+
+        struct eth_header *eh = dp_packet_eth(packet);
+
+        if (!eh) {
+            continue;
+        }
+
+        __m128i v_src = _mm_maskz_loadu_epi16(0x3F, key);
+        __m128i v_mask = _mm_maskz_loadu_epi16(0x3F, mask);
+        __m128i v_dst = _mm_maskz_loadu_epi16(0xFF, eh);
+
+        __m128i dst_masked = _mm_andnot_si128(v_mask, v_dst);
+        __m128i res = _mm_or_si128(v_src, dst_masked);
+
+        __m128i res_blend = _mm_blend_epi16(v_dst, res, 0x3F);
+        _mm_storeu_si128((void *) eh, res_blend);
+    }
+}
+
+static void
+action_avx512_set_masked(void *dp OVS_UNUSED,
+                         struct dp_packet_batch *batch OVS_UNUSED,
+                         const struct nlattr *a,
+                         bool should_steal OVS_UNUSED)
+{
+    a = nl_attr_get(a);
+    enum ovs_key_attr attr_type = nl_attr_type(a);
+
+    if (active_impl.set_masked_funcs[attr_type]) {
+        active_impl.set_masked_funcs[attr_type](NULL, batch, a, should_steal);
+    }
+
+}
+
 /* Probe functions to check ISA requirements. */
 static int32_t
 avx512_isa_probe(uint32_t needs_vbmi)
@@ -173,13 +224,16 @@  action_avx512_probe(void)
     return avx512_isa_probe(needs_vbmi);
 }
 
-
 int32_t
 action_avx512_init(struct odp_execute_action_impl *self)
 {
     avx512_isa_probe(0);
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_avx512_set_masked;
+    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] =
+                            action_avx512_eth_set_addrs;
+    active_impl = *self;
 
     return 0;
 }
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 6b09ad353..34f13523a 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -31,6 +31,8 @@  int32_t action_autoval_init(struct odp_execute_action_impl *self);
 VLOG_DEFINE_THIS_MODULE(odp_execute_private);
 static uint32_t active_action_impl_index;
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+static struct odp_execute_action_impl active_impl;
+static bool set_masked = false;
 
 static struct odp_execute_action_impl action_impls[] = {
     [ACTION_IMPL_AUTOVALIDATOR] = {
@@ -63,6 +65,11 @@  action_impl_init_funcs(struct odp_execute_action_impl *to)
     for (uint32_t i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
         atomic_init(&to->funcs[i], NULL);
     }
+
+    for (uint32_t i = 0; i < __OVS_KEY_ATTR_MAX; i++) {
+        atomic_init(&to->set_masked_funcs[i], NULL);
+    }
+
 }
 
 static void
@@ -72,6 +79,11 @@  action_impl_copy_funcs(struct odp_execute_action_impl *to,
     for (uint32_t i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
         atomic_store_relaxed(&to->funcs[i], from->funcs[i]);
     }
+
+    for (uint32_t i = 0; i < __OVS_KEY_ATTR_MAX; i++) {
+        atomic_store_relaxed(&to->set_masked_funcs[i],
+                             from->set_masked_funcs[i]);
+    }
 }
 
 int32_t
@@ -155,21 +167,40 @@  action_autoval_generic(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
     uint32_t failed = 0;
 
     int type = nl_attr_type(a);
-    enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
+    enum ovs_action_attr action_attr_type = (enum ovs_action_attr) type;
+    enum ovs_key_attr key_attr_type = (enum ovs_key_attr) type;
+
+    if (action_attr_type == OVS_ACTION_ATTR_SET_MASKED) {
+        set_masked = true;
+        const struct nlattr *key = nl_attr_get(a);
+        key_attr_type = nl_attr_type(key);
+    }
 
     struct odp_execute_action_impl *scalar = &action_impls[ACTION_IMPL_SCALAR];
 
     struct dp_packet_batch good_batch;
     dp_packet_batch_clone(&good_batch, batch);
-
-    scalar->funcs[attr_type](NULL, &good_batch, a, should_steal);
+    if (!set_masked) {
+        scalar->funcs[action_attr_type](NULL, &good_batch, a, should_steal);
+    } else {
+        scalar->set_masked_funcs[key_attr_type](NULL, &good_batch, a,
+                                                should_steal);
+    }
 
     for (uint32_t impl = ACTION_IMPL_BEGIN; impl < ACTION_IMPL_MAX; impl++) {
         /* Clone original batch and execute implementation under test. */
         struct dp_packet_batch test_batch;
         dp_packet_batch_clone(&test_batch, batch);
-        action_impls[impl].funcs[attr_type](NULL, &test_batch, a,
-                                            should_steal);
+
+        if (!set_masked) {
+            action_impls[impl].funcs[action_attr_type](NULL, &test_batch, a,
+                                                       should_steal);
+        } else {
+            action_impls[impl].set_masked_funcs[key_attr_type](NULL,
+                                                               &test_batch,
+                                                               a,
+                                                               should_steal);
+        }
 
         /* Loop over implementations, checking each one. */
         for (uint32_t pidx = 0; pidx < batch->count; pidx++) {
@@ -221,7 +252,29 @@  action_autoval_generic(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
     dp_packet_delete_batch(&good_batch, 1);
 
     /* Apply the action to the original batch for continued processing. */
-    scalar->funcs[attr_type](NULL, batch, a, should_steal);
+    if (!set_masked) {
+        scalar->funcs[action_attr_type](NULL, batch, a, should_steal);
+    } else {
+        scalar->set_masked_funcs[key_attr_type](NULL, batch, a, should_steal);
+    }
+
+    set_masked = false;
+}
+
+static void
+action_set_masked_init(void *dp OVS_UNUSED,
+                       struct dp_packet_batch *batch OVS_UNUSED,
+                       const struct nlattr *a,
+                       bool should_steal OVS_UNUSED)
+{
+    a = nl_attr_get(a);
+    enum ovs_key_attr attr_type = nl_attr_type(a);
+
+    if (active_impl.set_masked_funcs[attr_type]) {
+        set_masked = true;
+        active_impl.set_masked_funcs[attr_type](NULL, batch, a, should_steal);
+    }
+
 }
 
 int32_t
@@ -229,6 +282,9 @@  action_autoval_init(struct odp_execute_action_impl *self)
 {
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked_init;
+    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_autoval_generic;
+    active_impl = *self;
 
     return 0;
 }
diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
index 231d72492..2c0233f10 100644
--- a/lib/odp-execute-private.h
+++ b/lib/odp-execute-private.h
@@ -60,7 +60,9 @@  struct odp_execute_action_impl {
     odp_execute_action_init_func init_func;
 
     /* An array of callback functions, one for each action. */
-    ATOMIC(odp_execute_cb) funcs[__OVS_KEY_ATTR_MAX];
+    ATOMIC(odp_execute_cb) funcs[__OVS_ACTION_ATTR_MAX];
+    /* An array of callback functions, one for each key. */
+    ATOMIC(odp_execute_cb) set_masked_funcs[__OVS_KEY_ATTR_MAX];
 };
 
 /* Order of Actions implementations. */
@@ -93,7 +95,6 @@  void odp_execute_action_init(void);
 void odp_execute_action_get(struct ds *name);
 int32_t odp_execute_action_set(const char *name,
                                struct odp_execute_action_impl *active);
-
 /* Init function for the scalar implementation. Calls into the odp-execute.c
  * file, and initializes the function pointers for optimized action types.
  */
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 690e7e1ce..5c4dd8e33 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -561,8 +561,6 @@  odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
     }
 }
 
-#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
-
 static void
 odp_execute_masked_set_action(struct dp_packet *packet,
                               const struct nlattr *a)
@@ -582,11 +580,6 @@  odp_execute_masked_set_action(struct dp_packet *packet,
             | (md->pkt_mark & ~*get_mask(a, uint32_t));
         break;
 
-    case OVS_KEY_ATTR_ETHERNET:
-        odp_eth_set_addrs(packet, nl_attr_get(a),
-                          get_mask(a, struct ovs_key_ethernet));
-        break;
-
     case OVS_KEY_ATTR_NSH: {
         odp_set_nsh(packet, a, true);
         break;
@@ -661,6 +654,7 @@  odp_execute_masked_set_action(struct dp_packet *packet,
     case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
     case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
     case OVS_KEY_ATTR_ENCAP:
+    case OVS_KEY_ATTR_ETHERNET:
     case OVS_KEY_ATTR_ETHERTYPE:
     case OVS_KEY_ATTR_IN_PORT:
     case OVS_KEY_ATTR_VLAN:
@@ -833,6 +827,11 @@  requires_datapath_assistance(const struct nlattr *a)
 
     return false;
 }
+/* The active function pointers on the datapath. ISA optimized implementations
+ * are enabled by plugging them into this static arary, which is consulted when
+ * applying actions on the datapath.
+ */
+static struct odp_execute_action_impl actions_active_impl;
 
 static void
 action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
@@ -858,6 +857,41 @@  action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
     }
 }
 
+static void
+action_set_masked(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+                const struct nlattr *a OVS_UNUSED,
+                bool should_steal OVS_UNUSED)
+{
+    struct dp_packet *packet;
+
+    const struct nlattr *key = nl_attr_get(a);
+    enum ovs_key_attr key_type = nl_attr_type(key);
+
+    if (actions_active_impl.set_masked_funcs[key_type]) {
+        actions_active_impl.set_masked_funcs[key_type](NULL, batch, a,
+                                                       should_steal);
+    } else {
+        a = nl_attr_get(a);
+        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+            odp_execute_masked_set_action(packet, a);
+        }
+    }
+}
+
+static void
+action_mod_eth(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+                const struct nlattr *a OVS_UNUSED,
+                bool should_steal OVS_UNUSED)
+{
+    a = nl_attr_get(a);
+    struct dp_packet *packet;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        odp_eth_set_addrs(packet, nl_attr_get(a),
+                          get_mask(a, struct ovs_key_ethernet));
+    }
+}
+
 /* Implementation of the scalar actions impl init function. Build up the
  * array of func ptrs here.
  */
@@ -866,15 +900,13 @@  odp_action_scalar_init(struct odp_execute_action_impl *self)
 {
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked;
+    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_mod_eth;
+    actions_active_impl = *self;
 
     return 0;
 }
 
-/* The active function pointers on the datapath. ISA optimized implementations
- * are enabled by plugging them into this static arary, which is consulted when
- * applying actions on the datapath.
- */
-static struct odp_execute_action_impl actions_active_impl;
 
 void
 odp_execute_init(void)
@@ -952,7 +984,11 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
          * function-pointer and continue to the next action.
          */
         enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
-        if (actions_active_impl.funcs[attr_type]) {
+
+        if (attr_type == OVS_ACTION_ATTR_SET_MASKED) {
+            action_set_masked(NULL, batch, a, should_steal);
+            continue;
+        } else if (actions_active_impl.funcs[attr_type]) {
             actions_active_impl.funcs[attr_type](NULL, batch, a, should_steal);
             continue;
         }
@@ -1029,12 +1065,6 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             }
             break;
 
-        case OVS_ACTION_ATTR_SET_MASKED:
-            DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
-                odp_execute_masked_set_action(packet, nl_attr_get(a));
-            }
-            break;
-
         case OVS_ACTION_ATTR_SAMPLE:
             DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                 odp_execute_sample(dp, packet, steal && last_action, a,
@@ -1153,6 +1183,7 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_LB_OUTPUT:
         case OVS_ACTION_ATTR_POP_VLAN:
         case OVS_ACTION_ATTR_PUSH_VLAN:
+        case OVS_ACTION_ATTR_SET_MASKED:
         case OVS_ACTION_ATTR_TUNNEL_PUSH:
         case OVS_ACTION_ATTR_TUNNEL_POP:
         case OVS_ACTION_ATTR_USERSPACE:
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 4f4cdc4ac..a14af2b4e 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -34,7 +34,6 @@  struct dp_packet_batch;
 void odp_execute_init(void);
 
 /* Runtime update get/set functionality. */
-int32_t odp_actions_impl_get(struct ds *name);
 int32_t odp_actions_impl_set(const char *name);
 
 typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
@@ -48,4 +47,7 @@  void odp_execute_actions(void *dp, struct dp_packet_batch *batch,
                          bool steal,
                          const struct nlattr *actions, size_t actions_len,
                          odp_execute_cb dp_execute_action);
+
+#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
+
 #endif