diff mbox series

[ovs-dev,v7,10/11] odp-execute: Add ISA implementation of set_masked ETH

Message ID 20220614115743.1143341-11-emma.finn@intel.com
State Changes Requested
Headers show
Series [ovs-dev,v7,01/11] ofproto-dpif: Fix incorrect checksums in input packets | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Emma Finn June 14, 2022, 11:57 a.m. UTC
This commit includes infrastructure changes for enabling set_masked_X
actions and also adds support for the AVX512 implementation of the
eth_set_addrs action.

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 lib/odp-execute-avx512.c  | 69 +++++++++++++++++++++++++++++++++++++++
 lib/odp-execute-private.c | 56 +++++++++++++++++++++++++++++--
 lib/odp-execute-private.h |  4 +++
 lib/odp-execute.c         | 65 +++++++++++++++++++++++++-----------
 lib/odp-execute.h         |  3 ++
 5 files changed, 175 insertions(+), 22 deletions(-)

Comments

Eelco Chaudron June 23, 2022, 3:38 p.m. UTC | #1
On 14 Jun 2022, at 13:57, Emma Finn wrote:

> This commit includes infrastructure changes for enabling set_masked_X
> actions and also adds support for the AVX512 implementation of the
> eth_set_addrs action.
>
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---

One small question on this piece of code:

+/* This function will load the contents of eth_header into a 128-bit wide
+ * register. Then an 8-byte shuffle is required to shuffle both key and
+ * mask to match the layout of the eth_header struct. A bitwise ANDNOT and OR
+ * is performed on the entire header and results are stored back. */
+static void
+action_avx512_eth_set_addrs(struct dp_packet_batch *batch,
+                            const struct nlattr *a)
+{
+    a = nl_attr_get(a);
+    const struct ovs_key_ethernet *key = nl_attr_get(a);
+    const struct ovs_key_ethernet *mask = get_mask(a, struct ovs_key_ethernet);
+    struct dp_packet *packet;
+
+    __m128i v_src = _mm_loadu_si128((void *) key);
+    __m128i v_mask = _mm_loadu_si128((void *) mask);

The second load, loads 128 bits of data, but there are only 12 bytes to load. What happens if the memory at the remaining 6 bytes are not mapped in memory? Will we crash!?
Guess the key is fine, as we will read some bytes of the mask data.


+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+
+        struct eth_header *eh = dp_packet_eth(packet);
+
+        if (!eh) {
+            continue;
+        }
+
+        static const uint8_t eth_shuffle[16] = {
+            6, 7, 8, 9, 10, 11, 0, 1,
+            2, 3, 4, 5, 12, 13, 14, 15
+        };

As the last 4 bytes are not part of the read data structure, should we set them to 0xff?

+
+        __m128i v_dst = _mm_loadu_si128((void *) eh);
+        __m128i v_shuf = _mm_loadu_si128((void *) eth_shuffle);
+
+        v_src = _mm_shuffle_epi8(v_src, v_shuf);
+        v_mask = _mm_shuffle_epi8(v_mask, v_shuf);

Can the above 3 code lines not be move outside the DP_PACKET_BATCH_FOR_EACH() loop as they are constant for all iterations?

+
+        __m128i dst_masked = _mm_andnot_si128(v_mask, v_dst);
+        __m128i res = _mm_or_si128(v_src, dst_masked);
+
+        __m128i res_blend = _mm_blend_epi16(v_dst, res, 0x3F);
+        _mm_storeu_si128((void *) eh, res_blend);


In general on the v6 patch, I requested you to move the mask handling into the implementation-specific code.
I was assuming this would result in only changes to the AVX portion, and not to the general infrastructure.
You made all handling still part of the general infrastructure, which is not the right place for sub-offloads of actions for a specific implementation.

So to speed things up I wrote a new v7+ patch with the changes I had in mind. I wanted to do a diff first, but due to other suggested changes, this was not working out well.

Please take a look and let me know what you think?! It also has some general changes based on comments in earlier patches:

======================

commit 7e0022dd335ecc1baad3365c4389973091d9f68d
Author: Emma Finn <emma.finn@intel.com>
Date:   Tue Jun 14 11:57:42 2022 +0000

    odp-execute: Add ISA implementation of set_masked ETH

    This commit includes infrastructure changes for enabling set_masked_X
    actions and also adds support for the AVX512 implementation of the
    eth_set_addrs action.

    Signed-off-by: Emma Finn <emma.finn@intel.com>

    p10_changes-----------------------------------------------------------

    my changes to the mocked up p10

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 54313cd1f..ddfbef6a7 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -31,6 +31,9 @@

 VLOG_DEFINE_THIS_MODULE(odp_execute_avx512);

+/* Array of callback functions, one for each masked operation. */
+odp_execute_action_cb impl_set_masked_funcs[__OVS_KEY_ATTR_MAX];
+
 /* The below three build asserts make sure that l2_5_ofs, l3_ofs, and l4_ofs
  * fields remain in the same order and offset to l2_padd_size. This is needed
  * as the avx512_dp_packet_resize_l2() function will manipulate those fields at
@@ -52,6 +55,13 @@ BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
 BUILD_ASSERT_DECL(sizeof(struct dp_packet) -
                   offsetof(struct dp_packet, l2_pad_size) >= sizeof(__m128i));

+/* The below build assert makes sure the order of eth_src and eth_dst is not
+ * changing in the ovs_key_ethernet structure. This should not happen as this
+ * is defined under the Linux uapi. */
+BUILD_ASSERT_DECL(offsetof(struct ovs_key_ethernet, eth_src) +
+                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src) ==
+                  offsetof(struct ovs_key_ethernet, eth_dst));
+

 static inline void ALWAYS_INLINE
 avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
@@ -207,6 +217,85 @@ action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
     }
 }

+/* This function performance the same operation on each packet in the batch as
+ * the scalar odp_eth_set_addrs() function. */
+static void
+action_avx512_eth_set_addrs(struct dp_packet_batch *batch,
+                            const struct nlattr *a)
+{
+    const struct ovs_key_ethernet *key, *mask;
+    struct dp_packet *packet;
+
+    a = nl_attr_get(a);
+    key = nl_attr_get(a);
+    mask = odp_get_key_mask(a, struct ovs_key_ethernet);
+
+    /* Read the content of the key(src) and mask in the respective registers.
+     * Note that we load more than the size of the actual structure, which
+     * is only 96-bits and not 128-bits. */
+    __m128i v_src = _mm_loadu_si128((void *) key);
+    __m128i v_mask = _mm_loadu_si128((void *) mask);
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+
+        struct eth_header *eh = dp_packet_eth(packet);
+
+        if (!eh) {
+            continue;
+        }
+
+        /* This shuffle mask is used below, and each position tells where to
+         * move the bytes to. So here, the fourth sixth byte in
+         * ovs_key_ethernet  is moved to byte location 0 in v_src/v_mask.
+         * The seventh is moved to 1, etc., etc.
+         * This swap is needed to move the src and dest MAC addresses in the
+         * same order as in the ethernet packet. */
+        static const uint8_t eth_shuffle[16] = {
+            6, 7, 8, 9, 10, 11, 0, 1,
+            2, 3, 4, 5, 12, 13, 14, 15
+        };
+
+        /* Load the first 128-bits of the packet into the v_ether register. */
+        __m128i v_dst = _mm_loadu_si128((void *) eh);
+
+        /* Load the shuffle mask in v_shuf. */
+        __m128i v_shuf = _mm_loadu_si128((void *) eth_shuffle);
+
+        /* Swap the key/mask src and dest addresses to the ethernet order. */
+        v_src = _mm_shuffle_epi8(v_src, v_shuf);
+        v_mask = _mm_shuffle_epi8(v_mask, v_shuf);
+
+        /* AND the v_mask to the packet data (v_dst). */
+        __m128i dst_masked = _mm_andnot_si128(v_mask, v_dst);
+
+        /* OR the new addresses (v_src) with the masked packet addresses
+         * (dst_masked). */
+        __m128i res = _mm_or_si128(v_src, dst_masked);
+
+        /* Previous operations were done on 128-bits, but we only need the
+         * MAC addresses, 96-bits. Take the first six 16-bit words (0x3f) from
+         * res and combine them with the last two 16-bit words from v_dest. */
+        __m128i res_blend = _mm_blend_epi16(v_dst, res, 0x3F);
+
+        /* Write back the modified ethernet addresses. */
+        _mm_storeu_si128((void *) eh, res_blend);
+    }
+}
+
+static void
+action_avx512_set_masked(struct dp_packet_batch *batch,
+                         const struct nlattr *a)
+{
+    const struct nlattr *mask = nl_attr_get(a);
+    enum ovs_key_attr attr_type = nl_attr_type(mask);
+
+    if (attr_type <= OVS_KEY_ATTR_MAX && impl_set_masked_funcs[attr_type]) {
+        impl_set_masked_funcs[attr_type](batch, a);
+    } else {
+        odp_execute_scalar_action(batch, a);
+    }
+}
+
 /* Probe functions to check ISA requirements. */
 static bool
 action_avx512_isa_probe(void)
@@ -238,6 +327,11 @@ action_avx512_init(struct odp_execute_action_impl *self OVS_UNUSED)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_avx512_set_masked;
+
+    /* Set function pointers for the individual operations supported by the
+     * SET_MASKED action. */
+    impl_set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_avx512_eth_set_addrs;

     return 0;
 }
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 40d8dd8ec..2960363df 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -225,3 +225,16 @@ action_autoval_init(struct odp_execute_action_impl *self)
     }
     return 0;
 }
+
+void
+odp_execute_scalar_action(struct dp_packet_batch *batch,
+                          const struct nlattr *action)
+{
+    enum ovs_action_attr type = nl_attr_type(action);
+
+    if (action_impls[ACTION_IMPL_SCALAR].funcs[type] &&
+        type <= OVS_ACTION_ATTR_MAX) {
+
+        action_impls[ACTION_IMPL_SCALAR].funcs[type](batch, action);
+    }
+}
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 52d7e044e..faad8ab0a 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -561,8 +561,6 @@ odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
     }
 }

-#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
-
 static void
 odp_execute_masked_set_action(struct dp_packet *packet,
                               const struct nlattr *a)
@@ -574,17 +572,16 @@ odp_execute_masked_set_action(struct dp_packet *packet,
     switch (type) {
     case OVS_KEY_ATTR_PRIORITY:
         md->skb_priority = nl_attr_get_u32(a)
-            | (md->skb_priority & ~*get_mask(a, uint32_t));
+            | (md->skb_priority & ~*odp_get_key_mask(a, uint32_t));
         break;

     case OVS_KEY_ATTR_SKB_MARK:
         md->pkt_mark = nl_attr_get_u32(a)
-            | (md->pkt_mark & ~*get_mask(a, uint32_t));
+            | (md->pkt_mark & ~*odp_get_key_mask(a, uint32_t));
         break;
-
     case OVS_KEY_ATTR_ETHERNET:
         odp_eth_set_addrs(packet, nl_attr_get(a),
-                          get_mask(a, struct ovs_key_ethernet));
+                          odp_get_key_mask(a, struct ovs_key_ethernet));
         break;

     case OVS_KEY_ATTR_NSH: {
@@ -594,27 +591,27 @@ odp_execute_masked_set_action(struct dp_packet *packet,

     case OVS_KEY_ATTR_IPV4:
         odp_set_ipv4(packet, nl_attr_get(a),
-                     get_mask(a, struct ovs_key_ipv4));
+                     odp_get_key_mask(a, struct ovs_key_ipv4));
         break;

     case OVS_KEY_ATTR_IPV6:
         odp_set_ipv6(packet, nl_attr_get(a),
-                     get_mask(a, struct ovs_key_ipv6));
+                     odp_get_key_mask(a, struct ovs_key_ipv6));
         break;

     case OVS_KEY_ATTR_TCP:
         odp_set_tcp(packet, nl_attr_get(a),
-                    get_mask(a, struct ovs_key_tcp));
+                    odp_get_key_mask(a, struct ovs_key_tcp));
         break;

     case OVS_KEY_ATTR_UDP:
         odp_set_udp(packet, nl_attr_get(a),
-                    get_mask(a, struct ovs_key_udp));
+                    odp_get_key_mask(a, struct ovs_key_udp));
         break;

     case OVS_KEY_ATTR_SCTP:
         odp_set_sctp(packet, nl_attr_get(a),
-                     get_mask(a, struct ovs_key_sctp));
+                     odp_get_key_mask(a, struct ovs_key_sctp));
         break;

     case OVS_KEY_ATTR_MPLS:
@@ -622,33 +619,33 @@ odp_execute_masked_set_action(struct dp_packet *packet,
         if (mh) {
             put_16aligned_be32(&mh->mpls_lse, nl_attr_get_be32(a)
                                | (get_16aligned_be32(&mh->mpls_lse)
-                                  & ~*get_mask(a, ovs_be32)));
+                                  & ~*odp_get_key_mask(a, ovs_be32)));
         }
         break;

     case OVS_KEY_ATTR_ARP:
         set_arp(packet, nl_attr_get(a),
-                get_mask(a, struct ovs_key_arp));
+                odp_get_key_mask(a, struct ovs_key_arp));
         break;

     case OVS_KEY_ATTR_ND:
         odp_set_nd(packet, nl_attr_get(a),
-                   get_mask(a, struct ovs_key_nd));
+                   odp_get_key_mask(a, struct ovs_key_nd));
         break;

     case OVS_KEY_ATTR_ND_EXTENSIONS:
         odp_set_nd_ext(packet, nl_attr_get(a),
-                       get_mask(a, struct ovs_key_nd_extensions));
+                       odp_get_key_mask(a, struct ovs_key_nd_extensions));
         break;

     case OVS_KEY_ATTR_DP_HASH:
         md->dp_hash = nl_attr_get_u32(a)
-            | (md->dp_hash & ~*get_mask(a, uint32_t));
+            | (md->dp_hash & ~*odp_get_key_mask(a, uint32_t));
         break;

     case OVS_KEY_ATTR_RECIRC_ID:
         md->recirc_id = nl_attr_get_u32(a)
-            | (md->recirc_id & ~*get_mask(a, uint32_t));
+            | (md->recirc_id & ~*odp_get_key_mask(a, uint32_t));
         break;

     case OVS_KEY_ATTR_TUNNEL:    /* Masked data not supported for tunnel. */
@@ -856,6 +853,17 @@ action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
     }
 }

+static void
+action_set_masked(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+   const struct nlattr *key = nl_attr_get(a);
+   struct dp_packet *packet;
+
+   DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        odp_execute_masked_set_action(packet, key);
+    }
+}
+
 /* Implementation of the scalar actions impl init function. Build up the
  * array of func ptrs here.
  */
@@ -866,6 +874,7 @@ odp_action_scalar_init(struct odp_execute_action_impl *self)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked;

     return 0;
 }
@@ -1030,12 +1039,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             }
             break;

-        case OVS_ACTION_ATTR_SET_MASKED:
-            DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
-                odp_execute_masked_set_action(packet, nl_attr_get(a));
-            }
-            break;
-
         case OVS_ACTION_ATTR_SAMPLE:
             DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                 odp_execute_sample(dp, packet, steal && last_action, a,
@@ -1162,6 +1165,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         /* The following actions are handled by the scalar implementation. */
         case OVS_ACTION_ATTR_POP_VLAN:
         case OVS_ACTION_ATTR_PUSH_VLAN:
+        case OVS_ACTION_ATTR_SET_MASKED:
             OVS_NOT_REACHED();
         }

diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 8668ab73f..a765b02eb 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -50,4 +50,10 @@ void odp_execute_actions(void *dp, struct dp_packet_batch *batch,
                          bool steal,
                          const struct nlattr *actions, size_t actions_len,
                          odp_execute_cb dp_execute_action);
+
+void odp_execute_scalar_action(struct dp_packet_batch *batch,
+                               const struct nlattr *action);
+
+#define odp_get_key_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
+
 #endif
diff mbox series

Patch

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index bb178cbac..ffe25b41d 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -38,6 +38,12 @@  BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
                            MEMBER_SIZEOF(struct dp_packet, l3_ofs) ==
                            offsetof(struct dp_packet, l4_ofs));
 
+BUILD_ASSERT_DECL(offsetof(struct ovs_key_ethernet, eth_src) +
+                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src) ==
+                  offsetof(struct ovs_key_ethernet, eth_dst));
+
+static struct odp_execute_action_impl avx512_impl;
+
 /* Adjust the size of the l2 portion of the dp_packet, updating the l2
  * pointer and the layer offsets. The function will broadcast resize_by_bytes
  * across a register and uses a kmask to identify which lanes should be
@@ -144,6 +150,61 @@  action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
     }
 }
 
+/* This function will load the contents of eth_header into a 128-bit wide
+ * register. Then an 8-byte shuffle is required to shuffle both key and
+ * mask to match the layout of the eth_header struct. A bitwise ANDNOT and OR
+ * is performed on the entire header and results are stored back. */
+static void
+action_avx512_eth_set_addrs(struct dp_packet_batch *batch,
+                            const struct nlattr *a)
+{
+    a = nl_attr_get(a);
+    const struct ovs_key_ethernet *key = nl_attr_get(a);
+    const struct ovs_key_ethernet *mask = get_mask(a, struct ovs_key_ethernet);
+    struct dp_packet *packet;
+
+    __m128i v_src = _mm_loadu_si128((void *) key);
+    __m128i v_mask = _mm_loadu_si128((void *) mask);
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+
+        struct eth_header *eh = dp_packet_eth(packet);
+
+        if (!eh) {
+            continue;
+        }
+
+        static const uint8_t eth_shuffle[16] = {
+            6, 7, 8, 9, 10, 11, 0, 1,
+            2, 3, 4, 5, 12, 13, 14, 15
+        };
+
+        __m128i v_dst = _mm_loadu_si128((void *) eh);
+        __m128i v_shuf = _mm_loadu_si128((void *) eth_shuffle);
+
+        v_src = _mm_shuffle_epi8(v_src, v_shuf);
+        v_mask = _mm_shuffle_epi8(v_mask, v_shuf);
+
+        __m128i dst_masked = _mm_andnot_si128(v_mask, v_dst);
+        __m128i res = _mm_or_si128(v_src, dst_masked);
+
+        __m128i res_blend = _mm_blend_epi16(v_dst, res, 0x3F);
+        _mm_storeu_si128((void *) eh, res_blend);
+    }
+}
+
+static void
+action_avx512_set_masked(struct dp_packet_batch *batch OVS_UNUSED,
+                         const struct nlattr *a)
+{
+    a = nl_attr_get(a);
+    enum ovs_key_attr attr_type = nl_attr_type(a);
+
+    if (avx512_impl.set_masked_funcs[attr_type]) {
+        avx512_impl.set_masked_funcs[attr_type](batch, a);
+    }
+}
+
 /* Probe functions to check ISA requirements. */
 static bool
 avx512_isa_probe(void)
@@ -176,6 +237,14 @@  action_avx512_init(struct odp_execute_action_impl *self)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_avx512_set_masked;
+
+    /* Set function pointers that need a 2nd-level function. SET_MASKED action
+     * requires further processing for action type. Note that 2nd level items
+     * are identified by OVS_KEY_ATTR_*. */
+    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] =
+                            action_avx512_eth_set_addrs;
+    avx512_impl = *self;
 
     return 0;
 }
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 751a68fe3..e2d650779 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -29,6 +29,8 @@ 
 VLOG_DEFINE_THIS_MODULE(odp_execute_impl);
 static int active_action_impl_index;
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+static struct odp_execute_action_impl autoval_impl;
+static bool set_masked = false;
 
 static struct odp_execute_action_impl action_impls[] = {
     [ACTION_IMPL_AUTOVALIDATOR] = {
@@ -59,6 +61,11 @@  action_impl_copy_funcs(struct odp_execute_action_impl *src,
     for (int i = 0; i < __OVS_ACTION_ATTR_MAX; i++) {
         atomic_store_relaxed(&src->funcs[i], dst->funcs[i]);
     }
+
+    for (uint32_t i = 0; i < __OVS_KEY_ATTR_MAX; i++) {
+         atomic_store_relaxed(&src->set_masked_funcs[i],
+                              dst->set_masked_funcs[i]);
+    }
 }
 
 int
@@ -135,19 +142,36 @@  action_autoval_generic(struct dp_packet_batch *batch, const struct nlattr *a)
     bool failed = false;
     int type = nl_attr_type(a);
     enum ovs_action_attr attr_type = (enum ovs_action_attr) type;
+    enum ovs_key_attr key_attr_type = (enum ovs_key_attr) type;
+
+    if (attr_type == OVS_ACTION_ATTR_SET_MASKED) {
+        set_masked = true;
+        const struct nlattr *key = nl_attr_get(a);
+        key_attr_type = nl_attr_type(key);
+    }
+
     struct odp_execute_action_impl *scalar = &action_impls[ACTION_IMPL_SCALAR];
     struct dp_packet_batch good_batch;
 
     dp_packet_batch_clone(&good_batch, batch);
 
-    scalar->funcs[attr_type](&good_batch, a);
+    if (!set_masked) {
+        scalar->funcs[attr_type](&good_batch, a);
+    } else {
+        scalar->set_masked_funcs[key_attr_type](&good_batch, a);
+    }
 
     for (int impl = ACTION_IMPL_BEGIN; impl < ACTION_IMPL_MAX; impl++) {
         /* Clone original batch and execute implementation under test. */
         struct dp_packet_batch test_batch;
 
         dp_packet_batch_clone(&test_batch, batch);
-        action_impls[impl].funcs[attr_type](&test_batch, a);
+
+        if (!set_masked) {
+            action_impls[impl].funcs[attr_type](&test_batch, a);
+        } else {
+            action_impls[impl].set_masked_funcs[key_attr_type](&test_batch, a);
+        }
 
         /* Loop over implementations, checking each one. */
         for (int pidx = 0; pidx < batch->count; pidx++) {
@@ -200,7 +224,26 @@  action_autoval_generic(struct dp_packet_batch *batch, const struct nlattr *a)
     dp_packet_delete_batch(&good_batch, 1);
 
     /* Apply the action to the original batch for continued processing. */
-    scalar->funcs[attr_type](batch, a);
+    if (!set_masked) {
+        scalar->funcs[attr_type](batch, a);
+    } else {
+        scalar->set_masked_funcs[key_attr_type](batch, a);
+    }
+
+    set_masked = false;
+}
+
+static void
+action_set_masked_init(struct dp_packet_batch *batch OVS_UNUSED,
+                       const struct nlattr *a)
+{
+    const struct nlattr *type = nl_attr_get(a);
+    enum ovs_key_attr attr_type = nl_attr_type(type);
+
+    if (autoval_impl.set_masked_funcs[attr_type]) {
+        set_masked = true;
+        autoval_impl.set_masked_funcs[attr_type](batch, a);
+    }
 }
 
 int
@@ -210,6 +253,13 @@  action_autoval_init(struct odp_execute_action_impl *self)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked_init;
+
+    /* Set function pointers that need a 2nd-level function. SET_MASKED action
+     * requires further processing for action type. Note that 2nd level items
+     * are identified by OVS_KEY_ATTR_*. */
+    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_autoval_generic;
+    autoval_impl = *self;
 
     return 0;
 }
diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
index e4724b8b2..1f4d614ca 100644
--- a/lib/odp-execute-private.h
+++ b/lib/odp-execute-private.h
@@ -49,6 +49,10 @@  struct odp_execute_action_impl {
 
     /* An array of callback functions, one for each action. */
     ATOMIC(odp_execute_action_cb) funcs[__OVS_ACTION_ATTR_MAX];
+
+    /* An array of callback functions, one for each action type. */
+    ATOMIC(odp_execute_action_cb) set_masked_funcs[__OVS_KEY_ATTR_MAX];
+
 };
 
 /* Order of Actions implementations. */
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 59f6bdc64..db6e1ec03 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -561,8 +561,6 @@  odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
     }
 }
 
-#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
-
 static void
 odp_execute_masked_set_action(struct dp_packet *packet,
                               const struct nlattr *a)
@@ -582,11 +580,6 @@  odp_execute_masked_set_action(struct dp_packet *packet,
             | (md->pkt_mark & ~*get_mask(a, uint32_t));
         break;
 
-    case OVS_KEY_ATTR_ETHERNET:
-        odp_eth_set_addrs(packet, nl_attr_get(a),
-                          get_mask(a, struct ovs_key_ethernet));
-        break;
-
     case OVS_KEY_ATTR_NSH: {
         odp_set_nsh(packet, a, true);
         break;
@@ -669,6 +662,8 @@  odp_execute_masked_set_action(struct dp_packet *packet,
     case OVS_KEY_ATTR_TCP_FLAGS:
     case OVS_KEY_ATTR_TUNNEL_INFO:
     case __OVS_KEY_ATTR_MAX:
+    /* The following action types are handled by the scalar implementation. */
+    case OVS_KEY_ATTR_ETHERNET:
     default:
         OVS_NOT_REACHED();
     }
@@ -834,6 +829,12 @@  requires_datapath_assistance(const struct nlattr *a)
     return false;
 }
 
+/* The active function pointers on the datapath. ISA optimized implementations
+ * are enabled by plugging them into this static arary, which is consulted when
+ * applying actions on the datapath.
+ */
+static struct odp_execute_action_impl actions_active_impl;
+
 static void
 action_pop_vlan(struct dp_packet_batch *batch,
                 const struct nlattr *a OVS_UNUSED)
@@ -856,6 +857,36 @@  action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
     }
 }
 
+static void
+action_set_masked(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+    struct dp_packet *packet;
+
+    const struct nlattr *key = nl_attr_get(a);
+    enum ovs_key_attr key_type = nl_attr_type(key);
+
+    if (actions_active_impl.set_masked_funcs[key_type]) {
+        actions_active_impl.set_masked_funcs[key_type](batch, a);
+    } else {
+        a = nl_attr_get(a);
+        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+            odp_execute_masked_set_action(packet, a);
+        }
+    }
+}
+
+static void
+action_mod_eth(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+    a = nl_attr_get(a);
+    struct dp_packet *packet;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        odp_eth_set_addrs(packet, nl_attr_get(a),
+                          get_mask(a, struct ovs_key_ethernet));
+    }
+}
+
 /* Implementation of the scalar actions impl init function. Build up the
  * array of func ptrs here.
  */
@@ -866,16 +897,17 @@  odp_action_scalar_init(struct odp_execute_action_impl *self)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked;
+
+    /* Set function pointers that need a 2nd-level function. SET_MASKED action
+     * requires further processing for action type. Note that 2nd level items
+     * are identified by OVS_KEY_ATTR_*. */
+    self->set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_mod_eth;
+    actions_active_impl = *self;
 
     return 0;
 }
 
-/* The active function pointers on the datapath. ISA optimized implementations
- * are enabled by plugging them into this static arary, which is consulted when
- * applying actions on the datapath.
- */
-static struct odp_execute_action_impl actions_active_impl;
-
 void
 odp_execute_init(void)
 {
@@ -1028,12 +1060,6 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             }
             break;
 
-        case OVS_ACTION_ATTR_SET_MASKED:
-            DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
-                odp_execute_masked_set_action(packet, nl_attr_get(a));
-            }
-            break;
-
         case OVS_ACTION_ATTR_SAMPLE:
             DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                 odp_execute_sample(dp, packet, steal && last_action, a,
@@ -1160,6 +1186,7 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         /* The following actions are handled by the scalar implementation. */
         case OVS_ACTION_ATTR_POP_VLAN:
         case OVS_ACTION_ATTR_PUSH_VLAN:
+        case OVS_ACTION_ATTR_SET_MASKED:
             OVS_NOT_REACHED();
         }
 
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 8668ab73f..762b99473 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -50,4 +50,7 @@  void odp_execute_actions(void *dp, struct dp_packet_batch *batch,
                          bool steal,
                          const struct nlattr *actions, size_t actions_len,
                          odp_execute_cb dp_execute_action);
+
+#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
+
 #endif