diff mbox series

[ovs-dev,v7,09/11] odp-execute: Add ISA implementation of push_vlan action.

Message ID 20220614115743.1143341-10-emma.finn@intel.com
State Changes Requested
Headers show
Series [ovs-dev,v7,01/11] ofproto-dpif: Fix incorrect checksums in input packets | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Emma Finn June 14, 2022, 11:57 a.m. UTC
This commit adds the AVX512 implementation of the
push_vlan action.

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 lib/odp-execute-avx512.c  | 37 +++++++++++++++++++++++++++++++++++++
 lib/odp-execute-private.c |  1 +
 lib/odp-execute.c         | 22 +++++++++++++---------
 3 files changed, 51 insertions(+), 9 deletions(-)

Comments

Eelco Chaudron June 23, 2022, 3:37 p.m. UTC | #1
On 14 Jun 2022, at 13:57, Emma Finn wrote:

> This commit adds the AVX512 implementation of the
> push_vlan action.
>
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  lib/odp-execute-avx512.c  | 37 +++++++++++++++++++++++++++++++++++++
>  lib/odp-execute-private.c |  1 +
>  lib/odp-execute.c         | 22 +++++++++++++---------
>  3 files changed, 51 insertions(+), 9 deletions(-)
>
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index f9e2b1727..bb178cbac 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -109,6 +109,41 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch,
>      }
>  }
>
> +/* This function will load the entire eth_header into a 128-bit wide register.
> + * Then use an 8-byte shuffle to shift the data left to make room for
> + * the vlan header. Insert the new vlan header and then store back to the
> + * original packet. */
> +static void
> +action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
> +{
> +    struct dp_packet *packet;
> +    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +    ovs_be16 tpid, tci;
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        tpid = vlan->vlan_tpid;
> +        tci = vlan->vlan_tci;
> +
> +        avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
> +
> +        /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
> +        char *pkt_data = (char *) dp_packet_data(packet);
> +        const uint16_t tci_proc = tci & htons(~VLAN_CFI);
> +        const uint32_t tpid_tci = (tci_proc << 16) | tpid;
> +
> +        static const uint8_t vlan_push_shuffle_mask[16] = {
> +            4, 5, 6, 7, 8, 9, 10, 11,
> +            12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
> +        };
> +
> +        __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
> +        __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
> +        __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
> +        __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
> +        _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
> +    }
> +}
> +
>  /* Probe functions to check ISA requirements. */
>  static bool
>  avx512_isa_probe(void)
> @@ -140,6 +175,8 @@ action_avx512_init(struct odp_execute_action_impl *self)
>      /* Set function pointers for actions that can be applied directly, these
>       * are identified by OVS_ACTION_ATTR_*. */
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
> +
>      return 0;
>  }
>
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> index de2e4dfc4..751a68fe3 100644
> --- a/lib/odp-execute-private.c
> +++ b/lib/odp-execute-private.c
> @@ -209,6 +209,7 @@ action_autoval_init(struct odp_execute_action_impl *self)
>      /* Set function pointers for actions that can be applied directly, these
>       * are identified by OVS_ACTION_ATTR_*. */
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;

Based on my previous comments I think this should no longer be needed.

>
>      return 0;
>  }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index a49b331ef..59f6bdc64 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -845,6 +845,17 @@ action_pop_vlan(struct dp_packet_batch *batch,
>      }
>  }
>
> +static void
> +action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
> +{
> +    struct dp_packet *packet;
> +    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> +    }
> +}
> +
>  /* Implementation of the scalar actions impl init function. Build up the
>   * array of func ptrs here.
>   */
> @@ -854,6 +865,7 @@ odp_action_scalar_init(struct odp_execute_action_impl *self)
>      /* Set function pointers for actions that can be applied directly, these
>       * are identified by OVS_ACTION_ATTR_*. */
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
>
>      return 0;
>  }
> @@ -995,15 +1007,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>              break;
>          }
>
> -        case OVS_ACTION_ATTR_PUSH_VLAN: {
> -            const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> -
> -            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> -                eth_push_vlanog og eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> -            }
> -            break;
> -        }
> -
>          case OVS_ACTION_ATTR_PUSH_MPLS: {
>              const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
>
> @@ -1156,6 +1159,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>          case __OVS_ACTION_ATTR_MAX:
>          /* The following actions are handled by the scalar implementation. */
>          case OVS_ACTION_ATTR_POP_VLAN:
> +        case OVS_ACTION_ATTR_PUSH_VLAN:
>              OVS_NOT_REACHED();
>          }
>
> -- 
> 2.32.0


Other than the one remark above I have nothing to add other than some more explicit comments on the AVX section. See diff below:

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 7b0e00ffc..54313cd1f 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -154,10 +154,8 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch,
     }
 }

-/* This function will load the entire eth_header into a 128-bit wide register.
- * Then use an 8-byte shuffle to shift the data left to make room for
- * the vlan header. Insert the new vlan header and then store back to the
- * original packet. */
+/* This function performance the same operation on each packet in the batch as
+ * the scalar eth_push_vlan() function. */
 static void
 action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
 {
@@ -169,22 +167,42 @@ action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
         tpid = vlan->vlan_tpid;
         tci = vlan->vlan_tci;

+        /* As we are about to insert the VLAN_HEADER we now need to adjust all
+         * the offsets. */
         avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);

-        /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
         char *pkt_data = (char *) dp_packet_data(packet);
+
+        /* Build up the VLAN TCI/TPID in a single uint32_t. */
         const uint16_t tci_proc = tci & htons(~VLAN_CFI);
         const uint32_t tpid_tci = (tci_proc << 16) | tpid;

+        /* This shuffle mask is used below, and each position tells where to
+         * move the bytes to. So here, the fourth byte in v_ether is moved to
+         * byte location 0 in v_shift. The fifth is moved to 1, etc., etc.
+         * The 0xFF is special it tells to fill that position with 0.
+         */
         static const uint8_t vlan_push_shuffle_mask[16] = {
             4, 5, 6, 7, 8, 9, 10, 11,
             12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
         };

+        /* Load the first 128-bits of the packet into the v_ether register.
+         * Note that this includes the 4 unused bytes (VLAN_HEADER_LEN). */
         __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
+
+        /* Load the shuffle mask in v_index. */
         __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
+
+        /* Move(shuffle) the veth_dst and veth_src data to create room for
+         * the vlan header. */
         __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
+
+        /* Copy(insert) the 32-bit VLAN header, tpid_tci, at the 3rd 32-bit
+         * word offset, i.e., ofssetof(vlan_eth_header, veth_type) */
         __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
+
+        /* Write back the modified ethernet header. */
         _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
     }
 }
diff mbox series

Patch

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index f9e2b1727..bb178cbac 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -109,6 +109,41 @@  action_avx512_pop_vlan(struct dp_packet_batch *batch,
     }
 }
 
+/* This function will load the entire eth_header into a 128-bit wide register.
+ * Then use an 8-byte shuffle to shift the data left to make room for
+ * the vlan header. Insert the new vlan header and then store back to the
+ * original packet. */
+static void
+action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+    struct dp_packet *packet;
+    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+    ovs_be16 tpid, tci;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        tpid = vlan->vlan_tpid;
+        tci = vlan->vlan_tci;
+
+        avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
+
+        /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
+        char *pkt_data = (char *) dp_packet_data(packet);
+        const uint16_t tci_proc = tci & htons(~VLAN_CFI);
+        const uint32_t tpid_tci = (tci_proc << 16) | tpid;
+
+        static const uint8_t vlan_push_shuffle_mask[16] = {
+            4, 5, 6, 7, 8, 9, 10, 11,
+            12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
+        };
+
+        __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
+        __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
+        __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
+        __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
+        _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
+    }
+}
+
 /* Probe functions to check ISA requirements. */
 static bool
 avx512_isa_probe(void)
@@ -140,6 +175,8 @@  action_avx512_init(struct odp_execute_action_impl *self)
     /* Set function pointers for actions that can be applied directly, these
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
+    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
+
     return 0;
 }
 
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index de2e4dfc4..751a68fe3 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -209,6 +209,7 @@  action_autoval_init(struct odp_execute_action_impl *self)
     /* Set function pointers for actions that can be applied directly, these
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
+    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
 
     return 0;
 }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index a49b331ef..59f6bdc64 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -845,6 +845,17 @@  action_pop_vlan(struct dp_packet_batch *batch,
     }
 }
 
+static void
+action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+    struct dp_packet *packet;
+    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
+    }
+}
+
 /* Implementation of the scalar actions impl init function. Build up the
  * array of func ptrs here.
  */
@@ -854,6 +865,7 @@  odp_action_scalar_init(struct odp_execute_action_impl *self)
     /* Set function pointers for actions that can be applied directly, these
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
+    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
 
     return 0;
 }
@@ -995,15 +1007,6 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             break;
         }
 
-        case OVS_ACTION_ATTR_PUSH_VLAN: {
-            const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
-
-            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
-            }
-            break;
-        }
-
         case OVS_ACTION_ATTR_PUSH_MPLS: {
             const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
 
@@ -1156,6 +1159,7 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case __OVS_ACTION_ATTR_MAX:
         /* The following actions are handled by the scalar implementation. */
         case OVS_ACTION_ATTR_POP_VLAN:
+        case OVS_ACTION_ATTR_PUSH_VLAN:
             OVS_NOT_REACHED();
         }