@@ -117,6 +117,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/dpif-netdev-private-dpcls.h \
lib/dpif-netdev-private-dpif.c \
lib/dpif-netdev-private-dpif.h \
+ lib/dpif-netdev-private-extract.c \
+ lib/dpif-netdev-private-extract.h \
lib/dpif-netdev-private-flow.h \
lib/dpif-netdev-private-hwol.h \
lib/dpif-netdev-private-thread.h \
@@ -120,7 +120,16 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
/* A 1 bit in this mask indicates a hit, so no DPCLS lookup on the pkt. */
uint32_t hwol_emc_smc_hitmask = 0;
- /* Perform first packet interation. */
+ /* Do a batch minfilow extract into keys. */
+ /* TODO: v2 will support cleaner integration into packet batch
+ * loops below. */
+ uint32_t mf_mask = 0;
+ if (pmd->miniflow_extract_opt) {
+ mf_mask = pmd->miniflow_extract_opt(packets, keys,
+ batch_size, in_port,
+ (void *) pmd);
+ }
+ /* Perform first packet interation */
uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1;
uint32_t iter = lookup_pkts_bitmask;
while (iter) {
@@ -152,11 +161,15 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
}
}
- /* Do miniflow extract into keys */
struct netdev_flow_key *key = &keys[i];
- miniflow_extract(packet, &key->mf);
-
- /* Cache TCP and byte values for all packets. */
+ /* Check the minfiflow mask to see if the packet was correctly
+ * classifed by vector mfex else do a scalar miniflow extract
+ * for that packet. */
+ if (!(mf_mask & (1 << i))) {
+ /* Do a scalar miniflow extract into keys */
+ miniflow_extract(packet, &key->mf);
+ }
+ /* Cache TCP and byte values for all packets */
pkt_meta[i].bytes = dp_packet_size(packet);
pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf);
new file mode 100644
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "dp-packet.h"
+#include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-extract.h"
+#include "dpif-netdev-private-thread.h"
+#include "flow.h"
+#include "openvswitch/vlog.h"
+#include "ovs-thread.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
+
+/* Implementations of available extract options. */
+static struct dpif_miniflow_extract_impl mfex_impls[] = {
+ {
+ .probe = NULL,
+ .extract_func = NULL,
+ .name = "disable",
+ },
+};
+
+BUILD_ASSERT_DECL(MFEX_IMPLS_MAX_SIZE > ARRAY_SIZE(mfex_impls));
+
+int32_t
+dpif_miniflow_extract_opt_get(const char *name,
+ struct dpif_miniflow_extract_impl **opt)
+{
+ ovs_assert(opt);
+ ovs_assert(name);
+
+ uint32_t i;
+ for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) {
+ if (strcmp(name, mfex_impls[i].name) == 0) {
+ *opt = &mfex_impls[i];
+ return 0;
+ }
+ }
+ return -ENOTSUP;
+}
+
+void
+dpif_miniflow_extract_init(void)
+{
+ /* Call probe on each impl, and cache the result. */
+ uint32_t i;
+ for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) {
+ int avail = 1;
+ if (mfex_impls[i].probe) {
+ /* Return zero is success, non-zero means error. */
+ avail = (mfex_impls[i].probe() == 0);
+ }
+ VLOG_INFO("Miniflow Extract implementation %s (available: %s)\n",
+ mfex_impls[i].name, avail ? "True" : "False");
+ mfex_impls[i].available = avail;
+ }
+}
+
+int32_t
+dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr)
+{
+ if (out_ptr == NULL) {
+ return -EINVAL;
+ }
+ *out_ptr = mfex_impls;
+ return ARRAY_SIZE(mfex_impls);
+}
new file mode 100644
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_AVX512_EXTRACT
+#define DPIF_NETDEV_AVX512_EXTRACT 1
+
+#include <sys/types.h>
+
+#include "openvswitch/types.h"
+
+/* Max size of dpif_miniflow_extract_impl array. */
+#define MFEX_IMPLS_MAX_SIZE (16)
+
+/* Forward declarations. */
+struct dp_packet;
+struct miniflow;
+struct dp_netdev_pmd_thread;
+struct dp_packet_batch;
+struct netdev_flow_key;
+
+/* Function pointer prototype to be implemented in the optimized miniflow
+ * extract code.
+ * returns the hitmask of the processed packets on success.
+ * returns zero on failure.
+ */
+typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
+ struct netdev_flow_key *keys,
+ uint32_t keys_size,
+ odp_port_t in_port,
+ void *pmd_handle);
+
+/* Probe function is used to detect if this CPU has the ISA required
+ * to run the optimized miniflow implementation.
+ * returns one on successful probe.
+ * returns zero on failure.
+ */
+typedef int32_t (*miniflow_extract_probe)(void);
+
+/* Structure representing the attributes of an optimized implementation. */
+struct dpif_miniflow_extract_impl {
+ /* When non-zero, this impl has passed the probe() checks. */
+ uint8_t available;
+
+ /* Probe function is used to detect if this CPU has the ISA required
+ * to run the optimized miniflow implementation.
+ */
+ miniflow_extract_probe probe;
+
+ /* Function to call to extract miniflows for a burst of packets. */
+ miniflow_extract_func extract_func;
+
+ /* Name of the optimized implementation. */
+ char *name;
+};
+
+/* Retrieve the opt structure for the requested implementation by name.
+ * Returns zero on success, and opt points to a valid struct, or
+ * returns a negative failure status.
+ * -ENOTSUP : invalid name requested
+ */
+int32_t
+dpif_miniflow_extract_opt_get(const char *name,
+ struct dpif_miniflow_extract_impl **opt);
+
+/* Initializes the available miniflow extract implementations by probing for
+ * the CPU ISA requirements. As the runtime available CPU ISA does not change
+ * and the required ISA of the implementation also does not change, it is safe
+ * to cache the probe() results, and not call probe() at runtime.
+ */
+void
+dpif_miniflow_extract_init(void);
+
+/* Retrieve the array of miniflow implementations for iteration.
+ * On error, returns a negative number.
+ * On success, returns the size of the arrays pointed to by the out parameter.
+ */
+int32_t
+dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl **out_ptr);
+
+
+#endif /* DPIF_NETDEV_AVX512_EXTRACT */
@@ -29,6 +29,7 @@
#include "openvswitch/thread.h"
#include "dpif-netdev-private-dpif.h"
+#include "dpif-netdev-private-extract.h"
#ifdef __cplusplus
extern "C" {
@@ -110,6 +111,9 @@ struct dp_netdev_pmd_thread {
/* Pointer for per-DPIF implementation scratch space. */
void *netdev_input_func_userdata;
+ /* Function pointer to call for miniflow_extract() functionality. */
+ miniflow_extract_func miniflow_extract_opt;
+
struct seq *reload_seq;
uint64_t last_reload_seq;
@@ -46,6 +46,7 @@
#include "dpif-netdev-lookup.h"
#include "dpif-netdev-perf.h"
#include "dpif-netdev-private-dfc.h"
+#include "dpif-netdev-private-extract.h"
#include "dpif-provider.h"
#include "dummy.h"
#include "fat-rwlock.h"
@@ -1089,6 +1090,102 @@ dpif_netdev_impl_set(struct unixctl_conn *conn, int argc,
ds_destroy(&reply);
}
+static void
+dpif_miniflow_extract_impl_get(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+ struct dpif_miniflow_extract_impl *mfex_impls;
+ uint32_t count = dpif_miniflow_extract_info_get(&mfex_impls);
+ if (count == 0) {
+ unixctl_command_reply_error(conn, "error getting mfex names");
+ return;
+ }
+
+ /* Add all mfex functions to reply string. */
+ struct ds reply = DS_EMPTY_INITIALIZER;
+ ds_put_cstr(&reply, "Available Optimized Miniflow Extracts:\n");
+ for (uint32_t i = 0; i < count; i++) {
+ ds_put_format(&reply, " %s (available: %s)\n",
+ mfex_impls[i].name, mfex_impls[i].available ?
+ "True" : "False");
+ }
+ unixctl_command_reply(conn, ds_cstr(&reply));
+ ds_destroy(&reply);
+}
+
+static void
+dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ /* This function requires just one parameter, the miniflow name.
+ * A second optional parameter can identify the datapath instance.
+ */
+ const char *mfex_name = argv[1];
+
+ static const char *error_description[2] = {
+ "Unknown miniflow implementation",
+ "implementation doesn't exist",
+ };
+ struct dpif_miniflow_extract_impl *opt;
+ miniflow_extract_func new_func;
+ int32_t err = dpif_miniflow_extract_opt_get(mfex_name, &opt);
+ if (err) {
+ struct ds reply = DS_EMPTY_INITIALIZER;
+ ds_put_format(&reply,
+ "Miniflow implementation not available: %s %s.\n",
+ error_description[ (err == -ENOTSUP) ], mfex_name);
+ const char *reply_str = ds_cstr(&reply);
+ unixctl_command_reply(conn, reply_str);
+ VLOG_INFO("%s", reply_str);
+ ds_destroy(&reply);
+ return;
+ }
+ new_func = opt->extract_func;
+ /* argv[2] is optional datapath instance. If no datapath name is provided.
+ * and only one datapath exists, the one existing datapath is reprobed.
+ */
+ ovs_mutex_lock(&dp_netdev_mutex);
+ struct dp_netdev *dp = NULL;
+
+ if (argc == 3) {
+ dp = shash_find_data(&dp_netdevs, argv[2]);
+ } else if (shash_count(&dp_netdevs) == 1) {
+ dp = shash_first(&dp_netdevs)->data;
+ }
+
+ if (!dp) {
+ ovs_mutex_unlock(&dp_netdev_mutex);
+ unixctl_command_reply_error(conn,
+ "please specify an existing datapath");
+ return;
+ }
+
+ /* Get PMD threads list. */
+ size_t n;
+ struct dp_netdev_pmd_thread **pmd_list;
+ sorted_poll_thread_list(dp, &pmd_list, &n);
+
+ for (size_t i = 0; i < n; i++) {
+ struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+ if (pmd->core_id == NON_PMD_CORE_ID) {
+ continue;
+ }
+
+ /* Set PMD threads miniflow implementation to requested one. */
+ pmd->miniflow_extract_opt = *new_func;
+ };
+
+ ovs_mutex_unlock(&dp_netdev_mutex);
+
+ /* Reply with success to command. */
+ struct ds reply = DS_EMPTY_INITIALIZER;
+ ds_put_format(&reply, "Miniflow implementation set to %s.\n", mfex_name);
+ const char *reply_str = ds_cstr(&reply);
+ unixctl_command_reply(conn, reply_str);
+ VLOG_INFO("%s", reply_str);
+ ds_destroy(&reply);
+}
+
static void
dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
const char *argv[], void *aux OVS_UNUSED)
@@ -1315,9 +1412,16 @@ dpif_netdev_init(void)
"dpif_implementation_name [dp]",
1, 2, dpif_netdev_impl_set,
NULL);
+ unixctl_command_register("dpif-netdev/miniflow-parser-set",
+ "miniflow implementation name [dp]",
+ 1, 2, dpif_miniflow_extract_impl_set,
+ NULL);
unixctl_command_register("dpif-netdev/dpif-get", "",
0, 0, dpif_netdev_impl_get,
NULL);
+ unixctl_command_register("dpif-netdev/miniflow-parser-get", "",
+ 0, 0, dpif_miniflow_extract_impl_get,
+ NULL);
return 0;
}
@@ -1461,6 +1565,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
dp->conntrack = conntrack_init();
+ dpif_miniflow_extract_init();
+
atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
@@ -6144,6 +6250,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
/* Initialize DPIF function pointer to the default configured version. */
pmd->netdev_input_func = dp_netdev_impl_get_default();
+ /*Init default miniflow_extract function */
+ pmd->miniflow_extract_opt = NULL;
+
/* init the 'flow_cache' since there is no
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
@@ -6697,11 +6806,13 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
bool md_is_valid, odp_port_t port_no)
{
struct netdev_flow_key *key = &keys[0];
+ struct dp_packet_batch single_packet;
size_t n_missed = 0, n_emc_hit = 0;
struct dfc_cache *cache = &pmd->flow_cache;
struct dp_packet *packet;
const size_t cnt = dp_packet_batch_size(packets_);
uint32_t cur_min = pmd->ctx.emc_insert_min;
+ int mf_ret;
int i;
uint16_t tcp_flags;
bool smc_enable_db;
@@ -6753,8 +6864,19 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
continue;
}
}
-
- miniflow_extract(packet, &key->mf);
+ /* Set the count and packet for miniflow_opt with batch_size 1. */
+ if ((pmd->miniflow_extract_opt) && (!md_is_valid)) {
+ single_packet.count = 1;
+ single_packet.packets[0] = packet;
+ mf_ret = pmd->miniflow_extract_opt(&single_packet, key, 1,
+ port_no, (void *) pmd);
+ /* Fallback to original miniflow_extract if there is a miss. */
+ if (!mf_ret) {
+ miniflow_extract(packet, &key->mf);
+ }
+ } else {
+ miniflow_extract(packet, &key->mf);
+ }
key->len = 0; /* Not computed yet. */
key->hash =
(md_is_valid == false)