@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
}
void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+ /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
{
/* Nothing */
}
@@ -358,6 +358,31 @@ dpdk_unixctl_log_set(struct unixctl_conn *conn, int argc, const char *argv[],
unixctl_command_reply(conn, NULL);
}
+#ifdef ALLOW_EXPERIMENTAL_API
+static void
+dpdk_unixctl_lcore_list(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+ char *response = NULL;
+ FILE *stream;
+ size_t size;
+
+ stream = open_memstream(&response, &size);
+ if (!stream) {
+ response = xasprintf("Unable to open memstream: %s.",
+ ovs_strerror(errno));
+ unixctl_command_reply_error(conn, response);
+ goto out;
+ }
+
+ rte_lcore_dump(stream);
+ fclose(stream);
+ unixctl_command_reply(conn, response);
+out:
+ free(response);
+}
+#endif
+
static bool
dpdk_init__(const struct smap *ovs_other_config)
{
@@ -537,6 +562,10 @@ dpdk_init__(const struct smap *ovs_other_config)
dpdk_unixctl_log_list, NULL);
unixctl_command_register("dpdk/log-set", "pattern:level", 0, INT_MAX,
dpdk_unixctl_log_set, NULL);
+#ifdef ALLOW_EXPERIMENTAL_API
+ unixctl_command_register("dpdk/lcores-list", "", 0, 0,
+ dpdk_unixctl_lcore_list, NULL);
+#endif
/* We are called from the main thread here */
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
@@ -613,18 +642,34 @@ dpdk_available(void)
}
void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
{
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
ovs_assert(cpu != NON_PMD_CORE_ID);
+#ifdef ALLOW_EXPERIMENTAL_API
+ rte_thread_register();
+#else
if (cpu >= RTE_MAX_LCORE) {
cpu = LCORE_ID_ANY;
}
RTE_PER_LCORE(_lcore_id) = cpu;
+#endif
if (rte_lcore_id() == LCORE_ID_ANY) {
ovs_abort(0, "PMD thread init failed, trying to use more cores than "
"DPDK supports (RTE_MAX_LCORE %u).", RTE_MAX_LCORE);
}
+ VLOG_INFO("PMD thread is associated to DPDK lcore %u.", rte_lcore_id());
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+ unsigned int lcore_id = rte_lcore_id();
+
+#ifdef ALLOW_EXPERIMENTAL_API
+ rte_thread_unregister();
+#endif
+ VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
}
void
@@ -36,7 +36,8 @@ struct smap;
struct ovsrec_open_vswitch;
void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
const char *dpdk_get_vhost_sock_dir(void);
bool dpdk_vhost_iommu_enabled(void);
bool dpdk_vhost_postcopy_enabled(void);
@@ -5701,7 +5701,7 @@ pmd_thread_main(void *f_)
/* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
ovs_numa_thread_setaffinity_core(pmd->core_id);
- dpdk_set_lcore_id(pmd->core_id);
+ dpdk_init_thread_context(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
dfc_cache_init(&pmd->flow_cache);
pmd_alloc_static_tx_qid(pmd);
@@ -5821,6 +5821,7 @@ reload:
dfc_cache_uninit(&pmd->flow_cache);
free(poll_list);
pmd_free_cached_ports(pmd);
+ dpdk_uninit_thread_context();
return NULL;
}
DPDK 20.08 introduced a new API that associates a non-EAL thread to a free lcore. This new API does not change the thread characteristics (like CPU affinity). Using this new API, there is no longer an assumption that lcore X runs on cpu X, which leaves OVS free to run its PMD threads on any cpu. DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64), which should be enough for OVS (hopefully). lcore/PMD thread mappings are logged at thread creation and destruction. A new command is added to help get DPDK's point of view of the lcores: $ ovs-appctl dpdk/lcores-list lcore 0, socket 0, role RTE, cpuset 0 lcore 1, socket 0, role NON_EAL, cpuset 1 lcore 2, socket 0, role NON_EAL, cpuset 15 Signed-off-by: David Marchand <david.marchand@redhat.com> --- Changes since v2: - introduced a new API in DPDK 20.08 (still being discussed), inbox thread at http://inbox.dpdk.org/dev/20200610144506.30505-1-david.marchand@redhat.com/T/#t - this current patch depends on a patch on master I sent: https://patchwork.ozlabs.org/project/openvswitch/patch/20200626122738.28163-1-david.marchand@redhat.com/ - dropped 'dpdk-lcore-mask' compat handling, Changes since v1: - rewired existing configuration 'dpdk-lcore-mask' to use --lcores, - switched to a bitmap to track lcores, - added a command to dump the current mapping (Flavio): used an experimental API to get the DPDK lcores' cpusets since it is the most reliable/portable information, - used the same code for the logs when starting DPDK/PMD threads, - addressed Ilya's comments, --- lib/dpdk-stub.c | 8 +++++++- lib/dpdk.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- lib/dpdk.h | 3 ++- lib/dpif-netdev.c | 3 ++- 4 files changed, 57 insertions(+), 4 deletions(-)