@@ -375,4 +375,27 @@ system configuration (e.g. enabling processor C-states) and workloads.
rate.
+Max sleep request values can be set for individual PMDs using key:value pairs.
+Any PMD that has been assigned a specified value will use that. Any PMD that
+does not have a specified value will use the current global default.
+
+Specified values for individual PMDs can be added or removed at any time.
+
+For example, to set PMD thread cores 8 and 9 to never request a load based
+sleep and all other PMD cores to be able to request a max sleep of 50 usecs::
+
+ $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0
+
+The max sleep request for each PMD can be checked in the logs or with::
+
+ $ ovs-appctl dpif-netdev/pmd-sleep-show
+ PMD max sleep request is 50 usecs by default.
+ PMD load based sleeps are enabled by default.
+ PMD thread core 8 NUMA 0: Max sleep request set to 0 usecs.
+ PMD thread core 9 NUMA 1: Max sleep request set to 0 usecs.
+ PMD thread core 10 NUMA 0: Max sleep request set to 50 usecs.
+ PMD thread core 11 NUMA 1: Max sleep request set to 50 usecs.
+ PMD thread core 12 NUMA 0: Max sleep request set to 50 usecs.
+ PMD thread core 13 NUMA 1: Max sleep request set to 50 usecs.
+
.. _ovs-vswitchd(8):
http://openvswitch.org/support/dist-docs/ovs-vswitchd.8.html
@@ -181,4 +181,7 @@ struct dp_netdev_pmd_thread {
bool isolated;
+ /* Max sleep request. UINT64_MAX indicates dp default should be used. */
+ atomic_uint64_t max_sleep;
+
/* Queue id used by this pmd thread to send packets on all netdevs if
* XPS disabled for this netdev. All static_tx_qid's are unique and less
@@ -180,4 +180,9 @@ static struct odp_support dp_netdev_support = {
#define PMD_SLEEP_INC_US 1
+struct pmd_sleep { /* One parsed "pmd-sleep-max" entry. */
+ unsigned core_id; /* PMD core id; UINT_MAX marks the datapath default. */
+ uint64_t max_sleep; /* Max sleep request in usecs. */
+};
+
struct dpcls {
struct cmap_node node; /* Within dp_netdev_pmd_thread.classifiers */
@@ -290,4 +295,6 @@ struct dp_netdev {
/* Max load based sleep request. */
atomic_uint64_t pmd_max_sleep;
+ /* Max load based sleep request user string. */
+ char *max_sleep_list;
/* Enable the SMC cache from ovsdb config */
atomic_bool smc_enable_db;
@@ -1013,9 +1020,15 @@ pmd_max_sleep_show(struct ds *reply, struct dp_netdev_pmd_thread *pmd,
{
if (pmd->core_id != NON_PMD_CORE_ID) {
+ uint64_t pmd_max_sleep;
+
+ atomic_read_relaxed(&pmd->max_sleep, &pmd_max_sleep);
ds_put_format(reply,
"PMD thread core %3u NUMA %2d: "
"Max sleep request set to",
pmd->core_id, pmd->numa_id);
- ds_put_format(reply, " %4"PRIu64" usecs.", default_max_sleep);
+ ds_put_format(reply, " %4"PRIu64" usecs.",
+ pmd_max_sleep == UINT64_MAX
+ ? default_max_sleep
+ : pmd_max_sleep);
ds_put_cstr(reply, "\n");
}
@@ -1528,7 +1541,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
atomic_read_relaxed(&dp->pmd_max_sleep, &default_max_sleep);
ds_put_format(&reply, "PMD max sleep request is %"PRIu64" "
- "usecs.", default_max_sleep);
+ "usecs by default.", default_max_sleep);
ds_put_cstr(&reply, "\n");
- ds_put_format(&reply, "PMD load based sleeps are %s.",
+ ds_put_format(&reply, "PMD load based sleeps are %s "
+ "by default.",
default_max_sleep ? "enabled" : "disabled");
ds_put_cstr(&reply, "\n");
@@ -1924,4 +1938,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
}
+ dp->max_sleep_list = NULL;
+
dp->last_tnl_conf_seq = seq_read(tnl_conf_seq);
*dpp = dp;
@@ -2033,4 +2049,5 @@ dp_netdev_free(struct dp_netdev *dp)
dp_netdev_meter_destroy(dp);
+ free(dp->max_sleep_list);
free(dp->pmd_cmask);
free(CONST_CAST(char *, dp->name));
@@ -4847,4 +4864,8 @@ set_pmd_auto_lb(struct dp_netdev *dp, bool state, bool always_log)
}
+static void
+set_all_pmd_max_sleeps(struct dp_netdev *dp, const struct smap *config,
+ bool always_log);
+
/* Applies datapath configuration from the database. Some of the changes are
* actually applied in dpif_netdev_run(). */
@@ -4864,5 +4885,4 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
uint8_t cur_rebalance_load;
uint32_t rebalance_load, rebalance_improve;
- uint64_t pmd_max_sleep, cur_pmd_max_sleep;
bool log_autolb = false;
enum sched_assignment_type pmd_rxq_assign_type;
@@ -5015,15 +5035,7 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
set_pmd_auto_lb(dp, autolb_state, log_autolb);
- pmd_max_sleep = smap_get_ullong(other_config, "pmd-sleep-max", 0);
- pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
- atomic_read_relaxed(&dp->pmd_max_sleep, &cur_pmd_max_sleep);
- if (first_set_config || pmd_max_sleep != cur_pmd_max_sleep) {
- atomic_store_relaxed(&dp->pmd_max_sleep, pmd_max_sleep);
- VLOG_INFO("PMD max sleep request is %"PRIu64" usecs.", pmd_max_sleep);
- VLOG_INFO("PMD load based sleeps are %s.",
- pmd_max_sleep ? "enabled" : "disabled" );
- }
+ set_all_pmd_max_sleeps(dp, other_config, first_set_config);
- first_set_config = false;
+ first_set_config = false;
return 0;
}
@@ -5065,4 +5077,182 @@ parse_affinity_list(const char *affinity_list, unsigned *core_ids, int n_rxq)
}
+/* Parses 'max_sleep_list', a list of "<max_sleep>" and
+ * "<core_id>:<max_sleep>" entries such as "50,8:0,9:0", into the array
+ * '*pmd_sleeps', which is grown with xrealloc() as entries are added.  The
+ * callers in this file pass '*pmd_sleeps' as NULL and free it afterwards.
+ *
+ * An entry without a value sets the datapath default and is stored with a
+ * 'core_id' of UINT_MAX.  An entry is skipped if its key is not a number, if
+ * it has a value that is not a number, or if its core id is UINT_MAX or
+ * above.  A later entry for the same core (or for the default) overwrites
+ * the earlier one.  Every max sleep is capped at PMD_RCU_QUIESCE_INTERVAL.
+ *
+ * Returns the number of entries stored in '*pmd_sleeps', which is 0 if
+ * 'max_sleep_list' is NULL. */
+static int
+parse_pmd_sleep_list(const char *max_sleep_list,
+                     struct pmd_sleep **pmd_sleeps)
+{
+    char *list, *copy, *key, *value;
+    int num_vals = 0;
+
+    if (!max_sleep_list) {
+        return num_vals;
+    }
+
+    /* Parse a private copy; it is freed before returning. */
+    list = copy = xstrdup(max_sleep_list);
+
+    while (ofputil_parse_key_value(&list, &key, &value)) {
+        uint64_t temp, pmd_max_sleep;
+        char *error;
+        unsigned core;
+        int i;
+
+        error = str_to_u64(key, &temp);
+        if (error) {
+            free(error);
+            continue;
+        }
+
+        if (value[0] == '\0') {
+            /* No value specified.  Key is the dp default. */
+            core = UINT_MAX;
+            pmd_max_sleep = temp;
+        } else {
+            /* Value specified.  Key is a pmd core id.  A malformed value
+             * must not be mistaken for a new dp default, so the whole entry
+             * is dropped.  UINT_MAX is reserved as the default marker. */
+            error = str_to_u64(value, &pmd_max_sleep);
+            if (error || temp >= UINT_MAX) {
+                free(error);
+                continue;
+            }
+            core = (unsigned) temp;
+        }
+
+        /* Detect duplicate max sleep values for default or a specific core. */
+        for (i = 0; i < num_vals; i++) {
+            if ((*pmd_sleeps)[i].core_id == core) {
+                break;
+            }
+        }
+        if (i == num_vals) {
+            /* Not duplicate, add a new entry. */
+            *pmd_sleeps = xrealloc(*pmd_sleeps,
+                                   (num_vals + 1) * sizeof **pmd_sleeps);
+            num_vals++;
+        }
+
+        pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
+
+        (*pmd_sleeps)[i].core_id = core;
+        (*pmd_sleeps)[i].max_sleep = pmd_max_sleep;
+    }
+
+    free(copy);
+    return num_vals;
+}
+
+/* Logs, at INFO level, the max sleep request in effect for the PMD thread on
+ * 'core_id' and 'numa_id'.  A 'pmd_max_sleep' of UINT64_MAX means the PMD has
+ * no value of its own, so 'default_max_sleep' is reported instead. */
+static void
+log_pmd_sleep(unsigned core_id, int numa_id,
+              uint64_t pmd_max_sleep, uint64_t default_max_sleep)
+{
+    uint64_t effective = pmd_max_sleep;
+
+    if (effective == UINT64_MAX) {
+        effective = default_max_sleep;
+    }
+
+    VLOG_INFO("PMD thread core %3u NUMA %2d: Max sleep request set to "
+              "%4"PRIu64" usecs.", core_id, numa_id, effective);
+}
+
+/* Initializes 'pmd->max_sleep' from the "pmd-sleep-max" string saved in
+ * 'dp->max_sleep_list' and logs the value that applies to 'pmd'.
+ *
+ * 'pmd->max_sleep' is set to the value configured for 'pmd->core_id', or to
+ * UINT64_MAX, meaning "use the datapath default", if there is none.
+ *
+ * A thread whose core id is NON_PMD_CORE_ID is given UINT64_MAX and is not
+ * logged, matching how the other max sleep code in this file skips it. */
+static void
+set_pmd_max_sleep(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
+{
+    struct pmd_sleep *pmd_sleeps = NULL;
+    uint64_t max_sleep = UINT64_MAX;
+    uint64_t default_max_sleep;
+    int num_vals;
+
+    if (pmd->core_id == NON_PMD_CORE_ID) {
+        /* Avoid matching the UINT_MAX default marker and logging a bogus
+         * "PMD thread core" line.
+         * NOTE(review): assumes NON_PMD_CORE_ID can equal UINT_MAX - confirm. */
+        atomic_init(&pmd->max_sleep, max_sleep);
+        return;
+    }
+
+    num_vals = parse_pmd_sleep_list(dp->max_sleep_list, &pmd_sleeps);
+
+    /* Check if the user has set a specific value for this pmd. */
+    for (int i = 0; i < num_vals; i++) {
+        if (pmd_sleeps[i].core_id == pmd->core_id) {
+            max_sleep = pmd_sleeps[i].max_sleep;
+            break;
+        }
+    }
+    free(pmd_sleeps);
+
+    atomic_init(&pmd->max_sleep, max_sleep);
+
+    /* 'dp->pmd_max_sleep' is atomic, so it is read through the atomic
+     * accessor rather than passed by plain value. */
+    atomic_read_relaxed(&dp->pmd_max_sleep, &default_max_sleep);
+    log_pmd_sleep(pmd->core_id, pmd->numa_id, max_sleep, default_max_sleep);
+}
+
+/* Returns the max sleep of the first of the 'n_sleeps' entries in 'sleeps'
+ * whose core id is 'core_id', or 'fallback' if no entry matches. */
+static uint64_t
+pmd_sleep_lookup(const struct pmd_sleep *sleeps, int n_sleeps,
+                 unsigned core_id, uint64_t fallback)
+{
+    for (int idx = 0; idx < n_sleeps; idx++) {
+        if (sleeps[idx].core_id == core_id) {
+            return sleeps[idx].max_sleep;
+        }
+    }
+    return fallback;
+}
+
+/* Applies the "pmd-sleep-max" setting from 'config' to 'dp' and to every
+ * PMD thread in 'dp->poll_threads'.
+ *
+ * Nothing is done if the setting is unchanged from the string saved in
+ * 'dp->max_sleep_list' and 'always_log' is false.  Otherwise the string is
+ * saved, the datapath default is updated (0 if the user gave none) and each
+ * PMD gets either its own value or UINT64_MAX, meaning "use the default".
+ *
+ * The default and the per-PMD values are logged if 'always_log' is true or
+ * if any of the stored values changed. */
+static void
+set_all_pmd_max_sleeps(struct dp_netdev *dp, const struct smap *config,
+                       bool always_log)
+{
+    const char *new_list = smap_get(config, "pmd-sleep-max");
+    struct dp_netdev_pmd_thread **sorted_pmds = NULL;
+    struct pmd_sleep *entries = NULL;
+    struct dp_netdev_pmd_thread *pmd;
+    uint64_t dp_default, old_dp_default;
+    size_t n_pmds;
+    int n_entries;
+
+    if (!always_log
+        && nullable_string_is_equal(new_list, dp->max_sleep_list)) {
+        return;
+    }
+
+    /* Remember the user string. */
+    free(dp->max_sleep_list);
+    dp->max_sleep_list = nullable_xstrdup(new_list);
+
+    n_entries = parse_pmd_sleep_list(new_list, &entries);
+
+    /* The dp default is the entry stored under UINT_MAX, if any. */
+    dp_default = pmd_sleep_lookup(entries, n_entries, UINT_MAX, 0);
+    atomic_read_relaxed(&dp->pmd_max_sleep, &old_dp_default);
+    if (old_dp_default != dp_default) {
+        atomic_store_relaxed(&dp->pmd_max_sleep, dp_default);
+        always_log = true;
+    }
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        uint64_t wanted, current;
+
+        if (pmd->core_id == NON_PMD_CORE_ID) {
+            continue;
+        }
+
+        /* A pmd specific value wins; UINT64_MAX selects the dp value. */
+        wanted = pmd_sleep_lookup(entries, n_entries, pmd->core_id,
+                                  UINT64_MAX);
+        atomic_read_relaxed(&pmd->max_sleep, &current);
+        if (current != wanted) {
+            atomic_store_relaxed(&pmd->max_sleep, wanted);
+            always_log = true;
+        }
+    }
+    free(entries);
+
+    if (!always_log) {
+        return;
+    }
+
+    VLOG_INFO("PMD max sleep request is %"PRIu64" "
+              "usecs by default.", dp_default);
+    VLOG_INFO("PMD load based sleeps are %s by default.",
+              dp_default ? "enabled" : "disabled" );
+
+    sorted_poll_thread_list(dp, &sorted_pmds, &n_pmds);
+
+    for (size_t k = 0; k < n_pmds; k++) {
+        uint64_t current;
+
+        pmd = sorted_pmds[k];
+        if (pmd->core_id != NON_PMD_CORE_ID) {
+            atomic_read_relaxed(&pmd->max_sleep, &current);
+            log_pmd_sleep(pmd->core_id, pmd->numa_id, current, dp_default);
+        }
+    }
+    free(sorted_pmds);
+}
+
/* Parses 'affinity_list' and applies configuration if it is valid. */
static int
@@ -7054,5 +7244,8 @@ reload:
atomic_read_relaxed(&pmd->dp->smc_enable_db, &pmd->ctx.smc_enable_db);
- atomic_read_relaxed(&pmd->dp->pmd_max_sleep, &max_sleep);
+ atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
+ if (max_sleep == UINT64_MAX) {
+ atomic_read_relaxed(&pmd->dp->pmd_max_sleep, &max_sleep);
+ }
for (i = 0; i < poll_cnt; i++) {
@@ -7616,4 +7809,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
cmap_init(&pmd->tx_bonds);
+ set_pmd_max_sleep(dp, pmd);
+
/* Initialize DPIF function pointer to the default configured version. */
atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default());
@@ -66,6 +66,6 @@ dnl Checks correct pmd load based sleep is set for the datapath.
dnl Checking starts from line number 'line' in ovs-vswithd.log .
m4_define([CHECK_DP_SLEEP_MAX], [
- SLEEP_TIME="PMD max sleep request is $1 usecs."
- SLEEP_STATE="PMD load based sleeps are $2."
+ SLEEP_TIME="PMD max sleep request is $1 usecs by default."
+ SLEEP_STATE="PMD load based sleeps are $2 by default."
line_st=$3
if [[ -z "$line_st" ]]
@@ -1279,6 +1279,6 @@ CHECK_DP_SLEEP_MAX([0], [disabled], [])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 0 usecs.
-PMD load based sleeps are disabled.
+PMD max sleep request is 0 usecs by default.
+PMD load based sleeps are disabled by default.
])
@@ -1288,6 +1288,6 @@ AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="1"])
CHECK_DP_SLEEP_MAX([1], [enabled], [+$LINENUM])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 1 usecs.
-PMD load based sleeps are enabled.
+PMD max sleep request is 1 usecs by default.
+PMD load based sleeps are enabled by default.
])
@@ -1297,6 +1297,6 @@ AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="10000"])
CHECK_DP_SLEEP_MAX([10000], [enabled], [+$LINENUM])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 10000 usecs.
-PMD load based sleeps are enabled.
+PMD max sleep request is 10000 usecs by default.
+PMD load based sleeps are enabled by default.
])
@@ -1306,6 +1306,6 @@ AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="0"])
CHECK_DP_SLEEP_MAX([0], [disabled], [+$LINENUM])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 0 usecs.
-PMD load based sleeps are disabled.
+PMD max sleep request is 0 usecs by default.
+PMD load based sleeps are disabled by default.
])
@@ -1315,6 +1315,6 @@ AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="10001"])
CHECK_DP_SLEEP_MAX([10000], [enabled], [+$LINENUM])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 10000 usecs.
-PMD load based sleeps are enabled.
+PMD max sleep request is 10000 usecs by default.
+PMD load based sleeps are enabled by default.
])
@@ -1324,6 +1324,6 @@ AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="490"])
CHECK_DP_SLEEP_MAX([490], [enabled], [+$LINENUM])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 490 usecs.
-PMD load based sleeps are enabled.
+PMD max sleep request is 490 usecs by default.
+PMD load based sleeps are enabled by default.
])
@@ -1333,6 +1333,6 @@ AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="499"])
CHECK_DP_SLEEP_MAX([499], [enabled], [+$LINENUM])
AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
-PMD max sleep request is 499 usecs.
-PMD load based sleeps are enabled.
+PMD max sleep request is 499 usecs by default.
+PMD load based sleeps are enabled by default.
])
Extend 'pmd-sleep-max' so that individual PMD thread cores may have a specified max sleep request value. Any PMD thread core without a value will use the datapath default (no sleep request) or datapath global value set by the user. To set PMD thread cores 8 and 9 to never request a load based sleep and all other PMD thread cores to be able to request a max sleep of 50 usecs: $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0 To set PMD thread cores 10 and 11 to request a max sleep of 100 usecs and all other PMD thread cores to never request a sleep: $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=10:100,11:100 'pmd-sleep-show' can be used to dump the global and individual PMD thread core max sleep request values. Signed-off-by: Kevin Traynor <ktraynor@redhat.com> --- Documentation/topics/dpdk/pmd.rst | 23 +++ lib/dpif-netdev-private-thread.h | 3 + lib/dpif-netdev.c | 225 ++++++++++++++++++++++++++++-- tests/pmd.at | 32 ++--- 4 files changed, 252 insertions(+), 31 deletions(-)