diff mbox series

[ovs-dev,v6,1/1] dpif-netdev: Add per pmd sleep config.

Message ID 20231214111515.453167-2-ktraynor@redhat.com
State Accepted
Commit 4cbbf56e6cccd9b2593bc77395c7ca2e78f34191
Headers show
Series Per pmd load based sleeping | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Kevin Traynor Dec. 14, 2023, 11:15 a.m. UTC
Extend 'pmd-sleep-max' so that individual PMD thread cores
may have a specified max sleep request value.

Existing behaviour is maintained.

Any PMD thread core without a value will use the global value
if set, or otherwise default to no sleep.

To set PMD thread cores 8 and 9 to never request a load based sleep
and all other PMD thread cores to be able to request a max sleep of
50 usecs:

$ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0

To set PMD thread cores 10 and 11 to request a max sleep of 100 usecs
and all other PMD thread cores to never request a sleep:

$ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=10:100,11:100

'pmd-sleep-show' is updated to show the max sleep value for each PMD thread.

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
---
 Documentation/topics/dpdk/pmd.rst |  34 ++-
 NEWS                              |   4 +
 lib/dpif-netdev-private-thread.h  |   3 +
 lib/dpif-netdev.c                 | 270 ++++++++++++++++++++---
 tests/pmd.at                      | 350 ++++++++++++++++++++++++++++--
 vswitchd/vswitch.xml              |  31 ++-
 6 files changed, 642 insertions(+), 50 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index f43819be0..dd6ee46bd 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -354,8 +354,4 @@  time not processing packets will be determined by the sleep and processor
 wake-up times and should be tested with each system configuration.
 
-The current configuration of the PMD load based sleeping can be shown with::
-
-    $ ovs-appctl dpif-netdev/pmd-sleep-show
-
 Sleep time statistics for 10 secs can be seen with::
 
@@ -380,4 +376,34 @@  system configuration (e.g. enabling processor C-states) and workloads.
     rate.
 
+Maximum sleep values can also be set for individual PMD threads using
+key:value pairs in the form of core:max_sleep. Any PMD thread that has been
+assigned a specified value will use that. Any PMD thread that does not have
+a specified value will use the current global value.
+
+Specified values for individual PMD threads can be added or removed at
+any time.
+
+For example, to set PMD threads on cores 8 and 9 to never request a load based
+sleep and all other PMD threads to be able to request a max sleep of
+50 microseconds (us)::
+
+    $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0
+
+The max sleep value for each PMD thread can be checked in the logs or with::
+
+    $ ovs-appctl dpif-netdev/pmd-sleep-show
+    pmd thread numa_id 0 core_id 8:
+      max sleep:    0 us
+    pmd thread numa_id 1 core_id 9:
+      max sleep:    0 us
+    pmd thread numa_id 0 core_id 10:
+      max sleep:   50 us
+    pmd thread numa_id 1 core_id 11:
+      max sleep:   50 us
+    pmd thread numa_id 0 core_id 12:
+      max sleep:   50 us
+    pmd thread numa_id 1 core_id 13:
+      max sleep:   50 us
+
 .. _ovs-vswitchd(8):
     http://openvswitch.org/support/dist-docs/ovs-vswitchd.8.html
diff --git a/NEWS b/NEWS
index 63f2842ae..48bd264ed 100644
--- a/NEWS
+++ b/NEWS
@@ -27,4 +27,8 @@  Post-v3.2.0
        TSO is enabled but not supported by an egress interface (except for
        tunnel interfaces).
+     * 'pmd-sleep-max' is updated to also accept pmd-thread-core:sleep-max.
+       The existing behaviour is maintained and a non key:value pair value
+       will be applied to all other PMD thread cores. 'pmd-sleep-show' is
+       updated to show the maximum sleep for each PMD thread core.
 
 
diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index 1ec3cd794..8715b3837 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -181,4 +181,7 @@  struct dp_netdev_pmd_thread {
     bool isolated;
 
+    /* Max sleep request in microseconds. */
+    atomic_uint64_t max_sleep;
+
     /* Queue id used by this pmd thread to send packets on all netdevs if
      * XPS disabled for this netdev. All static_tx_qid's are unique and less
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 9a59a1b03..f859ef618 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -180,4 +180,9 @@  static struct odp_support dp_netdev_support = {
 #define PMD_SLEEP_INC_US 1
 
+struct pmd_sleep {
+    unsigned core_id;
+    uint64_t max_sleep;
+};
+
 struct dpcls {
     struct cmap_node node;      /* Within dp_netdev_pmd_thread.classifiers */
@@ -288,6 +293,6 @@  struct dp_netdev {
     /* Enable collection of PMD performance metrics. */
     atomic_bool pmd_perf_metrics;
-    /* Max load based sleep request. */
-    atomic_uint64_t pmd_max_sleep;
+    /* Default max load based sleep request. */
+    uint64_t pmd_max_sleep_default;
     /* Enable the SMC cache from ovsdb config */
     atomic_bool smc_enable_db;
@@ -327,4 +332,7 @@  struct dp_netdev {
     char *pmd_cmask;
 
+    /* PMD load based max sleep request user string. */
+    char *max_sleep_list;
+
     uint64_t last_tnl_conf_seq;
 
@@ -1429,4 +1437,17 @@  dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
 }
 
+static void
+pmd_info_show_sleep(struct ds *reply, unsigned core_id, int numa_id,
+                    uint64_t pmd_max_sleep)
+{
+    if (core_id == NON_PMD_CORE_ID) {
+        return;
+    }
+    ds_put_format(reply,
+                  "pmd thread numa_id %d core_id %d:\n"
+                  "  max sleep: %4"PRIu64" us\n",
+                  numa_id, core_id, pmd_max_sleep);
+}
+
 static void
 dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
@@ -1443,7 +1464,6 @@  dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
     unsigned long long max_secs = (PMD_INTERVAL_LEN * PMD_INTERVAL_MAX)
                                       / INTERVAL_USEC_TO_SEC;
-    uint64_t default_max_sleep = 0;
     bool show_header = true;
-
+    uint64_t max_sleep;
 
     ovs_mutex_lock(&dp_netdev_mutex);
@@ -1513,10 +1533,11 @@  dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
         } else if (type == PMD_INFO_SLEEP_SHOW) {
             if (show_header) {
-                atomic_read_relaxed(&dp->pmd_max_sleep, &default_max_sleep);
-                ds_put_format(&reply, "Default max sleep: %4"PRIu64" us",
-                              default_max_sleep);
-                ds_put_cstr(&reply, "\n");
+                ds_put_format(&reply, "Default max sleep: %4"PRIu64" us\n",
+                              dp->pmd_max_sleep_default);
                 show_header = false;
             }
+            atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
+            pmd_info_show_sleep(&reply, pmd->core_id, pmd->numa_id,
+                                max_sleep);
         }
     }
@@ -1907,4 +1928,6 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     }
 
+    dp->max_sleep_list = NULL;
+
     dp->last_tnl_conf_seq = seq_read(tnl_conf_seq);
     *dpp = dp;
@@ -2016,4 +2039,5 @@  dp_netdev_free(struct dp_netdev *dp)
     dp_netdev_meter_destroy(dp);
 
+    free(dp->max_sleep_list);
     free(dp->pmd_cmask);
     free(CONST_CAST(char *, dp->name));
@@ -4848,4 +4872,207 @@  set_pmd_auto_lb(struct dp_netdev *dp, bool state, bool always_log)
 }
 
+static int
+parse_pmd_sleep_list(const char *max_sleep_list,
+                     struct pmd_sleep **pmd_sleeps)
+{
+    char *list, *copy, *key, *value;
+    int num_vals = 0;
+
+    if (!max_sleep_list) {
+        return num_vals;
+    }
+
+    list = copy = xstrdup(max_sleep_list);
+
+    while (ofputil_parse_key_value(&list, &key, &value)) {
+        uint64_t temp, pmd_max_sleep;
+        char *error = NULL;
+        unsigned core;
+        int i;
+
+        error = str_to_u64(key, &temp);
+        if (error) {
+            free(error);
+            continue;
+        }
+
+        if (value[0] == '\0') {
+            /* No value specified. key is dp default. */
+            core = UINT_MAX;
+            pmd_max_sleep = temp;
+        } else {
+            error = str_to_u64(value, &pmd_max_sleep);
+            if (!error && temp < UINT_MAX) {
+                /* Key is pmd core id. */
+                core = (unsigned) temp;
+            } else {
+                free(error);
+                continue;
+            }
+        }
+
+        /* Detect duplicate keys; the last entry for a key wins. */
+        for (i = 0; i < num_vals; i++) {
+            if ((*pmd_sleeps)[i].core_id == core) {
+                break;
+            }
+        }
+        if (i == num_vals) {
+            /* Not duplicate, add a new entry. */
+            *pmd_sleeps = xrealloc(*pmd_sleeps,
+                                   (num_vals + 1) * sizeof **pmd_sleeps);
+            num_vals++;
+        }
+
+        pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
+
+        (*pmd_sleeps)[i].core_id = core;
+        (*pmd_sleeps)[i].max_sleep = pmd_max_sleep;
+    }
+
+    free(copy);
+    return num_vals;
+}
+
+static void
+log_pmd_sleep(unsigned core_id, int numa_id, uint64_t pmd_max_sleep)
+{
+    if (core_id == NON_PMD_CORE_ID) {
+        return;
+    }
+    VLOG_INFO("PMD thread on numa_id: %d, core id: %2d, "
+              "max sleep: %4"PRIu64" us.", numa_id, core_id, pmd_max_sleep);
+}
+
+static void
+pmd_init_max_sleep(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
+{
+    uint64_t max_sleep = dp->pmd_max_sleep_default;
+    struct pmd_sleep *pmd_sleeps = NULL;
+    int num_vals;
+
+    num_vals = parse_pmd_sleep_list(dp->max_sleep_list, &pmd_sleeps);
+
+    /* Check if the user has set a specific value for this pmd. */
+    for (int i = 0; i < num_vals; i++) {
+        if (pmd_sleeps[i].core_id == pmd->core_id) {
+            max_sleep = pmd_sleeps[i].max_sleep;
+            break;
+        }
+    }
+    atomic_init(&pmd->max_sleep, max_sleep);
+    log_pmd_sleep(pmd->core_id, pmd->numa_id, max_sleep);
+    free(pmd_sleeps);
+}
+
+static bool
+assign_sleep_values_to_pmds(struct dp_netdev *dp, int num_vals,
+                            struct pmd_sleep *pmd_sleeps)
+{
+    struct dp_netdev_pmd_thread *pmd;
+    bool value_changed = false;
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        uint64_t new_max_sleep, cur_pmd_max_sleep;
+
+        if (pmd->core_id == NON_PMD_CORE_ID) {
+            continue;
+        }
+
+        /* Default to global value. */
+        new_max_sleep = dp->pmd_max_sleep_default;
+
+        /* Check for pmd specific value. */
+        for (int i = 0;  i < num_vals; i++) {
+            if (pmd->core_id == pmd_sleeps[i].core_id) {
+                new_max_sleep = pmd_sleeps[i].max_sleep;
+                break;
+            }
+        }
+        atomic_read_relaxed(&pmd->max_sleep, &cur_pmd_max_sleep);
+        if (new_max_sleep != cur_pmd_max_sleep) {
+            atomic_store_relaxed(&pmd->max_sleep, new_max_sleep);
+            value_changed = true;
+        }
+    }
+    return value_changed;
+}
+
+static void
+log_all_pmd_sleeps(struct dp_netdev *dp)
+{
+    struct dp_netdev_pmd_thread **pmd_list = NULL;
+    struct dp_netdev_pmd_thread *pmd;
+    size_t n;
+
+    VLOG_INFO("Default PMD thread max sleep: %4"PRIu64" us.",
+              dp->pmd_max_sleep_default);
+
+    sorted_poll_thread_list(dp, &pmd_list, &n);
+
+    for (size_t i = 0; i < n; i++) {
+        uint64_t cur_pmd_max_sleep;
+
+        pmd = pmd_list[i];
+        atomic_read_relaxed(&pmd->max_sleep, &cur_pmd_max_sleep);
+        log_pmd_sleep(pmd->core_id, pmd->numa_id, cur_pmd_max_sleep);
+    }
+    free(pmd_list);
+}
+
+static bool
+set_all_pmd_max_sleeps(struct dp_netdev *dp, const struct smap *config)
+{
+    const char *max_sleep_list = smap_get(config, "pmd-sleep-max");
+    struct pmd_sleep *pmd_sleeps = NULL;
+    uint64_t default_max_sleep = 0;
+    bool default_changed = false;
+    bool pmd_changed = false;
+    uint64_t pmd_maxsleep;
+    int num_vals = 0;
+
+    /* Check for deprecated 'pmd-maxsleep' value. */
+    pmd_maxsleep = smap_get_ullong(config, "pmd-maxsleep", UINT64_MAX);
+    if (pmd_maxsleep != UINT64_MAX && !max_sleep_list) {
+        VLOG_WARN_ONCE("pmd-maxsleep is deprecated. "
+                       "Please use pmd-sleep-max instead.");
+        default_max_sleep = pmd_maxsleep;
+    }
+
+    /* Check if there is no change in string or value. */
+    if (!!dp->max_sleep_list == !!max_sleep_list) {
+        if (max_sleep_list
+            ? nullable_string_is_equal(max_sleep_list, dp->max_sleep_list)
+            : default_max_sleep == dp->pmd_max_sleep_default) {
+            return false;
+        }
+    }
+
+    /* Free existing string and copy new one (if any). */
+    free(dp->max_sleep_list);
+    dp->max_sleep_list = nullable_xstrdup(max_sleep_list);
+
+    if (max_sleep_list) {
+        num_vals = parse_pmd_sleep_list(max_sleep_list, &pmd_sleeps);
+
+        /* Check if the user has set a global value. */
+        for (int i = 0; i < num_vals; i++) {
+            if (pmd_sleeps[i].core_id == UINT_MAX) {
+                default_max_sleep = pmd_sleeps[i].max_sleep;
+                break;
+            }
+        }
+    }
+
+    if (dp->pmd_max_sleep_default != default_max_sleep) {
+        dp->pmd_max_sleep_default = default_max_sleep;
+        default_changed = true;
+    }
+    pmd_changed = assign_sleep_values_to_pmds(dp, num_vals, pmd_sleeps);
+
+    free(pmd_sleeps);
+    return default_changed || pmd_changed;
+}
+
 /* Applies datapath configuration from the database. Some of the changes are
  * actually applied in dpif_netdev_run(). */
@@ -4865,5 +5092,4 @@  dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
     uint8_t cur_rebalance_load;
     uint32_t rebalance_load, rebalance_improve;
-    uint64_t  pmd_max_sleep, cur_pmd_max_sleep;
     bool log_autolb = false;
     enum sched_assignment_type pmd_rxq_assign_type;
@@ -5016,24 +5242,10 @@  dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
     set_pmd_auto_lb(dp, autolb_state, log_autolb);
 
-    pmd_max_sleep = smap_get_ullong(other_config, "pmd-maxsleep", UINT64_MAX);
-    if (pmd_max_sleep != UINT64_MAX) {
-        VLOG_WARN("pmd-maxsleep is deprecated. "
-                  "Please use pmd-sleep-max instead.");
-    } else {
-        pmd_max_sleep = 0;
+    bool sleep_changed = set_all_pmd_max_sleeps(dp, other_config);
+    if (first_set_config || sleep_changed) {
+        log_all_pmd_sleeps(dp);
     }
 
-    pmd_max_sleep = smap_get_ullong(other_config, "pmd-sleep-max",
-                                    pmd_max_sleep);
-    pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
-    atomic_read_relaxed(&dp->pmd_max_sleep, &cur_pmd_max_sleep);
-    if (first_set_config || pmd_max_sleep != cur_pmd_max_sleep) {
-        atomic_store_relaxed(&dp->pmd_max_sleep, pmd_max_sleep);
-        VLOG_INFO("PMD max sleep request is %"PRIu64" usecs.", pmd_max_sleep);
-        VLOG_INFO("PMD load based sleeps are %s.",
-                  pmd_max_sleep ? "enabled" : "disabled" );
-    }
-
-    first_set_config  = false;
+    first_set_config = false;
     return 0;
 }
@@ -7064,5 +7276,5 @@  reload:
 
         atomic_read_relaxed(&pmd->dp->smc_enable_db, &pmd->ctx.smc_enable_db);
-        atomic_read_relaxed(&pmd->dp->pmd_max_sleep, &max_sleep);
+        atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
 
         for (i = 0; i < poll_cnt; i++) {
@@ -7651,4 +7863,6 @@  dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     cmap_init(&pmd->tx_bonds);
 
+    pmd_init_max_sleep(dp, pmd);
+
     /* Initialize DPIF function pointer to the default configured version. */
     atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default());
diff --git a/tests/pmd.at b/tests/pmd.at
index 06cc90477..658c79d4e 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -61,12 +61,11 @@  m4_define([CHECK_PMD_THREADS_CREATED], [
 ])
 
-dnl CHECK_DP_SLEEP_MAX([max_sleep], [enabled], [+line])
+dnl CHECK_DP_SLEEP_MAX([max_sleep], [+line])
 dnl
-dnl Checks correct pmd load based sleep is set for the datapath.
+dnl Checks correct pmd load based sleep value for the datapath.
 dnl Checking starts from line number 'line' in ovs-vswithd.log .
 m4_define([CHECK_DP_SLEEP_MAX], [
-    SLEEP_TIME="PMD max sleep request is $1 usecs."
-    SLEEP_STATE="PMD load based sleeps are $2."
-    line_st=$3
+    SLEEP_TIME="Default PMD thread max sleep: *[$1] us."
+    line_st=$2
     if [[ -z "$line_st" ]]
     then
@@ -74,5 +73,18 @@  m4_define([CHECK_DP_SLEEP_MAX], [
     fi
     OVS_WAIT_UNTIL([tail -n $line_st ovs-vswitchd.log | grep "$SLEEP_TIME"])
-    OVS_WAIT_UNTIL([tail -n $line_st ovs-vswitchd.log | grep "$SLEEP_STATE"])
+])
+
+dnl CHECK_PMD_SLEEP_MAX([numa_id], [core_id], [max_sleep], [+line])
+dnl
+dnl Checks the max sleep time logged for the pmd on numa_id and core_id.
+dnl Checking starts from line number 'line' in ovs-vswitchd.log .
+m4_define([CHECK_PMD_SLEEP_MAX], [
+    PATTERN="PMD thread on numa_id: *[$1], core id: *[$2], max sleep: *[$3] us."
+    line_st=$4
+    if [[ -z "$line_st" ]]
+    then
+        line_st="+0"
+    fi
+    OVS_WAIT_UNTIL([tail -n $line_st ovs-vswitchd.log | grep "$PATTERN"])
 ])
 
@@ -1273,11 +1285,19 @@  AT_CLEANUP
 
 AT_SETUP([PMD - pmd sleep])
-OVS_VSWITCHD_START
+OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1], [], [], [--dummy-numa 0,0,0,1,1,8,8])
 
 dnl Check default
-CHECK_DP_SLEEP_MAX([0], [disabled], [])
-
+CHECK_DP_SLEEP_MAX([0], [])
+CHECK_PMD_SLEEP_MAX([0], [0], [0], [])
+CHECK_PMD_SLEEP_MAX([1], [3], [0], [])
+CHECK_PMD_SLEEP_MAX([8], [5], [0], [])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep:    0 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:    0 us
 ])
 
@@ -1285,7 +1305,16 @@  dnl Check low value max sleep
 get_log_next_line_num
 AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="1"])
-CHECK_DP_SLEEP_MAX([1], [enabled], [+$LINENUM])
+CHECK_DP_SLEEP_MAX([1], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [1], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [1], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [1], [+$LINENUM])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep:    1 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:    1 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:    1 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:    1 us
 ])
 
@@ -1293,7 +1322,16 @@  dnl Check high value max sleep
 get_log_next_line_num
 AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="10000"])
-CHECK_DP_SLEEP_MAX([10000], [enabled], [+$LINENUM])
+CHECK_DP_SLEEP_MAX([10000], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [10000], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [10000], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [10000], [+$LINENUM])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep: 10000 us
+pmd thread numa_id 0 core_id 0:
+  max sleep: 10000 us
+pmd thread numa_id 1 core_id 3:
+  max sleep: 10000 us
+pmd thread numa_id 8 core_id 5:
+  max sleep: 10000 us
 ])
 
@@ -1301,7 +1339,16 @@  dnl Check setting max sleep to zero
 get_log_next_line_num
 AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="0"])
-CHECK_DP_SLEEP_MAX([0], [disabled], [+$LINENUM])
+CHECK_DP_SLEEP_MAX([0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [0], [+$LINENUM])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep:    0 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:    0 us
 ])
 
@@ -1309,7 +1356,16 @@  dnl Check above high value max sleep
 get_log_next_line_num
 AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="10001"])
-CHECK_DP_SLEEP_MAX([10000], [enabled], [+$LINENUM])
+CHECK_DP_SLEEP_MAX([10000], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [10000], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [10000], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [10000], [+$LINENUM])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep: 10000 us
+pmd thread numa_id 0 core_id 0:
+  max sleep: 10000 us
+pmd thread numa_id 1 core_id 3:
+  max sleep: 10000 us
+pmd thread numa_id 8 core_id 5:
+  max sleep: 10000 us
 ])
 
@@ -1317,7 +1373,16 @@  dnl Check rounding
 get_log_next_line_num
 AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="490"])
-CHECK_DP_SLEEP_MAX([490], [enabled], [+$LINENUM])
+CHECK_DP_SLEEP_MAX([490], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [490], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [490], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [490], [+$LINENUM])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep:  490 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  490 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  490 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  490 us
 ])
 
@@ -1325,7 +1390,262 @@  dnl Check rounding
 get_log_next_line_num
 AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-sleep-max="499"])
-CHECK_DP_SLEEP_MAX([499], [enabled], [+$LINENUM])
+CHECK_DP_SLEEP_MAX([499], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [499], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [499], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [499], [+$LINENUM])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
 Default max sleep:  499 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  499 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  499 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  499 us
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([PMD - per pmd sleep])
+OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1], [], [], [--dummy-numa 0,0,0,1,1,8,8])
+
+dnl Check system default
+CHECK_DP_SLEEP_MAX([0], [])
+CHECK_PMD_SLEEP_MAX([0], [0], [0], [])
+CHECK_PMD_SLEEP_MAX([1], [3], [0], [])
+CHECK_PMD_SLEEP_MAX([8], [5], [0], [])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:    0 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:    0 us
+])
+
+dnl only per pmd
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=3:300,0:100,5:400])
+CHECK_DP_SLEEP_MAX([0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [100], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [300], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [400], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:    0 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  100 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  300 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  400 us
+])
+
+dnl mix of not used default and per-pmd
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=50,3:300,0:100,5:200])
+CHECK_DP_SLEEP_MAX([50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [100], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [300], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [200], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:   50 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  100 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  300 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  200 us
+])
+
+dnl remove a per-pmd entry and use default
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=50,3:300])
+CHECK_DP_SLEEP_MAX([50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [300], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [50], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:   50 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:   50 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  300 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:   50 us
+])
+
+dnl mix and change values
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=3:400,200])
+CHECK_DP_SLEEP_MAX([200], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [200], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [400], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [200], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:  200 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  200 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  400 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  200 us
+])
+
+dnl add values for pmds that don't exist yet
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=2:600,50,3:300,0:100,6:400,5:200])
+CHECK_DP_SLEEP_MAX([50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [100], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [300], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [200], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:   50 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  100 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  300 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  200 us
+])
+
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=7f])
+CHECK_PMD_SLEEP_MAX([0], [1], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [2], [600], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [4], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [6],[400], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:   50 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:  100 us
+pmd thread numa_id 0 core_id 1:
+  max sleep:   50 us
+pmd thread numa_id 0 core_id 2:
+  max sleep:  600 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:  300 us
+pmd thread numa_id 1 core_id 4:
+  max sleep:   50 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:  200 us
+pmd thread numa_id 8 core_id 6:
+  max sleep:  400 us
+])
+
+dnl go back to just a global value
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=90])
+CHECK_DP_SLEEP_MAX([90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [1], [90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [2], [90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [4], [90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [90], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [6], [90], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:   90 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:   90 us
+pmd thread numa_id 0 core_id 1:
+  max sleep:   90 us
+pmd thread numa_id 0 core_id 2:
+  max sleep:   90 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:   90 us
+pmd thread numa_id 1 core_id 4:
+  max sleep:   90 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:   90 us
+pmd thread numa_id 8 core_id 6:
+  max sleep:   90 us
+])
+
+dnl try invalid value
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=qwe])
+CHECK_DP_SLEEP_MAX([0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [1], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [2], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [4], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [6], [0], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:    0 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:    0 us
+pmd thread numa_id 0 core_id 1:
+  max sleep:    0 us
+pmd thread numa_id 0 core_id 2:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 4:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 6:
+  max sleep:    0 us
+])
+
+dnl try invalid key:value
+get_log_next_line_num
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-sleep-max=50,1:qwe,2:0])
+CHECK_DP_SLEEP_MAX([50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [1], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [2], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [4], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [50], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [6], [50], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:   50 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:   50 us
+pmd thread numa_id 0 core_id 1:
+  max sleep:   50 us
+pmd thread numa_id 0 core_id 2:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:   50 us
+pmd thread numa_id 1 core_id 4:
+  max sleep:   50 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:   50 us
+pmd thread numa_id 8 core_id 6:
+  max sleep:   50 us
+])
+
+dnl remove config
+get_log_next_line_num
+AT_CHECK([ovs-vsctl remove Open_vSwitch . other_config pmd-sleep-max])
+CHECK_DP_SLEEP_MAX([0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [0], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [1], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([0], [2], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [3], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([1], [4], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [5], [0], [+$LINENUM])
+CHECK_PMD_SLEEP_MAX([8], [6], [0], [+$LINENUM])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-sleep-show], [0], [dnl
+Default max sleep:    0 us
+pmd thread numa_id 0 core_id 0:
+  max sleep:    0 us
+pmd thread numa_id 0 core_id 1:
+  max sleep:    0 us
+pmd thread numa_id 0 core_id 2:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 3:
+  max sleep:    0 us
+pmd thread numa_id 1 core_id 4:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 5:
+  max sleep:    0 us
+pmd thread numa_id 8 core_id 6:
+  max sleep:    0 us
 ])
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index eaccd85cf..612ba41e3 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -803,7 +803,5 @@ 
         </p>
       </column>
-      <column name="other_config" key="pmd-sleep-max"
-              type='{"type": "integer",
-                     "minInteger": 0, "maxInteger": 10000}'>
+      <column name="other_config" key="pmd-sleep-max">
         <p>
           Specifies the maximum sleep time that will be requested in
@@ -824,4 +822,31 @@ 
           The maximum value is <code>10000 microseconds</code>.
         </p>
+        <p>
+         <code>other_config:pmd-sleep-max=&lt;pmd-sleep-list&gt;</code>
+        </p>
+        <p>where</p>
+        <p>
+         <ul>
+           <li>
+             &lt;pmd-sleep-list&gt; ::= NULL | &lt;non-empty-list&gt;
+           </li>
+           <li>
+             &lt;non-empty-list&gt; ::= &lt;pmd-sleep-value&gt; |
+                                        &lt;pmd-sleep-value&gt; ,
+                                        &lt;non-empty-list&gt;
+           </li>
+           <li>
+             &lt;pmd-sleep-value&gt; ::= &lt;global-default-sleep-value&gt; |
+                                         &lt;pmd-core-sleep-pair&gt;
+           </li>
+           <li>
+             &lt;global-default-sleep-value&gt; ::= &lt;max-sleep-time&gt;
+           </li>
+           <li>
+             &lt;pmd-core-sleep-pair&gt; ::= &lt;core&gt; :
+                                             &lt;max-sleep-time&gt;
+           </li>
+         </ul>
+        </p>
       </column>
       <column name="other_config" key="userspace-tso-enable"