[ovs-dev,RFC,8/8] dpif-netdev: Add percentage of pmd/core used by each rxq.

Message ID 1515096166-16257-9-git-send-email-jan.scheurich@ericsson.com
State Superseded
Delegated to: Ian Stokes
Headers show
Series
  • dpif-netdev: Refactor cycle count and rebased patches
Related show

Commit Message

Jan Scheurich Jan. 4, 2018, 8:02 p.m.
The reported pmd usage of each rxq is based on the length of history that is
stored about an rxq (currently 1 min).

$ ovs-appctl dpif-netdev/pmd-rxq-show
pmd thread numa_id 0 core_id 4:
        isolated : false
        port:         dpdkphy1  queue-id:  0    pmd usage: 70 %
        port:       dpdkvhost0  queue-id:  0    pmd usage:  0 %
pmd thread numa_id 0 core_id 6:
        isolated : false
        port:         dpdkphy0  queue-id:  0    pmd usage: 64 %
        port:       dpdkvhost1  queue-id:  0    pmd usage:  0 %

These values are what would be used as part of rxq to pmd
assignment due to a reconfiguration event e.g. adding pmds,
adding rxqs or with the command:

ovs-appctl dpif-netdev/pmd-rxq-rebalance

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Co-authored-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
---
 Documentation/howto/dpdk.rst | 12 +++++++
 NEWS                         |  1 +
 lib/dpif-netdev.c            | 85 +++++++++++++++++++++++++++++++++-----------
 tests/pmd.at                 | 51 +++++++++++++++++++-------
 4 files changed, 116 insertions(+), 33 deletions(-)

Patch

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 2393c2f..1597e1c 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -139,6 +139,18 @@  Core 3: Q1 (80%) |
 Core 7: Q4 (70%) | Q5 (10%)
 core 8: Q3 (60%) | Q0 (30%)
 
+To see the current measured usage history of pmd core cycles for each rxq::
+
+    $ ovs-appctl dpif-netdev/pmd-rxq-show
+
+.. note::
+
+  A history of one minute is recorded and shown for each rxq to allow for
+  traffic pattern spikes. Changes in an rxq's pmd core cycles usage, due to
+  traffic pattern or reconfiguration changes, will take one minute before they
+  are fully reflected in the stats. In this way the stats show what would be
+  used during a new rxq to pmd assignment.
+
 Rxq to pmds assignment takes place whenever there are configuration changes
 or can be triggered by using::
 
diff --git a/NEWS b/NEWS
index d9c6641..e2ea776 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,7 @@  Post-v2.8.0
      * Add support for vHost IOMMU
      * New debug appctl command 'netdev-dpdk/get-mempool-info'.
      * All the netdev-dpdk appctl commands described in ovs-vswitchd man page.
+     * Add rxq utilization of pmd cycles to pmd-rxq-show
    - Userspace datapath:
      * Output packet batching support.
    - vswitchd:
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index fc10f8e..4761d3b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -369,6 +369,8 @@  struct dp_netdev_rxq {
     /* We store PMD_RXQ_INTERVAL_MAX intervals of data for an rxq and then
        sum them to yield the cycles used for an rxq. */
     atomic_ullong cycles_intrvl[PMD_RXQ_INTERVAL_MAX];
+    atomic_ullong intrvl_tsc1[PMD_RXQ_INTERVAL_MAX];
+    atomic_ullong intrvl_tsc2[PMD_RXQ_INTERVAL_MAX];
 };
 
 /* A port in a netdev-based datapath. */
@@ -573,7 +575,7 @@  struct dp_netdev_pmd_thread {
     /* Periodically sort subtable vectors according to hit frequencies */
     long long int next_optimization;
     /* End of the next time interval for which processing cycles
-       are stored for each polled rxq. */
+       are stored for each polled rxq. Same unit as pmd->ctx.now. */
     long long int rxq_next_cycle_store;
 
     /* Current context of the PMD thread. */
@@ -700,6 +702,8 @@  static inline void
 dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
                            struct polled_queue *poll_list, int poll_cnt);
 static void
+dp_netdev_rxq_cycles_reset(struct dp_netdev_rxq *rx);
+static void
 dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
                          enum rxq_cycles_counter_type type,
                          unsigned long long cycles);
@@ -708,7 +712,8 @@  dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,
                          enum rxq_cycles_counter_type type);
 static void
 dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
-                           unsigned long long cycles);
+                                uint64_t tsc_timestamp,
+                                uint64_t cycles);
 static uint64_t
 dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx);
 static void
@@ -981,9 +986,8 @@  static void
 pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
 {
     if (pmd->core_id != NON_PMD_CORE_ID) {
-        const char *prev_name = NULL;
         struct rxq_poll *list;
-        size_t i, n;
+        size_t n_rxq, idx;
 
         ds_put_format(reply,
                       "pmd thread numa_id %d core_id %u:\n\tisolated : %s\n",
@@ -991,22 +995,41 @@  pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
                                                   ? "true" : "false");
 
         ovs_mutex_lock(&pmd->port_mutex);
-        sorted_poll_list(pmd, &list, &n);
-        for (i = 0; i < n; i++) {
-            const char *name = netdev_rxq_get_name(list[i].rxq->rx);
-
-            if (!prev_name || strcmp(name, prev_name)) {
-                if (prev_name) {
-                    ds_put_cstr(reply, "\n");
-                }
-                ds_put_format(reply, "\tport: %s\tqueue-id:", name);
+        sorted_poll_list(pmd, &list, &n_rxq);
+
+        for (int i = 0; i < n_rxq; i++) {
+
+            struct dp_netdev_rxq *rxq = list[i].rxq;
+            const char *name = netdev_rxq_get_name(rxq->rx);
+            uint64_t proc_cycles = 0;
+            uint64_t total_cycles = 0;
+
+            /* Collect the rxq cycle stats. */
+            idx = (rxq->intrvl_idx - 1) % PMD_RXQ_INTERVAL_MAX;
+            if (rxq->intrvl_tsc2[idx] > 0) {
+                /* Only show pmd usage if a full set of interval
+                 * measurements is available. */
+                total_cycles = rxq->intrvl_tsc1[idx] -
+                        rxq->intrvl_tsc2[idx];
+            }
+            for (int j = 0; j < PMD_RXQ_INTERVAL_MAX; j++) {
+                idx = (rxq->intrvl_idx + j) % PMD_RXQ_INTERVAL_MAX;
+                proc_cycles += rxq->cycles_intrvl[idx];
             }
-            ds_put_format(reply, " %d",
+
+            ds_put_format(reply, "\tport: %16s\tqueue-id: %2d", name,
                           netdev_rxq_get_queue_id(list[i].rxq->rx));
-            prev_name = name;
+            ds_put_format(reply, "\tpmd usage: ");
+            if (total_cycles > 0) {
+                ds_put_format(reply, "%2"PRIu64"",
+                              proc_cycles * 100 / total_cycles);
+                ds_put_cstr(reply, " %");
+            } else {
+                ds_put_format(reply, "%s", "NOT AVAIL");
+            }
+            ds_put_cstr(reply, "\n");
         }
         ovs_mutex_unlock(&pmd->port_mutex);
-        ds_put_cstr(reply, "\n");
         free(list);
     }
 }
@@ -3263,6 +3286,18 @@  pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd)
 }
 
 static void
+dp_netdev_rxq_cycles_reset(struct dp_netdev_rxq *rx)
+{
+    atomic_store_relaxed(&rx->cycles[RXQ_CYCLES_PROC_CURR], 0);
+    for (int i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+        atomic_store_relaxed(&rx->cycles_intrvl[i], 0);
+        atomic_store_relaxed(&rx->intrvl_tsc1[i], 0);
+        atomic_store_relaxed(&rx->intrvl_tsc2[i], 0);
+    }
+    rx->intrvl_idx = 0;
+}
+
+static void
 dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
                          enum rxq_cycles_counter_type type,
                          unsigned long long cycles)
@@ -3289,10 +3324,17 @@  dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,
 
 static void
 dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
-                                unsigned long long cycles)
+                                uint64_t tsc_timestamp,
+                                uint64_t cycles)
 {
-    unsigned int idx = rx->intrvl_idx++ % PMD_RXQ_INTERVAL_MAX;
+    uint64_t old_tsc_ts;
+    size_t idx = rx->intrvl_idx % PMD_RXQ_INTERVAL_MAX;
+
     atomic_store_relaxed(&rx->cycles_intrvl[idx], cycles);
+    atomic_read_relaxed(&rx->intrvl_tsc1[idx], &old_tsc_ts);
+    atomic_store_relaxed(&rx->intrvl_tsc2[idx], old_tsc_ts);
+    atomic_store_relaxed(&rx->intrvl_tsc1[idx], tsc_timestamp);
+    rx->intrvl_idx++;
 }
 
 static uint64_t
@@ -4247,7 +4289,8 @@  reload:
                 pmd->core_id, netdev_rxq_get_name(poll_list[i].rxq->rx),
                 netdev_rxq_get_queue_id(poll_list[i].rxq->rx));
        /* Reset the rxq current cycles counter. */
-       dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR, 0);
+       dp_netdev_rxq_cycles_reset(poll_list[i].rxq);
+       // dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR, 0);
     }
 
     if (!poll_cnt) {
@@ -6247,7 +6290,9 @@  dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
         for (unsigned i = 0; i < poll_cnt; i++) {
             uint64_t rxq_cyc_curr = dp_netdev_rxq_get_cycles(poll_list[i].rxq,
                                                         RXQ_CYCLES_PROC_CURR);
-            dp_netdev_rxq_set_intrvl_cycles(poll_list[i].rxq, rxq_cyc_curr);
+            dp_netdev_rxq_set_intrvl_cycles(poll_list[i].rxq,
+                                            cycles_counter(pmd),
+                                            rxq_cyc_curr);
             dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR,
                                      0);
         }
diff --git a/tests/pmd.at b/tests/pmd.at
index 0356f87..430b875 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -6,7 +6,15 @@  m4_divert_push([PREPARE_TESTS])
 # of every rxq (one per line) in the form:
 # port_name rxq_id numa_id core_id
 parse_pmd_rxq_show () {
-    awk '/pmd/ {numa=$4; core=substr($6, 1, length($6) - 1)} /\t/{for (i=4; i<=NF; i++) print  $2, $i, numa, core}' | sort
+    awk '/pmd thread/ {numa=$4; core=substr($6, 1, length($6) - 1)} /\tport:/ {print  $2, $4, numa, core}' | sort
+}
+
+# Given the output of `ovs-appctl dpif-netdev/pmd-rxq-show` in which the
+# rxq lines of each core have been joined onto a single line, prints the
+# rxqs of each core on one line in the form:
+# 'port:' port_name 'queue-id:' rxq_id rxq_id rxq_id rxq_id
+parse_pmd_rxq_show_group () {
+   awk '/port:/ {print  $1, $2, $3, $4, $12, $20, $28}'
 }
 
 # Given the output of `ovs-appctl dpctl/dump-flows`, prints a list of flows
@@ -53,7 +61,7 @@  m4_define([CHECK_PMD_THREADS_CREATED], [
 ])
 
 m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
-m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5 6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
+m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/1 2 5 6/<group>/;s/0 3 4 7/<group>/"])
 m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])
 
 AT_SETUP([PMD - creating a thread/add-port])
@@ -65,7 +73,7 @@  CHECK_PMD_THREADS_CREATED()
 AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
 	isolated : false
-	port: p0	queue-id: 0
+	port:               p0	queue-id:  0	pmd usage: NOT AVAIL
 ])
 
 AT_CHECK([ovs-appctl dpif/show | sed 's/\(tx_queues=\)[[0-9]]*/\1<cleared>/g'], [0], [dnl
@@ -96,7 +104,14 @@  dummy@ovs-dummy: hit:0 missed:0
 AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
 	isolated : false
-	port: p0	queue-id: 0 1 2 3 4 5 6 7
+	port:               p0	queue-id:  0	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  1	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  2	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  3	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  4	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  5	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  6	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  7	pmd usage: NOT AVAIL
 ])
 
 OVS_VSWITCHD_STOP
@@ -120,20 +135,23 @@  dummy@ovs-dummy: hit:0 missed:0
 AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
 	isolated : false
-	port: p0	queue-id: 0 1 2 3 4 5 6 7
+	port:               p0	queue-id:  0	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  1	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  2	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  3	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  4	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  5	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  6	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  7	pmd usage: NOT AVAIL
 ])
 
 TMP=$(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])
 AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x3])
 CHECK_PMD_THREADS_CREATED([2], [], [+$TMP])
 
-AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_QUEUE_PATTERN], [0], [dnl
-pmd thread numa_id <cleared> core_id <cleared>:
-	isolated : false
-	port: p0	queue-id: <cleared>
-pmd thread numa_id <cleared> core_id <cleared>:
-	isolated : false
-	port: p0	queue-id: <cleared>
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_PATTERN], [0], [dnl
+port: p0 queue-id: <group>
+port: p0 queue-id: <group>
 ])
 
 TMP=$(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])
@@ -143,7 +161,14 @@  CHECK_PMD_THREADS_CREATED([1], [], [+$TMP])
 AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
 	isolated : false
-	port: p0	queue-id: 0 1 2 3 4 5 6 7
+	port:               p0	queue-id:  0	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  1	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  2	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  3	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  4	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  5	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  6	pmd usage: NOT AVAIL
+	port:               p0	queue-id:  7	pmd usage: NOT AVAIL
 ])
 
 OVS_VSWITCHD_STOP