@@ -695,8 +695,10 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
struct dp_packet_batch {
size_t count;
+ size_t qfill; /* Number of packets in queue when reading rx burst. */
bool trunc; /* true if the batch needs truncate. */
struct dp_packet *packets[NETDEV_MAX_BURST];
+
};
static inline void
@@ -15,6 +15,7 @@
*/
#include <config.h>
+#include <stdint.h>
#ifdef DPDK_NETDEV
#include <rte_cycles.h>
@@ -27,13 +28,304 @@
VLOG_DEFINE_THIS_MODULE(pmd_perf);
+#ifdef DPDK_NETDEV
+static uint64_t
+get_tsc_hz(void)
+{
+ return rte_get_tsc_hz();
+}
+#else
+static uint64_t
+read_tsc(void)
+{
+ register uint64_t tsc asm("eax");
+ asm volatile (".byte 15, 49" : : : "eax", "edx");
+ return tsc;
+}
+
+static uint64_t
+get_tsc_hz(void)
+{
+ uint64_t tsc_start = read_tsc();
+ xsleep(1);
+ return read_tsc() - tsc_start;
+}
+#endif
+
+static void
+histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
+{
+ int i;
+
+ ovs_assert(min < max);
+ for (i = 0; i < NUM_BINS-1; i++) {
+ hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
+ }
+ hist->wall[NUM_BINS-1] = UINT32_MAX;
+}
+
+static void
+histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max)
+{
+ int i, start, bins, wall;
+ double log_min, log_max;
+
+ ovs_assert(min < max);
+ if (min > 0) {
+ log_min = log(min);
+ log_max = log(max);
+ start = 0;
+ bins = NUM_BINS - 1;
+ } else {
+ hist->wall[0] = 0;
+ log_min = log(1);
+ log_max = log(max);
+ start = 1;
+ bins = NUM_BINS - 2;
+ }
+ wall = start;
+ for (i = 0; i < bins; i++) {
+ /* Make sure each wall is monotonically increasing. */
+ wall = MAX(wall, exp(log_min + (i * (log_max - log_min)) / (bins-1)));
+ hist->wall[start + i] = wall++;
+ }
+ if (hist->wall[NUM_BINS-2] < max) {
+ hist->wall[NUM_BINS-2] = max;
+ }
+ hist->wall[NUM_BINS-1] = UINT32_MAX;
+}
+
+uint64_t
+histogram_samples(const struct histogram *hist)
+{
+ uint64_t samples = 0;
+
+ for (int i = 0; i < NUM_BINS; i++) {
+ samples += hist->bin[i];
+ }
+ return samples;
+}
+
+static void
+histogram_clear(struct histogram *hist)
+{
+ int i;
+
+ for (i = 0; i < NUM_BINS; i++) {
+ hist->bin[i] = 0;
+ }
+}
+
+static void
+history_init(struct history *h)
+{
+ memset(h, 0, sizeof(*h));
+}
+
void
pmd_perf_stats_init(struct pmd_perf_stats *s) {
- memset(s, 0 , sizeof(*s));
+ /* The struct has been zeroed at allocation. */
+ histogram_walls_set_log(&s->cycles, 500, 24000000);
+ histogram_walls_set_log(&s->pkts, 0, 1000);
+ histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000);
+ histogram_walls_set_lin(&s->pkts_per_batch, 0, 32);
+ histogram_walls_set_lin(&s->upcalls, 0, 30);
+ histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
+ histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
s->start_ms = time_msec();
}
void
+pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
+ double duration)
+{
+ uint64_t stats[PMD_N_STATS];
+ double us_per_cycle = 1000000.0 / get_tsc_hz();
+
+ if (duration == 0) {
+ return;
+ }
+
+ pmd_perf_read_counters(s, stats);
+ uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
+ stats[PMD_CYCLES_ITER_BUSY];
+ uint64_t packets = stats[PMD_STAT_RECV];
+ uint64_t passes = stats[PMD_STAT_RECV] +
+ stats[PMD_STAT_RECIRC];
+ uint64_t upcalls = stats[PMD_STAT_MISS];
+ uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
+ uint64_t tot_iter = histogram_samples(&s->pkts);
+ uint64_t idle_iter = s->pkts.bin[0];
+
+ ds_put_format(str,
+ " Cycles: %12"PRIu64" (%.2f GHz)\n"
+ " Iterations: %12"PRIu64" (%.2f us/it)\n"
+ " - idle: %12"PRIu64" (%4.1f %% cycles)\n"
+ " - busy: %12"PRIu64" (%4.1f %% cycles)\n",
+ tot_cycles, (tot_cycles / duration) / 1E9,
+ tot_iter, tot_cycles * us_per_cycle / tot_iter,
+ idle_iter,
+ 100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
+ tot_iter - idle_iter,
+ 100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
+ if (packets > 0) {
+ ds_put_format(str,
+ " Packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n"
+ " Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n"
+ " - EMC hits: %12"PRIu64" (%4.1f %%)\n"
+ " - Megaflow hits: %12"PRIu64" (%4.1f %%, %.2f subtbl lookups/"
+ "hit)\n"
+ " - Upcalls: %12"PRIu64" (%4.1f %%, %.1f us/upcall)\n"
+ " - Lost upcalls: %12"PRIu64" (%4.1f %%)\n"
+ "\n",
+ packets, (packets / duration) / 1000,
+ 1.0 * stats[PMD_CYCLES_ITER_BUSY] / packets,
+ passes, packets ? 1.0 * passes / packets : 0,
+ stats[PMD_STAT_EXACT_HIT],
+ 100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
+ stats[PMD_STAT_MASKED_HIT],
+ 100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
+ 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT],
+ upcalls, 100.0 * upcalls / passes,
+ upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
+ stats[PMD_STAT_LOST],
+ 100.0 * stats[PMD_STAT_LOST] / passes);
+ } else {
+ ds_put_format(str,
+ " Packets: %12"PRIu64"\n"
+ "\n",
+ 0UL);
+ }
+}
+
+void
+pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
+{
+ int i;
+
+ ds_put_cstr(str, "Histograms\n");
+ ds_put_format(str,
+ " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
+ "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
+ "max vhost qlen", "upcalls/it", "cycles/upcall");
+ for (i = 0; i < NUM_BINS-1; i++) {
+ ds_put_format(str,
+ " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
+ " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
+ " %-9d %-11"PRIu64"\n",
+ s->cycles.wall[i], s->cycles.bin[i],
+ s->pkts.wall[i],s->pkts.bin[i],
+ s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
+ s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
+ s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
+ s->upcalls.wall[i], s->upcalls.bin[i],
+ s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
+ }
+ ds_put_format(str,
+ " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
+ " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
+ " %-9s %-11"PRIu64"\n",
+ ">", s->cycles.bin[i],
+ ">", s->pkts.bin[i],
+ ">", s->cycles_per_pkt.bin[i],
+ ">", s->pkts_per_batch.bin[i],
+ ">", s->max_vhost_qfill.bin[i],
+ ">", s->upcalls.bin[i],
+ ">", s->cycles_per_upcall.bin[i]);
+ if (s->totals.iterations > 0) {
+ ds_put_cstr(str,
+ "-----------------------------------------------------"
+ "-----------------------------------------------------"
+ "------------------------------------------------\n");
+ ds_put_format(str,
+ " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
+ "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
+ "vhost qlen", "upcalls/it", "cycles/upcall");
+ ds_put_format(str,
+ " %-21"PRIu64" %-21.5f %-21"PRIu64
+ " %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
+ s->totals.cycles / s->totals.iterations,
+ 1.0 * s->totals.pkts / s->totals.iterations,
+ s->totals.pkts
+ ? s->totals.busy_cycles / s->totals.pkts : 0,
+ s->totals.batches
+ ? 1.0 * s->totals.pkts / s->totals.batches : 0,
+ 1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
+ 1.0 * s->totals.upcalls / s->totals.iterations,
+ s->totals.upcalls
+ ? s->totals.upcall_cycles / s->totals.upcalls : 0);
+ }
+}
+
+void
+pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
+ int n_iter)
+{
+ struct iter_stats *is;
+ size_t index;
+ int i;
+
+ if (n_iter == 0) {
+ return;
+ }
+ ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
+ "%-10s %-10s %-10s\n",
+ "tsc", "cycles", "packets", "cycles/pkt", "pkts/batch",
+ "vhost qlen", "upcalls", "cycles/upcall");
+ for (i = 1; i <= n_iter; i++) {
+ index = (s->iterations.idx + HISTORY_LEN - i) % HISTORY_LEN;
+ is = &s->iterations.sample[index];
+ ds_put_format(str,
+ " %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
+ " %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
+ " %-11"PRIu32" %-11"PRIu32"\n",
+ is->timestamp,
+ is->cycles,
+ is->pkts,
+ is->pkts ? is->cycles / is->pkts : 0,
+ is->batches ? is->pkts / is->batches : 0,
+ is->max_vhost_qfill,
+ is->upcalls,
+ is->upcalls ? is->upcall_cycles / is->upcalls : 0);
+ }
+}
+
+void
+pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
+{
+ struct iter_stats *is;
+ size_t index;
+ int i;
+
+ if (n_ms == 0) {
+ return;
+ }
+ ds_put_format(str,
+ " %-12s %-10s %-10s %-10s %-10s"
+ " %-10s %-10s %-10s %-10s\n",
+ "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
+ "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
+ for (i = 1; i <= n_ms; i++) {
+ index = (s->milliseconds.idx + HISTORY_LEN - i) % HISTORY_LEN;
+ is = &s->milliseconds.sample[index];
+ ds_put_format(str,
+ " %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
+ " %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
+ " %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
+ is->timestamp,
+ is->iterations,
+ is->iterations ? is->cycles / is->iterations : 0,
+ is->pkts,
+ is->pkts ? is->busy_cycles / is->pkts : 0,
+ is->batches ? is->pkts / is->batches : 0,
+ is->iterations
+ ? is->max_vhost_qfill / is->iterations : 0,
+ is->upcalls,
+ is->upcalls ? is->upcall_cycles / is->upcalls : 0);
+ }
+}
+
+void
pmd_perf_read_counters(struct pmd_perf_stats *s,
uint64_t stats[PMD_N_STATS])
{
@@ -59,9 +351,21 @@ void
pmd_perf_stats_clear(struct pmd_perf_stats *s)
{
s->start_ms = 0; /* Marks start of clearing. */
+ memset(&s->current, 0 , sizeof(struct iter_stats));
+ memset(&s->totals, 0 , sizeof(struct iter_stats));
for (int i = 0; i < PMD_N_STATS; i++) {
atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
}
+ histogram_clear(&s->cycles);
+ histogram_clear(&s->pkts);
+ histogram_clear(&s->cycles_per_pkt);
+ histogram_clear(&s->upcalls);
+ histogram_clear(&s->cycles_per_upcall);
+ histogram_clear(&s->pkts_per_batch);
+ histogram_clear(&s->max_vhost_qfill);
+ history_init(&s->iterations);
+ history_init(&s->milliseconds);
s->start_ms = time_msec(); /* Clearing finished. */
+ s->milliseconds.sample[0].timestamp = s->start_ms;
}
@@ -33,6 +33,11 @@
extern "C" {
#endif
+#define NUM_BINS 32 /* Number of histogram bins. */
+#define HISTORY_LEN 1000 /* Length of recorded history
+ (iterations and ms). */
+#define DEF_HIST_SHOW 20 /* Default number of history samples. */
+
enum pmd_stat_type {
PMD_STAT_EXACT_HIT, /* Packets that had an exact match (emc). */
PMD_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
@@ -60,6 +65,28 @@ enum pmd_stat_type {
PMD_N_STATS
};
+struct histogram {
+ uint32_t wall[NUM_BINS];
+ uint64_t bin[NUM_BINS];
+};
+
+struct iter_stats {
+ uint64_t timestamp;
+ uint64_t cycles;
+ uint64_t busy_cycles;
+ uint32_t iterations;
+ uint32_t pkts;
+ uint32_t upcalls;
+ uint32_t upcall_cycles;
+ uint32_t batches;
+ uint32_t max_vhost_qfill;
+};
+
+struct history {
+ uint64_t idx;
+ struct iter_stats sample[HISTORY_LEN];
+};
+
struct pmd_counters {
atomic_uint64_t n[PMD_N_STATS]; /* Indexed by PMD_STAT_*. */
uint64_t zero[PMD_N_STATS];
@@ -69,6 +96,17 @@ struct pmd_perf_stats {
uint64_t start_ms;
uint64_t last_tsc;
struct pmd_counters counters;
+ struct iter_stats current;
+ struct iter_stats totals;
+ struct histogram cycles;
+ struct histogram pkts;
+ struct histogram cycles_per_pkt;
+ struct histogram upcalls;
+ struct histogram cycles_per_upcall;
+ struct histogram pkts_per_batch;
+ struct histogram max_vhost_qfill;
+ struct history iterations;
+ struct history milliseconds;
};
enum pmd_info_type;
@@ -96,6 +134,63 @@ pmd_perf_update_counter(struct pmd_perf_stats *s,
atomic_store_relaxed(&s->counters.n[counter], tmp);
}
+void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
+ double duration);
+void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
+void pmd_perf_format_iteration_history(struct ds *str,
+ struct pmd_perf_stats *s,
+ int n_iter);
+void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
+ int n_ms);
+
+void pmd_perf_show(struct unixctl_conn *conn, int argc,
+ const char *argv[],
+ void *aux OVS_UNUSED);
+static inline void
+histogram_add_sample(struct histogram *hist, uint32_t val)
+{
+ /* TODO: Can do better with binary search? */
+ for (int i = 0; i < NUM_BINS-1; i++) {
+ if (val <= hist->wall[i]) {
+ hist->bin[i]++;
+ return;
+ }
+ }
+ hist->bin[NUM_BINS-1]++;
+}
+
+uint64_t histogram_samples(const struct histogram *hist);
+
+static inline struct iter_stats *
+history_current(struct history *h)
+{
+ return &h->sample[h->idx];
+}
+
+static inline struct iter_stats *
+history_next(struct history *h)
+{
+ struct iter_stats *next;
+
+ h->idx++;
+ if (h->idx == HISTORY_LEN) {
+ h->idx = 0;
+ }
+ next = &h->sample[h->idx];
+ memset(next, 0, sizeof(*next));
+ return next;
+}
+
+static inline struct iter_stats *
+history_store(struct history *h, struct iter_stats *is)
+{
+ if (is) {
+ h->sample[h->idx] = *is;
+ }
+ /* Advance the history pointer */
+ return history_next(h);
+}
+
static inline void
pmd_perf_start_iteration(struct pmd_perf_stats *s, uint64_t now_tsc)
{
@@ -103,14 +198,25 @@ pmd_perf_start_iteration(struct pmd_perf_stats *s, uint64_t now_tsc)
/* Stats not yet fully initialised. */
return;
}
- s->last_tsc = now_tsc;
+ memset(&s->current, 0, sizeof(struct iter_stats));
+ s->current.timestamp = now_tsc;
}
static inline void
pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc,
int packets)
{
- uint64_t cycles = now_tsc - s->last_tsc;
+ struct iter_stats *cum_ms;
+ uint64_t cycles, cycles_per_pkt = 0;
+
+ if (OVS_UNLIKELY(s->current.timestamp == 0)) {
+ /* Stats were cleared during the ongoing iteration. */
+ return;
+ }
+
+ cycles = now_tsc - s->current.timestamp;
+ s->current.cycles = cycles;
+ s->current.pkts = packets;
/* No need for atomic updates as only invoked within PMD thread. */
if (packets > 0) {
@@ -118,6 +224,62 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc,
} else {
s->counters.n[PMD_CYCLES_ITER_IDLE] += cycles;
}
+ s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles;
+
+ /* Add iteration sample to histograms. */
+ histogram_add_sample(&s->cycles, cycles);
+ histogram_add_sample(&s->pkts, packets);
+ if (packets > 0) {
+ cycles_per_pkt = cycles / packets;
+ histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt);
+ }
+ if (s->current.batches > 0) {
+ histogram_add_sample(&s->pkts_per_batch, packets / s->current.batches);
+ }
+ histogram_add_sample(&s->upcalls, s->current.upcalls);
+ if (s->current.upcalls > 0) {
+ histogram_add_sample(&s->cycles_per_upcall,
+ s->current.upcall_cycles / s->current.upcalls);
+ }
+ histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);
+
+ /* Add iteration samples to millisecond stats. */
+ cum_ms = history_current(&s->milliseconds);
+ cum_ms->iterations++;
+ cum_ms->cycles += cycles;
+ if (packets > 0) {
+ cum_ms->busy_cycles += cycles;
+ }
+ cum_ms->pkts += s->current.pkts;
+ cum_ms->upcalls += s->current.upcalls;
+ cum_ms->upcall_cycles += s->current.upcall_cycles;
+ cum_ms->batches += s->current.batches;
+ cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
+
+ /* Store in iteration history. */
+ history_store(&s->iterations, &s->current);
+ if (now_tsc - s->last_tsc > 10000) {
+ /* Check if ms is completed and store in milliseconds history. */
+ uint64_t now = time_msec();
+ if (now != cum_ms->timestamp) {
+ /* Add ms stats to totals. */
+ s->totals.iterations += cum_ms->iterations;
+ s->totals.cycles += cum_ms->cycles;
+ s->totals.busy_cycles += cum_ms->busy_cycles;
+ s->totals.pkts += cum_ms->pkts;
+ s->totals.upcalls += cum_ms->upcalls;
+ s->totals.upcall_cycles += cum_ms->upcall_cycles;
+ s->totals.batches += cum_ms->batches;
+ s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
+ cum_ms = history_next(&s->milliseconds);
+ cum_ms->timestamp = now;
+ }
+ s->last_tsc = now_tsc;
+ }
}
+#ifdef __cplusplus
+}
+#endif
+
#endif /* pmd-perf.h */
@@ -52,6 +52,7 @@
#include "id-pool.h"
#include "latch.h"
#include "netdev.h"
+#include "netdev-provider.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "odp-execute.h"
@@ -749,7 +750,8 @@ get_dp_netdev(const struct dpif *dpif)
enum pmd_info_type {
PMD_INFO_SHOW_STATS, /* Show how cpu cycles are spent. */
PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
- PMD_INFO_SHOW_RXQ /* Show poll-lists of pmd threads. */
+ PMD_INFO_SHOW_RXQ, /* Show poll-lists of pmd threads. */
+ PMD_INFO_PERF_SHOW, /* Show pmd performance details. */
};
static void
@@ -828,6 +830,42 @@ pmd_info_show_stats(struct ds *reply,
stats[PMD_CYCLES_POLL_BUSY], total_packets);
}
+static void
+pmd_info_show_perf(struct ds *reply,
+ struct dp_netdev_pmd_thread *pmd,
+ struct pmd_perf_params *par)
+{
+ if (pmd->core_id != NON_PMD_CORE_ID) {
+ char *time_str =
+ xastrftime_msec("%H:%M:%S.###", time_wall_msec(), true);
+ long long now = time_msec();
+ double duration = (now - pmd->perf_stats.start_ms) / 1000.0;
+
+ ds_put_cstr(reply, "\n");
+ ds_put_format(reply, "Time: %s\n", time_str);
+ ds_put_format(reply, "Measurement duration: %.3f s\n", duration);
+ ds_put_cstr(reply, "\n");
+ format_pmd_thread(reply, pmd);
+ ds_put_cstr(reply, "\n");
+ pmd_perf_format_overall_stats(reply, &pmd->perf_stats, duration);
+ if (par->histograms) {
+ ds_put_cstr(reply, "\n");
+ pmd_perf_format_histograms(reply, &pmd->perf_stats);
+ }
+ if (par->iter_hist_len > 0) {
+ ds_put_cstr(reply, "\n");
+ pmd_perf_format_iteration_history(reply, &pmd->perf_stats,
+ par->iter_hist_len);
+ }
+ if (par->ms_hist_len > 0) {
+ ds_put_cstr(reply, "\n");
+ pmd_perf_format_ms_history(reply, &pmd->perf_stats,
+ par->ms_hist_len);
+ }
+ free(time_str);
+ }
+}
+
static int
compare_poll_list(const void *a_, const void *b_)
{
@@ -1030,6 +1068,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
pmd_perf_stats_clear(&pmd->perf_stats);
} else if (type == PMD_INFO_SHOW_STATS) {
pmd_info_show_stats(&reply, pmd);
+ } else if (type == PMD_INFO_PERF_SHOW) {
+ pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
}
}
free(pmd_list);
@@ -1039,6 +1079,48 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
unixctl_command_reply(conn, ds_cstr(&reply));
ds_destroy(&reply);
}
+
+static void
+pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
+ const char *argv[],
+ void *aux OVS_UNUSED)
+{
+ struct pmd_perf_params par;
+ long int it_hist = 0, ms_hist = 0;
+ par.histograms = true;
+
+ while (argc > 1) {
+ if (!strcmp(argv[1], "-nh")) {
+ par.histograms = false;
+ argc -= 1;
+ argv += 1;
+ } else if (!strcmp(argv[1], "-it") && argc > 2) {
+ it_hist = strtol(argv[2], NULL, 10);
+ if (it_hist < 0) {
+ it_hist = 0;
+ } else if (it_hist > HISTORY_LEN) {
+ it_hist = HISTORY_LEN;
+ }
+ argc -= 2;
+ argv += 2;
+ } else if (!strcmp(argv[1], "-ms") && argc > 2) {
+ ms_hist = strtol(argv[2], NULL, 10);
+ if (ms_hist < 0) {
+ ms_hist = 0;
+ } else if (ms_hist > HISTORY_LEN) {
+ ms_hist = HISTORY_LEN;
+ }
+ argc -= 2;
+ argv += 2;
+ } else {
+ break;
+ }
+ }
+ par.iter_hist_len = it_hist;
+ par.ms_hist_len = ms_hist;
+ par.command_type = PMD_INFO_PERF_SHOW;
+ dpif_netdev_pmd_info(conn, argc, argv, &par);
+}
static int
dpif_netdev_init(void)
@@ -1056,6 +1138,12 @@ dpif_netdev_init(void)
unixctl_command_register("dpif-netdev/pmd-rxq-show", "[-pmd core] [dp]",
0, 2, dpif_netdev_pmd_info,
(void *)&poll_aux);
+ unixctl_command_register("dpif-netdev/pmd-perf-show",
+ "[-nh] [-it iter-history-len]"
+ " [-ms ms-history-len]"
+ " [-pmd core] [dp]",
+ 0, 7, pmd_perf_show_cmd,
+ NULL);
unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
0, 1, dpif_netdev_pmd_rebalance,
NULL);
@@ -3143,6 +3231,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct netdev_rxq *rx,
odp_port_t port_no)
{
+ struct pmd_perf_stats *s = &pmd->perf_stats;
struct dp_packet_batch batch;
int error;
int batch_cnt = 0;
@@ -3151,8 +3240,23 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
error = netdev_rxq_recv(rx, &batch);
if (!error) {
*recirc_depth_get() = 0;
-
+ /* Update batch histogram. */
batch_cnt = batch.count;
+ s->current.batches++;
+ histogram_add_sample(&s->pkts_per_batch, batch_cnt);
+ /* Update the maximum Rx queue fill level. */
+ uint32_t qfill = batch.qfill;
+ switch (netdev_dpdk_get_type(netdev_rxq_get_netdev(rx))) {
+ case DPDK_DEV_VHOST:
+ if (qfill > s->current.max_vhost_qfill) {
+ s->current.max_vhost_qfill = qfill;
+ }
+ break;
+ case DPDK_DEV_ETH:
+ default:
+ break;
+ }
+ /* Process packet batch. */
dp_netdev_input(pmd, &batch, port_no);
} else if (error != EAGAIN && error != EOPNOTSUPP) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -4902,6 +5006,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
struct match match;
ovs_u128 ufid;
int error = 0;
+ uint64_t cycles = cycles_counter();
match.tun_md.valid = false;
miniflow_expand(&key->mf, &match.flow);
@@ -4955,6 +5060,12 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
ovs_mutex_unlock(&pmd->flow_mutex);
emc_probabilistic_insert(pmd, key, netdev_flow);
}
+ /* Update upcall stats. */
+ cycles = cycles_counter() - cycles;
+ struct pmd_perf_stats *s = &pmd->perf_stats;
+ s->current.upcalls++;
+ s->current.upcall_cycles += cycles;
+ histogram_add_sample(&s->cycles_per_upcall, cycles);
return error;
}
@@ -35,6 +35,7 @@
#include <rte_mbuf.h>
#include <rte_meter.h>
#include <rte_pci.h>
+#include <rte_version.h>
#include <rte_vhost.h>
#include "dirs.h"
@@ -196,11 +197,6 @@ enum { DPDK_RING_SIZE = 256 };
BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
enum { DRAIN_TSC = 200000ULL };
-enum dpdk_dev_type {
- DPDK_DEV_ETH = 0,
- DPDK_DEV_VHOST = 1,
-};
-
/* Quality of Service */
/* An instance of a QoS configuration. Always associated with a particular
@@ -846,6 +842,13 @@ netdev_dpdk_cast(const struct netdev *netdev)
return CONTAINER_OF(netdev, struct netdev_dpdk, up);
}
+enum dpdk_dev_type
+netdev_dpdk_get_type(const struct netdev *netdev)
+{
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ return dev->type;
+}
+
static struct netdev *
netdev_dpdk_alloc(void)
{
@@ -1656,6 +1659,19 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
return EAGAIN;
}
+ batch->qfill = nb_rx;
+
+ if (OVS_UNLIKELY(nb_rx == NETDEV_MAX_BURST)) {
+ /* This vhostuser API call is only implemented in DPDK 17.08.
+ * The need for this API call disappears when switching to the
+ * vhostuser PMD. */
+#if RTE_VERSION >= RTE_VERSION_NUM (17, 8, 0, RTE_VER_RELEASE)
+ batch->qfill += rte_vhost_rx_queue_count(netdev_dpdk_get_vid(dev),
+ qid * VIRTIO_QNUM
+ + VIRTIO_TXQ);
+#endif
+ }
+
if (policer) {
dropped = nb_rx;
nb_rx = ingress_policer_run(policer,
@@ -1695,6 +1711,8 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
return EAGAIN;
}
+ batch->qfill = nb_rx;
+
if (policer) {
dropped = nb_rx;
nb_rx = ingress_policer_run(policer,
@@ -22,11 +22,18 @@
#include "openvswitch/compiler.h"
struct dp_packet;
+struct netdev;
+
+enum dpdk_dev_type {
+ DPDK_DEV_ETH = 0,
+ DPDK_DEV_VHOST = 1,
+};
#ifdef DPDK_NETDEV
void netdev_dpdk_register(void);
void free_dpdk_buf(struct dp_packet *);
+enum dpdk_dev_type netdev_dpdk_get_type(const struct netdev *netdev);
#else
@@ -41,6 +48,13 @@ free_dpdk_buf(struct dp_packet *buf OVS_UNUSED)
/* Nothing */
}
+static inline enum dpdk_dev_type
+netdev_dpdk_get_type(const struct netdev *netdev OVS_UNUSED)
+{
+ /* Nothing to do. Return value zero to make compiler happy. */
+ return DPDK_DEV_ETH;
+}
+
#endif
#endif /* netdev-dpdk.h */
@@ -5308,7 +5308,8 @@ dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
dpif_get_dp_stats(backer->dpif, &dp_stats);
ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
- dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);
+ dpif_name(backer->dpif),
+ dp_stats.n_hit + dp_stats.n_mask_hit, dp_stats.n_missed);
shash_init(&ofproto_shash);
ofprotos = get_ofprotos(&ofproto_shash);
This patch instruments the dpif-netdev datapath to record detailed statistics of what is happening in every iteration of a PMD thread. The covered metrics per iteration are: - cycles - packets - (rx) batches - packets/batch - max. vhostuser qlen - upcalls - cycles spent in upcalls This raw recorded data is used threefold: 1. In histograms for each of the following metrics: - cycles/iteration (log.) - packets/iteration (log.) - cycles/packet - packets/batch - max. vhostuser qlen (log.) - upcalls - cycles/upcall (log) The histogram bins are divided linearly or logarithmically. 2. A cyclic history of the above statistics for the last 1000 iterations 3. A cyclic history of the cumulative/average values per millisecond wall clock for the last 1000 milliseconds: - number of iterations - avg. cycles/iteration - packets (Kpps) - avg. packets/batch - avg. max vhost qlen - upcalls - avg. cycles/upcall The gathered performance statistics can be printed at any time with the new CLI command ovs-appctl dpif-netdev/pmd-perf-show [-nh] [-it iter_len] [-ms ms_len] [-pmd core] [dp] The options are -nh: Suppress the histograms -it iter_len: Display the last iter_len iteration stats -ms ms_len: Display the last ms_len millisecond stats -pmd core: Display only the specified PMD thread The performance statistics are reset with the existing dpif-netdev/pmd-stats-clear command. The output always contains the following global PMD statistics, similar to the pmd-stats-show command: Time: 15:24:55.270 Measurement duration: 1.008 s pmd thread numa_id 0 core_id 1: Cycles: 2419034712 (2.40 GHz) Iterations: 572817 (1.76 us/it) - idle: 486808 (15.9 % cycles) - busy: 86009 (84.1 % cycles) Packets: 2399607 (2381 Kpps, 848 cycles/pkt) Datapath passes: 3599415 (1.50 passes/pkt) - EMC hits: 336472 ( 9.3 %) - Megaflow hits: 3262943 (90.7 %, 1.00 subtbl lookups/hit) - Upcalls: 0 ( 0.0 %, 0.0 us/upcall) - Lost upcalls: 0 ( 0.0 %) The other output will be documented separately. 
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> --- lib/dp-packet.h | 2 + lib/dpif-netdev-perf.c | 306 ++++++++++++++++++++++++++++++++++++++++++++++++- lib/dpif-netdev-perf.h | 166 ++++++++++++++++++++++++++- lib/dpif-netdev.c | 115 ++++++++++++++++++- lib/netdev-dpdk.c | 28 ++++- lib/netdev-dpdk.h | 14 +++ ofproto/ofproto-dpif.c | 3 +- 7 files changed, 623 insertions(+), 11 deletions(-)