@@ -214,6 +214,10 @@ Available probes in ``ovs_vswitchd``:
- dpif_recv:recv_upcall
- main:poll_block
- main:run_start
+- revalidate_ukey\_\_:entry
+- revalidate_ukey\_\_:exit
+- udpif_revalidator:start_dump
+- udpif_revalidator:sweep_done
dpif_netlink_operate\_\_:op_flow_del
@@ -327,6 +331,7 @@ probe main:run_start
~~~~~~~~~~~~~~~~~~~~
**Description**:
+
The ovs-vswitchd's main process contains a loop that runs every time some work
needs to be done. This probe gets triggered every time the loop starts from the
beginning. See also the ``main:poll_block`` probe below.
@@ -344,6 +349,7 @@ probe main:poll_block
~~~~~~~~~~~~~~~~~~~~~
**Description**:
+
The ovs-vswitchd's main process contains a loop that runs every time some work
needs to be done. This probe gets triggered every time the loop is done, and
it's about to wait for being re-started by a poll_block() call returning.
@@ -358,6 +364,84 @@ See also the ``main:run_start`` probe above.
- ``utilities/usdt-scripts/bridge_loop.bt``
+revalidate_ukey\_\_:entry
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Description**:
+
+This probe gets triggered on entry of the revalidate_ukey__() function.
+
+**Arguments**:
+
+- *arg0*: ``(struct udpif *) udpif``
+- *arg1*: ``(struct udpif_key *) ukey``
+- *arg2*: ``(uint16_t) tcp_flags``
+- *arg3*: ``(struct ofpbuf *) odp_actions``
+- *arg4*: ``(struct recirc_refs *) recircs``
+- *arg5*: ``(struct xlate_cache *) xcache``
+
+**Script references**:
+
+- ``utilities/usdt-scripts/reval_monitor.py``
+
+
+revalidate_ukey\_\_:exit
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Description**:
+
+This probe gets triggered right before the revalidate_ukey__() function exits.
+
+**Arguments**:
+
+- *arg0*: ``(struct udpif *) udpif``
+- *arg1*: ``(struct udpif_key *) ukey``
+- *arg2*: ``(enum reval_result) result``
+
+**Script references**:
+
+*None*
+
+
+udpif_revalidator:start_dump
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Description**:
+
+The ovs-vswitchd's revalidator process contains a loop that runs every time
+revalidation work is needed. This probe gets triggered every time the
+dump phase has started.
+
+**Arguments**:
+
+- *arg0*: ``(struct udpif *) udpif``
+- *arg1*: ``(size_t) n_flows``
+
+**Script references**:
+
+- ``utilities/usdt-scripts/reval_monitor.py``
+
+
+udpif_revalidator:sweep_done
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Description**:
+
+The ovs-vswitchd's revalidator process contains a loop that runs every time
+revalidation work is needed. This probe gets triggered every time the
+sweep phase has completed.
+
+**Arguments**:
+
+- *arg0*: ``(struct udpif *) udpif``
+- *arg1*: ``(size_t) n_flows``
+- *arg2*: ``(unsigned) MIN(ofproto_max_idle, ofproto_max_revalidator)``
+
+**Script references**:
+
+- ``utilities/usdt-scripts/reval_monitor.py``
+
+
Adding your own probes
----------------------
@@ -42,6 +42,7 @@
#include "seq.h"
#include "tunnel.h"
#include "unixctl.h"
+#include "openvswitch/usdt-probes.h"
#include "openvswitch/vlog.h"
#include "lib/netdev-provider.h"
@@ -965,6 +966,7 @@ udpif_revalidator(void *arg)
terse_dump = udpif_use_ufid(udpif);
udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump,
NULL);
+ OVS_USDT_PROBE(udpif_revalidator, start_dump, udpif, n_flows);
}
}
@@ -1016,6 +1018,9 @@ udpif_revalidator(void *arg)
duration);
}
+ OVS_USDT_PROBE(udpif_revalidator, sweep_done, udpif, n_flows,
+ MIN(ofproto_max_idle, ofproto_max_revalidator));
+
poll_timer_wait_until(start_time + MIN(ofproto_max_idle,
ofproto_max_revalidator));
seq_wait(udpif->reval_seq, last_reval_seq);
@@ -2215,6 +2220,9 @@ revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
.wc = &wc,
};
+ OVS_USDT_PROBE(revalidate_ukey__, entry, udpif, ukey, tcp_flags,
+ odp_actions, recircs, xcache);
+
result = UKEY_DELETE;
xoutp = NULL;
netflow = NULL;
@@ -2278,6 +2286,9 @@ exit:
netflow_flow_clear(netflow, &ctx.flow);
}
xlate_out_uninit(xoutp);
+
+ OVS_USDT_PROBE(revalidate_ukey__, exit, udpif, ukey, result);
+
return result;
}
@@ -63,8 +63,10 @@ EXTRA_DIST += \
utilities/docker/debian/Dockerfile \
utilities/docker/debian/build-kernel-modules.sh \
utilities/usdt-scripts/bridge_loop.bt \
+ utilities/usdt-scripts/ovs_structs.h \
utilities/usdt-scripts/upcall_cost.py \
- utilities/usdt-scripts/upcall_monitor.py
+ utilities/usdt-scripts/upcall_monitor.py \
+ utilities/usdt-scripts/reval_monitor.py
MAN_ROOTS += \
utilities/ovs-testcontroller.8.in \
utilities/ovs-dpctl.8.in \
new file mode 100644
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * The purpose of this include file is to define commonly used OVS data
+ * structures so they can easily be used/accessed by BPF programs.
+ */
+#ifndef OVS_STRUCTS_H
+#define OVS_STRUCTS_H 1
+
+/* From eBPF we do not care about atomic reading for now :) */
+#define ATOMIC(TYPE) TYPE
+
+
+/* Various typedef's. */
+typedef uint32_t HANDLE;
+
+
+/* The below we can not get from importing the pthread.h file as this will
+ * clash with the Linux kernel includes done by BCC. */
+typedef struct pthread_mutex_s {
+ char size[OVS_PTHREAD_MUTEX_T_SIZE];
+} pthread_mutex_t;
+
+
+/* Included from lib/ovs-rcu.h */
+#define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; }
+
+
+/* Included from lib/ovs-atomic.h */
+typedef ATOMIC(bool) atomic_bool;
+typedef ATOMIC(int) atomic_int;
+typedef ATOMIC(unsigned int) atomic_uint;
+typedef ATOMIC(long long) atomic_llong;
+typedef ATOMIC(unsigned long long) atomic_ullong;
+
+
+/* Included from include/openvswitch/list.h */
+struct ovs_list {
+ struct ovs_list *prev;
+ struct ovs_list *next;
+};
+
+
+/* Included from lib/latch.h */
+struct latch {
+ HANDLE wevent;
+ bool is_set;
+};
+
+/* Included from lib/ovs-thread.h */
+struct ovs_barrier_impl;
+struct ovs_barrier {
+ OVSRCU_TYPE(struct ovs_barrier_impl *) impl;
+};
+
+
+/* Included from include/openvswitch/thread.h */
+struct ovs_mutex {
+ pthread_mutex_t lock;
+ const char *where;
+};
+
+
+/* Include from ofproto/ofproto-dpif.c */
+struct udpif {
+ struct ovs_list list_node;
+
+ struct dpif *dpif;
+ struct dpif_backer *backer;
+
+ struct handler *handlers;
+ uint32_t n_handlers;
+
+ struct revalidator *revalidators;
+ uint32_t n_revalidators;
+
+ struct latch exit_latch;
+
+ struct seq *reval_seq;
+ bool reval_exit;
+ struct ovs_barrier reval_barrier;
+ struct dpif_flow_dump *dump;
+ long long int dump_duration;
+ struct seq *dump_seq;
+ atomic_bool enable_ufid;
+
+ bool pause;
+ struct latch pause_latch;
+ struct ovs_barrier pause_barrier;
+
+ struct umap *ukeys;
+
+ unsigned int max_n_flows;
+ unsigned int avg_n_flows;
+
+ atomic_uint flow_limit;
+
+ atomic_uint n_flows;
+ atomic_llong n_flows_timestamp;
+ struct ovs_mutex n_flows_mutex;
+
+ struct unixctl_conn **conns;
+ uint64_t conn_seq;
+ size_t n_conns;
+
+ long long int offload_rebalance_time;
+};
+
+
+#endif /* OVS_STRUCTS_H */
new file mode 100755
@@ -0,0 +1,756 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Script information:
+# -------------------
+# reval_monitor.py uses various user-defined tracepoints to get all the
+# revalidator-process related variables and will display them in a (dynamic)
+# graph. In addition, it will also dump the data to the console
+# in a CSV format. Note that all the graphical output can be disabled.
+#
+# All the USDT events can be saved to a file and then can be used to
+# replay the trace offline and still get the plots.
+#
+# The script can simply be invoked without any options, and it will try
+# to find the running ovs-vswitchd instance:
+#
+# # ./reval_monitor.py
+# # Starting trace @2022-09-20T04:07:43.588749 (08:07:43 UTC)
+# ts_start, ts_complete, n_flows, n_reval_flows, avg_n_flows, max_n_flows,
+# flow_limit, dump_duration, poll_wait, actual_wait
+# 1741367714251645, 1741367714532545, 0, 0, 0, 10000, 69000, 1, 500, 500.52
+# 1741368215056961, 1741368215318223, 0, 0, 0, 10000, 69000, 1, 500, 500.55
+# 1741368715865871, 1741368716107089, 0, 0, 0, 10000, 69000, 1, 500, 499.48
+# ^C# Stopping trace @2022-09-20T04:07:49.893827 (08:07:49 UTC)
+#
+# The following are the available options:
+#
+# usage: reval_monitor.py [-h] [-c] [--buffer-page-count NUMBER]
+# [-D [DEBUG]] [-g] [--no-ukey-count]
+# [-p VSWITCHD_PID] [-r FILE] [-R]
+# [-u SECONDS] [-w FILE] [-W FILE]
+#
+# options:
+# -h, --help show this help message and exit
+# -c, --compress-output
+# Compress output, i.e. only dump changes in the dataset
+# --buffer-page-count NUMBER
+# Number of BPF ring buffer pages, default 1024
+# -D [DEBUG], --debug [DEBUG]
+# Enable eBPF debugging
+#   -g, --no-gui          Do not use the gui to display plots
+# --no-ukey-count No revalidate_ukey() counting
+# -p VSWITCHD_PID, --pid VSWITCHD_PID
+# ovs-vswitch's PID
+# -r FILE, --read-events FILE
+# Read events from <FILE> instead of installing
+# tracepoints
+# -R, --no-realtime-plots
+# Do not show realtime plot while tracing
+# -u SECONDS, --update-interval SECONDS
+# Seconds to wait between real time update, default 1
+# -w FILE, --write-events FILE
+# Write events to <FILE>
+# -W FILE, --write-charts FILE
+# Write overall charts to <FILE>.png
+#
+# The -g option disables all GUI output of matplotlib, -R only disables the
+# real-time plots. As real-time plots are rather slow, the -u option can be
+# used to only update the graph every x seconds, which might speed up the
+# processing.
+#
+# The --no-ukey-count option disables counting of the number of flows actually
+# being revalidated against the current OpenFlow ruleset. This will not install
+# the specific tracepoint which would be called for each flow being
+# revalidated.
+#
+# What is plotted in the graphs (and dumped in the CSV output)?
+# - n_flows: Number of flows active in the system.
+# - n_reval_flows: Number of flows that were revalidated against the OpenFlow
+# ruleset.
+# - dump_duration: Time it took to dump and process all flows.
+# - avg_n_flows: Average number of flows in the system.
+# - max_n_flows: Maximum number of flows in the system.
+# - flow_limit: Dynamic flow limit.
+# - poll_wait: Time requested for the poll wait.
+# - actual_wait: Time it took to be woken up.
+#
+# Dependencies:
+#   The script needs GDB to be installed, as it will be used to determine the
+# of the pthread_mutex_t structure.
+#
+
+
+try:
+ from bcc import BPF, USDT, USDTException
+except ModuleNotFoundError:
+ print("WARNING: Can't find the BPF Compiler Collection (BCC) tools!")
+ print(" This is NOT problem if you analyzing previously collected"
+ " data.\n")
+
+from collections import namedtuple
+
+import argparse
+import ast
+import datetime
+import re
+import subprocess
+import sys
+import time
+
+import pytz
+import psutil
+import matplotlib.pyplot as plt
+
+#
+# Actual eBPF source code
+#
+EBPF_SOURCE = """
+#include <linux/sched.h>
+
+#define OVS_PTHREAD_MUTEX_T_SIZE <PTHREAD_MUTEX_T_SIZE>
+#include "ovs_structs.h"
+
+struct event_t {
+ u64 ts;
+ u32 pid;
+ u32 id;
+ u64 n_flows;
+ u32 avg_n_flows;
+ u32 max_n_flows;
+ u32 flow_limit;
+ u32 dump_duration;
+ u32 poll_wait;
+};
+
+
+BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>);
+BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1);
+
+static struct event_t *get_event(uint32_t id) {
+ struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
+
+ if (!event) {
+ uint32_t type = 0;
+ uint64_t *value = dropcnt.lookup(&type);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+
+ return NULL;
+ }
+
+ event->id = id;
+ event->ts = bpf_ktime_get_ns();
+ event->pid = bpf_get_current_pid_tgid();
+
+ return event;
+}
+
+int probe__start_dump(struct pt_regs *ctx) {
+ struct event_t *event = get_event(0);
+ if (!event)
+ return 1;
+
+ events.ringbuf_submit(event, 0);
+ return 0;
+};
+
+int probe__sweep_done(struct pt_regs *ctx) {
+ struct udpif udpif;
+
+ bpf_usdt_readarg_p(1, ctx, &udpif, sizeof(udpif));
+
+ struct event_t *event = get_event(1);
+ if (!event)
+ return 1;
+
+ event->avg_n_flows = udpif.avg_n_flows;
+ event->max_n_flows = udpif.max_n_flows;
+ event->flow_limit = udpif.flow_limit;
+ event->dump_duration = udpif.dump_duration;
+
+ bpf_usdt_readarg(2, ctx, &event->n_flows);
+ bpf_usdt_readarg(3, ctx, &event->poll_wait);
+
+ events.ringbuf_submit(event, 0);
+ return 0;
+};
+
+int probe__reval_entry(struct pt_regs *ctx) {
+ struct event_t *event = get_event(2);
+ if (!event)
+ return 1;
+
+ events.ringbuf_submit(event, 0);
+ return 0;
+};
+"""
+
+
+#
+# event_to_dict()
+#
+def event_to_dict(event):
+ event_dict = {}
+
+ for field, _ in event._fields_:
+ if isinstance(getattr(event, field), (int, bytes)):
+ event_dict[field] = getattr(event, field)
+
+ return event_dict
+
+
+#
+# print_csv_header()
+#
+def print_csv_header():
+ print("ts_start, ts_complete, n_flows, n_reval_flows, avg_n_flows, "
+ "max_n_flows, flow_limit, dump_duration, poll_wait, actual_wait")
+
+
+#
+# process_event()
+#
+def process_event(ctx, data, size):
+ event = b['events'].event(data)
+ _process_event(event)
+
+
+def _process_event(event):
+ global graph
+
+ if export_file is not None:
+ export_file.write("event = {}\n".format(event_to_dict(event)))
+
+ if event.id == 0 and not state['running']:
+ start = state["last_start"]
+ done = state["last_done"]
+ if done is not None and start is not None:
+ actual_wait = (event.ts - done.ts) / 1000000
+ csv = "{}, {}, {}, {}, {}, {}, {}, {}, {}, {:.2f}".format(
+ start.ts, done.ts, done.n_flows, graph.ukey_count,
+ done.avg_n_flows, done.max_n_flows, done.flow_limit,
+ done.dump_duration, done.poll_wait, actual_wait)
+
+ if graph.base_time == 0:
+ graph = graph._replace(base_time=done.ts)
+
+ graph.time.append((done.ts - graph.base_time) / 1000000000)
+ graph.n_flows.append(done.n_flows)
+ graph.n_reval_flows.append(graph.ukey_count)
+ graph.avg_n_flows.append(done.avg_n_flows)
+ graph.max_n_flows.append(done.max_n_flows)
+ graph.flow_limit.append(done.flow_limit)
+ graph.dump_duration.append(done.dump_duration)
+ graph.poll_wait.append(done.poll_wait)
+ graph.actual_wait.append(actual_wait)
+
+ if not options.no_gui and not options.no_realtime_plots:
+ updated_graph = dynamic_plot_update(
+ graph, refresh=options.update_interval)
+ if updated_graph is None:
+ raise KeyboardInterrupt
+ graph = updated_graph
+
+ if options.compress_output:
+ last_csv = state["last_csv"]
+ if last_csv is None or \
+ csv.split(",")[2:-1] != last_csv.split(",")[2:-1] or \
+ abs((event.ts - done.ts) / 1000000 - done.poll_wait) > 100:
+ print(csv)
+ else:
+ state["last_not_printed_csv"] = csv
+
+ state["last_csv"] = csv
+ else:
+ print(csv)
+
+ state["last_start"] = event
+ state['running'] = True
+ graph = graph._replace(ukey_count=0)
+ elif event.id == 1 and state['running']:
+ state["last_done"] = event
+ state['running'] = False
+ elif event.id == 2 and state['running']:
+ graph = graph._replace(ukey_count=graph.ukey_count + 1)
+
+
+#
+# run_program()
+#
+def run_program(command, need_result=True):
+ try:
+ process = subprocess.run(command,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ encoding='utf8',
+ check=True)
+
+ except subprocess.CalledProcessError as perror:
+ if need_result:
+ return perror.returncode, perror.stdout
+
+ return perror.returncode
+
+ if need_result:
+ return 0, process.stdout
+
+ return 0
+
+
+#
+# gdb_get_struct_size()
+#
+def gdb_get_struct_size(struct_name, pid="self"):
+ error, result = run_program(["gdb", "/proc/{}/exe".format(str(pid)),
+ "-ex", "p sizeof({})".format(struct_name),
+ "-ex", "quit"])
+ if error:
+ print("ERROR: Can't run gdb to execute 'print sizeof({})'\n{}".
+ format(struct_name, result.rstrip()))
+ sys.exit(-1)
+
+ match = re.search(r"^\$1 = ([0-9]*)$", result, flags=re.MULTILINE)
+ if match is not None:
+ return int(match.group(1))
+
+ print("ERROR: Can't extract sizeof data from gdb output!\n{}".
+ format(result.rstrip()))
+ sys.exit(-1)
+
+
+#
+# next_power_of_two()
+#
+def next_power_of_two(val):
+ np = 1
+ while np < val:
+ np *= 2
+ return np
+
+
+#
+# dynamic_plot_init()
+#
+def dynamic_plot_init(real_time=True):
+
+ if real_time:
+ lines = []
+ fig, axs = plt.subplots(4, figsize=(19, 10))
+ fig.suptitle('Revalidator Handling')
+ for ax in axs:
+ ax.grid()
+
+ axs[0].set_ylabel("Numer of flows", weight='bold')
+ axs[1].set_ylabel("Time spend (ms)", weight='bold')
+ axs[2].set_ylabel("Numer of flows", weight='bold')
+ axs[3].set_ylabel("Time spend (ms)", weight='bold')
+ axs[3].set_xlabel("Time (seconds since start)", weight='bold')
+
+ lines.append(axs[0].plot([], [], label="n_flows", marker='o')[0])
+ lines.append(axs[0].plot([], [], label="n_reval_flows")[0])
+ axs[0].legend(bbox_to_anchor=(1, 1), loc='upper left',
+ borderaxespad=0.5)
+ axs[0].set_xlim(0, 30)
+ axs[0].set_ylim(-4, 104)
+
+ lines.append(axs[1].plot([], [], color="orange",
+ label="dump_duration")[0])
+ axs[1].legend(bbox_to_anchor=(1, 1),
+ loc='upper left', borderaxespad=0.5)
+ axs[1].set_xlim(0, 30)
+ axs[1].set_ylim(-0.4, 10.4)
+
+ lines.append(axs[2].plot([], [], label="avg_n_flows")[0])
+ lines.append(axs[2].plot([], [], label="max_n_flows")[0])
+ lines.append(axs[2].plot([], [], label="flow_limit")[0])
+ axs[2].legend(bbox_to_anchor=(1, 1), loc='upper left',
+ borderaxespad=0.5)
+ axs[2].set_xlim(0, 30)
+ axs[2].set_ylim(-600, 15600)
+
+ lines.append(axs[3].plot([], [], label="poll_wait")[0])
+ lines.append(axs[3].plot([], [], label="actual_wait")[0])
+ axs[3].legend(bbox_to_anchor=(1, 1), loc='upper left',
+ borderaxespad=0.5)
+ axs[3].set_xlim(0, 30)
+ axs[3].set_ylim(-20, 520)
+
+ fig.tight_layout()
+
+ plt.ion()
+ plt.show()
+ else:
+ fig = None
+ axs = None
+ lines = None
+
+ graph_data = {"base_time": 0,
+ "l_index": 0,
+ "fig": fig,
+ "axs": axs,
+ "lines": lines,
+ "last_update": 0,
+ "ukey_count": 0,
+ "time": [],
+ "n_flows": [],
+ "n_reval_flows": [],
+ "avg_n_flows": [],
+ "max_n_flows": [],
+ "flow_limit": [],
+ "dump_duration": [],
+ "poll_wait": [],
+ "actual_wait": []}
+
+ return namedtuple("GraphData", graph_data.keys())(*graph_data.values())
+
+
+#
+# dynamic_plot_update()
+#
+def dynamic_plot_update(graph_data, refresh=1):
+
+ if graph_data.last_update != 0 and \
+ (graph_data.time[-1] - graph_data.last_update) < refresh:
+ return graph_data
+
+ graph_data = graph_data._replace(last_update=graph_data.time[-1])
+
+ if (graph_data.time[-1] - graph_data.time[graph_data.l_index]) > 30:
+ for i in range(graph_data.l_index + 1, len(graph_data.time)):
+ if (graph_data.time[-1] - graph_data.time[i]) <= 30:
+ graph_data = graph_data._replace(l_index=i)
+ break
+
+ for line in graph_data.lines:
+ line.set_xdata(graph_data.time[graph_data.l_index:])
+
+ graph_data.lines[0].set_ydata(graph_data.n_flows[graph_data.l_index:])
+ graph_data.lines[1].set_ydata(
+ graph_data.n_reval_flows[graph_data.l_index:])
+ graph_data.lines[2].set_ydata(
+ graph_data.dump_duration[graph_data.l_index:])
+ graph_data.lines[3].set_ydata(graph_data.avg_n_flows[graph_data.l_index:])
+ graph_data.lines[4].set_ydata(graph_data.max_n_flows[graph_data.l_index:])
+ graph_data.lines[5].set_ydata(graph_data.flow_limit[graph_data.l_index:])
+ graph_data.lines[6].set_ydata(graph_data.poll_wait[graph_data.l_index:])
+ graph_data.lines[7].set_ydata(graph_data.actual_wait[graph_data.l_index:])
+
+ for ax in graph_data.axs:
+ if graph_data.l_index == 0:
+ ax.autoscale(enable=True, axis='y')
+ else:
+ ax.autoscale(enable=True)
+
+ ax.relim(visible_only=True)
+ ax.autoscale_view(tight=True, scalex=True, scaley=True)
+
+ try:
+ graph_data.fig.canvas.draw()
+ graph_data.fig.canvas.flush_events()
+ except KeyboardInterrupt:
+ return None
+
+ return graph_data
+
+
+#
+# show_graph()
+#
+def show_graph(graph_data, gui=False, file_name=None):
+
+ if len(graph_data.time) == 0 or (not gui and file_name is None):
+ return
+
+ plt.ioff()
+
+ fig, (nf_ax, dd_ax, f_ax, t_ax) = plt.subplots(4, figsize=(19, 10))
+ fig.suptitle('Revalidator Handling')
+ nf_ax.grid()
+ f_ax.grid()
+ dd_ax.grid()
+ t_ax.grid()
+
+ nf_ax.set_ylabel("Numer of flows", weight='bold')
+ f_ax.set_ylabel("Numer of flows", weight='bold')
+ dd_ax.set_ylabel("Time spend (ms)", weight='bold')
+ t_ax.set_ylabel("Time spend (ms)", weight='bold')
+ t_ax.set_xlabel("Time (seconds since start)", weight='bold')
+
+ nf_ax.plot(graph_data.time, graph_data.n_flows, label="n_flows")
+ nf_ax.plot(graph_data.time, graph_data.n_reval_flows,
+ label="n_reval_flows")
+ nf_ax.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0.5)
+
+ dd_ax.plot(graph_data.time, graph_data.dump_duration, color="orange",
+ label="dump_duration")
+ dd_ax.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0.5)
+
+ f_ax.plot(graph_data.time, graph_data.avg_n_flows, label="avg_n_flows")
+ f_ax.plot(graph_data.time, graph_data.max_n_flows, label="max_n_flows")
+ f_ax.plot(graph_data.time, graph_data.flow_limit, label="flow_limit")
+ f_ax.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0.5)
+
+ t_ax.plot(graph_data.time, graph_data.poll_wait, label="poll_wait")
+ t_ax.plot(graph_data.time, graph_data.actual_wait, label="actual_wait")
+ t_ax.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0.5)
+
+ fig.tight_layout()
+
+ if file_name is not None and file_name != "":
+ fig.savefig(file_name + '.png')
+
+ if gui:
+ try:
+ plt.show()
+ except KeyboardInterrupt:
+ pass
+
+ plt.close(fig)
+
+
+#
+# process_events_from_file()
+#
+def process_events_from_file(file_name):
+ try:
+ with open(file_name, 'r') as fd:
+ print("- Reading events from \"{}\"...".format(file_name))
+
+ print_csv_header()
+ for entry in fd:
+ entry.rstrip()
+ if entry.startswith('event = {'):
+ event = ast.literal_eval(entry[8:])
+ event = namedtuple("EventObject",
+ event.keys())(*event.values())
+ try:
+ _process_event(event)
+ except KeyboardInterrupt:
+ break
+
+ except (FileNotFoundError, PermissionError):
+ print("ERROR: Can't open file \"{}\" for reading!".format(file_name))
+ sys.exit(-1)
+
+ show_graph(graph, gui=not options.no_gui, file_name=options.write_charts)
+
+
+#
+# main()
+#
+def main():
+ #
+ # Don't like these globals, but ctx passing does not seem to work with the
+ # existing open_ring_buffer() API :(
+ #
+ global b
+ global export_file
+ global options
+ global state
+ global graph
+
+ #
+ # Argument parsing
+ #
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("-c", "--compress-output", action="store_true",
+ help="Compress output, i.e. only dump changes in "
+ "the dataset")
+ parser.add_argument("--buffer-page-count",
+ help="Number of BPF ring buffer pages, default 1024",
+ type=int, default=1024, metavar="NUMBER")
+ parser.add_argument("-D", "--debug",
+ help="Enable eBPF debugging",
+ type=int, const=0x3f, default=0, nargs='?')
+ parser.add_argument("-g", "--no-gui", action="store_true",
+ help="Do use the gui to display plots")
+ parser.add_argument("--no-ukey-count", action="store_true",
+ help="No revalidate_ukey() counting")
+ parser.add_argument("-p", "--pid", metavar="VSWITCHD_PID",
+ help="ovs-vswitch's PID",
+ type=int, default=None)
+ parser.add_argument("-r", "--read-events",
+ help="Read events from <FILE> instead of installing "
+ "tracepoints", type=str, default=None, metavar="FILE")
+ parser.add_argument("-R", "--no-realtime-plots", action="store_true",
+ help="Do not show realtime plot while tracing")
+ parser.add_argument("-u", "--update-interval",
+ help="Seconds to wait between real time update, "
+ "default 1", type=float, default=1, metavar="SECONDS")
+ parser.add_argument("-w", "--write-events",
+ help="Write events to <FILE>",
+ type=str, default=None, metavar="FILE")
+ parser.add_argument("-W", "--write-charts",
+ help="Write overall charts to <FILE>.png",
+ type=str, default=None, metavar="FILE")
+
+ options = parser.parse_args()
+
+ #
+ # Find the PID of the ovs-vswitchd daemon if not specified.
+ #
+ if options.pid is None and options.read_events is None:
+ for proc in psutil.process_iter():
+ if 'ovs-vswitchd' in proc.name():
+ if options.pid is not None:
+ print("ERROR: Multiple ovs-vswitchd daemons running, "
+ "use the -p option!")
+ sys.exit(-1)
+
+ options.pid = proc.pid
+
+ #
+ # Error checking on input parameters
+ #
+ if options.pid is None and options.read_events is None:
+ print("ERROR: Failed to find ovs-vswitchd's PID!")
+ sys.exit(-1)
+
+ if options.read_events is not None and options.write_events is not None:
+ print("ERROR: Either supply the read or write events option, "
+ "not both!")
+ sys.exit(-1)
+
+ options.buffer_page_count = next_power_of_two(options.buffer_page_count)
+
+ #
+ # Define the state and graph
+ #
+ state = {"last_start": None,
+ "last_done": None,
+ "running": False,
+ "last_csv": None,
+ "last_not_printed_csv": None}
+
+ export_file = None
+
+ graph = dynamic_plot_init(real_time=(not options.no_gui
+ and not options.no_realtime_plots))
+
+ #
+ # Process events from file if required.
+ #
+ if options.read_events is not None:
+ process_events_from_file(options.read_events)
+ sys.exit(0)
+
+ #
+ # Open write handle if needed.
+ #
+ if options.write_events is not None:
+ try:
+ export_file = open(options.write_events, "w")
+ except (FileNotFoundError, IOError, PermissionError) as e:
+ print("ERROR: Can't create export file \"{}\": {}".format(
+ options.write_events, e.strerror))
+ sys.exit(-1)
+
+ #
+ # Attach the usdt probe
+ #
+ u = USDT(pid=int(options.pid))
+ try:
+ u.enable_probe(probe="start_dump", fn_name="probe__start_dump")
+ u.enable_probe(probe="sweep_done", fn_name="probe__sweep_done")
+ if not options.no_ukey_count:
+ u.enable_probe(probe="revalidate_ukey__:entry",
+ fn_name="probe__reval_entry")
+ except USDTException as e:
+ print("ERROR: {}"
+ "ovs-vswitchd!".format(
+ (re.sub('^', ' ' * 7, str(e), flags=re.MULTILINE)).strip().
+ replace("--with-dtrace or --enable-dtrace",
+ "--enable-usdt-probes")))
+ sys.exit(-1)
+
+ #
+ # Attach probe to running process
+ #
+ source = EBPF_SOURCE.replace("<BUFFER_PAGE_CNT>",
+ str(options.buffer_page_count))
+ source = source.replace("<PTHREAD_MUTEX_T_SIZE>",
+ str(gdb_get_struct_size("pthread_mutex_t",
+ pid=options.pid)))
+
+ b = BPF(text=source, usdt_contexts=[u], debug=options.debug)
+
+ #
+ # Print header
+ #
+ ltz = datetime.datetime.now()
+ utc = ltz.astimezone(pytz.utc)
+ time_string = "# Starting trace @{} ({} UTC)".format(
+ ltz.isoformat(), utc.strftime("%H:%M:%S"))
+
+ if export_file is not None:
+ export_file.write(time_string + "\n")
+
+ print(time_string)
+ print_csv_header()
+
+ #
+ # Process all events
+ b['events'].open_ring_buffer(process_event)
+ while 1:
+ try:
+ b.ring_buffer_poll()
+ time.sleep(0.5)
+ except KeyboardInterrupt:
+ break
+
+ dropcnt = b.get_table("dropcnt")
+ for k in dropcnt.keys():
+ count = dropcnt.sum(k).value
+ if k.value == 0 and count > 0:
+ print("\n# WARNING: Not all upcalls were captured, {} were "
+ "dropped!\n# Increase the BPF ring buffer size "
+ "with the --buffer-page-count option.".format(count))
+
+ #
+ # Display footer.
+ #
+ if state["last_not_printed_csv"] is not None:
+ print(state["last_not_printed_csv"])
+
+ ltz = datetime.datetime.now()
+ utc = ltz.astimezone(pytz.utc)
+ time_string = "# Stopping trace @{} ({} UTC)".format(
+ ltz.isoformat(), utc.strftime("%H:%M:%S"))
+
+ if export_file is not None:
+ export_file.write(time_string + "\n")
+
+ print(time_string)
+
+ #
+    # Close event file if used.
+ #
+ if options.write_events is not None:
+ export_file.close()
+
+ #
+ # Do final graph if requested.
+ #
+ show_graph(graph, gui=not options.no_gui, file_name=options.write_charts)
+
+
+#
+# Start main() as the default entry point...
+#
+if __name__ == '__main__':
+ main()
This patch adds a Python script that can be used to analyze the revalidator runs by providing statistics (including some real time graphs). The USDT events can also be captured to a file and used for later offline analysis. Signed-off-by: Eelco Chaudron <echaudro@redhat.com> --- I'm planning a blog post to explain the Open vSwitch revalidator implementation, and this tool will help you determine what is happening in your system. Also, here is a link to an example of a real-time and overall plot. https://photos.app.goo.gl/rdx63zuFure7QE3t6 Documentation/topics/usdt-probes.rst | 84 +++ ofproto/ofproto-dpif-upcall.c | 11 utilities/automake.mk | 4 utilities/usdt-scripts/ovs_structs.h | 123 +++++ utilities/usdt-scripts/reval_monitor.py | 756 +++++++++++++++++++++++++++++++ 5 files changed, 977 insertions(+), 1 deletion(-) create mode 100644 utilities/usdt-scripts/ovs_structs.h create mode 100755 utilities/usdt-scripts/reval_monitor.py