diff mbox series

[ovs-dev,v2] utilities: Add TASK_STOPPED accounting to the kernel_delay.py script.

Message ID 133df783097cbd7c62d0ff3efdfd1f349c1aa421.1706532702.git.echaudro@redhat.com
State Accepted
Commit 5f2af0b7a30e7de84de97556223f892ef63ec14b
Delegated to: Eelco Chaudron
Headers show
Series [ovs-dev,v2] utilities: Add TASK_STOPPED accounting to the kernel_delay.py script. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Eelco Chaudron Jan. 29, 2024, 12:51 p.m. UTC
This change adds statistics for when a thread is put into the stopped state.
For example with the following:

kill -STOP $(pidof ovs-vswitchd); sleep 1; kill -CONT $(pidof ovs-vswitchd);

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 v2: - Removed worst/max ns delay zero checks.

 utilities/usdt-scripts/kernel_delay.py  | 110 ++++++++++++++++++------
 utilities/usdt-scripts/kernel_delay.rst |  24 ++++++
 2 files changed, 110 insertions(+), 24 deletions(-)

Comments

Simon Horman Jan. 30, 2024, 9:53 a.m. UTC | #1
On Mon, Jan 29, 2024 at 01:51:42PM +0100, Eelco Chaudron wrote:
> This change adds statistics for when a thread is put into the stopped state.
> For example with the following:
> 
> kill -STOP $(pidof ovs-vswitchd); sleep 1; kill -CONT $(pidof ovs-vswitchd);
> 
> Acked-by: Simon Horman <horms@ovn.org>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  v2: - Removed worst/max ns delay zero checks.

Thanks for the update.
I confirm that I'm (still) happy with this patch.
Eelco Chaudron Feb. 20, 2024, 12:47 p.m. UTC | #2
On 30 Jan 2024, at 10:53, Simon Horman wrote:

> On Mon, Jan 29, 2024 at 01:51:42PM +0100, Eelco Chaudron wrote:
>> This change adds statistics for when a thread is put into the stopped state.
>> For example with the following:
>>
>> kill -STOP $(pidof ovs-vswitchd); sleep 1; kill -CONT $(pidof ovs-vswitchd);
>>
>> Acked-by: Simon Horman <horms@ovn.org>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>  v2: - Removed worst/max ns delay zero checks.
>
> Thanks for the update.
> I confirm that I'm (still) happy with this patch.

Thanks Simon for the review. Patch applied to the main branch.

//Eelco
diff mbox series

Patch

diff --git a/utilities/usdt-scripts/kernel_delay.py b/utilities/usdt-scripts/kernel_delay.py
index b2012fdf2..de6b0c9de 100755
--- a/utilities/usdt-scripts/kernel_delay.py
+++ b/utilities/usdt-scripts/kernel_delay.py
@@ -81,7 +81,6 @@  struct event_t {
 
     u32 syscall;
     u64 entry_ts;
-
 };
 
 BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>);
@@ -220,7 +219,7 @@  TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
         u64 delta = bpf_ktime_get_ns() - *start_ns;
         val->count++;
         val->total_ns += delta;
-        if (val->worst_ns == 0 || delta > val->worst_ns)
+        if (delta > val->worst_ns)
             val->worst_ns = delta;
 
         if (<SYSCALL_TRACE_EVENTS>) {
@@ -243,13 +242,12 @@  TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
 
 
 /*
- * For measuring the thread run time, we need the following.
+ * For measuring the thread stopped time, we need the following.
  */
-struct run_time_data_t {
+struct stop_time_data_t {
     u64 count;
     u64 total_ns;
-    u64 max_ns;
-    u64 min_ns;
+    u64 worst_ns;
 };
 
 struct pid_tid_key_t {
@@ -257,6 +255,43 @@  struct pid_tid_key_t {
     u32  tid;
 };
 
+BPF_HASH(stop_start, u64, u64);
+BPF_HASH(stop_data, struct pid_tid_key_t, struct stop_time_data_t);
+
+static inline void thread_handle_stopped_run(u32 pid, u32 tgid, u64 ktime)
+{
+    u64 pid_tgid = (u64) tgid << 32 | pid;
+    u64 *start_ns = stop_start.lookup(&pid_tgid);
+
+    if (!start_ns || *start_ns == 0)
+        return;
+
+    struct stop_time_data_t *val, zero = {};
+    struct pid_tid_key_t key = { .pid = tgid,
+                                 .tid = pid };
+
+    val = stop_data.lookup_or_try_init(&key, &zero);
+    if (val) {
+        u64 delta = ktime - *start_ns;
+        val->count++;
+        val->total_ns += delta;
+        if (delta > val->worst_ns)
+            val->worst_ns = delta;
+    }
+    *start_ns = 0;
+}
+
+
+/*
+ * For measuring the thread run time, we need the following.
+ */
+struct run_time_data_t {
+    u64 count;
+    u64 total_ns;
+    u64 max_ns;
+    u64 min_ns;
+};
+
 BPF_HASH(run_start, u64, u64);
 BPF_HASH(run_data, struct pid_tid_key_t, struct run_time_data_t);
 
@@ -282,7 +317,7 @@  static inline void thread_stop_run(u32 pid, u32 tgid, u64 ktime)
         u64 delta = ktime - *start_ns;
         val->count++;
         val->total_ns += delta;
-        if (val->max_ns == 0 || delta > val->max_ns)
+        if (delta > val->max_ns)
             val->max_ns = delta;
         if (val->min_ns == 0 || delta < val->min_ns)
             val->min_ns = delta;
@@ -312,6 +347,8 @@  static inline int sched_wakeup__(u32 pid, u32 tgid)
 
     u64 t = bpf_ktime_get_ns();
     ready_start.update(&pid_tgid, &t);
+
+    thread_handle_stopped_run(pid, tgid, t);
     return 0;
 }
 
@@ -336,22 +373,26 @@  RAW_TRACEPOINT_PROBE(sched_switch)
     if (!capture_enabled__())
         return 0;
 
-    if (prev-><STATE_FIELD> == TASK_RUNNING && prev->tgid == MONITOR_PID)
-        sched_wakeup__(prev->pid, prev->tgid);
-
     if (prev->tgid == MONITOR_PID) {
+        u64 prev_pid_tgid = (u64)next->tgid << 32 | next->pid;
         ktime = bpf_ktime_get_ns();
+
+        if (prev-><STATE_FIELD> == TASK_RUNNING)
+            ready_start.update(&prev_pid_tgid, &ktime);
+
+        if (prev-><STATE_FIELD> & __TASK_STOPPED)
+            stop_start.update(&prev_pid_tgid, &ktime);
+
         thread_stop_run(prev->pid, prev->tgid, ktime);
     }
 
-    u64 pid_tgid = (u64)next->tgid << 32 | next->pid;
-
     if (next->tgid != MONITOR_PID)
         return 0;
 
     if (ktime == 0)
         ktime = bpf_ktime_get_ns();
 
+    u64 pid_tgid = (u64)next->tgid << 32 | next->pid;
     u64 *start_ns = ready_start.lookup(&pid_tgid);
 
     if (start_ns && *start_ns != 0) {
@@ -365,7 +406,7 @@  RAW_TRACEPOINT_PROBE(sched_switch)
             u64 delta = ktime - *start_ns;
             val->count++;
             val->total_ns += delta;
-            if (val->worst_ns == 0 || delta > val->worst_ns)
+            if (delta > val->worst_ns)
                 val->worst_ns = delta;
         }
         *start_ns = 0;
@@ -438,7 +479,7 @@  TRACEPOINT_PROBE(irq, irq_handler_exit)
             u64 delta = bpf_ktime_get_ns() - data->start_ns;
             val->count++;
             val->total_ns += delta;
-            if (val->worst_ns == 0 || delta > val->worst_ns)
+            if (delta > val->worst_ns)
                 val->worst_ns = delta;
         }
     }
@@ -508,7 +549,7 @@  TRACEPOINT_PROBE(irq, softirq_exit)
         u64 delta = bpf_ktime_get_ns() - data->start_ns;
         val->count++;
         val->total_ns += delta;
-        if (val->worst_ns == 0 || delta > val->worst_ns)
+        if (delta > val->worst_ns)
             val->worst_ns = delta;
     }
 
@@ -844,6 +885,8 @@  def reset_capture():
     bpf["softirq_start"].clear()
     bpf["softirq_data"].clear()
     bpf["stack_traces"].clear()
+    bpf["stop_start"].clear()
+    bpf["stop_data"].clear()
 
 
 #
@@ -879,6 +922,9 @@  def process_results(syscall_events=None, trigger_delta=None):
     threads_ready = {k.tid for k, _ in bpf["ready_data"].items()
                      if k.pid != 0xffffffff}
 
+    threads_stopped = {k.tid for k, _ in bpf["stop_data"].items()
+                       if k.pid != 0xffffffff}
+
     threads_hardirq = {k.tid for k, _ in bpf["hardirq_data"].items()
                        if k.pid != 0xffffffff}
 
@@ -886,7 +932,7 @@  def process_results(syscall_events=None, trigger_delta=None):
                        if k.pid != 0xffffffff}
 
     threads = sorted(threads_syscall | threads_run | threads_ready |
-                     threads_hardirq | threads_softirq,
+                     threads_stopped | threads_hardirq | threads_softirq,
                      key=lambda x: get_thread_name(options.pid, x))
 
     #
@@ -933,28 +979,44 @@  def process_results(syscall_events=None, trigger_delta=None):
         #
         # THREAD RUN STATISTICS
         #
-        print("\n{:10} {:16} {}\n{}{:10}  {:>16}  {:>16}  {:>16}".format(
-            "", "", "[THREAD RUN STATISTICS]", indent,
-            "SCHED_CNT", "TOTAL ns", "MIN ns", "MAX ns"))
-
         for k, v in filter(lambda t: t[0].tid == thread,
                            bpf["run_data"].items()):
 
+            print("\n{:10} {:16} {}\n{}{:10}  {:>16}  {:>16}  {:>16}".format(
+                "", "", "[THREAD RUN STATISTICS]", indent,
+                "SCHED_CNT", "TOTAL ns", "MIN ns", "MAX ns"))
+
             print("{}{:10}  {:16,}  {:16,}  {:16,}".format(
                 indent, v.count, v.total_ns, v.min_ns, v.max_ns))
+            break
 
         #
         # THREAD READY STATISTICS
         #
-        print("\n{:10} {:16} {}\n{}{:10}  {:>16}  {:>16}".format(
-            "", "", "[THREAD READY STATISTICS]", indent,
-            "SCHED_CNT", "TOTAL ns", "MAX ns"))
-
         for k, v in filter(lambda t: t[0].tid == thread,
                            bpf["ready_data"].items()):
 
+            print("\n{:10} {:16} {}\n{}{:10}  {:>16}  {:>16}".format(
+                "", "", "[THREAD READY STATISTICS]", indent,
+                "SCHED_CNT", "TOTAL ns", "MAX ns"))
+
             print("{}{:10}  {:16,}  {:16,}".format(
                 indent, v.count, v.total_ns, v.worst_ns))
+            break
+
+        #
+        # THREAD STOPPED STATISTICS
+        #
+        for k, v in filter(lambda t: t[0].tid == thread,
+                           bpf["stop_data"].items()):
+
+            print("\n{:10} {:16} {}\n{}{:10}  {:>16}  {:>16}".format(
+                "", "", "[THREAD STOPPED STATISTICS]", indent,
+                "STOP_CNT", "TOTAL ns", "MAX ns"))
+
+            print("{}{:10}  {:16,}  {:16,}".format(
+                indent, v.count, v.total_ns, v.worst_ns))
+            break
 
         #
         # HARD IRQ STATISTICS
diff --git a/utilities/usdt-scripts/kernel_delay.rst b/utilities/usdt-scripts/kernel_delay.rst
index 0ebd30afb..e2e43752d 100644
--- a/utilities/usdt-scripts/kernel_delay.rst
+++ b/utilities/usdt-scripts/kernel_delay.rst
@@ -75,6 +75,10 @@  with the ``--pid`` option.
                   SCHED_CNT           TOTAL ns            MAX ns
                        7            11,334             6,636
 
+                  [THREAD STOPPED STATISTICS]
+                  STOP_CNT            TOTAL ns            MAX ns
+                       3         3,045,728,323     1,015,739,474
+
                   [HARD IRQ STATISTICS]
                   NAME                       COUNT          TOTAL ns            MAX ns
                   eno8303-rx-1                   1             3,586             3,586
@@ -102,6 +106,7 @@  followed by resource-specific data. Which are:
 - ``SYSCALL STATISTICS``
 - ``THREAD RUN STATISTICS``
 - ``THREAD READY STATISTICS``
+- ``THREAD STOPPED STATISTICS``
 - ``HARD IRQ STATISTICS``
 - ``SOFT IRQ STATISTICS``
 
@@ -143,6 +148,25 @@  Note that these statistics only count events where the thread was getting
 ready to run and started running during the measurement interval.
 
 
+``THREAD STOPPED STATISTICS``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+``THREAD STOPPED STATISTICS`` reveal the number of instances where the thread
+has been scheduled out while in the running state due to its transition to
+the TASK_STOPPED state.
+
+This behavior can be replicated by manually placing the thread in the stopped
+state and subsequently resuming it. For instance:
+
+.. code-block:: console
+
+  # kill -STOP $(pidof ovs-vswitchd); \
+    sleep 1; \
+    kill -CONT $(pidof ovs-vswitchd);
+
+Note that these statistics only count events where the thread was running at
+the time it was put into the stopped state.
+
+
 ``HARD IRQ STATISTICS``
 ~~~~~~~~~~~~~~~~~~~~~~~
 ``HARD IRQ STATISTICS`` tell you how much time was spent servicing hard