diff mbox series

[RFC,bpf-next,15/16] tools/bpf: selftests: add dumper progs for bpf_map/task/task_file

Message ID 20200408232538.2676626-1-yhs@fb.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf based dumping of kernel data structures | expand

Commit Message

Yonghong Song April 8, 2020, 11:25 p.m. UTC
The implementation is arbitrary, just to show how the bpf programs
can be written for bpf_map/task/task_file. They can be customized
for specific needs.

For example, for bpf_map, the dumper prints out:
  $ cat /sys/kernel/bpfdump/bpf_map/my1
      id   refcnt  usercnt  locked_vm
       3        2        0         20
       6        2        0         20
       9        2        0         20
      12        2        0         20
      13        2        0         20
      16        2        0         20
      19        2        0         20

For task, the dumper prints out:
  $ cat /sys/kernel/bpfdump/task/my1
    tgid      gid
       1        1
       2        2
    ....
    1944     1944
    1948     1948
    1949     1949
    1953     1953

For task/file, the dumper prints out:
  $ cat /sys/kernel/bpfdump/task/file/my1
    tgid      gid       fd      file
       1        1        0 ffffffff95c97600
       1        1        1 ffffffff95c97600
       1        1        2 ffffffff95c97600
    ....
    1895     1895      255 ffffffff95c8fe00
    1932     1932        0 ffffffff95c8fe00
    1932     1932        1 ffffffff95c8fe00
    1932     1932        2 ffffffff95c8fe00
    1932     1932        3 ffffffff95c185c0

This is able to print out all open files (fd and file->f_op), so the user can
compare f_op against a particular kernel file_operations symbol to find out what kind of file it is.
For example, from /proc/kallsyms, we can find
  ffffffff95c185c0 r eventfd_fops
so we will know tgid 1932 fd 3 is an eventfd file descriptor.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 .../selftests/bpf/progs/bpfdump_bpf_map.c     | 24 +++++++++++++++++++
 .../selftests/bpf/progs/bpfdump_task.c        | 21 ++++++++++++++++
 .../selftests/bpf/progs/bpfdump_task_file.c   | 24 +++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpfdump_bpf_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpfdump_task.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpfdump_task_file.c

Comments

Alexei Starovoitov April 10, 2020, 3:33 a.m. UTC | #1
On Wed, Apr 08, 2020 at 04:25:38PM -0700, Yonghong Song wrote:
> For task/file, the dumper prints out:
>   $ cat /sys/kernel/bpfdump/task/file/my1
>     tgid      gid       fd      file
>        1        1        0 ffffffff95c97600
>        1        1        1 ffffffff95c97600
>        1        1        2 ffffffff95c97600
>     ....
>     1895     1895      255 ffffffff95c8fe00
>     1932     1932        0 ffffffff95c8fe00
>     1932     1932        1 ffffffff95c8fe00
>     1932     1932        2 ffffffff95c8fe00
>     1932     1932        3 ffffffff95c185c0
...
> +SEC("dump//sys/kernel/bpfdump/task/file")
> +int BPF_PROG(dump_tasks, struct task_struct *task, __u32 fd, struct file *file,
> +	     struct seq_file *seq, u64 seq_num)
> +{
> +	static char const banner[] = "    tgid      gid       fd      file\n";
> +	static char const fmt1[] = "%8d %8d";
> +	static char const fmt2[] = " %8d %lx\n";
> +
> +	if (seq_num == 0)
> +		bpf_seq_printf(seq, banner, sizeof(banner));
> +
> +	bpf_seq_printf(seq, fmt1, sizeof(fmt1), task->tgid, task->pid);
> +	bpf_seq_printf(seq, fmt2, sizeof(fmt2), fd, (long)file->f_op);
> +	return 0;
> +}

I wonder what is the speed of walking all files in all tasks with an empty
program? If it's fast I can imagine a million use cases for such searching bpf
prog. Like finding which task owns particular socket. This could be a massive
feature.

With one redundant spin_lock removed it seems it will be one spin_lock per prog
invocation? Maybe eventually it can be amortized within seq_file iterating
logic. Would be really awesome if the cost is just refcnt ++/-- per call and
rcu_read_lock.
Yonghong Song April 10, 2020, 6:41 a.m. UTC | #2
On 4/9/20 8:33 PM, Alexei Starovoitov wrote:
> On Wed, Apr 08, 2020 at 04:25:38PM -0700, Yonghong Song wrote:
>> For task/file, the dumper prints out:
>>    $ cat /sys/kernel/bpfdump/task/file/my1
>>      tgid      gid       fd      file
>>         1        1        0 ffffffff95c97600
>>         1        1        1 ffffffff95c97600
>>         1        1        2 ffffffff95c97600
>>      ....
>>      1895     1895      255 ffffffff95c8fe00
>>      1932     1932        0 ffffffff95c8fe00
>>      1932     1932        1 ffffffff95c8fe00
>>      1932     1932        2 ffffffff95c8fe00
>>      1932     1932        3 ffffffff95c185c0
> ...
>> +SEC("dump//sys/kernel/bpfdump/task/file")
>> +int BPF_PROG(dump_tasks, struct task_struct *task, __u32 fd, struct file *file,
>> +	     struct seq_file *seq, u64 seq_num)
>> +{
>> +	static char const banner[] = "    tgid      gid       fd      file\n";
>> +	static char const fmt1[] = "%8d %8d";
>> +	static char const fmt2[] = " %8d %lx\n";
>> +
>> +	if (seq_num == 0)
>> +		bpf_seq_printf(seq, banner, sizeof(banner));
>> +
>> +	bpf_seq_printf(seq, fmt1, sizeof(fmt1), task->tgid, task->pid);
>> +	bpf_seq_printf(seq, fmt2, sizeof(fmt2), fd, (long)file->f_op);
>> +	return 0;
>> +}
> 
> I wonder what is the speed of walking all files in all tasks with an empty
> program? If it's fast I can imagine a million use cases for such searching bpf
> prog. Like finding which task owns particular socket. This could be a massive
> feature.
> 
> With one redundant spin_lock removed it seems it will be one spin_lock per prog
> invocation? Maybe eventually it can be amortized within seq_file iterating
> logic. Would be really awesome if the cost is just refcnt ++/-- per call and
> rcu_read_lock.

The main seq_read() loop is below:
         while (1) {
                 size_t offs = m->count;
                 loff_t pos = m->index;

                 p = m->op->next(m, p, &m->index);
                 if (pos == m->index)
                         /* Buggy ->next function */
                         m->index++;
                 if (!p || IS_ERR(p)) {
                         err = PTR_ERR(p);
                         break;
                 }
                 if (m->count >= size)
                         break;
                 err = m->op->show(m, p);
                 if (seq_has_overflowed(m) || err) {
                         m->count = offs;
                         if (likely(err <= 0))
                                 break;
                 }
         }

If we remove the spin_lock() as in another email comment,
we won't have spin_lock() in seq_ops->next() function, only
refcnt ++/-- and rcu_read_{lock, unlock}s. The seq_ops->show() does
not have any spin_lock() either.

I have not had time to do a perf measurement yet.
I will do it in the next revision.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/progs/bpfdump_bpf_map.c b/tools/testing/selftests/bpf/progs/bpfdump_bpf_map.c
new file mode 100644
index 000000000000..c85f5a330010
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpfdump_bpf_map.c
@@ -0,0 +1,24 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("dump//sys/kernel/bpfdump/bpf_map")
+int BPF_PROG(dump_bpf_map, struct bpf_map *map, struct seq_file *seq, u64 seq_num)
+{
+	static const char banner[] = "      id   refcnt  usercnt  locked_vm\n";
+	static const char fmt1[] = "%8u %8ld ";
+	static const char fmt2[] = "%8ld %10lu\n";
+
+	if (seq_num == 0)
+		bpf_seq_printf(seq, banner, sizeof(banner));
+
+	bpf_seq_printf(seq, fmt1, sizeof(fmt1), map->id, map->refcnt.counter);
+	bpf_seq_printf(seq, fmt2, sizeof(fmt2), map->usercnt.counter,
+		       map->memory.user->locked_vm.counter);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpfdump_task.c b/tools/testing/selftests/bpf/progs/bpfdump_task.c
new file mode 100644
index 000000000000..4d90ba97fbda
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpfdump_task.c
@@ -0,0 +1,21 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("dump//sys/kernel/bpfdump/task")
+int BPF_PROG(dump_tasks, struct task_struct *task, struct seq_file *seq, u64 seq_num)
+{
+	static char const banner[] = "    tgid      gid\n";
+	static char const fmt[] = "%8d %8d\n";
+
+	if (seq_num == 0)
+		bpf_seq_printf(seq, banner, sizeof(banner));
+
+	bpf_seq_printf(seq, fmt, sizeof(fmt), task->tgid, task->pid);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpfdump_task_file.c b/tools/testing/selftests/bpf/progs/bpfdump_task_file.c
new file mode 100644
index 000000000000..5cf02c050e1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpfdump_task_file.c
@@ -0,0 +1,24 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("dump//sys/kernel/bpfdump/task/file")
+int BPF_PROG(dump_tasks, struct task_struct *task, __u32 fd, struct file *file,
+	     struct seq_file *seq, u64 seq_num)
+{
+	static char const banner[] = "    tgid      gid       fd      file\n";
+	static char const fmt1[] = "%8d %8d";
+	static char const fmt2[] = " %8d %lx\n";
+
+	if (seq_num == 0)
+		bpf_seq_printf(seq, banner, sizeof(banner));
+
+	bpf_seq_printf(seq, fmt1, sizeof(fmt1), task->tgid, task->pid);
+	bpf_seq_printf(seq, fmt2, sizeof(fmt2), fd, (long)file->f_op);
+	return 0;
+}