diff mbox series

[bpf-next,v4,7/7] tools/bpftool: add perf subcommand

Message ID 20180524002020.1182296-3-yhs@fb.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: implement BPF_TASK_FD_QUERY | expand

Commit Message

Yonghong Song May 24, 2018, 12:20 a.m. UTC
The new command "bpftool perf [show | list]" will traverse
all processes under /proc, and if any fd is associated
with a perf event, it will print out related perf event
information. Documentation is also added.

Below is an example to show the results using bcc commands.
Running the following 4 bcc commands:
  kprobe:     trace.py '__x64_sys_nanosleep'
  kretprobe:  trace.py 'r::__x64_sys_nanosleep'
  tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
  uprobe:     trace.py 'p:/home/yhs/a.out:main'

The bpftool command line and result:

  $ bpftool perf
  pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
  pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
  pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
  pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159

  $ bpftool -j perf
  [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
   {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
   {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
   {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]

  $ bpftool prog
  5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
	  loaded_at 2018-05-15T04:46:37-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 4
  7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
	  loaded_at 2018-05-15T04:48:32-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 7
  8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
	  loaded_at 2018-05-15T04:48:48-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 8
  9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
	  loaded_at 2018-05-15T04:49:52-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 9

  $ ps ax | grep "python ./trace.py"
  21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
  21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
  21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
  21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
  22374 pts/1    S+     0:00 grep --color=auto python ./trace.py

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/bpf/bpftool/Documentation/bpftool-perf.rst |  81 ++++++++
 tools/bpf/bpftool/Documentation/bpftool.rst      |   5 +-
 tools/bpf/bpftool/bash-completion/bpftool        |   9 +
 tools/bpf/bpftool/main.c                         |   3 +-
 tools/bpf/bpftool/main.h                         |   1 +
 tools/bpf/bpftool/perf.c                         | 246 +++++++++++++++++++++++
 6 files changed, 343 insertions(+), 2 deletions(-)
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-perf.rst
 create mode 100644 tools/bpf/bpftool/perf.c
diff mbox series

Patch

diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 0000000..e3eb0ea
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,81 @@ 
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+	*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* :=
+	{ **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+|	**bpftool** **perf { show | list }**
+|	**bpftool** **perf help**
+
+DESCRIPTION
+===========
+	**bpftool perf { show | list }**
+		  List all raw_tracepoint, tracepoint, kprobe attachment in the system.
+
+		  Output will start with process id and file descriptor in that process,
+		  followed by bpf program id, attachment information, and attachment point.
+		  The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+		  The attachment point for k[ret]probe is either symbol name and offset,
+		  or a kernel virtual address.
+		  The attachment point for u[ret]probe is the file name and the file offset.
+
+	**bpftool perf help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-v, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+      pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
+      pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
+      pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
+      pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+    [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
+     {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+     {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+     {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 564cb0d..b6f5d56 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@  SYNOPSIS
 
 	**bpftool** **version**
 
-	*OBJECT* := { **map** | **program** | **cgroup** }
+	*OBJECT* := { **map** | **program** | **cgroup** | **perf** }
 
 	*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -30,6 +30,8 @@  SYNOPSIS
 
 	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
+	*PERF-COMMANDS* := { **show** | **list** | **help** }
+
 DESCRIPTION
 ===========
 	*bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@  OPTIONS
 SEE ALSO
 ========
 	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+        **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index b301c9b..7bc198d 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -448,6 +448,15 @@  _bpftool()
                     ;;
             esac
             ;;
+        perf)
+            case $command in
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help \
+                            show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
     esac
 } &&
 complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d..eea7f14 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -87,7 +87,7 @@  static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup }\n"
+		"       OBJECT := { prog | map | cgroup | perf }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@  static const struct cmd cmds[] = {
 	{ "prog",	do_prog },
 	{ "map",	do_map },
 	{ "cgroup",	do_cgroup },
+	{ "perf",	do_perf },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6173cd9..63fdb31 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -119,6 +119,7 @@  int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
 int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 0000000..ac6b1a1
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,246 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+/* 0: undecided, 1: supported, 2: not supported */
+static int perf_query_supported;
+static bool has_perf_query_support(void)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	char buf[256];
+	int fd;
+
+	if (perf_query_supported)
+		goto out;
+
+	fd = open(bin_name, O_RDONLY);
+	if (fd < 0) {
+		p_err("perf_query_support: %s", strerror(errno));
+		goto out;
+	}
+
+	/* the following query will fail as no bpf attachment,
+	 * the expected errno is ENOTSUPP
+	 */
+	errno = 0;
+	len = sizeof(buf);
+	bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
+			  &fd_type, &probe_offset, &probe_addr);
+
+	if (errno == 524 /* ENOTSUPP */) {
+		perf_query_supported = 1;
+		goto close_fd;
+	}
+
+	perf_query_supported = 2;
+	p_err("perf_query_support: %s", strerror(errno));
+	fprintf(stderr,
+		"HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
+
+close_fd:
+	close(fd);
+out:
+	return perf_query_supported == 1;
+}
+
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
+			    char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	jsonw_start_object(json_wtr);
+	jsonw_int_field(json_wtr, "pid", pid);
+	jsonw_int_field(json_wtr, "fd", fd);
+	jsonw_uint_field(json_wtr, "prog_id", prog_id);
+	switch (fd_type) {
+	case BPF_FD_TYPE_RAW_TRACEPOINT:
+		jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_FD_TYPE_TRACEPOINT:
+		jsonw_string_field(json_wtr, "fd_type", "tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_FD_TYPE_KPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "kprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_FD_TYPE_KRETPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "kretprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_FD_TYPE_UPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "uprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	case BPF_FD_TYPE_URETPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "uretprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	}
+	jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
+			     char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	printf("pid %d  fd %d: prog_id %u  ", pid, fd, prog_id);
+	switch (fd_type) {
+	case BPF_FD_TYPE_RAW_TRACEPOINT:
+		printf("raw_tracepoint  %s\n", buf);
+		break;
+	case BPF_FD_TYPE_TRACEPOINT:
+		printf("tracepoint  %s\n", buf);
+		break;
+	case BPF_FD_TYPE_KPROBE:
+		if (buf[0] != '\0')
+			printf("kprobe  func %s  offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kprobe  addr %llu\n", probe_addr);
+		break;
+	case BPF_FD_TYPE_KRETPROBE:
+		if (buf[0] != '\0')
+			printf("kretprobe  func %s  offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kretprobe  addr %llu\n", probe_addr);
+		break;
+	case BPF_FD_TYPE_UPROBE:
+		printf("uprobe  filename %s  offset %llu\n", buf, probe_offset);
+		break;
+	case BPF_FD_TYPE_URETPROBE:
+		printf("uretprobe  filename %s  offset %llu\n", buf,
+		       probe_offset);
+		break;
+	}
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+		     int tflag, struct FTW *ftwbuf)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	int err, pid = 0, fd = 0;
+	const char *pch;
+	char buf[4096];
+
+	/* prefix always /proc */
+	pch = fpath + 5;
+	if (*pch == '\0')
+		return 0;
+
+	/* pid should be all numbers */
+	pch++;
+	while (isdigit(*pch)) {
+		pid = pid * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd directory */
+	pch++;
+	if (strncmp(pch, "fd", 2))
+		return FTW_SKIP_SUBTREE;
+	pch += 2;
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd/<fd_num> */
+	pch++;
+	while (isdigit(*pch)) {
+		fd = fd * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch != '\0')
+		return FTW_SKIP_SUBTREE;
+
+	/* query (pid, fd) for potential perf events */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
+				&probe_offset, &probe_addr);
+	if (err < 0)
+		return 0;
+
+	if (json_output)
+		print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
+				probe_addr);
+	else
+		print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
+				 probe_addr);
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+	int err = 0, nopenfd = 16;
+
+	if (!has_perf_query_support())
+		return -1;
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+		p_err("%s", strerror(errno));
+		err = -1;
+	}
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s { show | list | help }\n"
+		"",
+		bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}