[bpf-next,7/7] tools/bpftool: add perf subcommand

Message ID 20180515234521.856763-8-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series
  • bpf: implement BPF_PERF_EVENT_QUERY for perf event query
Related show

Commit Message

Yonghong Song May 15, 2018, 11:45 p.m.
The new command "bpftool perf [show]" will traverse
all processes under /proc, and if any fd is associated
with a perf event, it will print out related perf event
information.

Below is an example to show the results using bcc commands.
Running the following 4 bcc commands:
  kprobe:     trace.py '__x64_sys_write'
  kretprobe:  trace.py 'r::__x64_sys_nanosleep'
  tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
  uprobe:     trace.py 'p:/home/yhs/a.out:main'

The bpftool command line and result:

  $ bpftool perf
  21711: prog_id 5 kprobe func __x64_sys_write offset 0
  21765: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
  21767: prog_id 8 tracepoint sys_enter_nanosleep
  21800: prog_id 9 uprobe filename /home/yhs/a.out offset 1159

  $ bpftool -j perf
  {"pid":21711,"prog_id":5,"prog_info":"kprobe","func":"__x64_sys_write","offset":0}, \
  {"pid":21765,"prog_id":7,"prog_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
  {"pid":21767,"prog_id":8,"prog_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
  {"pid":21800,"prog_id":9,"prog_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}

  $ bpftool prog
  5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
	  loaded_at 2018-05-15T04:46:37-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 4
  7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
	  loaded_at 2018-05-15T04:48:32-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 7
  8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
	  loaded_at 2018-05-15T04:48:48-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 8
  9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
	  loaded_at 2018-05-15T04:49:52-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 9

  $ ps ax | grep "python ./trace.py"
  21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
  21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
  21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
  21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
  22374 pts/1    S+     0:00 grep --color=auto python ./trace.py

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/bpf/bpftool/main.c |   3 +-
 tools/bpf/bpftool/main.h |   1 +
 tools/bpf/bpftool/perf.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 tools/bpf/bpftool/perf.c

Comments

Jakub Kicinski May 16, 2018, 4:41 a.m. | #1
On Tue, 15 May 2018 16:45:21 -0700, Yonghong Song wrote:
> The new command "bpftool perf [show]" will traverse
> all processes under /proc, and if any fd is associated
> with a perf event, it will print out related perf event
> information.
> 
> Below is an example to show the results using bcc commands.
> Running the following 4 bcc commands:
>   kprobe:     trace.py '__x64_sys_nanosleep'
>   kretprobe:  trace.py 'r::__x64_sys_nanosleep'
>   tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
>   uprobe:     trace.py 'p:/home/yhs/a.out:main'
> 
> The bpftool command line and result:
> 
>   $ bpftool perf
>   21711: prog_id 5 kprobe func __x64_sys_write offset 0
>   21765: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
>   21767: prog_id 8 tracepoint sys_enter_nanosleep
>   21800: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
> 
>   $ bpftool -j perf
>   {"pid":21711,"prog_id":5,"prog_info":"kprobe","func":"__x64_sys_write","offset":0}, \
>   {"pid":21765,"prog_id":7,"prog_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
>   {"pid":21767,"prog_id":8,"prog_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
>   {"pid":21800,"prog_id":9,"prog_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}

You need to wrap the objects inside an array, so

	if (json_output)
		jsonw_start_array(json_wtr);
	nftw();
	if (json_output)
		jsonw_end_array(json_wtr);

otherwise the output will not be valid JSON.  To validate the JSON, try:

$ bpftool -j perf | python -m json.tool

>   $ bpftool prog
>   5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
> 	  loaded_at 2018-05-15T04:46:37-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 4
>   7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
> 	  loaded_at 2018-05-15T04:48:32-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 7
>   8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
> 	  loaded_at 2018-05-15T04:48:48-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 8
>   9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
> 	  loaded_at 2018-05-15T04:49:52-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 9
> 
>   $ ps ax | grep "python ./trace.py"
>   21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
>   21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
>   21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
>   21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
>   22374 pts/1    S+     0:00 grep --color=auto python ./trace.py
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  tools/bpf/bpftool/main.c |   3 +-
>  tools/bpf/bpftool/main.h |   1 +
>  tools/bpf/bpftool/perf.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++

Would you be able to also extend the Documentation/ and bash
completions?

>  3 files changed, 191 insertions(+), 1 deletion(-)
>  create mode 100644 tools/bpf/bpftool/perf.c
> 
> diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
> index 1ec852d..eea7f14 100644
> --- a/tools/bpf/bpftool/main.c
> +++ b/tools/bpf/bpftool/main.c
> @@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
>  		"       %s batch file FILE\n"
>  		"       %s version\n"
>  		"\n"
> -		"       OBJECT := { prog | map | cgroup }\n"
> +		"       OBJECT := { prog | map | cgroup | perf }\n"
>  		"       " HELP_SPEC_OPTIONS "\n"
>  		"",
>  		bin_name, bin_name, bin_name);
> @@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
>  	{ "prog",	do_prog },
>  	{ "map",	do_map },
>  	{ "cgroup",	do_cgroup },
> +	{ "perf",	do_perf },
>  	{ "version",	do_version },
>  	{ 0 }
>  };
> diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
> index 6173cd9..63fdb31 100644
> --- a/tools/bpf/bpftool/main.h
> +++ b/tools/bpf/bpftool/main.h
> @@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
>  int do_map(int argc, char **arg);
>  int do_event_pipe(int argc, char **argv);
>  int do_cgroup(int argc, char **arg);
> +int do_perf(int argc, char **arg);
>  
>  int prog_parse_fd(int *argc, char ***argv);
>  int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
> diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
> new file mode 100644
> index 0000000..6d676e4
> --- /dev/null
> +++ b/tools/bpf/bpftool/perf.c
> @@ -0,0 +1,188 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +// Copyright (C) 2018 Facebook
> +// Author: Yonghong Song <yhs@fb.com>
> +
> +#define _GNU_SOURCE
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include <ftw.h>
> +
> +#include <bpf.h>
> +
> +#include "main.h"
> +
> +static void print_perf_json(int pid, __u32 prog_id, __u32 prog_info,
> +			    char *buf, __u64 probe_offset, __u64 probe_addr)
> +{
> +	jsonw_start_object(json_wtr);
> +	jsonw_int_field(json_wtr, "pid", pid);
> +	jsonw_uint_field(json_wtr, "prog_id", prog_id);
> +	switch (prog_info) {
> +	case BPF_PERF_INFO_TP_NAME:
> +		jsonw_string_field(json_wtr, "prog_info", "tracepoint");
> +		jsonw_string_field(json_wtr, "tracepoint", buf);
> +		break;
> +	case BPF_PERF_INFO_KPROBE:
> +		jsonw_string_field(json_wtr, "prog_info", "kprobe");
> +		if (buf[0] != '\0') {
> +			jsonw_string_field(json_wtr, "func", buf);
> +			jsonw_lluint_field(json_wtr, "offset", probe_offset);
> +		} else {
> +			jsonw_lluint_field(json_wtr, "addr", probe_addr);
> +		}
> +		break;
> +	case BPF_PERF_INFO_KRETPROBE:
> +		jsonw_string_field(json_wtr, "prog_info", "kretprobe");
> +		if (buf[0] != '\0') {
> +			jsonw_string_field(json_wtr, "func", buf);
> +			jsonw_lluint_field(json_wtr, "offset", probe_offset);
> +		} else {
> +			jsonw_lluint_field(json_wtr, "addr", probe_addr);
> +		}
> +		break;
> +	case BPF_PERF_INFO_UPROBE:
> +		jsonw_string_field(json_wtr, "prog_info", "uprobe");
> +		jsonw_string_field(json_wtr, "filename", buf);
> +		jsonw_lluint_field(json_wtr, "offset", probe_offset);
> +		break;
> +	case BPF_PERF_INFO_URETPROBE:
> +		jsonw_string_field(json_wtr, "prog_info", "uretprobe");
> +		jsonw_string_field(json_wtr, "filename", buf);
> +		jsonw_lluint_field(json_wtr, "offset", probe_offset);
> +		break;
> +	}
> +	jsonw_end_object(json_wtr);
> +}
> +
> +static void print_perf_plain(int pid, __u32 prog_id, __u32 prog_info,
> +			    char *buf, __u64 probe_offset, __u64 probe_addr)
> +{
> +	printf("%d: prog_id %u ", pid, prog_id);

nit: for consistency with prog and map listings consider using double
spaces after prog_id (i.e. between fields).  Not a hard requirement,
though, perhaps I'm the only one who finds that more readable :)

> +	switch (prog_info) {
> +	case BPF_PERF_INFO_TP_NAME:
> +		printf("tracepoint %s\n", buf);
> +		break;
> +	case BPF_PERF_INFO_KPROBE:
> +		if (buf[0] != '\0')
> +			printf("kprobe func %s offset %llu\n", buf,
> +			       probe_offset);
> +		else
> +			printf("kprobe addr %llu\n", probe_addr);
> +		break;
> +	case BPF_PERF_INFO_KRETPROBE:
> +		if (buf[0] != '\0')
> +			printf("kretprobe func %s offset %llu\n", buf,
> +			       probe_offset);
> +		else
> +			printf("kretprobe addr %llu\n", probe_addr);
> +		break;
> +	case BPF_PERF_INFO_UPROBE:
> +		printf("uprobe filename %s offset %llu\n", buf, probe_offset);
> +		break;
> +	case BPF_PERF_INFO_URETPROBE:
> +		printf("uretprobe filename %s offset %llu\n", buf,
> +		       probe_offset);
> +		break;
> +	}
> +}
> +
> +static int show_proc(const char *fpath, const struct stat *sb,
> +		     int tflag, struct FTW *ftwbuf)
> +{
> +	__u64 probe_offset, probe_addr;
> +	__u32 prog_id, prog_info;
> +	int err, pid = 0, fd = 0;
> +	const char *pch;
> +	char buf[4096];
> +
> +	/* prefix always /proc */
> +	pch = fpath + 5;
> +	if (*pch == '\0')
> +		return 0;
> +
> +	/* pid should be all numbers */
> +	pch++;
> +	while (*pch >= '0' && *pch <= '9') {

nit: isdigit()?  strtoul() with its endptr also an option.  That said
     the code is actually quite readable as is, so I'm not sure if it's
     worth complicating it.

> +		pid = pid * 10 + *pch - '0';
> +		pch++;
> +	}
> +	if (*pch == '\0')
> +		return 0;
> +	if (*pch != '/')
> +		return FTW_SKIP_SUBTREE;
> +
> +	/* check /proc/<pid>/fd directory */
> +	pch++;
> +	if (*pch == '\0' || *pch != 'f')
> +		return FTW_SKIP_SUBTREE;

but == '\0' already implies != 'f', so the '\0' check is redundant

> +	pch++;
> +	if (*pch == '\0' || *pch != 'd')
> +		return FTW_SKIP_SUBTREE;

nit: possibly just:
     if (strncmp(pch, "fd", 2))
          return FTW_SKIP_SUBTREE;
     pch += 2;

> +	pch++;
> +	if (*pch == '\0')
> +		return 0;
> +	if (*pch != '/')
> +		return FTW_SKIP_SUBTREE;
> +
> +	/* check /proc/<pid>/fd/<fd_num> */
> +	pch++;
> +	while (*pch >= '0' && *pch <= '9') {
> +		fd = fd * 10 + *pch - '0';
> +		pch++;
> +	}
> +	if (*pch != '\0')
> +		return FTW_SKIP_SUBTREE;
> +
> +	/* query (pid, fd) for potential perf events */
> +	err = bpf_trace_event_query(pid, fd, buf, sizeof(buf),
> +		&prog_id, &prog_info, &probe_offset, &probe_addr);

nit: continuation line not aligned with opening bracket

> +	if (err < 0)
> +		return 0;
> +
> +	if (json_output)
> +		print_perf_json(pid, prog_id, prog_info, buf, probe_offset,
> +				probe_addr);
> +	else
> +		print_perf_plain(pid, prog_id, prog_info, buf, probe_offset,
> +				 probe_addr);
> +
> +	return 0;
> +}
> +
> +static int do_show(int argc, char **argv)
> +{
> +	int nopenfd = 16;
> +	int flags = FTW_ACTIONRETVAL | FTW_PHYS;

nit: reverse christmas tree networking style if you don't mind

> +	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
> +		perror("nftw");

nit: p_err("%s", strerror(errno)); would also show up in JSON output

> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int do_help(int argc, char **argv)
> +{
> +	fprintf(stderr,
> +		"Usage: %s %s { show | help }\n"
> +		"",
> +		bin_name, argv[-2]);
> +
> +	return 0;
> +}
> +
> +static const struct cmd cmds[] = {
> +	{ "show",	do_show },

Other commands alias show and list, so could you add:

	{ "list",	do_show },

and add list to the help output?

> +	{ "help",	do_help },
> +	{ 0 }
> +};
> +
> +int do_perf(int argc, char **argv)
> +{
> +	return cmd_select(cmds, argc, argv, do_help);
> +}

Thanks a lot for adding bpftool support, and with JSON output included!
Yonghong Song May 16, 2018, 5:54 a.m. | #2
On 5/15/18 9:41 PM, Jakub Kicinski wrote:
> On Tue, 15 May 2018 16:45:21 -0700, Yonghong Song wrote:
>> The new command "bpftool perf [show]" will traverse
>> all processes under /proc, and if any fd is associated
>> with a perf event, it will print out related perf event
>> information.
>>
>> Below is an example to show the results using bcc commands.
>> Running the following 4 bcc commands:
>>    kprobe:     trace.py '__x64_sys_nanosleep'
>>    kretprobe:  trace.py 'r::__x64_sys_nanosleep'
>>    tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
>>    uprobe:     trace.py 'p:/home/yhs/a.out:main'
>>
>> The bpftool command line and result:
>>
>>    $ bpftool perf
>>    21711: prog_id 5 kprobe func __x64_sys_write offset 0
>>    21765: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
>>    21767: prog_id 8 tracepoint sys_enter_nanosleep
>>    21800: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
>>
>>    $ bpftool -j perf
>>    {"pid":21711,"prog_id":5,"prog_info":"kprobe","func":"__x64_sys_write","offset":0}, \
>>    {"pid":21765,"prog_id":7,"prog_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
>>    {"pid":21767,"prog_id":8,"prog_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
>>    {"pid":21800,"prog_id":9,"prog_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}
> 
> You need to wrap the objects inside an array, so
> 
> 	if (json_output)
> 		jsonw_start_array(json_wtr);
> 	nftw();
> 	if (json_output)
> 		jsonw_end_array(json_wtr);
> 
> otherwise output will not be a valid JSON.  To validate JSON try:
> 
> $ bpftool -j perf | python -m json.tool

Thanks for the detailed review! All of your comments make sense.
I will address them in the next revision after getting some feedback
on the other patches.

> 
>>    $ bpftool prog
>>    5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
>> 	  loaded_at 2018-05-15T04:46:37-0700  uid 0
>> 	  xlated 200B  not jited  memlock 4096B  map_ids 4
>>    7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
>> 	  loaded_at 2018-05-15T04:48:32-0700  uid 0
>> 	  xlated 200B  not jited  memlock 4096B  map_ids 7
>>    8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
>> 	  loaded_at 2018-05-15T04:48:48-0700  uid 0
>> 	  xlated 200B  not jited  memlock 4096B  map_ids 8
>>    9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
>> 	  loaded_at 2018-05-15T04:49:52-0700  uid 0
>> 	  xlated 200B  not jited  memlock 4096B  map_ids 9
>>
>>    $ ps ax | grep "python ./trace.py"
>>    21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
>>    21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
>>    21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
>>    21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
>>    22374 pts/1    S+     0:00 grep --color=auto python ./trace.py
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   tools/bpf/bpftool/main.c |   3 +-
>>   tools/bpf/bpftool/main.h |   1 +
>>   tools/bpf/bpftool/perf.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++
> 
> Would you be able to also extend the Documentation/ and bash
> completions?
> 
>>   3 files changed, 191 insertions(+), 1 deletion(-)
>>   create mode 100644 tools/bpf/bpftool/perf.c
>>
>> diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
>> index 1ec852d..eea7f14 100644
>> --- a/tools/bpf/bpftool/main.c
>> +++ b/tools/bpf/bpftool/main.c
>> @@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
>>   		"       %s batch file FILE\n"
>>   		"       %s version\n"
>>   		"\n"
>> -		"       OBJECT := { prog | map | cgroup }\n"
>> +		"       OBJECT := { prog | map | cgroup | perf }\n"
>>   		"       " HELP_SPEC_OPTIONS "\n"
>>   		"",
>>   		bin_name, bin_name, bin_name);
>> @@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
>>   	{ "prog",	do_prog },
>>   	{ "map",	do_map },
>>   	{ "cgroup",	do_cgroup },
>> +	{ "perf",	do_perf },
>>   	{ "version",	do_version },
>>   	{ 0 }
>>   };
>> diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
>> index 6173cd9..63fdb31 100644
>> --- a/tools/bpf/bpftool/main.h
>> +++ b/tools/bpf/bpftool/main.h
>> @@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
>>   int do_map(int argc, char **arg);
>>   int do_event_pipe(int argc, char **argv);
>>   int do_cgroup(int argc, char **arg);
>> +int do_perf(int argc, char **arg);
>>   
>>   int prog_parse_fd(int *argc, char ***argv);
>>   int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
>> diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
>> new file mode 100644
>> index 0000000..6d676e4
>> --- /dev/null
>> +++ b/tools/bpf/bpftool/perf.c
>> @@ -0,0 +1,188 @@
>> +// SPDX-License-Identifier: GPL-2.0+
>> +// Copyright (C) 2018 Facebook
>> +// Author: Yonghong Song <yhs@fb.com>
>> +
>> +#define _GNU_SOURCE
>> +#include <fcntl.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <sys/stat.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include <ftw.h>
>> +
>> +#include <bpf.h>
>> +
>> +#include "main.h"
>> +
>> +static void print_perf_json(int pid, __u32 prog_id, __u32 prog_info,
>> +			    char *buf, __u64 probe_offset, __u64 probe_addr)
>> +{
>> +	jsonw_start_object(json_wtr);
>> +	jsonw_int_field(json_wtr, "pid", pid);
>> +	jsonw_uint_field(json_wtr, "prog_id", prog_id);
>> +	switch (prog_info) {
>> +	case BPF_PERF_INFO_TP_NAME:
>> +		jsonw_string_field(json_wtr, "prog_info", "tracepoint");
>> +		jsonw_string_field(json_wtr, "tracepoint", buf);
>> +		break;
>> +	case BPF_PERF_INFO_KPROBE:
>> +		jsonw_string_field(json_wtr, "prog_info", "kprobe");
>> +		if (buf[0] != '\0') {
>> +			jsonw_string_field(json_wtr, "func", buf);
>> +			jsonw_lluint_field(json_wtr, "offset", probe_offset);
>> +		} else {
>> +			jsonw_lluint_field(json_wtr, "addr", probe_addr);
>> +		}
>> +		break;
>> +	case BPF_PERF_INFO_KRETPROBE:
>> +		jsonw_string_field(json_wtr, "prog_info", "kretprobe");
>> +		if (buf[0] != '\0') {
>> +			jsonw_string_field(json_wtr, "func", buf);
>> +			jsonw_lluint_field(json_wtr, "offset", probe_offset);
>> +		} else {
>> +			jsonw_lluint_field(json_wtr, "addr", probe_addr);
>> +		}
>> +		break;
>> +	case BPF_PERF_INFO_UPROBE:
>> +		jsonw_string_field(json_wtr, "prog_info", "uprobe");
>> +		jsonw_string_field(json_wtr, "filename", buf);
>> +		jsonw_lluint_field(json_wtr, "offset", probe_offset);
>> +		break;
>> +	case BPF_PERF_INFO_URETPROBE:
>> +		jsonw_string_field(json_wtr, "prog_info", "uretprobe");
>> +		jsonw_string_field(json_wtr, "filename", buf);
>> +		jsonw_lluint_field(json_wtr, "offset", probe_offset);
>> +		break;
>> +	}
>> +	jsonw_end_object(json_wtr);
>> +}
>> +
>> +static void print_perf_plain(int pid, __u32 prog_id, __u32 prog_info,
>> +			    char *buf, __u64 probe_offset, __u64 probe_addr)
>> +{
>> +	printf("%d: prog_id %u ", pid, prog_id);
> 
> nit: for consistency with prog and map listings consider using double
> spaces after prog_id (i.e. between fields).  Not a hard requirement,
> though, perhaps I'm the only one who finds that more readable :)
> 
>> +	switch (prog_info) {
>> +	case BPF_PERF_INFO_TP_NAME:
>> +		printf("tracepoint %s\n", buf);
>> +		break;
>> +	case BPF_PERF_INFO_KPROBE:
>> +		if (buf[0] != '\0')
>> +			printf("kprobe func %s offset %llu\n", buf,
>> +			       probe_offset);
>> +		else
>> +			printf("kprobe addr %llu\n", probe_addr);
>> +		break;
>> +	case BPF_PERF_INFO_KRETPROBE:
>> +		if (buf[0] != '\0')
>> +			printf("kretprobe func %s offset %llu\n", buf,
>> +			       probe_offset);
>> +		else
>> +			printf("kretprobe addr %llu\n", probe_addr);
>> +		break;
>> +	case BPF_PERF_INFO_UPROBE:
>> +		printf("uprobe filename %s offset %llu\n", buf, probe_offset);
>> +		break;
>> +	case BPF_PERF_INFO_URETPROBE:
>> +		printf("uretprobe filename %s offset %llu\n", buf,
>> +		       probe_offset);
>> +		break;
>> +	}
>> +}
>> +
>> +static int show_proc(const char *fpath, const struct stat *sb,
>> +		     int tflag, struct FTW *ftwbuf)
>> +{
>> +	__u64 probe_offset, probe_addr;
>> +	__u32 prog_id, prog_info;
>> +	int err, pid = 0, fd = 0;
>> +	const char *pch;
>> +	char buf[4096];
>> +
>> +	/* prefix always /proc */
>> +	pch = fpath + 5;
>> +	if (*pch == '\0')
>> +		return 0;
>> +
>> +	/* pid should be all numbers */
>> +	pch++;
>> +	while (*pch >= '0' && *pch <= '9') {
> 
> nit: isdigit()?  strtoul() with its endptr also an option.  That said
>       the code is actually quite readable as is, so I'm not sure if it's
>       worth complicating it.
> 
>> +		pid = pid * 10 + *pch - '0';
>> +		pch++;
>> +	}
>> +	if (*pch == '\0')
>> +		return 0;
>> +	if (*pch != '/')
>> +		return FTW_SKIP_SUBTREE;
>> +
>> +	/* check /proc/<pid>/fd directory */
>> +	pch++;
>> +	if (*pch == '\0' || *pch != 'f')
>> +		return FTW_SKIP_SUBTREE;
> 
> but == '\0' implies != 'f'
> 
>> +	pch++;
>> +	if (*pch == '\0' || *pch != 'd')
>> +		return FTW_SKIP_SUBTREE;
> 
> nit: possibly just:
>       if (strncmp(pch, "fd", 2))
>            return FTW_SKIP_SUBTREE;
>       pch += 2;
> 
>> +	pch++;
>> +	if (*pch == '\0')
>> +		return 0;
>> +	if (*pch != '/')
>> +		return FTW_SKIP_SUBTREE;
>> +
>> +	/* check /proc/<pid>/fd/<fd_num> */
>> +	pch++;
>> +	while (*pch >= '0' && *pch <= '9') {
>> +		fd = fd * 10 + *pch - '0';
>> +		pch++;
>> +	}
>> +	if (*pch != '\0')
>> +		return FTW_SKIP_SUBTREE;
>> +
>> +	/* query (pid, fd) for potential perf events */
>> +	err = bpf_trace_event_query(pid, fd, buf, sizeof(buf),
>> +		&prog_id, &prog_info, &probe_offset, &probe_addr);
> 
> nit: continuation line not aligned with opening bracket
> 
>> +	if (err < 0)
>> +		return 0;
>> +
>> +	if (json_output)
>> +		print_perf_json(pid, prog_id, prog_info, buf, probe_offset,
>> +				probe_addr);
>> +	else
>> +		print_perf_plain(pid, prog_id, prog_info, buf, probe_offset,
>> +				 probe_addr);
>> +
>> +	return 0;
>> +}
>> +
>> +static int do_show(int argc, char **argv)
>> +{
>> +	int nopenfd = 16;
>> +	int flags = FTW_ACTIONRETVAL | FTW_PHYS;
> 
> nit: reverse christmas tree networking style if you don't mind
> 
>> +	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
>> +		perror("nftw");
> 
> nit: p_err("%s", strerror(errno)); would also show up in JSON output
> 
>> +		return -1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int do_help(int argc, char **argv)
>> +{
>> +	fprintf(stderr,
>> +		"Usage: %s %s { show | help }\n"
>> +		"",
>> +		bin_name, argv[-2]);
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct cmd cmds[] = {
>> +	{ "show",	do_show },
> 
> Other commands alias show and list, so could you add:
> 
> 	{ "list",	do_show },
> 
> and list to help output?
> 
>> +	{ "help",	do_help },
>> +	{ 0 }
>> +};
>> +
>> +int do_perf(int argc, char **argv)
>> +{
>> +	return cmd_select(cmds, argc, argv, do_help);
>> +}
> 
> Thanks a lot for adding bpftool support, and with JSON output included!
>

Patch

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d..eea7f14 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -87,7 +87,7 @@  static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup }\n"
+		"       OBJECT := { prog | map | cgroup | perf }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@  static const struct cmd cmds[] = {
 	{ "prog",	do_prog },
 	{ "map",	do_map },
 	{ "cgroup",	do_cgroup },
+	{ "perf",	do_perf },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6173cd9..63fdb31 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -119,6 +119,7 @@  int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
 int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 0000000..6d676e4
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,188 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static void print_perf_json(int pid, __u32 prog_id, __u32 prog_info,
+			    char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	jsonw_start_object(json_wtr);
+	jsonw_int_field(json_wtr, "pid", pid);
+	jsonw_uint_field(json_wtr, "prog_id", prog_id);
+	switch (prog_info) {
+	case BPF_PERF_INFO_TP_NAME:
+		jsonw_string_field(json_wtr, "prog_info", "tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_PERF_INFO_KPROBE:
+		jsonw_string_field(json_wtr, "prog_info", "kprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_PERF_INFO_KRETPROBE:
+		jsonw_string_field(json_wtr, "prog_info", "kretprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_PERF_INFO_UPROBE:
+		jsonw_string_field(json_wtr, "prog_info", "uprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	case BPF_PERF_INFO_URETPROBE:
+		jsonw_string_field(json_wtr, "prog_info", "uretprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	}
+	jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, __u32 prog_id, __u32 prog_info,
+			    char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	printf("%d: prog_id %u ", pid, prog_id);
+	switch (prog_info) {
+	case BPF_PERF_INFO_TP_NAME:
+		printf("tracepoint %s\n", buf);
+		break;
+	case BPF_PERF_INFO_KPROBE:
+		if (buf[0] != '\0')
+			printf("kprobe func %s offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kprobe addr %llu\n", probe_addr);
+		break;
+	case BPF_PERF_INFO_KRETPROBE:
+		if (buf[0] != '\0')
+			printf("kretprobe func %s offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kretprobe addr %llu\n", probe_addr);
+		break;
+	case BPF_PERF_INFO_UPROBE:
+		printf("uprobe filename %s offset %llu\n", buf, probe_offset);
+		break;
+	case BPF_PERF_INFO_URETPROBE:
+		printf("uretprobe filename %s offset %llu\n", buf,
+		       probe_offset);
+		break;
+	}
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+		     int tflag, struct FTW *ftwbuf)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 prog_id, prog_info;
+	int err, pid = 0, fd = 0;
+	const char *pch;
+	char buf[4096];
+
+	/* prefix always /proc */
+	pch = fpath + 5;
+	if (*pch == '\0')
+		return 0;
+
+	/* pid should be all numbers */
+	pch++;
+	while (*pch >= '0' && *pch <= '9') {
+		pid = pid * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd directory */
+	pch++;
+	if (*pch == '\0' || *pch != 'f')
+		return FTW_SKIP_SUBTREE;
+	pch++;
+	if (*pch == '\0' || *pch != 'd')
+		return FTW_SKIP_SUBTREE;
+	pch++;
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd/<fd_num> */
+	pch++;
+	while (*pch >= '0' && *pch <= '9') {
+		fd = fd * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch != '\0')
+		return FTW_SKIP_SUBTREE;
+
+	/* query (pid, fd) for potential perf events */
+	err = bpf_trace_event_query(pid, fd, buf, sizeof(buf),
+		&prog_id, &prog_info, &probe_offset, &probe_addr);
+	if (err < 0)
+		return 0;
+
+	if (json_output)
+		print_perf_json(pid, prog_id, prog_info, buf, probe_offset,
+				probe_addr);
+	else
+		print_perf_plain(pid, prog_id, prog_info, buf, probe_offset,
+				 probe_addr);
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int nopenfd = 16;
+	int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+
+	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+		perror("nftw");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s { show | help }\n"
+		"",
+		bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}