diff mbox series

[v5,bpf-next,4/9] libbpf: add kprobe/uprobe attach API

Message ID 20190701235903.660141-5-andriin@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series libbpf: add bpf_link and tracing attach APIs | expand

Commit Message

Andrii Nakryiko July 1, 2019, 11:58 p.m. UTC
Add ability to attach to kernel and user probes and retprobes.
Implementation depends on perf event support for kprobes/uprobes.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
---
 tools/lib/bpf/libbpf.c   | 169 +++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |   7 ++
 tools/lib/bpf/libbpf.map |   2 +
 3 files changed, 178 insertions(+)

Comments

Daniel Borkmann July 3, 2019, 12:39 p.m. UTC | #1
On 07/02/2019 01:58 AM, Andrii Nakryiko wrote:
> Add ability to attach to kernel and user probes and retprobes.
> Implementation depends on perf event support for kprobes/uprobes.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> Reviewed-by: Stanislav Fomichev <sdf@google.com>
> ---
>  tools/lib/bpf/libbpf.c   | 169 +++++++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.h   |   7 ++
>  tools/lib/bpf/libbpf.map |   2 +
>  3 files changed, 178 insertions(+)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index bcaa294f819d..7b6142408b15 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -4021,6 +4021,175 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
>  	return (struct bpf_link *)link;
>  }
>  
> +/*
> + * this function is expected to parse integer in the range of [0, 2^31-1] from
> + * given file using scanf format string fmt. If actual parsed value is
> + * negative, the result might be indistinguishable from error
> + */
> +static int parse_uint_from_file(const char *file, const char *fmt)
> +{
> +	char buf[STRERR_BUFSIZE];
> +	int err, ret;
> +	FILE *f;
> +
> +	f = fopen(file, "r");
> +	if (!f) {
> +		err = -errno;
> +		pr_debug("failed to open '%s': %s\n", file,
> +			 libbpf_strerror_r(err, buf, sizeof(buf)));
> +		return err;
> +	}
> +	err = fscanf(f, fmt, &ret);
> +	if (err != 1) {
> +		err = err == EOF ? -EIO : -errno;
> +		pr_debug("failed to parse '%s': %s\n", file,
> +			libbpf_strerror_r(err, buf, sizeof(buf)));
> +		fclose(f);
> +		return err;
> +	}
> +	fclose(f);
> +	return ret;
> +}
> +
> +static int determine_kprobe_perf_type(void)
> +{
> +	const char *file = "/sys/bus/event_source/devices/kprobe/type";
> +
> +	return parse_uint_from_file(file, "%d\n");
> +}
> +
> +static int determine_uprobe_perf_type(void)
> +{
> +	const char *file = "/sys/bus/event_source/devices/uprobe/type";
> +
> +	return parse_uint_from_file(file, "%d\n");
> +}
> +
> +static int determine_kprobe_retprobe_bit(void)
> +{
> +	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
> +
> +	return parse_uint_from_file(file, "config:%d\n");
> +}
> +
> +static int determine_uprobe_retprobe_bit(void)
> +{
> +	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
> +
> +	return parse_uint_from_file(file, "config:%d\n");
> +}
> +
> +static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
> +				 uint64_t offset, int pid)
> +{
> +	struct perf_event_attr attr = {};
> +	char errmsg[STRERR_BUFSIZE];
> +	int type, pfd, err;
> +
> +	type = uprobe ? determine_uprobe_perf_type()
> +		      : determine_kprobe_perf_type();
> +	if (type < 0) {
> +		pr_warning("failed to determine %s perf type: %s\n",
> +			   uprobe ? "uprobe" : "kprobe",
> +			   libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> +		return type;
> +	}
> +	if (retprobe) {
> +		int bit = uprobe ? determine_uprobe_retprobe_bit()
> +				 : determine_kprobe_retprobe_bit();
> +
> +		if (bit < 0) {
> +			pr_warning("failed to determine %s retprobe bit: %s\n",
> +				   uprobe ? "uprobe" : "kprobe",
> +				   libbpf_strerror_r(bit, errmsg,
> +						     sizeof(errmsg)));
> +			return bit;
> +		}
> +		attr.config |= 1 << bit;
> +	}
> +	attr.size = sizeof(attr);
> +	attr.type = type;
> +	attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */
> +	attr.config2 = offset;		       /* kprobe_addr or probe_offset */
> +
> +	/* pid filter is meaningful only for uprobes */
> +	pfd = syscall(__NR_perf_event_open, &attr,
> +		      pid < 0 ? -1 : pid /* pid */,
> +		      pid == -1 ? 0 : -1 /* cpu */,
> +		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
> +	if (pfd < 0) {
> +		err = -errno;
> +		pr_warning("%s perf_event_open() failed: %s\n",
> +			   uprobe ? "uprobe" : "kprobe",
> +			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return err;
> +	}
> +	return pfd;
> +}
> +
> +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
> +					    bool retprobe,
> +					    const char *func_name)
> +{
> +	char errmsg[STRERR_BUFSIZE];
> +	struct bpf_link *link;
> +	int pfd, err;
> +
> +	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
> +				    0 /* offset */, -1 /* pid */);
> +	if (pfd < 0) {
> +		pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
> +			   bpf_program__title(prog, false),
> +			   retprobe ? "kretprobe" : "kprobe", func_name,
> +			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
> +		return ERR_PTR(pfd);
> +	}
> +	link = bpf_program__attach_perf_event(prog, pfd);
> +	if (IS_ERR(link)) {
> +		close(pfd);
> +		err = PTR_ERR(link);
> +		pr_warning("program '%s': failed to attach to %s '%s': %s\n",
> +			   bpf_program__title(prog, false),
> +			   retprobe ? "kretprobe" : "kprobe", func_name,
> +			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return link;
> +	}
> +	return link;
> +}
> +
> +struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
> +					    bool retprobe, pid_t pid,
> +					    const char *binary_path,
> +					    size_t func_offset)
> +{
> +	char errmsg[STRERR_BUFSIZE];
> +	struct bpf_link *link;
> +	int pfd, err;
> +
> +	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
> +				    binary_path, func_offset, pid);
> +	if (pfd < 0) {
> +		pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
> +			   bpf_program__title(prog, false),
> +			   retprobe ? "uretprobe" : "uprobe",
> +			   binary_path, func_offset,
> +			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
> +		return ERR_PTR(pfd);
> +	}
> +	link = bpf_program__attach_perf_event(prog, pfd);
> +	if (IS_ERR(link)) {
> +		close(pfd);
> +		err = PTR_ERR(link);
> +		pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
> +			   bpf_program__title(prog, false),
> +			   retprobe ? "uretprobe" : "uprobe",
> +			   binary_path, func_offset,
> +			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return link;
> +	}
> +	return link;
> +}

Hm, this only addresses half the feedback I had in prior version [0]. Patch 2/9
with bpf_link with destructor looks good to me, but my feedback from back then was
that all the kprobe/uprobe/tracepoint/raw_tracepoint should be split API-wise, so
you'll end up with something like the below, that is, 1) a set of functions that
only /create/ the bpf_link handle /once/, and 2) a helper that allows /attaching/
progs to one or multiple bpf_links. The set of APIs would look like:

struct bpf_link *bpf_link__create_kprobe(bool retprobe, const char *func_name);
struct bpf_link *bpf_link__create_uprobe(bool retprobe, pid_t pid,
					 const char *binary_path,
					 size_t func_offset);
int bpf_program__attach_to_link(struct bpf_link *link, struct bpf_program *prog);
int bpf_link__destroy(struct bpf_link *link);

This seems much more natural to me. Right now you sort of do both in one single API.
Detangling the bpf_program__attach_{uprobe,kprobe}() would also avoid that you have
to redo all the perf_event_open_probe() work over and over in order to get the pfd
context where you can later attach something to. Given bpf_program__attach_to_link()
API, you also wouldn't need to expose the bpf_program__attach_perf_event() from
patch 3/9. Thoughts?

  [0] https://lore.kernel.org/bpf/a7780057-1d70-9ace-960b-ff65867dc277@iogearbox.net/

>  enum bpf_perf_event_ret
>  bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
>  			   void **copy_mem, size_t *copy_size,
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 1bf66c4a9330..bd767cc11967 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -171,6 +171,13 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
>  
>  LIBBPF_API struct bpf_link *
>  bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
> +LIBBPF_API struct bpf_link *
> +bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
> +			   const char *func_name);
> +LIBBPF_API struct bpf_link *
> +bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
> +			   pid_t pid, const char *binary_path,
> +			   size_t func_offset);
>  
>  struct bpf_insn;
>  
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index 756f5aa802e9..57a40fb60718 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -169,7 +169,9 @@ LIBBPF_0.0.4 {
>  	global:
>  		bpf_link__destroy;
>  		bpf_object__load_xattr;
> +		bpf_program__attach_kprobe;
>  		bpf_program__attach_perf_event;
> +		bpf_program__attach_uprobe;
>  		btf_dump__dump_type;
>  		btf_dump__free;
>  		btf_dump__new;
>
Andrii Nakryiko July 3, 2019, 4:47 p.m. UTC | #2
On Wed, Jul 3, 2019 at 5:39 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 07/02/2019 01:58 AM, Andrii Nakryiko wrote:
> > Add ability to attach to kernel and user probes and retprobes.
> > Implementation depends on perf event support for kprobes/uprobes.
> >
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > Reviewed-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  tools/lib/bpf/libbpf.c   | 169 +++++++++++++++++++++++++++++++++++++++
> >  tools/lib/bpf/libbpf.h   |   7 ++
> >  tools/lib/bpf/libbpf.map |   2 +
> >  3 files changed, 178 insertions(+)
> >
> > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > index bcaa294f819d..7b6142408b15 100644
> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -4021,6 +4021,175 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
> >       return (struct bpf_link *)link;
> >  }
> >
> > +/*
> > + * this function is expected to parse integer in the range of [0, 2^31-1] from
> > + * given file using scanf format string fmt. If actual parsed value is
> > + * negative, the result might be indistinguishable from error
> > + */
> > +static int parse_uint_from_file(const char *file, const char *fmt)
> > +{
> > +     char buf[STRERR_BUFSIZE];
> > +     int err, ret;
> > +     FILE *f;
> > +
> > +     f = fopen(file, "r");
> > +     if (!f) {
> > +             err = -errno;
> > +             pr_debug("failed to open '%s': %s\n", file,
> > +                      libbpf_strerror_r(err, buf, sizeof(buf)));
> > +             return err;
> > +     }
> > +     err = fscanf(f, fmt, &ret);
> > +     if (err != 1) {
> > +             err = err == EOF ? -EIO : -errno;
> > +             pr_debug("failed to parse '%s': %s\n", file,
> > +                     libbpf_strerror_r(err, buf, sizeof(buf)));
> > +             fclose(f);
> > +             return err;
> > +     }
> > +     fclose(f);
> > +     return ret;
> > +}
> > +
> > +static int determine_kprobe_perf_type(void)
> > +{
> > +     const char *file = "/sys/bus/event_source/devices/kprobe/type";
> > +
> > +     return parse_uint_from_file(file, "%d\n");
> > +}
> > +
> > +static int determine_uprobe_perf_type(void)
> > +{
> > +     const char *file = "/sys/bus/event_source/devices/uprobe/type";
> > +
> > +     return parse_uint_from_file(file, "%d\n");
> > +}
> > +
> > +static int determine_kprobe_retprobe_bit(void)
> > +{
> > +     const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
> > +
> > +     return parse_uint_from_file(file, "config:%d\n");
> > +}
> > +
> > +static int determine_uprobe_retprobe_bit(void)
> > +{
> > +     const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
> > +
> > +     return parse_uint_from_file(file, "config:%d\n");
> > +}
> > +
> > +static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
> > +                              uint64_t offset, int pid)
> > +{
> > +     struct perf_event_attr attr = {};
> > +     char errmsg[STRERR_BUFSIZE];
> > +     int type, pfd, err;
> > +
> > +     type = uprobe ? determine_uprobe_perf_type()
> > +                   : determine_kprobe_perf_type();
> > +     if (type < 0) {
> > +             pr_warning("failed to determine %s perf type: %s\n",
> > +                        uprobe ? "uprobe" : "kprobe",
> > +                        libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> > +             return type;
> > +     }
> > +     if (retprobe) {
> > +             int bit = uprobe ? determine_uprobe_retprobe_bit()
> > +                              : determine_kprobe_retprobe_bit();
> > +
> > +             if (bit < 0) {
> > +                     pr_warning("failed to determine %s retprobe bit: %s\n",
> > +                                uprobe ? "uprobe" : "kprobe",
> > +                                libbpf_strerror_r(bit, errmsg,
> > +                                                  sizeof(errmsg)));
> > +                     return bit;
> > +             }
> > +             attr.config |= 1 << bit;
> > +     }
> > +     attr.size = sizeof(attr);
> > +     attr.type = type;
> > +     attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */
> > +     attr.config2 = offset;                 /* kprobe_addr or probe_offset */
> > +
> > +     /* pid filter is meaningful only for uprobes */
> > +     pfd = syscall(__NR_perf_event_open, &attr,
> > +                   pid < 0 ? -1 : pid /* pid */,
> > +                   pid == -1 ? 0 : -1 /* cpu */,
> > +                   -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
> > +     if (pfd < 0) {
> > +             err = -errno;
> > +             pr_warning("%s perf_event_open() failed: %s\n",
> > +                        uprobe ? "uprobe" : "kprobe",
> > +                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return err;
> > +     }
> > +     return pfd;
> > +}
> > +
> > +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
> > +                                         bool retprobe,
> > +                                         const char *func_name)
> > +{
> > +     char errmsg[STRERR_BUFSIZE];
> > +     struct bpf_link *link;
> > +     int pfd, err;
> > +
> > +     pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
> > +                                 0 /* offset */, -1 /* pid */);
> > +     if (pfd < 0) {
> > +             pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
> > +                        bpf_program__title(prog, false),
> > +                        retprobe ? "kretprobe" : "kprobe", func_name,
> > +                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
> > +             return ERR_PTR(pfd);
> > +     }
> > +     link = bpf_program__attach_perf_event(prog, pfd);
> > +     if (IS_ERR(link)) {
> > +             close(pfd);
> > +             err = PTR_ERR(link);
> > +             pr_warning("program '%s': failed to attach to %s '%s': %s\n",
> > +                        bpf_program__title(prog, false),
> > +                        retprobe ? "kretprobe" : "kprobe", func_name,
> > +                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return link;
> > +     }
> > +     return link;
> > +}
> > +
> > +struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
> > +                                         bool retprobe, pid_t pid,
> > +                                         const char *binary_path,
> > +                                         size_t func_offset)
> > +{
> > +     char errmsg[STRERR_BUFSIZE];
> > +     struct bpf_link *link;
> > +     int pfd, err;
> > +
> > +     pfd = perf_event_open_probe(true /* uprobe */, retprobe,
> > +                                 binary_path, func_offset, pid);
> > +     if (pfd < 0) {
> > +             pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
> > +                        bpf_program__title(prog, false),
> > +                        retprobe ? "uretprobe" : "uprobe",
> > +                        binary_path, func_offset,
> > +                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
> > +             return ERR_PTR(pfd);
> > +     }
> > +     link = bpf_program__attach_perf_event(prog, pfd);
> > +     if (IS_ERR(link)) {
> > +             close(pfd);
> > +             err = PTR_ERR(link);
> > +             pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
> > +                        bpf_program__title(prog, false),
> > +                        retprobe ? "uretprobe" : "uprobe",
> > +                        binary_path, func_offset,
> > +                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return link;
> > +     }
> > +     return link;
> > +}
>
> Hm, this only addresses half the feedback I had in prior version [0]. Patch 2/9

Hi Daniel,

Yes, and I explained why in reply to your original email, please see [1].

I've started with exactly separation you wanted, but it turned out to
be cumbersome and harder to use user API, while also somewhat more
complicated to implement. Mostly because in that design bpf_link
exists in two states: created-but-not-attached and attached. Which
forces user to do additional clean ups if creation succeeded, but
attachment failed. It also makes it a bit harder to provide good
contextual error logging if something goes wrong, because not all
original parameters are preserved, as some of them might be needed
only for creation, but not attachment (or we'll have to allocate and
copy extra stuff just for logging purposes).

On the other hand, having separate generic attach_event method doesn't
help that much, as there is little common functionality to reuse
across all kinds of possible bpf_link types.


  [1] https://lore.kernel.org/bpf/20190621045555.4152743-4-andriin@fb.com/T/#m6cfc141e7b57970bc948134bf671a46972b95134

> with bpf_link with destructor looks good to me, but my feedback from back then was
> that all the kprobe/uprobe/tracepoint/raw_tracepoint should be split API-wise, so
> you'll end up with something like the below, that is, 1) a set of functions that
> only /create/ the bpf_link handle /once/, and 2) a helper that allows /attaching/
> progs to one or multiple bpf_links. The set of APIs would look like:
>
> struct bpf_link *bpf_link__create_kprobe(bool retprobe, const char *func_name);
> struct bpf_link *bpf_link__create_uprobe(bool retprobe, pid_t pid,
>                                          const char *binary_path,
>                                          size_t func_offset);
> int bpf_program__attach_to_link(struct bpf_link *link, struct bpf_program *prog);
> int bpf_link__destroy(struct bpf_link *link);
>
> This seems much more natural to me. Right now you sort of do both in one single API.

It felt that way for me as well, until I implemented it and used it in
selftests. And then it felt unnecessarily verbose without giving any
benefit. I still have a local patchset with that change, I can post it
as RFC, if you don't trust my judgement. Please let me know.

> Detangling the bpf_program__attach_{uprobe,kprobe}() would also avoid that you have
> to redo all the perf_event_open_probe() work over and over in order to get the pfd

What do you mean by "redo all the perf_event_open_probe work"? In
terms of code, I just reuse the same function, so there is no
duplicate code. And in either design you'll have to open that
perf_event, so that work will have to be done one way or another.

> context where you can later attach something to. Given bpf_program__attach_to_link()
> API, you also wouldn't need to expose the bpf_program__attach_perf_event() from

I'd expose attach_perf_event either way, it's high-level API I want to
provide, we have use cases where user is creating some specific
non-kprobe/non-tracepoint perf events and wants to attach to it. E.g.,
HW counter overflow events for CPU profilers. So that API is not some
kind of leaked abstraction, it's something I want to have anyway.


> patch 3/9. Thoughts?

I believe this hybrid approach provides better usability without
compromising anything. The only theoretical benefit of complete
separation of bpf_link creation and attachment is that user code would
be able to separate those two steps code organization-wise. But it's
easily doable through custom application code (just encapsulate all
the parameters and type of attachment and pass it around until you
actually need to attach), but I don't think it's necessary in practice
(so far I never needed anything like that).

Hope I convinced you that while elegant, it's not that practical. Also
hybrid approach isn't inelegant either and doesn't produce code
duplication (it actually eliminates some unnecessary allocations,
e.g., for storing tp_name for raw_tracepoint attach) :)

>
>   [0] https://lore.kernel.org/bpf/a7780057-1d70-9ace-960b-ff65867dc277@iogearbox.net/
>
> >  enum bpf_perf_event_ret
> >  bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
> >                          void **copy_mem, size_t *copy_size,
> > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> > index 1bf66c4a9330..bd767cc11967 100644
> > --- a/tools/lib/bpf/libbpf.h
> > +++ b/tools/lib/bpf/libbpf.h
> > @@ -171,6 +171,13 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
> >
> >  LIBBPF_API struct bpf_link *
> >  bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
> > +LIBBPF_API struct bpf_link *
> > +bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
> > +                        const char *func_name);
> > +LIBBPF_API struct bpf_link *
> > +bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
> > +                        pid_t pid, const char *binary_path,
> > +                        size_t func_offset);
> >
> >  struct bpf_insn;
> >
> > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> > index 756f5aa802e9..57a40fb60718 100644
> > --- a/tools/lib/bpf/libbpf.map
> > +++ b/tools/lib/bpf/libbpf.map
> > @@ -169,7 +169,9 @@ LIBBPF_0.0.4 {
> >       global:
> >               bpf_link__destroy;
> >               bpf_object__load_xattr;
> > +             bpf_program__attach_kprobe;
> >               bpf_program__attach_perf_event;
> > +             bpf_program__attach_uprobe;
> >               btf_dump__dump_type;
> >               btf_dump__free;
> >               btf_dump__new;
> >
>
Andrii Nakryiko July 4, 2019, 12:57 a.m. UTC | #3
On Wed, Jul 3, 2019 at 9:47 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Jul 3, 2019 at 5:39 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > On 07/02/2019 01:58 AM, Andrii Nakryiko wrote:
> > > Add ability to attach to kernel and user probes and retprobes.
> > > Implementation depends on perf event support for kprobes/uprobes.
> > >
> > > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > > Reviewed-by: Stanislav Fomichev <sdf@google.com>
> > > ---

<snip>

> > > +}
> >
> > Hm, this only addresses half the feedback I had in prior version [0]. Patch 2/9
>
> Hi Daniel,
>
> Yes, and I explained why in reply to your original email, please see [1].
>
> I've started with exactly separation you wanted, but it turned out to
> be cumbersome and harder to use user API, while also somewhat more
> complicated to implement. Mostly because in that design bpf_link
> exists in two states: created-but-not-attached and attached. Which
> forces user to do additional clean ups if creation succeeded, but
> attachment failed. It also makes it a bit harder to provide good
> contextual error logging if something goes wrong, because not all
> original parameters are preserved, as some of them might be needed
> only for creation, but not attachment (or we'll have to allocate and
> copy extra stuff just for logging purposes).
>
> On the other hand, having separate generic attach_event method doesn't
> help that much, as there is little common functionality to reuse
> across all kinds of possible bpf_link types.
>
>
>   [1] https://lore.kernel.org/bpf/20190621045555.4152743-4-andriin@fb.com/T/#m6cfc141e7b57970bc948134bf671a46972b95134
>
> > with bpf_link with destructor looks good to me, but my feedback from back then was
> > that all the kprobe/uprobe/tracepoint/raw_tracepoint should be split API-wise, so
> > you'll end up with something like the below, that is, 1) a set of functions that
> > only /create/ the bpf_link handle /once/, and 2) a helper that allows /attaching/
> > progs to one or multiple bpf_links. The set of APIs would look like:
> >
> > struct bpf_link *bpf_link__create_kprobe(bool retprobe, const char *func_name);
> > struct bpf_link *bpf_link__create_uprobe(bool retprobe, pid_t pid,
> >                                          const char *binary_path,
> >                                          size_t func_offset);
> > int bpf_program__attach_to_link(struct bpf_link *link, struct bpf_program *prog);
> > int bpf_link__destroy(struct bpf_link *link);
> >
> > This seems much more natural to me. Right now you sort of do both in one single API.
>
> It felt that way for me as well, until I implemented it and used it in
> selftests. And then it felt unnecessarily verbose without giving any
> benefit. I still have a local patchset with that change, I can post it
> as RFC, if you don't trust my judgement. Please let me know.
>
> > Detangling the bpf_program__attach_{uprobe,kprobe}() would also avoid that you have
> > to redo all the perf_event_open_probe() work over and over in order to get the pfd

So re-reading this again, I wonder if you meant that with separate
bpf_link (or rather bpf_hook in that case) creation and attachment
operations, one would be able to create single bpf_hook for same
kprobe and then attach multiple BPF programs to that single pfd
representing that specific probe.

If that's how I should have read it, I agree that it probably would be
possible for some types of hooks, but not for every type of hook. But
furthermore, how often in practice same application attaches many
different BPF programs to the same hook? And it's also hard to imagine
that hook creation (i.e., creating such FD for BPF hook), would ever
be a bottleneck.

So I still think it's not a strong reason to go with API that's harder
to use for typical use cases just because of hypothetical benefits in
some extreme cases.

>
> What do you mean by "redo all the perf_event_open_probe work"? In
> terms of code, I just reuse the same function, so there is no
> duplicate code. And in either design you'll have to open that
> perf_event, so that work will have to be done one way or another.
>
> > context where you can later attach something to. Given bpf_program__attach_to_link()
> > API, you also wouldn't need to expose the bpf_program__attach_perf_event() from
>
> I'd expose attach_perf_event either way, it's high-level API I want to
> provide, we have use cases where user is creating some specific
> non-kprobe/non-tracepoint perf events and wants to attach to it. E.g.,
> HW counter overflow events for CPU profilers. So that API is not some
> kind of leaked abstraction, it's something I want to have anyway.
>
>
> > patch 3/9. Thoughts?
>
> I believe this hybrid approach provides better usability without
> compromising anything. The only theoretical benefit of complete
> separation of bpf_link creation and attachment is that user code would
> be able to separate those two steps code organization-wise. But it's
> easily doable through custom application code (just encapsulate all
> the parameters and type of attachment and pass it around until you
> actually need to attach), but I don't think it's necessary in practice
> (so far I never needed anything like that).
>
> Hope I convinced you that while elegant, it's not that practical. Also
> hybrid approach isn't inelegant either and doesn't produce code
> duplication (it actually eliminates some unnecessary allocations,
> e.g., for storing tp_name for raw_tracepoint attach) :)
>
> >
> >   [0] https://lore.kernel.org/bpf/a7780057-1d70-9ace-960b-ff65867dc277@iogearbox.net/
> >
> > >  enum bpf_perf_event_ret
> > >  bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
> > >                          void **copy_mem, size_t *copy_size,
> > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> > > index 1bf66c4a9330..bd767cc11967 100644
> > > --- a/tools/lib/bpf/libbpf.h
> > > +++ b/tools/lib/bpf/libbpf.h
> > > @@ -171,6 +171,13 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
> > >
> > >  LIBBPF_API struct bpf_link *
> > >  bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
> > > +LIBBPF_API struct bpf_link *
> > > +bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
> > > +                        const char *func_name);
> > > +LIBBPF_API struct bpf_link *
> > > +bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
> > > +                        pid_t pid, const char *binary_path,
> > > +                        size_t func_offset);
> > >
> > >  struct bpf_insn;
> > >
> > > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> > > index 756f5aa802e9..57a40fb60718 100644
> > > --- a/tools/lib/bpf/libbpf.map
> > > +++ b/tools/lib/bpf/libbpf.map
> > > @@ -169,7 +169,9 @@ LIBBPF_0.0.4 {
> > >       global:
> > >               bpf_link__destroy;
> > >               bpf_object__load_xattr;
> > > +             bpf_program__attach_kprobe;
> > >               bpf_program__attach_perf_event;
> > > +             bpf_program__attach_uprobe;
> > >               btf_dump__dump_type;
> > >               btf_dump__free;
> > >               btf_dump__new;
> > >
> >
Daniel Borkmann July 5, 2019, 8:46 p.m. UTC | #4
On 07/04/2019 02:57 AM, Andrii Nakryiko wrote:
> On Wed, Jul 3, 2019 at 9:47 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
[...]
>>   [1] https://lore.kernel.org/bpf/20190621045555.4152743-4-andriin@fb.com/T/#m6cfc141e7b57970bc948134bf671a46972b95134
>>
>>> with bpf_link with destructor looks good to me, but my feedback from back then was
>>> that all the kprobe/uprobe/tracepoint/raw_tracepoint should be split API-wise, so
>>> you'll end up with something like the below, that is, 1) a set of functions that
>>> only /create/ the bpf_link handle /once/, and 2) a helper that allows /attaching/
>>> progs to one or multiple bpf_links. The set of APIs would look like:
>>>
>>> struct bpf_link *bpf_link__create_kprobe(bool retprobe, const char *func_name);
>>> struct bpf_link *bpf_link__create_uprobe(bool retprobe, pid_t pid,
>>>                                          const char *binary_path,
>>>                                          size_t func_offset);
>>> int bpf_program__attach_to_link(struct bpf_link *link, struct bpf_program *prog);
>>> int bpf_link__destroy(struct bpf_link *link);
>>>
>>> This seems much more natural to me. Right now you sort of do both in one single API.
>>
>> It felt that way for me as well, until I implemented it and used it in
>> selftests. And then it felt unnecessarily verbose without giving any
>> benefit. I still have a local patchset with that change, I can post it
>> as RFC, if you don't trust my judgement. Please let me know.
>>
>>> Detangling the bpf_program__attach_{uprobe,kprobe}() would also avoid that you have
>>> to redo all the perf_event_open_probe() work over and over in order to get the pfd
> 
> So re-reading this again, I wonder if you meant that with separate
> bpf_link (or rather bpf_hook in that case) creation and attachment
> operations, one would be able to create single bpf_hook for same
> kprobe and then attach multiple BPF programs to that single pfd
> representing that specific probe.
> 
> If that's how I should have read it, I agree that it probably would be
> possible for some types of hooks, but not for every type of hook. But
> furthermore, how often in practice same application attaches many
> different BPF programs to the same hook? And it's also hard to imagine
> that hook creation (i.e., creating such FD for BPF hook), would ever
> be a bottleneck.
> 
> So I still think it's not a strong reason to go with API that's harder
> to use for typical use cases just because of hypothetical benefits in
> some extreme cases.

Was thinking along that lines, yes, as we run over an array of BPF progs,
but I just double checked the kernel code again and the relationship of
a BPF prog to perf_event is really just 1:1, just that the backing tp_event
(trace_event_call) contains the shared array. Given that, all makes sense
and there is no point in splitting. Therefore, applied, thanks!
Andrii Nakryiko July 6, 2019, 3:48 a.m. UTC | #5
On Fri, Jul 5, 2019 at 1:46 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 07/04/2019 02:57 AM, Andrii Nakryiko wrote:
> > On Wed, Jul 3, 2019 at 9:47 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> [...]
> >>   [1] https://lore.kernel.org/bpf/20190621045555.4152743-4-andriin@fb.com/T/#m6cfc141e7b57970bc948134bf671a46972b95134
> >>
> >>> with bpf_link with destructor looks good to me, but my feedback from back then was
> >>> that all the kprobe/uprobe/tracepoint/raw_tracepoint should be split API-wise, so
> >>> you'll end up with something like the below, that is, 1) a set of functions that
> >>> only /create/ the bpf_link handle /once/, and 2) a helper that allows /attaching/
> >>> progs to one or multiple bpf_links. The set of APIs would look like:
> >>>
> >>> struct bpf_link *bpf_link__create_kprobe(bool retprobe, const char *func_name);
> >>> struct bpf_link *bpf_link__create_uprobe(bool retprobe, pid_t pid,
> >>>                                          const char *binary_path,
> >>>                                          size_t func_offset);
> >>> int bpf_program__attach_to_link(struct bpf_link *link, struct bpf_program *prog);
> >>> int bpf_link__destroy(struct bpf_link *link);
> >>>
> >>> This seems much more natural to me. Right now you sort of do both in one single API.
> >>
> >> It felt that way for me as well, until I implemented it and used it in
> >> selftests. And then it felt unnecessarily verbose without giving any
> >> benefit. I still have a local patchset with that change, I can post it
> >> as RFC, if you don't trust my judgement. Please let me know.
> >>
> >>> Detangling the bpf_program__attach_{uprobe,kprobe}() would also avoid that you have
> >>> to redo all the perf_event_open_probe() work over and over in order to get the pfd
> >
> > So re-reading this again, I wonder if you meant that with separate
> > bpf_link (or rather bpf_hook in that case) creation and attachment
> > operations, one would be able to create single bpf_hook for same
> > kprobe and then attach multiple BPF programs to that single pfd
> > representing that specific probe.
> >
> > If that's how I should have read it, I agree that it probably would be
> > possible for some types of hooks, but not for every type of hook. But
> > furthermore, how often in practice same application attaches many
> > different BPF programs to the same hook? And it's also hard to imagine
> > that hook creation (i.e., creating such FD for BPF hook), would ever
> > be a bottleneck.
> >
> > So I still think it's not a strong reason to go with API that's harder
> > to use for typical use cases just because of hypothetical benefits in
> > some extreme cases.
>
> Was thinking along that lines, yes, as we run over an array of BPF progs,
> but I just double checked the kernel code again and the relationship of
> a BPF prog to perf_event is really just 1:1, just that the backing tp_event
> (trace_event_call) contains the shared array. Given that, all makes sense
> and there is no point in splitting. Therefore, applied, thanks!

Great, thanks a lot!
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index bcaa294f819d..7b6142408b15 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4021,6 +4021,175 @@  struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 	return (struct bpf_link *)link;
 }
 
+/*
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
+ * given file using scanf format string fmt. If actual parsed value is
+ * negative, the result might be indistinguishable from error
+ */
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+	char buf[STRERR_BUFSIZE];
+	int err, ret;
+	FILE *f;
+
+	f = fopen(file, "r");
+	if (!f) {
+		err = -errno;
+		pr_debug("failed to open '%s': %s\n", file,
+			 libbpf_strerror_r(err, buf, sizeof(buf)));
+		return err;
+	}
+	err = fscanf(f, fmt, &ret);
+	if (err != 1) {
+		err = err == EOF ? -EIO : -errno;
+		pr_debug("failed to parse '%s': %s\n", file,
+			libbpf_strerror_r(err, buf, sizeof(buf)));
+		fclose(f);
+		return err;
+	}
+	fclose(f);
+	return ret;
+}
+
+static int determine_kprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_kprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
+				 uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	type = uprobe ? determine_uprobe_perf_type()
+		      : determine_kprobe_perf_type();
+	if (type < 0) {
+		pr_warning("failed to determine %s perf type: %s\n",
+			   uprobe ? "uprobe" : "kprobe",
+			   libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	if (retprobe) {
+		int bit = uprobe ? determine_uprobe_retprobe_bit()
+				 : determine_kprobe_retprobe_bit();
+
+		if (bit < 0) {
+			pr_warning("failed to determine %s retprobe bit: %s\n",
+				   uprobe ? "uprobe" : "kprobe",
+				   libbpf_strerror_r(bit, errmsg,
+						     sizeof(errmsg)));
+			return bit;
+		}
+		attr.config |= 1 << bit;
+	}
+	attr.size = sizeof(attr);
+	attr.type = type;
+	attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */
+	attr.config2 = offset;		       /* kprobe_addr or probe_offset */
+
+	/* pid filter is meaningful only for uprobes */
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid /* pid */,
+		      pid == -1 ? 0 : -1 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warning("%s perf_event_open() failed: %s\n",
+			   uprobe ? "uprobe" : "kprobe",
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+					    bool retprobe,
+					    const char *func_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+				    0 /* offset */, -1 /* pid */);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "kretprobe" : "kprobe", func_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to %s '%s': %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "kretprobe" : "kprobe", func_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+					    bool retprobe, pid_t pid,
+					    const char *binary_path,
+					    size_t func_offset)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
+				    binary_path, func_offset, pid);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "uretprobe" : "uprobe",
+			   binary_path, func_offset,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "uretprobe" : "uprobe",
+			   binary_path, func_offset,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1bf66c4a9330..bd767cc11967 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -171,6 +171,13 @@  LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+			   const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+			   pid_t pid, const char *binary_path,
+			   size_t func_offset);
 
 struct bpf_insn;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 756f5aa802e9..57a40fb60718 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -169,7 +169,9 @@  LIBBPF_0.0.4 {
 	global:
 		bpf_link__destroy;
 		bpf_object__load_xattr;
+		bpf_program__attach_kprobe;
 		bpf_program__attach_perf_event;
+		bpf_program__attach_uprobe;
 		btf_dump__dump_type;
 		btf_dump__free;
 		btf_dump__new;