diff mbox series

[RFC,bpf-next,04/16] bpf: allow loading of a dumper program

Message ID 20200408232524.2675603-1-yhs@fb.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf based dumping of kernel data structures | expand

Commit Message

Yonghong Song April 8, 2020, 11:25 p.m. UTC
A dumper bpf program is a tracing program with attach type
BPF_TRACE_DUMP. During bpf program load, the load attribute
   attach_prog_fd
carries the target directory fd. The program will be
verified against btf_id of the target_proto.

If the program is loaded successfully, the dump target,
represented as a path relative to /sys/kernel/bpfdump,
is remembered in prog->aux->dump_target and is used
later to create dumpers.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |  2 ++
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/dump.c              | 40 ++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  8 ++++++-
 kernel/bpf/verifier.c          | 15 +++++++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 66 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko April 10, 2020, 10:36 p.m. UTC | #1
On Wed, Apr 8, 2020 at 4:25 PM Yonghong Song <yhs@fb.com> wrote:
>
> A dumper bpf program is a tracing program with attach type
> BPF_TRACE_DUMP. During bpf program load, the load attribute
>    attach_prog_fd
> carries the target directory fd. The program will be
> verified against btf_id of the target_proto.
>
> If the program is loaded successfully, the dump target, as
> represented as a relative path to /sys/kernel/bpfdump,
> will be remembered in prog->aux->dump_target, which will
> be used later to create dumpers.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h            |  2 ++
>  include/uapi/linux/bpf.h       |  1 +
>  kernel/bpf/dump.c              | 40 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c           |  8 ++++++-
>  kernel/bpf/verifier.c          | 15 +++++++++++++
>  tools/include/uapi/linux/bpf.h |  1 +
>  6 files changed, 66 insertions(+), 1 deletion(-)
>

[...]

>
> +int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog)
> +{
> +       struct bpfdump_target_info *tinfo;
> +       const char *target_proto;
> +       struct file *target_file;
> +       struct fd tfd;
> +       int err = 0, btf_id;
> +
> +       if (!btf_vmlinux)
> +               return -EINVAL;
> +
> +       tfd = fdget(target_fd);
> +       target_file = tfd.file;
> +       if (!target_file)
> +               return -EBADF;

fdput is missing (or rather err = -EBADF; goto done; ?)


> +
> +       if (target_file->f_inode->i_op != &bpf_dir_iops) {
> +               err = -EINVAL;
> +               goto done;
> +       }
> +
> +       tinfo = target_file->f_inode->i_private;
> +       target_proto = tinfo->target_proto;
> +       btf_id = btf_find_by_name_kind(btf_vmlinux, target_proto,
> +                                      BTF_KIND_FUNC);
> +
> +       if (btf_id > 0) {
> +               prog->aux->dump_target = tinfo->target;
> +               prog->aux->attach_btf_id = btf_id;
> +       }
> +
> +       err = min(btf_id, 0);

this min trick looks too clever... why not more straightforward and composable:

if (btf_id < 0) {
    err = btf_id;
    goto done;
}

prog->aux->dump_target = tinfo->target;
prog->aux->attach_btf_id = btf_id;

?

> +done:
> +       fdput(tfd);
> +       return err;
> +}
> +
>  int bpf_dump_reg_target(const char *target,
>                         const char *target_proto,
>                         const struct seq_operations *seq_ops,
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 64783da34202..41005dee8957 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2060,7 +2060,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>
>         prog->expected_attach_type = attr->expected_attach_type;
>         prog->aux->attach_btf_id = attr->attach_btf_id;
> -       if (attr->attach_prog_fd) {
> +       if (type == BPF_PROG_TYPE_TRACING &&
> +           attr->expected_attach_type == BPF_TRACE_DUMP) {
> +               err = bpf_dump_set_target_info(attr->attach_prog_fd, prog);

looking at bpf_attr, it's not clear why attach_prog_fd and
prog_ifindex were not combined into a single union field... this
probably got missed? But in this case I'd say let's create a

union {
    __u32 attach_prog_fd;
    __u32 attach_target_fd; /* similar to terminology for BPF_PROG_ATTACH */
};

instead of reusing not-exactly-matching field names?

> +               if (err)
> +                       goto free_prog_nouncharge;
> +       } else if (attr->attach_prog_fd) {
>                 struct bpf_prog *tgt_prog;
>
>                 tgt_prog = bpf_prog_get(attr->attach_prog_fd);
> @@ -2145,6 +2150,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>         err = bpf_prog_new_fd(prog);
>         if (err < 0)
>                 bpf_prog_put(prog);
> +
>         return err;
>

[...]
Yonghong Song April 10, 2020, 11:28 p.m. UTC | #2
On 4/10/20 3:36 PM, Andrii Nakryiko wrote:
> On Wed, Apr 8, 2020 at 4:25 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> A dumper bpf program is a tracing program with attach type
>> BPF_TRACE_DUMP. During bpf program load, the load attribute
>>     attach_prog_fd
>> carries the target directory fd. The program will be
>> verified against btf_id of the target_proto.
>>
>> If the program is loaded successfully, the dump target, as
>> represented as a relative path to /sys/kernel/bpfdump,
>> will be remembered in prog->aux->dump_target, which will
>> be used later to create dumpers.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   include/linux/bpf.h            |  2 ++
>>   include/uapi/linux/bpf.h       |  1 +
>>   kernel/bpf/dump.c              | 40 ++++++++++++++++++++++++++++++++++
>>   kernel/bpf/syscall.c           |  8 ++++++-
>>   kernel/bpf/verifier.c          | 15 +++++++++++++
>>   tools/include/uapi/linux/bpf.h |  1 +
>>   6 files changed, 66 insertions(+), 1 deletion(-)
>>
> 
> [...]
> 
>>
>> +int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog)
>> +{
>> +       struct bpfdump_target_info *tinfo;
>> +       const char *target_proto;
>> +       struct file *target_file;
>> +       struct fd tfd;
>> +       int err = 0, btf_id;
>> +
>> +       if (!btf_vmlinux)
>> +               return -EINVAL;
>> +
>> +       tfd = fdget(target_fd);
>> +       target_file = tfd.file;
>> +       if (!target_file)
>> +               return -EBADF;
> 
> fdput is missing (or rather err = -BADF; goto done; ?)

No need to do fdput if tfd.file is NULL.

> 
> 
>> +
>> +       if (target_file->f_inode->i_op != &bpf_dir_iops) {
>> +               err = -EINVAL;
>> +               goto done;
>> +       }
>> +
>> +       tinfo = target_file->f_inode->i_private;
>> +       target_proto = tinfo->target_proto;
>> +       btf_id = btf_find_by_name_kind(btf_vmlinux, target_proto,
>> +                                      BTF_KIND_FUNC);
>> +
>> +       if (btf_id > 0) {
>> +               prog->aux->dump_target = tinfo->target;
>> +               prog->aux->attach_btf_id = btf_id;
>> +       }
>> +
>> +       err = min(btf_id, 0);
> 
> this min trick looks too clever... why not more straightforward and composable:
> 
> if (btf_id < 0) {
>      err = btf_id;
>      goto done;
> }
> 
> prog->aux->dump_target = tinfo->target;
> prog->aux->attach_btf_id = btf_id;
> 
> ?

this can be done.

> 
>> +done:
>> +       fdput(tfd);
>> +       return err;
>> +}
>> +
>>   int bpf_dump_reg_target(const char *target,
>>                          const char *target_proto,
>>                          const struct seq_operations *seq_ops,
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 64783da34202..41005dee8957 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -2060,7 +2060,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>>
>>          prog->expected_attach_type = attr->expected_attach_type;
>>          prog->aux->attach_btf_id = attr->attach_btf_id;
>> -       if (attr->attach_prog_fd) {
>> +       if (type == BPF_PROG_TYPE_TRACING &&
>> +           attr->expected_attach_type == BPF_TRACE_DUMP) {
>> +               err = bpf_dump_set_target_info(attr->attach_prog_fd, prog);
> 
> looking at bpf_attr, it's not clear why attach_prog_fd and
> prog_ifindex were not combined into a single union field... this
> probably got missed? But in this case I'd say let's create a
> 
> union {
>      __u32 attach_prog_fd;
>      __u32 attach_target_fd; (similar to terminology for BPF_PROG_ATTACH)
> };
> 
> instead of reusing not-exactly-matching field names?

I thought about this, but wanted to avoid a uapi change (even though
it would be compatible). Maybe we should. Let me think about this.

> 
>> +               if (err)
>> +                       goto free_prog_nouncharge;
>> +       } else if (attr->attach_prog_fd) {
>>                  struct bpf_prog *tgt_prog;
>>
>>                  tgt_prog = bpf_prog_get(attr->attach_prog_fd);
>> @@ -2145,6 +2150,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>>          err = bpf_prog_new_fd(prog);
>>          if (err < 0)
>>                  bpf_prog_put(prog);
>> +
>>          return err;
>>
> 
> [...]
>
Andrii Nakryiko April 13, 2020, 7:33 p.m. UTC | #3
On Fri, Apr 10, 2020 at 4:28 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 4/10/20 3:36 PM, Andrii Nakryiko wrote:
> > On Wed, Apr 8, 2020 at 4:25 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> A dumper bpf program is a tracing program with attach type
> >> BPF_TRACE_DUMP. During bpf program load, the load attribute
> >>     attach_prog_fd
> >> carries the target directory fd. The program will be
> >> verified against btf_id of the target_proto.
> >>
> >> If the program is loaded successfully, the dump target, as
> >> represented as a relative path to /sys/kernel/bpfdump,
> >> will be remembered in prog->aux->dump_target, which will
> >> be used later to create dumpers.
> >>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   include/linux/bpf.h            |  2 ++
> >>   include/uapi/linux/bpf.h       |  1 +
> >>   kernel/bpf/dump.c              | 40 ++++++++++++++++++++++++++++++++++
> >>   kernel/bpf/syscall.c           |  8 ++++++-
> >>   kernel/bpf/verifier.c          | 15 +++++++++++++
> >>   tools/include/uapi/linux/bpf.h |  1 +
> >>   6 files changed, 66 insertions(+), 1 deletion(-)
> >>
> >
> > [...]
> >
> >>
> >> +int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog)
> >> +{
> >> +       struct bpfdump_target_info *tinfo;
> >> +       const char *target_proto;
> >> +       struct file *target_file;
> >> +       struct fd tfd;
> >> +       int err = 0, btf_id;
> >> +
> >> +       if (!btf_vmlinux)
> >> +               return -EINVAL;
> >> +
> >> +       tfd = fdget(target_fd);
> >> +       target_file = tfd.file;
> >> +       if (!target_file)
> >> +               return -EBADF;
> >
> > fdput is missing (or rather err = -BADF; goto done; ?)
>
> No need to do fdput if tfd.file is NULL.

ah, right :)

>
> >
> >
> >> +
> >> +       if (target_file->f_inode->i_op != &bpf_dir_iops) {
> >> +               err = -EINVAL;
> >> +               goto done;
> >> +       }
> >> +
> >> +       tinfo = target_file->f_inode->i_private;
> >> +       target_proto = tinfo->target_proto;
> >> +       btf_id = btf_find_by_name_kind(btf_vmlinux, target_proto,
> >> +                                      BTF_KIND_FUNC);
> >> +
> >> +       if (btf_id > 0) {
> >> +               prog->aux->dump_target = tinfo->target;
> >> +               prog->aux->attach_btf_id = btf_id;
> >> +       }
> >> +
> >> +       err = min(btf_id, 0);
> >
> > this min trick looks too clever... why not more straightforward and composable:
> >
> > if (btf_id < 0) {
> >      err = btf_id;
> >      goto done;
> > }
> >
> > prog->aux->dump_target = tinfo->target;
> > prog->aux->attach_btf_id = btf_id;
> >
> > ?
>
> this can be done.
>
> >
> >> +done:
> >> +       fdput(tfd);
> >> +       return err;
> >> +}
> >> +
> >>   int bpf_dump_reg_target(const char *target,
> >>                          const char *target_proto,
> >>                          const struct seq_operations *seq_ops,
> >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >> index 64783da34202..41005dee8957 100644
> >> --- a/kernel/bpf/syscall.c
> >> +++ b/kernel/bpf/syscall.c
> >> @@ -2060,7 +2060,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
> >>
> >>          prog->expected_attach_type = attr->expected_attach_type;
> >>          prog->aux->attach_btf_id = attr->attach_btf_id;
> >> -       if (attr->attach_prog_fd) {
> >> +       if (type == BPF_PROG_TYPE_TRACING &&
> >> +           attr->expected_attach_type == BPF_TRACE_DUMP) {
> >> +               err = bpf_dump_set_target_info(attr->attach_prog_fd, prog);
> >
> > looking at bpf_attr, it's not clear why attach_prog_fd and
> > prog_ifindex were not combined into a single union field... this
> > probably got missed? But in this case I'd say let's create a
> >
> > union {
> >      __u32 attach_prog_fd;
> >      __u32 attach_target_fd; (similar to terminology for BPF_PROG_ATTACH)
> > };
> >
> > instead of reusing not-exactly-matching field names?
>
> I thought about this, but thinking to avoid uapi change (although
> compatible). Maybe we should. Let me think about this.

This is creating a new alias for the same field, so should be fine
from UAPI perspective.

>
> >
> >> +               if (err)
> >> +                       goto free_prog_nouncharge;
> >> +       } else if (attr->attach_prog_fd) {
> >>                  struct bpf_prog *tgt_prog;
> >>
> >>                  tgt_prog = bpf_prog_get(attr->attach_prog_fd);
> >> @@ -2145,6 +2150,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
> >>          err = bpf_prog_new_fd(prog);
> >>          if (err < 0)
> >>                  bpf_prog_put(prog);
> >> +
> >>          return err;
> >>
> >
> > [...]
> >
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 53914bec7590..44268d36d901 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -673,6 +673,7 @@  struct bpf_prog_aux {
 	struct bpf_map **used_maps;
 	struct bpf_prog *prog;
 	struct user_struct *user;
+	const char *dump_target;
 	u64 load_time; /* ns since boottime */
 	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
@@ -1112,6 +1113,7 @@  int bpf_obj_get_user(const char __user *pathname, int flags);
 int bpf_dump_reg_target(const char *target, const char *target_proto,
 			const struct seq_operations *seq_ops,
 			u32 seq_priv_size, u32 target_feature);
+int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2e29a671d67e..0f1cbed446c1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -215,6 +215,7 @@  enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_DUMP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/dump.c b/kernel/bpf/dump.c
index 45528846557f..1091affe8b3f 100644
--- a/kernel/bpf/dump.c
+++ b/kernel/bpf/dump.c
@@ -11,6 +11,9 @@ 
 #include <linux/fs_parser.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
+
+extern struct btf *btf_vmlinux;
 
 struct bpfdump_target_info {
 	struct list_head list;
@@ -48,6 +51,43 @@  static const struct inode_operations bpf_dir_iops = {
 	.unlink		= dumper_unlink,
 };
 
+/* Bind @prog to the bpfdump target whose directory is open at @target_fd.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog)
+{
+	struct bpfdump_target_info *tinfo;
+	struct fd tfd;
+	int err = 0, btf_id;
+
+	if (!btf_vmlinux)
+		return -EINVAL;
+
+	tfd = fdget(target_fd);
+	if (!tfd.file)
+		return -EBADF;	/* fdput() is not needed without a file */
+
+	/* reject any fd whose inode does not use bpf_dir_iops */
+	if (tfd.file->f_inode->i_op != &bpf_dir_iops) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	tinfo = tfd.file->f_inode->i_private;
+	btf_id = btf_find_by_name_kind(btf_vmlinux, tinfo->target_proto,
+				       BTF_KIND_FUNC);
+	if (btf_id < 0) {
+		err = btf_id;
+		goto done;
+	}
+
+	prog->aux->dump_target = tinfo->target;
+	prog->aux->attach_btf_id = btf_id;
+done:
+	fdput(tfd);
+	return err;
+}
+
 int bpf_dump_reg_target(const char *target,
 			const char *target_proto,
 			const struct seq_operations *seq_ops,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64783da34202..41005dee8957 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2060,7 +2060,12 @@  static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
 	prog->expected_attach_type = attr->expected_attach_type;
 	prog->aux->attach_btf_id = attr->attach_btf_id;
-	if (attr->attach_prog_fd) {
+	if (type == BPF_PROG_TYPE_TRACING &&
+	    attr->expected_attach_type == BPF_TRACE_DUMP) {
+		err = bpf_dump_set_target_info(attr->attach_prog_fd, prog);
+		if (err)
+			goto free_prog_nouncharge;
+	} else if (attr->attach_prog_fd) {
 		struct bpf_prog *tgt_prog;
 
 		tgt_prog = bpf_prog_get(attr->attach_prog_fd);
@@ -2145,6 +2150,7 @@  static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	err = bpf_prog_new_fd(prog);
 	if (err < 0)
 		bpf_prog_put(prog);
+
 	return err;
 
 free_used_maps:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 04c6630cc18f..f531cee24fc5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10426,6 +10426,7 @@  static int check_attach_btf_id(struct bpf_verifier_env *env)
 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
 	u32 btf_id = prog->aux->attach_btf_id;
 	const char prefix[] = "btf_trace_";
+	struct btf_func_model fmodel;
 	int ret = 0, subprog = -1, i;
 	struct bpf_trampoline *tr;
 	const struct btf_type *t;
@@ -10566,6 +10567,20 @@  static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		prog->aux->attach_btf_trace = true;
 		return 0;
+	case BPF_TRACE_DUMP:
+		if (!btf_type_is_func(t)) {
+			verbose(env, "attach_btf_id %u is not a function\n",
+				btf_id);
+			return -EINVAL;
+		}
+		t = btf_type_by_id(btf, t->type);
+		if (!btf_type_is_func_proto(t))
+			return -EINVAL;
+		prog->aux->attach_func_name = tname;
+		prog->aux->attach_func_proto = t;
+		ret = btf_distill_func_proto(&env->log, btf, t,
+					     tname, &fmodel);
+		return ret;
 	default:
 		if (!prog_extension)
 			return -EINVAL;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2e29a671d67e..0f1cbed446c1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -215,6 +215,7 @@  enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_DUMP,
 	__MAX_BPF_ATTACH_TYPE
 };