Message ID | 20200427201240.2994985-1-yhs@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: implement bpf iterator for kernel data | expand |
On Mon, Apr 27, 2020 at 01:12:40PM -0700, Yonghong Song wrote: > Given a bpf program, the step to create an anonymous bpf iterator is: > - create a bpf_iter_link, which combines bpf program and the target. > In the future, there could be more information recorded in the link. > A link_fd will be returned to the user space. > - create an anonymous bpf iterator with the given link_fd. > > The anonymous bpf iterator (and its underlying bpf_link) will be > used to create file based bpf iterator as well. > > The benefit to use of bpf_iter_link: > - for file based bpf iterator, bpf_iter_link provides a standard > way to replace underlying bpf programs. > - for both anonymous and free based iterators, bpf link query > capability can be leveraged. > > The patch added support of tracing/iter programs for BPF_LINK_CREATE. > > Signed-off-by: Yonghong Song <yhs@fb.com> > --- > include/linux/bpf.h | 2 ++ > kernel/bpf/bpf_iter.c | 54 +++++++++++++++++++++++++++++++++++++++++++ > kernel/bpf/syscall.c | 15 ++++++++++++ > 3 files changed, 71 insertions(+) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 4ac8d61f7c3e..60ecb73d8f6d 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1034,6 +1034,7 @@ extern const struct file_operations bpf_prog_fops; > extern const struct bpf_prog_ops bpf_offload_prog_ops; > extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; > extern const struct bpf_verifier_ops xdp_analyzer_ops; > +extern const struct bpf_link_ops bpf_iter_link_lops; > > struct bpf_prog *bpf_prog_get(u32 ufd); > struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, > @@ -1129,6 +1130,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); > struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, > u64 *session_id, u64 *seq_num, bool is_last); > int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); > +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); > > int 
bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); > int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); > diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c > index 284c95587803..9532e7bcb8e1 100644 > --- a/kernel/bpf/bpf_iter.c > +++ b/kernel/bpf/bpf_iter.c > @@ -14,6 +14,11 @@ struct bpf_iter_target_info { > u32 target_feature; > }; > > +struct bpf_iter_link { > + struct bpf_link link; > + struct bpf_iter_target_info *tinfo; > +}; > + > static struct list_head targets; > static struct mutex targets_mutex; > static bool bpf_iter_inited = false; > @@ -67,3 +72,52 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) > > return ret; > } > + > +static void bpf_iter_link_release(struct bpf_link *link) > +{ > +} > + > +static void bpf_iter_link_dealloc(struct bpf_link *link) > +{ > +} > + > +const struct bpf_link_ops bpf_iter_link_lops = { > + .release = bpf_iter_link_release, > + .dealloc = bpf_iter_link_dealloc, > +}; > + > +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > +{ > + struct bpf_iter_target_info *tinfo; > + struct bpf_iter_link *link; > + const char *func_name; > + bool existed = false; > + int err; > + > + if (attr->link_create.target_fd || attr->link_create.flags) > + return -EINVAL; > + > + func_name = prog->aux->attach_func_name; > + mutex_lock(&targets_mutex); > + list_for_each_entry(tinfo, &targets, list) { > + if (!strcmp(tinfo->target_func_name, func_name)) { This can be done in prog load time. Also, is it better to store a btf_id at tinfo instead of doing strcmp here? 
> + existed = true; > + break; > + } > + } > + mutex_unlock(&targets_mutex); > + if (!existed) > + return -ENOENT; > + > + link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); > + if (!link) > + return -ENOMEM; > + > + bpf_link_init(&link->link, &bpf_iter_link_lops, prog); > + link->tinfo = tinfo; > + > + err = bpf_link_new_fd(&link->link); > + if (err < 0) > + kfree(link); > + return err; > +} > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 022187640943..8741b5e11c85 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -2269,6 +2269,8 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) > else if (link->ops == &bpf_cgroup_link_lops) > link_type = "cgroup"; > #endif > + else if (link->ops == &bpf_iter_link_lops) > + link_type = "iter"; > else > link_type = "unknown"; > > @@ -2597,6 +2599,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) > case BPF_CGROUP_GETSOCKOPT: > case BPF_CGROUP_SETSOCKOPT: > return BPF_PROG_TYPE_CGROUP_SOCKOPT; > + case BPF_TRACE_ITER: > + return BPF_PROG_TYPE_TRACING; > default: > return BPF_PROG_TYPE_UNSPEC; > } > @@ -3571,6 +3575,14 @@ static int bpf_map_do_batch(const union bpf_attr *attr, > return err; > } > > +static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > +{ > + if (attr->link_create.attach_type == BPF_TRACE_ITER) Has prog->expected_attach_type been checked also? > + return bpf_iter_link_attach(attr, prog); > + > + return -EINVAL; > +} > + > #define BPF_LINK_CREATE_LAST_FIELD link_create.flags > static int link_create(union bpf_attr *attr) > { > @@ -3607,6 +3619,9 @@ static int link_create(union bpf_attr *attr) > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > ret = cgroup_bpf_link_attach(attr, prog); > break; > + case BPF_PROG_TYPE_TRACING: > + ret = tracing_bpf_link_attach(attr, prog); > + break; > default: > ret = -EINVAL; > } > -- > 2.24.1 >
On Mon, Apr 27, 2020 at 1:13 PM Yonghong Song <yhs@fb.com> wrote: > > Given a bpf program, the step to create an anonymous bpf iterator is: > - create a bpf_iter_link, which combines bpf program and the target. > In the future, there could be more information recorded in the link. > A link_fd will be returned to the user space. > - create an anonymous bpf iterator with the given link_fd. > > The anonymous bpf iterator (and its underlying bpf_link) will be > used to create file based bpf iterator as well. > > The benefit to use of bpf_iter_link: > - for file based bpf iterator, bpf_iter_link provides a standard > way to replace underlying bpf programs. > - for both anonymous and free based iterators, bpf link query > capability can be leveraged. > > The patch added support of tracing/iter programs for BPF_LINK_CREATE. > > Signed-off-by: Yonghong Song <yhs@fb.com> > --- > include/linux/bpf.h | 2 ++ > kernel/bpf/bpf_iter.c | 54 +++++++++++++++++++++++++++++++++++++++++++ > kernel/bpf/syscall.c | 15 ++++++++++++ > 3 files changed, 71 insertions(+) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 4ac8d61f7c3e..60ecb73d8f6d 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1034,6 +1034,7 @@ extern const struct file_operations bpf_prog_fops; > extern const struct bpf_prog_ops bpf_offload_prog_ops; > extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; > extern const struct bpf_verifier_ops xdp_analyzer_ops; > +extern const struct bpf_link_ops bpf_iter_link_lops; show_fdinfo implementation for bpf_link has changed, so thankfully this won't be necessary after you rebase on latest master :) > > struct bpf_prog *bpf_prog_get(u32 ufd); > struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, > @@ -1129,6 +1130,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); > struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, > u64 *session_id, u64 *seq_num, bool is_last); > int 
bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); > +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); > > int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); > int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); > diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c > index 284c95587803..9532e7bcb8e1 100644 > --- a/kernel/bpf/bpf_iter.c > +++ b/kernel/bpf/bpf_iter.c > @@ -14,6 +14,11 @@ struct bpf_iter_target_info { > u32 target_feature; > }; > > +struct bpf_iter_link { > + struct bpf_link link; > + struct bpf_iter_target_info *tinfo; > +}; > + > static struct list_head targets; > static struct mutex targets_mutex; > static bool bpf_iter_inited = false; > @@ -67,3 +72,52 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) > > return ret; > } > + > +static void bpf_iter_link_release(struct bpf_link *link) > +{ > +} > + > +static void bpf_iter_link_dealloc(struct bpf_link *link) > +{ Here you need to kfree() link struct. See bpf_raw_tp_link_dealloc() for example. > +} > + > +const struct bpf_link_ops bpf_iter_link_lops = { > + .release = bpf_iter_link_release, > + .dealloc = bpf_iter_link_dealloc, > +}; > + [...]
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4ac8d61f7c3e..60ecb73d8f6d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1034,6 +1034,7 @@ extern const struct file_operations bpf_prog_fops; extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; +extern const struct bpf_link_ops bpf_iter_link_lops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, @@ -1129,6 +1130,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, u64 *session_id, u64 *seq_num, bool is_last); int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 284c95587803..9532e7bcb8e1 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -14,6 +14,11 @@ struct bpf_iter_target_info { u32 target_feature; }; +struct bpf_iter_link { + struct bpf_link link; + struct bpf_iter_target_info *tinfo; +}; + static struct list_head targets; static struct mutex targets_mutex; static bool bpf_iter_inited = false; @@ -67,3 +72,52 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) return ret; } + +static void bpf_iter_link_release(struct bpf_link *link) +{ +} + +static void bpf_iter_link_dealloc(struct bpf_link *link) +{ +} + +const struct bpf_link_ops bpf_iter_link_lops = { + .release = bpf_iter_link_release, + .dealloc = bpf_iter_link_dealloc, +}; + +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_iter_target_info *tinfo; + struct bpf_iter_link *link; + const char *func_name; + bool 
existed = false; + int err; + + if (attr->link_create.target_fd || attr->link_create.flags) + return -EINVAL; + + func_name = prog->aux->attach_func_name; + mutex_lock(&targets_mutex); + list_for_each_entry(tinfo, &targets, list) { + if (!strcmp(tinfo->target_func_name, func_name)) { + existed = true; + break; + } + } + mutex_unlock(&targets_mutex); + if (!existed) + return -ENOENT; + + link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); + if (!link) + return -ENOMEM; + + bpf_link_init(&link->link, &bpf_iter_link_lops, prog); + link->tinfo = tinfo; + + err = bpf_link_new_fd(&link->link); + if (err < 0) + kfree(link); + return err; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 022187640943..8741b5e11c85 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2269,6 +2269,8 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) else if (link->ops == &bpf_cgroup_link_lops) link_type = "cgroup"; #endif + else if (link->ops == &bpf_iter_link_lops) + link_type = "iter"; else link_type = "unknown"; @@ -2597,6 +2599,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; + case BPF_TRACE_ITER: + return BPF_PROG_TYPE_TRACING; default: return BPF_PROG_TYPE_UNSPEC; } @@ -3571,6 +3575,14 @@ static int bpf_map_do_batch(const union bpf_attr *attr, return err; } +static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + if (attr->link_create.attach_type == BPF_TRACE_ITER) + return bpf_iter_link_attach(attr, prog); + + return -EINVAL; +} + #define BPF_LINK_CREATE_LAST_FIELD link_create.flags static int link_create(union bpf_attr *attr) { @@ -3607,6 +3619,9 @@ static int link_create(union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: ret = cgroup_bpf_link_attach(attr, prog); break; + case BPF_PROG_TYPE_TRACING: + ret = tracing_bpf_link_attach(attr, prog); + break; default: ret = -EINVAL; }
Given a bpf program, the steps to create an anonymous bpf iterator are: - create a bpf_iter_link, which combines bpf program and the target. In the future, there could be more information recorded in the link. A link_fd will be returned to the user space. - create an anonymous bpf iterator with the given link_fd. The anonymous bpf iterator (and its underlying bpf_link) will be used to create file based bpf iterators as well. The benefits of using bpf_iter_link: - for file based bpf iterators, bpf_iter_link provides a standard way to replace underlying bpf programs. - for both anonymous and file based iterators, bpf link query capability can be leveraged. This patch adds support for tracing/iter programs to BPF_LINK_CREATE. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 2 ++ kernel/bpf/bpf_iter.c | 54 +++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 15 ++++++++++++ 3 files changed, 71 insertions(+)