Message ID | 20200427201241.2995075-1-yhs@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: implement bpf iterator for kernel data | expand |
On Mon, Apr 27, 2020 at 01:12:41PM -0700, Yonghong Song wrote: > Added BPF_LINK_UPDATE support for tracing/iter programs. > This way, a file based bpf iterator, which holds a reference > to the link, can have its bpf program updated without > creating new files. > > Signed-off-by: Yonghong Song <yhs@fb.com> > --- > include/linux/bpf.h | 2 ++ > kernel/bpf/bpf_iter.c | 29 +++++++++++++++++++++++++++++ > kernel/bpf/syscall.c | 5 +++++ > 3 files changed, 36 insertions(+) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 60ecb73d8f6d..4fc39d9b5cd0 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1131,6 +1131,8 @@ struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, > u64 *session_id, u64 *seq_num, bool is_last); > int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); > int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); > +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, > + struct bpf_prog *new_prog); > > int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); > int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); > diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c > index 9532e7bcb8e1..fc1ce5ee5c3f 100644 > --- a/kernel/bpf/bpf_iter.c > +++ b/kernel/bpf/bpf_iter.c > @@ -23,6 +23,9 @@ static struct list_head targets; > static struct mutex targets_mutex; > static bool bpf_iter_inited = false; > > +/* protect bpf_iter_link.link->prog upddate */ > +static struct mutex bpf_iter_mutex; > + > int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) > { > struct bpf_iter_target_info *tinfo; > @@ -33,6 +36,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) > if (!bpf_iter_inited) { > INIT_LIST_HEAD(&targets); > mutex_init(&targets_mutex); > + mutex_init(&bpf_iter_mutex); > bpf_iter_inited = true; > } > > @@ -121,3 +125,28 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > kfree(link); > return err; > } > + > +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, > + struct bpf_prog *new_prog) > +{ > + int ret = 0; > + > + mutex_lock(&bpf_iter_mutex); > + if (old_prog && link->prog != old_prog) { > + ret = -EPERM; > + goto out_unlock; > + } > + > + if (link->prog->type != new_prog->type || > + link->prog->expected_attach_type != new_prog->expected_attach_type || > + strcmp(link->prog->aux->attach_func_name, new_prog->aux->attach_func_name)) { Can attach_btf_id be compared instead of strcmp()? > + ret = -EINVAL; > + goto out_unlock; > + } > + > + link->prog = new_prog; Does the old link->prog need a bpf_prog_put()? > + > +out_unlock: > + mutex_unlock(&bpf_iter_mutex); > + return ret; > +}
On 4/28/20 6:32 PM, Martin KaFai Lau wrote: > On Mon, Apr 27, 2020 at 01:12:41PM -0700, Yonghong Song wrote: >> Added BPF_LINK_UPDATE support for tracing/iter programs. >> This way, a file based bpf iterator, which holds a reference >> to the link, can have its bpf program updated without >> creating new files. >> >> Signed-off-by: Yonghong Song <yhs@fb.com> >> --- >> include/linux/bpf.h | 2 ++ >> kernel/bpf/bpf_iter.c | 29 +++++++++++++++++++++++++++++ >> kernel/bpf/syscall.c | 5 +++++ >> 3 files changed, 36 insertions(+) >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index 60ecb73d8f6d..4fc39d9b5cd0 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -1131,6 +1131,8 @@ struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, >> u64 *session_id, u64 *seq_num, bool is_last); >> int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); >> int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); >> +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, >> + struct bpf_prog *new_prog); >> >> int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); >> int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); >> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c >> index 9532e7bcb8e1..fc1ce5ee5c3f 100644 >> --- a/kernel/bpf/bpf_iter.c >> +++ b/kernel/bpf/bpf_iter.c >> @@ -23,6 +23,9 @@ static struct list_head targets; >> static struct mutex targets_mutex; >> static bool bpf_iter_inited = false; >> >> +/* protect bpf_iter_link.link->prog upddate */ >> +static struct mutex bpf_iter_mutex; >> + >> int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) >> { >> struct bpf_iter_target_info *tinfo; >> @@ -33,6 +36,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) >> if (!bpf_iter_inited) { >> INIT_LIST_HEAD(&targets); >> mutex_init(&targets_mutex); >> + mutex_init(&bpf_iter_mutex); >> bpf_iter_inited = true; >> } >> >> @@ -121,3 +125,28 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) >> kfree(link); >> return err; >> } >> + >> +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, >> + struct bpf_prog *new_prog) >> +{ >> + int ret = 0; >> + >> + mutex_lock(&bpf_iter_mutex); >> + if (old_prog && link->prog != old_prog) { >> + ret = -EPERM; >> + goto out_unlock; >> + } >> + >> + if (link->prog->type != new_prog->type || >> + link->prog->expected_attach_type != new_prog->expected_attach_type || >> + strcmp(link->prog->aux->attach_func_name, new_prog->aux->attach_func_name)) { > Can attach_btf_id be compared instead of strcmp()? Yes, we can do it. > >> + ret = -EINVAL; >> + goto out_unlock; >> + } >> + >> + link->prog = new_prog; > Does the old link->prog need a bpf_prog_put()? The old_prog is replaced in caller link_update (syscall.c): static int link_update(union bpf_attr *attr) { struct bpf_prog *old_prog = NULL, *new_prog; struct bpf_link *link; u32 flags; int ret; ... if (link->ops == &bpf_iter_link_lops) { ret = bpf_iter_link_replace(link, old_prog, new_prog); goto out_put_progs; } ret = -EINVAL; out_put_progs: if (old_prog) bpf_prog_put(old_prog); ... > >> + >> +out_unlock: >> + mutex_unlock(&bpf_iter_mutex); >> + return ret; >> +}
On Tue, Apr 28, 2020 at 10:04:54PM -0700, Yonghong Song wrote: > > > On 4/28/20 6:32 PM, Martin KaFai Lau wrote: > > On Mon, Apr 27, 2020 at 01:12:41PM -0700, Yonghong Song wrote: > > > Added BPF_LINK_UPDATE support for tracing/iter programs. > > > This way, a file based bpf iterator, which holds a reference > > > to the link, can have its bpf program updated without > > > creating new files. > > > [ ... ] > > > --- a/kernel/bpf/bpf_iter.c > > > +++ b/kernel/bpf/bpf_iter.c [ ... ] > > > @@ -121,3 +125,28 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > > > kfree(link); > > > return err; > > > } > > > + > > > +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, > > > + struct bpf_prog *new_prog) > > > +{ > > > + int ret = 0; > > > + > > > + mutex_lock(&bpf_iter_mutex); > > > + if (old_prog && link->prog != old_prog) { hmm.... If I read this function correctly, old_prog could be NULL here and it is only needed during BPF_F_REPLACE to ensure it is replacing a particular old_prog, no? > > > + ret = -EPERM; > > > + goto out_unlock; > > > + } > > > + > > > + if (link->prog->type != new_prog->type || > > > + link->prog->expected_attach_type != new_prog->expected_attach_type || > > > + strcmp(link->prog->aux->attach_func_name, new_prog->aux->attach_func_name)) { > > Can attach_btf_id be compared instead of strcmp()? > > Yes, we can do it. > > > > > > + ret = -EINVAL; > > > + goto out_unlock; > > > + } > > > + > > > + link->prog = new_prog; > > Does the old link->prog need a bpf_prog_put()? > > The old_prog is replaced in caller link_update (syscall.c): > static int link_update(union bpf_attr *attr) > { > struct bpf_prog *old_prog = NULL, *new_prog; > struct bpf_link *link; > u32 flags; > int ret; > ... > if (link->ops == &bpf_iter_link_lops) { > ret = bpf_iter_link_replace(link, old_prog, new_prog); > goto out_put_progs; > } > ret = -EINVAL; > > out_put_progs: > if (old_prog) > bpf_prog_put(old_prog); The old_prog in link_update() took a separate refcnt from bpf_prog_get(). I don't see how it is related to the existing refcnt held in the link->prog. or I am missing something in BPF_F_REPLACE?
On Tue, Apr 28, 2020 at 10:59 PM Martin KaFai Lau <kafai@fb.com> wrote: > > On Tue, Apr 28, 2020 at 10:04:54PM -0700, Yonghong Song wrote: > > > > > > On 4/28/20 6:32 PM, Martin KaFai Lau wrote: > > > On Mon, Apr 27, 2020 at 01:12:41PM -0700, Yonghong Song wrote: > > > > Added BPF_LINK_UPDATE support for tracing/iter programs. > > > > This way, a file based bpf iterator, which holds a reference > > > > to the link, can have its bpf program updated without > > > > creating new files. > > > > > > [ ... ] > > > > > --- a/kernel/bpf/bpf_iter.c > > > > +++ b/kernel/bpf/bpf_iter.c > > [ ... ] > > > > > @@ -121,3 +125,28 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > > > > kfree(link); > > > > return err; > > > > } > > > > + > > > > +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, > > > > + struct bpf_prog *new_prog) > > > > +{ > > > > + int ret = 0; > > > > + > > > > + mutex_lock(&bpf_iter_mutex); > > > > + if (old_prog && link->prog != old_prog) { > hmm.... > > If I read this function correctly, > old_prog could be NULL here and it is only needed during BPF_F_REPLACE > to ensure it is replacing a particular old_prog, no? Yes, do you see any problem with the above logic? > > > > > > + ret = -EPERM; > > > > + goto out_unlock; > > > > + } > > > > + > > > > + if (link->prog->type != new_prog->type || > > > > + link->prog->expected_attach_type != new_prog->expected_attach_type || > > > > + strcmp(link->prog->aux->attach_func_name, new_prog->aux->attach_func_name)) { > > > Can attach_btf_id be compared instead of strcmp()? > > > > Yes, we can do it. > > > > > > > > > + ret = -EINVAL; > > > > + goto out_unlock; > > > > + } > > > > + > > > > + link->prog = new_prog; > > > Does the old link->prog need a bpf_prog_put()? > > > > The old_prog is replaced in caller link_update (syscall.c): > > > static int link_update(union bpf_attr *attr) > > { > > struct bpf_prog *old_prog = NULL, *new_prog; > > struct bpf_link *link; > > u32 flags; > > int ret; > > ... > > if (link->ops == &bpf_iter_link_lops) { > > ret = bpf_iter_link_replace(link, old_prog, new_prog); > > goto out_put_progs; > > } > > ret = -EINVAL; > > > > out_put_progs: > > if (old_prog) > > bpf_prog_put(old_prog); > The old_prog in link_update() took a separate refcnt from bpf_prog_get(). > I don't see how it is related to the existing refcnt held in the link->prog. > > or I am missing something in BPF_F_REPLACE? Martin is right, bpf_iter_link_replace() needs to drop its own refcnt on old_prog, in addition to what generic link_update logic does here, because bpf_link_iter bumped old_prog's refcnt when it was created or updated last time.
On Tue, Apr 28, 2020 at 11:32:15PM -0700, Andrii Nakryiko wrote: > On Tue, Apr 28, 2020 at 10:59 PM Martin KaFai Lau <kafai@fb.com> wrote: > > > > On Tue, Apr 28, 2020 at 10:04:54PM -0700, Yonghong Song wrote: > > > > > > > > > On 4/28/20 6:32 PM, Martin KaFai Lau wrote: > > > > On Mon, Apr 27, 2020 at 01:12:41PM -0700, Yonghong Song wrote: > > > > > Added BPF_LINK_UPDATE support for tracing/iter programs. > > > > > This way, a file based bpf iterator, which holds a reference > > > > > to the link, can have its bpf program updated without > > > > > creating new files. > > > > > > > > > [ ... ] > > > > > > > --- a/kernel/bpf/bpf_iter.c > > > > > +++ b/kernel/bpf/bpf_iter.c > > > > [ ... ] > > > > > > > @@ -121,3 +125,28 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > > > > > kfree(link); > > > > > return err; > > > > > } > > > > > + > > > > > +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, > > > > > + struct bpf_prog *new_prog) > > > > > +{ > > > > > + int ret = 0; > > > > > + > > > > > + mutex_lock(&bpf_iter_mutex); > > > > > + if (old_prog && link->prog != old_prog) { > > hmm.... > > > > If I read this function correctly, > > old_prog could be NULL here and it is only needed during BPF_F_REPLACE > > to ensure it is replacing a particular old_prog, no? > > Yes, do you see any problem with the above logic? Not at all. I just want to point out that when old_prog is NULL, the link_update() would not even call bpf_prog_put(old_prog). > > > > > > > > > > + ret = -EPERM; > > > > > + goto out_unlock; > > > > > + } > > > > > + > > > > > + if (link->prog->type != new_prog->type || > > > > > + link->prog->expected_attach_type != new_prog->expected_attach_type || > > > > > + strcmp(link->prog->aux->attach_func_name, new_prog->aux->attach_func_name)) { > > > > Can attach_btf_id be compared instead of strcmp()? > > > > > > Yes, we can do it. > > > > > > > > > > > > + ret = -EINVAL; > > > > > + goto out_unlock; > > > > > + } > > > > > + > > > > > + link->prog = new_prog; > > > > Does the old link->prog need a bpf_prog_put()? > > > > > > The old_prog is replaced in caller link_update (syscall.c): > > > > > static int link_update(union bpf_attr *attr) > > > { > > > struct bpf_prog *old_prog = NULL, *new_prog; > > > struct bpf_link *link; > > > u32 flags; > > > int ret; > > > ... > > > if (link->ops == &bpf_iter_link_lops) { > > > ret = bpf_iter_link_replace(link, old_prog, new_prog); > > > goto out_put_progs; > > > } > > > ret = -EINVAL; > > > > > > out_put_progs: > > > if (old_prog) > > > bpf_prog_put(old_prog); > > The old_prog in link_update() took a separate refcnt from bpf_prog_get(). > > I don't see how it is related to the existing refcnt held in the link->prog. > > > > or I am missing something in BPF_F_REPLACE? > > Martin is right, bpf_iter_link_replace() needs to drop its own refcnt > on old_prog, in addition to what generic link_update logic does here, > because bpf_link_iter bumped old_prog's refcnt when it was created or > updated last time.
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 60ecb73d8f6d..4fc39d9b5cd0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1131,6 +1131,8 @@ struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, u64 *session_id, u64 *seq_num, bool is_last); int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, + struct bpf_prog *new_prog); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 9532e7bcb8e1..fc1ce5ee5c3f 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -23,6 +23,9 @@ static struct list_head targets; static struct mutex targets_mutex; static bool bpf_iter_inited = false; +/* protect bpf_iter_link.link->prog upddate */ +static struct mutex bpf_iter_mutex; + int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) { struct bpf_iter_target_info *tinfo; @@ -33,6 +36,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) if (!bpf_iter_inited) { INIT_LIST_HEAD(&targets); mutex_init(&targets_mutex); + mutex_init(&bpf_iter_mutex); bpf_iter_inited = true; } @@ -121,3 +125,28 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) kfree(link); return err; } + +int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog, + struct bpf_prog *new_prog) +{ + int ret = 0; + + mutex_lock(&bpf_iter_mutex); + if (old_prog && link->prog != old_prog) { + ret = -EPERM; + goto out_unlock; + } + + if (link->prog->type != new_prog->type || + link->prog->expected_attach_type != new_prog->expected_attach_type || + strcmp(link->prog->aux->attach_func_name, new_prog->aux->attach_func_name)) { + ret = -EINVAL; + goto out_unlock; + } + + link->prog = new_prog; + +out_unlock: + mutex_unlock(&bpf_iter_mutex); + return ret; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8741b5e11c85..b7af4f006f2e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3679,6 +3679,11 @@ static int link_update(union bpf_attr *attr) goto out_put_progs; } #endif + + if (link->ops == &bpf_iter_link_lops) { + ret = bpf_iter_link_replace(link, old_prog, new_prog); + goto out_put_progs; + } ret = -EINVAL; out_put_progs:
Added BPF_LINK_UPDATE support for tracing/iter programs. This way, a file based bpf iterator, which holds a reference to the link, can have its bpf program updated without creating new files. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 2 ++ kernel/bpf/bpf_iter.c | 29 +++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 5 +++++ 3 files changed, 36 insertions(+)