diff mbox series

[bpf-next,v2,08/20] bpf: implement common macros/helpers for target iterators

Message ID 20200504062555.2048028-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 4, 2020, 6:25 a.m. UTC
Macro DEFINE_BPF_ITER_FUNC is implemented so that a target
can define an init function to capture the BTF type
which represents the target.

The bpf_iter_meta is a structure holding meta data, common
to all targets in the bpf program.

Additional marker functions are called before/after the
show() and stop() callback functions in bpf_seq_read()
to help calculate a precise seq_num and to decide whether
to call the bpf_prog inside stop().

Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
are implemented so that a target can get the needed information
from the bpf_iter infrastructure and can run the program.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h   | 11 +++++
 kernel/bpf/bpf_iter.c | 94 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 100 insertions(+), 5 deletions(-)

Comments

Andrii Nakryiko May 5, 2020, 8:25 p.m. UTC | #1
On Sun, May 3, 2020 at 11:28 PM Yonghong Song <yhs@fb.com> wrote:
>
> Macro DEFINE_BPF_ITER_FUNC is implemented so target
> can define an init function to capture the BTF type
> which represents the target.
>
> The bpf_iter_meta is a structure holding meta data, common
> to all targets in the bpf program.
>
> Additional marker functions are called before/after
> bpf_seq_read() show() and stop() callback functions
> to help calculate precise seq_num and whether call bpf_prog
> inside stop().
>
> Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
> are implemented so target can get needed information from
> bpf_iter infrastructure and can run the program.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h   | 11 +++++
>  kernel/bpf/bpf_iter.c | 94 ++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 100 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 26daf85cba10..70c71c3cd9e8 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1129,6 +1129,9 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
>  int bpf_obj_get_user(const char __user *pathname, int flags);
>
>  #define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
> +#define DEFINE_BPF_ITER_FUNC(target, args...)                  \
> +       extern int __bpf_iter__ ## target(args);                \
> +       int __init __bpf_iter__ ## target(args) { return 0; }

Why is the extern declaration needed here? Doesn't the same macro define
the global function itself? I'm probably missing some C semantics thingy,
sorry...

>
>  typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
>  typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
> @@ -1141,11 +1144,19 @@ struct bpf_iter_reg {
>         u32 seq_priv_size;
>  };
>
> +struct bpf_iter_meta {
> +       __bpf_md_ptr(struct seq_file *, seq);
> +       u64 session_id;
> +       u64 seq_num;
> +};
> +

[...]

>  /* bpf_seq_read, a customized and simpler version for bpf iterator.
>   * no_llseek is assumed for this file.
>   * The following are differences from seq_read():
> @@ -83,12 +119,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>         if (!p || IS_ERR(p))
>                 goto Stop;
>
> +       bpf_iter_inc_seq_num(seq);

So seq_num is one-based, not zero-based? On the first show() call it
will be set to 1, not 0, right?

>         err = seq->op->show(seq, p);
>         if (seq_has_overflowed(seq)) {
> +               bpf_iter_dec_seq_num(seq);
>                 err = -E2BIG;
>                 goto Error_show;
>         } else if (err) {
>                 /* < 0: go out, > 0: skip */
> +               bpf_iter_dec_seq_num(seq);
>                 if (likely(err < 0))
>                         goto Error_show;
>                 seq->count = 0;

[...]
Yonghong Song May 5, 2020, 8:30 p.m. UTC | #2
On 5/5/20 1:25 PM, Andrii Nakryiko wrote:
> On Sun, May 3, 2020 at 11:28 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> Macro DEFINE_BPF_ITER_FUNC is implemented so target
>> can define an init function to capture the BTF type
>> which represents the target.
>>
>> The bpf_iter_meta is a structure holding meta data, common
>> to all targets in the bpf program.
>>
>> Additional marker functions are called before/after
>> bpf_seq_read() show() and stop() callback functions
>> to help calculate precise seq_num and whether call bpf_prog
>> inside stop().
>>
>> Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
>> are implemented so target can get needed information from
>> bpf_iter infrastructure and can run the program.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   include/linux/bpf.h   | 11 +++++
>>   kernel/bpf/bpf_iter.c | 94 ++++++++++++++++++++++++++++++++++++++++---
>>   2 files changed, 100 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 26daf85cba10..70c71c3cd9e8 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1129,6 +1129,9 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
>>   int bpf_obj_get_user(const char __user *pathname, int flags);
>>
>>   #define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
>> +#define DEFINE_BPF_ITER_FUNC(target, args...)                  \
>> +       extern int __bpf_iter__ ## target(args);                \
>> +       int __init __bpf_iter__ ## target(args) { return 0; }
> 
> Why is extern declaration needed here? Doesn't the same macro define

To silence a sparse warning. Apparently, in the kernel, every global
function is expected to have a declaration?

> global function itself? I'm probably missing some C semantics thingy,
> sorry...
> 
>>
>>   typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
>>   typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
>> @@ -1141,11 +1144,19 @@ struct bpf_iter_reg {
>>          u32 seq_priv_size;
>>   };
>>
>> +struct bpf_iter_meta {
>> +       __bpf_md_ptr(struct seq_file *, seq);
>> +       u64 session_id;
>> +       u64 seq_num;
>> +};
>> +
> 
> [...]
> 
>>   /* bpf_seq_read, a customized and simpler version for bpf iterator.
>>    * no_llseek is assumed for this file.
>>    * The following are differences from seq_read():
>> @@ -83,12 +119,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>>          if (!p || IS_ERR(p))
>>                  goto Stop;
>>
>> +       bpf_iter_inc_seq_num(seq);
> 
> so seq_num is one-based, not zero-based? So on first show() call it
> will be set to 1, not 0, right?

It is 1-based; we need to document this clearly. I forgot to adjust my
bpf programs for this. I will adjust them properly in the next revision.
> 
>>          err = seq->op->show(seq, p);
>>          if (seq_has_overflowed(seq)) {
>> +               bpf_iter_dec_seq_num(seq);
>>                  err = -E2BIG;
>>                  goto Error_show;
>>          } else if (err) {
>>                  /* < 0: go out, > 0: skip */
>> +               bpf_iter_dec_seq_num(seq);
>>                  if (likely(err < 0))
>>                          goto Error_show;
>>                  seq->count = 0;
> 
> [...]
>
Andrii Nakryiko May 5, 2020, 9:10 p.m. UTC | #3
On Tue, May 5, 2020 at 1:30 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 5/5/20 1:25 PM, Andrii Nakryiko wrote:
> > On Sun, May 3, 2020 at 11:28 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> Macro DEFINE_BPF_ITER_FUNC is implemented so target
> >> can define an init function to capture the BTF type
> >> which represents the target.
> >>
> >> The bpf_iter_meta is a structure holding meta data, common
> >> to all targets in the bpf program.
> >>
> >> Additional marker functions are called before/after
> >> bpf_seq_read() show() and stop() callback functions
> >> to help calculate precise seq_num and whether call bpf_prog
> >> inside stop().
> >>
> >> Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
> >> are implemented so target can get needed information from
> >> bpf_iter infrastructure and can run the program.
> >>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   include/linux/bpf.h   | 11 +++++
> >>   kernel/bpf/bpf_iter.c | 94 ++++++++++++++++++++++++++++++++++++++++---
> >>   2 files changed, 100 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >> index 26daf85cba10..70c71c3cd9e8 100644
> >> --- a/include/linux/bpf.h
> >> +++ b/include/linux/bpf.h
> >> @@ -1129,6 +1129,9 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
> >>   int bpf_obj_get_user(const char __user *pathname, int flags);
> >>
> >>   #define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
> >> +#define DEFINE_BPF_ITER_FUNC(target, args...)                  \
> >> +       extern int __bpf_iter__ ## target(args);                \
> >> +       int __init __bpf_iter__ ## target(args) { return 0; }
> >
> > Why is extern declaration needed here? Doesn't the same macro define
>
> Silence sparse warning. Apparently in kernel, any global function, they
> want a declaration?

Ah.. alright :)

>
> > global function itself? I'm probably missing some C semantics thingy,
> > sorry...
> >
> >>
> >>   typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
> >>   typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
> >> @@ -1141,11 +1144,19 @@ struct bpf_iter_reg {
> >>          u32 seq_priv_size;
> >>   };
> >>
> >> +struct bpf_iter_meta {
> >> +       __bpf_md_ptr(struct seq_file *, seq);
> >> +       u64 session_id;
> >> +       u64 seq_num;
> >> +};
> >> +
> >
> > [...]
> >
> >>   /* bpf_seq_read, a customized and simpler version for bpf iterator.
> >>    * no_llseek is assumed for this file.
> >>    * The following are differences from seq_read():
> >> @@ -83,12 +119,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> >>          if (!p || IS_ERR(p))
> >>                  goto Stop;
> >>
> >> +       bpf_iter_inc_seq_num(seq);
> >
> > so seq_num is one-based, not zero-based? So on first show() call it
> > will be set to 1, not 0, right?
>
> It is 1 based, we need to document this clearly. I forgot to adjust my
> bpf program for this. Will adjust them properly in the next revision.

I see. IMO, seq_num starting at 0 is more natural, but whichever way
is fine with me.

> >
> >>          err = seq->op->show(seq, p);
> >>          if (seq_has_overflowed(seq)) {
> >> +               bpf_iter_dec_seq_num(seq);
> >>                  err = -E2BIG;
> >>                  goto Error_show;
> >>          } else if (err) {
> >>                  /* < 0: go out, > 0: skip */
> >> +               bpf_iter_dec_seq_num(seq);
> >>                  if (likely(err < 0))
> >>                          goto Error_show;
> >>                  seq->count = 0;
> >
> > [...]
> >
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 26daf85cba10..70c71c3cd9e8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1129,6 +1129,9 @@  int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
 #define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+#define DEFINE_BPF_ITER_FUNC(target, args...)			\
+	extern int __bpf_iter__ ## target(args);		\
+	int __init __bpf_iter__ ## target(args) { return 0; }
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
@@ -1141,11 +1144,19 @@  struct bpf_iter_reg {
 	u32 seq_priv_size;
 };
 
+struct bpf_iter_meta {
+	__bpf_md_ptr(struct seq_file *, seq);
+	u64 session_id;
+	u64 seq_num;
+};
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 8bd787f3db6f..90d58c589816 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -43,6 +43,42 @@  static atomic64_t session_id;
 
 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
 
+static void bpf_iter_inc_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->seq_num++;
+}
+
+static void bpf_iter_dec_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->seq_num--;
+}
+
+static void bpf_iter_set_stop(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->do_stop++;
+}
+
+static void bpf_iter_unset_stop(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->do_stop--;
+}
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
@@ -83,12 +119,15 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 	if (!p || IS_ERR(p))
 		goto Stop;
 
+	bpf_iter_inc_seq_num(seq);
 	err = seq->op->show(seq, p);
 	if (seq_has_overflowed(seq)) {
+		bpf_iter_dec_seq_num(seq);
 		err = -E2BIG;
 		goto Error_show;
 	} else if (err) {
 		/* < 0: go out, > 0: skip */
+		bpf_iter_dec_seq_num(seq);
 		if (likely(err < 0))
 			goto Error_show;
 		seq->count = 0;
@@ -113,8 +152,10 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 		if (seq->count >= size)
 			break;
 
+		bpf_iter_inc_seq_num(seq);
 		err = seq->op->show(seq, p);
 		if (seq_has_overflowed(seq)) {
+			bpf_iter_dec_seq_num(seq);
 			if (offs == 0) {
 				err = -E2BIG;
 				goto Error_show;
@@ -122,6 +163,7 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 			seq->count = offs;
 			break;
 		} else if (err) {
+			bpf_iter_dec_seq_num(seq);
 			/* < 0: go out, > 0: skip */
 			seq->count = offs;
 			if (likely(err < 0)) {
@@ -134,11 +176,17 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 Stop:
 	offs = seq->count;
 	/* may call bpf program */
-	seq->op->stop(seq, p);
-	if (seq_has_overflowed(seq)) {
-		if (offs == 0)
-			goto Error_stop;
-		seq->count = offs;
+	if (!p) {
+		bpf_iter_set_stop(seq);
+		seq->op->stop(seq, p);
+		if (seq_has_overflowed(seq)) {
+			bpf_iter_unset_stop(seq);
+			if (offs == 0)
+				goto Error_stop;
+			seq->count = offs;
+		}
+	} else {
+		seq->op->stop(seq, p);
 	}
 
 	n = min(seq->count, size);
@@ -432,3 +480,39 @@  int bpf_iter_new_fd(struct bpf_link *link)
 	put_unused_fd(fd);
 	return err;
 }
+
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
+{
+	struct bpf_iter_priv_data *iter_priv;
+	struct seq_file *seq;
+	void *seq_priv;
+
+	seq = meta->seq;
+	if (seq->file->f_op != &bpf_iter_fops)
+		return NULL;
+
+	seq_priv = seq->private;
+	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
+				 target_private);
+
+	if (in_stop && iter_priv->do_stop != 1)
+		return NULL;
+
+	meta->session_id = iter_priv->session_id;
+	meta->seq_num = iter_priv->seq_num;
+
+	return iter_priv->prog;
+}
+
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
+{
+	int ret;
+
+	rcu_read_lock();
+	migrate_disable();
+	ret = BPF_PROG_RUN(prog, ctx);
+	migrate_enable();
+	rcu_read_unlock();
+
+	return ret == 0 ? 0 : -EAGAIN;
+}