diff mbox series

[bpf-next,v3,08/21] bpf: implement common macros/helpers for target iterators

Message ID 20200507053924.1543103-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 7, 2020, 5:39 a.m. UTC
Macro DEFINE_BPF_ITER_FUNC is implemented so target
can define an init function to capture the BTF type
which represents the target.

The bpf_iter_meta is a structure holding meta data, common
to all targets in the bpf program.

Additional marker functions are called before or after
bpf_seq_read() show()/next()/stop() callback functions
to help calculate precise seq_num and whether call bpf_prog
inside stop().

Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
are implemented so target can get needed information from
bpf_iter infrastructure and can run the program.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h   | 11 ++++++
 kernel/bpf/bpf_iter.c | 86 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 92 insertions(+), 5 deletions(-)

Comments

Andrii Nakryiko May 8, 2020, 7:07 p.m. UTC | #1
On Wed, May 6, 2020 at 10:40 PM Yonghong Song <yhs@fb.com> wrote:
>
> Macro DEFINE_BPF_ITER_FUNC is implemented so target
> can define an init function to capture the BTF type
> which represents the target.
>
> The bpf_iter_meta is a structure holding meta data, common
> to all targets in the bpf program.
>
> Additional marker functions are called before or after
> bpf_seq_read() show()/next()/stop() callback functions
> to help calculate precise seq_num and whether call bpf_prog
> inside stop().
>
> Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
> are implemented so target can get needed information from
> bpf_iter infrastructure and can run the program.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h   | 11 ++++++
>  kernel/bpf/bpf_iter.c | 86 ++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 92 insertions(+), 5 deletions(-)
>

Looks good. I was worried about re-using seq_num when element is
skipped, but this could already happen that same seq_num is associated
with different objects: overflow + retry returns different object
(because iteration is not a snapshot, so the element could be gone on
retry). Both cases will have to be handled in about the same fashion,
so it's fine.

Hm... Could this be a problem for start() implementation? E.g., if
object is still there, but iterator wants to skip it permanently.
Re-using seq_num will mean that start() will keep trying to fetch same
to-be-skipped element? Not sure, please think about it, but we can fix
it up later, if necessary.

Acked-by: Andrii Nakryiko <andriin@fb.com>

[...]

> @@ -112,11 +143,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>                         err = PTR_ERR(p);
>                         break;
>                 }
> +
> +               /* get a valid next object, increase seq_num */

typo: get -> got

> +               bpf_iter_inc_seq_num(seq);
> +
>                 if (seq->count >= size)
>                         break;
>
>                 err = seq->op->show(seq, p);
>                 if (err > 0) {
> +                       bpf_iter_dec_seq_num(seq);
>                         seq->count = offs;
>                 } else if (err < 0 || seq_has_overflowed(seq)) {
>                         seq->count = offs;

[...]
Yonghong Song May 9, 2020, 3:18 a.m. UTC | #2
On 5/8/20 12:07 PM, Andrii Nakryiko wrote:
> On Wed, May 6, 2020 at 10:40 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> Macro DEFINE_BPF_ITER_FUNC is implemented so target
>> can define an init function to capture the BTF type
>> which represents the target.
>>
>> The bpf_iter_meta is a structure holding meta data, common
>> to all targets in the bpf program.
>>
>> Additional marker functions are called before or after
>> bpf_seq_read() show()/next()/stop() callback functions
>> to help calculate precise seq_num and whether call bpf_prog
>> inside stop().
>>
>> Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
>> are implemented so target can get needed information from
>> bpf_iter infrastructure and can run the program.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   include/linux/bpf.h   | 11 ++++++
>>   kernel/bpf/bpf_iter.c | 86 ++++++++++++++++++++++++++++++++++++++++---
>>   2 files changed, 92 insertions(+), 5 deletions(-)
>>
> 
> Looks good. I was worried about re-using seq_num when element is
> skipped, but this could already happen that same seq_num is associated
> with different objects: overflow + retry returns different object
> (because iteration is not a snapshot, so the element could be gone on
> retry). Both cases will have to be handled in about the same fashion,
> so it's fine.

This is what I thought as well.

> 
> Hm... Could this be a problem for start() implementation? E.g., if
> object is still there, but iterator wants to skip it permanently.
> Re-using seq_num will mean that start() will keep trying to fetch same
> to-be-skipped element? Not sure, please think about it, but we can fix
> it up later, if necessary.

The seq_num is for bpf_program context. It does not affect how start()
behaves. The start() MAY retry the same element over and over again
if show() overflows or returns <0, but in which case, user space
should check the return error code to decide to retry or give up.

> 
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> 
> [...]
> 
>> @@ -112,11 +143,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>>                          err = PTR_ERR(p);
>>                          break;
>>                  }
>> +
>> +               /* get a valid next object, increase seq_num */
> 
> typo: get -> got

Ack.

> 
>> +               bpf_iter_inc_seq_num(seq);
>> +
>>                  if (seq->count >= size)
>>                          break;
>>
>>                  err = seq->op->show(seq, p);
>>                  if (err > 0) {
>> +                       bpf_iter_dec_seq_num(seq);
>>                          seq->count = offs;
>>                  } else if (err < 0 || seq_has_overflowed(seq)) {
>>                          seq->count = offs;
> 
> [...]
>
Andrii Nakryiko May 12, 2020, 3:16 a.m. UTC | #3
On Fri, May 8, 2020 at 8:18 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 5/8/20 12:07 PM, Andrii Nakryiko wrote:
> > On Wed, May 6, 2020 at 10:40 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> Macro DEFINE_BPF_ITER_FUNC is implemented so target
> >> can define an init function to capture the BTF type
> >> which represents the target.
> >>
> >> The bpf_iter_meta is a structure holding meta data, common
> >> to all targets in the bpf program.
> >>
> >> Additional marker functions are called before or after
> >> bpf_seq_read() show()/next()/stop() callback functions
> >> to help calculate precise seq_num and whether call bpf_prog
> >> inside stop().
> >>
> >> Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
> >> are implemented so target can get needed information from
> >> bpf_iter infrastructure and can run the program.
> >>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   include/linux/bpf.h   | 11 ++++++
> >>   kernel/bpf/bpf_iter.c | 86 ++++++++++++++++++++++++++++++++++++++++---
> >>   2 files changed, 92 insertions(+), 5 deletions(-)
> >>
> >
> > Looks good. I was worried about re-using seq_num when element is
> > skipped, but this could already happen that same seq_num is associated
> > with different objects: overflow + retry returns different object
> > (because iteration is not a snapshot, so the element could be gone on
> > retry). Both cases will have to be handled in about the same fashion,
> > so it's fine.
>
> This is what I thought as well.
>
> >
> > Hm... Could this be a problem for start() implementation? E.g., if
> > object is still there, but iterator wants to skip it permanently.
> > Re-using seq_num will mean that start() will keep trying to fetch same
> > to-be-skipped element? Not sure, please think about it, but we can fix
> > it up later, if necessary.
>
> The seq_num is for bpf_program context. It does not affect how start()
> behaves. The start() MAY retry the same element over and over again
> if show() overflows or returns <0, but in which case, user space
> should check the return error code to decide to retry or give up.

ah, right seq_num vs internal id, makes sense, never mind :)

>
> >
> > Acked-by: Andrii Nakryiko <andriin@fb.com>
> >
> > [...]
> >
> >> @@ -112,11 +143,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> >>                          err = PTR_ERR(p);
> >>                          break;
> >>                  }
> >> +
> >> +               /* get a valid next object, increase seq_num */
> >
> > typo: get -> got
>
> Ack.
>
> >
> >> +               bpf_iter_inc_seq_num(seq);
> >> +
> >>                  if (seq->count >= size)
> >>                          break;
> >>
> >>                  err = seq->op->show(seq, p);
> >>                  if (err > 0) {
> >> +                       bpf_iter_dec_seq_num(seq);
> >>                          seq->count = offs;
> >>                  } else if (err < 0 || seq_has_overflowed(seq)) {
> >>                          seq->count = offs;
> >
> > [...]
> >
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b06653ab3476..ffe0b9b669bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1129,6 +1129,9 @@  int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
 #define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+#define DEFINE_BPF_ITER_FUNC(target, args...)			\
+	extern int __bpf_iter__ ## target(args);		\
+	int __init __bpf_iter__ ## target(args) { return 0; }
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
@@ -1141,12 +1144,20 @@  struct bpf_iter_reg {
 	u32 seq_priv_size;
 };
 
+struct bpf_iter_meta {
+	__bpf_md_ptr(struct seq_file *, seq);
+	u64 session_id;
+	u64 seq_num;
+};
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b0b8726c08f8..bf4fb7d7267b 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -41,6 +41,33 @@  static atomic64_t session_id;
 
 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
 
+static void bpf_iter_inc_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->seq_num++;
+}
+
+static void bpf_iter_dec_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->seq_num--;
+}
+
+static void bpf_iter_done_stop(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->done_stop = true;
+}
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
@@ -87,6 +114,10 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 
 	err = seq->op->show(seq, p);
 	if (err > 0) {
+		/* object is skipped, decrease seq_num, so next
+		 * valid object can reuse the same seq_num.
+		 */
+		bpf_iter_dec_seq_num(seq);
 		seq->count = 0;
 	} else if (err < 0 || seq_has_overflowed(seq)) {
 		if (!err)
@@ -112,11 +143,16 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 			err = PTR_ERR(p);
 			break;
 		}
+
+		/* get a valid next object, increase seq_num */
+		bpf_iter_inc_seq_num(seq);
+
 		if (seq->count >= size)
 			break;
 
 		err = seq->op->show(seq, p);
 		if (err > 0) {
+			bpf_iter_dec_seq_num(seq);
 			seq->count = offs;
 		} else if (err < 0 || seq_has_overflowed(seq)) {
 			seq->count = offs;
@@ -133,11 +169,15 @@  static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 	offs = seq->count;
 	/* bpf program called if !p */
 	seq->op->stop(seq, p);
-	if (!p && seq_has_overflowed(seq)) {
-		seq->count = offs;
-		if (offs == 0) {
-			err = -E2BIG;
-			goto done;
+	if (!p) {
+		if (!seq_has_overflowed(seq)) {
+			bpf_iter_done_stop(seq);
+		} else {
+			seq->count = offs;
+			if (offs == 0) {
+				err = -E2BIG;
+				goto done;
+			}
 		}
 	}
 
@@ -448,3 +488,39 @@  int bpf_iter_new_fd(struct bpf_link *link)
 	put_unused_fd(fd);
 	return err;
 }
+
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
+{
+	struct bpf_iter_priv_data *iter_priv;
+	struct seq_file *seq;
+	void *seq_priv;
+
+	seq = meta->seq;
+	if (seq->file->f_op != &bpf_iter_fops)
+		return NULL;
+
+	seq_priv = seq->private;
+	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
+				 target_private);
+
+	if (in_stop && iter_priv->done_stop)
+		return NULL;
+
+	meta->session_id = iter_priv->session_id;
+	meta->seq_num = iter_priv->seq_num;
+
+	return iter_priv->prog;
+}
+
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
+{
+	int ret;
+
+	rcu_read_lock();
+	migrate_disable();
+	ret = BPF_PROG_RUN(prog, ctx);
+	migrate_enable();
+	rcu_read_unlock();
+
+	return ret == 0 ? 0 : -EAGAIN;
+}