diff mbox series

[bpf-next,v3,05/21] bpf: implement bpf_seq_read() for bpf iterator

Message ID 20200507053920.1542763-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 7, 2020, 5:39 a.m. UTC
bpf iterator uses seq_file to provide a lossless
way to transfer data to user space. But we want to call
bpf program after all objects have been traversed, and
bpf program may write additional data to the
seq_file buffer. The current seq_read() does not work
for this use case.

Besides allowing stop() function to write to the buffer,
the bpf_seq_read() also fixed the buffer size to one page.
If any single call of show() or stop() will emit data
more than one page to cause overflow, -E2BIG error code
will be returned to user space.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 kernel/bpf/bpf_iter.c | 118 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

Comments

Andrii Nakryiko May 8, 2020, 6:52 p.m. UTC | #1
On Wed, May 6, 2020 at 10:39 PM Yonghong Song <yhs@fb.com> wrote:
>
> bpf iterator uses seq_file to provide a lossless
> way to transfer data to user space. But we want to call
> bpf program after all objects have been traversed, and
> bpf program may write additional data to the
> seq_file buffer. The current seq_read() does not work
> for this use case.
>
> Besides allowing stop() function to write to the buffer,
> the bpf_seq_read() also fixed the buffer size to one page.
> If any single call of show() or stop() will emit data
> more than one page to cause overflow, -E2BIG error code
> will be returned to user space.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

This loop is much simpler and more streamlined now, thanks a lot! I
think it's correct, see below about one confusing (but apparently
correct) bit, though. Either way:

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  kernel/bpf/bpf_iter.c | 118 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 118 insertions(+)
>
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index 0542a243b78c..f198597b0ea4 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -26,6 +26,124 @@ static DEFINE_MUTEX(targets_mutex);
>  /* protect bpf_iter_link changes */
>  static DEFINE_MUTEX(link_mutex);
>
> +/* bpf_seq_read, a customized and simpler version for bpf iterator.
> + * no_llseek is assumed for this file.
> + * The following are differences from seq_read():
> + *  . fixed buffer size (PAGE_SIZE)
> + *  . assuming no_llseek
> + *  . stop() may call bpf program, handling potential overflow there
> + */
> +static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> +                           loff_t *ppos)
> +{
> +       struct seq_file *seq = file->private_data;
> +       size_t n, offs, copied = 0;
> +       int err = 0;
> +       void *p;
> +
> +       mutex_lock(&seq->lock);
> +
> +       if (!seq->buf) {
> +               seq->size = PAGE_SIZE;
> +               seq->buf = kmalloc(seq->size, GFP_KERNEL);
> +               if (!seq->buf) {
> +                       err = -ENOMEM;
> +                       goto done;

oh, thank you for converting to all lower-case label names! :)

> +               }
> +       }
> +
> +       if (seq->count) {
> +               n = min(seq->count, size);
> +               err = copy_to_user(buf, seq->buf + seq->from, n);
> +               if (err) {
> +                       err = -EFAULT;
> +                       goto done;
> +               }
> +               seq->count -= n;
> +               seq->from += n;
> +               copied = n;
> +               goto done;
> +       }
> +
> +       seq->from = 0;
> +       p = seq->op->start(seq, &seq->index);
> +       if (IS_ERR_OR_NULL(p))
> +               goto stop;

if start() returns IS_ERR(p), stop(p) below won't produce any output
(because BPF program is called only for p == NULL), so we'll just
return 0 with no error, do I interpret the code correctly? I think
seq_file's read actually returns PTR_ERR(p) as a result in this case.

so I think you need err = PTR_ERR(p); before goto stop here?

> +
> +       err = seq->op->show(seq, p);
> +       if (err > 0) {
> +               seq->count = 0;
> +       } else if (err < 0 || seq_has_overflowed(seq)) {
> +               if (!err)
> +                       err = -E2BIG;
> +               seq->count = 0;
> +               seq->op->stop(seq, p);
> +               goto done;
> +       }
> +
> +       while (1) {
> +               loff_t pos = seq->index;
> +
> +               offs = seq->count;
> +               p = seq->op->next(seq, p, &seq->index);
> +               if (pos == seq->index) {
> +                       pr_info_ratelimited("buggy seq_file .next function %ps "
> +                               "did not updated position index\n",
> +                               seq->op->next);
> +                       seq->index++;
> +               }
> +
> +               if (IS_ERR_OR_NULL(p)) {
> +                       err = PTR_ERR(p);
> +                       break;
> +               }
> +               if (seq->count >= size)
> +                       break;
> +
> +               err = seq->op->show(seq, p);
> +               if (err > 0) {
> +                       seq->count = offs;
> +               } else if (err < 0 || seq_has_overflowed(seq)) {
> +                       seq->count = offs;
> +                       if (!err)
> +                               err = -E2BIG;

nit: this -E2BIG is set unconditionally even for 2nd+ show(). This
will work, because it will get ignored on next iteration, but I think
it will be much more obvious if written as:

if (!err && offs = 0)
    err = -E2BIG;

It took me few re-readings of the code I'm pretty familiar with
already to realize that this is ok.

I had to write the below piece to realize that this is fine :) Just
leaving here just in case you find it useful:

else if (err < 0 || seq_has_overflowed(seq)) {
    if (!err && offs == 0) /* overflow in first show() output */
        err = -E2BIG;
    if (err) {             /* overflow in first show() or real error happened */
        seq->count = 0; /* not strictly necessary, but shows that we
are truncating output */
        seq->op->stop(seq, p);
        goto done; /* done will return err */
    }
    /* no error and overflow for 2nd+ show(), roll back output and stop */
    seq->count = offs;
    break;
}

> +                       if (offs == 0) {
> +                               seq->op->stop(seq, p);
> +                               goto done;
> +                       }
> +                       break;
> +               }
> +       }
> +stop:
> +       offs = seq->count;
> +       /* bpf program called if !p */
> +       seq->op->stop(seq, p);
> +       if (!p && seq_has_overflowed(seq)) {
> +               seq->count = offs;
> +               if (offs == 0) {
> +                       err = -E2BIG;
> +                       goto done;
> +               }
> +       }
> +
> +       n = min(seq->count, size);
> +       err = copy_to_user(buf, seq->buf, n);
> +       if (err) {
> +               err = -EFAULT;
> +               goto done;
> +       }
> +       copied = n;
> +       seq->count -= n;
> +       seq->from = n;
> +done:
> +       if (!copied)
> +               copied = err;
> +       else
> +               *ppos += copied;
> +       mutex_unlock(&seq->lock);
> +       return copied;
> +}
> +
>  int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>  {
>         struct bpf_iter_target_info *tinfo;
> --
> 2.24.1
>
Yonghong Song May 9, 2020, 1:41 a.m. UTC | #2
On 5/8/20 11:52 AM, Andrii Nakryiko wrote:
> On Wed, May 6, 2020 at 10:39 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> bpf iterator uses seq_file to provide a lossless
>> way to transfer data to user space. But we want to call
>> bpf program after all objects have been traversed, and
>> bpf program may write additional data to the
>> seq_file buffer. The current seq_read() does not work
>> for this use case.
>>
>> Besides allowing stop() function to write to the buffer,
>> the bpf_seq_read() also fixed the buffer size to one page.
>> If any single call of show() or stop() will emit data
>> more than one page to cause overflow, -E2BIG error code
>> will be returned to user space.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
> 
> This loop is much simpler and more streamlined now, thanks a lot! I
> think it's correct, see below about one confusing (but apparently
> correct) bit, though. Either way:
> 
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> 
>>   kernel/bpf/bpf_iter.c | 118 ++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 118 insertions(+)
>>
>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>> index 0542a243b78c..f198597b0ea4 100644
>> --- a/kernel/bpf/bpf_iter.c
>> +++ b/kernel/bpf/bpf_iter.c
>> @@ -26,6 +26,124 @@ static DEFINE_MUTEX(targets_mutex);
>>   /* protect bpf_iter_link changes */
>>   static DEFINE_MUTEX(link_mutex);
>>
>> +/* bpf_seq_read, a customized and simpler version for bpf iterator.
>> + * no_llseek is assumed for this file.
>> + * The following are differences from seq_read():
>> + *  . fixed buffer size (PAGE_SIZE)
>> + *  . assuming no_llseek
>> + *  . stop() may call bpf program, handling potential overflow there
>> + */
>> +static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>> +                           loff_t *ppos)
>> +{
>> +       struct seq_file *seq = file->private_data;
>> +       size_t n, offs, copied = 0;
>> +       int err = 0;
>> +       void *p;
>> +
>> +       mutex_lock(&seq->lock);
>> +
>> +       if (!seq->buf) {
>> +               seq->size = PAGE_SIZE;
>> +               seq->buf = kmalloc(seq->size, GFP_KERNEL);
>> +               if (!seq->buf) {
>> +                       err = -ENOMEM;
>> +                       goto done;
> 
> oh, thank you for converting to all lower-case label names! :)
> 
>> +               }
>> +       }
>> +
>> +       if (seq->count) {
>> +               n = min(seq->count, size);
>> +               err = copy_to_user(buf, seq->buf + seq->from, n);
>> +               if (err) {
>> +                       err = -EFAULT;
>> +                       goto done;
>> +               }
>> +               seq->count -= n;
>> +               seq->from += n;
>> +               copied = n;
>> +               goto done;
>> +       }
>> +
>> +       seq->from = 0;
>> +       p = seq->op->start(seq, &seq->index);
>> +       if (IS_ERR_OR_NULL(p))
>> +               goto stop;
> 
> if start() returns IS_ERR(p), stop(p) below won't produce any output
> (because BPF program is called only for p == NULL), so we'll just
> return 0 with no error, do I interpret the code correctly? I think
> seq_file's read actually returns PTR_ERR(p) as a result in this case.
> 
> so I think you need err = PTR_ERR(p); before goto stop here?

Thanks for catching this!
Yes, seq_file() indeed returns PTR_ERR(p) to user space here.
Will make the change.

> 
>> +
>> +       err = seq->op->show(seq, p);
>> +       if (err > 0) {
>> +               seq->count = 0;
>> +       } else if (err < 0 || seq_has_overflowed(seq)) {
>> +               if (!err)
>> +                       err = -E2BIG;
>> +               seq->count = 0;
>> +               seq->op->stop(seq, p);
>> +               goto done;
>> +       }
>> +
>> +       while (1) {
>> +               loff_t pos = seq->index;
>> +
>> +               offs = seq->count;
>> +               p = seq->op->next(seq, p, &seq->index);
>> +               if (pos == seq->index) {
>> +                       pr_info_ratelimited("buggy seq_file .next function %ps "
>> +                               "did not updated position index\n",
>> +                               seq->op->next);
>> +                       seq->index++;
>> +               }
>> +
>> +               if (IS_ERR_OR_NULL(p)) {
>> +                       err = PTR_ERR(p);
>> +                       break;
>> +               }
>> +               if (seq->count >= size)
>> +                       break;
>> +
>> +               err = seq->op->show(seq, p);
>> +               if (err > 0) {
>> +                       seq->count = offs;
>> +               } else if (err < 0 || seq_has_overflowed(seq)) {
>> +                       seq->count = offs;
>> +                       if (!err)
>> +                               err = -E2BIG;
> 
> nit: this -E2BIG is set unconditionally even for 2nd+ show(). This
> will work, because it will get ignored on next iteration, but I think
> it will be much more obvious if written as:
> 
> if (!err && offs = 0)
>      err = -E2BIG;

Yes, will make the change since it indeed makes code more readable.

> 
> It took me few re-readings of the code I'm pretty familiar with
> already to realize that this is ok.
> 
> I had to write the below piece to realize that this is fine :) Just
> leaving here just in case you find it useful:
> 
> else if (err < 0 || seq_has_overflowed(seq)) {
>      if (!err && offs == 0) /* overflow in first show() output */
>          err = -E2BIG;
>      if (err) {             /* overflow in first show() or real error happened */
>          seq->count = 0; /* not strictly necessary, but shows that we
> are truncating output */
>          seq->op->stop(seq, p);
>          goto done; /* done will return err */
>      }
>      /* no error and overflow for 2nd+ show(), roll back output and stop */
>      seq->count = offs;
>      break;
> }
> 
>> +                       if (offs == 0) {
>> +                               seq->op->stop(seq, p);
>> +                               goto done;
>> +                       }
>> +                       break;
>> +               }
>> +       }
>> +stop:
>> +       offs = seq->count;
>> +       /* bpf program called if !p */
>> +       seq->op->stop(seq, p);
>> +       if (!p && seq_has_overflowed(seq)) {
>> +               seq->count = offs;
>> +               if (offs == 0) {
>> +                       err = -E2BIG;
>> +                       goto done;
>> +               }
>> +       }
>> +
>> +       n = min(seq->count, size);
>> +       err = copy_to_user(buf, seq->buf, n);
>> +       if (err) {
>> +               err = -EFAULT;
>> +               goto done;
>> +       }
>> +       copied = n;
>> +       seq->count -= n;
>> +       seq->from = n;
>> +done:
>> +       if (!copied)
>> +               copied = err;
>> +       else
>> +               *ppos += copied;
>> +       mutex_unlock(&seq->lock);
>> +       return copied;
>> +}
>> +
>>   int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>>   {
>>          struct bpf_iter_target_info *tinfo;
>> --
>> 2.24.1
>>
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0542a243b78c..f198597b0ea4 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -26,6 +26,124 @@  static DEFINE_MUTEX(targets_mutex);
 /* protect bpf_iter_link changes */
 static DEFINE_MUTEX(link_mutex);
 
+/* bpf_seq_read, a customized and simpler version for bpf iterator.
+ * no_llseek is assumed for this file.
+ * The following are differences from seq_read():
+ *  . fixed buffer size (PAGE_SIZE)
+ *  . assuming no_llseek
+ *  . stop() may call bpf program, handling potential overflow there
+ */
+static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
+			    loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	size_t n, offs, copied = 0;
+	int err = 0;
+	void *p;
+
+	mutex_lock(&seq->lock);
+
+	if (!seq->buf) {
+		seq->size = PAGE_SIZE;
+		seq->buf = kmalloc(seq->size, GFP_KERNEL);
+		if (!seq->buf) {
+			err = -ENOMEM;
+			goto done;
+		}
+	}
+
+	if (seq->count) {
+		n = min(seq->count, size);
+		err = copy_to_user(buf, seq->buf + seq->from, n);
+		if (err) {
+			err = -EFAULT;
+			goto done;
+		}
+		seq->count -= n;
+		seq->from += n;
+		copied = n;
+		goto done;
+	}
+
+	seq->from = 0;
+	p = seq->op->start(seq, &seq->index);
+	if (IS_ERR_OR_NULL(p))
+		goto stop;
+
+	err = seq->op->show(seq, p);
+	if (err > 0) {
+		seq->count = 0;
+	} else if (err < 0 || seq_has_overflowed(seq)) {
+		if (!err)
+			err = -E2BIG;
+		seq->count = 0;
+		seq->op->stop(seq, p);
+		goto done;
+	}
+
+	while (1) {
+		loff_t pos = seq->index;
+
+		offs = seq->count;
+		p = seq->op->next(seq, p, &seq->index);
+		if (pos == seq->index) {
+			pr_info_ratelimited("buggy seq_file .next function %ps "
+				"did not updated position index\n",
+				seq->op->next);
+			seq->index++;
+		}
+
+		if (IS_ERR_OR_NULL(p)) {
+			err = PTR_ERR(p);
+			break;
+		}
+		if (seq->count >= size)
+			break;
+
+		err = seq->op->show(seq, p);
+		if (err > 0) {
+			seq->count = offs;
+		} else if (err < 0 || seq_has_overflowed(seq)) {
+			seq->count = offs;
+			if (!err)
+				err = -E2BIG;
+			if (offs == 0) {
+				seq->op->stop(seq, p);
+				goto done;
+			}
+			break;
+		}
+	}
+stop:
+	offs = seq->count;
+	/* bpf program called if !p */
+	seq->op->stop(seq, p);
+	if (!p && seq_has_overflowed(seq)) {
+		seq->count = offs;
+		if (offs == 0) {
+			err = -E2BIG;
+			goto done;
+		}
+	}
+
+	n = min(seq->count, size);
+	err = copy_to_user(buf, seq->buf, n);
+	if (err) {
+		err = -EFAULT;
+		goto done;
+	}
+	copied = n;
+	seq->count -= n;
+	seq->from = n;
+done:
+	if (!copied)
+		copied = err;
+	else
+		*ppos += copied;
+	mutex_unlock(&seq->lock);
+	return copied;
+}
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;