diff mbox series

[bpf-next,v4,05/21] bpf: implement bpf_seq_read() for bpf iterator

Message ID 20200509175904.2475468-1-yhs@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 9, 2020, 5:59 p.m. UTC
bpf iterator uses seq_file to provide a lossless
way to transfer data to user space. But we want to call
bpf program after all objects have been traversed, and
bpf program may write additional data to the
seq_file buffer. The current seq_read() does not work
for this use case.

Besides allowing stop() function to write to the buffer,
the bpf_seq_read() also fixed the buffer size to one page.
If any single call of show() or stop() will emit data
more than one page to cause overflow, -E2BIG error code
will be returned to user space.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 kernel/bpf/bpf_iter.c | 123 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

Comments

Alexei Starovoitov May 10, 2020, 12:30 a.m. UTC | #1
On Sat, May 09, 2020 at 10:59:04AM -0700, Yonghong Song wrote:
> +
> +		err = seq->op->show(seq, p);
> +		if (err > 0) {
> +			seq->count = offs;

as far as I can see this condition can never happen.
I understand that seq_read() has this logic, but four iterators
implemented don't exercise this path.
I guess it's ok to keep it, but may be add warn_once so we notice
when things change?
Yonghong Song May 10, 2020, 4:51 a.m. UTC | #2
On 5/9/20 5:30 PM, Alexei Starovoitov wrote:
> On Sat, May 09, 2020 at 10:59:04AM -0700, Yonghong Song wrote:
>> +
>> +		err = seq->op->show(seq, p);
>> +		if (err > 0) {
>> +			seq->count = offs;
> 
> as far as I can see this condition can never happen.
> I understand that seq_read() has this logic, but four iterators
> implemented don't exercise this path.
> I guess it's ok to keep it, but may be add warn_once so we notice
> when things change?

Yes, it won't happen with our current bpf return values.
I keep it to be compatible with seq_read() and for potential
future use in case we want to enable this.
we will add a warn_once here.
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0542a243b78c..832973ee80fa 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -26,6 +26,129 @@  static DEFINE_MUTEX(targets_mutex);
 /* protect bpf_iter_link changes */
 static DEFINE_MUTEX(link_mutex);
 
+/* bpf_seq_read, a customized and simpler version for bpf iterator.
+ * no_llseek is assumed for this file.
+ * The following are differences from seq_read():
+ *  . fixed buffer size (PAGE_SIZE)
+ *  . assuming no_llseek
+ *  . stop() may call bpf program, handling potential overflow there
+ */
+static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
+			    loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	size_t n, offs, copied = 0;
+	int err = 0;
+	void *p;
+
+	mutex_lock(&seq->lock);
+
+	if (!seq->buf) {
+		seq->size = PAGE_SIZE;
+		seq->buf = kmalloc(seq->size, GFP_KERNEL);
+		if (!seq->buf) {
+			err = -ENOMEM;
+			goto done;
+		}
+	}
+
+	if (seq->count) {
+		n = min(seq->count, size);
+		err = copy_to_user(buf, seq->buf + seq->from, n);
+		if (err) {
+			err = -EFAULT;
+			goto done;
+		}
+		seq->count -= n;
+		seq->from += n;
+		copied = n;
+		goto done;
+	}
+
+	seq->from = 0;
+	p = seq->op->start(seq, &seq->index);
+	if (!p)
+		goto stop;
+	if (IS_ERR(p)) {
+		err = PTR_ERR(p);
+		seq->op->stop(seq, p);
+		seq->count = 0;
+		goto done;
+	}
+
+	err = seq->op->show(seq, p);
+	if (err > 0) {
+		seq->count = 0;
+	} else if (err < 0 || seq_has_overflowed(seq)) {
+		if (!err)
+			err = -E2BIG;
+		seq->op->stop(seq, p);
+		seq->count = 0;
+		goto done;
+	}
+
+	while (1) {
+		loff_t pos = seq->index;
+
+		offs = seq->count;
+		p = seq->op->next(seq, p, &seq->index);
+		if (pos == seq->index) {
+			pr_info_ratelimited("buggy seq_file .next function %ps "
+				"did not updated position index\n",
+				seq->op->next);
+			seq->index++;
+		}
+
+		if (IS_ERR_OR_NULL(p))
+			break;
+
+		if (seq->count >= size)
+			break;
+
+		err = seq->op->show(seq, p);
+		if (err > 0) {
+			seq->count = offs;
+		} else if (err < 0 || seq_has_overflowed(seq)) {
+			seq->count = offs;
+			if (offs == 0) {
+				if (!err)
+					err = -E2BIG;
+				seq->op->stop(seq, p);
+				goto done;
+			}
+			break;
+		}
+	}
+stop:
+	offs = seq->count;
+	/* bpf program called if !p */
+	seq->op->stop(seq, p);
+	if (!p && seq_has_overflowed(seq)) {
+		seq->count = offs;
+		if (offs == 0) {
+			err = -E2BIG;
+			goto done;
+		}
+	}
+
+	n = min(seq->count, size);
+	err = copy_to_user(buf, seq->buf, n);
+	if (err) {
+		err = -EFAULT;
+		goto done;
+	}
+	copied = n;
+	seq->count -= n;
+	seq->from = n;
+done:
+	if (!copied)
+		copied = err;
+	else
+		*ppos += copied;
+	mutex_unlock(&seq->lock);
+	return copied;
+}
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;