diff mbox series

[bpf-next,v2,05/20] bpf: implement bpf_seq_read() for bpf iterator

Message ID 20200504062552.2047789-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 4, 2020, 6:25 a.m. UTC
bpf iterator uses seq_file to provide a lossless
way to transfer data to user space. But we want to call
bpf program after all objects have been traversed, and
bpf program may write additional data to the
seq_file buffer. The current seq_read() does not work
for this use case.

Besides allowing stop() function to write to the buffer,
the bpf_seq_read() also fixed the buffer size to one page.
If any single call of show() or stop() will emit data
more than one page to cause overflow, -E2BIG error code
will be returned to user space.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 kernel/bpf/bpf_iter.c | 128 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

Comments

Andrii Nakryiko May 5, 2020, 7:56 p.m. UTC | #1
On Sun, May 3, 2020 at 11:26 PM Yonghong Song <yhs@fb.com> wrote:
>
> bpf iterator uses seq_file to provide a lossless
> way to transfer data to user space. But we want to call
> bpf program after all objects have been traversed, and
> bpf program may write additional data to the
> seq_file buffer. The current seq_read() does not work
> for this use case.
>
> Besides allowing stop() function to write to the buffer,
> the bpf_seq_read() also fixed the buffer size to one page.
> If any single call of show() or stop() will emit data
> more than one page to cause overflow, -E2BIG error code
> will be returned to user space.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  kernel/bpf/bpf_iter.c | 128 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 128 insertions(+)
>
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index 05ae04ac1eca..2674c9cbc3dc 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -26,6 +26,134 @@ static DEFINE_MUTEX(targets_mutex);
>  /* protect bpf_iter_link changes */
>  static DEFINE_MUTEX(link_mutex);
>
> +/* bpf_seq_read, a customized and simpler version for bpf iterator.
> + * no_llseek is assumed for this file.
> + * The following are differences from seq_read():
> + *  . fixed buffer size (PAGE_SIZE)
> + *  . assuming no_llseek
> + *  . stop() may call bpf program, handling potential overflow there
> + */
> +static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> +                           loff_t *ppos)
> +{
> +       struct seq_file *seq = file->private_data;
> +       size_t n, offs, copied = 0;
> +       int err = 0;
> +       void *p;
> +
> +       mutex_lock(&seq->lock);
> +
> +       if (!seq->buf) {
> +               seq->size = PAGE_SIZE;
> +               seq->buf = kmalloc(seq->size, GFP_KERNEL);
> +               if (!seq->buf)
> +                       goto Enomem;

Why not just mutex_unlock and exit with -ENOMEM? Less goto'ing, more
straightforward.

> +       }
> +
> +       if (seq->count) {
> +               n = min(seq->count, size);
> +               err = copy_to_user(buf, seq->buf + seq->from, n);
> +               if (err)
> +                       goto Efault;
> +               seq->count -= n;
> +               seq->from += n;
> +               copied = n;
> +               goto Done;
> +       }
> +
> +       seq->from = 0;
> +       p = seq->op->start(seq, &seq->index);
> +       if (!p || IS_ERR(p))

IS_ERR_OR_NULL?

> +               goto Stop;
> +
> +       err = seq->op->show(seq, p);
> +       if (seq_has_overflowed(seq)) {
> +               err = -E2BIG;
> +               goto Error_show;
> +       } else if (err) {
> +               /* < 0: go out, > 0: skip */
> +               if (likely(err < 0))
> +                       goto Error_show;
> +               seq->count = 0;
> +       }

This seems a bit more straightforward:

if (seq_has_overflowed(seq))
    err = -E2BIG;
if (err < 0)
    goto Error_show;
else if (err > 0)
    seq->count = 0;

Also, I wonder if err > 0 (so skip was requested), should we ignore
overflow? So something like:

if (err > 0) {
    seq->count = 0;
} else {
    if (seq_has_overflowed(seq))
        err = -E2BIG;
    if (err)
        goto Error_show;
}

> +
> +       while (1) {
> +               loff_t pos = seq->index;
> +
> +               offs = seq->count;
> +               p = seq->op->next(seq, p, &seq->index);
> +               if (pos == seq->index) {
> +                       pr_info_ratelimited("buggy seq_file .next function %ps "
> +                               "did not updated position index\n",
> +                               seq->op->next);
> +                       seq->index++;
> +               }
> +
> +               if (!p || IS_ERR(p)) {

Same, IS_ERR_OR_NULL.

> +                       err = PTR_ERR(p);
> +                       break;
> +               }
> +               if (seq->count >= size)
> +                       break;
> +
> +               err = seq->op->show(seq, p);
> +               if (seq_has_overflowed(seq)) {
> +                       if (offs == 0) {
> +                               err = -E2BIG;
> +                               goto Error_show;
> +                       }
> +                       seq->count = offs;
> +                       break;
> +               } else if (err) {
> +                       /* < 0: go out, > 0: skip */
> +                       seq->count = offs;
> +                       if (likely(err < 0)) {
> +                               if (offs == 0)
> +                                       goto Error_show;
> +                               break;
> +                       }
> +               }

Same question here about ignoring overflow if skip was requested.

> +       }
> +Stop:
> +       offs = seq->count;
> +       /* may call bpf program */
> +       seq->op->stop(seq, p);
> +       if (seq_has_overflowed(seq)) {
> +               if (offs == 0)
> +                       goto Error_stop;
> +               seq->count = offs;

just want to double-check, because it's not clear from the code. If
all the start()/show()/next() succeeded, but stop() overflown. Would
stop() be called again on subsequent read? Would start/show/next
handle this correctly as well?

> +       }
> +
> +       n = min(seq->count, size);
> +       err = copy_to_user(buf, seq->buf, n);
> +       if (err)
> +               goto Efault;
> +       copied = n;
> +       seq->count -= n;
> +       seq->from = n;
> +Done:
> +       if (!copied)
> +               copied = err;
> +       else
> +               *ppos += copied;
> +       mutex_unlock(&seq->lock);
> +       return copied;
> +
> +Error_show:
> +       seq->op->stop(seq, p);
> +Error_stop:
> +       seq->count = 0;
> +       goto Done;
> +
> +Enomem:
> +       err = -ENOMEM;
> +       goto Done;
> +
> +Efault:
> +       err = -EFAULT;
> +       goto Done;

Enomem and Efault seem completely redundant and just add goto
complexity to this algorithm. Let's just inline `err =
-E(NOMEM|FAULT); goto Done;` instead?

> +}
> +
>  int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>  {
>         struct bpf_iter_target_info *tinfo;
> --
> 2.24.1
>
Alexei Starovoitov May 5, 2020, 7:57 p.m. UTC | #2
On 5/5/20 12:56 PM, Andrii Nakryiko wrote:
>> +               seq->buf = kmalloc(seq->size, GFP_KERNEL);
>> +               if (!seq->buf)
>> +                       goto Enomem;
> Why not just mutex_unlock and exit with -ENOMEM? Less goto'ing, more
> straightforward.
> 

no. please keep kernel coding style. goto is appropriate here.
Yonghong Song May 5, 2020, 8:25 p.m. UTC | #3
On 5/5/20 12:56 PM, Andrii Nakryiko wrote:
> On Sun, May 3, 2020 at 11:26 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> bpf iterator uses seq_file to provide a lossless
>> way to transfer data to user space. But we want to call
>> bpf program after all objects have been traversed, and
>> bpf program may write additional data to the
>> seq_file buffer. The current seq_read() does not work
>> for this use case.
>>
>> Besides allowing stop() function to write to the buffer,
>> the bpf_seq_read() also fixed the buffer size to one page.
>> If any single call of show() or stop() will emit data
>> more than one page to cause overflow, -E2BIG error code
>> will be returned to user space.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   kernel/bpf/bpf_iter.c | 128 ++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 128 insertions(+)
>>
>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>> index 05ae04ac1eca..2674c9cbc3dc 100644
>> --- a/kernel/bpf/bpf_iter.c
>> +++ b/kernel/bpf/bpf_iter.c
>> @@ -26,6 +26,134 @@ static DEFINE_MUTEX(targets_mutex);
>>   /* protect bpf_iter_link changes */
>>   static DEFINE_MUTEX(link_mutex);
>>
>> +/* bpf_seq_read, a customized and simpler version for bpf iterator.
>> + * no_llseek is assumed for this file.
>> + * The following are differences from seq_read():
>> + *  . fixed buffer size (PAGE_SIZE)
>> + *  . assuming no_llseek
>> + *  . stop() may call bpf program, handling potential overflow there
>> + */
>> +static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>> +                           loff_t *ppos)
>> +{
>> +       struct seq_file *seq = file->private_data;
>> +       size_t n, offs, copied = 0;
>> +       int err = 0;
>> +       void *p;
>> +
>> +       mutex_lock(&seq->lock);
>> +
>> +       if (!seq->buf) {
>> +               seq->size = PAGE_SIZE;
>> +               seq->buf = kmalloc(seq->size, GFP_KERNEL);
>> +               if (!seq->buf)
>> +                       goto Enomem;
> 
> Why not just mutex_unlock and exit with -ENOMEM? Less goto'ing, more
> straightforward.
> 
>> +       }
>> +
>> +       if (seq->count) {
>> +               n = min(seq->count, size);
>> +               err = copy_to_user(buf, seq->buf + seq->from, n);
>> +               if (err)
>> +                       goto Efault;
>> +               seq->count -= n;
>> +               seq->from += n;
>> +               copied = n;
>> +               goto Done;
>> +       }
>> +
>> +       seq->from = 0;
>> +       p = seq->op->start(seq, &seq->index);
>> +       if (!p || IS_ERR(p))
> 
> IS_ERR_OR_NULL?

Ack.

> 
>> +               goto Stop;
>> +
>> +       err = seq->op->show(seq, p);
>> +       if (seq_has_overflowed(seq)) {
>> +               err = -E2BIG;
>> +               goto Error_show;
>> +       } else if (err) {
>> +               /* < 0: go out, > 0: skip */
>> +               if (likely(err < 0))
>> +                       goto Error_show;
>> +               seq->count = 0;
>> +       }
> 
> This seems a bit more straightforward:
> 
> if (seq_has_overflowed(seq))
>      err = -E2BIG;
> if (err < 0)
>      goto Error_show;
> else if (err > 0)
>      seq->count = 0;
> 
> Also, I wonder if err > 0 (so skip was requested), should we ignore
> overflow? So something like:

Think about overflow vs. err > 0 case, I double checked seq_file()
implementation again, yes, it is skipped. So your suggestion below
looks reasonable.

> 
> if (err > 0) {
>      seq->count = 0;
> } else {
>      if (seq_has_overflowed(seq))
>          err = -E2BIG;
>      if (err)
>          goto Error_show;
> }
> 
>> +
>> +       while (1) {
>> +               loff_t pos = seq->index;
>> +
>> +               offs = seq->count;
>> +               p = seq->op->next(seq, p, &seq->index);
>> +               if (pos == seq->index) {
>> +                       pr_info_ratelimited("buggy seq_file .next function %ps "
>> +                               "did not updated position index\n",
>> +                               seq->op->next);
>> +                       seq->index++;
>> +               }
>> +
>> +               if (!p || IS_ERR(p)) {
> 
> Same, IS_ERR_OR_NULL.

Ack.

> 
>> +                       err = PTR_ERR(p);
>> +                       break;
>> +               }
>> +               if (seq->count >= size)
>> +                       break;
>> +
>> +               err = seq->op->show(seq, p);
>> +               if (seq_has_overflowed(seq)) {
>> +                       if (offs == 0) {
>> +                               err = -E2BIG;
>> +                               goto Error_show;
>> +                       }
>> +                       seq->count = offs;
>> +                       break;
>> +               } else if (err) {
>> +                       /* < 0: go out, > 0: skip */
>> +                       seq->count = offs;
>> +                       if (likely(err < 0)) {
>> +                               if (offs == 0)
>> +                                       goto Error_show;
>> +                               break;
>> +                       }
>> +               }
> 
> Same question here about ignoring overflow if skip was requested.

Yes, we should prioritize err > 0 over overflow.

> 
>> +       }
>> +Stop:
>> +       offs = seq->count;
>> +       /* may call bpf program */
>> +       seq->op->stop(seq, p);
>> +       if (seq_has_overflowed(seq)) {
>> +               if (offs == 0)
>> +                       goto Error_stop;
>> +               seq->count = offs;
> 
> just want to double-check, because it's not clear from the code. If
> all the start()/show()/next() succeeded, but stop() overflown. Would
> stop() be called again on subsequent read? Would start/show/next
> handle this correctly as well?

I am supposed to handle this unless there is a bug...
The idea is:
    - if start()/show()/next() is fine and stop() overflow,
      we will skip stop() output and move on.
      (if we found out, we skip to the beginning of the
       buffer, we will return -E2BIG. Otherwise, we will return
       0 here, the user read() may just exit.)
    - next time, when read() called again, the start() will return
      NULL (since previous next() returns NULL) and the control
      will jump to stop(), which will try to do another dump().

> 
>> +       }
>> +
>> +       n = min(seq->count, size);
>> +       err = copy_to_user(buf, seq->buf, n);
>> +       if (err)
>> +               goto Efault;
>> +       copied = n;
>> +       seq->count -= n;
>> +       seq->from = n;
>> +Done:
>> +       if (!copied)
>> +               copied = err;
>> +       else
>> +               *ppos += copied;
>> +       mutex_unlock(&seq->lock);
>> +       return copied;
>> +
>> +Error_show:
>> +       seq->op->stop(seq, p);
>> +Error_stop:
>> +       seq->count = 0;
>> +       goto Done;
>> +
>> +Enomem:
>> +       err = -ENOMEM;
>> +       goto Done;
>> +
>> +Efault:
>> +       err = -EFAULT;
>> +       goto Done;
> 
> Enomem and Efault seem completely redundant and just add goto
> complexity to this algorithm. Let's just inline `err =
> -E(NOMEM|FAULT); goto Done;` instead?

We can do this. This is kind of original seq_read() coding
style. Agree that we do not need to follow them.

> 
>> +}
>> +
>>   int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>>   {
>>          struct bpf_iter_target_info *tinfo;
>> --
>> 2.24.1
>>
Andrii Nakryiko May 5, 2020, 9:08 p.m. UTC | #4
On Tue, May 5, 2020 at 1:25 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 5/5/20 12:56 PM, Andrii Nakryiko wrote:
> > On Sun, May 3, 2020 at 11:26 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> bpf iterator uses seq_file to provide a lossless
> >> way to transfer data to user space. But we want to call
> >> bpf program after all objects have been traversed, and
> >> bpf program may write additional data to the
> >> seq_file buffer. The current seq_read() does not work
> >> for this use case.
> >>
> >> Besides allowing stop() function to write to the buffer,
> >> the bpf_seq_read() also fixed the buffer size to one page.
> >> If any single call of show() or stop() will emit data
> >> more than one page to cause overflow, -E2BIG error code
> >> will be returned to user space.
> >>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   kernel/bpf/bpf_iter.c | 128 ++++++++++++++++++++++++++++++++++++++++++
> >>   1 file changed, 128 insertions(+)
> >>
> >> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> >> index 05ae04ac1eca..2674c9cbc3dc 100644
> >> --- a/kernel/bpf/bpf_iter.c
> >> +++ b/kernel/bpf/bpf_iter.c
> >> @@ -26,6 +26,134 @@ static DEFINE_MUTEX(targets_mutex);
> >>   /* protect bpf_iter_link changes */
> >>   static DEFINE_MUTEX(link_mutex);
> >>
> >> +/* bpf_seq_read, a customized and simpler version for bpf iterator.
> >> + * no_llseek is assumed for this file.
> >> + * The following are differences from seq_read():
> >> + *  . fixed buffer size (PAGE_SIZE)
> >> + *  . assuming no_llseek
> >> + *  . stop() may call bpf program, handling potential overflow there
> >> + */
> >> +static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> >> +                           loff_t *ppos)
> >> +{
> >> +       struct seq_file *seq = file->private_data;
> >> +       size_t n, offs, copied = 0;
> >> +       int err = 0;
> >> +       void *p;
> >> +
> >> +       mutex_lock(&seq->lock);
> >> +
> >> +       if (!seq->buf) {
> >> +               seq->size = PAGE_SIZE;
> >> +               seq->buf = kmalloc(seq->size, GFP_KERNEL);
> >> +               if (!seq->buf)
> >> +                       goto Enomem;
> >
> > Why not just mutex_unlock and exit with -ENOMEM? Less goto'ing, more
> > straightforward.
> >
> >> +       }
> >> +
> >> +       if (seq->count) {
> >> +               n = min(seq->count, size);
> >> +               err = copy_to_user(buf, seq->buf + seq->from, n);
> >> +               if (err)
> >> +                       goto Efault;
> >> +               seq->count -= n;
> >> +               seq->from += n;
> >> +               copied = n;
> >> +               goto Done;
> >> +       }
> >> +
> >> +       seq->from = 0;
> >> +       p = seq->op->start(seq, &seq->index);
> >> +       if (!p || IS_ERR(p))
> >
> > IS_ERR_OR_NULL?
>
> Ack.
>
> >
> >> +               goto Stop;
> >> +
> >> +       err = seq->op->show(seq, p);
> >> +       if (seq_has_overflowed(seq)) {
> >> +               err = -E2BIG;
> >> +               goto Error_show;
> >> +       } else if (err) {
> >> +               /* < 0: go out, > 0: skip */
> >> +               if (likely(err < 0))
> >> +                       goto Error_show;
> >> +               seq->count = 0;
> >> +       }
> >
> > This seems a bit more straightforward:
> >
> > if (seq_has_overflowed(seq))
> >      err = -E2BIG;
> > if (err < 0)
> >      goto Error_show;
> > else if (err > 0)
> >      seq->count = 0;
> >
> > Also, I wonder if err > 0 (so skip was requested), should we ignore
> > overflow? So something like:
>
> Think about overflow vs. err > 0 case, I double checked seq_file()
> implementation again, yes, it is skipped. So your suggestion below
> looks reasonable.
>
> >
> > if (err > 0) {
> >      seq->count = 0;
> > } else {
> >      if (seq_has_overflowed(seq))
> >          err = -E2BIG;
> >      if (err)
> >          goto Error_show;
> > }
> >
> >> +
> >> +       while (1) {
> >> +               loff_t pos = seq->index;
> >> +
> >> +               offs = seq->count;
> >> +               p = seq->op->next(seq, p, &seq->index);
> >> +               if (pos == seq->index) {
> >> +                       pr_info_ratelimited("buggy seq_file .next function %ps "
> >> +                               "did not updated position index\n",
> >> +                               seq->op->next);
> >> +                       seq->index++;
> >> +               }
> >> +
> >> +               if (!p || IS_ERR(p)) {
> >
> > Same, IS_ERR_OR_NULL.
>
> Ack.
>
> >
> >> +                       err = PTR_ERR(p);
> >> +                       break;
> >> +               }
> >> +               if (seq->count >= size)
> >> +                       break;
> >> +
> >> +               err = seq->op->show(seq, p);
> >> +               if (seq_has_overflowed(seq)) {
> >> +                       if (offs == 0) {
> >> +                               err = -E2BIG;
> >> +                               goto Error_show;
> >> +                       }
> >> +                       seq->count = offs;
> >> +                       break;
> >> +               } else if (err) {
> >> +                       /* < 0: go out, > 0: skip */
> >> +                       seq->count = offs;
> >> +                       if (likely(err < 0)) {
> >> +                               if (offs == 0)
> >> +                                       goto Error_show;
> >> +                               break;
> >> +                       }
> >> +               }
> >
> > Same question here about ignoring overflow if skip was requested.
>
> Yes, we should prioritize err > 0 over overflow.
>
> >
> >> +       }
> >> +Stop:
> >> +       offs = seq->count;
> >> +       /* may call bpf program */
> >> +       seq->op->stop(seq, p);
> >> +       if (seq_has_overflowed(seq)) {
> >> +               if (offs == 0)
> >> +                       goto Error_stop;
> >> +               seq->count = offs;
> >
> > just want to double-check, because it's not clear from the code. If
> > all the start()/show()/next() succeeded, but stop() overflown. Would
> > stop() be called again on subsequent read? Would start/show/next
> > handle this correctly as well?
>
> I am supposed to handle this unless there is a bug...
> The idea is:
>     - if start()/show()/next() is fine and stop() overflow,
>       we will skip stop() output and move on.
>       (if we found out, we skip to the beginning of the
>        buffer, we will return -E2BIG. Otherwise, we will return
>        0 here, the user read() may just exit.)
>     - next time, when read() called again, the start() will return
>       NULL (since previous next() returns NULL) and the control
>       will jump to stop(), which will try to do another dump().
>

Right, sounds reasonable :)

> >
> >> +       }
> >> +
> >> +       n = min(seq->count, size);
> >> +       err = copy_to_user(buf, seq->buf, n);
> >> +       if (err)
> >> +               goto Efault;
> >> +       copied = n;
> >> +       seq->count -= n;
> >> +       seq->from = n;
> >> +Done:
> >> +       if (!copied)
> >> +               copied = err;
> >> +       else
> >> +               *ppos += copied;
> >> +       mutex_unlock(&seq->lock);
> >> +       return copied;
> >> +
> >> +Error_show:
> >> +       seq->op->stop(seq, p);
> >> +Error_stop:
> >> +       seq->count = 0;
> >> +       goto Done;
> >> +
> >> +Enomem:
> >> +       err = -ENOMEM;
> >> +       goto Done;
> >> +
> >> +Efault:
> >> +       err = -EFAULT;
> >> +       goto Done;
> >
> > Enomem and Efault seem completely redundant and just add goto
> > complexity to this algorithm. Let's just inline `err =
> > -E(NOMEM|FAULT); goto Done;` instead?
>
> We can do this. This is kind of original seq_read() coding
> style. Agree that we do not need to follow them.
>
> >
> >> +}
> >> +
> >>   int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
> >>   {
> >>          struct bpf_iter_target_info *tinfo;
> >> --
> >> 2.24.1
> >>
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 05ae04ac1eca..2674c9cbc3dc 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -26,6 +26,134 @@  static DEFINE_MUTEX(targets_mutex);
 /* protect bpf_iter_link changes */
 static DEFINE_MUTEX(link_mutex);
 
+/* bpf_seq_read, a customized and simpler version for bpf iterator.
+ * no_llseek is assumed for this file.
+ * The following are differences from seq_read():
+ *  . fixed buffer size (PAGE_SIZE)
+ *  . assuming no_llseek
+ *  . stop() may call bpf program, handling potential overflow there
+ */
+static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
+			    loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	size_t n, offs, copied = 0;
+	int err = 0;
+	void *p;
+
+	mutex_lock(&seq->lock);
+
+	if (!seq->buf) {
+		seq->size = PAGE_SIZE;
+		seq->buf = kmalloc(seq->size, GFP_KERNEL);
+		if (!seq->buf)
+			goto Enomem;
+	}
+
+	if (seq->count) {
+		n = min(seq->count, size);
+		err = copy_to_user(buf, seq->buf + seq->from, n);
+		if (err)
+			goto Efault;
+		seq->count -= n;
+		seq->from += n;
+		copied = n;
+		goto Done;
+	}
+
+	seq->from = 0;
+	p = seq->op->start(seq, &seq->index);
+	if (!p || IS_ERR(p))
+		goto Stop;
+
+	err = seq->op->show(seq, p);
+	if (seq_has_overflowed(seq)) {
+		err = -E2BIG;
+		goto Error_show;
+	} else if (err) {
+		/* < 0: go out, > 0: skip */
+		if (likely(err < 0))
+			goto Error_show;
+		seq->count = 0;
+	}
+
+	while (1) {
+		loff_t pos = seq->index;
+
+		offs = seq->count;
+		p = seq->op->next(seq, p, &seq->index);
+		if (pos == seq->index) {
+			pr_info_ratelimited("buggy seq_file .next function %ps "
+				"did not updated position index\n",
+				seq->op->next);
+			seq->index++;
+		}
+
+		if (!p || IS_ERR(p)) {
+			err = PTR_ERR(p);
+			break;
+		}
+		if (seq->count >= size)
+			break;
+
+		err = seq->op->show(seq, p);
+		if (seq_has_overflowed(seq)) {
+			if (offs == 0) {
+				err = -E2BIG;
+				goto Error_show;
+			}
+			seq->count = offs;
+			break;
+		} else if (err) {
+			/* < 0: go out, > 0: skip */
+			seq->count = offs;
+			if (likely(err < 0)) {
+				if (offs == 0)
+					goto Error_show;
+				break;
+			}
+		}
+	}
+Stop:
+	offs = seq->count;
+	/* may call bpf program */
+	seq->op->stop(seq, p);
+	if (seq_has_overflowed(seq)) {
+		if (offs == 0)
+			goto Error_stop;
+		seq->count = offs;
+	}
+
+	n = min(seq->count, size);
+	err = copy_to_user(buf, seq->buf, n);
+	if (err)
+		goto Efault;
+	copied = n;
+	seq->count -= n;
+	seq->from = n;
+Done:
+	if (!copied)
+		copied = err;
+	else
+		*ppos += copied;
+	mutex_unlock(&seq->lock);
+	return copied;
+
+Error_show:
+	seq->op->stop(seq, p);
+Error_stop:
+	seq->count = 0;
+	goto Done;
+
+Enomem:
+	err = -ENOMEM;
+	goto Done;
+
+Efault:
+	err = -EFAULT;
+	goto Done;
+}
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;