diff mbox series

[bpf-next,v4] bpf: add new helper fd2path for mapping a file descriptor to a pathname

Message ID 20191028141053.12267-1-ethercflow@gmail.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf-next,v4] bpf: add new helper fd2path for mapping a file descriptor to a pathname | expand

Commit Message

Wenbo Zhang Oct. 28, 2019, 2:10 p.m. UTC
When people want to identify which file system files are being opened,
read, and written to, they can use this helper with file descriptor as
input to achieve this goal. Other pseudo filesystems are also supported.

This requirement is mainly discussed here:

  https://github.com/iovisor/bcc/issues/237

v3->v4:
- fix missing fdput()
- move fd2path from kernel/bpf/trace.c to kernel/trace/bpf_trace.c
- move fd2path's test code to another patch

v2->v3:
- remove unnecessary LOCKDOWN_BPF_READ
- refactor error handling section for enhanced readability
- provide a test case in tools/testing/selftests/bpf

v1->v2:
- fix backward compatibility
- add this helper description
- fix signed-off name

Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
---
 include/uapi/linux/bpf.h       | 14 +++++++++++-
 kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 14 +++++++++++-
 3 files changed, 66 insertions(+), 2 deletions(-)

Comments

Andrii Nakryiko Oct. 29, 2019, 6:48 p.m. UTC | #1
On Mon, Oct 28, 2019 at 1:59 PM Wenbo Zhang <ethercflow@gmail.com> wrote:
>
> When people want to identify which file system files are being opened,
> read, and written to, they can use this helper with file descriptor as
> input to achieve this goal. Other pseudo filesystems are also supported.
>
> This requirement is mainly discussed here:
>
>   https://github.com/iovisor/bcc/issues/237
>
> v3->v4:
> - fix missing fdput()
> - move fd2path from kernel/bpf/trace.c to kernel/trace/bpf_trace.c
> - move fd2path's test code to another patch
>
> v2->v3:
> - remove unnecessary LOCKDOWN_BPF_READ
> - refactor error handling section for enhanced readability
> - provide a test case in tools/testing/selftests/bpf
>
> v1->v2:
> - fix backward compatibility
> - add this helper description
> - fix signed-off name
>
> Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 14 +++++++++++-
>  kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 14 +++++++++++-
>  3 files changed, 66 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4af8b0819a32..124632b2a697 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2775,6 +2775,17 @@ union bpf_attr {
>   *             restricted to raw_tracepoint bpf programs.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
> + *
> + * int bpf_fd2path(char *path, u32 size, int fd)

from what I can see, we don't have any BPF helper with this naming
approach(2 -> to, 4 -> for, etc). How about something like
bpf_get_file_path?

> + *     Description
> + *             Get **file** atrribute from the current task by *fd*, then call
> + *             **d_path** to get it's absolute path and copy it as string into
> + *             *path* of *size*. The **path** also support pseudo filesystems
> + *             (whether or not it can be mounted). The *size* must be strictly
> + *             positive. On success, the helper makes sure that the *path* is
> + *             NUL-terminated. On failure, it is filled with zeroes.
> + *     Return
> + *             0 on success, or a negative error in case of failure.

Mention that we actually return a positive number on success, which is
a size of the string + 1 for NUL byte (the +1 is not true right now,
but I think should be).

>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -2888,7 +2899,8 @@ union bpf_attr {
>         FN(sk_storage_delete),          \
>         FN(send_signal),                \
>         FN(tcp_gen_syncookie),          \
> -       FN(skb_output),
> +       FN(skb_output),                 \
> +       FN(fd2path),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 571c25d60710..dd7b070df3d6 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -683,6 +683,44 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
>         .arg1_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_3(bpf_fd2path, char *, dst, u32, size, int, fd)
> +{
> +       struct fd f;
> +       char *p;
> +       int ret = -EINVAL;
> +
> +       /* Use fdget_raw instead of fdget to support O_PATH */
> +       f = fdget_raw(fd);

I haven't followed previous discussions, so sorry if this was asked
before. Can either fdget_raw or d_path sleep? Also, d_path seems to be
relying on current, which in the interrupt context might not be what
you really want. Have you considered these problems?

> +       if (!f.file)
> +               goto error;
> +
> +       p = d_path(&f.file->f_path, dst, size);
> +       if (IS_ERR_OR_NULL(p)) {
> +               ret = PTR_ERR(p);

if p can really be NULL, you'd get ret == 0 here, which is probably
not what you want.
But reading d_path, it seems like it's either valid pointer or error,
so just use IS_ERR above?

> +               goto error;
> +       }
> +
> +       ret = strlen(p);
> +       memmove(dst, p, ret);
> +       dst[ret] = '\0';

I think returning number of useful bytes (including terminating NUL)
is good and follows bpf_probe_read_str() convention. So ret++ here?

> +       goto end;
> +
> +error:
> +       memset(dst, '0', size);
> +end:
> +       fdput(f);
> +       return ret;
> +}
> +

[...]
Daniel Borkmann Oct. 30, 2019, 3:32 p.m. UTC | #2
On Tue, Oct 29, 2019 at 11:48:44AM -0700, Andrii Nakryiko wrote:
> On Mon, Oct 28, 2019 at 1:59 PM Wenbo Zhang <ethercflow@gmail.com> wrote:
> >
> > When people want to identify which file system files are being opened,
> > read, and written to, they can use this helper with file descriptor as
> > input to achieve this goal. Other pseudo filesystems are also supported.
> >
> > This requirement is mainly discussed here:
> >
> >   https://github.com/iovisor/bcc/issues/237
> >
> > v3->v4:
> > - fix missing fdput()
> > - move fd2path from kernel/bpf/trace.c to kernel/trace/bpf_trace.c
> > - move fd2path's test code to another patch
> >
> > v2->v3:
> > - remove unnecessary LOCKDOWN_BPF_READ
> > - refactor error handling section for enhanced readability
> > - provide a test case in tools/testing/selftests/bpf
> >
> > v1->v2:
> > - fix backward compatibility
> > - add this helper description
> > - fix signed-off name
> >
> > Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
> > ---
> >  include/uapi/linux/bpf.h       | 14 +++++++++++-
> >  kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h | 14 +++++++++++-
> >  3 files changed, 66 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4af8b0819a32..124632b2a697 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2775,6 +2775,17 @@ union bpf_attr {
> >   *             restricted to raw_tracepoint bpf programs.
> >   *     Return
> >   *             0 on success, or a negative error in case of failure.
> > + *
> > + * int bpf_fd2path(char *path, u32 size, int fd)
> 
> from what I can see, we don't have any BPF helper with this naming
> approach(2 -> to, 4 -> for, etc). How about something like
> bpf_get_file_path?
> 
> > + *     Description
> > + *             Get **file** atrribute from the current task by *fd*, then call
> > + *             **d_path** to get it's absolute path and copy it as string into
> > + *             *path* of *size*. The **path** also support pseudo filesystems
> > + *             (whether or not it can be mounted). The *size* must be strictly
> > + *             positive. On success, the helper makes sure that the *path* is
> > + *             NUL-terminated. On failure, it is filled with zeroes.
> > + *     Return
> > + *             0 on success, or a negative error in case of failure.
> 
> Mention that we actually return a positive number on success, which is
> a size of the string + 1 for NUL byte (the +1 is not true right now,
> but I think should be).
> 
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -2888,7 +2899,8 @@ union bpf_attr {
> >         FN(sk_storage_delete),          \
> >         FN(send_signal),                \
> >         FN(tcp_gen_syncookie),          \
> > -       FN(skb_output),
> > +       FN(skb_output),                 \
> > +       FN(fd2path),
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >   * function eBPF program intends to call
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index 571c25d60710..dd7b070df3d6 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -683,6 +683,44 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
> >         .arg1_type      = ARG_ANYTHING,
> >  };
> >
> > +BPF_CALL_3(bpf_fd2path, char *, dst, u32, size, int, fd)
> > +{
> > +       struct fd f;
> > +       char *p;
> > +       int ret = -EINVAL;
> > +
> > +       /* Use fdget_raw instead of fdget to support O_PATH */
> > +       f = fdget_raw(fd);
> 
> I haven't followed previous discussions, so sorry if this was asked
> before. Can either fdget_raw or d_path sleep? Also, d_path seems to be
> relying on current, which in the interrupt context might not be what
> you really want. Have you considered these problems?
> 
> > +       if (!f.file)
> > +               goto error;
> > +
> > +       p = d_path(&f.file->f_path, dst, size);
> > +       if (IS_ERR_OR_NULL(p)) {
> > +               ret = PTR_ERR(p);
> 
> if p can really be NULL, you'd get ret == 0 here, which is probably
> not what you want.
> But reading d_path, it seems like it's either valid pointer or error,
> so just use IS_ERR above?
> 
> > +               goto error;
> > +       }
> > +
> > +       ret = strlen(p);
> > +       memmove(dst, p, ret);
> > +       dst[ret] = '\0';
> 
> I think returning number of useful bytes (including terminating NUL)
> is good and follows bpf_probe_read_str() convention. So ret++ here?
> 
> > +       goto end;
> > +
> > +error:
> > +       memset(dst, '0', size);
> > +end:
> > +       fdput(f);

Also I'd prefer fdget_*()'s error path not calling fdput(f).

> > +       return ret;
> > +}
> > +
> 
> [...]
Wenbo Zhang Oct. 30, 2019, 4:19 p.m. UTC | #3
> from what I can see, we don't have any BPF helper with this naming
> approach(2 -> to, 4 -> for, etc). How about something like
> bpf_get_file_path?

I think bpf_get_file_path is better. I'll change to it.

> > + *     Description
> > + *             Get **file** atrribute from the current task by *fd*, then call
> > + *             **d_path** to get it's absolute path and copy it as string into
> > + *             *path* of *size*. The **path** also support pseudo filesystems
> > + *             (whether or not it can be mounted). The *size* must be strictly
> > + *             positive. On success, the helper makes sure that the *path* is
> > + *             NUL-terminated. On failure, it is filled with zeroes.
> > + *     Return
> > + *             0 on success, or a negative error in case of failure.

> Mention that we actually return a positive number on success, which is
> a size of the string + 1 for NUL byte (the +1 is not true right now,
> but I think should be).

I agree.

> I haven't followed previous discussions, so sorry if this was asked
> before. Can either fdget_raw or d_path sleep? Also, d_path seems to be
> relying on current, which in the interrupt context might not be what
> you really want. Have you considered these problems?

Yes, I've checked fdget_raw, it use atomic and rcu_read_lock/ruc_read_unlock,
so it's not sleepable. d_path use rcu_read_lock/rcu_read_unlock too.

In my mind I think this helper won't be called in the interrupt
context (Would you
please give me an example if there's an application scene). So I think
it's ok to
use d_path here.

> > +       if (!f.file)
> > +               goto error;
> > +
> > +       p = d_path(&f.file->f_path, dst, size);
> > +       if (IS_ERR_OR_NULL(p)) {
> > +               ret = PTR_ERR(p);

if p can really be NULL, you'd get ret == 0 here, which is probably
not what you want.
But reading d_path, it seems like it's either valid pointer or error,
so just use IS_ERR above?

Agree, I'll fix error handling code.

> > +               goto error;
> > +       }
> > +
> > +       ret = strlen(p);
> > +       memmove(dst, p, ret);
> > +       dst[ret] = '\0';

I think returning number of useful bytes (including terminating NUL)
is good and follows bpf_probe_read_str() convention. So ret++ here?

Agree. Thank you.

> +       goto end;
> +
> +error:
> +       memset(dst, '0', size);
> +end:
> +       fdput(f);
> +       return ret;
> +}
> +

[...]


Andrii Nakryiko <andrii.nakryiko@gmail.com> 于2019年10月30日周三 上午2:48写道:
>
> On Mon, Oct 28, 2019 at 1:59 PM Wenbo Zhang <ethercflow@gmail.com> wrote:
> >
> > When people want to identify which file system files are being opened,
> > read, and written to, they can use this helper with file descriptor as
> > input to achieve this goal. Other pseudo filesystems are also supported.
> >
> > This requirement is mainly discussed here:
> >
> >   https://github.com/iovisor/bcc/issues/237
> >
> > v3->v4:
> > - fix missing fdput()
> > - move fd2path from kernel/bpf/trace.c to kernel/trace/bpf_trace.c
> > - move fd2path's test code to another patch
> >
> > v2->v3:
> > - remove unnecessary LOCKDOWN_BPF_READ
> > - refactor error handling section for enhanced readability
> > - provide a test case in tools/testing/selftests/bpf
> >
> > v1->v2:
> > - fix backward compatibility
> > - add this helper description
> > - fix signed-off name
> >
> > Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
> > ---
> >  include/uapi/linux/bpf.h       | 14 +++++++++++-
> >  kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h | 14 +++++++++++-
> >  3 files changed, 66 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4af8b0819a32..124632b2a697 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2775,6 +2775,17 @@ union bpf_attr {
> >   *             restricted to raw_tracepoint bpf programs.
> >   *     Return
> >   *             0 on success, or a negative error in case of failure.
> > + *
> > + * int bpf_fd2path(char *path, u32 size, int fd)
>
> from what I can see, we don't have any BPF helper with this naming
> approach(2 -> to, 4 -> for, etc). How about something like
> bpf_get_file_path?
>
> > + *     Description
> > + *             Get **file** atrribute from the current task by *fd*, then call
> > + *             **d_path** to get it's absolute path and copy it as string into
> > + *             *path* of *size*. The **path** also support pseudo filesystems
> > + *             (whether or not it can be mounted). The *size* must be strictly
> > + *             positive. On success, the helper makes sure that the *path* is
> > + *             NUL-terminated. On failure, it is filled with zeroes.
> > + *     Return
> > + *             0 on success, or a negative error in case of failure.
>
> Mention that we actually return a positive number on success, which is
> a size of the string + 1 for NUL byte (the +1 is not true right now,
> but I think should be).
>
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -2888,7 +2899,8 @@ union bpf_attr {
> >         FN(sk_storage_delete),          \
> >         FN(send_signal),                \
> >         FN(tcp_gen_syncookie),          \
> > -       FN(skb_output),
> > +       FN(skb_output),                 \
> > +       FN(fd2path),
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >   * function eBPF program intends to call
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index 571c25d60710..dd7b070df3d6 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -683,6 +683,44 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
> >         .arg1_type      = ARG_ANYTHING,
> >  };
> >
> > +BPF_CALL_3(bpf_fd2path, char *, dst, u32, size, int, fd)
> > +{
> > +       struct fd f;
> > +       char *p;
> > +       int ret = -EINVAL;
> > +
> > +       /* Use fdget_raw instead of fdget to support O_PATH */
> > +       f = fdget_raw(fd);
>
> I haven't followed previous discussions, so sorry if this was asked
> before. Can either fdget_raw or d_path sleep? Also, d_path seems to be
> relying on current, which in the interrupt context might not be what
> you really want. Have you considered these problems?
>
> > +       if (!f.file)
> > +               goto error;
> > +
> > +       p = d_path(&f.file->f_path, dst, size);
> > +       if (IS_ERR_OR_NULL(p)) {
> > +               ret = PTR_ERR(p);
>
> if p can really be NULL, you'd get ret == 0 here, which is probably
> not what you want.
> But reading d_path, it seems like it's either valid pointer or error,
> so just use IS_ERR above?
>
> > +               goto error;
> > +       }
> > +
> > +       ret = strlen(p);
> > +       memmove(dst, p, ret);
> > +       dst[ret] = '\0';
>
> I think returning number of useful bytes (including terminating NUL)
> is good and follows bpf_probe_read_str() convention. So ret++ here?
>
> > +       goto end;
> > +
> > +error:
> > +       memset(dst, '0', size);
> > +end:
> > +       fdput(f);
> > +       return ret;
> > +}
> > +
>
> [...]
Wenbo Zhang Nov. 3, 2019, 4:10 a.m. UTC | #4
> Also I'd prefer fdget_*()'s error path not calling fdput(f).

Thank you, I've committed a new patch to fix this.

Daniel Borkmann <daniel@iogearbox.net> 于2019年10月30日周三 下午11:32写道:
>
> On Tue, Oct 29, 2019 at 11:48:44AM -0700, Andrii Nakryiko wrote:
> > On Mon, Oct 28, 2019 at 1:59 PM Wenbo Zhang <ethercflow@gmail.com> wrote:
> > >
> > > When people want to identify which file system files are being opened,
> > > read, and written to, they can use this helper with file descriptor as
> > > input to achieve this goal. Other pseudo filesystems are also supported.
> > >
> > > This requirement is mainly discussed here:
> > >
> > >   https://github.com/iovisor/bcc/issues/237
> > >
> > > v3->v4:
> > > - fix missing fdput()
> > > - move fd2path from kernel/bpf/trace.c to kernel/trace/bpf_trace.c
> > > - move fd2path's test code to another patch
> > >
> > > v2->v3:
> > > - remove unnecessary LOCKDOWN_BPF_READ
> > > - refactor error handling section for enhanced readability
> > > - provide a test case in tools/testing/selftests/bpf
> > >
> > > v1->v2:
> > > - fix backward compatibility
> > > - add this helper description
> > > - fix signed-off name
> > >
> > > Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
> > > ---
> > >  include/uapi/linux/bpf.h       | 14 +++++++++++-
> > >  kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
> > >  tools/include/uapi/linux/bpf.h | 14 +++++++++++-
> > >  3 files changed, 66 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index 4af8b0819a32..124632b2a697 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -2775,6 +2775,17 @@ union bpf_attr {
> > >   *             restricted to raw_tracepoint bpf programs.
> > >   *     Return
> > >   *             0 on success, or a negative error in case of failure.
> > > + *
> > > + * int bpf_fd2path(char *path, u32 size, int fd)
> >
> > from what I can see, we don't have any BPF helper with this naming
> > approach(2 -> to, 4 -> for, etc). How about something like
> > bpf_get_file_path?
> >
> > > + *     Description
> > > + *             Get **file** atrribute from the current task by *fd*, then call
> > > + *             **d_path** to get it's absolute path and copy it as string into
> > > + *             *path* of *size*. The **path** also support pseudo filesystems
> > > + *             (whether or not it can be mounted). The *size* must be strictly
> > > + *             positive. On success, the helper makes sure that the *path* is
> > > + *             NUL-terminated. On failure, it is filled with zeroes.
> > > + *     Return
> > > + *             0 on success, or a negative error in case of failure.
> >
> > Mention that we actually return a positive number on success, which is
> > a size of the string + 1 for NUL byte (the +1 is not true right now,
> > but I think should be).
> >
> > >   */
> > >  #define __BPF_FUNC_MAPPER(FN)          \
> > >         FN(unspec),                     \
> > > @@ -2888,7 +2899,8 @@ union bpf_attr {
> > >         FN(sk_storage_delete),          \
> > >         FN(send_signal),                \
> > >         FN(tcp_gen_syncookie),          \
> > > -       FN(skb_output),
> > > +       FN(skb_output),                 \
> > > +       FN(fd2path),
> > >
> > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > >   * function eBPF program intends to call
> > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > index 571c25d60710..dd7b070df3d6 100644
> > > --- a/kernel/trace/bpf_trace.c
> > > +++ b/kernel/trace/bpf_trace.c
> > > @@ -683,6 +683,44 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
> > >         .arg1_type      = ARG_ANYTHING,
> > >  };
> > >
> > > +BPF_CALL_3(bpf_fd2path, char *, dst, u32, size, int, fd)
> > > +{
> > > +       struct fd f;
> > > +       char *p;
> > > +       int ret = -EINVAL;
> > > +
> > > +       /* Use fdget_raw instead of fdget to support O_PATH */
> > > +       f = fdget_raw(fd);
> >
> > I haven't followed previous discussions, so sorry if this was asked
> > before. Can either fdget_raw or d_path sleep? Also, d_path seems to be
> > relying on current, which in the interrupt context might not be what
> > you really want. Have you considered these problems?
> >
> > > +       if (!f.file)
> > > +               goto error;
> > > +
> > > +       p = d_path(&f.file->f_path, dst, size);
> > > +       if (IS_ERR_OR_NULL(p)) {
> > > +               ret = PTR_ERR(p);
> >
> > if p can really be NULL, you'd get ret == 0 here, which is probably
> > not what you want.
> > But reading d_path, it seems like it's either valid pointer or error,
> > so just use IS_ERR above?
> >
> > > +               goto error;
> > > +       }
> > > +
> > > +       ret = strlen(p);
> > > +       memmove(dst, p, ret);
> > > +       dst[ret] = '\0';
> >
> > I think returning number of useful bytes (including terminating NUL)
> > is good and follows bpf_probe_read_str() convention. So ret++ here?
> >
> > > +       goto end;
> > > +
> > > +error:
> > > +       memset(dst, '0', size);
> > > +end:
> > > +       fdput(f);
>
> Also I'd prefer fdget_*()'s error path not calling fdput(f).
>
> > > +       return ret;
> > > +}
> > > +
> >
> > [...]
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4af8b0819a32..124632b2a697 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2775,6 +2775,17 @@  union bpf_attr {
  * 		restricted to raw_tracepoint bpf programs.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fd2path(char *path, u32 size, int fd)
+ *	Description
+ *		Get **file** atrribute from the current task by *fd*, then call
+ *		**d_path** to get it's absolute path and copy it as string into
+ *		*path* of *size*. The **path** also support pseudo filesystems
+ *		(whether or not it can be mounted). The *size* must be strictly
+ *		positive. On success, the helper makes sure that the *path* is
+ *		NUL-terminated. On failure, it is filled with zeroes.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2888,7 +2899,8 @@  union bpf_attr {
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
 	FN(tcp_gen_syncookie),		\
-	FN(skb_output),
+	FN(skb_output),			\
+	FN(fd2path),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 571c25d60710..dd7b070df3d6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -683,6 +683,44 @@  static const struct bpf_func_proto bpf_send_signal_proto = {
 	.arg1_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_fd2path, char *, dst, u32, size, int, fd)
+{
+	struct fd f;
+	char *p;
+	int ret = -EINVAL;
+
+	/* Use fdget_raw instead of fdget to support O_PATH */
+	f = fdget_raw(fd);
+	if (!f.file)
+		goto error;
+
+	p = d_path(&f.file->f_path, dst, size);
+	if (IS_ERR_OR_NULL(p)) {
+		ret = PTR_ERR(p);
+		goto error;
+	}
+
+	ret = strlen(p);
+	memmove(dst, p, ret);
+	dst[ret] = '\0';
+	goto end;
+
+error:
+	memset(dst, '0', size);
+end:
+	fdput(f);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_fd2path_proto = {
+	.func       = bpf_fd2path,
+	.gpl_only   = true,
+	.ret_type   = RET_INTEGER,
+	.arg1_type  = ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type  = ARG_CONST_SIZE,
+	.arg3_type  = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -735,6 +773,8 @@  tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #endif
 	case BPF_FUNC_send_signal:
 		return &bpf_send_signal_proto;
+	case BPF_FUNC_fd2path:
+		return &bpf_fd2path_proto;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4af8b0819a32..124632b2a697 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2775,6 +2775,17 @@  union bpf_attr {
  * 		restricted to raw_tracepoint bpf programs.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fd2path(char *path, u32 size, int fd)
+ *	Description
+ *		Get **file** atrribute from the current task by *fd*, then call
+ *		**d_path** to get it's absolute path and copy it as string into
+ *		*path* of *size*. The **path** also support pseudo filesystems
+ *		(whether or not it can be mounted). The *size* must be strictly
+ *		positive. On success, the helper makes sure that the *path* is
+ *		NUL-terminated. On failure, it is filled with zeroes.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2888,7 +2899,8 @@  union bpf_attr {
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
 	FN(tcp_gen_syncookie),		\
-	FN(skb_output),
+	FN(skb_output),			\
+	FN(fd2path),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call