
[bpf-next,1/3] bpf: introduce helper bpf_get_task_stack_trace()

Message ID: 20200623070802.2310018-2-songliubraving@fb.com
State: Changes Requested
Delegated to: BPF Maintainers
Series: bpf: introduce bpf_get_task_stack_trace()

Commit Message

Song Liu June 23, 2020, 7:08 a.m. UTC
This helper can be used with bpf_iter__task to dump all /proc/*/stack to
a seq_file.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/uapi/linux/bpf.h       | 10 +++++++++-
 kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 10 +++++++++-
 4 files changed, 41 insertions(+), 2 deletions(-)

--
2.24.1
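To make the intended use concrete, here is a minimal sketch of a task-iterator
program built on the proposed helper. It is not part of the series: the context
struct definitions are assumed to come from vmlinux.h (or the bpf_iter.h shim
used by the selftests), the helper declaration is assumed to be generated from
this patch's UAPI update, and the size argument follows the size-in-bytes
interpretation discussed in the comments below.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";	/* the proto is gpl_only */

SEC("iter/task")
int dump_task_stack(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;
	unsigned long entries[16];
	int num;

	if (!task)
		return 0;

	/* pass the buffer size in bytes; the helper returns the number of
	 * entries stored
	 */
	num = bpf_get_task_stack_trace(task, entries, sizeof(entries));
	if (num <= 0)
		return 0;
	if (num > 16)
		num = 16;	/* bound the length for the verifier */
	bpf_seq_write(seq, entries, num * sizeof(entries[0]));
	return 0;
}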

Comments

Alexei Starovoitov June 23, 2020, 3:19 p.m. UTC | #1
On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>
> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
> a seq_file.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/uapi/linux/bpf.h       | 10 +++++++++-
>  kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  2 ++
>  tools/include/uapi/linux/bpf.h | 10 +++++++++-
>  4 files changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 19684813faaed..a30416b822fe3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3252,6 +3252,13 @@ union bpf_attr {
>   *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>   *             is returned or the error code -EACCES in case the skb is not
>   *             subject to CHECKSUM_UNNECESSARY.
> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *     Description
> + *             Save a task stack trace into array *entries*. This is a wrapper
> + *             over stack_trace_save_tsk().
> + *     Return
> + *             Number of trace entries stored.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -3389,7 +3396,8 @@ union bpf_attr {
>         FN(ringbuf_submit),             \
>         FN(ringbuf_discard),            \
>         FN(ringbuf_query),              \
> -       FN(csum_level),
> +       FN(csum_level),                 \
> +       FN(get_task_stack_trace),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e729c9e587a07..2c13bcb5c2bce 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>         .arg4_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> +          void *, entries, u32, size)
> +{
> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> +}
> +
> +static int bpf_get_task_stack_trace_btf_ids[5];
> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> +       .func           = bpf_get_task_stack_trace,
> +       .gpl_only       = true,

why?

> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
> +       .arg2_type      = ARG_PTR_TO_MEM,
> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,

OR_ZERO ? why?

> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
> +};
> +
>  static const struct bpf_func_proto *
>  raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  {
> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return prog->expected_attach_type == BPF_TRACE_ITER ?
>                        &bpf_seq_write_proto :
>                        NULL;
> +       case BPF_FUNC_get_task_stack_trace:
> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
> +                       &bpf_get_task_stack_trace_proto :

why limit to iter only?

> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *     Description
> + *             Save a task stack trace into array *entries*. This is a wrapper
> + *             over stack_trace_save_tsk().

size is not documented and looks wrong.
The verifier checks it in bytes, but it's consumed as a number of u32s.
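(Sketching the arithmetic behind this point, assuming a 64-bit kernel: a
64-byte buffer passes the verifier with size == 64, but stack_trace_save_tsk()
treats its size argument as an entry count, so it could store up to
64 * sizeof(unsigned long) == 512 bytes, an 8x overrun.)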
Daniel Borkmann June 23, 2020, 3:22 p.m. UTC | #2
On 6/23/20 9:08 AM, Song Liu wrote:
> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
> a seq_file.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>   include/uapi/linux/bpf.h       | 10 +++++++++-
>   kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>   scripts/bpf_helpers_doc.py     |  2 ++
>   tools/include/uapi/linux/bpf.h | 10 +++++++++-
>   4 files changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 19684813faaed..a30416b822fe3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3252,6 +3252,13 @@ union bpf_attr {
>    * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>    * 		is returned or the error code -EACCES in case the skb is not
>    * 		subject to CHECKSUM_UNNECESSARY.
> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *	Description
> + *		Save a task stack trace into array *entries*. This is a wrapper
> + *		over stack_trace_save_tsk().
> + *	Return
> + *		Number of trace entries stored.
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -3389,7 +3396,8 @@ union bpf_attr {
>   	FN(ringbuf_submit),		\
>   	FN(ringbuf_discard),		\
>   	FN(ringbuf_query),		\
> -	FN(csum_level),
> +	FN(csum_level),			\
> +	FN(get_task_stack_trace),
> 
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>    * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e729c9e587a07..2c13bcb5c2bce 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>   	.arg4_type	= ARG_ANYTHING,
>   };
> 
> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> +	   void *, entries, u32, size)
> +{
> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);

nit: cast not needed.

> +}
> +
> +static int bpf_get_task_stack_trace_btf_ids[5];
> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> +	.func		= bpf_get_task_stack_trace,
> +	.gpl_only	= true,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_BTF_ID,
> +	.arg2_type	= ARG_PTR_TO_MEM,
> +	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,

Is there a use-case to pass in entries == NULL + size == 0?

> +	.btf_id		= bpf_get_task_stack_trace_btf_ids,
> +};
> +
Song Liu June 23, 2020, 4:59 p.m. UTC | #3
> On Jun 23, 2020, at 8:19 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>> 

[...]

>> 
>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>> +          void *, entries, u32, size)
>> +{
>> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>> +}
>> +
>> +static int bpf_get_task_stack_trace_btf_ids[5];
>> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
>> +       .func           = bpf_get_task_stack_trace,
>> +       .gpl_only       = true,
> 
> why?

Actually, I am not sure when we should use gpl_only = true. 

> 
>> +       .ret_type       = RET_INTEGER,
>> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
>> +       .arg2_type      = ARG_PTR_TO_MEM,
>> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
> 
> OR_ZERO ? why?

Will fix. 

> 
>> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
>> +};
>> +
>> static const struct bpf_func_proto *
>> raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>> {
>> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>                return prog->expected_attach_type == BPF_TRACE_ITER ?
>>                       &bpf_seq_write_proto :
>>                       NULL;
>> +       case BPF_FUNC_get_task_stack_trace:
>> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
>> +                       &bpf_get_task_stack_trace_proto :
> 
> why limit to iter only?

I guess it is also useful for other types. Maybe move to bpf_tracing_func_proto()?

> 
>> + *
>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>> + *     Description
>> + *             Save a task stack trace into array *entries*. This is a wrapper
>> + *             over stack_trace_save_tsk().
> 
> size is not documented and looks wrong.
> The verifier checks it in bytes, but it's consumed as a number of u32s.

I am not 100% sure, but the verifier seems to check it correctly. And I think it
is consumed as u64s?

Thanks,
Song
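A sketch of what that move might look like, making the helper available to all
tracing program types (this is the suggestion sketched out, not code from the
series):

	/* in bpf_tracing_func_proto(), kernel/trace/bpf_trace.c */
	case BPF_FUNC_get_task_stack_trace:
		/* no BPF_TRACE_ITER restriction */
		return &bpf_get_task_stack_trace_proto;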
Song Liu June 23, 2020, 5:19 p.m. UTC | #4
> On Jun 23, 2020, at 8:22 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> 
> On 6/23/20 9:08 AM, Song Liu wrote:
>> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
>> a seq_file.
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>>  include/uapi/linux/bpf.h       | 10 +++++++++-
>>  kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>>  scripts/bpf_helpers_doc.py     |  2 ++
>>  tools/include/uapi/linux/bpf.h | 10 +++++++++-
>>  4 files changed, 41 insertions(+), 2 deletions(-)
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 19684813faaed..a30416b822fe3 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3252,6 +3252,13 @@ union bpf_attr {
>>   * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>>   * 		is returned or the error code -EACCES in case the skb is not
>>   * 		subject to CHECKSUM_UNNECESSARY.
>> + *
>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>> + *	Description
>> + *		Save a task stack trace into array *entries*. This is a wrapper
>> + *		over stack_trace_save_tsk().
>> + *	Return
>> + *		Number of trace entries stored.
>>   */
>>  #define __BPF_FUNC_MAPPER(FN)		\
>>  	FN(unspec),			\
>> @@ -3389,7 +3396,8 @@ union bpf_attr {
>>  	FN(ringbuf_submit),		\
>>  	FN(ringbuf_discard),		\
>>  	FN(ringbuf_query),		\
>> -	FN(csum_level),
>> +	FN(csum_level),			\
>> +	FN(get_task_stack_trace),
>>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>   * function eBPF program intends to call
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index e729c9e587a07..2c13bcb5c2bce 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>>  	.arg4_type	= ARG_ANYTHING,
>>  };
>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>> +	   void *, entries, u32, size)
>> +{
>> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> 
> nit: cast not needed.

Will fix. 

> 
>> +}
>> +
>> +static int bpf_get_task_stack_trace_btf_ids[5];
>> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
>> +	.func		= bpf_get_task_stack_trace,
>> +	.gpl_only	= true,
>> +	.ret_type	= RET_INTEGER,
>> +	.arg1_type	= ARG_PTR_TO_BTF_ID,
>> +	.arg2_type	= ARG_PTR_TO_MEM,
>> +	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
> 
> Is there a use-case to pass in entries == NULL + size == 0?

Not really. Will fix. 


> 
>> +	.btf_id		= bpf_get_task_stack_trace_btf_ids,
>> +};
>> +
Song Liu June 23, 2020, 5:40 p.m. UTC | #5
> On Jun 23, 2020, at 9:59 AM, Song Liu <songliubraving@fb.com> wrote:
> 
> 
> 
>> On Jun 23, 2020, at 8:19 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>> 
>> On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>>> 
> 
> [...]
> 
>>> 
>>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>>> +          void *, entries, u32, size)
>>> +{
>>> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>>> +}
>>> +
>>> +static int bpf_get_task_stack_trace_btf_ids[5];
>>> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
>>> +       .func           = bpf_get_task_stack_trace,
>>> +       .gpl_only       = true,
>> 
>> why?
> 
> Actually, I am not sure when we should use gpl_only = true. 
> 
>> 
>>> +       .ret_type       = RET_INTEGER,
>>> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
>>> +       .arg2_type      = ARG_PTR_TO_MEM,
>>> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
>> 
>> OR_ZERO ? why?
> 
> Will fix. 
> 
>> 
>>> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
>>> +};
>>> +
>>> static const struct bpf_func_proto *
>>> raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>> {
>>> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>>               return prog->expected_attach_type == BPF_TRACE_ITER ?
>>>                      &bpf_seq_write_proto :
>>>                      NULL;
>>> +       case BPF_FUNC_get_task_stack_trace:
>>> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
>>> +                       &bpf_get_task_stack_trace_proto :
>> 
>> why limit to iter only?
> 
> I guess it is also useful for other types. Maybe move to bpf_tracing_func_proto()?
> 
>> 
>>> + *
>>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>>> + *     Description
>>> + *             Save a task stack trace into array *entries*. This is a wrapper
>>> + *             over stack_trace_save_tsk().
>> 
>> size is not documented and looks wrong.
>> The verifier checks it in bytes, but it's consumed as a number of u32s.
> 
> I am not 100% sure, but the verifier seems to check it correctly. And I think it
> is consumed as u64s?

I was wrong. The verifier checks it in bytes. Will fix.

Song
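One possible shape for that fix, sketched here rather than taken from a later
posting: keep checking size in bytes in the verifier, convert it to the entry
count stack_trace_save_tsk() expects, and drop the cast Daniel flagged.

BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
	   void *, entries, u32, size)
{
	/* 'size' was validated in bytes; stack_trace_save_tsk() takes a
	 * count of unsigned long slots
	 */
	return stack_trace_save_tsk(task, entries,
				    size / sizeof(unsigned long), 0);
}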
Andrii Nakryiko June 23, 2020, 6:41 p.m. UTC | #6
On Tue, Jun 23, 2020 at 10:00 AM Song Liu <songliubraving@fb.com> wrote:
>
>
>
> > On Jun 23, 2020, at 8:19 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
> >>
>
> [...]
>
> >>
> >> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> >> +          void *, entries, u32, size)
> >> +{
> >> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> >> +}
> >> +
> >> +static int bpf_get_task_stack_trace_btf_ids[5];
> >> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> >> +       .func           = bpf_get_task_stack_trace,
> >> +       .gpl_only       = true,
> >
> > why?
>
> Actually, I am not sure when we should use gpl_only = true.
>
> >
> >> +       .ret_type       = RET_INTEGER,
> >> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
> >> +       .arg2_type      = ARG_PTR_TO_MEM,
> >> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
> >
> > OR_ZERO ? why?
>
> Will fix.

I actually think it's a good idea, because it makes writing code that
uses variable-sized buffers easier. Remember how we had
bpf_perf_event_output() forcing size > 0? That was a major PITA and
required unnecessary code gymnastics to prove to the verifier that it's
OK (even if zero size was never possible). Yonghong eventually fixed
that to be _OR_ZERO.

So if this is not causing any problems, please leave it as _OR_ZERO.
Thank you from everyone who had to suffer through dealing with
anything variable-sized in BPF!
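As an illustration, the pattern _OR_ZERO enables, with a hypothetical
runtime-computed length (a fragment, not code from the series):

	unsigned long buf[8];
	u32 len = compute_len();	/* hypothetical, may be zero */

	if (len > sizeof(buf))
		len = sizeof(buf);	/* upper bound for the verifier */
	/* ARG_CONST_SIZE_OR_ZERO: len == 0 needs no special-case branch */
	bpf_get_task_stack_trace(task, buf, len);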

>
> >
> >> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
> >> +};
> >> +
> >> static const struct bpf_func_proto *
> >> raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >> {
> >> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >>                return prog->expected_attach_type == BPF_TRACE_ITER ?
> >>                       &bpf_seq_write_proto :
> >>                       NULL;
> >> +       case BPF_FUNC_get_task_stack_trace:
> >> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
> >> +                       &bpf_get_task_stack_trace_proto :
> >
> > why limit to iter only?
>
> I guess it is also useful for other types. Maybe move to bpf_tracing_func_proto()?
>
> >
> >> + *
> >> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> >> + *     Description
> >> + *             Save a task stack trace into array *entries*. This is a wrapper
> >> + *             over stack_trace_save_tsk().
> >
> > size is not documented and looks wrong.
> > The verifier checks it in bytes, but it's consumed as a number of u32s.
>
> I am not 100% sure, but the verifier seems to check it correctly. And I think it
> is consumed as u64s?
>
> Thanks,
> Song
>
Andrii Nakryiko June 23, 2020, 6:45 p.m. UTC | #7
On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>
> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
> a seq_file.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/uapi/linux/bpf.h       | 10 +++++++++-
>  kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  2 ++
>  tools/include/uapi/linux/bpf.h | 10 +++++++++-
>  4 files changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 19684813faaed..a30416b822fe3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3252,6 +3252,13 @@ union bpf_attr {
>   *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>   *             is returned or the error code -EACCES in case the skb is not
>   *             subject to CHECKSUM_UNNECESSARY.
> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *     Description
> + *             Save a task stack trace into array *entries*. This is a wrapper
> + *             over stack_trace_save_tsk().
> + *     Return
> + *             Number of trace entries stored.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -3389,7 +3396,8 @@ union bpf_attr {
>         FN(ringbuf_submit),             \
>         FN(ringbuf_discard),            \
>         FN(ringbuf_query),              \
> -       FN(csum_level),
> +       FN(csum_level),                 \
> +       FN(get_task_stack_trace),

We have get_stackid and get_stack; to stay consistent, I think it
should be named get_task_stack.

>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e729c9e587a07..2c13bcb5c2bce 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>         .arg4_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> +          void *, entries, u32, size)

See the get_stack definition; this one needs to support flags as well.
We should probably support BPF_F_USER_BUILD_ID too. And
BPF_F_USER_STACK is also a good idea, I presume?

> +{
> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> +}
> +
> +static int bpf_get_task_stack_trace_btf_ids[5];
> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> +       .func           = bpf_get_task_stack_trace,
> +       .gpl_only       = true,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
> +       .arg2_type      = ARG_PTR_TO_MEM,
> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
> +};
> +

[...]
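For reference, a flags-taking variant along those lines might look like this
(a sketch; this signature is not in the posted patch):

	long bpf_get_task_stack(struct task_struct *task, void *buf,
				u32 size, u64 flags);

where flags could accept BPF_F_USER_STACK to capture the user-space stack and
BPF_F_USER_BUILD_ID to store build-id+offset records, mirroring
bpf_get_stack().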
Song Liu June 23, 2020, 10:53 p.m. UTC | #8
> On Jun 23, 2020, at 11:45 AM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>> 
>> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
>> a seq_file.
>> 
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> include/uapi/linux/bpf.h       | 10 +++++++++-
>> kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>> scripts/bpf_helpers_doc.py     |  2 ++
>> tools/include/uapi/linux/bpf.h | 10 +++++++++-
>> 4 files changed, 41 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 19684813faaed..a30416b822fe3 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3252,6 +3252,13 @@ union bpf_attr {
>>  *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>>  *             is returned or the error code -EACCES in case the skb is not
>>  *             subject to CHECKSUM_UNNECESSARY.
>> + *
>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>> + *     Description
>> + *             Save a task stack trace into array *entries*. This is a wrapper
>> + *             over stack_trace_save_tsk().
>> + *     Return
>> + *             Number of trace entries stored.
>>  */
>> #define __BPF_FUNC_MAPPER(FN)          \
>>        FN(unspec),                     \
>> @@ -3389,7 +3396,8 @@ union bpf_attr {
>>        FN(ringbuf_submit),             \
>>        FN(ringbuf_discard),            \
>>        FN(ringbuf_query),              \
>> -       FN(csum_level),
>> +       FN(csum_level),                 \
>> +       FN(get_task_stack_trace),
> 
> We have get_stackid and get_stack; to stay consistent, I think it
> should be named get_task_stack.
> 
>> 
>> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>  * function eBPF program intends to call
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index e729c9e587a07..2c13bcb5c2bce 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>>        .arg4_type      = ARG_ANYTHING,
>> };
>> 
>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>> +          void *, entries, u32, size)
> 
> See the get_stack definition; this one needs to support flags as well.
> We should probably support BPF_F_USER_BUILD_ID too. And
> BPF_F_USER_STACK is also a good idea, I presume?

That would be a different direction, similar to the stackmap implementation.
The current version follows the implementation behind /proc/<pid>/stack. Let
me check which of the two is better.

Thanks,
Song

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 19684813faaed..a30416b822fe3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3252,6 +3252,13 @@  union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
+ *	Description
+ *		Save a task stack trace into array *entries*. This is a wrapper
+ *		over stack_trace_save_tsk().
+ *	Return
+ *		Number of trace entries stored.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3389,7 +3396,8 @@  union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(get_task_stack_trace),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e729c9e587a07..2c13bcb5c2bce 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1488,6 +1488,23 @@  static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
 	.arg4_type	= ARG_ANYTHING,
 };

+BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
+	   void *, entries, u32, size)
+{
+	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
+}
+
+static int bpf_get_task_stack_trace_btf_ids[5];
+static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
+	.func		= bpf_get_task_stack_trace,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_get_task_stack_trace_btf_ids,
+};
+
 static const struct bpf_func_proto *
 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1521,6 +1538,10 @@  tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
 		       &bpf_seq_write_proto :
 		       NULL;
+	case BPF_FUNC_get_task_stack_trace:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+			&bpf_get_task_stack_trace_proto :
+			NULL;
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 91fa668fa8602..a8783d668c5b7 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -421,6 +421,7 @@  class PrinterHelpers(Printer):
             'struct sockaddr',
             'struct tcphdr',
             'struct seq_file',
+            'struct task_struct',

             'struct __sk_buff',
             'struct sk_msg_md',
@@ -458,6 +459,7 @@  class PrinterHelpers(Printer):
             'struct sockaddr',
             'struct tcphdr',
             'struct seq_file',
+            'struct task_struct',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 19684813faaed..a30416b822fe3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3252,6 +3252,13 @@  union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
+ *	Description
+ *		Save a task stack trace into array *entries*. This is a wrapper
+ *		over stack_trace_save_tsk().
+ *	Return
+ *		Number of trace entries stored.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3389,7 +3396,8 @@  union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(get_task_stack_trace),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call