Message ID | 20200623070802.2310018-4-songliubraving@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: introduce bpf_get_task_stack_trace() | expand |
On 6/23/20 12:08 AM, Song Liu wrote: > The new test is similar to other bpf_iter tests. > > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++++ > .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++ > 2 files changed, 67 insertions(+) > create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > index 87c29dde1cf96..baa83328f810d 100644 > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > @@ -5,6 +5,7 @@ > #include "bpf_iter_netlink.skel.h" > #include "bpf_iter_bpf_map.skel.h" > #include "bpf_iter_task.skel.h" > +#include "bpf_iter_task_stack.skel.h" > #include "bpf_iter_task_file.skel.h" > #include "bpf_iter_test_kern1.skel.h" > #include "bpf_iter_test_kern2.skel.h" > @@ -106,6 +107,20 @@ static void test_task(void) > bpf_iter_task__destroy(skel); > } > > +static void test_task_stack(void) > +{ > + struct bpf_iter_task_stack *skel; > + > + skel = bpf_iter_task_stack__open_and_load(); > + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", > + "skeleton open_and_load failed\n")) > + return; > + > + do_dummy_read(skel->progs.dump_task_stack); > + > + bpf_iter_task_stack__destroy(skel); > +} > + > static void test_task_file(void) > { > struct bpf_iter_task_file *skel; > @@ -392,6 +407,8 @@ void test_bpf_iter(void) > test_bpf_map(); > if (test__start_subtest("task")) > test_task(); > + if (test__start_subtest("task_stack")) > + test_task_stack(); > if (test__start_subtest("task_file")) > test_task_file(); > if (test__start_subtest("anon")) > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c > new file mode 100644 > index 0000000000000..4fc939e0fca77 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c > @@ -0,0 +1,50 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ > +/* "undefine" structs in vmlinux.h, because we "override" them below */ > +#define bpf_iter_meta bpf_iter_meta___not_used > +#define bpf_iter__task bpf_iter__task___not_used > +#include "vmlinux.h" > +#undef bpf_iter_meta > +#undef bpf_iter__task > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct bpf_iter_meta { > + struct seq_file *seq; > + __u64 session_id; > + __u64 seq_num; > +} __attribute__((preserve_access_index)); > + > +struct bpf_iter__task { > + struct bpf_iter_meta *meta; > + struct task_struct *task; > +} __attribute__((preserve_access_index)); > + > +#define MAX_STACK_TRACE_DEPTH 64 > +unsigned long entries[MAX_STACK_TRACE_DEPTH]; > + > +SEC("iter/task") > +int dump_task_stack(struct bpf_iter__task *ctx) > +{ > + struct seq_file *seq = ctx->meta->seq; > + struct task_struct *task = ctx->task; > + unsigned int i, num_entries; > + > + if (task == (void *)0) > + return 0; > + > + num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH); > + > + BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid); > + > + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { > + if (num_entries > i) > + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); We may have an issue on 32bit issue. On 32bit system, the following is called in the kernel + return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0); it will pack addresses at 4 byte increment. But in BPF program, the reading is in 8 byte increment. If "entries" are handled in user space, user space can just using "long *" pointer and will be able to correctly retrieve the stack addresses. Maybe add some comments to clarify this prog only works for 64bit system. > + } > + > + BPF_SEQ_PRINTF(seq, "\n"); > + > + return 0; > +} >
> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote: > > > > On 6/23/20 12:08 AM, Song Liu wrote: >> The new test is similar to other bpf_iter tests. >> Signed-off-by: Song Liu <songliubraving@fb.com> >> --- >> .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++++ >> .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++ >> 2 files changed, 67 insertions(+) >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >> index 87c29dde1cf96..baa83328f810d 100644 >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >> @@ -5,6 +5,7 @@ >> #include "bpf_iter_netlink.skel.h" >> #include "bpf_iter_bpf_map.skel.h" >> #include "bpf_iter_task.skel.h" >> +#include "bpf_iter_task_stack.skel.h" >> #include "bpf_iter_task_file.skel.h" >> #include "bpf_iter_test_kern1.skel.h" >> #include "bpf_iter_test_kern2.skel.h" >> @@ -106,6 +107,20 @@ static void test_task(void) >> bpf_iter_task__destroy(skel); >> } >> +static void test_task_stack(void) >> +{ >> + struct bpf_iter_task_stack *skel; >> + >> + skel = bpf_iter_task_stack__open_and_load(); >> + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", >> + "skeleton open_and_load failed\n")) >> + return; >> + >> + do_dummy_read(skel->progs.dump_task_stack); >> + >> + bpf_iter_task_stack__destroy(skel); >> +} >> + >> static void test_task_file(void) >> { >> struct bpf_iter_task_file *skel; >> @@ -392,6 +407,8 @@ void test_bpf_iter(void) >> test_bpf_map(); >> if (test__start_subtest("task")) >> test_task(); >> + if (test__start_subtest("task_stack")) >> + test_task_stack(); >> if (test__start_subtest("task_file")) >> test_task_file(); >> if (test__start_subtest("anon")) >> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >> new file mode 100644 >> index 0000000000000..4fc939e0fca77 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >> @@ -0,0 +1,50 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2020 Facebook */ >> +/* "undefine" structs in vmlinux.h, because we "override" them below */ >> +#define bpf_iter_meta bpf_iter_meta___not_used >> +#define bpf_iter__task bpf_iter__task___not_used >> +#include "vmlinux.h" >> +#undef bpf_iter_meta >> +#undef bpf_iter__task >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +struct bpf_iter_meta { >> + struct seq_file *seq; >> + __u64 session_id; >> + __u64 seq_num; >> +} __attribute__((preserve_access_index)); >> + >> +struct bpf_iter__task { >> + struct bpf_iter_meta *meta; >> + struct task_struct *task; >> +} __attribute__((preserve_access_index)); >> + >> +#define MAX_STACK_TRACE_DEPTH 64 >> +unsigned long entries[MAX_STACK_TRACE_DEPTH]; >> + >> +SEC("iter/task") >> +int dump_task_stack(struct bpf_iter__task *ctx) >> +{ >> + struct seq_file *seq = ctx->meta->seq; >> + struct task_struct *task = ctx->task; >> + unsigned int i, num_entries; >> + >> + if (task == (void *)0) >> + return 0; >> + >> + num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH); >> + >> + BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid); >> + >> + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { >> + if (num_entries > i) >> + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); > > We may have an issue on 32bit issue. > On 32bit system, the following is called in the kernel > + return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0); > it will pack addresses at 4 byte increment. > But in BPF program, the reading is in 8 byte increment. Can we avoid potential issues by requiring size % 8 == 0? Or maybe round down size to closest multiple of 8? Thanks, Song
On 6/23/20 3:07 PM, Song Liu wrote: > > >> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 6/23/20 12:08 AM, Song Liu wrote: >>> The new test is similar to other bpf_iter tests. >>> Signed-off-by: Song Liu <songliubraving@fb.com> >>> --- >>> .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++++ >>> .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++ >>> 2 files changed, 67 insertions(+) >>> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>> index 87c29dde1cf96..baa83328f810d 100644 >>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>> @@ -5,6 +5,7 @@ >>> #include "bpf_iter_netlink.skel.h" >>> #include "bpf_iter_bpf_map.skel.h" >>> #include "bpf_iter_task.skel.h" >>> +#include "bpf_iter_task_stack.skel.h" >>> #include "bpf_iter_task_file.skel.h" >>> #include "bpf_iter_test_kern1.skel.h" >>> #include "bpf_iter_test_kern2.skel.h" >>> @@ -106,6 +107,20 @@ static void test_task(void) >>> bpf_iter_task__destroy(skel); >>> } >>> +static void test_task_stack(void) >>> +{ >>> + struct bpf_iter_task_stack *skel; >>> + >>> + skel = bpf_iter_task_stack__open_and_load(); >>> + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", >>> + "skeleton open_and_load failed\n")) >>> + return; >>> + >>> + do_dummy_read(skel->progs.dump_task_stack); >>> + >>> + bpf_iter_task_stack__destroy(skel); >>> +} >>> + >>> static void test_task_file(void) >>> { >>> struct bpf_iter_task_file *skel; >>> @@ -392,6 +407,8 @@ void test_bpf_iter(void) >>> test_bpf_map(); >>> if (test__start_subtest("task")) >>> test_task(); >>> + if (test__start_subtest("task_stack")) >>> + test_task_stack(); >>> if (test__start_subtest("task_file")) >>> test_task_file(); >>> if (test__start_subtest("anon")) >>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>> new file mode 100644 >>> index 0000000000000..4fc939e0fca77 >>> --- /dev/null >>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>> @@ -0,0 +1,50 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* Copyright (c) 2020 Facebook */ >>> +/* "undefine" structs in vmlinux.h, because we "override" them below */ >>> +#define bpf_iter_meta bpf_iter_meta___not_used >>> +#define bpf_iter__task bpf_iter__task___not_used >>> +#include "vmlinux.h" >>> +#undef bpf_iter_meta >>> +#undef bpf_iter__task >>> +#include <bpf/bpf_helpers.h> >>> +#include <bpf/bpf_tracing.h> >>> + >>> +char _license[] SEC("license") = "GPL"; >>> + >>> +struct bpf_iter_meta { >>> + struct seq_file *seq; >>> + __u64 session_id; >>> + __u64 seq_num; >>> +} __attribute__((preserve_access_index)); >>> + >>> +struct bpf_iter__task { >>> + struct bpf_iter_meta *meta; >>> + struct task_struct *task; >>> +} __attribute__((preserve_access_index)); >>> + >>> +#define MAX_STACK_TRACE_DEPTH 64 >>> +unsigned long entries[MAX_STACK_TRACE_DEPTH]; >>> + >>> +SEC("iter/task") >>> +int dump_task_stack(struct bpf_iter__task *ctx) >>> +{ >>> + struct seq_file *seq = ctx->meta->seq; >>> + struct task_struct *task = ctx->task; >>> + unsigned int i, num_entries; >>> + >>> + if (task == (void *)0) >>> + return 0; >>> + >>> + num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH); >>> + >>> + BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid); >>> + >>> + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { >>> + if (num_entries > i) >>> + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); >> >> We may have an issue on 32bit issue. >> On 32bit system, the following is called in the kernel >> + return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0); >> it will pack addresses at 4 byte increment. >> But in BPF program, the reading is in 8 byte increment. > > Can we avoid potential issues by requiring size % 8 == 0? Or maybe round down > size to closest multiple of 8? This is what I mean: for bpf program: "long" means u64, so we allocate 64 * 8 buffer size and pass it to the helper in the helper, the address will be increased along sizeof(long), which is 4 for 32bit system. So address is recorded at buf, buf + 4, buf + 8, buf + 12, ... After the helper returns, the bpf program tries to retrieve the address at buf, buf + 8, buf + 16. The helper itself is okay. But BPF_SEQ_PRINTF above is wrong. Is this interpretation correct? > > Thanks, > Song >
> On Jun 23, 2020, at 3:27 PM, Yonghong Song <yhs@fb.com> wrote: > > > > On 6/23/20 3:07 PM, Song Liu wrote: >>> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote: >>> >>> >>> >>> On 6/23/20 12:08 AM, Song Liu wrote: >>>> The new test is similar to other bpf_iter tests. >>>> Signed-off-by: Song Liu <songliubraving@fb.com> >>>> --- >>>> .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++++ >>>> .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++ >>>> 2 files changed, 67 insertions(+) >>>> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>>> index 87c29dde1cf96..baa83328f810d 100644 >>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>>> @@ -5,6 +5,7 @@ >>>> #include "bpf_iter_netlink.skel.h" >>>> #include "bpf_iter_bpf_map.skel.h" >>>> #include "bpf_iter_task.skel.h" >>>> +#include "bpf_iter_task_stack.skel.h" >>>> #include "bpf_iter_task_file.skel.h" >>>> #include "bpf_iter_test_kern1.skel.h" >>>> #include "bpf_iter_test_kern2.skel.h" >>>> @@ -106,6 +107,20 @@ static void test_task(void) >>>> bpf_iter_task__destroy(skel); >>>> } >>>> +static void test_task_stack(void) >>>> +{ >>>> + struct bpf_iter_task_stack *skel; >>>> + >>>> + skel = bpf_iter_task_stack__open_and_load(); >>>> + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", >>>> + "skeleton open_and_load failed\n")) >>>> + return; >>>> + >>>> + do_dummy_read(skel->progs.dump_task_stack); >>>> + >>>> + bpf_iter_task_stack__destroy(skel); >>>> +} >>>> + >>>> static void test_task_file(void) >>>> { >>>> struct bpf_iter_task_file *skel; >>>> @@ -392,6 +407,8 @@ void test_bpf_iter(void) >>>> test_bpf_map(); >>>> if (test__start_subtest("task")) >>>> test_task(); >>>> + if (test__start_subtest("task_stack")) >>>> + test_task_stack(); >>>> if (test__start_subtest("task_file")) >>>> test_task_file(); >>>> if (test__start_subtest("anon")) >>>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>>> new file mode 100644 >>>> index 0000000000000..4fc939e0fca77 >>>> --- /dev/null >>>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>>> @@ -0,0 +1,50 @@ >>>> +// SPDX-License-Identifier: GPL-2.0 >>>> +/* Copyright (c) 2020 Facebook */ >>>> +/* "undefine" structs in vmlinux.h, because we "override" them below */ >>>> +#define bpf_iter_meta bpf_iter_meta___not_used >>>> +#define bpf_iter__task bpf_iter__task___not_used >>>> +#include "vmlinux.h" >>>> +#undef bpf_iter_meta >>>> +#undef bpf_iter__task >>>> +#include <bpf/bpf_helpers.h> >>>> +#include <bpf/bpf_tracing.h> >>>> + >>>> +char _license[] SEC("license") = "GPL"; >>>> + >>>> +struct bpf_iter_meta { >>>> + struct seq_file *seq; >>>> + __u64 session_id; >>>> + __u64 seq_num; >>>> +} __attribute__((preserve_access_index)); >>>> + >>>> +struct bpf_iter__task { >>>> + struct bpf_iter_meta *meta; >>>> + struct task_struct *task; >>>> +} __attribute__((preserve_access_index)); >>>> + >>>> +#define MAX_STACK_TRACE_DEPTH 64 >>>> +unsigned long entries[MAX_STACK_TRACE_DEPTH]; >>>> + >>>> +SEC("iter/task") >>>> +int dump_task_stack(struct bpf_iter__task *ctx) >>>> +{ >>>> + struct seq_file *seq = ctx->meta->seq; >>>> + struct task_struct *task = ctx->task; >>>> + unsigned int i, num_entries; >>>> + >>>> + if (task == (void *)0) >>>> + return 0; >>>> + >>>> + num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH); >>>> + >>>> + BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid); >>>> + >>>> + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { >>>> + if (num_entries > i) >>>> + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); >>> >>> We may have an issue on 32bit issue. >>> On 32bit system, the following is called in the kernel >>> + return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0); >>> it will pack addresses at 4 byte increment. >>> But in BPF program, the reading is in 8 byte increment. >> Can we avoid potential issues by requiring size % 8 == 0? Or maybe round down >> size to closest multiple of 8? > > This is what I mean: > for bpf program: "long" means u64, so we allocate 64 * 8 buffer size > and pass it to the helper > in the helper, the address will be increased along sizeof(long), which > is 4 for 32bit system. > So address is recorded at buf, buf + 4, buf + 8, buf + 12, ... > After the helper returns, the bpf program tries to retrieve > the address at buf, buf + 8, buf + 16. > > The helper itself is okay. But BPF_SEQ_PRINTF above is wrong. > Is this interpretation correct? Thanks for the clarification. I guess the best solution is to fix this once in the kernel, so BPF programs don't have to worry about it. Song
On 6/24/20 1:37 PM, Song Liu wrote: > > >> On Jun 23, 2020, at 3:27 PM, Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 6/23/20 3:07 PM, Song Liu wrote: >>>> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote: >>>> >>>> >>>> >>>> On 6/23/20 12:08 AM, Song Liu wrote: >>>>> The new test is similar to other bpf_iter tests. >>>>> Signed-off-by: Song Liu <songliubraving@fb.com> >>>>> --- >>>>> .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++++ >>>>> .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++ >>>>> 2 files changed, 67 insertions(+) >>>>> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>>>> index 87c29dde1cf96..baa83328f810d 100644 >>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c >>>>> @@ -5,6 +5,7 @@ >>>>> #include "bpf_iter_netlink.skel.h" >>>>> #include "bpf_iter_bpf_map.skel.h" >>>>> #include "bpf_iter_task.skel.h" >>>>> +#include "bpf_iter_task_stack.skel.h" >>>>> #include "bpf_iter_task_file.skel.h" >>>>> #include "bpf_iter_test_kern1.skel.h" >>>>> #include "bpf_iter_test_kern2.skel.h" >>>>> @@ -106,6 +107,20 @@ static void test_task(void) >>>>> bpf_iter_task__destroy(skel); >>>>> } >>>>> +static void test_task_stack(void) >>>>> +{ >>>>> + struct bpf_iter_task_stack *skel; >>>>> + >>>>> + skel = bpf_iter_task_stack__open_and_load(); >>>>> + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", >>>>> + "skeleton open_and_load failed\n")) >>>>> + return; >>>>> + >>>>> + do_dummy_read(skel->progs.dump_task_stack); >>>>> + >>>>> + bpf_iter_task_stack__destroy(skel); >>>>> +} >>>>> + >>>>> static void test_task_file(void) >>>>> { >>>>> struct bpf_iter_task_file *skel; >>>>> @@ -392,6 +407,8 @@ void test_bpf_iter(void) >>>>> test_bpf_map(); >>>>> if (test__start_subtest("task")) >>>>> test_task(); >>>>> + if (test__start_subtest("task_stack")) >>>>> + test_task_stack(); >>>>> if (test__start_subtest("task_file")) >>>>> test_task_file(); >>>>> if (test__start_subtest("anon")) >>>>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>>>> new file mode 100644 >>>>> index 0000000000000..4fc939e0fca77 >>>>> --- /dev/null >>>>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c >>>>> @@ -0,0 +1,50 @@ >>>>> +// SPDX-License-Identifier: GPL-2.0 >>>>> +/* Copyright (c) 2020 Facebook */ >>>>> +/* "undefine" structs in vmlinux.h, because we "override" them below */ >>>>> +#define bpf_iter_meta bpf_iter_meta___not_used >>>>> +#define bpf_iter__task bpf_iter__task___not_used >>>>> +#include "vmlinux.h" >>>>> +#undef bpf_iter_meta >>>>> +#undef bpf_iter__task >>>>> +#include <bpf/bpf_helpers.h> >>>>> +#include <bpf/bpf_tracing.h> >>>>> + >>>>> +char _license[] SEC("license") = "GPL"; >>>>> + >>>>> +struct bpf_iter_meta { >>>>> + struct seq_file *seq; >>>>> + __u64 session_id; >>>>> + __u64 seq_num; >>>>> +} __attribute__((preserve_access_index)); >>>>> + >>>>> +struct bpf_iter__task { >>>>> + struct bpf_iter_meta *meta; >>>>> + struct task_struct *task; >>>>> +} __attribute__((preserve_access_index)); >>>>> + >>>>> +#define MAX_STACK_TRACE_DEPTH 64 >>>>> +unsigned long entries[MAX_STACK_TRACE_DEPTH]; >>>>> + >>>>> +SEC("iter/task") >>>>> +int dump_task_stack(struct bpf_iter__task *ctx) >>>>> +{ >>>>> + struct seq_file *seq = ctx->meta->seq; >>>>> + struct task_struct *task = ctx->task; >>>>> + unsigned int i, num_entries; >>>>> + >>>>> + if (task == (void *)0) >>>>> + return 0; >>>>> + >>>>> + num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH); >>>>> + >>>>> + BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid); >>>>> + >>>>> + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { >>>>> + if (num_entries > i) >>>>> + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); >>>> >>>> We may have an issue on 32bit issue. >>>> On 32bit system, the following is called in the kernel >>>> + return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0); >>>> it will pack addresses at 4 byte increment. >>>> But in BPF program, the reading is in 8 byte increment. >>> Can we avoid potential issues by requiring size % 8 == 0? Or maybe round down >>> size to closest multiple of 8? >> >> This is what I mean: >> for bpf program: "long" means u64, so we allocate 64 * 8 buffer size >> and pass it to the helper >> in the helper, the address will be increased along sizeof(long), which >> is 4 for 32bit system. >> So address is recorded at buf, buf + 4, buf + 8, buf + 12, ... >> After the helper returns, the bpf program tries to retrieve >> the address at buf, buf + 8, buf + 16. >> >> The helper itself is okay. But BPF_SEQ_PRINTF above is wrong. >> Is this interpretation correct? > > Thanks for the clarification. I guess the best solution is to fix this > once in the kernel, so BPF programs don't have to worry about it. The kernel could make each entry 8 bytes. This will cause less potential entries for 32bit, probably fine. Another option is BPF program declares an extern variable CONFIG_64BIT and it is 'y', that means 64 bit. Otherwise it is 32bit. libbpf should set CONFIG_64BIT correctly. I guess storing each address as 64bit probably a better and less confusion choice. > > Song >
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 87c29dde1cf96..baa83328f810d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -5,6 +5,7 @@ #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" #include "bpf_iter_task.skel.h" +#include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" #include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern2.skel.h" @@ -106,6 +107,20 @@ static void test_task(void) bpf_iter_task__destroy(skel); } +static void test_task_stack(void) +{ + struct bpf_iter_task_stack *skel; + + skel = bpf_iter_task_stack__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_task_stack); + + bpf_iter_task_stack__destroy(skel); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; @@ -392,6 +407,8 @@ void test_bpf_iter(void) test_bpf_map(); if (test__start_subtest("task")) test_task(); + if (test__start_subtest("task_stack")) + test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); if (test__start_subtest("anon")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c new file mode 100644 index 0000000000000..4fc939e0fca77 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__task bpf_iter__task___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__task +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +#define MAX_STACK_TRACE_DEPTH 64 +unsigned long entries[MAX_STACK_TRACE_DEPTH]; + +SEC("iter/task") +int dump_task_stack(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + unsigned int i, num_entries; + + if (task == (void *)0) + return 0; + + num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH); + + BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid); + + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { + if (num_entries > i) + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); + } + + BPF_SEQ_PRINTF(seq, "\n"); + + return 0; +}
The new test is similar to other bpf_iter tests. Signed-off-by: Song Liu <songliubraving@fb.com> --- .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++++ .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c