Message ID | 20170803052828.2303723-2-yhs@fb.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Wed, Aug 02, 2017 at 10:28:27PM -0700, Yonghong Song wrote: > Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* > style tracepoints. The iovisor/bcc issue #748 > (https://github.com/iovisor/bcc/issues/748) documents this issue. > For example, if you try to attach a bpf program to tracepoints > syscalls/sys_enter_newfstat, you will get the following error: > # ./tools/trace.py t:syscalls:sys_enter_newfstat > Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument > Failed to attach BPF to tracepoint > > The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* > tracepoints are treated differently from other tracepoints and there > is no bpf hook to it. > > This patch adds bpf support for these syscalls tracepoints by > . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF > . calling bpf programs in perf_syscall_enter and perf_syscall_exit > > Signed-off-by: Yonghong Song <yhs@fb.com> Ack for the perf bits, but you should've Cc'ed steve too I suppose. > --- > include/linux/syscalls.h | 6 +++++ > kernel/events/core.c | 8 ++++--- > kernel/trace/trace_syscalls.c | 53 +++++++++++++++++++++++++++++++++++++++++-- > 3 files changed, 62 insertions(+), 5 deletions(-) > > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index 3cb15ea..00fa3eb 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -117,6 +117,12 @@ extern struct trace_event_class event_class_syscall_exit; > extern struct trace_event_functions enter_syscall_print_funcs; > extern struct trace_event_functions exit_syscall_print_funcs; > > +static inline int is_syscall_trace_event(struct trace_event_call *tp_event) > +{ > + return tp_event->class == &event_class_syscall_enter || > + tp_event->class == &event_class_syscall_exit; > +} > + > #define SYSCALL_TRACE_ENTER_EVENT(sname) \ > static struct syscall_metadata __syscall_meta_##sname; \ > static struct trace_event_call __used \ > diff --git a/kernel/events/core.c b/kernel/events/core.c 
> index 426c2ff..750b8d3 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -8050,7 +8050,7 @@ static void perf_event_free_bpf_handler(struct perf_event *event) > > static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) > { > - bool is_kprobe, is_tracepoint; > + bool is_kprobe, is_tracepoint, is_syscall_tp; > struct bpf_prog *prog; > > if (event->attr.type != PERF_TYPE_TRACEPOINT) > @@ -8061,7 +8061,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) > > is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; > is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; > - if (!is_kprobe && !is_tracepoint) > + is_syscall_tp = is_syscall_trace_event(event->tp_event); > + if (!is_kprobe && !is_tracepoint && !is_syscall_tp) > /* bpf programs can only be attached to u/kprobe or tracepoint */ > return -EINVAL; > > @@ -8070,7 +8071,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) > return PTR_ERR(prog); > > if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) || > - (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) { > + (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) || > + (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) { > /* valid fd, but invalid bpf program type */ > bpf_prog_put(prog); > return -EINVAL; > diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c > index 5e10395..3bd9e1c 100644 > --- a/kernel/trace/trace_syscalls.c > +++ b/kernel/trace/trace_syscalls.c > @@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); > static int sys_perf_refcount_enter; > static int sys_perf_refcount_exit; > > +static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, > + struct syscall_metadata *sys_data, > + struct syscall_trace_enter *rec) { > + struct syscall_tp_t { > + unsigned long long regs; > + unsigned long syscall_nr; > + unsigned long args[6]; /* maximum 6 
arguments */ > + } param; > + int i; > + > + *(struct pt_regs **)&param = regs; > + param.syscall_nr = rec->nr; > + for (i = 0; i < sys_data->nb_args && i < 6; i++) > + param.args[i] = rec->args[i]; > + return trace_call_bpf(prog, &param); > +} > + > static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) > { > struct syscall_metadata *sys_data; > struct syscall_trace_enter *rec; > struct hlist_head *head; > + struct bpf_prog *prog; > int syscall_nr; > int rctx; > int size; > @@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) > if (!sys_data) > return; > > + prog = READ_ONCE(sys_data->enter_event->prog); > head = this_cpu_ptr(sys_data->enter_event->perf_events); > - if (hlist_empty(head)) > + if (!prog && hlist_empty(head)) > return; > > /* get the size after alignment with the u32 buffer size field */ > @@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) > rec->nr = syscall_nr; > syscall_get_arguments(current, regs, 0, sys_data->nb_args, > (unsigned long *)&rec->args); > + > + if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || > + hlist_empty(head)) { > + perf_swevent_put_recursion_context(rctx); > + return; > + } > + > perf_trace_buf_submit(rec, size, rctx, > sys_data->enter_event->event.type, 1, regs, > head, NULL); > @@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct trace_event_call *call) > mutex_unlock(&syscall_trace_lock); > } > > +static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, > + struct syscall_trace_exit *rec) { > + struct syscall_tp_t { > + unsigned long long regs; > + unsigned long syscall_nr; > + unsigned long ret; > + } param; > + > + *(struct pt_regs **)&param = regs; > + param.syscall_nr = rec->nr; > + param.ret = rec->ret; > + return trace_call_bpf(prog, &param); > +} > + > static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) > { > struct syscall_metadata *sys_data; > struct
syscall_trace_exit *rec; > struct hlist_head *head; > + struct bpf_prog *prog; > int syscall_nr; > int rctx; > int size; > @@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) > if (!sys_data) > return; > > + prog = READ_ONCE(sys_data->exit_event->prog); > head = this_cpu_ptr(sys_data->exit_event->perf_events); > - if (hlist_empty(head)) > + if (!prog && hlist_empty(head)) > return; > > /* We can probably do that at build time */ > @@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) > > rec->nr = syscall_nr; > rec->ret = syscall_get_return_value(current, regs); > + > + if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || > + hlist_empty(head)) { > + perf_swevent_put_recursion_context(rctx); > + return; > + } > + > perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, > 1, regs, head, NULL); > } > -- > 2.9.4 >
Hi Yonghong, [auto build test ERROR on net-next/master] url: https://github.com/0day-ci/linux/commits/Yonghong-Song/bpf-add-support-for-sys_-enter-exit-_-tracepoints/20170803-213504 config: i386-randconfig-x019-201731 (attached as .config) compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): kernel/events/core.c: In function 'perf_event_set_bpf_prog': >> kernel/events/core.c:8073:18: error: implicit declaration of function 'is_syscall_trace_event' [-Werror=implicit-function-declaration] is_syscall_tp = is_syscall_trace_event(event->tp_event); ^~~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors vim +/is_syscall_trace_event +8073 kernel/events/core.c 8059 8060 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) 8061 { 8062 bool is_kprobe, is_tracepoint, is_syscall_tp; 8063 struct bpf_prog *prog; 8064 8065 if (event->attr.type != PERF_TYPE_TRACEPOINT) 8066 return perf_event_set_bpf_handler(event, prog_fd); 8067 8068 if (event->tp_event->prog) 8069 return -EEXIST; 8070 8071 is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; 8072 is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; > 8073 is_syscall_tp = is_syscall_trace_event(event->tp_event); 8074 if (!is_kprobe && !is_tracepoint && !is_syscall_tp) 8075 /* bpf programs can only be attached to u/kprobe or tracepoint */ 8076 return -EINVAL; 8077 8078 prog = bpf_prog_get(prog_fd); 8079 if (IS_ERR(prog)) 8080 return PTR_ERR(prog); 8081 8082 if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) || 8083 (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) || 8084 (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) { 8085 /* valid fd, but invalid bpf program type */ 8086 bpf_prog_put(prog); 8087 return -EINVAL; 8088 } 8089 8090 if (is_tracepoint) { 8091 int off = trace_event_get_offsets(event->tp_event); 8092 8093 if (prog->aux->max_ctx_offset > 
off) { 8094 bpf_prog_put(prog); 8095 return -EACCES; 8096 } 8097 } 8098 event->tp_event->prog = prog; 8099 8100 return 0; 8101 } 8102 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
On 8/3/17 1:08 AM, Peter Zijlstra wrote: > On Wed, Aug 02, 2017 at 10:28:27PM -0700, Yonghong Song wrote: >> Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* >> style tracepoints. The iovisor/bcc issue #748 >> (https://github.com/iovisor/bcc/issues/748) documents this issue. >> For example, if you try to attach a bpf program to tracepoints >> syscalls/sys_enter_newfstat, you will get the following error: >> # ./tools/trace.py t:syscalls:sys_enter_newfstat >> Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument >> Failed to attach BPF to tracepoint >> >> The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* >> tracepoints are treated differently from other tracepoints and there >> is no bpf hook to it. >> >> This patch adds bpf support for these syscalls tracepoints by >> . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF >> . calling bpf programs in perf_syscall_enter and perf_syscall_exit >> >> Signed-off-by: Yonghong Song <yhs@fb.com> > > Ack for the perf bits, but you should've Cc'ed steve too I suppose. Thanks, Peter. This is first time I posted for tracing related changes. Will for sure remember this next time. There is a build error: ====== kernel/events/core.c: In function 'perf_event_set_bpf_prog': >> kernel/events/core.c:8073:18: error: implicit declaration of function 'is_syscall_trace_event' [-Werror=implicit-function-declaration] is_syscall_tp = is_syscall_trace_event(event->tp_event); ====== Will address this and send another patch soon.
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3cb15ea..00fa3eb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -117,6 +117,12 @@ extern struct trace_event_class event_class_syscall_exit; extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; +static inline int is_syscall_trace_event(struct trace_event_call *tp_event) +{ + return tp_event->class == &event_class_syscall_enter || + tp_event->class == &event_class_syscall_exit; +} + #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ static struct trace_event_call __used \ diff --git a/kernel/events/core.c b/kernel/events/core.c index 426c2ff..750b8d3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8050,7 +8050,7 @@ static void perf_event_free_bpf_handler(struct perf_event *event) static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { - bool is_kprobe, is_tracepoint; + bool is_kprobe, is_tracepoint, is_syscall_tp; struct bpf_prog *prog; if (event->attr.type != PERF_TYPE_TRACEPOINT) @@ -8061,7 +8061,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; - if (!is_kprobe && !is_tracepoint) + is_syscall_tp = is_syscall_trace_event(event->tp_event); + if (!is_kprobe && !is_tracepoint && !is_syscall_tp) /* bpf programs can only be attached to u/kprobe or tracepoint */ return -EINVAL; @@ -8070,7 +8071,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return PTR_ERR(prog); if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) || - (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) { + (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) || + (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) { /* valid fd, but invalid bpf 
program type */ bpf_prog_put(prog); return -EINVAL; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5e10395..3bd9e1c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; +static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, + struct syscall_metadata *sys_data, + struct syscall_trace_enter *rec) { + struct syscall_tp_t { + unsigned long long regs; + unsigned long syscall_nr; + unsigned long args[6]; /* maximum 6 arguments */ + } param; + int i; + + *(struct pt_regs **)&param = regs; + param.syscall_nr = rec->nr; + for (i = 0; i < sys_data->nb_args && i < 6; i++) + param.args[i] = rec->args[i]; + return trace_call_bpf(prog, &param); +} + static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; + struct bpf_prog *prog; int syscall_nr; int rctx; int size; @@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; + prog = READ_ONCE(sys_data->enter_event->prog); head = this_cpu_ptr(sys_data->enter_event->perf_events); - if (hlist_empty(head)) + if (!prog && hlist_empty(head)) return; /* get the size after alignment with the u32 buffer size field */ @@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); + + if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || + hlist_empty(head)) { + perf_swevent_put_recursion_context(rctx); + return; + } + perf_trace_buf_submit(rec, size, rctx, sys_data->enter_event->event.type, 1, regs, head, NULL); @@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct
trace_event_call *call) mutex_unlock(&syscall_trace_lock); } +static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, + struct syscall_trace_exit *rec) { + struct syscall_tp_t { + unsigned long long regs; + unsigned long syscall_nr; + unsigned long ret; + } param; + + *(struct pt_regs **)&param = regs; + param.syscall_nr = rec->nr; + param.ret = rec->ret; + return trace_call_bpf(prog, &param); +} + static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; + struct bpf_prog *prog; int syscall_nr; int rctx; int size; @@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; + prog = READ_ONCE(sys_data->exit_event->prog); head = this_cpu_ptr(sys_data->exit_event->perf_events); - if (hlist_empty(head)) + if (!prog && hlist_empty(head)) return; /* We can probably do that at build time */ @@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); + + if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || + hlist_empty(head)) { + perf_swevent_put_recursion_context(rctx); + return; + } + perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, 1, regs, head, NULL); }
Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* style tracepoints. The iovisor/bcc issue #748 (https://github.com/iovisor/bcc/issues/748) documents this issue. For example, if you try to attach a bpf program to tracepoints syscalls/sys_enter_newfstat, you will get the following error: # ./tools/trace.py t:syscalls:sys_enter_newfstat Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument Failed to attach BPF to tracepoint The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* tracepoints are treated differently from other tracepoints and there is no bpf hook to it. This patch adds bpf support for these syscalls tracepoints by . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF . calling bpf programs in perf_syscall_enter and perf_syscall_exit Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/syscalls.h | 6 +++++ kernel/events/core.c | 8 ++++--- kernel/trace/trace_syscalls.c | 53 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 62 insertions(+), 5 deletions(-)