Message ID | 20190109192111.130995-2-songliubraving@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | reveal invisible bpf programs | expand |
Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu: > For better performance analysis of dynamically JITed and loaded kernel > functions, such as BPF programs, this patch introduces > PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol > register/unregister information to user space. > > The following data structure is used for PERF_RECORD_KSYMBOL. > > /* > * struct { > * struct perf_event_header header; > * u64 addr; > * u32 len; > * u16 ksym_type; > * u16 flags; > * char name[]; > * struct sample_id sample_id; > * }; > */ So, I couldn't find where this gets used, the intention here is just to add the interfaces and afterwards is that you will wire this up? I would like to test the whole shebang to see it working. - Arnaldo > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/linux/perf_event.h | 13 +++++ > include/uapi/linux/perf_event.h | 26 ++++++++- > kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++- > 3 files changed, 135 insertions(+), 2 deletions(-) > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 1d5c551a5add..6b5f08db5ef3 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -1113,6 +1113,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, > } > > extern void perf_event_mmap(struct vm_area_struct *vma); > + > +/* callback function to generate ksymbol name */ > +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); > +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, > + bool unregister, > + perf_ksymbol_get_name_f get_name, void *data); > + > extern struct perf_guest_info_callbacks *perf_guest_cbs; > extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); > extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); > @@ -1333,6 +1340,12 @@ static inline int perf_unregister_guest_info_callbacks > (struct 
perf_guest_info_callbacks *callbacks) { return 0; } > > static inline void perf_event_mmap(struct vm_area_struct *vma) { } > + > +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); > +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, > + bool unregister, > + perf_ksymbol_get_name_f get_name, > + void *data) { } > static inline void perf_event_exec(void) { } > static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } > static inline void perf_event_namespaces(struct task_struct *tsk) { } > diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h > index 9de8780ac8d9..68c4da0227c5 100644 > --- a/include/uapi/linux/perf_event.h > +++ b/include/uapi/linux/perf_event.h > @@ -372,7 +372,8 @@ struct perf_event_attr { > context_switch : 1, /* context switch data */ > write_backward : 1, /* Write ring buffer from end to beginning */ > namespaces : 1, /* include namespaces data */ > - __reserved_1 : 35; > + ksymbol : 1, /* include ksymbol events */ > + __reserved_1 : 34; > > union { > __u32 wakeup_events; /* wakeup every n events */ > @@ -965,9 +966,32 @@ enum perf_event_type { > */ > PERF_RECORD_NAMESPACES = 16, > > + /* > + * Record ksymbol register/unregister events: > + * > + * struct { > + * struct perf_event_header header; > + * u64 addr; > + * u32 len; > + * u16 ksym_type; > + * u16 flags; > + * char name[]; > + * struct sample_id sample_id; > + * }; > + */ > + PERF_RECORD_KSYMBOL = 17, > + > PERF_RECORD_MAX, /* non-ABI */ > }; > > +enum perf_record_ksymbol_type { > + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, > + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, > + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ > +}; > + > +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) > + > #define PERF_MAX_STACK_DEPTH 127 > #define PERF_MAX_CONTEXTS_PER_STACK 8 > > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 3cd13a30f732..ef27f2776999 100644 > --- a/kernel/events/core.c > +++ 
b/kernel/events/core.c > @@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly; > static atomic_t nr_task_events __read_mostly; > static atomic_t nr_freq_events __read_mostly; > static atomic_t nr_switch_events __read_mostly; > +static atomic_t nr_ksymbol_events __read_mostly; > > static LIST_HEAD(pmus); > static DEFINE_MUTEX(pmus_lock); > @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) > > if (attr->mmap || attr->mmap_data || attr->mmap2 || > attr->comm || attr->comm_exec || > - attr->task || > + attr->task || attr->ksymbol || > attr->context_switch) > return true; > return false; > @@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event) > dec = true; > if (has_branch_stack(event)) > dec = true; > + if (event->attr.ksymbol) > + atomic_dec(&nr_ksymbol_events); > > if (dec) { > if (!atomic_add_unless(&perf_sched_count, -1, 1)) > @@ -7650,6 +7653,97 @@ static void perf_log_throttle(struct perf_event *event, int enable) > perf_output_end(&handle); > } > > +/* > + * ksymbol register/unregister tracking > + */ > + > +struct perf_ksymbol_event { > + const char *name; > + int name_len; > + struct { > + struct perf_event_header header; > + u64 addr; > + u32 len; > + u16 ksym_type; > + u16 flags; > + } event_id; > +}; > + > +static int perf_event_ksymbol_match(struct perf_event *event) > +{ > + return event->attr.ksymbol; > +} > + > +static void perf_event_ksymbol_output(struct perf_event *event, void *data) > +{ > + struct perf_ksymbol_event *ksymbol_event = data; > + struct perf_output_handle handle; > + struct perf_sample_data sample; > + int ret; > + > + if (!perf_event_ksymbol_match(event)) > + return; > + > + perf_event_header__init_id(&ksymbol_event->event_id.header, > + &sample, event); > + ret = perf_output_begin(&handle, event, > + ksymbol_event->event_id.header.size); > + if (ret) > + return; > + > + perf_output_put(&handle, ksymbol_event->event_id); > + __output_copy(&handle, ksymbol_event->name, 
ksymbol_event->name_len); > + perf_event__output_id_sample(event, &handle, &sample); > + > + perf_output_end(&handle); > +} > + > +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, > + perf_ksymbol_get_name_f get_name, void *data) > +{ > + struct perf_ksymbol_event ksymbol_event; > + char name[KSYM_NAME_LEN]; > + u16 flags = 0; > + int name_len; > + > + if (!atomic_read(&nr_ksymbol_events)) > + return; > + > + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || > + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) > + goto err; > + > + get_name(name, KSYM_NAME_LEN, data); > + name_len = strlen(name) + 1; > + while (!IS_ALIGNED(name_len, sizeof(u64))) > + name[name_len++] = '\0'; > + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); > + > + if (unregister) > + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; > + > + ksymbol_event = (struct perf_ksymbol_event){ > + .name = name, > + .name_len = name_len, > + .event_id = { > + .header = { > + .type = PERF_RECORD_KSYMBOL, > + .size = sizeof(ksymbol_event.event_id) + > + name_len, > + }, > + .addr = addr, > + .len = len, > + .ksym_type = ksym_type, > + .flags = flags, > + }, > + }; > + > + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); > + return; > +err: > + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); > +} > + > void perf_event_itrace_started(struct perf_event *event) > { > event->attach_state |= PERF_ATTACH_ITRACE; > @@ -9900,6 +9994,8 @@ static void account_event(struct perf_event *event) > inc = true; > if (is_cgroup_event(event)) > inc = true; > + if (event->attr.ksymbol) > + atomic_inc(&nr_ksymbol_events); > > if (inc) { > /* > -- > 2.17.1
> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu: >> For better performance analysis of dynamically JITed and loaded kernel >> functions, such as BPF programs, this patch introduces >> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol >> register/unregister information to user space. >> >> The following data structure is used for PERF_RECORD_KSYMBOL. >> >> /* >> * struct { >> * struct perf_event_header header; >> * u64 addr; >> * u32 len; >> * u16 ksym_type; >> * u16 flags; >> * char name[]; >> * struct sample_id sample_id; >> * }; >> */ > > So, I couldn't find where this gets used, the intention here is just to > add the interfaces and afterwards is that you will wire this up? I would > like to test the whole shebang to see it working. > > - Arnaldo I guess you meant PERF_RECORD_BPF_EVENT not being used? PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support. 
Thanks, Song >> Signed-off-by: Song Liu <songliubraving@fb.com> >> --- >> include/linux/perf_event.h | 13 +++++ >> include/uapi/linux/perf_event.h | 26 ++++++++- >> kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++- >> 3 files changed, 135 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h >> index 1d5c551a5add..6b5f08db5ef3 100644 >> --- a/include/linux/perf_event.h >> +++ b/include/linux/perf_event.h >> @@ -1113,6 +1113,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, >> } >> >> extern void perf_event_mmap(struct vm_area_struct *vma); >> + >> +/* callback function to generate ksymbol name */ >> +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); >> +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, >> + bool unregister, >> + perf_ksymbol_get_name_f get_name, void *data); >> + >> extern struct perf_guest_info_callbacks *perf_guest_cbs; >> extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); >> extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); >> @@ -1333,6 +1340,12 @@ static inline int perf_unregister_guest_info_callbacks >> (struct perf_guest_info_callbacks *callbacks) { return 0; } >> >> static inline void perf_event_mmap(struct vm_area_struct *vma) { } >> + >> +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); >> +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, >> + bool unregister, >> + perf_ksymbol_get_name_f get_name, >> + void *data) { } >> static inline void perf_event_exec(void) { } >> static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } >> static inline void perf_event_namespaces(struct task_struct *tsk) { } >> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h >> index 9de8780ac8d9..68c4da0227c5 100644 >> --- a/include/uapi/linux/perf_event.h 
>> +++ b/include/uapi/linux/perf_event.h >> @@ -372,7 +372,8 @@ struct perf_event_attr { >> context_switch : 1, /* context switch data */ >> write_backward : 1, /* Write ring buffer from end to beginning */ >> namespaces : 1, /* include namespaces data */ >> - __reserved_1 : 35; >> + ksymbol : 1, /* include ksymbol events */ >> + __reserved_1 : 34; >> >> union { >> __u32 wakeup_events; /* wakeup every n events */ >> @@ -965,9 +966,32 @@ enum perf_event_type { >> */ >> PERF_RECORD_NAMESPACES = 16, >> >> + /* >> + * Record ksymbol register/unregister events: >> + * >> + * struct { >> + * struct perf_event_header header; >> + * u64 addr; >> + * u32 len; >> + * u16 ksym_type; >> + * u16 flags; >> + * char name[]; >> + * struct sample_id sample_id; >> + * }; >> + */ >> + PERF_RECORD_KSYMBOL = 17, >> + >> PERF_RECORD_MAX, /* non-ABI */ >> }; >> >> +enum perf_record_ksymbol_type { >> + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, >> + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, >> + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ >> +}; >> + >> +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) >> + >> #define PERF_MAX_STACK_DEPTH 127 >> #define PERF_MAX_CONTEXTS_PER_STACK 8 >> >> diff --git a/kernel/events/core.c b/kernel/events/core.c >> index 3cd13a30f732..ef27f2776999 100644 >> --- a/kernel/events/core.c >> +++ b/kernel/events/core.c >> @@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly; >> static atomic_t nr_task_events __read_mostly; >> static atomic_t nr_freq_events __read_mostly; >> static atomic_t nr_switch_events __read_mostly; >> +static atomic_t nr_ksymbol_events __read_mostly; >> >> static LIST_HEAD(pmus); >> static DEFINE_MUTEX(pmus_lock); >> @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) >> >> if (attr->mmap || attr->mmap_data || attr->mmap2 || >> attr->comm || attr->comm_exec || >> - attr->task || >> + attr->task || attr->ksymbol || >> attr->context_switch) >> return true; >> return false; >> @@ -4305,6 +4306,8 @@ static void 
unaccount_event(struct perf_event *event) >> dec = true; >> if (has_branch_stack(event)) >> dec = true; >> + if (event->attr.ksymbol) >> + atomic_dec(&nr_ksymbol_events); >> >> if (dec) { >> if (!atomic_add_unless(&perf_sched_count, -1, 1)) >> @@ -7650,6 +7653,97 @@ static void perf_log_throttle(struct perf_event *event, int enable) >> perf_output_end(&handle); >> } >> >> +/* >> + * ksymbol register/unregister tracking >> + */ >> + >> +struct perf_ksymbol_event { >> + const char *name; >> + int name_len; >> + struct { >> + struct perf_event_header header; >> + u64 addr; >> + u32 len; >> + u16 ksym_type; >> + u16 flags; >> + } event_id; >> +}; >> + >> +static int perf_event_ksymbol_match(struct perf_event *event) >> +{ >> + return event->attr.ksymbol; >> +} >> + >> +static void perf_event_ksymbol_output(struct perf_event *event, void *data) >> +{ >> + struct perf_ksymbol_event *ksymbol_event = data; >> + struct perf_output_handle handle; >> + struct perf_sample_data sample; >> + int ret; >> + >> + if (!perf_event_ksymbol_match(event)) >> + return; >> + >> + perf_event_header__init_id(&ksymbol_event->event_id.header, >> + &sample, event); >> + ret = perf_output_begin(&handle, event, >> + ksymbol_event->event_id.header.size); >> + if (ret) >> + return; >> + >> + perf_output_put(&handle, ksymbol_event->event_id); >> + __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); >> + perf_event__output_id_sample(event, &handle, &sample); >> + >> + perf_output_end(&handle); >> +} >> + >> +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, >> + perf_ksymbol_get_name_f get_name, void *data) >> +{ >> + struct perf_ksymbol_event ksymbol_event; >> + char name[KSYM_NAME_LEN]; >> + u16 flags = 0; >> + int name_len; >> + >> + if (!atomic_read(&nr_ksymbol_events)) >> + return; >> + >> + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || >> + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) >> + goto err; >> + >> + get_name(name, KSYM_NAME_LEN, data); 
>> + name_len = strlen(name) + 1; >> + while (!IS_ALIGNED(name_len, sizeof(u64))) >> + name[name_len++] = '\0'; >> + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); >> + >> + if (unregister) >> + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; >> + >> + ksymbol_event = (struct perf_ksymbol_event){ >> + .name = name, >> + .name_len = name_len, >> + .event_id = { >> + .header = { >> + .type = PERF_RECORD_KSYMBOL, >> + .size = sizeof(ksymbol_event.event_id) + >> + name_len, >> + }, >> + .addr = addr, >> + .len = len, >> + .ksym_type = ksym_type, >> + .flags = flags, >> + }, >> + }; >> + >> + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); >> + return; >> +err: >> + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); >> +} >> + >> void perf_event_itrace_started(struct perf_event *event) >> { >> event->attach_state |= PERF_ATTACH_ITRACE; >> @@ -9900,6 +9994,8 @@ static void account_event(struct perf_event *event) >> inc = true; >> if (is_cgroup_event(event)) >> inc = true; >> + if (event->attr.ksymbol) >> + atomic_inc(&nr_ksymbol_events); >> >> if (inc) { >> /* >> -- >> 2.17.1 > > -- > > - Arnaldo
Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu: > > > > On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > > > Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu: > >> For better performance analysis of dynamically JITed and loaded kernel > >> functions, such as BPF programs, this patch introduces > >> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol > >> register/unregister information to user space. > >> > >> The following data structure is used for PERF_RECORD_KSYMBOL. > >> > >> /* > >> * struct { > >> * struct perf_event_header header; > >> * u64 addr; > >> * u32 len; > >> * u16 ksym_type; > >> * u16 flags; > >> * char name[]; > >> * struct sample_id sample_id; > >> * }; > >> */ > > > > So, I couldn't find where this gets used, the intention here is just to > > add the interfaces and afterwards is that you will wire this up? I would > > like to test the whole shebang to see it working. > > I guess you meant PERF_RECORD_BPF_EVENT not being used? > > PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested Oops, I didn't look at 3/7, just read its cset summary line and as it says: Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT I didn't thought it was related, perhaps break it down into one that states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we could just test it, getting the notifications for new kallsyms related to BPF? > PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from > RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support. Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by tooling, etc, then move on to PERF_RECORD_BPF_EVENT? 
- Arnaldo > Thanks, > Song > > >> Signed-off-by: Song Liu <songliubraving@fb.com> > >> --- > >> include/linux/perf_event.h | 13 +++++ > >> include/uapi/linux/perf_event.h | 26 ++++++++- > >> kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++- > >> 3 files changed, 135 insertions(+), 2 deletions(-) > >> > >> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > >> index 1d5c551a5add..6b5f08db5ef3 100644 > >> --- a/include/linux/perf_event.h > >> +++ b/include/linux/perf_event.h > >> @@ -1113,6 +1113,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, > >> } > >> > >> extern void perf_event_mmap(struct vm_area_struct *vma); > >> + > >> +/* callback function to generate ksymbol name */ > >> +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); > >> +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, > >> + bool unregister, > >> + perf_ksymbol_get_name_f get_name, void *data); > >> + > >> extern struct perf_guest_info_callbacks *perf_guest_cbs; > >> extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); > >> extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); > >> @@ -1333,6 +1340,12 @@ static inline int perf_unregister_guest_info_callbacks > >> (struct perf_guest_info_callbacks *callbacks) { return 0; } > >> > >> static inline void perf_event_mmap(struct vm_area_struct *vma) { } > >> + > >> +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); > >> +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, > >> + bool unregister, > >> + perf_ksymbol_get_name_f get_name, > >> + void *data) { } > >> static inline void perf_event_exec(void) { } > >> static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } > >> static inline void perf_event_namespaces(struct task_struct *tsk) { } > >> diff --git a/include/uapi/linux/perf_event.h 
b/include/uapi/linux/perf_event.h > >> index 9de8780ac8d9..68c4da0227c5 100644 > >> --- a/include/uapi/linux/perf_event.h > >> +++ b/include/uapi/linux/perf_event.h > >> @@ -372,7 +372,8 @@ struct perf_event_attr { > >> context_switch : 1, /* context switch data */ > >> write_backward : 1, /* Write ring buffer from end to beginning */ > >> namespaces : 1, /* include namespaces data */ > >> - __reserved_1 : 35; > >> + ksymbol : 1, /* include ksymbol events */ > >> + __reserved_1 : 34; > >> > >> union { > >> __u32 wakeup_events; /* wakeup every n events */ > >> @@ -965,9 +966,32 @@ enum perf_event_type { > >> */ > >> PERF_RECORD_NAMESPACES = 16, > >> > >> + /* > >> + * Record ksymbol register/unregister events: > >> + * > >> + * struct { > >> + * struct perf_event_header header; > >> + * u64 addr; > >> + * u32 len; > >> + * u16 ksym_type; > >> + * u16 flags; > >> + * char name[]; > >> + * struct sample_id sample_id; > >> + * }; > >> + */ > >> + PERF_RECORD_KSYMBOL = 17, > >> + > >> PERF_RECORD_MAX, /* non-ABI */ > >> }; > >> > >> +enum perf_record_ksymbol_type { > >> + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, > >> + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, > >> + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ > >> +}; > >> + > >> +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) > >> + > >> #define PERF_MAX_STACK_DEPTH 127 > >> #define PERF_MAX_CONTEXTS_PER_STACK 8 > >> > >> diff --git a/kernel/events/core.c b/kernel/events/core.c > >> index 3cd13a30f732..ef27f2776999 100644 > >> --- a/kernel/events/core.c > >> +++ b/kernel/events/core.c > >> @@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly; > >> static atomic_t nr_task_events __read_mostly; > >> static atomic_t nr_freq_events __read_mostly; > >> static atomic_t nr_switch_events __read_mostly; > >> +static atomic_t nr_ksymbol_events __read_mostly; > >> > >> static LIST_HEAD(pmus); > >> static DEFINE_MUTEX(pmus_lock); > >> @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) > >> > >> 
if (attr->mmap || attr->mmap_data || attr->mmap2 || > >> attr->comm || attr->comm_exec || > >> - attr->task || > >> + attr->task || attr->ksymbol || > >> attr->context_switch) > >> return true; > >> return false; > >> @@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event) > >> dec = true; > >> if (has_branch_stack(event)) > >> dec = true; > >> + if (event->attr.ksymbol) > >> + atomic_dec(&nr_ksymbol_events); > >> > >> if (dec) { > >> if (!atomic_add_unless(&perf_sched_count, -1, 1)) > >> @@ -7650,6 +7653,97 @@ static void perf_log_throttle(struct perf_event *event, int enable) > >> perf_output_end(&handle); > >> } > >> > >> +/* > >> + * ksymbol register/unregister tracking > >> + */ > >> + > >> +struct perf_ksymbol_event { > >> + const char *name; > >> + int name_len; > >> + struct { > >> + struct perf_event_header header; > >> + u64 addr; > >> + u32 len; > >> + u16 ksym_type; > >> + u16 flags; > >> + } event_id; > >> +}; > >> + > >> +static int perf_event_ksymbol_match(struct perf_event *event) > >> +{ > >> + return event->attr.ksymbol; > >> +} > >> + > >> +static void perf_event_ksymbol_output(struct perf_event *event, void *data) > >> +{ > >> + struct perf_ksymbol_event *ksymbol_event = data; > >> + struct perf_output_handle handle; > >> + struct perf_sample_data sample; > >> + int ret; > >> + > >> + if (!perf_event_ksymbol_match(event)) > >> + return; > >> + > >> + perf_event_header__init_id(&ksymbol_event->event_id.header, > >> + &sample, event); > >> + ret = perf_output_begin(&handle, event, > >> + ksymbol_event->event_id.header.size); > >> + if (ret) > >> + return; > >> + > >> + perf_output_put(&handle, ksymbol_event->event_id); > >> + __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); > >> + perf_event__output_id_sample(event, &handle, &sample); > >> + > >> + perf_output_end(&handle); > >> +} > >> + > >> +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, > >> + perf_ksymbol_get_name_f 
get_name, void *data) > >> +{ > >> + struct perf_ksymbol_event ksymbol_event; > >> + char name[KSYM_NAME_LEN]; > >> + u16 flags = 0; > >> + int name_len; > >> + > >> + if (!atomic_read(&nr_ksymbol_events)) > >> + return; > >> + > >> + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || > >> + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) > >> + goto err; > >> + > >> + get_name(name, KSYM_NAME_LEN, data); > >> + name_len = strlen(name) + 1; > >> + while (!IS_ALIGNED(name_len, sizeof(u64))) > >> + name[name_len++] = '\0'; > >> + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); > >> + > >> + if (unregister) > >> + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; > >> + > >> + ksymbol_event = (struct perf_ksymbol_event){ > >> + .name = name, > >> + .name_len = name_len, > >> + .event_id = { > >> + .header = { > >> + .type = PERF_RECORD_KSYMBOL, > >> + .size = sizeof(ksymbol_event.event_id) + > >> + name_len, > >> + }, > >> + .addr = addr, > >> + .len = len, > >> + .ksym_type = ksym_type, > >> + .flags = flags, > >> + }, > >> + }; > >> + > >> + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); > >> + return; > >> +err: > >> + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); > >> +} > >> + > >> void perf_event_itrace_started(struct perf_event *event) > >> { > >> event->attach_state |= PERF_ATTACH_ITRACE; > >> @@ -9900,6 +9994,8 @@ static void account_event(struct perf_event *event) > >> inc = true; > >> if (is_cgroup_event(event)) > >> inc = true; > >> + if (event->attr.ksymbol) > >> + atomic_inc(&nr_ksymbol_events); > >> > >> if (inc) { > >> /* > >> -- > >> 2.17.1 > > > > -- > > > > - Arnaldo
> On Jan 10, 2019, at 10:55 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu: >> >> >>> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: >>> >>> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu: >>>> For better performance analysis of dynamically JITed and loaded kernel >>>> functions, such as BPF programs, this patch introduces >>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol >>>> register/unregister information to user space. >>>> >>>> The following data structure is used for PERF_RECORD_KSYMBOL. >>>> >>>> /* >>>> * struct { >>>> * struct perf_event_header header; >>>> * u64 addr; >>>> * u32 len; >>>> * u16 ksym_type; >>>> * u16 flags; >>>> * char name[]; >>>> * struct sample_id sample_id; >>>> * }; >>>> */ >>> >>> So, I couldn't find where this gets used, the intention here is just to >>> add the interfaces and afterwards is that you will wire this up? I would >>> like to test the whole shebang to see it working. >> >> I guess you meant PERF_RECORD_BPF_EVENT not being used? >> >> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested > > Oops, I didn't look at 3/7, just read its cset summary line and as it > says: > > Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT > > I didn't thought it was related, perhaps break it down into one that > states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we > could just test it, getting the notifications for new kallsyms related > to BPF? Good idea! I will split it into two patches as: [3/8] perf, bpf: generate PERF_RECORD_KSYMBOL for BPF program [4/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT > >> PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from >> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support. 
> > Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by > tooling, etc, then move on to PERF_RECORD_BPF_EVENT? I'd like to make sure we all agree on the new ABI for RECORD_KSYMBOL and RECORD_BPF_EVENT. Multiple user space tools depend on RECORD_BPF_EVENT, for example, bcc and auditing. Finalizing RECORD_BPF_EVENT will unblock the development of these tools. On the perf side, it will take us quite some time to finish annotation. Ideally, I don't want to block the development of other tools for so long. Thanks, Song
> On Jan 10, 2019, at 11:30 AM, Song Liu <songliubraving@fb.com> wrote: > > > >> On Jan 10, 2019, at 10:55 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: >> >> Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu: >>> >>> >>>> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: >>>> >>>> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu: >>>>> For better performance analysis of dynamically JITed and loaded kernel >>>>> functions, such as BPF programs, this patch introduces >>>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol >>>>> register/unregister information to user space. >>>>> >>>>> The following data structure is used for PERF_RECORD_KSYMBOL. >>>>> >>>>> /* >>>>> * struct { >>>>> * struct perf_event_header header; >>>>> * u64 addr; >>>>> * u32 len; >>>>> * u16 ksym_type; >>>>> * u16 flags; >>>>> * char name[]; >>>>> * struct sample_id sample_id; >>>>> * }; >>>>> */ >>>> >>>> So, I couldn't find where this gets used, the intention here is just to >>>> add the interfaces and afterwards is that you will wire this up? I would >>>> like to test the whole shebang to see it working. >>> >>> I guess you meant PERF_RECORD_BPF_EVENT not being used? >>> >>> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested >> >> Oops, I didn't look at 3/7, just read its cset summary line and as it >> says: >> >> Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT >> >> I didn't thought it was related, perhaps break it down into one that >> states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we >> could just test it, getting the notifications for new kallsyms related >> to BPF? > > Good idea! I will split it into two patches as: > > [3/8] perf, bpf: generate PERF_RECORD_KSYMBOL for BPF program > [4/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT > >> >>> PERF_RECORD_BPF_EVENT with dump_trace. 
As we separate RECORD_KSYMBOL from >>> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support. >> >> Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by >> tooling, etc, then move on to PERF_RECORD_BPF_EVENT? > > I'd like to make sure we all agree on the new ABI for RECORD_KSYMBOL and > RECORD_BPF_EVENT. Multiple user space tools dependent on RECORD_BPF_EVENT, > for example, bcc and auditing. Finalizing RECORD_BPF_EVENT will unblock the > development of these tools. On perf side, it will take us quite some time > to finish annotation. Ideally, I don't want to block the development of > other tools for so long. > > Thanks, > Song + DavidA Hi David, Could you please share your feedback on PERF_RECORD_BPF_EVENT for auditing use cases? Thanks, Song
Em Thu, Jan 10, 2019 at 07:30:22PM +0000, Song Liu escreveu: > > > > On Jan 10, 2019, at 10:55 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > > > Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu: > >> > >> > >>> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > >>> > >>> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu: > >>>> For better performance analysis of dynamically JITed and loaded kernel > >>>> functions, such as BPF programs, this patch introduces > >>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol > >>>> register/unregister information to user space. > >>>> > >>>> The following data structure is used for PERF_RECORD_KSYMBOL. > >>>> > >>>> /* > >>>> * struct { > >>>> * struct perf_event_header header; > >>>> * u64 addr; > >>>> * u32 len; > >>>> * u16 ksym_type; > >>>> * u16 flags; > >>>> * char name[]; > >>>> * struct sample_id sample_id; > >>>> * }; > >>>> */ > >>> > >>> So, I couldn't find where this gets used, the intention here is just to > >>> add the interfaces and afterwards is that you will wire this up? I would > >>> like to test the whole shebang to see it working. > >> > >> I guess you meant PERF_RECORD_BPF_EVENT not being used? > >> > >> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested > > > > Oops, I didn't look at 3/7, just read its cset summary line and as it > > says: > > > > Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT > > > > I didn't thought it was related, perhaps break it down into one that > > states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we > > could just test it, getting the notifications for new kallsyms related > > to BPF? > > Good idea! I will split it into two patches as: > > [3/8] perf, bpf: generate PERF_RECORD_KSYMBOL for BPF program > [4/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT Thanks! 
I'm juggling a lot of stuff right now, so I didn't read all patches in the series, just the first one and when I couldn't find where perf_event_ksymbol() was being called in that patch nor by looking at just the Subject for the others, I gave up and got back to pahole day :-) > >> PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from > >> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support. > > > > Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by > > tooling, etc, then move on to PERF_RECORD_BPF_EVENT? > > I'd like to make sure we all agree on the new ABI for RECORD_KSYMBOL and > RECORD_BPF_EVENT. Multiple user space tools dependent on RECORD_BPF_EVENT, > for example, bcc and auditing. Finalizing RECORD_BPF_EVENT will unblock the > development of these tools. On perf side, it will take us quite some time > to finish annotation. Ideally, I don't want to block the development of > other tools for so long. With that 3/7 split I guess we can go on with what is in this patchset if PeterZ is happy with it. - Arnaldo
On 1/10/19 12:45 PM, Song Liu wrote: > Could you please share your feedback on PERF_RECORD_BPF_EVENT for auditing > use cases? Google shows Daniel was the one looking at audit use cases: https://www.mail-archive.com/netdev@vger.kernel.org/msg250728.html My comment was that using a PERF_RECORD_BPF_EVENT limits the usability with combinations of other tracepoints (e.g., scheduling) when tracing processes.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..6b5f08db5ef3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1113,6 +1113,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, } extern void perf_event_mmap(struct vm_area_struct *vma); + +/* callback function to generate ksymbol name */ +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, + perf_ksymbol_get_name_f get_name, void *data); + extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -1333,6 +1340,12 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } + +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, + perf_ksymbol_get_name_f get_name, + void *data) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9de8780ac8d9..68c4da0227c5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -372,7 +372,8 @@ struct perf_event_attr { context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ - __reserved_1 : 35; + ksymbol : 1, /* include ksymbol events */ + __reserved_1 : 34; union { __u32 wakeup_events; /* wakeup every n events */ 
@@ -965,9 +966,32 @@ enum perf_event_type { */ PERF_RECORD_NAMESPACES = 16, + /* + * Record ksymbol register/unregister events: + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u32 len; + * u16 ksym_type; + * u16 flags; + * char name[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_KSYMBOL = 17, + PERF_RECORD_MAX, /* non-ABI */ }; +enum perf_record_ksymbol_type { + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ +}; + +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..ef27f2776999 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; +static atomic_t nr_ksymbol_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) if (attr->mmap || attr->mmap_data || attr->mmap2 || attr->comm || attr->comm_exec || - attr->task || + attr->task || attr->ksymbol || attr->context_switch) return true; return false; @@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (has_branch_stack(event)) dec = true; + if (event->attr.ksymbol) + atomic_dec(&nr_ksymbol_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) @@ -7650,6 +7653,97 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +/* + * ksymbol register/unregister tracking + */ + +struct perf_ksymbol_event { + const char *name; + int name_len; + struct { + struct perf_event_header header; + u64 addr; + u32 len; + u16 ksym_type; + u16 flags; + } event_id; +}; + +static int 
perf_event_ksymbol_match(struct perf_event *event) +{ + return event->attr.ksymbol; +} + +static void perf_event_ksymbol_output(struct perf_event *event, void *data) +{ + struct perf_ksymbol_event *ksymbol_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_ksymbol_match(event)) + return; + + perf_event_header__init_id(&ksymbol_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + ksymbol_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, ksymbol_event->event_id); + __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, + perf_ksymbol_get_name_f get_name, void *data) +{ + struct perf_ksymbol_event ksymbol_event; + char name[KSYM_NAME_LEN]; + u16 flags = 0; + int name_len; + + if (!atomic_read(&nr_ksymbol_events)) + return; + + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) + goto err; + + get_name(name, KSYM_NAME_LEN, data); + name_len = strlen(name) + 1; + while (!IS_ALIGNED(name_len, sizeof(u64))) + name[name_len++] = '\0'; + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); + + if (unregister) + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; + + ksymbol_event = (struct perf_ksymbol_event){ + .name = name, + .name_len = name_len, + .event_id = { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = sizeof(ksymbol_event.event_id) + + name_len, + }, + .addr = addr, + .len = len, + .ksym_type = ksym_type, + .flags = flags, + }, + }; + + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); + return; +err: + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); +} + void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; @@ -9900,6 +9994,8 @@ 
static void account_event(struct perf_event *event) inc = true; if (is_cgroup_event(event)) inc = true; + if (event->attr.ksymbol) + atomic_inc(&nr_ksymbol_events); if (inc) { /*
For better performance analysis of dynamically JITed and loaded kernel functions, such as BPF programs, this patch introduces PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol register/unregister information to user space. The following data structure is used for PERF_RECORD_KSYMBOL. /* * struct { * struct perf_event_header header; * u64 addr; * u32 len; * u16 ksym_type; * u16 flags; * char name[]; * struct sample_id sample_id; * }; */ Signed-off-by: Song Liu <songliubraving@fb.com> --- include/linux/perf_event.h | 13 +++++ include/uapi/linux/perf_event.h | 26 ++++++++- kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++- 3 files changed, 135 insertions(+), 2 deletions(-)