
[v6,perf,bpf-next,1/7] perf, bpf: Introduce PERF_RECORD_KSYMBOL

Message ID: 20190109192111.130995-2-songliubraving@fb.com
State: Changes Requested
Delegated to: BPF Maintainers
Series: reveal invisible bpf programs

Commit Message

Song Liu Jan. 9, 2019, 7:21 p.m. UTC
For better performance analysis of dynamically JITed and loaded kernel
functions, such as BPF programs, this patch introduces
PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
register/unregister information to user space.

The following data structure is used for PERF_RECORD_KSYMBOL.

    /*
     * struct {
     *      struct perf_event_header        header;
     *      u64                             addr;
     *      u32                             len;
     *      u16                             ksym_type;
     *      u16                             flags;
     *      char                            name[];
     *      struct sample_id                sample_id;
     * };
     */
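
For illustration, a user-space consumer might handle such a record roughly as
follows. This is only a sketch against the layout above and the uapi additions
in this patch: handle_ksymbol(), add_symbol() and remove_symbol() are
hypothetical helpers, and parsing of the trailing sample_id is omitted.

    /* Sketch of a user-space consumer; not part of this patch. */
    static void handle_ksymbol(struct perf_event_header *hdr)
    {
            struct ksymbol_record {
                    struct perf_event_header        header;
                    __u64                           addr;
                    __u32                           len;
                    __u16                           ksym_type;
                    __u16                           flags;
                    char                            name[]; /* NUL-terminated, padded to 8 bytes */
            } *ev = (void *)hdr;

            if (hdr->type != PERF_RECORD_KSYMBOL)
                    return;

            if (ev->ksym_type != PERF_RECORD_KSYMBOL_TYPE_BPF)
                    return;

            if (ev->flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
                    remove_symbol(ev->addr);                 /* hypothetical helper */
            else
                    add_symbol(ev->name, ev->addr, ev->len); /* hypothetical helper */
    }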

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/perf_event.h      | 13 +++++
 include/uapi/linux/perf_event.h | 26 ++++++++-
 kernel/events/core.c            | 98 ++++++++++++++++++++++++++++++++-
 3 files changed, 135 insertions(+), 2 deletions(-)

Comments

Arnaldo Carvalho de Melo Jan. 10, 2019, 6:24 p.m. UTC | #1
Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu:
> For better performance analysis of dynamically JITed and loaded kernel
> functions, such as BPF programs, this patch introduces
> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
> register/unregister information to user space.
> 
> The following data structure is used for PERF_RECORD_KSYMBOL.
> 
>     /*
>      * struct {
>      *      struct perf_event_header        header;
>      *      u64                             addr;
>      *      u32                             len;
>      *      u16                             ksym_type;
>      *      u16                             flags;
>      *      char                            name[];
>      *      struct sample_id                sample_id;
>      * };
>      */

So, I couldn't find where this gets used, the intention here is just to
add the interfaces and afterwards is that you will wire this up? I would
like to test the whole shebang to see it working.

- Arnaldo
 
Song Liu Jan. 10, 2019, 6:40 p.m. UTC | #2
> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> 
> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu:
>> For better performance analysis of dynamically JITed and loaded kernel
>> functions, such as BPF programs, this patch introduces
>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
>> register/unregister information to user space.
>> 
>> The following data structure is used for PERF_RECORD_KSYMBOL.
>> 
>>    /*
>>     * struct {
>>     *      struct perf_event_header        header;
>>     *      u64                             addr;
>>     *      u32                             len;
>>     *      u16                             ksym_type;
>>     *      u16                             flags;
>>     *      char                            name[];
>>     *      struct sample_id                sample_id;
>>     * };
>>     */
> 
> So, I couldn't find where this gets used, the intention here is just to
> add the interfaces and afterwards is that you will wire this up? I would
> like to test the whole shebang to see it working.
> 
> - Arnaldo

I guess you meant PERF_RECORD_BPF_EVENT not being used? 

PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested 
PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from
RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support.  

Thanks,
Song

Arnaldo Carvalho de Melo Jan. 10, 2019, 6:55 p.m. UTC | #3
Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu:
> 
> 
> > On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> > 
> > Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu:
> >> For better performance analysis of dynamically JITed and loaded kernel
> >> functions, such as BPF programs, this patch introduces
> >> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
> >> register/unregister information to user space.
> >> 
> >> The following data structure is used for PERF_RECORD_KSYMBOL.
> >> 
> >>    /*
> >>     * struct {
> >>     *      struct perf_event_header        header;
> >>     *      u64                             addr;
> >>     *      u32                             len;
> >>     *      u16                             ksym_type;
> >>     *      u16                             flags;
> >>     *      char                            name[];
> >>     *      struct sample_id                sample_id;
> >>     * };
> >>     */
> > 
> > So, I couldn't find where this gets used, the intention here is just to
> > add the interfaces and afterwards is that you will wire this up? I would
> > like to test the whole shebang to see it working.
> 
> I guess you meant PERF_RECORD_BPF_EVENT not being used? 
> 
> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested 

Oops, I didn't look at 3/7, just read its cset summary line and as it
says:

Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT

I didn't think it was related, perhaps break it down into one that
states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we
could just test it, getting the notifications for new kallsyms related
to BPF?

> PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from
> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support.  

Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by
tooling, etc, then move on to PERF_RECORD_BPF_EVENT?

- Arnaldo
 
Song Liu Jan. 10, 2019, 7:30 p.m. UTC | #4
> On Jan 10, 2019, at 10:55 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> 
> Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu:
>> 
>> 
>>> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>>> 
>>> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu:
>>>> For better performance analysis of dynamically JITed and loaded kernel
>>>> functions, such as BPF programs, this patch introduces
>>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
>>>> register/unregister information to user space.
>>>> 
>>>> The following data structure is used for PERF_RECORD_KSYMBOL.
>>>> 
>>>>   /*
>>>>    * struct {
>>>>    *      struct perf_event_header        header;
>>>>    *      u64                             addr;
>>>>    *      u32                             len;
>>>>    *      u16                             ksym_type;
>>>>    *      u16                             flags;
>>>>    *      char                            name[];
>>>>    *      struct sample_id                sample_id;
>>>>    * };
>>>>    */
>>> 
>>> So, I couldn't find where this gets used, the intention here is just to
>>> add the interfaces and afterwards is that you will wire this up? I would
>>> like to test the whole shebang to see it working.
>> 
>> I guess you meant PERF_RECORD_BPF_EVENT not being used? 
>> 
>> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested 
> 
> Oops, I didn't look at 3/7, just read its cset summary line and as it
> says:
> 
> Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT
> 
> I didn't think it was related, perhaps break it down into one that
> states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we
> could just test it, getting the notifications for new kallsyms related
> to BPF?

Good idea! I will split it into two patches as:

[3/8] perf, bpf: generate PERF_RECORD_KSYMBOL for BPF program 
[4/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT
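
For illustration, a call site for the perf_event_ksymbol() helper added in
this patch could look roughly like the snippet below. The names are made up
for the example; the actual wiring for BPF programs is done in the later
patches of the series.

    /* Hypothetical caller of the new helper; names are illustrative only. */
    static int demo_get_name(char *name, int name_len, void *data)
    {
            return snprintf(name, name_len, "%s", (const char *)data);
    }

    static void demo_announce(u64 image_addr, u32 image_size, bool unregister)
    {
            char sym[] = "bpf_prog_demo";

            perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
                               image_addr, image_size, unregister,
                               demo_get_name, sym);
    }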

> 
>> PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from
>> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support.  
> 
> Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by
> tooling, etc, then move on to PERF_RECORD_BPF_EVENT?

I'd like to make sure we all agree on the new ABI for RECORD_KSYMBOL and 
RECORD_BPF_EVENT. Multiple user space tools depend on RECORD_BPF_EVENT,
for example, bcc and auditing. Finalizing RECORD_BPF_EVENT will unblock the 
development of these tools. On perf side, it will take us quite some time 
to finish annotation. Ideally, I don't want to block the development of 
other tools for so long. 

Thanks,
Song
Song Liu Jan. 10, 2019, 7:45 p.m. UTC | #5
> On Jan 10, 2019, at 11:30 AM, Song Liu <songliubraving@fb.com> wrote:
> 
> 
> 
>> On Jan 10, 2019, at 10:55 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>> 
>> Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu:
>>> 
>>> 
>>>> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>>>> 
>>>> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu:
>>>>> For better performance analysis of dynamically JITed and loaded kernel
>>>>> functions, such as BPF programs, this patch introduces
>>>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
>>>>> register/unregister information to user space.
>>>>> 
>>>>> The following data structure is used for PERF_RECORD_KSYMBOL.
>>>>> 
>>>>>  /*
>>>>>   * struct {
>>>>>   *      struct perf_event_header        header;
>>>>>   *      u64                             addr;
>>>>>   *      u32                             len;
>>>>>   *      u16                             ksym_type;
>>>>>   *      u16                             flags;
>>>>>   *      char                            name[];
>>>>>   *      struct sample_id                sample_id;
>>>>>   * };
>>>>>   */
>>>> 
>>>> So, I couldn't find where this gets used, the intention here is just to
>>>> add the interfaces and afterwards is that you will wire this up? I would
>>>> like to test the whole shebang to see it working.
>>> 
>>> I guess you meant PERF_RECORD_BPF_EVENT not being used? 
>>> 
>>> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested 
>> 
>> Oops, I didn't look at 3/7, just read its cset summary line and as it
>> says:
>> 
>> Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT
>> 
>> I didn't think it was related, perhaps break it down into one that
>> states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we
>> could just test it, getting the notifications for new kallsyms related
>> to BPF?
> 
> Good idea! I will split it into two patches as:
> 
> [3/8] perf, bpf: generate PERF_RECORD_KSYMBOL for BPF program 
> [4/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT
> 
>> 
>>> PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from
>>> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support.  
>> 
>> Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by
>> tooling, etc, then move on to PERF_RECORD_BPF_EVENT?
> 
> I'd like to make sure we all agree on the new ABI for RECORD_KSYMBOL and 
> RECORD_BPF_EVENT. Multiple user space tools depend on RECORD_BPF_EVENT,
> for example, bcc and auditing. Finalizing RECORD_BPF_EVENT will unblock the 
> development of these tools. On perf side, it will take us quite some time 
> to finish annotation. Ideally, I don't want to block the development of 
> other tools for so long. 
> 
> Thanks,
> Song

+ DavidA

Hi David, 

Could you please share your feedback on PERF_RECORD_BPF_EVENT for auditing
use cases?

Thanks,
Song
Arnaldo Carvalho de Melo Jan. 10, 2019, 7:52 p.m. UTC | #6
Em Thu, Jan 10, 2019 at 07:30:22PM +0000, Song Liu escreveu:
> 
> 
> > On Jan 10, 2019, at 10:55 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> > 
> > Em Thu, Jan 10, 2019 at 06:40:37PM +0000, Song Liu escreveu:
> >> 
> >> 
> >>> On Jan 10, 2019, at 10:24 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> >>> 
> >>> Em Wed, Jan 09, 2019 at 11:21:05AM -0800, Song Liu escreveu:
> >>>> For better performance analysis of dynamically JITed and loaded kernel
> >>>> functions, such as BPF programs, this patch introduces
> >>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
> >>>> register/unregister information to user space.
> >>>> 
> >>>> The following data structure is used for PERF_RECORD_KSYMBOL.
> >>>> 
> >>>>   /*
> >>>>    * struct {
> >>>>    *      struct perf_event_header        header;
> >>>>    *      u64                             addr;
> >>>>    *      u32                             len;
> >>>>    *      u16                             ksym_type;
> >>>>    *      u16                             flags;
> >>>>    *      char                            name[];
> >>>>    *      struct sample_id                sample_id;
> >>>>    * };
> >>>>    */
> >>> 
> >>> So, I couldn't find where this gets used, the intention here is just to
> >>> add the interfaces and afterwards is that you will wire this up? I would
> >>> like to test the whole shebang to see it working.
> >> 
> >> I guess you meant PERF_RECORD_BPF_EVENT not being used? 
> >> 
> >> PERF_RECORD_KSYMBOL is used by BPF in 3/7 and 5/7. I tested 
> > 
> > Oops, I didn't look at 3/7, just read its cset summary line and as it
> > says:
> > 
> > Subject: [PATCH v6 perf, bpf-next 3/7] perf, bpf: introduce PERF_RECORD_BPF_EVENT
> > 
> > I didn't think it was related, perhaps break it down into one that
> > states that it is wiring up PERF_RECORD_KSYMBOL, and at that point we
> > could just test it, getting the notifications for new kallsyms related
> > to BPF?
> 
> Good idea! I will split it into two patches as:
> 
> [3/8] perf, bpf: generate PERF_RECORD_KSYMBOL for BPF program 
> [4/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT

Thanks! I'm juggling a lot of stuff right now, so I didn't read all the
patches in the series, just the first one. When I couldn't find where
perf_event_ksymbol() was being called in that patch, nor by looking at
just the Subject for the others, I gave up and got back to pahole day :-)
 
> >> PERF_RECORD_BPF_EVENT with dump_trace. As we separate RECORD_KSYMBOL from
> >> RECORD_BPF_EVENT, user space won't use BPF_EVENT until annotation support.  
> > 
> > Right, so why not just introduce PERF_RECORD_KSYMBOL, make it be used by
> > tooling, etc, then move on to PERF_RECORD_BPF_EVENT?
> 
> I'd like to make sure we all agree on the new ABI for RECORD_KSYMBOL and 
> RECORD_BPF_EVENT. Multiple user space tools depend on RECORD_BPF_EVENT,
> for example, bcc and auditing. Finalizing RECORD_BPF_EVENT will unblock the 
> development of these tools. On perf side, it will take us quite some time 
> to finish annotation. Ideally, I don't want to block the development of 
> other tools for so long. 

With that 3/7 split I guess we can go on with what is in this patchset
if PeterZ is happy with it.

- Arnaldo
David Ahern Jan. 11, 2019, 1:05 a.m. UTC | #7
On 1/10/19 12:45 PM, Song Liu wrote:
> Could you please share your feedback on PERF_RECORD_BPF_EVENT for auditing
> use cases?

Google shows Daniel was the one looking at audit use cases:
https://www.mail-archive.com/netdev@vger.kernel.org/msg250728.html


My comment was that using a PERF_RECORD_BPF_EVENT limits the usability
with combinations of other tracepoints (e.g., scheduling) when tracing
processes.

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1d5c551a5add..6b5f08db5ef3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1113,6 +1113,13 @@  static inline void perf_event_task_sched_out(struct task_struct *prev,
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
+
+/* callback function to generate ksymbol name */
+typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
+extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
+			       bool unregister,
+			       perf_ksymbol_get_name_f get_name, void *data);
+
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1333,6 +1340,12 @@  static inline int perf_unregister_guest_info_callbacks
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
+
+typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
+static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
+				      bool unregister,
+				      perf_ksymbol_get_name_f get_name,
+				      void *data) 			{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9de8780ac8d9..68c4da0227c5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -372,7 +372,8 @@  struct perf_event_attr {
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
 				namespaces     :  1, /* include namespaces data */
-				__reserved_1   : 35;
+				ksymbol        :  1, /* include ksymbol events */
+				__reserved_1   : 34;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -965,9 +966,32 @@  enum perf_event_type {
 	 */
 	PERF_RECORD_NAMESPACES			= 16,
 
+	/*
+	 * Record ksymbol register/unregister events:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				addr;
+	 *	u32				len;
+	 *	u16				ksym_type;
+	 *	u16				flags;
+	 *	char				name[];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_KSYMBOL			= 17,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
+enum perf_record_ksymbol_type {
+	PERF_RECORD_KSYMBOL_TYPE_UNKNOWN	= 0,
+	PERF_RECORD_KSYMBOL_TYPE_BPF		= 1,
+	PERF_RECORD_KSYMBOL_TYPE_MAX		/* non-ABI */
+};
+
+#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER	(1 << 0)
+
 #define PERF_MAX_STACK_DEPTH		127
 #define PERF_MAX_CONTEXTS_PER_STACK	  8
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..ef27f2776999 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -385,6 +385,7 @@  static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
+static atomic_t nr_ksymbol_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4235,7 +4236,7 @@  static bool is_sb_event(struct perf_event *event)
 
 	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
 	    attr->comm || attr->comm_exec ||
-	    attr->task ||
+	    attr->task || attr->ksymbol ||
 	    attr->context_switch)
 		return true;
 	return false;
@@ -4305,6 +4306,8 @@  static void unaccount_event(struct perf_event *event)
 		dec = true;
 	if (has_branch_stack(event))
 		dec = true;
+	if (event->attr.ksymbol)
+		atomic_dec(&nr_ksymbol_events);
 
 	if (dec) {
 		if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7650,6 +7653,97 @@  static void perf_log_throttle(struct perf_event *event, int enable)
 	perf_output_end(&handle);
 }
 
+/*
+ * ksymbol register/unregister tracking
+ */
+
+struct perf_ksymbol_event {
+	const char	*name;
+	int		name_len;
+	struct {
+		struct perf_event_header        header;
+		u64				addr;
+		u32				len;
+		u16				ksym_type;
+		u16				flags;
+	} event_id;
+};
+
+static int perf_event_ksymbol_match(struct perf_event *event)
+{
+	return event->attr.ksymbol;
+}
+
+static void perf_event_ksymbol_output(struct perf_event *event, void *data)
+{
+	struct perf_ksymbol_event *ksymbol_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_ksymbol_match(event))
+		return;
+
+	perf_event_header__init_id(&ksymbol_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				ksymbol_event->event_id.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, ksymbol_event->event_id);
+	__output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
+			perf_ksymbol_get_name_f get_name, void *data)
+{
+	struct perf_ksymbol_event ksymbol_event;
+	char name[KSYM_NAME_LEN];
+	u16 flags = 0;
+	int name_len;
+
+	if (!atomic_read(&nr_ksymbol_events))
+		return;
+
+	if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
+	    ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
+		goto err;
+
+	get_name(name, KSYM_NAME_LEN, data);
+	name_len = strlen(name) + 1;
+	while (!IS_ALIGNED(name_len, sizeof(u64)))
+		name[name_len++] = '\0';
+	BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
+
+	if (unregister)
+		flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
+
+	ksymbol_event = (struct perf_ksymbol_event){
+		.name = name,
+		.name_len = name_len,
+		.event_id = {
+			.header = {
+				.type = PERF_RECORD_KSYMBOL,
+				.size = sizeof(ksymbol_event.event_id) +
+					name_len,
+			},
+			.addr = addr,
+			.len = len,
+			.ksym_type = ksym_type,
+			.flags = flags,
+		},
+	};
+
+	perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
+	return;
+err:
+	WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
 	event->attach_state |= PERF_ATTACH_ITRACE;
@@ -9900,6 +9994,8 @@  static void account_event(struct perf_event *event)
 		inc = true;
 	if (is_cgroup_event(event))
 		inc = true;
+	if (event->attr.ksymbol)
+		atomic_inc(&nr_ksymbol_events);
 
 	if (inc) {
 		/*
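
For reference, a user-space tool could subscribe to these records along the
following lines once this patch is applied. This is a sketch only: it assumes
the updated uapi header with the new ksymbol attribute bit, rides on a
software dummy event as the side-band carrier, and leaves out error handling
and ring-buffer parsing.

    /* Sketch: request PERF_RECORD_KSYMBOL side-band records (illustrative only). */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <unistd.h>

    static int open_ksymbol_event(int cpu)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_DUMMY;  /* carrier for side-band records only */
            attr.ksymbol = 1;                   /* bit added by this patch */
            attr.sample_id_all = 1;

            /* system-wide, one event per CPU is the usual pattern */
            return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
    }

Records of type PERF_RECORD_KSYMBOL (17) then arrive in the event's mmap'd
ring buffer, laid out as in the uapi comment above.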