
[bpf-next,1/3] perf/core: introduce perf_event_mmap_bpf_prog

Message ID 20180919223935.999270-2-ast@kernel.org
State Changes Requested, archived
Delegated to: BPF Maintainers
Series perf, bpf: reveal invisible bpf programs

Commit Message

Alexei Starovoitov Sept. 19, 2018, 10:39 p.m. UTC
Introduce the perf_event_mmap_bpf_prog() helper to emit RECORD_MMAP events
into the perf ring buffer.
It is used by the bpf load/unload logic to notify user space of the addresses
and names of JITed bpf programs.

Note that event->mmap.pid == -1 is an existing indicator of a kernel event.
In addition, use event->mmap.tid == BPF_FS_MAGIC to mark a bpf-related
RECORD_MMAP event.

Alternatively it would be possible to introduce a new 'enum perf_event_type'
command specifically for bpf prog load/unload, but the existing RECORD_MMAP
is very close, so this patch chooses to extend it.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 44 +++++++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 5 deletions(-)
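
To make the pid/tid convention concrete, here is a minimal user-space sketch
(not part of the patch) of how a perf ring-buffer consumer could classify such
a record. The struct layout follows the PERF_RECORD_MMAP documentation in
include/uapi/linux/perf_event.h; the function name is illustrative.

/*
 * Hypothetical consumer-side check: a RECORD_MMAP whose pid is -1 and
 * whose tid is BPF_FS_MAGIC describes a JITed bpf program, per this patch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <linux/perf_event.h>	/* PERF_RECORD_MMAP, struct perf_event_header */
#include <linux/magic.h>	/* BPF_FS_MAGIC */

struct mmap_record {
	struct perf_event_header header;
	uint32_t pid, tid;
	uint64_t addr, len, pgoff;
	char filename[];	/* NUL-terminated, padded to a u64 boundary */
};

static bool is_bpf_prog_record(const struct mmap_record *rec)
{
	if (rec->header.type != PERF_RECORD_MMAP)
		return false;
	/* pid == -1 already marks kernel events; tid == BPF_FS_MAGIC
	 * additionally marks bpf program load/unload records. */
	return rec->pid == (uint32_t)-1 && rec->tid == BPF_FS_MAGIC;
}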

Comments

Song Liu Sept. 19, 2018, 11:30 p.m. UTC | #1
> On Sep 19, 2018, at 3:39 PM, Alexei Starovoitov <ast@kernel.org> wrote:
> 
> Introduce the perf_event_mmap_bpf_prog() helper to emit RECORD_MMAP events
> into the perf ring buffer.
> It is used by the bpf load/unload logic to notify user space of the addresses
> and names of JITed bpf programs.
> 
> Note that event->mmap.pid == -1 is an existing indicator of a kernel event.
> In addition, use event->mmap.tid == BPF_FS_MAGIC to mark a bpf-related
> RECORD_MMAP event.
> 
> Alternatively it would be possible to introduce a new 'enum perf_event_type'
> command specifically for bpf prog load/unload, but the existing RECORD_MMAP
> is very close, so this patch chooses to extend it.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Acked-by: Song Liu <songliubraving@fb.com>

I guess we should also use this for kernel module load/unload?


> ---
> include/linux/perf_event.h |  1 +
> kernel/events/core.c       | 44 +++++++++++++++++++++++++++++++++-----
> 2 files changed, 40 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 53c500f0ca79..0e79af83138f 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1113,6 +1113,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
> }
> 
> extern void perf_event_mmap(struct vm_area_struct *vma);
> +void perf_event_mmap_bpf_prog(u64 start, u64 len, char *name, int size);
> extern struct perf_guest_info_callbacks *perf_guest_cbs;
> extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
> extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 2a62b96600ad..c48244ddf993 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7152,7 +7152,7 @@ static int perf_event_mmap_match(struct perf_event *event,
> {
> 	struct perf_mmap_event *mmap_event = data;
> 	struct vm_area_struct *vma = mmap_event->vma;
> -	int executable = vma->vm_flags & VM_EXEC;
> +	int executable = !vma || vma->vm_flags & VM_EXEC;
> 
> 	return (!executable && event->attr.mmap_data) ||
> 	       (executable && (event->attr.mmap || event->attr.mmap2));
> @@ -7165,12 +7165,13 @@ static void perf_event_mmap_output(struct perf_event *event,
> 	struct perf_output_handle handle;
> 	struct perf_sample_data sample;
> 	int size = mmap_event->event_id.header.size;
> +	bool bpf_event = !mmap_event->vma;
> 	int ret;
> 
> 	if (!perf_event_mmap_match(event, data))
> 		return;
> 
> -	if (event->attr.mmap2) {
> +	if (event->attr.mmap2 && !bpf_event) {
> 		mmap_event->event_id.header.type = PERF_RECORD_MMAP2;
> 		mmap_event->event_id.header.size += sizeof(mmap_event->maj);
> 		mmap_event->event_id.header.size += sizeof(mmap_event->min);
> @@ -7186,12 +7187,14 @@ static void perf_event_mmap_output(struct perf_event *event,
> 	if (ret)
> 		goto out;
> 
> -	mmap_event->event_id.pid = perf_event_pid(event, current);
> -	mmap_event->event_id.tid = perf_event_tid(event, current);
> +	if (!bpf_event) {
> +		mmap_event->event_id.pid = perf_event_pid(event, current);
> +		mmap_event->event_id.tid = perf_event_tid(event, current);
> +	}
> 
> 	perf_output_put(&handle, mmap_event->event_id);
> 
> -	if (event->attr.mmap2) {
> +	if (event->attr.mmap2 && !bpf_event) {
> 		perf_output_put(&handle, mmap_event->maj);
> 		perf_output_put(&handle, mmap_event->min);
> 		perf_output_put(&handle, mmap_event->ino);
> @@ -7448,6 +7451,37 @@ void perf_event_mmap(struct vm_area_struct *vma)
> 	perf_event_mmap_event(&mmap_event);
> }
> 
> +void perf_event_mmap_bpf_prog(u64 start, u64 len, char *name, int size)
> +{
> +	struct perf_mmap_event mmap_event;
> +
> +	if (!atomic_read(&nr_mmap_events))
> +		return;
> +
> +	if (!IS_ALIGNED(size, sizeof(u64))) {
> +		WARN_ONCE(1, "size is not aligned\n");
> +		return;
> +	}
> +
> +	mmap_event = (struct perf_mmap_event){
> +		.file_name = name,
> +		.file_size = size,
> +		.event_id  = {
> +			.header = {
> +				.type = PERF_RECORD_MMAP,
> +				.misc = PERF_RECORD_MISC_KERNEL,
> +				.size = sizeof(mmap_event.event_id) + size,
> +			},
> +			.pid = -1, /* indicates kernel */
> +			.tid = BPF_FS_MAGIC, /* bpf mmap event */
> +			.start  = start,
> +			.len    = len,
> +			.pgoff  = start,
> +		},
> +	};
> +	perf_iterate_sb(perf_event_mmap_output, &mmap_event, NULL);
> +}
> +
> void perf_event_aux_event(struct perf_event *event, unsigned long head,
> 			  unsigned long size, u64 flags)
> {
> -- 
> 2.17.1
>
Alexei Starovoitov Sept. 20, 2018, 12:53 a.m. UTC | #2
On 9/19/18 4:30 PM, Song Liu wrote:
>
>
>> On Sep 19, 2018, at 3:39 PM, Alexei Starovoitov <ast@kernel.org> wrote:
>>
>> Introduce the perf_event_mmap_bpf_prog() helper to emit RECORD_MMAP events
>> into the perf ring buffer.
>> It is used by the bpf load/unload logic to notify user space of the addresses
>> and names of JITed bpf programs.
>>
>> Note that event->mmap.pid == -1 is an existing indicator of a kernel event.
>> In addition, use event->mmap.tid == BPF_FS_MAGIC to mark a bpf-related
>> RECORD_MMAP event.
>>
>> Alternatively it would be possible to introduce a new 'enum perf_event_type'
>> command specifically for bpf prog load/unload, but the existing RECORD_MMAP
>> is very close, so this patch chooses to extend it.
>>
>> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>
> Acked-by: Song Liu <songliubraving@fb.com>
>
> I guess we should also use this for kernel module load/unload?

Yes, that's possible.
There is a similar issue today with modules that get unloaded
before 'perf report'.
The synthetic RECORD_MMAP for modules that perf emits into perf.data
has filename == module_name; it covers the typical use case, though.
We could extend RECORD_MMAP further and let the kernel emit these records
for module load/unload.
Some new magic value for 'tid' would be necessary.

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 53c500f0ca79..0e79af83138f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1113,6 +1113,7 @@  static inline void perf_event_task_sched_out(struct task_struct *prev,
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
+void perf_event_mmap_bpf_prog(u64 start, u64 len, char *name, int size);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2a62b96600ad..c48244ddf993 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7152,7 +7152,7 @@  static int perf_event_mmap_match(struct perf_event *event,
 {
 	struct perf_mmap_event *mmap_event = data;
 	struct vm_area_struct *vma = mmap_event->vma;
-	int executable = vma->vm_flags & VM_EXEC;
+	int executable = !vma || vma->vm_flags & VM_EXEC;
 
 	return (!executable && event->attr.mmap_data) ||
 	       (executable && (event->attr.mmap || event->attr.mmap2));
@@ -7165,12 +7165,13 @@  static void perf_event_mmap_output(struct perf_event *event,
 	struct perf_output_handle handle;
 	struct perf_sample_data sample;
 	int size = mmap_event->event_id.header.size;
+	bool bpf_event = !mmap_event->vma;
 	int ret;
 
 	if (!perf_event_mmap_match(event, data))
 		return;
 
-	if (event->attr.mmap2) {
+	if (event->attr.mmap2 && !bpf_event) {
 		mmap_event->event_id.header.type = PERF_RECORD_MMAP2;
 		mmap_event->event_id.header.size += sizeof(mmap_event->maj);
 		mmap_event->event_id.header.size += sizeof(mmap_event->min);
@@ -7186,12 +7187,14 @@  static void perf_event_mmap_output(struct perf_event *event,
 	if (ret)
 		goto out;
 
-	mmap_event->event_id.pid = perf_event_pid(event, current);
-	mmap_event->event_id.tid = perf_event_tid(event, current);
+	if (!bpf_event) {
+		mmap_event->event_id.pid = perf_event_pid(event, current);
+		mmap_event->event_id.tid = perf_event_tid(event, current);
+	}
 
 	perf_output_put(&handle, mmap_event->event_id);
 
-	if (event->attr.mmap2) {
+	if (event->attr.mmap2 && !bpf_event) {
 		perf_output_put(&handle, mmap_event->maj);
 		perf_output_put(&handle, mmap_event->min);
 		perf_output_put(&handle, mmap_event->ino);
@@ -7448,6 +7451,37 @@  void perf_event_mmap(struct vm_area_struct *vma)
 	perf_event_mmap_event(&mmap_event);
 }
 
+void perf_event_mmap_bpf_prog(u64 start, u64 len, char *name, int size)
+{
+	struct perf_mmap_event mmap_event;
+
+	if (!atomic_read(&nr_mmap_events))
+		return;
+
+	if (!IS_ALIGNED(size, sizeof(u64))) {
+		WARN_ONCE(1, "size is not aligned\n");
+		return;
+	}
+
+	mmap_event = (struct perf_mmap_event){
+		.file_name = name,
+		.file_size = size,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_MMAP,
+				.misc = PERF_RECORD_MISC_KERNEL,
+				.size = sizeof(mmap_event.event_id) + size,
+			},
+			.pid = -1, /* indicates kernel */
+			.tid = BPF_FS_MAGIC, /* bpf mmap event */
+			.start  = start,
+			.len    = len,
+			.pgoff  = start,
+		},
+	};
+	perf_iterate_sb(perf_event_mmap_output, &mmap_event, NULL);
+}
+
 void perf_event_aux_event(struct perf_event *event, unsigned long head,
 			  unsigned long size, u64 flags)
 {
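
As a complement to the patch above, here is a hedged sketch of how a caller
might use the new helper, illustrating the u64-alignment requirement it
enforces on the size argument. The actual call sites are added by the later
patches in this series; the function name and buffer size below are made up
for illustration.

/*
 * Illustrative caller (not from this patch; the real call sites land in the
 * bpf core in later patches of the series).  perf_event_mmap_bpf_prog()
 * WARNs and bails out if @size is not a multiple of sizeof(u64), so the
 * name buffer is NUL-padded up to the next 8-byte boundary before the call.
 */
#include <linux/kernel.h>	/* ALIGN() */
#include <linux/string.h>	/* strscpy(), strlen() */
#include <linux/types.h>
#include <linux/perf_event.h>

static void notify_bpf_prog_load(u64 addr, u64 prog_len, const char *sym)
{
	char name[128] = "";	/* zero-initialized, so padding bytes are NULs */
	int size;

	strscpy(name, sym, sizeof(name));
	size = ALIGN(strlen(name) + 1, sizeof(u64));	/* include NUL, pad to u64 */

	perf_event_mmap_bpf_prog(addr, prog_len, name, size);
}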