diff mbox series

[v10,perf,bpf-next,3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT

Message ID 20190116162931.1542429-4-songliubraving@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series reveal invisible bpf programs | expand

Commit Message

Song Liu Jan. 16, 2019, 4:29 p.m. UTC
For better performance analysis of BPF programs, this patch introduces
PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program
load/unload information to user space.

Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs.
The following example shows kernel symbols for a BPF program with 7
sub programs:

    ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F
    ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F
    ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F
    ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F
    ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F
    ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F
    ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F
    ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi

When a bpf program is loaded, PERF_RECORD_KSYMBOL is generated for
each of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not
needed for simple profiling.

For annotation, user space needs to listen to PERF_RECORD_BPF_EVENT
and gather more information about these (sub) programs via sys_bpf.

Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/filter.h          |   7 ++
 include/linux/perf_event.h      |   6 ++
 include/uapi/linux/perf_event.h |  29 +++++++-
 kernel/bpf/core.c               |   2 +-
 kernel/bpf/syscall.c            |   2 +
 kernel/events/core.c            | 120 ++++++++++++++++++++++++++++++++
 6 files changed, 164 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra Jan. 17, 2019, 1:09 p.m. UTC | #1
On Wed, Jan 16, 2019 at 08:29:25AM -0800, Song Liu wrote:
> +	/*
> +	 * Record bpf events:
> +	 *  enum perf_bpf_event_type {
> +	 *	PERF_BPF_EVENT_UNKNOWN		= 0,
> +	 *	PERF_BPF_EVENT_PROG_LOAD	= 1,
> +	 *	PERF_BPF_EVENT_PROG_UNLOAD	= 2,
> +	 *  };
> +	 *
> +	 * struct {
> +	 *	struct perf_event_header	header;
> +	 *	u16				type;
> +	 *	u16				flags;
> +	 *	u32				id;
> +	 *	u8				tag[BPF_TAG_SIZE];

This does forever fix BPF_TAG_SIZE; is that intentional? We could easily
make that a variable length field like with the other event. Or is that
value already part of the eBPF ABI?

> +	 *	struct sample_id		sample_id;
> +	 * };
> +	 */
> +	PERF_RECORD_BPF_EVENT			= 18,
> @@ -7744,6 +7747,121 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
>  	WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
>  }
>  
> +struct perf_bpf_event {
> +	struct bpf_prog	*prog;
> +	struct {
> +		struct perf_event_header        header;
> +		u16				type;
> +		u16				flags;
> +		u32				id;
> +		u8				tag[BPF_TAG_SIZE];
> +	} event_id;
> +};

> +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
> +					 enum perf_bpf_event_type type)
> +{
> +	bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
> +	int i;
> +
> +	if (prog->aux->func_cnt == 0) {
> +		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
> +				   (u64)(unsigned long)prog->bpf_func,
> +				   prog->jited_len, unregister,
> +				   perf_event_bpf_get_name, prog);
> +	} else {
> +		for (i = 0; i < prog->aux->func_cnt; i++) {
> +			struct bpf_prog *subprog = prog->aux->func[i];
> +
> +			perf_event_ksymbol(
> +				PERF_RECORD_KSYMBOL_TYPE_BPF,
> +				(u64)(unsigned long)subprog->bpf_func,
> +				subprog->jited_len, unregister,
> +				perf_event_bpf_get_name, subprog);
> +		}
> +	}
> +}

I still think this is a weird place to do this.. :-) See them patches I
just sent.

> +void perf_event_bpf_event(struct bpf_prog *prog,
> +			  enum perf_bpf_event_type type,
> +			  u16 flags)
> +{
> +	struct perf_bpf_event bpf_event;
> +
> +	if (type <= PERF_BPF_EVENT_UNKNOWN ||
> +	    type >= PERF_BPF_EVENT_MAX)
> +		return;
> +
> +	switch (type) {
> +	case PERF_BPF_EVENT_PROG_LOAD:
> +	case PERF_BPF_EVENT_PROG_UNLOAD:
> +		if (atomic_read(&nr_ksymbol_events))
> +			perf_event_bpf_emit_ksymbols(prog, type);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (!atomic_read(&nr_bpf_events))
> +		return;
> +
> +	bpf_event = (struct perf_bpf_event){
> +		.prog = prog,
> +		.event_id = {
> +			.header = {
> +				.type = PERF_RECORD_BPF_EVENT,
> +				.size = sizeof(bpf_event.event_id),
> +			},
> +			.type = type,
> +			.flags = flags,
> +			.id = prog->aux->id,
> +		},
> +	};

	BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));

> +	memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
> +	perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
> +}

Anyway, small nits only:

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Song Liu Jan. 17, 2019, 1:49 p.m. UTC | #2
Thanks Peter!

> On Jan 17, 2019, at 5:09 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> 
> On Wed, Jan 16, 2019 at 08:29:25AM -0800, Song Liu wrote:
>> +	/*
>> +	 * Record bpf events:
>> +	 *  enum perf_bpf_event_type {
>> +	 *	PERF_BPF_EVENT_UNKNOWN		= 0,
>> +	 *	PERF_BPF_EVENT_PROG_LOAD	= 1,
>> +	 *	PERF_BPF_EVENT_PROG_UNLOAD	= 2,
>> +	 *  };
>> +	 *
>> +	 * struct {
>> +	 *	struct perf_event_header	header;
>> +	 *	u16				type;
>> +	 *	u16				flags;
>> +	 *	u32				id;
>> +	 *	u8				tag[BPF_TAG_SIZE];
> 
> This does forever fix BPF_TAG_SIZE; is that intentional? We could easily
> make that a variable length field like with the other event. Or is that
> value already part of the eBPF ABI?

Yes, BPF_TAG_SIZE is already part of the eBPF ABI.

Song

> 
>> +	 *	struct sample_id		sample_id;
>> +	 * };
>> +	 */
>> +	PERF_RECORD_BPF_EVENT			= 18,
>> @@ -7744,6 +7747,121 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
>> 	WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
>> }
>> 
>> +struct perf_bpf_event {
>> +	struct bpf_prog	*prog;
>> +	struct {
>> +		struct perf_event_header        header;
>> +		u16				type;
>> +		u16				flags;
>> +		u32				id;
>> +		u8				tag[BPF_TAG_SIZE];
>> +	} event_id;
>> +};
> 
>> +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
>> +					 enum perf_bpf_event_type type)
>> +{
>> +	bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
>> +	int i;
>> +
>> +	if (prog->aux->func_cnt == 0) {
>> +		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
>> +				   (u64)(unsigned long)prog->bpf_func,
>> +				   prog->jited_len, unregister,
>> +				   perf_event_bpf_get_name, prog);
>> +	} else {
>> +		for (i = 0; i < prog->aux->func_cnt; i++) {
>> +			struct bpf_prog *subprog = prog->aux->func[i];
>> +
>> +			perf_event_ksymbol(
>> +				PERF_RECORD_KSYMBOL_TYPE_BPF,
>> +				(u64)(unsigned long)subprog->bpf_func,
>> +				subprog->jited_len, unregister,
>> +				perf_event_bpf_get_name, subprog);
>> +		}
>> +	}
>> +}
> 
> I still think this is a weird place to do this.. :-) See them patches I
> just sent.
> 
>> +void perf_event_bpf_event(struct bpf_prog *prog,
>> +			  enum perf_bpf_event_type type,
>> +			  u16 flags)
>> +{
>> +	struct perf_bpf_event bpf_event;
>> +
>> +	if (type <= PERF_BPF_EVENT_UNKNOWN ||
>> +	    type >= PERF_BPF_EVENT_MAX)
>> +		return;
>> +
>> +	switch (type) {
>> +	case PERF_BPF_EVENT_PROG_LOAD:
>> +	case PERF_BPF_EVENT_PROG_UNLOAD:
>> +		if (atomic_read(&nr_ksymbol_events))
>> +			perf_event_bpf_emit_ksymbols(prog, type);
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	if (!atomic_read(&nr_bpf_events))
>> +		return;
>> +
>> +	bpf_event = (struct perf_bpf_event){
>> +		.prog = prog,
>> +		.event_id = {
>> +			.header = {
>> +				.type = PERF_RECORD_BPF_EVENT,
>> +				.size = sizeof(bpf_event.event_id),
>> +			},
>> +			.type = type,
>> +			.flags = flags,
>> +			.id = prog->aux->id,
>> +		},
>> +	};
> 
> 	BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
> 
>> +	memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
>> +	perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
>> +}
> 
> Anyway, small nits only:
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad106d845b22..d531d4250bff 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -951,6 +951,7 @@  bpf_address_lookup(unsigned long addr, unsigned long *size,
 
 void bpf_prog_kallsyms_add(struct bpf_prog *fp);
 void bpf_prog_kallsyms_del(struct bpf_prog *fp);
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);
 
 #else /* CONFIG_BPF_JIT */
 
@@ -1006,6 +1007,12 @@  static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 {
 }
+
+static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+{
+	sym[0] = '\0';
+}
+
 #endif /* CONFIG_BPF_JIT */
 
 void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 77b2560f2dc7..0b539a2e21af 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1119,6 +1119,9 @@  typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
 extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
 			       bool unregister,
 			       perf_ksymbol_get_name_f get_name, void *data);
+extern void perf_event_bpf_event(struct bpf_prog *prog,
+				 enum perf_bpf_event_type type,
+				 u16 flags);
 
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1346,6 +1349,9 @@  static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
 				      bool unregister,
 				      perf_ksymbol_get_name_f get_name,
 				      void *data)			{ }
+static inline void perf_event_bpf_event(struct bpf_prog *prog,
+					enum perf_bpf_event_type type,
+					u16 flags)			{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 68c4da0227c5..8bd78a34e396 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -373,7 +373,8 @@  struct perf_event_attr {
 				write_backward :  1, /* Write ring buffer from end to beginning */
 				namespaces     :  1, /* include namespaces data */
 				ksymbol        :  1, /* include ksymbol events */
-				__reserved_1   : 34;
+				bpf_event      :  1, /* include bpf events */
+				__reserved_1   : 33;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -981,6 +982,25 @@  enum perf_event_type {
 	 */
 	PERF_RECORD_KSYMBOL			= 17,
 
+	/*
+	 * Record bpf events:
+	 *  enum perf_bpf_event_type {
+	 *	PERF_BPF_EVENT_UNKNOWN		= 0,
+	 *	PERF_BPF_EVENT_PROG_LOAD	= 1,
+	 *	PERF_BPF_EVENT_PROG_UNLOAD	= 2,
+	 *  };
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u16				type;
+	 *	u16				flags;
+	 *	u32				id;
+	 *	u8				tag[BPF_TAG_SIZE];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_BPF_EVENT			= 18,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -992,6 +1012,13 @@  enum perf_record_ksymbol_type {
 
 #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER	(1 << 0)
 
+enum perf_bpf_event_type {
+	PERF_BPF_EVENT_UNKNOWN		= 0,
+	PERF_BPF_EVENT_PROG_LOAD	= 1,
+	PERF_BPF_EVENT_PROG_UNLOAD	= 2,
+	PERF_BPF_EVENT_MAX,		/* non-ABI */
+};
+
 #define PERF_MAX_STACK_DEPTH		127
 #define PERF_MAX_CONTEXTS_PER_STACK	  8
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f908b9356025..19c49313c709 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -495,7 +495,7 @@  bpf_get_prog_addr_region(const struct bpf_prog *prog,
 	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
 }
 
-static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 {
 	const char *end = sym + KSYM_NAME_LEN;
 	const struct btf_type *type;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..30ebd085790b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1211,6 +1211,7 @@  static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
+		perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
 		bpf_prog_kallsyms_del_all(prog);
@@ -1554,6 +1555,7 @@  static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	}
 
 	bpf_prog_kallsyms_add(prog);
+	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
 	return err;
 
 free_used_maps:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ef27f2776999..2f238a8ddaab 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@  static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
+static atomic_t nr_bpf_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4308,6 +4309,8 @@  static void unaccount_event(struct perf_event *event)
 		dec = true;
 	if (event->attr.ksymbol)
 		atomic_dec(&nr_ksymbol_events);
+	if (event->attr.bpf_event)
+		atomic_dec(&nr_bpf_events);
 
 	if (dec) {
 		if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7744,6 +7747,121 @@  void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
 	WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
 }
 
+/*
+ * bpf program load/unload tracking
+ */
+
+struct perf_bpf_event {
+	struct bpf_prog	*prog;
+	struct {
+		struct perf_event_header        header;
+		u16				type;
+		u16				flags;
+		u32				id;
+		u8				tag[BPF_TAG_SIZE];
+	} event_id;
+};
+
+static int perf_event_bpf_match(struct perf_event *event)
+{
+	return event->attr.bpf_event;
+}
+
+static void perf_event_bpf_output(struct perf_event *event, void *data)
+{
+	struct perf_bpf_event *bpf_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_bpf_match(event))
+		return;
+
+	perf_event_header__init_id(&bpf_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				bpf_event->event_id.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, bpf_event->event_id);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+static int perf_event_bpf_get_name(char *name, int len, void *data)
+{
+	struct bpf_prog *prog = data;
+
+	bpf_get_prog_name(prog, name);
+	return 0;
+}
+
+static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
+					 enum perf_bpf_event_type type)
+{
+	bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
+	int i;
+
+	if (prog->aux->func_cnt == 0) {
+		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
+				   (u64)(unsigned long)prog->bpf_func,
+				   prog->jited_len, unregister,
+				   perf_event_bpf_get_name, prog);
+	} else {
+		for (i = 0; i < prog->aux->func_cnt; i++) {
+			struct bpf_prog *subprog = prog->aux->func[i];
+
+			perf_event_ksymbol(
+				PERF_RECORD_KSYMBOL_TYPE_BPF,
+				(u64)(unsigned long)subprog->bpf_func,
+				subprog->jited_len, unregister,
+				perf_event_bpf_get_name, subprog);
+		}
+	}
+}
+
+void perf_event_bpf_event(struct bpf_prog *prog,
+			  enum perf_bpf_event_type type,
+			  u16 flags)
+{
+	struct perf_bpf_event bpf_event;
+
+	if (type <= PERF_BPF_EVENT_UNKNOWN ||
+	    type >= PERF_BPF_EVENT_MAX)
+		return;
+
+	switch (type) {
+	case PERF_BPF_EVENT_PROG_LOAD:
+	case PERF_BPF_EVENT_PROG_UNLOAD:
+		if (atomic_read(&nr_ksymbol_events))
+			perf_event_bpf_emit_ksymbols(prog, type);
+		break;
+	default:
+		break;
+	}
+
+	if (!atomic_read(&nr_bpf_events))
+		return;
+
+	bpf_event = (struct perf_bpf_event){
+		.prog = prog,
+		.event_id = {
+			.header = {
+				.type = PERF_RECORD_BPF_EVENT,
+				.size = sizeof(bpf_event.event_id),
+			},
+			.type = type,
+			.flags = flags,
+			.id = prog->aux->id,
+		},
+	};
+
+	memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
+	perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
 	event->attach_state |= PERF_ATTACH_ITRACE;
@@ -9996,6 +10114,8 @@  static void account_event(struct perf_event *event)
 		inc = true;
 	if (event->attr.ksymbol)
 		atomic_inc(&nr_ksymbol_events);
+	if (event->attr.bpf_event)
+		atomic_inc(&nr_bpf_events);
 
 	if (inc) {
 		/*