diff mbox series

[3/6] perf: implement kprobe support to PERF_TYPE_PROBE

Message ID 20171115172339.1791161-6-songliubraving@fb.com
State Not Applicable, archived
Delegated to: David Miller
Headers show
Series enable creating [k,u]probe with perf_event_open | expand

Commit Message

Song Liu Nov. 15, 2017, 5:23 p.m. UTC
A new pmu, perf_probe, is created for PERF_TYPE_PROBE. Based on
input from perf_event_open(), perf_probe creates a kprobe (or
kretprobe) for the perf_event. This kprobe is private to this
perf_event, and thus not added to global lists, and not
available in tracefs.

Two functions, create_local_trace_kprobe() and
destroy_local_trace_kprobe(), are added to create and destroy these
local trace_kprobes.

Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 include/linux/trace_events.h    |  2 +
 kernel/events/core.c            | 41 +++++++++++++++++--
 kernel/trace/trace_event_perf.c | 81 ++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_kprobe.c     | 91 +++++++++++++++++++++++++++++++++++++----
 kernel/trace/trace_probe.h      |  7 ++++
 5 files changed, 211 insertions(+), 11 deletions(-)

Comments

Peter Zijlstra Nov. 23, 2017, 10:06 a.m. UTC | #1
On Wed, Nov 15, 2017 at 09:23:36AM -0800, Song Liu wrote:
> +int perf_probe_init(struct perf_event *p_event)
> +{

> +	__aligned_u64 aligned_probe_desc;
> +
> +	/*
> +	 * attr.probe_desc may not be 64-bit aligned on 32-bit systems.
> +	 * Make an aligned copy of it to before u64_to_user_ptr().
> +	 */
> +	memcpy(&aligned_probe_desc, &p_event->attr.probe_desc,
> +	       sizeof(__aligned_u64));
> +
> +	if (copy_from_user(&pd, u64_to_user_ptr(aligned_probe_desc),
> +			   sizeof(struct probe_desc)))
> +		return -EFAULT;

That doesn't seem to make any sense what so ever.. the alignment has no
effect on this usecase. Not to mention that the kernel variable should
very much already be aligned.
diff mbox series

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bcb4dc..743e68d 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -494,6 +494,8 @@  extern int  perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
 extern int  perf_trace_add(struct perf_event *event, int flags);
 extern void perf_trace_del(struct perf_event *event, int flags);
+extern int  perf_probe_init(struct perf_event *event);
+extern void perf_probe_destroy(struct perf_event *event);
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81dd57b..95c6610 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7966,6 +7966,28 @@  static int perf_tp_event_init(struct perf_event *event)
 	return 0;
 }
 
+/*
+ * event_init of the perf_probe pmu; installs perf_probe_destroy() on success.
+ */
+static int perf_probe_event_init(struct perf_event *event)
+{
+	int ret;
+
+	/* Any other attr.type is answered with -ENOENT. */
+	if (event->attr.type != PERF_TYPE_PROBE)
+		return -ENOENT;
+
+	/* Branch-stack sampling is not offered for probe events. */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	ret = perf_probe_init(event);
+	if (!ret)
+		event->destroy = perf_probe_destroy;
+
+	return ret;
+}
+
 static struct pmu perf_tracepoint = {
 	.task_ctx_nr	= perf_sw_context,
 
@@ -7977,9 +7999,20 @@  static struct pmu perf_tracepoint = {
 	.read		= perf_swevent_read,
 };
 
+static struct pmu perf_probe = {	/* pmu for PERF_TYPE_PROBE events */
+	.event_init	= perf_probe_event_init,
+	.task_ctx_nr	= perf_sw_context,
+	.start		= perf_swevent_start,
+	.stop		= perf_swevent_stop,
+	.add		= perf_trace_add,
+	.del		= perf_trace_del,
+	.read		= perf_swevent_read,
+};
+
 static inline void perf_tp_register(void)
 {
 	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
+	perf_pmu_register(&perf_probe, "probe", PERF_TYPE_PROBE);
 }
 
 static void perf_event_free_filter(struct perf_event *event)
@@ -8061,7 +8094,8 @@  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	bool is_kprobe, is_tracepoint, is_syscall_tp;
 	struct bpf_prog *prog;
 
-	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+	if (event->attr.type != PERF_TYPE_TRACEPOINT &&
+	    event->attr.type != PERF_TYPE_PROBE)
 		return perf_event_set_bpf_handler(event, prog_fd);
 
 	if (event->tp_event->prog)
@@ -8533,8 +8567,9 @@  static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 	char *filter_str;
 	int ret = -EINVAL;
 
-	if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
-	    !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
+	if (((event->attr.type != PERF_TYPE_TRACEPOINT &&
+	      event->attr.type != PERF_TYPE_PROBE) ||
+	     !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
 	    !has_addr_filter(event))
 		return -EINVAL;
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 13ba2d3..bf9b99b 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -8,6 +8,7 @@ 
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include "trace.h"
+#include "trace_probe.h"
 
 static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
 
@@ -229,6 +230,74 @@  int perf_trace_init(struct perf_event *p_event)
 	return ret;
 }
 
+#ifdef CONFIG_KPROBE_EVENTS
+/* Create a local kprobe from @pd and @name and initialise @p_event on it. */
+static int perf_probe_create_kprobe(struct perf_event *p_event,
+				    struct probe_desc *pd, char *name)
+{
+	void *probe_addr = (void *)(unsigned long)(pd->addr);
+	struct trace_event_call *call;
+	int err;
+
+	call = create_local_trace_kprobe(name, probe_addr, pd->offset,
+					 p_event->attr.is_return);
+	if (IS_ERR(call))
+		return PTR_ERR(call);
+	err = perf_trace_event_init(call, p_event);
+	if (err)	/* perf setup failed: drop the new kprobe */
+		destroy_local_trace_kprobe(call);
+	return err;
+}
+#else
+static int perf_probe_create_kprobe(struct perf_event *p_event,
+				    struct probe_desc *pd, char *name)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
+/*
+ * Copy the user's struct probe_desc (plus the optional function name) and
+ * create the probe. Returns 0 or a negative errno; -E2BIG means the name,
+ * with its NUL, does not fit in MAX_PROBE_FUNC_NAME_LEN bytes.
+ */
+int perf_probe_init(struct perf_event *p_event)
+{
+	struct probe_desc pd;
+	char *name = NULL;
+	int ret;
+
+	if (copy_from_user(&pd, u64_to_user_ptr(p_event->attr.probe_desc),
+			   sizeof(pd)))
+		return -EFAULT;
+
+	if (pd.func) {
+		name = kzalloc(MAX_PROBE_FUNC_NAME_LEN, GFP_KERNEL);
+		if (!name)
+			return -ENOMEM;
+		ret = strncpy_from_user(name, u64_to_user_ptr(pd.func),
+					MAX_PROBE_FUNC_NAME_LEN);
+		/* A full-length copy carries no NUL terminator: reject it. */
+		if (ret == MAX_PROBE_FUNC_NAME_LEN)
+			ret = -E2BIG;
+		if (ret < 0)
+			goto out;
+
+		if (name[0] == '\0') {	/* empty name: same as no name */
+			kfree(name);
+			name = NULL;
+		}
+	}
+
+	if (!p_event->attr.is_uprobe)
+		ret = perf_probe_create_kprobe(p_event, &pd, name);
+	else
+		ret = -EOPNOTSUPP;
+out:
+	kfree(name);
+	return ret;
+}
+
 void perf_trace_destroy(struct perf_event *p_event)
 {
 	mutex_lock(&event_mutex);
@@ -237,6 +306,18 @@  void perf_trace_destroy(struct perf_event *p_event)
 	mutex_unlock(&event_mutex);
 }
 
+/* Close and unregister the perf side, then destroy the local kprobe. */
+void perf_probe_destroy(struct perf_event *p_event)
+{
+	perf_trace_event_close(p_event);
+	perf_trace_event_unreg(p_event);
+	if (p_event->attr.is_uprobe)
+		return;
+#ifdef CONFIG_KPROBE_EVENTS
+	destroy_local_trace_kprobe(p_event->tp_event);
+#endif
+}
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
 	struct trace_event_call *tp_event = p_event->tp_event;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8a907e1..16b334a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -438,6 +438,14 @@  disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 			disable_kprobe(&tk->rp.kp);
 		wait = 1;
 	}
+
+	/*
+	 * if tk is not added to any list, it must be a local trace_kprobe
+	 * created with perf_event_open. We don't need to wait for these
+	 * trace_kprobes
+	 */
+	if (list_empty(&tk->list))
+		wait = 0;
  out:
 	if (wait) {
 		/*
@@ -1315,12 +1323,9 @@  static struct trace_event_functions kprobe_funcs = {
 	.trace		= print_kprobe_event
 };
 
-static int register_kprobe_event(struct trace_kprobe *tk)
+static inline void init_trace_event_call(struct trace_kprobe *tk,
+					 struct trace_event_call *call)
 {
-	struct trace_event_call *call = &tk->tp.call;
-	int ret;
-
-	/* Initialize trace_event_call */
 	INIT_LIST_HEAD(&call->class->fields);
 	if (trace_kprobe_is_return(tk)) {
 		call->event.funcs = &kretprobe_funcs;
@@ -1329,6 +1334,19 @@  static int register_kprobe_event(struct trace_kprobe *tk)
 		call->event.funcs = &kprobe_funcs;
 		call->class->define_fields = kprobe_event_define_fields;
 	}
+
+	call->flags = TRACE_EVENT_FL_KPROBE;
+	call->class->reg = kprobe_register;
+	call->data = tk;
+}
+
+static int register_kprobe_event(struct trace_kprobe *tk)
+{
+	struct trace_event_call *call = &tk->tp.call;
+	int ret = 0;
+
+	init_trace_event_call(tk, call);
+
 	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
 		return -ENOMEM;
 	ret = register_trace_event(&call->event);
@@ -1336,9 +1354,6 @@  static int register_kprobe_event(struct trace_kprobe *tk)
 		kfree(call->print_fmt);
 		return -ENODEV;
 	}
-	call->flags = TRACE_EVENT_FL_KPROBE;
-	call->class->reg = kprobe_register;
-	call->data = tk;
 	ret = trace_add_event_call(call);
 	if (ret) {
 		pr_info("Failed to register kprobe event: %s\n",
@@ -1360,6 +1375,66 @@  static int unregister_kprobe_event(struct trace_kprobe *tk)
 	return ret;
 }
 
+#ifdef CONFIG_PERF_EVENTS
+/*
+ * Create a trace_kprobe without adding it to the global lists. Returns the
+ * probe's trace_event_call, or an ERR_PTR() on failure.
+ */
+struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+			  bool is_return)
+{
+	struct trace_kprobe *tk;
+	int ret;
+	char *event;
+
+	/*
+	 * Local trace_kprobes are never added to probe_list, so
+	 * find_trace_kprobe() cannot see them and duplicate names are fine.
+	 */
+	event = func ? func : "DUMMY_EVENT";
+
+	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, addr, func, offs,
+				0 /* maxactive */, 0 /* nargs */, is_return);
+	if (IS_ERR(tk)) {
+		pr_info("Failed to allocate trace_probe.(%d)\n",
+			(int)PTR_ERR(tk));
+		return ERR_CAST(tk);
+	}
+
+	init_trace_event_call(tk, &tk->tp.call);
+	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = __register_trace_kprobe(tk);
+	if (ret < 0) {
+		kfree(tk->tp.call.print_fmt);	/* from set_print_fmt() */
+		goto error;
+	}
+	return &tk->tp.call;
+error:
+	free_trace_kprobe(tk);
+	return ERR_PTR(ret);
+}
+
+/* Unregister and free a trace_kprobe made by create_local_trace_kprobe(). */
+void destroy_local_trace_kprobe(struct trace_event_call *event_call)
+{
+	struct trace_kprobe *tk;
+
+	tk = container_of(event_call, struct trace_kprobe, tp.call);
+
+	if (WARN_ON(trace_probe_is_enabled(&tk->tp)))
+		return;	/* never free a probe that is still enabled */
+
+	__unregister_trace_kprobe(tk);
+	kfree(tk->tp.call.print_fmt);	/* from set_print_fmt() */
+	free_trace_kprobe(tk);
+}
+#endif /* CONFIG_PERF_EVENTS */
+
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 903273c..910ae1b 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -411,3 +411,10 @@  store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
 }
 
 extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+
+#ifdef CONFIG_PERF_EVENTS
+extern struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+			  bool is_return);
+extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
+#endif