[RFC,v2,3/6] perf: implement kprobe support to PERF_TYPE_PROBE

Message ID 20171112234014.2983360-6-songliubraving@fb.com
State RFC
Delegated to: David Miller
Headers show
Series
  • enable creating [k,u]probe with perf_event_open
Related show

Commit Message

Song Liu Nov. 12, 2017, 11:40 p.m.
A new pmu, perf_probe, is created for PERF_TYPE_PROBE. Based on
input from perf_event_open(), perf_probe creates a kprobe (or
kretprobe) for the perf_event. This kprobe is private to this
perf_event, and thus not added to global lists, and not
available in tracefs.

Two functions, create_local_trace_kprobe() and
destroy_local_trace_kprobe(), are added to create and destroy these
local trace_kprobes.

Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 include/linux/trace_events.h    |  2 +
 kernel/events/core.c            | 39 +++++++++++++++++-
 kernel/trace/trace_event_perf.c | 81 ++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_kprobe.c     | 91 +++++++++++++++++++++++++++++++++++++----
 kernel/trace/trace_probe.h      |  7 ++++
 5 files changed, 210 insertions(+), 10 deletions(-)

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 84014ec..96ce715 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -528,6 +528,8 @@  extern int  perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
 extern int  perf_trace_add(struct perf_event *event, int flags);
 extern void perf_trace_del(struct perf_event *event, int flags);
+extern int  perf_probe_init(struct perf_event *event);
+extern void perf_probe_destroy(struct perf_event *event);
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 42d24bd..97dc648 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8053,6 +8053,28 @@  static int perf_tp_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int perf_probe_event_init(struct perf_event *event)
+{
+	int err;
+
+	if (event->attr.type != PERF_TYPE_PROBE)
+		return -ENOENT;
+
+	/*
+	 * no branch sampling for probe events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	err = perf_probe_init(event);
+	if (err)
+		return err;
+
+	event->destroy = perf_probe_destroy;
+
+	return 0;
+}
+
 static struct pmu perf_tracepoint = {
 	.task_ctx_nr	= perf_sw_context,
 
@@ -8064,9 +8086,20 @@  static struct pmu perf_tracepoint = {
 	.read		= perf_swevent_read,
 };
 
+static struct pmu perf_probe = {
+	.task_ctx_nr	= perf_sw_context,
+	.event_init	= perf_probe_event_init,
+	.add		= perf_trace_add,
+	.del		= perf_trace_del,
+	.start		= perf_swevent_start,
+	.stop		= perf_swevent_stop,
+	.read		= perf_swevent_read,
+};
+
 static inline void perf_tp_register(void)
 {
 	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
+	perf_pmu_register(&perf_probe, "probe", PERF_TYPE_PROBE);
 }
 
 static void perf_event_free_filter(struct perf_event *event)
@@ -8149,7 +8182,8 @@  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	struct bpf_prog *prog;
 	int ret;
 
-	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+	if (event->attr.type != PERF_TYPE_TRACEPOINT &&
+	    event->attr.type != PERF_TYPE_PROBE)
 		return perf_event_set_bpf_handler(event, prog_fd);
 
 	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
@@ -8188,7 +8222,8 @@  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
-	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
+	if (event->attr.type != PERF_TYPE_TRACEPOINT &&
+	    event->attr.type != PERF_TYPE_PROBE) {
 		perf_event_free_bpf_handler(event);
 		return;
 	}
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 13ba2d3..bf9b99b 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -8,6 +8,7 @@ 
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include "trace.h"
+#include "trace_probe.h"
 
 static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
 
@@ -229,6 +230,74 @@  int perf_trace_init(struct perf_event *p_event)
 	return ret;
 }
 
+#ifdef CONFIG_KPROBE_EVENTS
+static int perf_probe_create_kprobe(struct perf_event *p_event,
+				    struct probe_desc *pd, char *name)
+{
+	struct trace_event_call *tp_event;
+	int ret;
+
+	tp_event = create_local_trace_kprobe(
+		name, (void *)(unsigned long)(pd->addr), pd->offset,
+		p_event->attr.is_return);
+	if (IS_ERR(tp_event))
+		return PTR_ERR(tp_event);
+	ret = perf_trace_event_init(tp_event, p_event);
+	if (ret)
+		destroy_local_trace_kprobe(tp_event);
+
+	return ret;
+}
+#else
+static int perf_probe_create_kprobe(struct perf_event *p_event,
+				    struct probe_desc *pd, char *name)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
+int perf_probe_init(struct perf_event *p_event)
+{
+	struct probe_desc pd;
+	int ret;
+	char *name = NULL;
+	__aligned_u64 aligned_probe_desc;
+
+	/*
+	 * attr.probe_desc may not be 64-bit aligned on 32-bit systems.
+	 * Make an aligned copy of it before calling u64_to_user_ptr().
+	 */
+	memcpy(&aligned_probe_desc, &p_event->attr.probe_desc,
+	       sizeof(__aligned_u64));
+
+	if (copy_from_user(&pd, u64_to_user_ptr(aligned_probe_desc),
+			   sizeof(struct probe_desc)))
+		return -EFAULT;
+
+	if (pd.func) {
+		name = kzalloc(MAX_PROBE_FUNC_NAME_LEN, GFP_KERNEL);
+		if (!name)
+			return -ENOMEM;
+		ret = strncpy_from_user(name, u64_to_user_ptr(pd.func),
+					MAX_PROBE_FUNC_NAME_LEN);
+		if (ret < 0)
+			goto out;
+
+		if (name[0] == '\0') {
+			kfree(name);
+			name = NULL;
+		}
+	}
+
+	if (!p_event->attr.is_uprobe)
+		ret = perf_probe_create_kprobe(p_event, &pd, name);
+	else
+		ret = -EOPNOTSUPP;
+out:
+	kfree(name);
+	return ret;
+}
+
 void perf_trace_destroy(struct perf_event *p_event)
 {
 	mutex_lock(&event_mutex);
@@ -237,6 +306,18 @@  void perf_trace_destroy(struct perf_event *p_event)
 	mutex_unlock(&event_mutex);
 }
 
+void perf_probe_destroy(struct perf_event *p_event)
+{
+	perf_trace_event_close(p_event);
+	perf_trace_event_unreg(p_event);
+
+	if (!p_event->attr.is_uprobe) {
+#ifdef CONFIG_KPROBE_EVENTS
+		destroy_local_trace_kprobe(p_event->tp_event);
+#endif
+	}
+}
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
 	struct trace_event_call *tp_event = p_event->tp_event;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index abf92e4..121a067 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -438,6 +438,14 @@  disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 			disable_kprobe(&tk->rp.kp);
 		wait = 1;
 	}
+
+	/*
+	 * if tk is not added to any list, it must be a local trace_kprobe
+	 * created with perf_event_open. We don't need to wait for these
+	 * trace_kprobes
+	 */
+	if (list_empty(&tk->list))
+		wait = 0;
  out:
 	if (wait) {
 		/*
@@ -1313,12 +1321,9 @@  static struct trace_event_functions kprobe_funcs = {
 	.trace		= print_kprobe_event
 };
 
-static int register_kprobe_event(struct trace_kprobe *tk)
+static inline void init_trace_event_call(struct trace_kprobe *tk,
+					 struct trace_event_call *call)
 {
-	struct trace_event_call *call = &tk->tp.call;
-	int ret;
-
-	/* Initialize trace_event_call */
 	INIT_LIST_HEAD(&call->class->fields);
 	if (trace_kprobe_is_return(tk)) {
 		call->event.funcs = &kretprobe_funcs;
@@ -1327,6 +1332,19 @@  static int register_kprobe_event(struct trace_kprobe *tk)
 		call->event.funcs = &kprobe_funcs;
 		call->class->define_fields = kprobe_event_define_fields;
 	}
+
+	call->flags = TRACE_EVENT_FL_KPROBE;
+	call->class->reg = kprobe_register;
+	call->data = tk;
+}
+
+static int register_kprobe_event(struct trace_kprobe *tk)
+{
+	struct trace_event_call *call = &tk->tp.call;
+	int ret = 0;
+
+	init_trace_event_call(tk, call);
+
 	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
 		return -ENOMEM;
 	ret = register_trace_event(&call->event);
@@ -1334,9 +1352,6 @@  static int register_kprobe_event(struct trace_kprobe *tk)
 		kfree(call->print_fmt);
 		return -ENODEV;
 	}
-	call->flags = TRACE_EVENT_FL_KPROBE;
-	call->class->reg = kprobe_register;
-	call->data = tk;
 	ret = trace_add_event_call(call);
 	if (ret) {
 		pr_info("Failed to register kprobe event: %s\n",
@@ -1358,6 +1373,66 @@  static int unregister_kprobe_event(struct trace_kprobe *tk)
 	return ret;
 }
 
+#ifdef CONFIG_PERF_EVENTS
+/* create a trace_kprobe, but don't add it to global lists */
+struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+			  bool is_return)
+{
+	struct trace_kprobe *tk;
+	int ret;
+	char *event;
+
+	/*
+	 * local trace_kprobes are not added to probe_list, so they are never
+	 * searched in find_trace_kprobe(). Therefore, there is no concern
+	 * about duplicate names here.
+	 */
+	event = func ? func : "DUMMY_EVENT";
+
+	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
+				offs, 0 /* maxactive */, 0 /* nargs */,
+				is_return);
+
+	if (IS_ERR(tk)) {
+		pr_info("Failed to allocate trace_probe.(%d)\n",
+			(int)PTR_ERR(tk));
+		return ERR_CAST(tk);
+	}
+
+	init_trace_event_call(tk, &tk->tp.call);
+
+	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = __register_trace_kprobe(tk);
+	if (ret < 0)
+		goto error;
+
+	return &tk->tp.call;
+error:
+	free_trace_kprobe(tk);
+	return ERR_PTR(ret);
+}
+
+void destroy_local_trace_kprobe(struct trace_event_call *event_call)
+{
+	struct trace_kprobe *tk;
+
+	tk = container_of(event_call, struct trace_kprobe, tp.call);
+
+	if (trace_probe_is_enabled(&tk->tp)) {
+		WARN_ON(1);
+		return;
+	}
+
+	__unregister_trace_kprobe(tk);
+	free_trace_kprobe(tk);
+}
+#endif /* CONFIG_PERF_EVENTS */
+
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 903273c..910ae1b 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -411,3 +411,10 @@  store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
 }
 
 extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+
+#ifdef CONFIG_PERF_EVENTS
+extern struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+			  bool is_return);
+extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
+#endif