diff mbox series

[v3] bcc: Try use new API to create [k,u]probe with perf_event_open

Message ID 20171130235023.1414663-2-songliubraving@fb.com
State Not Applicable, archived
Delegated to: BPF Maintainers
Headers show
Series enable creating [k,u]probe with perf_event_open | expand

Commit Message

Song Liu Nov. 30, 2017, 11:50 p.m. UTC
New kernel API allows creating [k,u]probe with perf_event_open.
This patch tries to use the new API. If the new API doesn't work,
we fall back to old API.

bpf_detach_probe() looks up the event being removed. If the event
is not found, we skip the clean up procedure.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 src/cc/libbpf.c | 245 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 176 insertions(+), 69 deletions(-)
diff mbox series

Patch

diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index ef6daf3..5be9a7b 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -526,38 +526,67 @@  int bpf_attach_socket(int sock, int prog) {
   return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
 }
 
+/*
+ * new kernel API allows creating [k,u]probe with perf_event_open, which
+ * makes it easier to clean up the [k,u]probe. This function tries to
+ * create pfd with the new API.
+ */
+static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
+    int pid, int cpu, int group_fd, int perf_probe_type, int is_return)
+{
+  struct perf_event_attr attr = {};
+
+  attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+  attr.sample_period = 1;
+  attr.wakeup_events = 1;
+  attr.config = is_return ? 1 : 0;
+  attr.probe_offset = offs;  /* for kprobe, if name is NULL, this the addr */
+  attr.size = sizeof(attr);
+  attr.type = perf_probe_type;
+  attr.kprobe_func = ptr_to_u64((void *)name);  /* also work for uprobe_path */
+  return syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd,
+                 PERF_FLAG_FD_CLOEXEC);
+}
+
 static int bpf_attach_tracing_event(int progfd, const char *event_path,
-    struct perf_reader *reader, int pid, int cpu, int group_fd) {
-  int efd, pfd;
+    struct perf_reader *reader, int pid, int cpu, int group_fd, int pfd) {
+  int efd;
   ssize_t bytes;
   char buf[256];
   struct perf_event_attr attr = {};
 
-  snprintf(buf, sizeof(buf), "%s/id", event_path);
-  efd = open(buf, O_RDONLY, 0);
-  if (efd < 0) {
-    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
-    return -1;
-  }
+  /*
+   * Only look up id and call perf_event_open when
+   * bpf_try_perf_event_open_with_probe() didn't returns valid pfd.
+   */
+  if (pfd < 0) {
+    snprintf(buf, sizeof(buf), "%s/id", event_path);
+    efd = open(buf, O_RDONLY, 0);
+    if (efd < 0) {
+      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+      return -1;
+    }
 
-  bytes = read(efd, buf, sizeof(buf));
-  if (bytes <= 0 || bytes >= sizeof(buf)) {
-    fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
+    bytes = read(efd, buf, sizeof(buf));
+    if (bytes <= 0 || bytes >= sizeof(buf)) {
+      fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
+      close(efd);
+      return -1;
+    }
     close(efd);
-    return -1;
-  }
-  close(efd);
-  buf[bytes] = '\0';
-  attr.config = strtol(buf, NULL, 0);
-  attr.type = PERF_TYPE_TRACEPOINT;
-  attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
-  attr.sample_period = 1;
-  attr.wakeup_events = 1;
-  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
-  if (pfd < 0) {
-    fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
-    return -1;
+    buf[bytes] = '\0';
+    attr.config = strtol(buf, NULL, 0);
+    attr.type = PERF_TYPE_TRACEPOINT;
+    attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+    attr.sample_period = 1;
+    attr.wakeup_events = 1;
+    pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
+    if (pfd < 0) {
+      fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
+      return -1;
+    }
   }
+
   perf_reader_set_fd(reader, pfd);
 
   if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
@@ -575,6 +604,26 @@  static int bpf_attach_tracing_event(int progfd, const char *event_path,
   return 0;
 }
 
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *p_type)
+{
+  int fd;
+  int ret;
+  char buf[64];
+
+  snprintf(buf, sizeof(buf), PMU_TYPE_FILE, p_type);
+
+  fd = open(buf, O_RDONLY);
+  if (fd < 0)
+    return -1;
+  ret = read(fd, buf, sizeof(buf));
+  close(fd);
+  if (ret < 0 || ret >= sizeof(buf))
+    return -1;
+  ret = (int)strtol(buf, NULL, 10);
+  return errno ? -1 : ret;
+}
+
 void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
                         const char *fn_name,
                         pid_t pid, int cpu, int group_fd,
@@ -585,31 +634,42 @@  void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, con
   char event_alias[128];
   struct perf_reader *reader = NULL;
   static char *event_type = "kprobe";
+  int pfd = -1;
+  int perf_probe_type;
 
   reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
   if (!reader)
     goto error;
 
-  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
-  kfd = open(buf, O_WRONLY | O_APPEND, 0);
-  if (kfd < 0) {
-    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
-    goto error;
-  }
+  /* try use new API to create kprobe */
+  perf_probe_type = bpf_find_probe_type(event_type);
+  if (perf_probe_type != -1)
+    pfd = bpf_try_perf_event_open_with_probe(fn_name, 0, pid, cpu, group_fd,
+                                             perf_probe_type,
+                                             attach_type != BPF_PROBE_ENTRY);
 
-  snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
-  snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
-			event_type, event_alias, fn_name);
-  if (write(kfd, buf, strlen(buf)) < 0) {
-    if (errno == EINVAL)
-      fprintf(stderr, "check dmesg output for possible cause\n");
+  if (pfd < 0) {
+    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+    kfd = open(buf, O_WRONLY | O_APPEND, 0);
+    if (kfd < 0) {
+      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+      goto error;
+    }
+
+    snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
+    snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
+             event_type, event_alias, fn_name);
+    if (write(kfd, buf, strlen(buf)) < 0) {
+      if (errno == EINVAL)
+        fprintf(stderr, "check dmesg output for possible cause\n");
+      close(kfd);
+      goto error;
+    }
     close(kfd);
-    goto error;
+    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
   }
-  close(kfd);
 
-  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
-  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, pfd) < 0)
     goto error;
 
   return reader;
@@ -691,42 +751,52 @@  void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, con
   struct perf_reader *reader = NULL;
   static char *event_type = "uprobe";
   int res, kfd = -1, ns_fd = -1;
+  int pfd = -1;
+  int perf_probe_type;
 
   reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
   if (!reader)
     goto error;
 
-  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
-  kfd = open(buf, O_WRONLY | O_APPEND, 0);
-  if (kfd < 0) {
-    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
-    goto error;
-  }
+  /* try use new API to create uprobe */
+  perf_probe_type = bpf_find_probe_type(event_type);
+  pfd = bpf_try_perf_event_open_with_probe(binary_path, offset, pid, cpu,
+            group_fd, perf_probe_type, attach_type != BPF_PROBE_ENTRY);
 
-  res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
-  if (res < 0 || res >= sizeof(event_alias)) {
-    fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
-    goto error;
-  }
-  res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
-			event_type, event_alias, binary_path, offset);
-  if (res < 0 || res >= sizeof(buf)) {
-    fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
-    goto error;
-  }
+  if (pfd < 0) {
+    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+    kfd = open(buf, O_WRONLY | O_APPEND, 0);
+    if (kfd < 0) {
+      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+      goto error;
+    }
 
-  ns_fd = enter_mount_ns(pid);
-  if (write(kfd, buf, strlen(buf)) < 0) {
-    if (errno == EINVAL)
-      fprintf(stderr, "check dmesg output for possible cause\n");
-    goto error;
+    res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
+    if (res < 0 || res >= sizeof(event_alias)) {
+      fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
+      goto error;
+    }
+    res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
+                   event_type, event_alias, binary_path, offset);
+    if (res < 0 || res >= sizeof(buf)) {
+      fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
+      goto error;
+    }
+
+    ns_fd = enter_mount_ns(pid);
+    if (write(kfd, buf, strlen(buf)) < 0) {
+      if (errno == EINVAL)
+        fprintf(stderr, "check dmesg output for possible cause\n");
+      goto error;
+    }
+    close(kfd);
+    exit_mount_ns(ns_fd);
+    ns_fd = -1;
+
+    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
   }
-  close(kfd);
-  exit_mount_ns(ns_fd);
-  ns_fd = -1;
 
-  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
-  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, pfd) < 0)
     goto error;
 
   return reader;
@@ -741,8 +811,43 @@  error:
 
 static int bpf_detach_probe(const char *ev_name, const char *event_type)
 {
-  int kfd, res;
+  int kfd = -1, res;
   char buf[PATH_MAX];
+  int found_event = 0;
+  size_t bufsize = 0;
+  char *cptr = NULL;
+  FILE *fp;
+
+  /*
+   * For [k,u]probe created with perf_event_open (on newer kernel), it is
+   * not necessary to clean it up in [k,u]probe_events. We first look up
+   * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
+   * it is safe to skip the cleaning up process (write -:... to the file).
+   */
+  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+  fp = fopen(buf, "r");
+  if (!fp) {
+    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+    goto error;
+  }
+
+  res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
+  if (res < 0 || res >= sizeof(buf)) {
+    fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
+    goto error;
+  }
+
+  while (getline(&cptr, &bufsize, fp) != -1)
+    if (strstr(cptr, buf) != NULL) {
+      found_event = 1;
+      break;
+    }
+  fclose(fp);
+  fp = NULL;
+
+  if (!found_event)
+    return 0;
+
   snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
   kfd = open(buf, O_WRONLY | O_APPEND, 0);
   if (kfd < 0) {
@@ -766,6 +871,8 @@  static int bpf_detach_probe(const char *ev_name, const char *event_type)
 error:
   if (kfd >= 0)
     close(kfd);
+  if (fp)
+    fclose(fp);
   return -1;
 }
 
@@ -792,7 +899,7 @@  void * bpf_attach_tracepoint(int progfd, const char *tp_category,
 
   snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
            tp_category, tp_name);
-  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, -1) < 0)
     goto error;
 
   return reader;