new file mode 100644
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+#ifndef _UAPI__LINUX_BPF_GTRACE_H__
+#define _UAPI__LINUX_BPF_GTRACE_H__
+
+#include <linux/types.h>
+#include <asm/bpf_perf_event.h>
+
+/* Context exposed to BPF_PROG_TYPE_GTRACE programs.
+ *
+ * This is a UAPI header, so the exported __u64/__u32 types must be used;
+ * the kernel-internal u64/u32 spellings are not visible to userspace.
+ */
+struct bpf_gtrace_context {
+	/* CPU registers at the instrumentation point */
+	bpf_user_pt_regs_t regs;
+
+	/* Current task info */
+	__u64 task;	/* current */
+	__u64 state;	/* current->state */
+	__u32 prio;	/* current->prio */
+	__u32 cpu;	/* current->cpu or current->thread_info->cpu */
+	__u32 tid;	/* current->pid */
+	__u32 pid;	/* current->tgid */
+	__u32 ppid;	/* current->real_parent->tgid */
+	__u32 uid;	/* from_kuid(&init_user_ns, current_real_cred()->uid) */
+	__u32 gid;	/* from_kgid(&init_user_ns, current_real_cred()->gid) */
+	__u32 euid;	/* from_kuid(&init_user_ns, current_real_cred()->euid) */
+	__u32 egid;	/* from_kgid(&init_user_ns, current_real_cred()->egid) */
+};
+
+#endif /* _UAPI__LINUX_BPF_GTRACE_H__ */
@@ -35,6 +35,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif
+#ifdef CONFIG_BPF_GTRACE
+BPF_PROG_TYPE(BPF_PROG_TYPE_GTRACE, gtrace)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
@@ -166,6 +166,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_GTRACE,
};
enum bpf_attach_type {
@@ -792,6 +792,13 @@ config GCOV_PROFILE_FTRACE
Note that on a kernel compiled with this config, ftrace will
run significantly slower.
+config BPF_GTRACE
+ bool "Generic BPF Tracing Support"
+ depends on BPF_SYSCALL
+ default y
+ help
+ Enable generic tracing BPF program support.
+
endif # FTRACE
endif # TRACING_SUPPORT
@@ -82,6 +82,7 @@ endif
obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
+obj-$(CONFIG_BPF_GTRACE) += gtrace.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
new file mode 100644
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <linux/bpf.h>
+#include <linux/bpf_gtrace.h>
+#include <linux/filter.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+
+/* Kernel-side runtime context backing struct bpf_gtrace_context.
+ * convert_ctx_access() rewrites program loads on the user-visible
+ * layout into loads through these two pointers.
+ */
+struct bpf_gtrace_ctx {
+	struct pt_regs *regs;		/* registers at the probe site */
+	struct task_struct *task;	/* current at cast time */
+};
+
+/* Return the helper prototype available to gtrace programs.
+ * Only bpf_trace_printk() is currently allowed; everything else is
+ * rejected by returning NULL.
+ */
+static const struct bpf_func_proto *
+gtrace_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_trace_printk)
+		return bpf_get_trace_printk_proto();
+
+	return NULL;
+}
+
+/* Verifier hook: decide whether a load of [off, off+size) from
+ * struct bpf_gtrace_context is permitted.  Only aligned reads are
+ * allowed; narrow (sub-field-width) loads are accepted where
+ * bpf_ctx_narrow_access_ok() permits them.
+ */
+static bool gtrace_is_valid_access(int off, int size, enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	/* Ensure offset is within the context structure. */
+	if (off < 0 || off >= sizeof(struct bpf_gtrace_context))
+		return false;
+
+	/* Only READ access is allowed. */
+	if (type != BPF_READ)
+		return false;
+
+	/* Ensure offset is aligned (verifier guarantees size > 0). */
+	if (off % size != 0)
+		return false;
+
+	/* bpf_ctx_range() expands to a case range covering every byte
+	 * of the named field.
+	 */
+	switch (off) {
+	case bpf_ctx_range(struct bpf_gtrace_context, task):
+	case bpf_ctx_range(struct bpf_gtrace_context, state):
+		bpf_ctx_record_field_size(info, sizeof(u64));
+		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u64)))
+			return true;
+		break;
+	case bpf_ctx_range(struct bpf_gtrace_context, prio):
+	case bpf_ctx_range(struct bpf_gtrace_context, cpu):
+	case bpf_ctx_range(struct bpf_gtrace_context, tid):
+	case bpf_ctx_range(struct bpf_gtrace_context, pid):
+	case bpf_ctx_range(struct bpf_gtrace_context, ppid):
+	case bpf_ctx_range(struct bpf_gtrace_context, uid):
+	case bpf_ctx_range(struct bpf_gtrace_context, gid):
+	case bpf_ctx_range(struct bpf_gtrace_context, euid):
+	case bpf_ctx_range(struct bpf_gtrace_context, egid):
+		bpf_ctx_record_field_size(info, sizeof(u32));
+		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u32)))
+			return true;
+		break;
+	default:
+		/* Remaining offsets fall inside the leading regs member;
+		 * allow only full register-width loads there.
+		 */
+		if (size == sizeof(unsigned long))
+			return true;
+	}
+
+	return false;
+}
+
+/* Helpers for convert_ctx_access(); each expands to one BPF_LDX insn.
+ *
+ * CTX variants read a member relative to the context register (src_reg);
+ * LNK variants follow a pointer already loaded into dst_reg, enabling
+ * chained dereferences such as task->real_parent->tgid.
+ * The *_FIELD variants also report the true field width back to the
+ * verifier through *target_size (needed for narrow-load fixups).
+ */
+#define BPF_LDX_CTX_PTR(type, member, si) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->src_reg, offsetof(type, member))
+#define BPF_LDX_LNK_PTR(type, member, si) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->dst_reg, offsetof(type, member))
+#define BPF_LDX_CTX_FIELD(type, member, si, target_size) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->src_reg, \
+		    ({ \
+			*(target_size) = FIELD_SIZEOF(type, member); \
+			offsetof(type, member); \
+		    }))
+#define BPF_LDX_LNK_FIELD(type, member, si, target_size) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->dst_reg, \
+		    ({ \
+			*(target_size) = FIELD_SIZEOF(type, member); \
+			offsetof(type, member); \
+		    }))
+
+/* Verifier hook: rewrite a program load on the user-visible
+ * struct bpf_gtrace_context into loads on the runtime
+ * struct bpf_gtrace_ctx, chasing task_struct/cred pointers as needed.
+ * Returns the number of instructions emitted into insn_buf.
+ */
+static u32 gtrace_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct bpf_gtrace_context, task):
+		*insn++ = BPF_LDX_CTX_FIELD(struct bpf_gtrace_ctx, task, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, state):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, state, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, prio):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, prio, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, cpu):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		/* The CPU number lives in task_struct only when
+		 * THREAD_INFO_IN_TASK; otherwise go through
+		 * task->stack (thread_info) to reach it.
+		 */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, cpu, si,
+					    target_size);
+#else
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, stack, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct thread_info, cpu, si,
+					    target_size);
+#endif
+		break;
+	case offsetof(struct bpf_gtrace_context, tid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, pid, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, pid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, tgid, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, ppid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, real_parent, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, tgid, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, uid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, uid, si, target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, gid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, gid, si, target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, euid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, euid, si, target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, egid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, egid, si, target_size);
+		break;
+	default:
+		/* Offsets inside the leading regs member: load the regs
+		 * pointer and read the register at the same relative
+		 * offset (regs sits at offset 0 of both layouts).
+		 */
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, regs, si);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
+				      si->off);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+/* Verifier callbacks for BPF_PROG_TYPE_GTRACE programs. */
+const struct bpf_verifier_ops gtrace_verifier_ops = {
+	.get_func_proto = gtrace_get_func_proto,
+	.is_valid_access = gtrace_is_valid_access,
+	.convert_ctx_access = gtrace_convert_ctx_access,
+};
+
+/* A gtrace context can currently only be built from a kprobe program's
+ * context; reject every other source type.
+ */
+static bool gtrace_can_cast(enum bpf_prog_type stype, enum bpf_prog_type ttype)
+{
+	return stype == BPF_PROG_TYPE_KPROBE;
+}
+
+/* Per-CPU scratch context handed out by gtrace_cast_context().
+ * NOTE(review): safe only while callers run with preemption disabled
+ * (true for kprobe-attached BPF programs) -- confirm for any new
+ * source program type.
+ */
+DEFINE_PER_CPU(struct bpf_gtrace_ctx, gtrace_ctx);
+
+/* Build a gtrace context from the source program's context.
+ * Only kprobe contexts (struct pt_regs *) are supported; any other
+ * source type yields NULL.
+ *
+ * Fix: the argument to this_cpu_ptr() was corrupted to ">race_ctx"
+ * ("&gt" mangled to ">"); it must be &gtrace_ctx to compile.
+ */
+static void *gtrace_cast_context(enum bpf_prog_type stype,
+				 enum bpf_prog_type ttype, void *ctx)
+{
+	struct bpf_gtrace_ctx *gctx;
+
+	if (stype != BPF_PROG_TYPE_KPROBE)
+		return NULL;
+
+	gctx = this_cpu_ptr(&gtrace_ctx);
+	gctx->regs = (struct pt_regs *)ctx;
+	gctx->task = current;
+
+	return gctx;
+}
+
+const struct bpf_prog_ops gtrace_prog_ops = {
+	.can_cast = gtrace_can_cast,
+	.cast_context = gtrace_cast_context,
+};
@@ -15,6 +15,7 @@ hostprogs-y += tracex2
hostprogs-y += tracex3
hostprogs-y += tracex4
hostprogs-y += tracex5
+hostprogs-y += tracex5b
hostprogs-y += tracex6
hostprogs-y += tracex7
hostprogs-y += test_probe_write_user
@@ -69,6 +70,7 @@ tracex2-objs := bpf_load.o tracex2_user.o
tracex3-objs := bpf_load.o tracex3_user.o
tracex4-objs := bpf_load.o tracex4_user.o
tracex5-objs := bpf_load.o tracex5_user.o
+tracex5b-objs := bpf_load.o tracex5b_user.o
tracex6-objs := bpf_load.o tracex6_user.o
tracex7-objs := bpf_load.o tracex7_user.o
load_sock_ops-objs := bpf_load.o load_sock_ops.o
@@ -120,6 +122,7 @@ always += tracex2_kern.o
always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
+always += tracex5b_kern.o
always += tracex6_kern.o
always += tracex7_kern.o
always += sock_flags_kern.o
@@ -87,6 +87,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
+ bool is_gtrace = strncmp(event, "gtrace", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -120,6 +121,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SK_SKB;
} else if (is_sk_msg) {
prog_type = BPF_PROG_TYPE_SK_MSG;
+ } else if (is_gtrace) {
+ prog_type = BPF_PROG_TYPE_GTRACE;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -140,8 +143,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
- if (is_socket)
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg || is_gtrace) {
+ if (is_socket || is_gtrace)
event += 6;
else
event += 7;
@@ -643,7 +646,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
memcmp(shname, "sk_skb", 6) == 0 ||
- memcmp(shname, "sk_msg", 6) == 0) {
+ memcmp(shname, "sk_msg", 6) == 0 ||
+ memcmp(shname, "gtrace", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
new file mode 100644
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <linux/bpf_gtrace.h>
+#include <uapi/linux/seccomp.h>
+#include <uapi/linux/unistd.h>
+#include "syscall_nrs.h"
+#include "bpf_helpers.h"
+
+/* Place each tail-called program in a "gtrace/<name>" ELF section;
+ * the "gtrace" prefix makes bpf_load.c load it as
+ * BPF_PROG_TYPE_GTRACE (presumably the loader also uses the suffix
+ * to key it into the prog array -- verify against bpf_load.c).
+ */
+#define PROG(F) SEC("gtrace/"__stringify(F)) int bpf_func_##F
+
+/* Tail-call program array indexed by syscall number. */
+struct bpf_map_def SEC("maps") progs = {
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u32),
+#ifdef __mips__
+	.max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+#else
+	.max_entries = 1024,
+#endif
+};
+
+/* we jump here when syscall number == __NR_write */
+/* we jump here when syscall number == __NR_write */
+PROG(SYS__NR_write)(struct bpf_gtrace_context *ctx)
+{
+	/* ctx->state is 64 bits wide; use %llx so the value is not
+	 * silently truncated to 32 bits by a plain %x conversion.
+	 */
+	char fmt1[] = "write for task %p (state %llx, ppid %d)\n";
+	char fmt2[] = "write for task %p (tid %d, pid %d)\n";
+	char fmt3[] = "write for task %p (uid %d, gid %d)\n";
+	char fmt4[] = "write for task %p (euid %d, egid %d)\n";
+
+	bpf_trace_printk(fmt1, sizeof(fmt1), ctx->task, ctx->state, ctx->ppid);
+	bpf_trace_printk(fmt2, sizeof(fmt2), ctx->task, ctx->tid, ctx->pid);
+	bpf_trace_printk(fmt3, sizeof(fmt3), ctx->task, ctx->uid, ctx->gid);
+	bpf_trace_printk(fmt4, sizeof(fmt4), ctx->task, ctx->euid, ctx->egid);
+	return 0;
+}
+
+/* Tail-call target for __NR_read: log the invocation and task pointer. */
+PROG(SYS__NR_read)(struct bpf_gtrace_context *ctx)
+{
+	char msg[] = "read for task %p\n";
+
+	bpf_trace_printk(msg, sizeof(msg), ctx->task);
+	return 0;
+}
+
+/* Tail-call target for __NR_mmap: log the invocation and task pointer. */
+PROG(SYS__NR_mmap)(struct bpf_gtrace_context *ctx)
+{
+	char msg[] = "mmap for task %p\n";
+
+	bpf_trace_printk(msg, sizeof(msg), ctx->task);
+	return 0;
+}
+
+/* Kprobe entry point on __seccomp_filter(): dispatch to the per-syscall
+ * gtrace program via tail call.  On a successful tail call execution
+ * never returns here; falling through means no program was installed
+ * for this syscall number.
+ */
+SEC("kprobe/__seccomp_filter")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	int sc_nr = (int)PT_REGS_PARM1(ctx);
+
+	/* dispatch into next BPF program depending on syscall number */
+	bpf_tail_call(ctx, &progs, sc_nr);
+
+	/* fall through -> unknown syscall */
+	if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) {
+		char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
+		bpf_trace_printk(fmt, sizeof(fmt), sc_nr);
+	}
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
new file mode 100644
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <bpf/bpf.h>
+#include "bpf_load.h"
+#include <sys/resource.h>
+
+/* install fake seccomp program to enable seccomp code path inside the kernel,
+ * so that our kprobe attached to seccomp_phase1() can be triggered
+ */
+/* Install a permit-everything seccomp filter so that the seccomp code
+ * path (and thus our kprobe on __seccomp_filter) runs for every
+ * subsequent syscall of this process and its children.
+ */
+static void install_accept_all_seccomp(void)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+		.filter = filter,
+	};
+
+	/* SECCOMP_MODE_FILTER (== 2) instead of a magic number. */
+	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
+		perror("prctl");
+}
+
+/* Load tracex5b_kern.o, install the seccomp trigger, generate a burst
+ * of read/write syscalls with dd, then stream the trace pipe.
+ */
+int main(int ac, char **argv)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256];
+	FILE *f;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	/* Raise the locked-memory limit so BPF maps can be created. */
+	if (setrlimit(RLIMIT_MEMLOCK, &r))
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	install_accept_all_seccomp();
+
+	/* Generate syscall activity for the probes to observe.  Check
+	 * the popen() result and reap the child; the original ignored
+	 * the handle and leaked it.
+	 */
+	f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
+	if (f)
+		pclose(f);
+	else
+		perror("popen");
+
+	read_trace_pipe();
+
+	return 0;
+}
@@ -166,6 +166,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_GTRACE,
};
enum bpf_attach_type {
@@ -1682,6 +1682,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_PERF_EVENT:
return false;
case BPF_PROG_TYPE_KPROBE:
+ case BPF_PROG_TYPE_GTRACE:
default:
return true;
}
@@ -97,6 +97,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_GTRACE:
default:
break;
}