new file mode 100644
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+#ifndef _UAPI__LINUX_BPF_GTRACE_H__
+#define _UAPI__LINUX_BPF_GTRACE_H__
+
+#include <linux/types.h>
+#include <asm/bpf_perf_event.h>
+
+/* Context exposed to BPF_PROG_TYPE_GTRACE programs.
+ *
+ * This is a UAPI header, so the exported __u64/__u32 types must be used;
+ * the kernel-internal u64/u32 spellings are not visible to userspace.
+ */
+struct bpf_gtrace_context {
+	/* CPU registers at the instrumentation point */
+	bpf_user_pt_regs_t regs;
+
+	/* Current task info */
+	__u64 task;	/* current */
+	__u64 state;	/* current->state */
+	__u32 prio;	/* current->prio */
+	__u32 cpu;	/* current->cpu or current->thread_info->cpu */
+	__u32 tid;	/* current->pid */
+	__u32 pid;	/* current->tgid */
+	__u32 ppid;	/* current->real_parent->tgid */
+	__u32 uid;	/* from_kuid(&init_user_ns, current_real_cred()->uid) */
+	__u32 gid;	/* from_kgid(&init_user_ns, current_real_cred()->gid) */
+	__u32 euid;	/* from_kuid(&init_user_ns, current_real_cred()->euid) */
+	__u32 egid;	/* from_kgid(&init_user_ns, current_real_cred()->egid) */
+};
+
+#endif /* _UAPI__LINUX_BPF_GTRACE_H__ */
@@ -35,6 +35,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif
+#ifdef CONFIG_BPF_GTRACE
+BPF_PROG_TYPE(BPF_PROG_TYPE_GTRACE, gtrace)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
@@ -166,6 +166,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_GTRACE,
};
enum bpf_attach_type {
@@ -792,6 +792,13 @@ config GCOV_PROFILE_FTRACE
Note that on a kernel compiled with this config, ftrace will
run significantly slower.
+config BPF_GTRACE
+ bool "Generic BPF Tracing Support"
+ depends on BPF_SYSCALL
+ default y
+ help
+ Enable generic tracing BPF program support.
+
endif # FTRACE
endif # TRACING_SUPPORT
@@ -82,6 +82,7 @@ endif
obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
+obj-$(CONFIG_BPF_GTRACE) += gtrace.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
new file mode 100644
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <linux/bpf.h>
+#include <linux/bpf_gtrace.h>
+#include <linux/filter.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+
+/* Kernel-side runtime context backing struct bpf_gtrace_context.
+ * convert_ctx_access() rewrites program loads on the user-visible
+ * layout into loads through these two pointers.
+ */
+struct bpf_gtrace_ctx {
+	struct pt_regs *regs;		/* registers at the probe site */
+	struct task_struct *task;	/* current at cast time */
+};
+
+/* Return the helper prototype available to gtrace programs.
+ * Only bpf_trace_printk() is currently allowed; everything else is
+ * rejected by returning NULL.
+ */
+static const struct bpf_func_proto *
+gtrace_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_trace_printk)
+		return bpf_get_trace_printk_proto();
+
+	return NULL;
+}
+
+/* Verifier hook: decide whether a load of [off, off+size) from
+ * struct bpf_gtrace_context is permitted.  Only aligned reads are
+ * allowed; narrow (sub-field-width) loads are accepted where
+ * bpf_ctx_narrow_access_ok() permits them.
+ */
+static bool gtrace_is_valid_access(int off, int size, enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	/* Ensure offset is within the context structure. */
+	if (off < 0 || off >= sizeof(struct bpf_gtrace_context))
+		return false;
+
+	/* Only READ access is allowed. */
+	if (type != BPF_READ)
+		return false;
+
+	/* Ensure offset is aligned (verifier guarantees size > 0). */
+	if (off % size != 0)
+		return false;
+
+	/* bpf_ctx_range() expands to a case range covering every byte
+	 * of the named field.
+	 */
+	switch (off) {
+	case bpf_ctx_range(struct bpf_gtrace_context, task):
+	case bpf_ctx_range(struct bpf_gtrace_context, state):
+		bpf_ctx_record_field_size(info, sizeof(u64));
+		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u64)))
+			return true;
+		break;
+	case bpf_ctx_range(struct bpf_gtrace_context, prio):
+	case bpf_ctx_range(struct bpf_gtrace_context, cpu):
+	case bpf_ctx_range(struct bpf_gtrace_context, tid):
+	case bpf_ctx_range(struct bpf_gtrace_context, pid):
+	case bpf_ctx_range(struct bpf_gtrace_context, ppid):
+	case bpf_ctx_range(struct bpf_gtrace_context, uid):
+	case bpf_ctx_range(struct bpf_gtrace_context, gid):
+	case bpf_ctx_range(struct bpf_gtrace_context, euid):
+	case bpf_ctx_range(struct bpf_gtrace_context, egid):
+		bpf_ctx_record_field_size(info, sizeof(u32));
+		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u32)))
+			return true;
+		break;
+	default:
+		/* Remaining offsets fall inside the leading regs member;
+		 * allow only full register-width loads there.
+		 */
+		if (size == sizeof(unsigned long))
+			return true;
+	}
+
+	return false;
+}
+
+/* Helpers for convert_ctx_access(); each expands to one BPF_LDX insn.
+ *
+ * CTX variants read a member relative to the context register (src_reg);
+ * LNK variants follow a pointer already loaded into dst_reg, enabling
+ * chained dereferences such as task->real_parent->tgid.
+ * The *_FIELD variants also report the true field width back to the
+ * verifier through *target_size (needed for narrow-load fixups).
+ */
+#define BPF_LDX_CTX_PTR(type, member, si) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->src_reg, offsetof(type, member))
+#define BPF_LDX_LNK_PTR(type, member, si) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->dst_reg, offsetof(type, member))
+#define BPF_LDX_CTX_FIELD(type, member, si, target_size) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->src_reg, \
+		    ({ \
+			*(target_size) = FIELD_SIZEOF(type, member); \
+			offsetof(type, member); \
+		    }))
+#define BPF_LDX_LNK_FIELD(type, member, si, target_size) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (si)->dst_reg, (si)->dst_reg, \
+		    ({ \
+			*(target_size) = FIELD_SIZEOF(type, member); \
+			offsetof(type, member); \
+		    }))
+
+/* Verifier hook: rewrite a program load on the user-visible
+ * struct bpf_gtrace_context into loads on the runtime
+ * struct bpf_gtrace_ctx, chasing task_struct/cred pointers as needed.
+ * Returns the number of instructions emitted into insn_buf.
+ */
+static u32 gtrace_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct bpf_gtrace_context, task):
+		*insn++ = BPF_LDX_CTX_FIELD(struct bpf_gtrace_ctx, task, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, state):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, state, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, prio):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, prio, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, cpu):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		/* The CPU number lives in task_struct only when
+		 * THREAD_INFO_IN_TASK; otherwise go through
+		 * task->stack (thread_info) to reach it.
+		 */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, cpu, si,
+					    target_size);
+#else
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, stack, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct thread_info, cpu, si,
+					    target_size);
+#endif
+		break;
+	case offsetof(struct bpf_gtrace_context, tid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, pid, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, pid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, tgid, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, ppid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, real_parent, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct task_struct, tgid, si,
+					    target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, uid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, uid, si, target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, gid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, gid, si, target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, euid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, euid, si, target_size);
+		break;
+	case offsetof(struct bpf_gtrace_context, egid):
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, task, si);
+		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
+		*insn++ = BPF_LDX_LNK_FIELD(struct cred, egid, si, target_size);
+		break;
+	default:
+		/* Offsets inside the leading regs member: load the regs
+		 * pointer and read the register at the same relative
+		 * offset (regs sits at offset 0 of both layouts).
+		 */
+		*insn++ = BPF_LDX_CTX_PTR(struct bpf_gtrace_ctx, regs, si);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
+				      si->off);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+/* Verifier callbacks for BPF_PROG_TYPE_GTRACE programs. */
+const struct bpf_verifier_ops gtrace_verifier_ops = {
+	.get_func_proto = gtrace_get_func_proto,
+	.is_valid_access = gtrace_is_valid_access,
+	.convert_ctx_access = gtrace_convert_ctx_access,
+};
+
+/* A gtrace context can currently only be built from a kprobe program's
+ * context; reject every other source type.
+ */
+static bool gtrace_can_cast(enum bpf_prog_type stype, enum bpf_prog_type ttype)
+{
+	return stype == BPF_PROG_TYPE_KPROBE;
+}
+
+/* Per-CPU scratch context handed out by gtrace_cast_context().
+ * NOTE(review): safe only while callers run with preemption disabled
+ * (true for kprobe-attached BPF programs) -- confirm for any new
+ * source program type.
+ */
+DEFINE_PER_CPU(struct bpf_gtrace_ctx, gtrace_ctx);
+
+/* Build a gtrace context from the source program's context.
+ * Only kprobe contexts (struct pt_regs *) are supported; any other
+ * source type yields NULL.
+ *
+ * Fix: the argument to this_cpu_ptr() was corrupted to ">race_ctx"
+ * ("&gt" mangled to ">"); it must be &gtrace_ctx to compile.
+ */
+static void *gtrace_cast_context(enum bpf_prog_type stype,
+				 enum bpf_prog_type ttype, void *ctx)
+{
+	struct bpf_gtrace_ctx *gctx;
+
+	if (stype != BPF_PROG_TYPE_KPROBE)
+		return NULL;
+
+	gctx = this_cpu_ptr(&gtrace_ctx);
+	gctx->regs = (struct pt_regs *)ctx;
+	gctx->task = current;
+
+	return gctx;
+}
+
+const struct bpf_prog_ops gtrace_prog_ops = {
+	.can_cast = gtrace_can_cast,
+	.cast_context = gtrace_cast_context,
+};
@@ -15,6 +15,7 @@ hostprogs-y += tracex2
hostprogs-y += tracex3
hostprogs-y += tracex4
hostprogs-y += tracex5
+hostprogs-y += tracex5b
hostprogs-y += tracex6
hostprogs-y += tracex7
hostprogs-y += test_probe_write_user
@@ -69,6 +70,7 @@ tracex2-objs := bpf_load.o tracex2_user.o
tracex3-objs := bpf_load.o tracex3_user.o
tracex4-objs := bpf_load.o tracex4_user.o
tracex5-objs := bpf_load.o tracex5_user.o
+tracex5b-objs := bpf_load.o tracex5b_user.o
tracex6-objs := bpf_load.o tracex6_user.o
tracex7-objs := bpf_load.o tracex7_user.o
load_sock_ops-objs := bpf_load.o load_sock_ops.o
@@ -120,6 +122,7 @@ always += tracex2_kern.o
always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
+always += tracex5b_kern.o
always += tracex6_kern.o
always += tracex7_kern.o
always += sock_flags_kern.o
@@ -87,6 +87,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
+ bool is_gtrace = strncmp(event, "gtrace", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -120,6 +121,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SK_SKB;
} else if (is_sk_msg) {
prog_type = BPF_PROG_TYPE_SK_MSG;
+ } else if (is_gtrace) {
+ prog_type = BPF_PROG_TYPE_GTRACE;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -140,8 +143,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
- if (is_socket)
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg || is_gtrace) {
+ if (is_socket || is_gtrace)
event += 6;
else
event += 7;
@@ -643,7 +646,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
memcmp(shname, "sk_skb", 6) == 0 ||
- memcmp(shname, "sk_msg", 6) == 0) {
+ memcmp(shname, "sk_msg", 6) == 0 ||
+ memcmp(shname, "gtrace", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
new file mode 100644
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <linux/bpf_gtrace.h>
+#include <uapi/linux/seccomp.h>
+#include <uapi/linux/unistd.h>
+#include "syscall_nrs.h"
+#include "bpf_helpers.h"
+
+/* Place each tail-called program in a "gtrace/<name>" ELF section;
+ * the "gtrace" prefix makes bpf_load.c load it as
+ * BPF_PROG_TYPE_GTRACE (presumably the loader also uses the suffix
+ * to key it into the prog array -- verify against bpf_load.c).
+ */
+#define PROG(F) SEC("gtrace/"__stringify(F)) int bpf_func_##F
+
+/* Tail-call program array indexed by syscall number. */
+struct bpf_map_def SEC("maps") progs = {
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u32),
+#ifdef __mips__
+	.max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+#else
+	.max_entries = 1024,
+#endif
+};
+
+/* we jump here when syscall number == __NR_write */
+/* we jump here when syscall number == __NR_write */
+PROG(SYS__NR_write)(struct bpf_gtrace_context *ctx)
+{
+	/* ctx->state is 64 bits wide; use %llx so the value is not
+	 * silently truncated to 32 bits by a plain %x conversion.
+	 */
+	char fmt1[] = "write for task %p (state %llx, ppid %d)\n";
+	char fmt2[] = "write for task %p (tid %d, pid %d)\n";
+	char fmt3[] = "write for task %p (uid %d, gid %d)\n";
+	char fmt4[] = "write for task %p (euid %d, egid %d)\n";
+
+	bpf_trace_printk(fmt1, sizeof(fmt1), ctx->task, ctx->state, ctx->ppid);
+	bpf_trace_printk(fmt2, sizeof(fmt2), ctx->task, ctx->tid, ctx->pid);
+	bpf_trace_printk(fmt3, sizeof(fmt3), ctx->task, ctx->uid, ctx->gid);
+	bpf_trace_printk(fmt4, sizeof(fmt4), ctx->task, ctx->euid, ctx->egid);
+	return 0;
+}
+
+/* Tail-call target for __NR_read: log the invocation and task pointer. */
+PROG(SYS__NR_read)(struct bpf_gtrace_context *ctx)
+{
+	char msg[] = "read for task %p\n";
+
+	bpf_trace_printk(msg, sizeof(msg), ctx->task);
+	return 0;
+}
+
+/* Tail-call target for __NR_mmap: log the invocation and task pointer. */
+PROG(SYS__NR_mmap)(struct bpf_gtrace_context *ctx)
+{
+	char msg[] = "mmap for task %p\n";
+
+	bpf_trace_printk(msg, sizeof(msg), ctx->task);
+	return 0;
+}
+
+/* Kprobe entry point on __seccomp_filter(): dispatch to the per-syscall
+ * gtrace program via tail call.  On a successful tail call execution
+ * never returns here; falling through means no program was installed
+ * for this syscall number.
+ */
+SEC("kprobe/__seccomp_filter")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	int sc_nr = (int)PT_REGS_PARM1(ctx);
+
+	/* dispatch into next BPF program depending on syscall number */
+	bpf_tail_call(ctx, &progs, sc_nr);
+
+	/* fall through -> unknown syscall */
+	if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) {
+		char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
+		bpf_trace_printk(fmt, sizeof(fmt), sc_nr);
+	}
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
new file mode 100644
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <bpf/bpf.h>
+#include "bpf_load.h"
+#include <sys/resource.h>
+
+/* install fake seccomp program to enable seccomp code path inside the kernel,
+ * so that our kprobe attached to seccomp_phase1() can be triggered
+ */
+/* Install a permit-everything seccomp filter so that the seccomp code
+ * path (and thus our kprobe on __seccomp_filter) runs for every
+ * subsequent syscall of this process and its children.
+ */
+static void install_accept_all_seccomp(void)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+		.filter = filter,
+	};
+
+	/* SECCOMP_MODE_FILTER (== 2) instead of a magic number. */
+	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
+		perror("prctl");
+}
+
+/* Load tracex5b_kern.o, install the seccomp trigger, generate a burst
+ * of read/write syscalls with dd, then stream the trace pipe.
+ */
+int main(int ac, char **argv)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256];
+	FILE *f;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	/* Raise the locked-memory limit so BPF maps can be created. */
+	if (setrlimit(RLIMIT_MEMLOCK, &r))
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	install_accept_all_seccomp();
+
+	/* Generate syscall activity for the probes to observe.  Check
+	 * the popen() result and reap the child; the original ignored
+	 * the handle and leaked it.
+	 */
+	f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
+	if (f)
+		pclose(f);
+	else
+		perror("popen");
+
+	read_trace_pipe();
+
+	return 0;
+}
@@ -166,6 +166,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_GTRACE,
};
enum bpf_attach_type {
@@ -1682,6 +1682,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_PERF_EVENT:
return false;
case BPF_PROG_TYPE_KPROBE:
+ case BPF_PROG_TYPE_GTRACE:
default:
return true;
}
@@ -97,6 +97,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_GTRACE:
default:
break;
}