diff mbox series

[bpf-next,v5,1/5] bpf: add writable context for raw tracepoints

Message ID 20190426184951.21812-2-mmullins@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series writable contexts for bpf raw tracepoints | expand

Commit Message

Matt Mullins April 26, 2019, 6:49 p.m. UTC
This is an opt-in interface that allows a tracepoint to provide a safe
buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program.
The size of the buffer must be a compile-time constant, and is checked
before allowing a BPF program to attach to a tracepoint that uses this
feature.

The pointer to this buffer will be the first argument of tracepoints
that opt in; the pointer is valid and can be bpf_probe_read() by both
BPF_PROG_TYPE_RAW_TRACEPOINT and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
programs that attach to such a tracepoint, but the buffer to which it
points may only be written by the latter.

Signed-off-by: Matt Mullins <mmullins@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h             |  2 ++
 include/linux/bpf_types.h       |  1 +
 include/linux/tracepoint-defs.h |  1 +
 include/trace/bpf_probe.h       | 27 +++++++++++++++++++++++++--
 include/uapi/linux/bpf.h        |  1 +
 kernel/bpf/syscall.c            |  8 ++++++--
 kernel/bpf/verifier.c           | 31 +++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c        | 24 ++++++++++++++++++++++++
 8 files changed, 91 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f15432d90728..cd6341eabd74 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -272,6 +272,7 @@  enum bpf_reg_type {
 	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
+	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -361,6 +362,7 @@  struct bpf_prog_aux {
 	u32 used_map_cnt;
 	u32 max_ctx_offset;
 	u32 max_pkt_offset;
+	u32 max_tp_access;
 	u32 stack_depth;
 	u32 id;
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index d26991a16894..a10d37bce364 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -25,6 +25,7 @@  BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 49ba9cde7e4b..b29950a19205 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -45,6 +45,7 @@  struct bpf_raw_event_map {
 	struct tracepoint	*tp;
 	void			*bpf_func;
 	u32			num_args;
+	u32			writable_size;
 } __aligned(32);
 
 #endif
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 505dae0bed80..d6e556c0a085 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -69,8 +69,7 @@  __bpf_trace_##call(void *__data, proto)					\
  * to make sure that if the tracepoint handling changes, the
  * bpf probe will fail to compile unless it too is updated.
  */
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args)			\
+#define __DEFINE_EVENT(template, call, proto, args, size)		\
 static inline void bpf_test_probe_##call(void)				\
 {									\
 	check_trace_callback_type_##call(__bpf_trace_##template);	\
@@ -81,12 +80,36 @@  __bpf_trace_tp_map_##call = {						\
 	.tp		= &__tracepoint_##call,				\
 	.bpf_func	= (void *)__bpf_trace_##template,		\
 	.num_args	= COUNT_ARGS(args),				\
+	.writable_size	= size,						\
 };
 
+#define FIRST(x, ...) x
+
+#undef DEFINE_EVENT_WRITABLE
+#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size)	\
+static inline void bpf_test_buffer_##call(void)				\
+{									\
+	/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
+	 * BUILD_BUG_ON_ZERO() uses a different mechanism that is not	\
+	 * dead-code-eliminated.					\
+	 */								\
+	FIRST(proto);							\
+	(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args)));		\
+}									\
+__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DEFINE_EVENT_WRITABLE
+#undef __DEFINE_EVENT
+#undef FIRST
+
 #endif /* CONFIG_BPF_EVENTS */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index eaf2d3284248..f7fa7a34a62d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -168,6 +168,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_REUSEPORT,
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
+	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 };
 
 enum bpf_attach_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b0de49598341..ae141e745f92 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1789,12 +1789,16 @@  static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	}
 	raw_tp->btp = btp;
 
-	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
-				 BPF_PROG_TYPE_RAW_TRACEPOINT);
+	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
 	if (IS_ERR(prog)) {
 		err = PTR_ERR(prog);
 		goto out_free_tp;
 	}
+	if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+	    prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+		err = -EINVAL;
+		goto out_put_prog;
+	}
 
 	err = bpf_probe_register(raw_tp->btp, prog);
 	if (err)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 423f242a5efb..2ef442c62c0e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -405,6 +405,7 @@  static const char * const reg_type_str[] = {
 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
+	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 };
 
 static char slot_type_char[] = {
@@ -1993,6 +1994,32 @@  static int check_ctx_reg(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* Validate a tp buffer access and record its extent in aux->max_tp_access. */
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+				  const struct bpf_reg_state *reg,
+				  int regno, int off, int size)
+{
+	if (off < 0) {
+		verbose(env,
+			"R%d invalid tracepoint buffer access: off=%d, size=%d\n",
+			regno, off, size);
+		return -EACCES;
+	}
+	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose(env,
+			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
+			regno, off, tn_buf);
+		return -EACCES;
+	}
+	if (off + size > env->prog->aux->max_tp_access)
+		env->prog->aux->max_tp_access = off + size;
+
+	return 0;
+}
+
 /* truncate register to smaller size (in bytes)
  * must be called with size < BPF_REG_SIZE
  */
@@ -2137,6 +2164,10 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_sock_access(env, insn_idx, regno, off, size, t);
 		if (!err && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_TP_BUFFER) {
+		err = check_tp_buffer_access(env, reg, regno, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 91800be0c8eb..8607aba1d882 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -915,6 +915,27 @@  const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 };
 
+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+						 enum bpf_access_type type,
+						 const struct bpf_prog *prog,
+						 struct bpf_insn_access_aux *info)
+{
+	if (off == 0) { /* first ctx slot: only a full u64 read is allowed */
+		if (size != sizeof(u64) || type != BPF_READ)
+			return false;
+		info->reg_type = PTR_TO_TP_BUFFER; /* tag as writable tp buffer */
+	}
+	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
+}
+
+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
+	.get_func_proto  = raw_tp_prog_func_proto,
+	.is_valid_access = raw_tp_writable_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
+};
+
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
@@ -1204,6 +1225,9 @@  static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
 		return -EINVAL;
 
+	if (prog->aux->max_tp_access > btp->writable_size)
+		return -EINVAL;
+
 	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
 }