diff mbox series

[bpf-next,03/13] bpf: support readonly buffer in verifier

Message ID 20200713161742.3076597-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for map elements | expand

Commit Message

Yonghong Song July 13, 2020, 4:17 p.m. UTC
Two new readonly buffer register states, PTR_TO_RDONLY_BUF and
PTR_TO_RDONLY_BUF_OR_NULL, are introduced. These new register
states will be used by the later bpf map element iterator.

The new register states share some similarity with
PTR_TO_TP_BUFFER in that the verifier calculates the accessed
buffer size during verification. The accessed buffer size will
later be compared to other metrics at attach/link_create time.

There are two differences between PTR_TO_TP_BUFFER and
PTR_TO_RDONLY_BUF[_OR_NULL]:
PTR_TO_TP_BUFFER is for write only
and PTR_TO_RDONLY_BUF[_OR_NULL] is for read only.
In addition, a rdonly_buf_seq_id is also added to the
register state since it is possible for the same program
to have two PTR_TO_RDONLY_BUF[_OR_NULL] ctx arguments.
For example, for the later bpf map element iterator,
both key and value may be PTR_TO_RDONLY_BUF_OR_NULL.

Similar to reg_state PTR_TO_BTF_ID_OR_NULL in bpf
iterator programs, PTR_TO_RDONLY_BUF_OR_NULL reg_type and
its rdonly_buf_seq_id can be set at
prog->aux->bpf_ctx_arg_aux, and bpf verifier will
retrieve the values during btf_ctx_access().
Later bpf map element iterator implementation
will show how such information will be assigned
during target registration time.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h          |  7 ++++
 include/linux/bpf_verifier.h |  2 +
 kernel/bpf/btf.c             | 13 +++++++
 kernel/bpf/verifier.c        | 74 +++++++++++++++++++++++++++++++-----
 4 files changed, 87 insertions(+), 9 deletions(-)

Comments

Alexei Starovoitov July 13, 2020, 11:25 p.m. UTC | #1
On Mon, Jul 13, 2020 at 09:17:42AM -0700, Yonghong Song wrote:
> Two new readonly buffer PTR_TO_RDONLY_BUF or
> PTR_TO_RDONLY_BUF_OR_NULL register states
> are introduced. These new register states will be used
> by later bpf map element iterator.
> 
> New register states share some similarity to
> PTR_TO_TP_BUFFER as it will calculate accessed buffer
> size during verification time. The accessed buffer
> size will be later compared to other metrics during
> later attach/link_create time.
> 
> Two differences between PTR_TO_TP_BUFFER and
> PTR_TO_RDONLY_BUF[_OR_NULL].
> PTR_TO_TP_BUFFER is for write only
> and PTR_TO_RDONLY_BUF[_OR_NULL] is for read only.
> In addition, a rdonly_buf_seq_id is also added to the
> register state since it is possible for the same program
> there could be two PTR_TO_RDONLY_BUF[_OR_NULL] ctx arguments.
> For example, for bpf later map element iterator,
> both key and value may be PTR_TO_TP_BUFFER_OR_NULL.
> 
> Similar to reg_state PTR_TO_BTF_ID_OR_NULL in bpf
> iterator programs, PTR_TO_RDONLY_BUF_OR_NULL reg_type and
> its rdonly_buf_seq_id can be set at
> prog->aux->bpf_ctx_arg_aux, and bpf verifier will
> retrieve the values during btf_ctx_access().
> Later bpf map element iterator implementation
> will show how such information will be assigned
> during target registeration time.
...
>  struct bpf_ctx_arg_aux {
>  	u32 offset;
>  	enum bpf_reg_type reg_type;
> +	u32 rdonly_buf_seq_id;
>  };
>  
> +#define BPF_MAX_RDONLY_BUF	2
> +
>  struct bpf_prog_aux {
>  	atomic64_t refcnt;
>  	u32 used_map_cnt;
> @@ -693,6 +699,7 @@ struct bpf_prog_aux {
>  	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
>  	u32 ctx_arg_info_size;
>  	const struct bpf_ctx_arg_aux *ctx_arg_info;
> +	u32 max_rdonly_access[BPF_MAX_RDONLY_BUF];

I think PTR_TO_RDONLY_BUF approach is too limiting.
I think the map value should probably be writable from the beginning,
but I don't see how this RDONLY_BUF support can be naturally extended.
Also key and value can be large, so just load/store is going to be
limiting pretty quickly. People would want to use helpers to access
key/value areas. I think any existing helper that accepts ARG_PTR_TO_MEM
should be usable with data from this key/value.
PTR_TO_TP_BUFFER was a quick hack for tiny scratch area.
Here I think the verifier should be smart from the start.

The next patch populates bpf_ctx_arg_aux with hardcoded 0 and 1.
imo that's too hacky. Helper definitions shouldn't be in business
of poking into such verifier internals.
Yonghong Song July 15, 2020, 5:34 p.m. UTC | #2
On 7/13/20 4:25 PM, Alexei Starovoitov wrote:
> On Mon, Jul 13, 2020 at 09:17:42AM -0700, Yonghong Song wrote:
>> Two new readonly buffer PTR_TO_RDONLY_BUF or
>> PTR_TO_RDONLY_BUF_OR_NULL register states
>> are introduced. These new register states will be used
>> by later bpf map element iterator.
>>
>> New register states share some similarity to
>> PTR_TO_TP_BUFFER as it will calculate accessed buffer
>> size during verification time. The accessed buffer
>> size will be later compared to other metrics during
>> later attach/link_create time.
>>
>> Two differences between PTR_TO_TP_BUFFER and
>> PTR_TO_RDONLY_BUF[_OR_NULL].
>> PTR_TO_TP_BUFFER is for write only
>> and PTR_TO_RDONLY_BUF[_OR_NULL] is for read only.
>> In addition, a rdonly_buf_seq_id is also added to the
>> register state since it is possible for the same program
>> there could be two PTR_TO_RDONLY_BUF[_OR_NULL] ctx arguments.
>> For example, for bpf later map element iterator,
>> both key and value may be PTR_TO_TP_BUFFER_OR_NULL.
>>
>> Similar to reg_state PTR_TO_BTF_ID_OR_NULL in bpf
>> iterator programs, PTR_TO_RDONLY_BUF_OR_NULL reg_type and
>> its rdonly_buf_seq_id can be set at
>> prog->aux->bpf_ctx_arg_aux, and bpf verifier will
>> retrieve the values during btf_ctx_access().
>> Later bpf map element iterator implementation
>> will show how such information will be assigned
>> during target registeration time.
> ...
>>   struct bpf_ctx_arg_aux {
>>   	u32 offset;
>>   	enum bpf_reg_type reg_type;
>> +	u32 rdonly_buf_seq_id;
>>   };
>>   
>> +#define BPF_MAX_RDONLY_BUF	2
>> +
>>   struct bpf_prog_aux {
>>   	atomic64_t refcnt;
>>   	u32 used_map_cnt;
>> @@ -693,6 +699,7 @@ struct bpf_prog_aux {
>>   	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
>>   	u32 ctx_arg_info_size;
>>   	const struct bpf_ctx_arg_aux *ctx_arg_info;
>> +	u32 max_rdonly_access[BPF_MAX_RDONLY_BUF];
> 
> I think PTR_TO_RDONLY_BUF approach is too limiting.
> I think the map value should probably be writable from the beginning,
> but I don't see how this RDONLY_BUF support can be naturally extended.

Agreed. Let me try to make map value read/write-able.

One thing we discussed earlier is whether and how we could make
map element deletable during iterator traversal. I will explore
this as well.

> Also key and value can be large, so just load/store is going to be
> limiting pretty quickly. People would want to use helpers to access
> key/value areas. I think any existing helper that accepts ARG_PTR_TO_MEM
> should be usable with data from this key/value.

This is a useful suggestion. I actually hacked up a prototype to
allow
   bpf_seq_write(seq, buf, buf_size) to accept a rdonly_buf register state
so the bpf iterator can also copy key/value to user space through seq_file.
The bpf_seq_write 2nd arg is ARG_PTR_TO_MEM. This actually works.

I originally planned to have this as a followup. Since you mentioned 
this, I will incorporate it in the next revision.

> PTR_TO_TP_BUFFER was a quick hack for tiny scratch area.
> Here I think the verifier should be smart from the start. >
> The next patch populates bpf_ctx_arg_aux with hardcoded 0 and 1.
> imo that's too hacky. Helper definitions shouldn't be in business
> of poking into such verifier internals.

The reason I am using 0/1 is so that later on I can easily
correlate which rdonly_buf access size corresponds to the key or
the value. I guess I can add a verifier callback that, given a
ctx argument index, returns the access size.
Alexei Starovoitov July 15, 2020, 5:52 p.m. UTC | #3
On Wed, Jul 15, 2020 at 10:48 AM Yonghong Song <yhs@fb.com> wrote:
>
> > PTR_TO_TP_BUFFER was a quick hack for tiny scratch area.
> > Here I think the verifier should be smart from the start. >
> > The next patch populates bpf_ctx_arg_aux with hardcoded 0 and 1.
> > imo that's too hacky. Helper definitions shouldn't be in business
> > of poking into such verifier internals.
>
> The reason I am using 0/1 so later on I can easily correlate
> which rdonly_buf access size corresponds to key or value. I guess
> I can have a verifier callback to given an ctx argument index to
> get the access size.

I see. Hardcoding key vs value in some way is necessary, of course.
Some #define for that with clear name would be good.
I was pointing out that 0/1 were used beyond that need.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 97c6e2605978..8f708d51733b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -352,6 +352,8 @@  enum bpf_reg_type {
 	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
 	PTR_TO_MEM,		 /* reg points to valid memory region */
 	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
+	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
+	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -362,6 +364,7 @@  struct bpf_insn_access_aux {
 	union {
 		int ctx_field_size;
 		u32 btf_id;
+		u32 rdonly_buf_seq_id;
 	};
 	struct bpf_verifier_log *log; /* for verbose logs */
 };
@@ -678,8 +681,11 @@  struct bpf_jit_poke_descriptor {
 struct bpf_ctx_arg_aux {
 	u32 offset;
 	enum bpf_reg_type reg_type;
+	u32 rdonly_buf_seq_id;
 };
 
+#define BPF_MAX_RDONLY_BUF	2
+
 struct bpf_prog_aux {
 	atomic64_t refcnt;
 	u32 used_map_cnt;
@@ -693,6 +699,7 @@  struct bpf_prog_aux {
 	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
 	u32 ctx_arg_info_size;
 	const struct bpf_ctx_arg_aux *ctx_arg_info;
+	u32 max_rdonly_access[BPF_MAX_RDONLY_BUF];
 	struct bpf_prog *linked_prog;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 53c7bd568c5d..063e4ab2dd77 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -56,6 +56,8 @@  struct bpf_reg_state {
 
 		u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
 
+		u32 rdonly_buf_seq_id; /* for PTR_TO_RDONLY_BUF */
+
 		/* Max size from any of the above. */
 		unsigned long raw;
 	};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4c3007f428b1..895de2b21385 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3809,6 +3809,19 @@  bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 			btf_kind_str[BTF_INFO_KIND(t->info)]);
 		return false;
 	}
+
+	/* check for PTR_TO_RDONLY_BUF_OR_NULL */
+	for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+		const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+
+		if (ctx_arg_info->offset == off &&
+		    ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL) {
+			info->reg_type = ctx_arg_info->reg_type;
+			info->rdonly_buf_seq_id = ctx_arg_info->rdonly_buf_seq_id;
+			return true;
+		}
+	}
+
 	if (t->type == 0)
 		/* This is a pointer to void.
 		 * It is the same as scalar from the verifier safety pov.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b608185e1ffd..87801afa26fc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -410,7 +410,8 @@  static bool reg_type_may_be_null(enum bpf_reg_type type)
 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
 	       type == PTR_TO_TCP_SOCK_OR_NULL ||
 	       type == PTR_TO_BTF_ID_OR_NULL ||
-	       type == PTR_TO_MEM_OR_NULL;
+	       type == PTR_TO_MEM_OR_NULL ||
+	       type == PTR_TO_RDONLY_BUF_OR_NULL;
 }
 
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -504,6 +505,8 @@  static const char * const reg_type_str[] = {
 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
 	[PTR_TO_MEM]		= "mem",
 	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
+	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
+	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 };
 
 static char slot_type_char[] = {
@@ -579,6 +582,9 @@  static void print_verifier_state(struct bpf_verifier_env *env,
 				verbose(env, ",ks=%d,vs=%d",
 					reg->map_ptr->key_size,
 					reg->map_ptr->value_size);
+			else if (t == PTR_TO_RDONLY_BUF ||
+				 t == PTR_TO_RDONLY_BUF_OR_NULL)
+				verbose(env, ",seq_id=%u", reg->rdonly_buf_seq_id);
 			if (tnum_is_const(reg->var_off)) {
 				/* Typically an immediate SCALAR_VALUE, but
 				 * could be a pointer whose offset is too big
@@ -2174,6 +2180,8 @@  static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
 	case PTR_TO_BTF_ID_OR_NULL:
+	case PTR_TO_RDONLY_BUF:
+	case PTR_TO_RDONLY_BUF_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -2699,7 +2707,7 @@  static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
-			    u32 *btf_id)
+			    u32 *btf_id, u32 *rdonly_buf_seq_id)
 {
 	struct bpf_insn_access_aux info = {
 		.reg_type = *reg_type,
@@ -2719,6 +2727,8 @@  static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 
 		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
 			*btf_id = info.btf_id;
+		else if (*reg_type == PTR_TO_RDONLY_BUF_OR_NULL)
+			*rdonly_buf_seq_id = info.rdonly_buf_seq_id;
 		else
 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 		/* remember the offset of last byte accessed in ctx */
@@ -3053,14 +3063,15 @@  int check_ctx_reg(struct bpf_verifier_env *env,
 	return 0;
 }
 
-static int check_tp_buffer_access(struct bpf_verifier_env *env,
-				  const struct bpf_reg_state *reg,
-				  int regno, int off, int size)
+static int __check_buffer_access(struct bpf_verifier_env *env,
+				 const char *buf_info,
+				 const struct bpf_reg_state *reg,
+				 int regno, int off, int size)
 {
 	if (off < 0) {
 		verbose(env,
-			"R%d invalid tracepoint buffer access: off=%d, size=%d",
-			regno, off, size);
+			"R%d invalid %s buffer access: off=%d, size=%d",
+			regno, buf_info, off, size);
 		return -EACCES;
 	}
 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
@@ -3072,12 +3083,43 @@  static int check_tp_buffer_access(struct bpf_verifier_env *env,
 			regno, off, tn_buf);
 		return -EACCES;
 	}
+
+	return 0;
+}
+
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+				  const struct bpf_reg_state *reg,
+				  int regno, int off, int size)
+{
+	int err;
+
+	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
+	if (err)
+		return err;
+
 	if (off + size > env->prog->aux->max_tp_access)
 		env->prog->aux->max_tp_access = off + size;
 
 	return 0;
 }
 
+static int check_rdonly_buf_access(struct bpf_verifier_env *env,
+				   const struct bpf_reg_state *reg,
+				   int regno, int off, int size)
+{
+	u32 seq_id = reg->rdonly_buf_seq_id;
+	int err;
+
+	err = __check_buffer_access(env, "readonly", reg, regno, off, size);
+	if (err)
+		return err;
+
+	if (off + size > env->prog->aux->max_rdonly_access[seq_id])
+		env->prog->aux->max_rdonly_access[seq_id] = off + size;
+
+	return 0;
+}
+
 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
 static void zext_32_to_64(struct bpf_reg_state *reg)
 {
@@ -3327,7 +3369,7 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			mark_reg_unknown(env, regs, value_regno);
 	} else if (reg->type == PTR_TO_CTX) {
 		enum bpf_reg_type reg_type = SCALAR_VALUE;
-		u32 btf_id = 0;
+		u32 btf_id = 0, rdonly_buf_seq_id = 0;
 
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
@@ -3339,7 +3381,8 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		if (err < 0)
 			return err;
 
-		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
+		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id,
+				       &rdonly_buf_seq_id);
 		if (err)
 			verbose_linfo(env, insn_idx, "; ");
 		if (!err && t == BPF_READ && value_regno >= 0) {
@@ -3363,6 +3406,8 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				if (reg_type == PTR_TO_BTF_ID ||
 				    reg_type == PTR_TO_BTF_ID_OR_NULL)
 					regs[value_regno].btf_id = btf_id;
+				else if (reg_type == PTR_TO_RDONLY_BUF_OR_NULL)
+					regs[value_regno].rdonly_buf_seq_id = rdonly_buf_seq_id;
 			}
 			regs[value_regno].type = reg_type;
 		}
@@ -3428,6 +3473,15 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	} else if (reg->type == CONST_PTR_TO_MAP) {
 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
 					      value_regno);
+	} else if (reg->type == PTR_TO_RDONLY_BUF) {
+		if (t == BPF_WRITE) {
+			verbose(env, "R%d cannot write into %s\n",
+				regno, reg_type_str[reg->type]);
+			return -EACCES;
+		}
+		err = check_rdonly_buf_access(env, reg, regno, off, size);
+		if (!err && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -6803,6 +6857,8 @@  static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			reg->type = PTR_TO_BTF_ID;
 		} else if (reg->type == PTR_TO_MEM_OR_NULL) {
 			reg->type = PTR_TO_MEM;
+		} else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
+			reg->type = PTR_TO_RDONLY_BUF;
 		}
 		if (is_null) {
 			/* We don't need id and ref_obj_id from this point