Message ID | 20200509175912.2476576-1-yhs@fb.com |
---|---|
State | Accepted |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: implement bpf iterator for kernel data | expand |
On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote: > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > index a2cfba89a8e1..c490fbde22d4 100644 > --- a/kernel/bpf/btf.c > +++ b/kernel/bpf/btf.c > @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, > return true; > > /* this is a pointer to another type */ > - info->reg_type = PTR_TO_BTF_ID; > + if (off != 0 && prog->aux->btf_id_or_null_non0_off) > + info->reg_type = PTR_TO_BTF_ID_OR_NULL; > + else > + info->reg_type = PTR_TO_BTF_ID; I think the verifier should be smarter than this. It's too specific and inflexible. All ctx fields of bpf_iter execpt first will be such ? let's figure out a different way to tell verifier about this. How about using typedef with specific suffix? Like: typedef struct bpf_map *bpf_map_or_null; struct bpf_iter__bpf_map { struct bpf_iter_meta *meta; bpf_map_or_null map; }; or use a union with specific second member? Like: struct bpf_iter__bpf_map { struct bpf_iter_meta *meta; union { struct bpf_map *map; long null; }; };
On 5/9/20 5:50 PM, Alexei Starovoitov wrote: > On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote: >> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c >> index a2cfba89a8e1..c490fbde22d4 100644 >> --- a/kernel/bpf/btf.c >> +++ b/kernel/bpf/btf.c >> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, >> return true; >> >> /* this is a pointer to another type */ >> - info->reg_type = PTR_TO_BTF_ID; >> + if (off != 0 && prog->aux->btf_id_or_null_non0_off) >> + info->reg_type = PTR_TO_BTF_ID_OR_NULL; >> + else >> + info->reg_type = PTR_TO_BTF_ID; > > I think the verifier should be smarter than this. > It's too specific and inflexible. All ctx fields of bpf_iter execpt first > will be such ? let's figure out a different way to tell verifier about this. > How about using typedef with specific suffix? Like: > typedef struct bpf_map *bpf_map_or_null; > struct bpf_iter__bpf_map { > struct bpf_iter_meta *meta; > bpf_map_or_null map; > }; > or use a union with specific second member? Like: > struct bpf_iter__bpf_map { > struct bpf_iter_meta *meta; > union { > struct bpf_map *map; > long null; > }; > }; I have an alternative approach to refactor this for future support for map elements as well. For example, for bpf_map_elements iterator the prog context type can be struct bpf_iter_bpf_map_elem { struct bpf_iter_meta *meta; strruct bpf_map *map; <key type> *key; <value type> *val; }; target will pass the following information to bpf_iter registration: arg 1: PTR_TO_BTF_ID arg 2: PTR_TO_BTF_ID_OR_NULL arg 3: PTR_TO_BUFFER arg 4: PTR_TO_BUFFER verifier will retrieve the reg_type from target.
On Sat, May 9, 2020 at 10:19 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 5/9/20 5:50 PM, Alexei Starovoitov wrote: > > On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote: > >> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > >> index a2cfba89a8e1..c490fbde22d4 100644 > >> --- a/kernel/bpf/btf.c > >> +++ b/kernel/bpf/btf.c > >> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, > >> return true; > >> > >> /* this is a pointer to another type */ > >> - info->reg_type = PTR_TO_BTF_ID; > >> + if (off != 0 && prog->aux->btf_id_or_null_non0_off) > >> + info->reg_type = PTR_TO_BTF_ID_OR_NULL; > >> + else > >> + info->reg_type = PTR_TO_BTF_ID; > > > > I think the verifier should be smarter than this. > > It's too specific and inflexible. All ctx fields of bpf_iter execpt first > > will be such ? let's figure out a different way to tell verifier about this. > > How about using typedef with specific suffix? Like: > > typedef struct bpf_map *bpf_map_or_null; > > struct bpf_iter__bpf_map { > > struct bpf_iter_meta *meta; > > bpf_map_or_null map; > > }; > > or use a union with specific second member? Like: > > struct bpf_iter__bpf_map { > > struct bpf_iter_meta *meta; > > union { > > struct bpf_map *map; > > long null; > > }; > > }; > > I have an alternative approach to refactor this for future > support for map elements as well. > > For example, for bpf_map_elements iterator the prog context type > can be > struct bpf_iter_bpf_map_elem { > struct bpf_iter_meta *meta; > strruct bpf_map *map; > <key type> *key; > <value type> *val; > }; > > target will pass the following information to bpf_iter registration: > arg 1: PTR_TO_BTF_ID > arg 2: PTR_TO_BTF_ID_OR_NULL > arg 3: PTR_TO_BUFFER > arg 4: PTR_TO_BUFFER > > verifier will retrieve the reg_type from target. you mean to introduce something like 'struct bpf_func_proto' that describes types of helpers, but instead something similar to clarify the types in ctx ? That should work. Thanks
On 5/10/20 9:11 AM, Alexei Starovoitov wrote: > On Sat, May 9, 2020 at 10:19 PM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 5/9/20 5:50 PM, Alexei Starovoitov wrote: >>> On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote: >>>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c >>>> index a2cfba89a8e1..c490fbde22d4 100644 >>>> --- a/kernel/bpf/btf.c >>>> +++ b/kernel/bpf/btf.c >>>> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, >>>> return true; >>>> >>>> /* this is a pointer to another type */ >>>> - info->reg_type = PTR_TO_BTF_ID; >>>> + if (off != 0 && prog->aux->btf_id_or_null_non0_off) >>>> + info->reg_type = PTR_TO_BTF_ID_OR_NULL; >>>> + else >>>> + info->reg_type = PTR_TO_BTF_ID; >>> >>> I think the verifier should be smarter than this. >>> It's too specific and inflexible. All ctx fields of bpf_iter execpt first >>> will be such ? let's figure out a different way to tell verifier about this. >>> How about using typedef with specific suffix? Like: >>> typedef struct bpf_map *bpf_map_or_null; >>> struct bpf_iter__bpf_map { >>> struct bpf_iter_meta *meta; >>> bpf_map_or_null map; >>> }; >>> or use a union with specific second member? Like: >>> struct bpf_iter__bpf_map { >>> struct bpf_iter_meta *meta; >>> union { >>> struct bpf_map *map; >>> long null; >>> }; >>> }; >> >> I have an alternative approach to refactor this for future >> support for map elements as well. >> >> For example, for bpf_map_elements iterator the prog context type >> can be >> struct bpf_iter_bpf_map_elem { >> struct bpf_iter_meta *meta; >> strruct bpf_map *map; >> <key type> *key; >> <value type> *val; >> }; >> >> target will pass the following information to bpf_iter registration: >> arg 1: PTR_TO_BTF_ID >> arg 2: PTR_TO_BTF_ID_OR_NULL >> arg 3: PTR_TO_BUFFER >> arg 4: PTR_TO_BUFFER >> >> verifier will retrieve the reg_type from target. > > you mean to introduce something like 'struct bpf_func_proto' > that describes types of helpers, but instead something similar > to clarify the types in ctx ? That should work. Thanks Yes, this is what I think will be extensible.
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 363ab0751967..cf4b6e44f2bc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -320,6 +320,7 @@ enum bpf_reg_type { PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ PTR_TO_BTF_ID, /* reg points to kernel struct */ + PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -658,6 +659,7 @@ struct bpf_prog_aux { bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; + bool btf_id_or_null_non0_off; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a2cfba89a8e1..c490fbde22d4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return true; /* this is a pointer to another type */ - info->reg_type = PTR_TO_BTF_ID; + if (off != 0 && prog->aux->btf_id_or_null_non0_off) + info->reg_type = PTR_TO_BTF_ID_OR_NULL; + else + info->reg_type = PTR_TO_BTF_ID; if (tgt_prog) { ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d725ff7d11db..36b2a38a06fe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -398,7 +398,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) return type == PTR_TO_MAP_VALUE_OR_NULL || type == PTR_TO_SOCKET_OR_NULL || type == PTR_TO_SOCK_COMMON_OR_NULL || - type == PTR_TO_TCP_SOCK_OR_NULL; + type == PTR_TO_TCP_SOCK_OR_NULL || + type == PTR_TO_BTF_ID_OR_NULL; } static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) @@ -483,6 +484,7 @@ static const char * const reg_type_str[] = { [PTR_TO_TP_BUFFER] = "tp_buffer", [PTR_TO_XDP_SOCK] = "xdp_sock", [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", }; static char slot_type_char[] = { @@ -543,7 +545,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { - if (t == PTR_TO_BTF_ID) + if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL) verbose(env, "%s", kernel_type_name(reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) @@ -2139,6 +2141,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID_OR_NULL: return true; default: return false; @@ -2659,7 +2662,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - if (*reg_type == PTR_TO_BTF_ID) + if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) *btf_id = info.btf_id; else env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; @@ -3243,7 +3246,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * a sub-register. */ regs[value_regno].subreg_def = DEF_NOT_SUBREG; - if (reg_type == PTR_TO_BTF_ID) + if (reg_type == PTR_TO_BTF_ID || + reg_type == PTR_TO_BTF_ID_OR_NULL) regs[value_regno].btf_id = btf_id; } regs[value_regno].type = reg_type; @@ -6572,6 +6576,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->type = PTR_TO_SOCK_COMMON; } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { reg->type = PTR_TO_TCP_SOCK; + } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) { + reg->type = PTR_TO_BTF_ID; } if (is_null) { /* We don't need id and ref_obj_id from this point @@ -8429,6 +8435,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID_OR_NULL: return false; default: return true; @@ -10640,6 +10647,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) prog->aux->attach_func_proto = t; if (!bpf_iter_prog_supported(prog)) return -EINVAL; + prog->aux->btf_id_or_null_non0_off = true; ret = btf_distill_func_proto(&env->log, btf, t, tname, &fmodel); return ret;