Message ID | 20200617211542.1856028-1-yhs@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | implement bpf iterator for tcp and udp sockets | expand |
On Wed, Jun 17, 2020 at 02:15:42PM -0700, Yonghong Song wrote: > The helper is used in tracing programs to cast a socket > pointer to a tcp6_sock pointer. > The return value could be NULL if the casting is illegal. > > A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added > so the verifier is able to deduce proper return types for the helper. > > Different from the previous BTF_ID based helpers, > the bpf_skc_to_tcp6_sock() argument can be several possible > btf_ids. More specifically, all possible socket data structures > with sock_common appearing in the first in the memory layout. > This patch only added socket types related to tcp and udp. > > All possible argument btf_id and return value btf_id > for helper bpf_skc_to_tcp6_sock() are pre-calculcated and > cached. In the future, it is even possible to precompute > these btf_id's at kernel build time. > [ ... ] > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 07052d44bca1..e455aa09039b 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -261,6 +261,7 @@ enum bpf_return_type { > RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ > RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ > RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ > + RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ > }; > > /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs > @@ -283,6 +284,10 @@ struct bpf_func_proto { > enum bpf_arg_type arg_type[5]; > }; > int *btf_id; /* BTF ids of arguments */ > + bool (*check_btf_id)(u32 btf_id, u32 arg); /* If the argument could match > + * more than one btf id's. > + */ > + int *ret_btf_id; /* return value btf_id */ > }; > > /* bpf_context is intentionally undefined structure. 
Pointer to bpf_context is > @@ -1196,6 +1201,10 @@ bool bpf_link_is_iter(struct bpf_link *link); > struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop); > int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); > > +void init_sock_cast_types(struct btf *btf); CONFIG_NET may not be set. [ ... ] > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 34cde841ab68..22d90d47befa 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -3735,10 +3735,12 @@ static int int_ptr_type_to_size(enum bpf_arg_type type) > return -EINVAL; > } > > -static int check_func_arg(struct bpf_verifier_env *env, u32 regno, > +static int check_func_arg(struct bpf_verifier_env *env, u32 arg, > enum bpf_arg_type arg_type, > - struct bpf_call_arg_meta *meta) > + struct bpf_call_arg_meta *meta, > + const struct bpf_func_proto *fn) > { > + u32 regno = BPF_REG_1 + arg; > struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; > enum bpf_reg_type expected_type, type = reg->type; > int err = 0; > @@ -3820,9 +3822,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, > expected_type = PTR_TO_BTF_ID; > if (type != expected_type) > goto err_type; > - if (reg->btf_id != meta->btf_id) { > - verbose(env, "Helper has type %s got %s in R%d\n", > - kernel_type_name(meta->btf_id), > + if (!fn->check_btf_id) { > + if (reg->btf_id != meta->btf_id) { > + verbose(env, "Helper has type %s got %s in R%d\n", > + kernel_type_name(meta->btf_id), > + kernel_type_name(reg->btf_id), regno); > + > + return -EACCES; > + } > + } else if (!fn->check_btf_id(reg->btf_id, arg + 1)) { Why arg "+ 1"?
> + verbose(env, "Helper does not support %s in R%d\n", > kernel_type_name(reg->btf_id), regno); > > return -EACCES; > @@ -4600,7 +4609,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > struct bpf_reg_state *regs; > struct bpf_call_arg_meta meta; > bool changes_data; > - int i, err; > + int i, err, ret_btf_id; > > /* find function prototype */ > if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { > @@ -4644,10 +4653,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > meta.func_id = func_id; > /* check args */ > for (i = 0; i < 5; i++) { > - err = btf_resolve_helper_id(&env->log, fn, i); > - if (err > 0) > - meta.btf_id = err; > - err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); > + if (!fn->check_btf_id) { > + err = btf_resolve_helper_id(&env->log, fn, i); > + if (err > 0) > + meta.btf_id = err; > + } > + err = check_func_arg(env, i, fn->arg_type[i], &meta, fn); Nit. Since it is passing fn and i, may be skip passing fn->arg_type[i] altogether? > if (err) > return err; > } > @@ -4750,6 +4761,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; > regs[BPF_REG_0].id = ++env->id_gen; > regs[BPF_REG_0].mem_size = meta.mem_size; > + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { > + mark_reg_known_zero(env, regs, BPF_REG_0); > + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; > + ret_btf_id = *fn->ret_btf_id; > + if (ret_btf_id < 0) { If btf_vmlinux is not available, is ret_btf_id == 0? > + verbose(env, "invalid return type %d of func %s#%d\n", > + fn->ret_type, func_id_name(func_id), func_id); > + return err; Is err correctly set at this point? 
> + } > + regs[BPF_REG_0].btf_id = ret_btf_id; > } else { > verbose(env, "unknown return type %d of func %s#%d\n", > fn->ret_type, func_id_name(func_id), func_id); > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index afaec7e082d9..478c10d1ec33 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -1515,6 +1515,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_skb_output_proto; > case BPF_FUNC_xdp_output: > return &bpf_xdp_output_proto; > + case BPF_FUNC_skc_to_tcp6_sock: > + return &bpf_skc_to_tcp6_sock_proto; > #endif > case BPF_FUNC_seq_printf: > return prog->expected_attach_type == BPF_TRACE_ITER ? > diff --git a/net/core/filter.c b/net/core/filter.c > index 73395384afe2..faf6feedd78e 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -9191,3 +9191,72 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) > { > bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); > } > + > +/* Define a list of socket types which can be the argument for > + * skc_to_*_sock() helpers. All these sockets should have > + * sock_common as the first argument in its memory layout. 
> + */ > +static const char *sock_cast_types[] = { > + "inet_connection_sock", > + "inet_request_sock", > + "inet_sock", > + "inet_timewait_sock", > + "request_sock", > + "sock", > + "sock_common", > + "tcp_sock", > + "tcp_request_sock", > + "tcp_timewait_sock", > + "tcp6_sock", > + "udp_sock", > + "udp6_sock", > +}; > + > +static int sock_cast_btf_ids[ARRAY_SIZE(sock_cast_types)]; > + > +static bool check_arg_btf_id(u32 btf_id, u32 arg) > +{ > + int i; > + > + /* only one argument, no need to check arg */ > + for (i = 0; i < ARRAY_SIZE(sock_cast_btf_ids); i++) > + if (sock_cast_btf_ids[i] == btf_id) > + return true; > + return false; > +} > + > +BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) > +{ > + /* add an explicit cast to struct tcp6_sock to force > + * debug_info type generation for it. > + */ > + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && > + sk->sk_family == AF_INET6) > + return (unsigned long)(struct tcp6_sock *)sk; > + > + return (unsigned long)NULL; > +} > + > +static int bpf_skc_to_tcp6_sock_ret_btf_id; > +const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { > + .func = bpf_skc_to_tcp6_sock, > + .gpl_only = true, > + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, > + .arg1_type = ARG_PTR_TO_BTF_ID, > + .check_btf_id = check_arg_btf_id, > + .ret_btf_id = &bpf_skc_to_tcp6_sock_ret_btf_id, > +}; > + > +void init_sock_cast_types(struct btf *btf) > +{ > + char *ret_type_name; > + > + /* find all possible argument btf_id's for socket cast helpers */ > + find_array_of_btf_ids(btf, sock_cast_types, sock_cast_btf_ids, > + ARRAY_SIZE(sock_cast_types)); > + > + /* find return btf_id */ > + ret_type_name = "tcp6_sock"; > + find_array_of_btf_ids(btf, &ret_type_name, > + &bpf_skc_to_tcp6_sock_ret_btf_id, 1); Instead of re-finding tcp6_sock/tcp_sock/request_sock...etc, can the sock_cast_btf_ids[] be reused?
On 6/18/20 1:54 PM, Martin KaFai Lau wrote: > On Wed, Jun 17, 2020 at 02:15:42PM -0700, Yonghong Song wrote: >> The helper is used in tracing programs to cast a socket >> pointer to a tcp6_sock pointer. >> The return value could be NULL if the casting is illegal. >> >> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added >> so the verifier is able to deduce proper return types for the helper. >> >> Different from the previous BTF_ID based helpers, >> the bpf_skc_to_tcp6_sock() argument can be several possible >> btf_ids. More specifically, all possible socket data structures >> with sock_common appearing in the first in the memory layout. >> This patch only added socket types related to tcp and udp. >> >> All possible argument btf_id and return value btf_id >> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and >> cached. In the future, it is even possible to precompute >> these btf_id's at kernel build time. >> > [ ... ] > >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index 07052d44bca1..e455aa09039b 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -261,6 +261,7 @@ enum bpf_return_type { >> RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ >> RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ >> RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ >> + RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ >> }; >> >> /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs >> @@ -283,6 +284,10 @@ struct bpf_func_proto { >> enum bpf_arg_type arg_type[5]; >> }; >> int *btf_id; /* BTF ids of arguments */ >> + bool (*check_btf_id)(u32 btf_id, u32 arg); /* If the argument could match >> + * more than one btf id's. >> + */ >> + int *ret_btf_id; /* return value btf_id */ >> }; >> >> /* bpf_context is intentionally undefined structure. 
Pointer to bpf_context is >> @@ -1196,6 +1201,10 @@ bool bpf_link_is_iter(struct bpf_link *link); >> struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop); >> int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); >> >> +void init_sock_cast_types(struct btf *btf); > CONFIG_NET may not be set. Good catch, will add proper config guard in the next revision. > > [ ... ] > >> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c >> index 34cde841ab68..22d90d47befa 100644 >> --- a/kernel/bpf/verifier.c >> +++ b/kernel/bpf/verifier.c >> @@ -3735,10 +3735,12 @@ static int int_ptr_type_to_size(enum bpf_arg_type type) >> return -EINVAL; >> } >> >> -static int check_func_arg(struct bpf_verifier_env *env, u32 regno, >> +static int check_func_arg(struct bpf_verifier_env *env, u32 arg, >> enum bpf_arg_type arg_type, >> - struct bpf_call_arg_meta *meta) >> + struct bpf_call_arg_meta *meta, >> + const struct bpf_func_proto *fn) >> { >> + u32 regno = BPF_REG_1 + arg; >> struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; >> enum bpf_reg_type expected_type, type = reg->type; >> int err = 0; >> @@ -3820,9 +3822,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, >> expected_type = PTR_TO_BTF_ID; >> if (type != expected_type) >> goto err_type; >> - if (reg->btf_id != meta->btf_id) { >> - verbose(env, "Helper has type %s got %s in R%d\n", >> - kernel_type_name(meta->btf_id), >> + if (!fn->check_btf_id) { >> + if (reg->btf_id != meta->btf_id) { >> + verbose(env, "Helper has type %s got %s in R%d\n", >> + kernel_type_name(meta->btf_id), >> + kernel_type_name(reg->btf_id), regno); >> + >> + return -EACCES; >> + } >> + } else if (!fn->check_btf_id(reg->btf_id, arg + 1)) { > Why arg "+ 1"? In verifier, arg starts from 0 (arguments 0 - 4). In func_proto, we have ARG1 - ARG5. That is why I add one here. I think I can just use 0-4 range for arg parameter, it should be fine.
> >> + verbose(env, "Helper does not support %s in R%d\n", >> kernel_type_name(reg->btf_id), regno); >> >> return -EACCES; >> @@ -4600,7 +4609,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn >> struct bpf_reg_state *regs; >> struct bpf_call_arg_meta meta; >> bool changes_data; >> - int i, err; >> + int i, err, ret_btf_id; >> >> /* find function prototype */ >> if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { >> @@ -4644,10 +4653,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn >> meta.func_id = func_id; >> /* check args */ >> for (i = 0; i < 5; i++) { >> - err = btf_resolve_helper_id(&env->log, fn, i); >> - if (err > 0) >> - meta.btf_id = err; >> - err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); >> + if (!fn->check_btf_id) { >> + err = btf_resolve_helper_id(&env->log, fn, i); >> + if (err > 0) >> + meta.btf_id = err; >> + } >> + err = check_func_arg(env, i, fn->arg_type[i], &meta, fn); > Nit. Since it is passing fn and i, may be skip passing > fn->arg_type[i] altogether? Make sense, will do. > >> if (err) >> return err; >> } >> @@ -4750,6 +4761,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn >> regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; >> regs[BPF_REG_0].id = ++env->id_gen; >> regs[BPF_REG_0].mem_size = meta.mem_size; >> + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { >> + mark_reg_known_zero(env, regs, BPF_REG_0); >> + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; >> + ret_btf_id = *fn->ret_btf_id; >> + if (ret_btf_id < 0) { > If btf_vmlinux is not available, is ret_btf_id == 0? Yes, it is a global variable. Will change it to <= 0. > >> + verbose(env, "invalid return type %d of func %s#%d\n", >> + fn->ret_type, func_id_name(func_id), func_id); >> + return err; > Is err correctly set at this point? Typo, I mean return ret_btf_id. In Jiri's d_path patch, the btf_id are all non-negative values. 
I may adopt the same convention in the next revision to make future conversion easier. > >> + } >> + regs[BPF_REG_0].btf_id = ret_btf_id; >> } else { >> verbose(env, "unknown return type %d of func %s#%d\n", >> fn->ret_type, func_id_name(func_id), func_id); >> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c >> index afaec7e082d9..478c10d1ec33 100644 >> --- a/kernel/trace/bpf_trace.c >> +++ b/kernel/trace/bpf_trace.c >> @@ -1515,6 +1515,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) >> return &bpf_skb_output_proto; >> case BPF_FUNC_xdp_output: >> return &bpf_xdp_output_proto; >> + case BPF_FUNC_skc_to_tcp6_sock: >> + return &bpf_skc_to_tcp6_sock_proto; >> #endif >> case BPF_FUNC_seq_printf: >> return prog->expected_attach_type == BPF_TRACE_ITER ? >> diff --git a/net/core/filter.c b/net/core/filter.c >> index 73395384afe2..faf6feedd78e 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -9191,3 +9191,72 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) >> { >> bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); >> } >> + >> +/* Define a list of socket types which can be the argument for >> + * skc_to_*_sock() helpers. All these sockets should have >> + * sock_common as the first argument in its memory layout. 
>> + */ >> +static const char *sock_cast_types[] = { >> + "inet_connection_sock", >> + "inet_request_sock", >> + "inet_sock", >> + "inet_timewait_sock", >> + "request_sock", >> + "sock", >> + "sock_common", >> + "tcp_sock", >> + "tcp_request_sock", >> + "tcp_timewait_sock", >> + "tcp6_sock", >> + "udp_sock", >> + "udp6_sock", >> +}; >> + >> +static int sock_cast_btf_ids[ARRAY_SIZE(sock_cast_types)]; >> + >> +static bool check_arg_btf_id(u32 btf_id, u32 arg) >> +{ >> + int i; >> + >> + /* only one argument, no need to check arg */ >> + for (i = 0; i < ARRAY_SIZE(sock_cast_btf_ids); i++) >> + if (sock_cast_btf_ids[i] == btf_id) >> + return true; >> + return false; >> +} >> + >> +BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) >> +{ >> + /* add an explicit cast to struct tcp6_sock to force >> + * debug_info type generation for it. >> + */ >> + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && >> + sk->sk_family == AF_INET6) >> + return (unsigned long)(struct tcp6_sock *)sk; >> + >> + return (unsigned long)NULL; >> +} >> + >> +static int bpf_skc_to_tcp6_sock_ret_btf_id; >> +const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { >> + .func = bpf_skc_to_tcp6_sock, >> + .gpl_only = true, >> + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, >> + .arg1_type = ARG_PTR_TO_BTF_ID, >> + .check_btf_id = check_arg_btf_id, >> + .ret_btf_id = &bpf_skc_to_tcp6_sock_ret_btf_id, >> +}; >> + >> +void init_sock_cast_types(struct btf *btf) >> +{ >> + char *ret_type_name; >> + >> + /* find all possible argument btf_id's for socket cast helpers */ >> + find_array_of_btf_ids(btf, sock_cast_types, sock_cast_btf_ids, >> + ARRAY_SIZE(sock_cast_types)); >> + >> + /* find return btf_id */ >> + ret_type_name = "tcp6_sock"; >> + find_array_of_btf_ids(btf, &ret_type_name, >> + &bpf_skc_to_tcp6_sock_ret_btf_id, 1); > Instead of re-finding tcp6_sock/tcp_sock/request_sock...etc, > can the sock_cast_btf_ids[] be reused? Actually, yes, we can. Will do.
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..e455aa09039b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -261,6 +261,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ + RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -283,6 +284,10 @@ struct bpf_func_proto { enum bpf_arg_type arg_type[5]; }; int *btf_id; /* BTF ids of arguments */ + bool (*check_btf_id)(u32 btf_id, u32 arg); /* If the argument could match + * more than one btf id's. + */ + int *ret_btf_id; /* return value btf_id */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -1196,6 +1201,10 @@ bool bpf_link_is_iter(struct bpf_link *link); struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop); int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); +void init_sock_cast_types(struct btf *btf); +void find_array_of_btf_ids(struct btf *btf, const char **type_names, + int *btf_ids, u32 num_types); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@ -1629,6 +1638,7 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h 
b/include/uapi/linux/bpf.h index 19684813faae..394fcba27b6a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3252,6 +3252,12 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3389,7 +3395,8 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 58c9af1d4808..6f8b52fb9269 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3645,6 +3645,7 @@ struct btf *btf_parse_vmlinux(void) } bpf_struct_ops_init(btf, log); + init_sock_cast_types(btf); btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); @@ -4699,3 +4700,13 @@ u32 btf_id(const struct btf *btf) { return btf->id; } + +void find_array_of_btf_ids(struct btf *btf, const char **type_names, + int *btf_ids, u32 num_types) +{ + int i; + + for (i = 0; i < num_types; i++) + btf_ids[i] = btf_find_by_name_kind(btf, type_names[i], + BTF_KIND_STRUCT); +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 34cde841ab68..22d90d47befa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3735,10 +3735,12 @@ static int int_ptr_type_to_size(enum bpf_arg_type type) return -EINVAL; } -static int check_func_arg(struct bpf_verifier_env *env, u32 regno, +static int check_func_arg(struct bpf_verifier_env *env, u32 arg, enum bpf_arg_type arg_type, - struct bpf_call_arg_meta *meta) + struct bpf_call_arg_meta *meta, + const struct bpf_func_proto *fn) 
{ + u32 regno = BPF_REG_1 + arg; struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -3820,9 +3822,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_BTF_ID; if (type != expected_type) goto err_type; - if (reg->btf_id != meta->btf_id) { - verbose(env, "Helper has type %s got %s in R%d\n", - kernel_type_name(meta->btf_id), + if (!fn->check_btf_id) { + if (reg->btf_id != meta->btf_id) { + verbose(env, "Helper has type %s got %s in R%d\n", + kernel_type_name(meta->btf_id), + kernel_type_name(reg->btf_id), regno); + + return -EACCES; + } + } else if (!fn->check_btf_id(reg->btf_id, arg + 1)) { + verbose(env, "Helper does not support %s in R%d\n", kernel_type_name(reg->btf_id), regno); return -EACCES; @@ -4600,7 +4609,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; bool changes_data; - int i, err; + int i, err, ret_btf_id; /* find function prototype */ if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { @@ -4644,10 +4653,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn meta.func_id = func_id; /* check args */ for (i = 0; i < 5; i++) { - err = btf_resolve_helper_id(&env->log, fn, i); - if (err > 0) - meta.btf_id = err; - err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); + if (!fn->check_btf_id) { + err = btf_resolve_helper_id(&env->log, fn, i); + if (err > 0) + meta.btf_id = err; + } + err = check_func_arg(env, i, fn->arg_type[i], &meta, fn); if (err) return err; } @@ -4750,6 +4761,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].mem_size = meta.mem_size; + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); +
regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + ret_btf_id = *fn->ret_btf_id; + if (ret_btf_id < 0) { + verbose(env, "invalid return type %d of func %s#%d\n", + fn->ret_type, func_id_name(func_id), func_id); + return err; + } + regs[BPF_REG_0].btf_id = ret_btf_id; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index afaec7e082d9..478c10d1ec33 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1515,6 +1515,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_output_proto; case BPF_FUNC_xdp_output: return &bpf_xdp_output_proto; + case BPF_FUNC_skc_to_tcp6_sock: + return &bpf_skc_to_tcp6_sock_proto; #endif case BPF_FUNC_seq_printf: return prog->expected_attach_type == BPF_TRACE_ITER ? diff --git a/net/core/filter.c b/net/core/filter.c index 73395384afe2..faf6feedd78e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9191,3 +9191,72 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) { bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } + +/* Define a list of socket types which can be the argument for + * skc_to_*_sock() helpers. All these sockets should have + * sock_common as the first argument in its memory layout. 
+ */ +static const char *sock_cast_types[] = { + "inet_connection_sock", + "inet_request_sock", + "inet_sock", + "inet_timewait_sock", + "request_sock", + "sock", + "sock_common", + "tcp_sock", + "tcp_request_sock", + "tcp_timewait_sock", + "tcp6_sock", + "udp_sock", + "udp6_sock", +}; + +static int sock_cast_btf_ids[ARRAY_SIZE(sock_cast_types)]; + +static bool check_arg_btf_id(u32 btf_id, u32 arg) +{ + int i; + + /* only one argument, no need to check arg */ + for (i = 0; i < ARRAY_SIZE(sock_cast_btf_ids); i++) + if (sock_cast_btf_ids[i] == btf_id) + return true; + return false; +} + +BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) +{ + /* add an explicit cast to struct tcp6_sock to force + * debug_info type generation for it. + */ + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && + sk->sk_family == AF_INET6) + return (unsigned long)(struct tcp6_sock *)sk; + + return (unsigned long)NULL; +} + +static int bpf_skc_to_tcp6_sock_ret_btf_id; +const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { + .func = bpf_skc_to_tcp6_sock, + .gpl_only = true, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &bpf_skc_to_tcp6_sock_ret_btf_id, +}; + +void init_sock_cast_types(struct btf *btf) +{ + char *ret_type_name; + + /* find all possible argument btf_id's for socket cast helpers */ + find_array_of_btf_ids(btf, sock_cast_types, sock_cast_btf_ids, + ARRAY_SIZE(sock_cast_types)); + + /* find return btf_id */ + ret_type_name = "tcp6_sock"; + find_array_of_btf_ids(btf, &ret_type_name, + &bpf_skc_to_tcp6_sock_ret_btf_id, 1); +} diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 91fa668fa860..6c2f64118651 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -421,6 +421,7 @@ class PrinterHelpers(Printer): 'struct sockaddr', 'struct tcphdr', 'struct seq_file', + 'struct tcp6_sock', 'struct __sk_buff', 'struct sk_msg_md', @@ -458,6 +459,7 @@ class 
PrinterHelpers(Printer): 'struct sockaddr', 'struct tcphdr', 'struct seq_file', + 'struct tcp6_sock', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 19684813faae..394fcba27b6a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3252,6 +3252,12 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3389,7 +3395,8 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call
The helper is used in tracing programs to cast a socket pointer to a tcp6_sock pointer. The return value could be NULL if the casting is illegal. A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added so the verifier is able to deduce proper return types for the helper. Different from the previous BTF_ID based helpers, the bpf_skc_to_tcp6_sock() argument can be any of several possible btf_ids. More specifically, it can be any socket data structure that has sock_common as the first member in its memory layout. This patch only adds socket types related to tcp and udp. All possible argument btf_ids and the return value btf_id for helper bpf_skc_to_tcp6_sock() are pre-calculated and cached. In the future, it is even possible to precompute these btf_ids at kernel build time. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 10 +++++ include/uapi/linux/bpf.h | 9 ++++- kernel/bpf/btf.c | 11 ++++++ kernel/bpf/verifier.c | 41 +++++++++++++++----- kernel/trace/bpf_trace.c | 2 + net/core/filter.c | 69 ++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 + tools/include/uapi/linux/bpf.h | 9 ++++- 8 files changed, 141 insertions(+), 12 deletions(-)