Message ID | 20190426171114.62466-1-kafai@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | BPF sk local storage | expand |
On 04/26, Martin KaFai Lau wrote: > This patch rides on an existing BPF_PROG_TYPE_CGROUP_SKB test > (test_sock_fields.c) to do a TCP end-to-end test on the new > bpf_sk_storage_* helpers. > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > --- > tools/testing/selftests/bpf/bpf_helpers.h | 5 + > .../bpf/progs/test_sock_fields_kern.c | 49 ++++++++ > .../testing/selftests/bpf/test_sock_fields.c | 115 +++++++++++++++--- > 3 files changed, 153 insertions(+), 16 deletions(-) > > diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h > index 59e221586cf7..6e80b66d7fb1 100644 > --- a/tools/testing/selftests/bpf/bpf_helpers.h > +++ b/tools/testing/selftests/bpf/bpf_helpers.h > @@ -211,6 +211,11 @@ static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, > static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, > unsigned long long flags, unsigned long *res) = > (void *) BPF_FUNC_strtoul; > +static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, > + void *value, __u64 flags) = > + (void *) BPF_FUNC_sk_storage_get; > +static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = > + (void *)BPF_FUNC_sk_storage_delete; > > /* llvm builtin functions that eBPF C program may use to > * emit BPF_LD_ABS and BPF_LD_IND instructions > diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > index 37328f148538..93ef3e039da3 100644 > --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > @@ -55,6 +55,31 @@ struct bpf_map_def SEC("maps") linum_map = { > .max_entries = __NR_BPF_LINUM_ARRAY_IDX, > }; > > +struct bpf_spinlock_cnt { > + struct bpf_spin_lock lock; > + __u32 cnt; > +}; > + > +struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { > + .type = BPF_MAP_TYPE_SK_STORAGE, > + .key_size = sizeof(int), > + .value_size = sizeof(struct bpf_spinlock_cnt), > + .max_entries = 0, > + .map_flags = BPF_F_NO_PREALLOC, > +}; > + > +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); > + > +struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { > + .type = BPF_MAP_TYPE_SK_STORAGE, > + .key_size = sizeof(int), > + .value_size = sizeof(struct bpf_spinlock_cnt), > + .max_entries = 0, > + .map_flags = BPF_F_NO_PREALLOC, > +}; > + > +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt); > + > static bool is_loopback6(__u32 *a6) > { > return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); > @@ -120,7 +145,9 @@ static void tpcpy(struct bpf_tcp_sock *dst, > SEC("cgroup_skb/egress") > int egress_read_sock_fields(struct __sk_buff *skb) > { > + struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; > __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; > + struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; > struct sockaddr_in6 *srv_sa6, *cli_sa6; > struct bpf_tcp_sock *tp, *tp_ret; > struct bpf_sock *sk, *sk_ret; > @@ -161,6 +188,28 @@ int egress_read_sock_fields(struct __sk_buff *skb) > skcpy(sk_ret, sk); > tpcpy(tp_ret, tp); > > + if (result_idx == EGRESS_SRV_IDX) { > + /* The userspace has created it for srv sk */ > + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); > + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, > + 0, 0); > + } else { > + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, > + &cli_cnt_init, > + BPF_SK_STORAGE_GET_F_CREATE); > + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, > + sk, &cli_cnt_init, > + BPF_SK_STORAGE_GET_F_CREATE); > + } > + > + if (!pkt_out_cnt || !pkt_out_cnt10) > + RETURN; > + [..] > + pkt_out_cnt->cnt += 1; > + bpf_spin_lock(&pkt_out_cnt10->lock); > + pkt_out_cnt10->cnt += 10; > + bpf_spin_unlock(&pkt_out_cnt10->lock); Any reason only cnt10 is spin-locked? I see both of them use bpf_spinlock_cnt struct. > + > RETURN; > } > > diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c > index dcae7f664dce..550baf8aa1ca 100644 > --- a/tools/testing/selftests/bpf/test_sock_fields.c > +++ b/tools/testing/selftests/bpf/test_sock_fields.c > @@ -35,6 +35,11 @@ enum bpf_linum_array_idx { > __NR_BPF_LINUM_ARRAY_IDX, > }; > > +struct bpf_spinlock_cnt { > + struct bpf_spin_lock lock; > + __u32 cnt; > +}; > + > #define CHECK(condition, tag, format...) ({ \ > int __ret = !!(condition); \ > if (__ret) { \ > @@ -50,6 +55,8 @@ enum bpf_linum_array_idx { > #define DATA_LEN sizeof(DATA) > > static struct sockaddr_in6 srv_sa6, cli_sa6; > +static int sk_pkt_out_cnt10_fd; > +static int sk_pkt_out_cnt_fd; > static int linum_map_fd; > static int addr_map_fd; > static int tp_map_fd; > @@ -220,28 +227,90 @@ static void check_result(void) > "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", > ingress_linum); > > - CHECK(srv_tp.data_segs_out != 1 || > + CHECK(srv_tp.data_segs_out != 2 || > srv_tp.data_segs_in || > srv_tp.snd_cwnd != 10 || > srv_tp.total_retrans || > - srv_tp.bytes_acked != DATA_LEN, > + srv_tp.bytes_acked != 2 * DATA_LEN, > "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", > egress_linum); > > CHECK(cli_tp.data_segs_out || > - cli_tp.data_segs_in != 1 || > + cli_tp.data_segs_in != 2 || > cli_tp.snd_cwnd != 10 || > cli_tp.total_retrans || > - cli_tp.bytes_received != DATA_LEN, > + cli_tp.bytes_received != 2 * DATA_LEN, > "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", > egress_linum); > } > > +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) > +{ > + struct bpf_spinlock_cnt pkt_out_cnt = {} , pkt_out_cnt10 = {}; > + int err; > + > + pkt_out_cnt.cnt = ~0; > + pkt_out_cnt10.cnt = ~0; > + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); > + if (!err) > + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, > + &pkt_out_cnt10); > + > + /* The bpf prog only counts for fullsock and > + * passive conneciton did not become fullsock until 3WHS > + * had been finished. > + * The bpf prog only counted two data packet out but we > + * specially init accept_fd's pkt_out_cnt by 2 in > + * init_sk_storage(). Hence, 4 here. > + */ > + CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, > + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", > + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", > + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); > + > + pkt_out_cnt.cnt = ~0; > + pkt_out_cnt10.cnt = ~0; > + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); > + if (!err) > + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, > + &pkt_out_cnt10); > + /* Active connection is fullsock from the beginning. > + * 1 SYN and 1 ACK during 3WHS > + * 2 Acks on data packet. > + * > + * The bpf_prog initialized it to 0xeB9F. > + */ > + CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || > + pkt_out_cnt10.cnt != 0xeB9F + 40, > + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", > + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", > + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); > +} > + > +static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) > +{ > + struct bpf_spinlock_cnt scnt = {}; > + int err; > + > + scnt.cnt = pkt_out_cnt; > + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, > + BPF_NOEXIST); > + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", > + "err:%d errno:%d", err, errno); > + > + scnt.cnt *= 10; > + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, > + BPF_NOEXIST); > + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", > + "err:%d errno:%d", err, errno); > +} > + > static void test(void) > { > int listen_fd, cli_fd, accept_fd, epfd, err; > struct epoll_event ev; > socklen_t addrlen; > + int i; > > addrlen = sizeof(struct sockaddr_in6); > ev.events = EPOLLIN; > @@ -308,24 +377,30 @@ static void test(void) > accept_fd, errno); > close(listen_fd); > > - /* Send some data from accept_fd to cli_fd */ > - err = send(accept_fd, DATA, DATA_LEN, 0); > - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", > - err, errno); > - > - /* Have some timeout in recv(cli_fd). Just in case. */ > ev.data.fd = cli_fd; > err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); > CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", > err, errno); > > - err = epoll_wait(epfd, &ev, 1, 1000); > - CHECK(err != 1 || ev.data.fd != cli_fd, > - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", > - err, errno, ev.data.fd, cli_fd); > + init_sk_storage(accept_fd, 2); > > - err = recv(cli_fd, NULL, 0, MSG_TRUNC); > - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); > + for (i = 0; i < 2; i++) { > + /* Send some data from accept_fd to cli_fd */ > + err = send(accept_fd, DATA, DATA_LEN, 0); > + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", > + err, errno); > + > + /* Have some timeout in recv(cli_fd). Just in case. */ > + err = epoll_wait(epfd, &ev, 1, 1000); > + CHECK(err != 1 || ev.data.fd != cli_fd, > + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", > + err, errno, ev.data.fd, cli_fd); > + > + err = recv(cli_fd, NULL, 0, MSG_TRUNC); > + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); > + } > + > + check_sk_pkt_out_cnt(accept_fd, cli_fd); > > close(epfd); > close(accept_fd); > @@ -395,6 +470,14 @@ int main(int argc, char **argv) > CHECK(!map, "cannot find linum_map", "(null)"); > linum_map_fd = bpf_map__fd(map); > > + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); > + CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); > + sk_pkt_out_cnt_fd = bpf_map__fd(map); > + > + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); > + CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); > + sk_pkt_out_cnt10_fd = bpf_map__fd(map); > + > test(); > > bpf_object__close(obj); > -- > 2.17.1 >
On Fri, Apr 26, 2019 at 10:55:02AM -0700, Stanislav Fomichev wrote: > On 04/26, Martin KaFai Lau wrote: > > This patch rides on an existing BPF_PROG_TYPE_CGROUP_SKB test > > (test_sock_fields.c) to do a TCP end-to-end test on the new > > bpf_sk_storage_* helpers. > > > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > > --- > > tools/testing/selftests/bpf/bpf_helpers.h | 5 + > > .../bpf/progs/test_sock_fields_kern.c | 49 ++++++++ > > .../testing/selftests/bpf/test_sock_fields.c | 115 +++++++++++++++--- > > 3 files changed, 153 insertions(+), 16 deletions(-) > > > > diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h > > index 59e221586cf7..6e80b66d7fb1 100644 > > --- a/tools/testing/selftests/bpf/bpf_helpers.h > > +++ b/tools/testing/selftests/bpf/bpf_helpers.h > > @@ -211,6 +211,11 @@ static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, > > static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, > > unsigned long long flags, unsigned long *res) = > > (void *) BPF_FUNC_strtoul; > > +static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, > > + void *value, __u64 flags) = > > + (void *) BPF_FUNC_sk_storage_get; > > +static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = > > + (void *)BPF_FUNC_sk_storage_delete; > > > > /* llvm builtin functions that eBPF C program may use to > > * emit BPF_LD_ABS and BPF_LD_IND instructions > > diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > > index 37328f148538..93ef3e039da3 100644 > > --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > > +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c > > @@ -55,6 +55,31 @@ struct bpf_map_def SEC("maps") linum_map = { > > .max_entries = __NR_BPF_LINUM_ARRAY_IDX, > > }; > > > > +struct bpf_spinlock_cnt { > > + struct bpf_spin_lock lock; > > + __u32 cnt; > > +}; > > + > > +struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { > > + .type = BPF_MAP_TYPE_SK_STORAGE, > > + .key_size = sizeof(int), > > + .value_size = sizeof(struct bpf_spinlock_cnt), > > + .max_entries = 0, > > + .map_flags = BPF_F_NO_PREALLOC, > > +}; > > + > > +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); > > + > > +struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { > > + .type = BPF_MAP_TYPE_SK_STORAGE, > > + .key_size = sizeof(int), > > + .value_size = sizeof(struct bpf_spinlock_cnt), > > + .max_entries = 0, > > + .map_flags = BPF_F_NO_PREALLOC, > > +}; > > + > > +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt); > > + > > static bool is_loopback6(__u32 *a6) > > { > > return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); > > @@ -120,7 +145,9 @@ static void tpcpy(struct bpf_tcp_sock *dst, > > SEC("cgroup_skb/egress") > > int egress_read_sock_fields(struct __sk_buff *skb) > > { > > + struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; > > __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; > > + struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; > > struct sockaddr_in6 *srv_sa6, *cli_sa6; > > struct bpf_tcp_sock *tp, *tp_ret; > > struct bpf_sock *sk, *sk_ret; > > @@ -161,6 +188,28 @@ int egress_read_sock_fields(struct __sk_buff *skb) > > skcpy(sk_ret, sk); > > tpcpy(tp_ret, tp); > > > > + if (result_idx == EGRESS_SRV_IDX) { > > + /* The userspace has created it for srv sk */ > > + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); > > + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, > > + 0, 0); > > + } else { > > + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, > > + &cli_cnt_init, > > + BPF_SK_STORAGE_GET_F_CREATE); > > + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, > > + sk, &cli_cnt_init, > > + BPF_SK_STORAGE_GET_F_CREATE); > > + } > > + > > + if (!pkt_out_cnt || !pkt_out_cnt10) > > + RETURN; > > + > [..] > > + pkt_out_cnt->cnt += 1; > > + bpf_spin_lock(&pkt_out_cnt10->lock); > > + pkt_out_cnt10->cnt += 10; > > + bpf_spin_unlock(&pkt_out_cnt10->lock); > Any reason only cnt10 is spin-locked? I see both of them use > bpf_spinlock_cnt struct. Intentionally one takes lock and one does not as a test. I will add one comment here. > > > + > > RETURN; > > } > > > > diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c > > index dcae7f664dce..550baf8aa1ca 100644 > > --- a/tools/testing/selftests/bpf/test_sock_fields.c > > +++ b/tools/testing/selftests/bpf/test_sock_fields.c > > @@ -35,6 +35,11 @@ enum bpf_linum_array_idx { > > __NR_BPF_LINUM_ARRAY_IDX, > > }; > > > > +struct bpf_spinlock_cnt { > > + struct bpf_spin_lock lock; > > + __u32 cnt; > > +}; > > + > > #define CHECK(condition, tag, format...) ({ \ > > int __ret = !!(condition); \ > > if (__ret) { \ > > @@ -50,6 +55,8 @@ enum bpf_linum_array_idx { > > #define DATA_LEN sizeof(DATA) > > > > static struct sockaddr_in6 srv_sa6, cli_sa6; > > +static int sk_pkt_out_cnt10_fd; > > +static int sk_pkt_out_cnt_fd; > > static int linum_map_fd; > > static int addr_map_fd; > > static int tp_map_fd; > > @@ -220,28 +227,90 @@ static void check_result(void) > > "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", > > ingress_linum); > > > > - CHECK(srv_tp.data_segs_out != 1 || > > + CHECK(srv_tp.data_segs_out != 2 || > > srv_tp.data_segs_in || > > srv_tp.snd_cwnd != 10 || > > srv_tp.total_retrans || > > - srv_tp.bytes_acked != DATA_LEN, > > + srv_tp.bytes_acked != 2 * DATA_LEN, > > "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", > > egress_linum); > > > > CHECK(cli_tp.data_segs_out || > > - cli_tp.data_segs_in != 1 || > > + cli_tp.data_segs_in != 2 || > > cli_tp.snd_cwnd != 10 || > > cli_tp.total_retrans || > > - cli_tp.bytes_received != DATA_LEN, > > + cli_tp.bytes_received != 2 * DATA_LEN, > > "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", > > egress_linum); > > } > > > > +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) > > +{ > > + struct bpf_spinlock_cnt pkt_out_cnt = {} , pkt_out_cnt10 = {}; > > + int err; > > + > > + pkt_out_cnt.cnt = ~0; > > + pkt_out_cnt10.cnt = ~0; > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); > > + if (!err) > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, > > + &pkt_out_cnt10); > > + > > + /* The bpf prog only counts for fullsock and > > + * passive conneciton did not become fullsock until 3WHS > > + * had been finished. > > + * The bpf prog only counted two data packet out but we > > + * specially init accept_fd's pkt_out_cnt by 2 in > > + * init_sk_storage(). Hence, 4 here. > > + */ > > + CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, > > + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", > > + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", > > + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); > > + > > + pkt_out_cnt.cnt = ~0; > > + pkt_out_cnt10.cnt = ~0; > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); > > + if (!err) > > + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, > > + &pkt_out_cnt10); > > + /* Active connection is fullsock from the beginning. > > + * 1 SYN and 1 ACK during 3WHS > > + * 2 Acks on data packet. > > + * > > + * The bpf_prog initialized it to 0xeB9F. > > + */ > > + CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || > > + pkt_out_cnt10.cnt != 0xeB9F + 40, > > + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", > > + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", > > + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); > > +} > > + > > +static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) > > +{ > > + struct bpf_spinlock_cnt scnt = {}; > > + int err; > > + > > + scnt.cnt = pkt_out_cnt; > > + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, > > + BPF_NOEXIST); > > + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", > > + "err:%d errno:%d", err, errno); > > + > > + scnt.cnt *= 10; > > + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, > > + BPF_NOEXIST); > > + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", > > + "err:%d errno:%d", err, errno); > > +} > > + > > static void test(void) > > { > > int listen_fd, cli_fd, accept_fd, epfd, err; > > struct epoll_event ev; > > socklen_t addrlen; > > + int i; > > > > addrlen = sizeof(struct sockaddr_in6); > > ev.events = EPOLLIN; > > @@ -308,24 +377,30 @@ static void test(void) > > accept_fd, errno); > > close(listen_fd); > > > > - /* Send some data from accept_fd to cli_fd */ > > - err = send(accept_fd, DATA, DATA_LEN, 0); > > - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", > > - err, errno); > > - > > - /* Have some timeout in recv(cli_fd). Just in case. */ > > ev.data.fd = cli_fd; > > err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); > > CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", > > err, errno); > > > > - err = epoll_wait(epfd, &ev, 1, 1000); > > - CHECK(err != 1 || ev.data.fd != cli_fd, > > - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", > > - err, errno, ev.data.fd, cli_fd); > > + init_sk_storage(accept_fd, 2); > > > > - err = recv(cli_fd, NULL, 0, MSG_TRUNC); > > - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); > > + for (i = 0; i < 2; i++) { > > + /* Send some data from accept_fd to cli_fd */ > > + err = send(accept_fd, DATA, DATA_LEN, 0); > > + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", > > + err, errno); > > + > > + /* Have some timeout in recv(cli_fd). Just in case. */ > > + err = epoll_wait(epfd, &ev, 1, 1000); > > + CHECK(err != 1 || ev.data.fd != cli_fd, > > + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", > > + err, errno, ev.data.fd, cli_fd); > > + > > + err = recv(cli_fd, NULL, 0, MSG_TRUNC); > > + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); > > + } > > + > > + check_sk_pkt_out_cnt(accept_fd, cli_fd); > > > > close(epfd); > > close(accept_fd); > > @@ -395,6 +470,14 @@ int main(int argc, char **argv) > > CHECK(!map, "cannot find linum_map", "(null)"); > > linum_map_fd = bpf_map__fd(map); > > > > + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); > > + CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); > > + sk_pkt_out_cnt_fd = bpf_map__fd(map); > > + > > + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); > > + CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); > > + sk_pkt_out_cnt10_fd = bpf_map__fd(map); > > + > > test(); > > > > bpf_object__close(obj); > > -- > > 2.17.1 > >
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 59e221586cf7..6e80b66d7fb1 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -211,6 +211,11 @@ static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, unsigned long long flags, unsigned long *res) = (void *) BPF_FUNC_strtoul; +static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, + void *value, __u64 flags) = + (void *) BPF_FUNC_sk_storage_get; +static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = + (void *)BPF_FUNC_sk_storage_delete; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index 37328f148538..93ef3e039da3 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -55,6 +55,31 @@ struct bpf_map_def SEC("maps") linum_map = { .max_entries = __NR_BPF_LINUM_ARRAY_IDX, }; +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; +}; + +struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_spinlock_cnt), + .max_entries = 0, + .map_flags = BPF_F_NO_PREALLOC, +}; + +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); + +struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_spinlock_cnt), + .max_entries = 0, + .map_flags = BPF_F_NO_PREALLOC, +}; + +BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt); + static bool is_loopback6(__u32 *a6) { return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); @@ -120,7 +145,9 @@ static void tpcpy(struct bpf_tcp_sock *dst, SEC("cgroup_skb/egress") int egress_read_sock_fields(struct __sk_buff *skb) { + struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; + struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; @@ -161,6 +188,28 @@ int egress_read_sock_fields(struct __sk_buff *skb) skcpy(sk_ret, sk); tpcpy(tp_ret, tp); + if (result_idx == EGRESS_SRV_IDX) { + /* The userspace has created it for srv sk */ + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, + 0, 0); + } else { + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, + &cli_cnt_init, + BPF_SK_STORAGE_GET_F_CREATE); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, + sk, &cli_cnt_init, + BPF_SK_STORAGE_GET_F_CREATE); + } + + if (!pkt_out_cnt || !pkt_out_cnt10) + RETURN; + + pkt_out_cnt->cnt += 1; + bpf_spin_lock(&pkt_out_cnt10->lock); + pkt_out_cnt10->cnt += 10; + bpf_spin_unlock(&pkt_out_cnt10->lock); + RETURN; } diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index dcae7f664dce..550baf8aa1ca 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -35,6 +35,11 @@ enum bpf_linum_array_idx { __NR_BPF_LINUM_ARRAY_IDX, }; +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; +}; + #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ @@ -50,6 +55,8 @@ enum bpf_linum_array_idx { #define DATA_LEN sizeof(DATA) static struct sockaddr_in6 srv_sa6, cli_sa6; +static int sk_pkt_out_cnt10_fd; +static int sk_pkt_out_cnt_fd; static int linum_map_fd; static int addr_map_fd; static int tp_map_fd; @@ -220,28 +227,90 @@ static void check_result(void) "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", ingress_linum); - CHECK(srv_tp.data_segs_out != 1 || + CHECK(srv_tp.data_segs_out != 2 || srv_tp.data_segs_in || srv_tp.snd_cwnd != 10 || srv_tp.total_retrans || - srv_tp.bytes_acked != DATA_LEN, + srv_tp.bytes_acked != 2 * DATA_LEN, "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", egress_linum); CHECK(cli_tp.data_segs_out || - cli_tp.data_segs_in != 1 || + cli_tp.data_segs_in != 2 || cli_tp.snd_cwnd != 10 || cli_tp.total_retrans || - cli_tp.bytes_received != DATA_LEN, + cli_tp.bytes_received != 2 * DATA_LEN, "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", egress_linum); } +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) +{ + struct bpf_spinlock_cnt pkt_out_cnt = {} , pkt_out_cnt10 = {}; + int err; + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, + &pkt_out_cnt10); + + /* The bpf prog only counts for fullsock and + * passive conneciton did not become fullsock until 3WHS + * had been finished. + * The bpf prog only counted two data packet out but we + * specially init accept_fd's pkt_out_cnt by 2 in + * init_sk_storage(). Hence, 4 here. + */ + CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, + &pkt_out_cnt10); + /* Active connection is fullsock from the beginning. + * 1 SYN and 1 ACK during 3WHS + * 2 Acks on data packet. + * + * The bpf_prog initialized it to 0xeB9F. + */ + CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || + pkt_out_cnt10.cnt != 0xeB9F + 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); +} + +static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) +{ + struct bpf_spinlock_cnt scnt = {}; + int err; + + scnt.cnt = pkt_out_cnt; + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, + BPF_NOEXIST); + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", + "err:%d errno:%d", err, errno); + + scnt.cnt *= 10; + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, + BPF_NOEXIST); + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", + "err:%d errno:%d", err, errno); +} + static void test(void) { int listen_fd, cli_fd, accept_fd, epfd, err; struct epoll_event ev; socklen_t addrlen; + int i; addrlen = sizeof(struct sockaddr_in6); ev.events = EPOLLIN; @@ -308,24 +377,30 @@ static void test(void) accept_fd, errno); close(listen_fd); - /* Send some data from accept_fd to cli_fd */ - err = send(accept_fd, DATA, DATA_LEN, 0); - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", - err, errno); - - /* Have some timeout in recv(cli_fd). Just in case. */ ev.data.fd = cli_fd; err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", err, errno); - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", - err, errno, ev.data.fd, cli_fd); + init_sk_storage(accept_fd, 2); - err = recv(cli_fd, NULL, 0, MSG_TRUNC); - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + for (i = 0; i < 2; i++) { + /* Send some data from accept_fd to cli_fd */ + err = send(accept_fd, DATA, DATA_LEN, 0); + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", + err, errno); + + /* Have some timeout in recv(cli_fd). Just in case. */ + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != cli_fd, + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", + err, errno, ev.data.fd, cli_fd); + + err = recv(cli_fd, NULL, 0, MSG_TRUNC); + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + } + + check_sk_pkt_out_cnt(accept_fd, cli_fd); close(epfd); close(accept_fd); @@ -395,6 +470,14 @@ int main(int argc, char **argv) CHECK(!map, "cannot find linum_map", "(null)"); linum_map_fd = bpf_map__fd(map); + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); + CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); + sk_pkt_out_cnt_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); + CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); + sk_pkt_out_cnt10_fd = bpf_map__fd(map); + test(); bpf_object__close(obj);
This patch rides on an existing BPF_PROG_TYPE_CGROUP_SKB test (test_sock_fields.c) to do a TCP end-to-end test on the new bpf_sk_storage_* helpers. Signed-off-by: Martin KaFai Lau <kafai@fb.com> --- tools/testing/selftests/bpf/bpf_helpers.h | 5 + .../bpf/progs/test_sock_fields_kern.c | 49 ++++++++ .../testing/selftests/bpf/test_sock_fields.c | 115 +++++++++++++++--- 3 files changed, 153 insertions(+), 16 deletions(-)