Message ID | 20200504062608.2049044-1-yhs@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: implement bpf iterator for kernel data | expand |
On Sun, May 3, 2020 at 11:30 PM Yonghong Song <yhs@fb.com> wrote: > > Two bpf programs are added in this patch for netlink and ipv6_route > target. On my VM, I am able to achieve identical > results compared to /proc/net/netlink and /proc/net/ipv6_route. > > $ cat /proc/net/netlink > sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > .... > 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > $ cat /sys/fs/bpf/my_netlink > sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > .... > 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > > $ cat /proc/net/ipv6_route > fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > $ cat /sys/fs/bpf/my_ipv6_route > fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > > Signed-off-by: Yonghong Song <yhs@fb.com> > --- Looks good, but something weird with printf below... Acked-by: Andrii Nakryiko <andriin@fb.com> > .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 63 ++++++++++++++++ > .../selftests/bpf/progs/bpf_iter_netlink.c | 74 +++++++++++++++++++ > 2 files changed, 137 insertions(+) > create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > new file mode 100644 > index 000000000000..0dee4629298f > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > @@ -0,0 +1,63 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > +#include <bpf/bpf_endian.h> > + > +char _license[] SEC("license") = "GPL"; > + > +extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; > + > +#define RTF_GATEWAY 0x0002 > +#define IFNAMSIZ 16 nit: these look weirdly unaligned :) > +#define fib_nh_gw_family nh_common.nhc_gw_family > +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 > +#define fib_nh_dev nh_common.nhc_dev > + [...] > + dev = fib6_nh->fib_nh_dev; > + if (dev) > + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, > + rt->fib6_ref.refs.counter, 0, flags, dev->name); > + else > + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, > + rt->fib6_ref.refs.counter, 0, flags); hmm... how does it work? you specify 4 params, but format string expects 5. Shouldn't this fail? > + > + return 0; > +} > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > new file mode 100644 > index 000000000000..0a85a621a36d > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > @@ -0,0 +1,74 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > +#include <bpf/bpf_endian.h> > + > +char _license[] SEC("license") = "GPL"; > + > +#define sk_rmem_alloc sk_backlog.rmem_alloc > +#define sk_refcnt __sk_common.skc_refcnt > + > +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) > +#define container_of(ptr, type, member) \ > + ({ \ > + void *__mptr = (void *)(ptr); \ > + ((type *)(__mptr - offsetof(type, member))); \ > + }) we should probably put offsetof(), offsetofend() and container_of() macro into bpf_helpers.h, seems like universal things for kernel datastructs :) [...]
On Sun, May 3, 2020 at 11:30 PM Yonghong Song <yhs@fb.com> wrote: > > Two bpf programs are added in this patch for netlink and ipv6_route > target. On my VM, I am able to achieve identical > results compared to /proc/net/netlink and /proc/net/ipv6_route. > > $ cat /proc/net/netlink > sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > .... > 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > $ cat /sys/fs/bpf/my_netlink > sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > .... > 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > > $ cat /proc/net/ipv6_route > fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > $ cat /sys/fs/bpf/my_ipv6_route > fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > > Signed-off-by: Yonghong Song <yhs@fb.com> > --- Just realized, this is only BPF programs, right? It would be good to have at least minimal user-space program that would verify and load it. Otherwise we'll be just testing compilation and it might "bit rot" a bit... > .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 63 ++++++++++++++++ > .../selftests/bpf/progs/bpf_iter_netlink.c | 74 +++++++++++++++++++ > 2 files changed, 137 insertions(+) > create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > [...]
On 5/5/20 11:04 PM, Andrii Nakryiko wrote: > On Sun, May 3, 2020 at 11:30 PM Yonghong Song <yhs@fb.com> wrote: >> >> Two bpf programs are added in this patch for netlink and ipv6_route >> target. On my VM, I am able to achieve identical >> results compared to /proc/net/netlink and /proc/net/ipv6_route. >> >> $ cat /proc/net/netlink >> sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode >> 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 >> 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 >> 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 >> 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 >> .... >> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 >> 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 >> 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 >> 000000008398fb08 16 0 00000000 0 0 0 2 0 27 >> $ cat /sys/fs/bpf/my_netlink >> sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode >> 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 >> 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 >> 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 >> 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 >> .... >> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 >> 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 >> 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 >> 000000008398fb08 16 0 00000000 0 0 0 2 0 27 >> >> $ cat /proc/net/ipv6_route >> fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo >> fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 >> ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> $ cat /sys/fs/bpf/my_ipv6_route >> fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo >> fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 >> ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> >> Signed-off-by: Yonghong Song <yhs@fb.com> >> --- > > Just realized, this is only BPF programs, right? It would be good to > have at least minimal user-space program that would verify and load > it. Otherwise we'll be just testing compilation and it might "bit rot" > a bit... Totally agree. My latest selftest in test_progs actually tested loading, anon iter creating and reading(). It did not verify contents though. > >> .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 63 ++++++++++++++++ >> .../selftests/bpf/progs/bpf_iter_netlink.c | 74 +++++++++++++++++++ >> 2 files changed, 137 insertions(+) >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c >> > > [...] >
On 5/5/20 11:01 PM, Andrii Nakryiko wrote: > On Sun, May 3, 2020 at 11:30 PM Yonghong Song <yhs@fb.com> wrote: >> >> Two bpf programs are added in this patch for netlink and ipv6_route >> target. On my VM, I am able to achieve identical >> results compared to /proc/net/netlink and /proc/net/ipv6_route. >> >> $ cat /proc/net/netlink >> sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode >> 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 >> 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 >> 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 >> 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 >> .... >> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 >> 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 >> 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 >> 000000008398fb08 16 0 00000000 0 0 0 2 0 27 >> $ cat /sys/fs/bpf/my_netlink >> sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode >> 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 >> 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 >> 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 >> 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 >> .... >> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 >> 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 >> 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 >> 000000008398fb08 16 0 00000000 0 0 0 2 0 27 >> >> $ cat /proc/net/ipv6_route >> fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo >> fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 >> ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> $ cat /sys/fs/bpf/my_ipv6_route >> fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo >> fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 >> ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo >> >> Signed-off-by: Yonghong Song <yhs@fb.com> >> --- > > Looks good, but something weird with printf below... > > Acked-by: Andrii Nakryiko <andriin@fb.com> > >> .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 63 ++++++++++++++++ >> .../selftests/bpf/progs/bpf_iter_netlink.c | 74 +++++++++++++++++++ >> 2 files changed, 137 insertions(+) >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c >> >> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c >> new file mode 100644 >> index 000000000000..0dee4629298f >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c >> @@ -0,0 +1,63 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2020 Facebook */ >> +#include "vmlinux.h" >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> +#include <bpf/bpf_endian.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; >> + >> +#define RTF_GATEWAY 0x0002 >> +#define IFNAMSIZ 16 > > nit: these look weirdly unaligned :) > >> +#define fib_nh_gw_family nh_common.nhc_gw_family >> +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 >> +#define fib_nh_dev nh_common.nhc_dev >> + > > [...] > > >> + dev = fib6_nh->fib_nh_dev; >> + if (dev) >> + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, >> + rt->fib6_ref.refs.counter, 0, flags, dev->name); >> + else >> + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, >> + rt->fib6_ref.refs.counter, 0, flags); > > hmm... how does it work? you specify 4 params, but format string > expects 5. Shouldn't this fail? Thanks for catching this. Unfortunately, we can only detech this at runtime when BPF_SEQ_PRINTF is executed since only then we do format/argument checking. In the above, if I flip condition "if (dev)" to "if (!dev)", the BPF_SEQ_PRRINTF will not print anything and returns -EINVAL. I am wondering whether verifier should do some verification at prog load time to ensure # of args in packed u64 array >= # of format specifier This should capture this case. Or we just assume users should do adequate testing to capture such cases. Note that this won't affect safety of the program so it is totally okay for verifier to delay the checking to runtime. > >> + >> + return 0; >> +} >> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c >> new file mode 100644 >> index 000000000000..0a85a621a36d >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c >> @@ -0,0 +1,74 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2020 Facebook */ >> +#include "vmlinux.h" >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> +#include <bpf/bpf_endian.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +#define sk_rmem_alloc sk_backlog.rmem_alloc >> +#define sk_refcnt __sk_common.skc_refcnt >> + >> +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) >> +#define container_of(ptr, type, member) \ >> + ({ \ >> + void *__mptr = (void *)(ptr); \ >> + ((type *)(__mptr - offsetof(type, member))); \ >> + }) > > we should probably put offsetof(), offsetofend() and container_of() > macro into bpf_helpers.h, seems like universal things for kernel > datastructs :) > > [...] >
On Wed, May 6, 2020 at 6:09 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 5/5/20 11:01 PM, Andrii Nakryiko wrote: > > On Sun, May 3, 2020 at 11:30 PM Yonghong Song <yhs@fb.com> wrote: > >> > >> Two bpf programs are added in this patch for netlink and ipv6_route > >> target. On my VM, I am able to achieve identical > >> results compared to /proc/net/netlink and /proc/net/ipv6_route. > >> > >> $ cat /proc/net/netlink > >> sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > >> 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > >> 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > >> 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > >> 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > >> .... > >> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > >> 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > >> 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > >> 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > >> $ cat /sys/fs/bpf/my_netlink > >> sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > >> 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > >> 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > >> 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > >> 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > >> .... > >> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > >> 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > >> 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > >> 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > >> > >> $ cat /proc/net/ipv6_route > >> fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > >> 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > >> fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > >> ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > >> $ cat /sys/fs/bpf/my_ipv6_route > >> fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > >> 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > >> fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > >> ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > >> 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > >> > >> Signed-off-by: Yonghong Song <yhs@fb.com> > >> --- > > > > Looks good, but something weird with printf below... > > > > Acked-by: Andrii Nakryiko <andriin@fb.com> > > > >> .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 63 ++++++++++++++++ > >> .../selftests/bpf/progs/bpf_iter_netlink.c | 74 +++++++++++++++++++ > >> 2 files changed, 137 insertions(+) > >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > >> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > >> > >> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > >> new file mode 100644 > >> index 000000000000..0dee4629298f > >> --- /dev/null > >> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c > >> @@ -0,0 +1,63 @@ > >> +// SPDX-License-Identifier: GPL-2.0 > >> +/* Copyright (c) 2020 Facebook */ > >> +#include "vmlinux.h" > >> +#include <bpf/bpf_helpers.h> > >> +#include <bpf/bpf_tracing.h> > >> +#include <bpf/bpf_endian.h> > >> + > >> +char _license[] SEC("license") = "GPL"; > >> + > >> +extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; > >> + > >> +#define RTF_GATEWAY 0x0002 > >> +#define IFNAMSIZ 16 > > > > nit: these look weirdly unaligned :) > > > >> +#define fib_nh_gw_family nh_common.nhc_gw_family > >> +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 > >> +#define fib_nh_dev nh_common.nhc_dev > >> + > > > > [...] > > > > > >> + dev = fib6_nh->fib_nh_dev; > >> + if (dev) > >> + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, > >> + rt->fib6_ref.refs.counter, 0, flags, dev->name); > >> + else > >> + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, > >> + rt->fib6_ref.refs.counter, 0, flags); > > > > hmm... how does it work? you specify 4 params, but format string > > expects 5. Shouldn't this fail? > > Thanks for catching this. Unfortunately, we can only detech this at > runtime when BPF_SEQ_PRINTF is executed since only then we do > format/argument checking. > > In the above, if I flip condition "if (dev)" to "if (!dev)", the > BPF_SEQ_PRRINTF will not print anything and returns -EINVAL. > > I am wondering whether verifier should do some verification at prog load > time to ensure > # of args in packed u64 array >= # of format specifier > This should capture this case. Or we just assume users should do > adequate testing to capture such cases. > My initial thought is that it would be too specific knowledge for verifier, but maybe as we add more generic logging/printf capabilities, it might come in handy. But I'd defer for later on. > Note that this won't affect safety of the program so it is totally > okay for verifier to delay the checking to runtime. > > > > >> + > >> + return 0; > >> +} > >> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > >> new file mode 100644 > >> index 000000000000..0a85a621a36d > >> --- /dev/null > >> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c > >> @@ -0,0 +1,74 @@ > >> +// SPDX-License-Identifier: GPL-2.0 > >> +/* Copyright (c) 2020 Facebook */ > >> +#include "vmlinux.h" > >> +#include <bpf/bpf_helpers.h> > >> +#include <bpf/bpf_tracing.h> > >> +#include <bpf/bpf_endian.h> > >> + > >> +char _license[] SEC("license") = "GPL"; > >> + > >> +#define sk_rmem_alloc sk_backlog.rmem_alloc > >> +#define sk_refcnt __sk_common.skc_refcnt > >> + > >> +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) > >> +#define container_of(ptr, type, member) \ > >> + ({ \ > >> + void *__mptr = (void *)(ptr); \ > >> + ((type *)(__mptr - offsetof(type, member))); \ > >> + }) > > > > we should probably put offsetof(), offsetofend() and container_of() > > macro into bpf_helpers.h, seems like universal things for kernel > > datastructs :) > > > > [...] > >
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c new file mode 100644 index 000000000000..0dee4629298f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; + +#define RTF_GATEWAY 0x0002 +#define IFNAMSIZ 16 +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 +#define fib_nh_dev nh_common.nhc_dev + +SEC("iter/ipv6_route") +int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct fib6_info *rt = ctx->rt; + const struct net_device *dev; + struct fib6_nh *fib6_nh; + unsigned int flags; + struct nexthop *nh; + + if (rt == (void *)0) + return 0; + + fib6_nh = &rt->fib6_nh[0]; + flags = rt->fib6_flags; + + /* FIXME: nexthop_is_multipath is not handled here. */ + nh = rt->nh; + if (rt->nh) + fib6_nh = &nh->nh_info->fib6_nh; + + BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); + + if (CONFIG_IPV6_SUBTREES) + BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr, + rt->fib6_src.plen); + else + BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 "); + + if (fib6_nh->fib_nh_gw_family) { + flags |= RTF_GATEWAY; + BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6); + } else { + BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 "); + } + + dev = fib6_nh->fib_nh_dev; + if (dev) + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, + rt->fib6_ref.refs.counter, 0, flags, dev->name); + else + BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric, + rt->fib6_ref.refs.counter, 0, flags); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c new file mode 100644 index 000000000000..0a85a621a36d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +#define sk_rmem_alloc sk_backlog.rmem_alloc +#define sk_refcnt __sk_common.skc_refcnt + +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#define container_of(ptr, type, member) \ + ({ \ + void *__mptr = (void *)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); \ + }) + +static inline struct inode *SOCK_INODE(struct socket *socket) +{ + return &container_of(socket, struct socket_alloc, socket)->vfs_inode; +} + +SEC("iter/netlink") +int dump_netlink(struct bpf_iter__netlink *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct netlink_sock *nlk = ctx->sk; + unsigned long group, ino; + struct inode *inode; + struct socket *sk; + struct sock *s; + + if (nlk == (void *)0) + return 0; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "sk Eth Pid Groups " + "Rmem Wmem Dump Locks Drops " + "Inode\n"); + + s = &nlk->sk; + BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol); + + if (!nlk->groups) { + group = 0; + } else { + /* FIXME: temporary use bpf_probe_read here, needs + * verifier support to do direct access. + */ + bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); + } + BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ", + nlk->portid, (u32)group, + s->sk_rmem_alloc.counter, + s->sk_wmem_alloc.refs.counter - 1, + nlk->cb_running, s->sk_refcnt.refs.counter); + + sk = s->sk_socket; + if (!sk) { + ino = 0; + } else { + /* FIXME: container_of inside SOCK_INODE has a forced + * type conversion, and direct access cannot be used + * with current verifier. + */ + inode = SOCK_INODE(sk); + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + } + BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino); + + return 0; +}
Two bpf programs are added in this patch for netlink and ipv6_route target. On my VM, I am able to achieve identical results compared to /proc/net/netlink and /proc/net/ipv6_route. $ cat /proc/net/netlink sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 .... 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 000000008398fb08 16 0 00000000 0 0 0 2 0 27 $ cat /sys/fs/bpf/my_netlink sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 .... 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 000000008398fb08 16 0 00000000 0 0 0 2 0 27 $ cat /proc/net/ipv6_route fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo $ cat /sys/fs/bpf/my_ipv6_route fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo Signed-off-by: Yonghong Song <yhs@fb.com> --- .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 63 ++++++++++++++++ .../selftests/bpf/progs/bpf_iter_netlink.c | 74 +++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c