Message ID | 20190911190218.22628-1-danieltimlee@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next,v3] samples: bpf: add max_pckt_size option at xdp_adjust_tail | expand |
On 9/11/19 8:02 PM, Daniel T. Lee wrote: > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > to 600. To make this size flexible, a new map 'pcktsz' is added. > > By updating new packet size to this map from the userland, > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > will be 600 as a default. > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > --- > Changes in v2: > - Change the helper to fetch map from 'bpf_map__next' to > 'bpf_object__find_map_fd_by_name'. > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > 2 files changed, 41 insertions(+), 10 deletions(-) > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > index 411fdb21f8bc..d6d84ffe6a7a 100644 > --- a/samples/bpf/xdp_adjust_tail_kern.c > +++ b/samples/bpf/xdp_adjust_tail_kern.c > @@ -25,6 +25,13 @@ > #define ICMP_TOOBIG_SIZE 98 > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > +struct bpf_map_def SEC("maps") pcktsz = { > + .type = BPF_MAP_TYPE_ARRAY, > + .key_size = sizeof(__u32), > + .value_size = sizeof(__u32), > + .max_entries = 1, > +}; We have new map definition format like in tools/testing/selftests/bpf/progs/bpf_flow.c. But looks like most samples/bpf still use SEC("maps"). I guess we can leave it for now, and if needed, later on a massive conversion for all samples/bpf/ bpf programs can be done. > + > struct bpf_map_def SEC("maps") icmpcnt = { > .type = BPF_MAP_TYPE_ARRAY, > .key_size = sizeof(__u32), > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > *csum = csum_fold_helper(*csum); > } > > -static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) > +static __always_inline int send_icmp4_too_big(struct xdp_md *xdp, > + __u32 max_pckt_size) > { > int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); > > @@ -92,7 +100,7 @@ static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) > orig_iph = data + off; > icmp_hdr->type = ICMP_DEST_UNREACH; > icmp_hdr->code = ICMP_FRAG_NEEDED; > - icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); > + icmp_hdr->un.frag.mtu = htons(max_pckt_size - sizeof(struct ethhdr)); > icmp_hdr->checksum = 0; > ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); > icmp_hdr->checksum = csum; > @@ -118,14 +126,21 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) > { > void *data_end = (void *)(long)xdp->data_end; > void *data = (void *)(long)xdp->data; > + __u32 max_pckt_size = MAX_PCKT_SIZE; > + __u32 *pckt_sz; > + __u32 key = 0; The above two new definitions may the code not in reverse Christmas definition order, could you fix it? > int pckt_size = data_end - data; > int offset; > > - if (pckt_size > MAX_PCKT_SIZE) { > + pckt_sz = bpf_map_lookup_elem(&pcktsz, &key); > + if (pckt_sz && *pckt_sz) > + max_pckt_size = *pckt_sz; > + > + if (pckt_size > max_pckt_size) { > offset = pckt_size - ICMP_TOOBIG_SIZE; > if (bpf_xdp_adjust_tail(xdp, 0 - offset)) > return XDP_PASS; We could have the following scenario: max_pckt_size = 1 pckt_size = 2 offset = -96 bpf_xdp_adjust_tail return -EINVAL so we return XDP_PASS now Maybe you want to do if (pckt_size > max(max_pckt_size, ICMP_TOOBIG_SIZE)) { ... } as in original code, bpf_xdp_adjust_tail(...) already succeeds. > - return send_icmp4_too_big(xdp); > + return send_icmp4_too_big(xdp, max_pckt_size); > } > return XDP_PASS; > } > diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c > index a3596b617c4c..aef6c69a48a7 100644 > --- a/samples/bpf/xdp_adjust_tail_user.c > +++ b/samples/bpf/xdp_adjust_tail_user.c > @@ -23,6 +23,7 @@ > #include "libbpf.h" > > #define STATS_INTERVAL_S 2U > +#define MAX_PCKT_SIZE 600 > > static int ifindex = -1; > static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; > @@ -72,6 +73,7 @@ static void usage(const char *cmd) > printf("Usage: %s [...]\n", cmd); > printf(" -i <ifname|ifindex> Interface\n"); > printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); > + printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE); > printf(" -S use skb-mode\n"); > printf(" -N enforce native mode\n"); > printf(" -F force loading prog\n"); > @@ -85,13 +87,14 @@ int main(int argc, char **argv) > .prog_type = BPF_PROG_TYPE_XDP, > }; > unsigned char opt_flags[256] = {}; > - const char *optstr = "i:T:SNFh"; > + const char *optstr = "i:T:P:SNFh"; > struct bpf_prog_info info = {}; > __u32 info_len = sizeof(info); > + __u32 max_pckt_size = 0; > + __u32 key = 0; > unsigned int kill_after_s = 0; > int i, prog_fd, map_fd, opt; > struct bpf_object *obj; > - struct bpf_map *map; > char filename[256]; > int err; > > @@ -110,6 +113,9 @@ int main(int argc, char **argv) > case 'T': > kill_after_s = atoi(optarg); > break; > + case 'P': > + max_pckt_size = atoi(optarg); > + break; > case 'S': > xdp_flags |= XDP_FLAGS_SKB_MODE; > break; > @@ -150,12 +156,22 @@ int main(int argc, char **argv) > if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) > return 1; > > - map = bpf_map__next(NULL, obj); > - if (!map) { > - printf("finding a map in obj file failed\n"); > + /* update pcktsz map */ > + if (max_pckt_size) { > + map_fd = bpf_object__find_map_fd_by_name(obj, "pcktsz"); > + if (!map_fd) { Let us test map_fd and below prog_fd with '< 0" instead of "!= 0'. In this particular sample, "! = 0" is okay since we did not close stdin. But in programs if stdin is closed, the fd 0 may be reused for map_fd. Let us just keep good coding practice here. > + printf("finding a pcktsz map in obj file failed\n"); > + return 1; > + } > + bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY); > + } > + > + /* fetch icmpcnt map */ > + map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt"); > + if (!map_fd) { > + printf("finding a icmpcnt map in obj file failed\n"); > return 1; > } > - map_fd = bpf_map__fd(map); > > if (!prog_fd) { > printf("load_bpf_file: %s\n", strerror(errno)); >
On Sat, Sep 14, 2019 at 7:34 AM Yonghong Song <yhs@fb.com> wrote: > > > > On 9/11/19 8:02 PM, Daniel T. Lee wrote: > > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > > to 600. To make this size flexible, a new map 'pcktsz' is added. > > > > By updating new packet size to this map from the userland, > > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > > will be 600 as a default. > > > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > > > --- > > Changes in v2: > > - Change the helper to fetch map from 'bpf_map__next' to > > 'bpf_object__find_map_fd_by_name'. > > > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > > 2 files changed, 41 insertions(+), 10 deletions(-) > > > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > > index 411fdb21f8bc..d6d84ffe6a7a 100644 > > --- a/samples/bpf/xdp_adjust_tail_kern.c > > +++ b/samples/bpf/xdp_adjust_tail_kern.c > > @@ -25,6 +25,13 @@ > > #define ICMP_TOOBIG_SIZE 98 > > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > > > +struct bpf_map_def SEC("maps") pcktsz = { > > + .type = BPF_MAP_TYPE_ARRAY, > > + .key_size = sizeof(__u32), > > + .value_size = sizeof(__u32), > > + .max_entries = 1, > > +}; > > We have new map definition format like in > tools/testing/selftests/bpf/progs/bpf_flow.c. > But looks like most samples/bpf still use SEC("maps"). > I guess we can leave it for now, and if needed, > later on a massive conversion for all samples/bpf/ > bpf programs can be done. > Thanks for the detailed review! Didn't notice there was an update with map definition. This new map definition format looks very neat. > > + > > struct bpf_map_def SEC("maps") icmpcnt = { > > .type = BPF_MAP_TYPE_ARRAY, > > .key_size = sizeof(__u32), > > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > > *csum = csum_fold_helper(*csum); > > } > > > > -static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) > > +static __always_inline int send_icmp4_too_big(struct xdp_md *xdp, > > + __u32 max_pckt_size) > > { > > int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); > > > > @@ -92,7 +100,7 @@ static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) > > orig_iph = data + off; > > icmp_hdr->type = ICMP_DEST_UNREACH; > > icmp_hdr->code = ICMP_FRAG_NEEDED; > > - icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); > > + icmp_hdr->un.frag.mtu = htons(max_pckt_size - sizeof(struct ethhdr)); > > icmp_hdr->checksum = 0; > > ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); > > icmp_hdr->checksum = csum; > > @@ -118,14 +126,21 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) > > { > > void *data_end = (void *)(long)xdp->data_end; > > void *data = (void *)(long)xdp->data; > > + __u32 max_pckt_size = MAX_PCKT_SIZE; > > + __u32 *pckt_sz; > > + __u32 key = 0; > > The above two new definitions may the code not in > reverse Christmas definition order, could you fix it? > I'll fix it right away! > > int pckt_size = data_end - data; > > int offset; > > > > - if (pckt_size > MAX_PCKT_SIZE) { > > + pckt_sz = bpf_map_lookup_elem(&pcktsz, &key); > > + if (pckt_sz && *pckt_sz) > > + max_pckt_size = *pckt_sz; > > + > > + if (pckt_size > max_pckt_size) { > > offset = pckt_size - ICMP_TOOBIG_SIZE; > > if (bpf_xdp_adjust_tail(xdp, 0 - offset)) > > return XDP_PASS; > > We could have the following scenario: > max_pckt_size = 1 > pckt_size = 2 > offset = -96 > bpf_xdp_adjust_tail return -EINVAL > so we return XDP_PASS now > > Maybe you want to do > if (pckt_size > max(max_pckt_size, ICMP_TOOBIG_SIZE)) { > ... > } > as in original code, bpf_xdp_adjust_tail(...) already succeeds. > I'll try to fix this way. > > - return send_icmp4_too_big(xdp); > > + return send_icmp4_too_big(xdp, max_pckt_size); > > } > > return XDP_PASS; > > } > > diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c > > index a3596b617c4c..aef6c69a48a7 100644 > > --- a/samples/bpf/xdp_adjust_tail_user.c > > +++ b/samples/bpf/xdp_adjust_tail_user.c > > @@ -23,6 +23,7 @@ > > #include "libbpf.h" > > > > #define STATS_INTERVAL_S 2U > > +#define MAX_PCKT_SIZE 600 > > > > static int ifindex = -1; > > static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; > > @@ -72,6 +73,7 @@ static void usage(const char *cmd) > > printf("Usage: %s [...]\n", cmd); > > printf(" -i <ifname|ifindex> Interface\n"); > > printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); > > + printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE); > > printf(" -S use skb-mode\n"); > > printf(" -N enforce native mode\n"); > > printf(" -F force loading prog\n"); > > @@ -85,13 +87,14 @@ int main(int argc, char **argv) > > .prog_type = BPF_PROG_TYPE_XDP, > > }; > > unsigned char opt_flags[256] = {}; > > - const char *optstr = "i:T:SNFh"; > > + const char *optstr = "i:T:P:SNFh"; > > struct bpf_prog_info info = {}; > > __u32 info_len = sizeof(info); > > + __u32 max_pckt_size = 0; > > + __u32 key = 0; > > unsigned int kill_after_s = 0; > > int i, prog_fd, map_fd, opt; > > struct bpf_object *obj; > > - struct bpf_map *map; > > char filename[256]; > > int err; > > > > @@ -110,6 +113,9 @@ int main(int argc, char **argv) > > case 'T': > > kill_after_s = atoi(optarg); > > break; > > + case 'P': > > + max_pckt_size = atoi(optarg); > > + break; > > case 'S': > > xdp_flags |= XDP_FLAGS_SKB_MODE; > > break; > > @@ -150,12 +156,22 @@ int main(int argc, char **argv) > > if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) > > return 1; > > > > - map = bpf_map__next(NULL, obj); > > - if (!map) { > > - printf("finding a map in obj file failed\n"); > > + /* update pcktsz map */ > > + if (max_pckt_size) { > > + map_fd = bpf_object__find_map_fd_by_name(obj, "pcktsz"); > > + if (!map_fd) { > > Let us test map_fd and below prog_fd with '< 0" instead of "!= 0'. > In this particular sample, "! = 0" is okay since we did not close > stdin. But in programs if stdin is closed, the fd 0 may be reused > for map_fd. Let us just keep good coding practice here. > I didn't think of those details. I'll update this right away! Once again, I really appreciate your time and effort for the review. Thank you. Best, Daniel > > + printf("finding a pcktsz map in obj file failed\n"); > > + return 1; > > + } > > + bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY); > > + } > > + > > + /* fetch icmpcnt map */ > > + map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt"); > > + if (!map_fd) { > > + printf("finding a icmpcnt map in obj file failed\n"); > > return 1; > > } > > - map_fd = bpf_map__fd(map); > > > > if (!prog_fd) { > > printf("load_bpf_file: %s\n", strerror(errno)); > >
On Wed, Sep 11, 2019 at 2:33 PM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > to 600. To make this size flexible, a new map 'pcktsz' is added. > > By updating new packet size to this map from the userland, > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > will be 600 as a default. > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > --- > Changes in v2: > - Change the helper to fetch map from 'bpf_map__next' to > 'bpf_object__find_map_fd_by_name'. > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > 2 files changed, 41 insertions(+), 10 deletions(-) > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > index 411fdb21f8bc..d6d84ffe6a7a 100644 > --- a/samples/bpf/xdp_adjust_tail_kern.c > +++ b/samples/bpf/xdp_adjust_tail_kern.c > @@ -25,6 +25,13 @@ > #define ICMP_TOOBIG_SIZE 98 > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > +struct bpf_map_def SEC("maps") pcktsz = { > + .type = BPF_MAP_TYPE_ARRAY, > + .key_size = sizeof(__u32), > + .value_size = sizeof(__u32), > + .max_entries = 1, > +}; > + Hey Daniel, This looks like an ideal use case for global variables on BPF side. I think it's much cleaner and will make BPF side of things simpler. Would you mind giving global data a spin instead of adding this map? > struct bpf_map_def SEC("maps") icmpcnt = { > .type = BPF_MAP_TYPE_ARRAY, > .key_size = sizeof(__u32), > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > *csum = csum_fold_helper(*csum); > } > [...]
On Tue, Sep 17, 2019 at 1:04 PM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Wed, Sep 11, 2019 at 2:33 PM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > > > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > > to 600. To make this size flexible, a new map 'pcktsz' is added. > > > > By updating new packet size to this map from the userland, > > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > > will be 600 as a default. > > > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > > > --- > > Changes in v2: > > - Change the helper to fetch map from 'bpf_map__next' to > > 'bpf_object__find_map_fd_by_name'. > > > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > > 2 files changed, 41 insertions(+), 10 deletions(-) > > > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > > index 411fdb21f8bc..d6d84ffe6a7a 100644 > > --- a/samples/bpf/xdp_adjust_tail_kern.c > > +++ b/samples/bpf/xdp_adjust_tail_kern.c > > @@ -25,6 +25,13 @@ > > #define ICMP_TOOBIG_SIZE 98 > > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > > > +struct bpf_map_def SEC("maps") pcktsz = { > > + .type = BPF_MAP_TYPE_ARRAY, > > + .key_size = sizeof(__u32), > > + .value_size = sizeof(__u32), > > + .max_entries = 1, > > +}; > > + > > Hey Daniel, > > This looks like an ideal use case for global variables on BPF side. I > think it's much cleaner and will make BPF side of things simpler. > Would you mind giving global data a spin instead of adding this map? > Sure thing! But, I'm not sure there is global variables for BPF? AFAIK, there aren't any support for global variables yet in BPF program (_kern.c). # when defining global variable at _kern.c libbpf: bpf: relocation: not yet supported relo for non-static global '<var>' variable found in insns[39].code 0x18 By the way, thanks for the review. Thanks, Daniel > > struct bpf_map_def SEC("maps") icmpcnt = { > > .type = BPF_MAP_TYPE_ARRAY, > > .key_size = sizeof(__u32), > > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > > *csum = csum_fold_helper(*csum); > > } > > > > [...]
On Wed, Sep 18, 2019 at 10:37 AM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > On Tue, Sep 17, 2019 at 1:04 PM Andrii Nakryiko > <andrii.nakryiko@gmail.com> wrote: > > > > On Wed, Sep 11, 2019 at 2:33 PM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > > > > > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > > > to 600. To make this size flexible, a new map 'pcktsz' is added. > > > > > > By updating new packet size to this map from the userland, > > > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > > > > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > > > will be 600 as a default. > > > > > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > > > > > --- > > > Changes in v2: > > > - Change the helper to fetch map from 'bpf_map__next' to > > > 'bpf_object__find_map_fd_by_name'. > > > > > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > > > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > > > 2 files changed, 41 insertions(+), 10 deletions(-) > > > > > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > > > index 411fdb21f8bc..d6d84ffe6a7a 100644 > > > --- a/samples/bpf/xdp_adjust_tail_kern.c > > > +++ b/samples/bpf/xdp_adjust_tail_kern.c > > > @@ -25,6 +25,13 @@ > > > #define ICMP_TOOBIG_SIZE 98 > > > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > > > > > +struct bpf_map_def SEC("maps") pcktsz = { > > > + .type = BPF_MAP_TYPE_ARRAY, > > > + .key_size = sizeof(__u32), > > > + .value_size = sizeof(__u32), > > > + .max_entries = 1, > > > +}; > > > + > > > > Hey Daniel, > > > > This looks like an ideal use case for global variables on BPF side. I > > think it's much cleaner and will make BPF side of things simpler. > > Would you mind giving global data a spin instead of adding this map? > > > > Sure thing! > But, I'm not sure there is global variables for BPF? > AFAIK, there aren't any support for global variables yet in BPF > program (_kern.c). > > # when defining global variable at _kern.c > libbpf: bpf: relocation: not yet supported relo for non-static > global '<var>' variable found in insns[39].code 0x18 just what it says: use static global variable (also volatile to prevent compiler optimizations) :) static volatile __u32 pcktsz; /* this should work */ > > By the way, thanks for the review. > > Thanks, > Daniel > > > > > struct bpf_map_def SEC("maps") icmpcnt = { > > > .type = BPF_MAP_TYPE_ARRAY, > > > .key_size = sizeof(__u32), > > > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > > > *csum = csum_fold_helper(*csum); > > > } > > > > > > > [...]
On Thu, Sep 19, 2019 at 3:00 AM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Wed, Sep 18, 2019 at 10:37 AM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > > > On Tue, Sep 17, 2019 at 1:04 PM Andrii Nakryiko > > <andrii.nakryiko@gmail.com> wrote: > > > > > > On Wed, Sep 11, 2019 at 2:33 PM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > > > > > > > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > > > > to 600. To make this size flexible, a new map 'pcktsz' is added. > > > > > > > > By updating new packet size to this map from the userland, > > > > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > > > > > > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > > > > will be 600 as a default. > > > > > > > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > > > > > > > --- > > > > Changes in v2: > > > > - Change the helper to fetch map from 'bpf_map__next' to > > > > 'bpf_object__find_map_fd_by_name'. > > > > > > > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > > > > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > > > > 2 files changed, 41 insertions(+), 10 deletions(-) > > > > > > > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > > > > index 411fdb21f8bc..d6d84ffe6a7a 100644 > > > > --- a/samples/bpf/xdp_adjust_tail_kern.c > > > > +++ b/samples/bpf/xdp_adjust_tail_kern.c > > > > @@ -25,6 +25,13 @@ > > > > #define ICMP_TOOBIG_SIZE 98 > > > > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > > > > > > > +struct bpf_map_def SEC("maps") pcktsz = { > > > > + .type = BPF_MAP_TYPE_ARRAY, > > > > + .key_size = sizeof(__u32), > > > > + .value_size = sizeof(__u32), > > > > + .max_entries = 1, > > > > +}; > > > > + > > > > > > Hey Daniel, > > > > > > This looks like an ideal use case for global variables on BPF side. I > > > think it's much cleaner and will make BPF side of things simpler. > > > Would you mind giving global data a spin instead of adding this map? > > > > > > > Sure thing! > > But, I'm not sure there is global variables for BPF? > > AFAIK, there aren't any support for global variables yet in BPF > > program (_kern.c). > > > > # when defining global variable at _kern.c > > libbpf: bpf: relocation: not yet supported relo for non-static > > global '<var>' variable found in insns[39].code 0x18 > > just what it says: use static global variable (also volatile to > prevent compiler optimizations) :) > > static volatile __u32 pcktsz; /* this should work */ > My apologies, but I'm not sure I'm following. What you are saying is, should I define global variable to _kern,c and access and modify this variable from _user.c? For example, <_kern.c> static volatile __u32 pcktsz = 300; <_user.c> extern __u32 pcktsz; // Later in code pcktsz = 400; Is this code means similar to what you've said? AFAIK, 'static' keyword for global variable restricts scope to file itself, so the 'accessing' and 'modifying' this variable from the <_user.c> isn't available. The reason why I've used bpf map for this 'pcktsz' option is, I've wanted to run this kernel xdp program (xdp_adjust_tail_kern.o) as it itself, not heavily controlled by user program (./xdp_adjust_tail). When this 'pcktsz' option is implemented in bpf map, user can simply modify 'map' to change this size. (such as bpftool prog map) But when this variable comes to global data, it can't be changed after the program gets loaded. I really appreciate your time and effort for the review. But I'm sorry that I seem to get it wrong. Thanks, Daniel > > > > By the way, thanks for the review. > > > > Thanks, > > Daniel > > > > > > > > struct bpf_map_def SEC("maps") icmpcnt = { > > > > .type = BPF_MAP_TYPE_ARRAY, > > > > .key_size = sizeof(__u32), > > > > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > > > > *csum = csum_fold_helper(*csum); > > > > } > > > > > > > > > > [...]
On Thu, Sep 19, 2019 at 2:16 AM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > On Thu, Sep 19, 2019 at 3:00 AM Andrii Nakryiko > <andrii.nakryiko@gmail.com> wrote: > > > > On Wed, Sep 18, 2019 at 10:37 AM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > > > > > On Tue, Sep 17, 2019 at 1:04 PM Andrii Nakryiko > > > <andrii.nakryiko@gmail.com> wrote: > > > > > > > > On Wed, Sep 11, 2019 at 2:33 PM Daniel T. Lee <danieltimlee@gmail.com> wrote: > > > > > > > > > > Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited > > > > > to 600. To make this size flexible, a new map 'pcktsz' is added. > > > > > > > > > > By updating new packet size to this map from the userland, > > > > > xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. > > > > > > > > > > If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet > > > > > will be 600 as a default. > > > > > > > > > > Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> > > > > > > > > > > --- > > > > > Changes in v2: > > > > > - Change the helper to fetch map from 'bpf_map__next' to > > > > > 'bpf_object__find_map_fd_by_name'. > > > > > > > > > > samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- > > > > > samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ > > > > > 2 files changed, 41 insertions(+), 10 deletions(-) > > > > > > > > > > diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c > > > > > index 411fdb21f8bc..d6d84ffe6a7a 100644 > > > > > --- a/samples/bpf/xdp_adjust_tail_kern.c > > > > > +++ b/samples/bpf/xdp_adjust_tail_kern.c > > > > > @@ -25,6 +25,13 @@ > > > > > #define ICMP_TOOBIG_SIZE 98 > > > > > #define ICMP_TOOBIG_PAYLOAD_SIZE 92 > > > > > > > > > > +struct bpf_map_def SEC("maps") pcktsz = { > > > > > + .type = BPF_MAP_TYPE_ARRAY, > > > > > + .key_size = sizeof(__u32), > > > > > + .value_size = sizeof(__u32), > > > > > + .max_entries = 1, > > > > > +}; > > > > > + > > > > > > > > Hey Daniel, > > > > > > > > This looks like an ideal use case for global variables on BPF side. I > > > > think it's much cleaner and will make BPF side of things simpler. > > > > Would you mind giving global data a spin instead of adding this map? > > > > > > > > > > Sure thing! > > > But, I'm not sure there is global variables for BPF? > > > AFAIK, there aren't any support for global variables yet in BPF > > > program (_kern.c). > > > > > > # when defining global variable at _kern.c > > > libbpf: bpf: relocation: not yet supported relo for non-static > > > global '<var>' variable found in insns[39].code 0x18 > > > > just what it says: use static global variable (also volatile to > > prevent compiler optimizations) :) > > > > static volatile __u32 pcktsz; /* this should work */ > > > > My apologies, but I'm not sure I'm following. > What you are saying is, should I define global variable to _kern,c > and access and modify this variable from _user.c? Exactly. > > For example, > > <_kern.c> > static volatile __u32 pcktsz = 300; So this part is right. > > <_user.c> > extern __u32 pcktsz; > // Later in code > pcktsz = 400; This one is not as simple, unfortunately. From user side you need to find an internal map (it will most probably have .bss suffix) and update it. See selftests/bpf/prog_tests/global_data.c for how it can be done. Eventually working with BPF global data will be much more natural, but we don't yet have that implemented. > > Is this code means similar to what you've said? > AFAIK, 'static' keyword for global variable restricts scope to file itself, > so the 'accessing' and 'modifying' this variable from the <_user.c> > isn't available. > > The reason why I've used bpf map for this 'pcktsz' option is, > I've wanted to run this kernel xdp program (xdp_adjust_tail_kern.o) > as it itself, not heavily controlled by user program (./xdp_adjust_tail). > > When this 'pcktsz' option is implemented in bpf map, user can simply > modify 'map' to change this size. (such as bpftool prog map) > > But when this variable comes to global data, it can't be changed > after the program gets loaded. > > I really appreciate your time and effort for the review. > But I'm sorry that I seem to get it wrong. I understand your confusion, but BPF global data behaves exactly like what you explain. From BPF side it looks like a normal variable. Performance-wise it's also faster than doing explicit map lookup. From user space side it's still a map, though, so you can read/modify it and generally do the same communication between BPF kernel and user space as you are used to with maps. Check selftests, it should make it clearer. > > Thanks, > Daniel > > > > > > > By the way, thanks for the review. > > > > > > Thanks, > > > Daniel > > > > > > > > > > > struct bpf_map_def SEC("maps") icmpcnt = { > > > > > .type = BPF_MAP_TYPE_ARRAY, > > > > > .key_size = sizeof(__u32), > > > > > @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, > > > > > *csum = csum_fold_helper(*csum); > > > > > } > > > > > > > > > > > > > [...]
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c index 411fdb21f8bc..d6d84ffe6a7a 100644 --- a/samples/bpf/xdp_adjust_tail_kern.c +++ b/samples/bpf/xdp_adjust_tail_kern.c @@ -25,6 +25,13 @@ #define ICMP_TOOBIG_SIZE 98 #define ICMP_TOOBIG_PAYLOAD_SIZE 92 +struct bpf_map_def SEC("maps") pcktsz = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + struct bpf_map_def SEC("maps") icmpcnt = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), @@ -64,7 +71,8 @@ static __always_inline void ipv4_csum(void *data_start, int data_size, *csum = csum_fold_helper(*csum); } -static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) +static __always_inline int send_icmp4_too_big(struct xdp_md *xdp, + __u32 max_pckt_size) { int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); @@ -92,7 +100,7 @@ static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) orig_iph = data + off; icmp_hdr->type = ICMP_DEST_UNREACH; icmp_hdr->code = ICMP_FRAG_NEEDED; - icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); + icmp_hdr->un.frag.mtu = htons(max_pckt_size - sizeof(struct ethhdr)); icmp_hdr->checksum = 0; ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); icmp_hdr->checksum = csum; @@ -118,14 +126,21 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; void *data = (void *)(long)xdp->data; + __u32 max_pckt_size = MAX_PCKT_SIZE; + __u32 *pckt_sz; + __u32 key = 0; int pckt_size = data_end - data; int offset; - if (pckt_size > MAX_PCKT_SIZE) { + pckt_sz = bpf_map_lookup_elem(&pcktsz, &key); + if (pckt_sz && *pckt_sz) + max_pckt_size = *pckt_sz; + + if (pckt_size > max_pckt_size) { offset = pckt_size - ICMP_TOOBIG_SIZE; if (bpf_xdp_adjust_tail(xdp, 0 - offset)) return XDP_PASS; - return send_icmp4_too_big(xdp); + return send_icmp4_too_big(xdp, max_pckt_size); } return XDP_PASS; } diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index a3596b617c4c..aef6c69a48a7 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -23,6 +23,7 @@ #include "libbpf.h" #define STATS_INTERVAL_S 2U +#define MAX_PCKT_SIZE 600 static int ifindex = -1; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -72,6 +73,7 @@ static void usage(const char *cmd) printf("Usage: %s [...]\n", cmd); printf(" -i <ifname|ifindex> Interface\n"); printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); + printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); printf(" -F force loading prog\n"); @@ -85,13 +87,14 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; unsigned char opt_flags[256] = {}; - const char *optstr = "i:T:SNFh"; + const char *optstr = "i:T:P:SNFh"; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); + __u32 max_pckt_size = 0; + __u32 key = 0; unsigned int kill_after_s = 0; int i, prog_fd, map_fd, opt; struct bpf_object *obj; - struct bpf_map *map; char filename[256]; int err; @@ -110,6 +113,9 @@ int main(int argc, char **argv) case 'T': kill_after_s = atoi(optarg); break; + case 'P': + max_pckt_size = atoi(optarg); + break; case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; @@ -150,12 +156,22 @@ int main(int argc, char **argv) if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - map = bpf_map__next(NULL, obj); - if (!map) { - printf("finding a map in obj file failed\n"); + /* update pcktsz map */ + if (max_pckt_size) { + map_fd = bpf_object__find_map_fd_by_name(obj, "pcktsz"); + if (!map_fd) { + printf("finding a pcktsz map in obj file failed\n"); + return 1; + } + bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY); + } + + /* fetch icmpcnt map */ + map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt"); + if (!map_fd) { + printf("finding a icmpcnt map in obj file failed\n"); return 1; } - map_fd = bpf_map__fd(map); if (!prog_fd) { printf("load_bpf_file: %s\n", strerror(errno));
Currently, at xdp_adjust_tail_kern.c, MAX_PCKT_SIZE is limited to 600. To make this size flexible, a new map 'pcktsz' is added. By updating new packet size to this map from the userland, xdp_adjust_tail_kern.o will use this value as a new max_pckt_size. If no '-P <MAX_PCKT_SIZE>' option is used, the size of maximum packet will be 600 as a default. Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com> --- Changes in v2: - Change the helper to fetch map from 'bpf_map__next' to 'bpf_object__find_map_fd_by_name'. samples/bpf/xdp_adjust_tail_kern.c | 23 +++++++++++++++++++---- samples/bpf/xdp_adjust_tail_user.c | 28 ++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 10 deletions(-)