diff mbox

[v10,12/12] bpf: add sample for xdp forwarding and rewrite

Message ID 1468955817-10604-13-git-send-email-bblanco@plumgrid.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Brenden Blanco July 19, 2016, 7:16 p.m. UTC
Add a sample that rewrites and forwards packets out on the same
interface. Observed single core forwarding performance of ~10Mpps.

Since the mlx4 driver under test recycles every single packet page, the
perf output shows almost exclusively just the ring management and bpf
program work. Slowdowns are likely occurring due to cache misses.

Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
---
 samples/bpf/Makefile    |   5 +++
 samples/bpf/xdp2_kern.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 samples/bpf/xdp2_kern.c

Comments

Alexei Starovoitov July 19, 2016, 10:05 p.m. UTC | #1
On Tue, Jul 19, 2016 at 12:16:57PM -0700, Brenden Blanco wrote:
> Add a sample that rewrites and forwards packets out on the same
> interface. Observed single core forwarding performance of ~10Mpps.
> 
> Since the mlx4 driver under test recycles every single packet page, the
> perf output shows almost exclusively just the ring management and bpf
> program work. Slowdowns are likely occurring due to cache misses.

long term we need to resurrect your prefetch patch. sux to leave
so much performance on the table.

> +static int parse_ipv4(void *data, u64 nh_off, void *data_end)
> +{
> +	struct iphdr *iph = data + nh_off;
> +
> +	if (iph + 1 > data_end)
> +		return 0;
> +	return iph->protocol;
> +}
> +
> +static int parse_ipv6(void *data, u64 nh_off, void *data_end)
> +{
> +	struct ipv6hdr *ip6h = data + nh_off;
> +
> +	if (ip6h + 1 > data_end)
> +		return 0;
> +	return ip6h->nexthdr;
> +}
...
> +	if (h_proto == htons(ETH_P_IP))
> +		index = parse_ipv4(data, nh_off, data_end);
> +	else if (h_proto == htons(ETH_P_IPV6))
> +		index = parse_ipv6(data, nh_off, data_end);
> +	else
> +		index = 0;
> +
> +	value = bpf_map_lookup_elem(&dropcnt, &index);
> +	if (value)
> +		*value += 1;
> +
> +	if (index == 17) {

not an obvious xdp example. if you'd have to respin for other
reasons please consider 'proto' name and IPPROTO_UDP here.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Brenden Blanco July 20, 2016, 5:38 p.m. UTC | #2
On Tue, Jul 19, 2016 at 03:05:37PM -0700, Alexei Starovoitov wrote:
> On Tue, Jul 19, 2016 at 12:16:57PM -0700, Brenden Blanco wrote:
> > Add a sample that rewrites and forwards packets out on the same
> > interface. Observed single core forwarding performance of ~10Mpps.
> > 
> > Since the mlx4 driver under test recycles every single packet page, the
> > perf output shows almost exclusively just the ring management and bpf
> > program work. Slowdowns are likely occurring due to cache misses.
> 
> long term we need to resurrect your prefetch patch. sux to leave
> so much performance on the table.

I know :( Let's keep working at it, in a way that's good for both
xdp/non-xdp.
> 
> > +static int parse_ipv4(void *data, u64 nh_off, void *data_end)
> > +{
> > +	struct iphdr *iph = data + nh_off;
> > +
> > +	if (iph + 1 > data_end)
> > +		return 0;
> > +	return iph->protocol;
> > +}
> > +
> > +static int parse_ipv6(void *data, u64 nh_off, void *data_end)
> > +{
> > +	struct ipv6hdr *ip6h = data + nh_off;
> > +
> > +	if (ip6h + 1 > data_end)
> > +		return 0;
> > +	return ip6h->nexthdr;
> > +}
> ...
> > +	if (h_proto == htons(ETH_P_IP))
> > +		index = parse_ipv4(data, nh_off, data_end);
> > +	else if (h_proto == htons(ETH_P_IPV6))
> > +		index = parse_ipv6(data, nh_off, data_end);
> > +	else
> > +		index = 0;
> > +
> > +	value = bpf_map_lookup_elem(&dropcnt, &index);
> > +	if (value)
> > +		*value += 1;
> > +
> > +	if (index == 17) {
> 
> not an obvious xdp example. if you'd have to respin for other
> reasons please consider 'proto' name and IPPROTO_UDP here.
Will collect this into a followup.
> 
> Acked-by: Alexei Starovoitov <ast@kernel.org>
>
Jesper Dangaard Brouer July 27, 2016, 6:25 p.m. UTC | #3
On Tue, 19 Jul 2016 15:05:37 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Tue, Jul 19, 2016 at 12:16:57PM -0700, Brenden Blanco wrote:
> > Add a sample that rewrites and forwards packets out on the same
> > interface. Observed single core forwarding performance of ~10Mpps.
> > 
> > Since the mlx4 driver under test recycles every single packet page, the
> > perf output shows almost exclusively just the ring management and bpf
> > program work. Slowdowns are likely occurring due to cache misses.  
> 
> long term we need to resurrect your prefetch patch. sux to leave
> so much performance on the table.

I will do some (more) attempts at getting prefetching working, also in
a way that benefit the normal stack (when I'm back from vacation, and
when net-next opens again).

I think Rana is also looking into this for the mlx5 driver (based on
some patches I send to her).
Tom Herbert Aug. 3, 2016, 5:01 p.m. UTC | #4
On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> Add a sample that rewrites and forwards packets out on the same
> interface. Observed single core forwarding performance of ~10Mpps.
>
> Since the mlx4 driver under test recycles every single packet page, the
> perf output shows almost exclusively just the ring management and bpf
> program work. Slowdowns are likely occurring due to cache misses.
>
> Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
> ---
>  samples/bpf/Makefile    |   5 +++
>  samples/bpf/xdp2_kern.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 119 insertions(+)
>  create mode 100644 samples/bpf/xdp2_kern.c
>
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 0e4ab3a..d2d2b35 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -22,6 +22,7 @@ hostprogs-y += map_perf_test
>  hostprogs-y += test_overhead
>  hostprogs-y += test_cgrp2_array_pin
>  hostprogs-y += xdp1
> +hostprogs-y += xdp2
>
>  test_verifier-objs := test_verifier.o libbpf.o
>  test_maps-objs := test_maps.o libbpf.o
> @@ -44,6 +45,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
>  test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
>  test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
>  xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
> +# reuse xdp1 source intentionally
> +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
>
>  # Tell kbuild to always build the programs
>  always := $(hostprogs-y)
> @@ -67,6 +70,7 @@ always += test_overhead_kprobe_kern.o
>  always += parse_varlen.o parse_simple.o parse_ldabs.o
>  always += test_cgrp2_tc_kern.o
>  always += xdp1_kern.o
> +always += xdp2_kern.o
>
>  HOSTCFLAGS += -I$(objtree)/usr/include
>
> @@ -88,6 +92,7 @@ HOSTLOADLIBES_spintest += -lelf
>  HOSTLOADLIBES_map_perf_test += -lelf -lrt
>  HOSTLOADLIBES_test_overhead += -lelf -lrt
>  HOSTLOADLIBES_xdp1 += -lelf
> +HOSTLOADLIBES_xdp2 += -lelf
>
>  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
>  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
> diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
> new file mode 100644
> index 0000000..38fe7e1
> --- /dev/null
> +++ b/samples/bpf/xdp2_kern.c
> @@ -0,0 +1,114 @@
> +/* Copyright (c) 2016 PLUMgrid
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#define KBUILD_MODNAME "foo"
> +#include <uapi/linux/bpf.h>
> +#include <linux/in.h>
> +#include <linux/if_ether.h>
> +#include <linux/if_packet.h>
> +#include <linux/if_vlan.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include "bpf_helpers.h"
> +
> +struct bpf_map_def SEC("maps") dropcnt = {
> +       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
> +       .key_size = sizeof(u32),
> +       .value_size = sizeof(long),
> +       .max_entries = 256,
> +};
> +
> +static void swap_src_dst_mac(void *data)
> +{
> +       unsigned short *p = data;
> +       unsigned short dst[3];
> +
> +       dst[0] = p[0];
> +       dst[1] = p[1];
> +       dst[2] = p[2];
> +       p[0] = p[3];
> +       p[1] = p[4];
> +       p[2] = p[5];
> +       p[3] = dst[0];
> +       p[4] = dst[1];
> +       p[5] = dst[2];
> +}
> +
> +static int parse_ipv4(void *data, u64 nh_off, void *data_end)
> +{
> +       struct iphdr *iph = data + nh_off;
> +
> +       if (iph + 1 > data_end)
> +               return 0;
> +       return iph->protocol;
> +}
> +
> +static int parse_ipv6(void *data, u64 nh_off, void *data_end)
> +{
> +       struct ipv6hdr *ip6h = data + nh_off;
> +
> +       if (ip6h + 1 > data_end)
> +               return 0;
> +       return ip6h->nexthdr;
> +}
> +
> +SEC("xdp1")
> +int xdp_prog1(struct xdp_md *ctx)
> +{
> +       void *data_end = (void *)(long)ctx->data_end;
> +       void *data = (void *)(long)ctx->data;

Brenden,

It seems that the cast to long here is done because data_end and data
are u32s in xdp_md. So the effect is that we are upcasting a
thirty-two-bit integer into a sixty-four-bit pointer (in fact without the
cast we see compiler warnings). I don't understand how this can be
correct. Can you shed some light on this?

Thanks,
Tom

> +       struct ethhdr *eth = data;
> +       int rc = XDP_DROP;
> +       long *value;
> +       u16 h_proto;
> +       u64 nh_off;
> +       u32 index;
> +
> +       nh_off = sizeof(*eth);
> +       if (data + nh_off > data_end)
> +               return rc;
> +
> +       h_proto = eth->h_proto;
> +
> +       if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
> +               struct vlan_hdr *vhdr;
> +
> +               vhdr = data + nh_off;
> +               nh_off += sizeof(struct vlan_hdr);
> +               if (data + nh_off > data_end)
> +                       return rc;
> +               h_proto = vhdr->h_vlan_encapsulated_proto;
> +       }
> +       if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
> +               struct vlan_hdr *vhdr;
> +
> +               vhdr = data + nh_off;
> +               nh_off += sizeof(struct vlan_hdr);
> +               if (data + nh_off > data_end)
> +                       return rc;
> +               h_proto = vhdr->h_vlan_encapsulated_proto;
> +       }
> +
> +       if (h_proto == htons(ETH_P_IP))
> +               index = parse_ipv4(data, nh_off, data_end);
> +       else if (h_proto == htons(ETH_P_IPV6))
> +               index = parse_ipv6(data, nh_off, data_end);
> +       else
> +               index = 0;
> +
> +       value = bpf_map_lookup_elem(&dropcnt, &index);
> +       if (value)
> +               *value += 1;
> +
> +       if (index == 17) {
> +               swap_src_dst_mac(data);
> +               rc = XDP_TX;
> +       }
> +
> +       return rc;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.8.2
>
Alexei Starovoitov Aug. 3, 2016, 5:11 p.m. UTC | #5
On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> > Add a sample that rewrites and forwards packets out on the same
> > interface. Observed single core forwarding performance of ~10Mpps.
> >
> > Since the mlx4 driver under test recycles every single packet page, the
> > perf output shows almost exclusively just the ring management and bpf
> > program work. Slowdowns are likely occurring due to cache misses.
> >
> > Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
> > ---
> >  samples/bpf/Makefile    |   5 +++
> >  samples/bpf/xdp2_kern.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 119 insertions(+)
> >  create mode 100644 samples/bpf/xdp2_kern.c
> >
> > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> > index 0e4ab3a..d2d2b35 100644
> > --- a/samples/bpf/Makefile
> > +++ b/samples/bpf/Makefile
> > @@ -22,6 +22,7 @@ hostprogs-y += map_perf_test
> >  hostprogs-y += test_overhead
> >  hostprogs-y += test_cgrp2_array_pin
> >  hostprogs-y += xdp1
> > +hostprogs-y += xdp2
> >
> >  test_verifier-objs := test_verifier.o libbpf.o
> >  test_maps-objs := test_maps.o libbpf.o
> > @@ -44,6 +45,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
> >  test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
> >  test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
> >  xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
> > +# reuse xdp1 source intentionally
> > +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
> >
> >  # Tell kbuild to always build the programs
> >  always := $(hostprogs-y)
> > @@ -67,6 +70,7 @@ always += test_overhead_kprobe_kern.o
> >  always += parse_varlen.o parse_simple.o parse_ldabs.o
> >  always += test_cgrp2_tc_kern.o
> >  always += xdp1_kern.o
> > +always += xdp2_kern.o
> >
> >  HOSTCFLAGS += -I$(objtree)/usr/include
> >
> > @@ -88,6 +92,7 @@ HOSTLOADLIBES_spintest += -lelf
> >  HOSTLOADLIBES_map_perf_test += -lelf -lrt
> >  HOSTLOADLIBES_test_overhead += -lelf -lrt
> >  HOSTLOADLIBES_xdp1 += -lelf
> > +HOSTLOADLIBES_xdp2 += -lelf
> >
> >  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
> >  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
> > diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
> > new file mode 100644
> > index 0000000..38fe7e1
> > --- /dev/null
> > +++ b/samples/bpf/xdp2_kern.c
> > @@ -0,0 +1,114 @@
> > +/* Copyright (c) 2016 PLUMgrid
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of version 2 of the GNU General Public
> > + * License as published by the Free Software Foundation.
> > + */
> > +#define KBUILD_MODNAME "foo"
> > +#include <uapi/linux/bpf.h>
> > +#include <linux/in.h>
> > +#include <linux/if_ether.h>
> > +#include <linux/if_packet.h>
> > +#include <linux/if_vlan.h>
> > +#include <linux/ip.h>
> > +#include <linux/ipv6.h>
> > +#include "bpf_helpers.h"
> > +
> > +struct bpf_map_def SEC("maps") dropcnt = {
> > +       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
> > +       .key_size = sizeof(u32),
> > +       .value_size = sizeof(long),
> > +       .max_entries = 256,
> > +};
> > +
> > +static void swap_src_dst_mac(void *data)
> > +{
> > +       unsigned short *p = data;
> > +       unsigned short dst[3];
> > +
> > +       dst[0] = p[0];
> > +       dst[1] = p[1];
> > +       dst[2] = p[2];
> > +       p[0] = p[3];
> > +       p[1] = p[4];
> > +       p[2] = p[5];
> > +       p[3] = dst[0];
> > +       p[4] = dst[1];
> > +       p[5] = dst[2];
> > +}
> > +
> > +static int parse_ipv4(void *data, u64 nh_off, void *data_end)
> > +{
> > +       struct iphdr *iph = data + nh_off;
> > +
> > +       if (iph + 1 > data_end)
> > +               return 0;
> > +       return iph->protocol;
> > +}
> > +
> > +static int parse_ipv6(void *data, u64 nh_off, void *data_end)
> > +{
> > +       struct ipv6hdr *ip6h = data + nh_off;
> > +
> > +       if (ip6h + 1 > data_end)
> > +               return 0;
> > +       return ip6h->nexthdr;
> > +}
> > +
> > +SEC("xdp1")
> > +int xdp_prog1(struct xdp_md *ctx)
> > +{
> > +       void *data_end = (void *)(long)ctx->data_end;
> > +       void *data = (void *)(long)ctx->data;
> 
> Brendan,
> 
> It seems that the cast to long here is done because data_end and data
> are u32s in xdp_md. So the effect is that we are upcasting a
> thirty-bit integer into a sixty-four bit pointer (in fact without the
> cast we see compiler warnings). I don't understand how this can be
> correct. Can you shed some light on this?

please see:
http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
Tom Herbert Aug. 3, 2016, 5:29 p.m. UTC | #6
On Wed, Aug 3, 2016 at 10:11 AM, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
>> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
>> > Add a sample that rewrites and forwards packets out on the same
>> > interface. Observed single core forwarding performance of ~10Mpps.
>> >
>> > Since the mlx4 driver under test recycles every single packet page, the
>> > perf output shows almost exclusively just the ring management and bpf
>> > program work. Slowdowns are likely occurring due to cache misses.
>> >
>> > Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
>> > ---
>> >  samples/bpf/Makefile    |   5 +++
>> >  samples/bpf/xdp2_kern.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
>> >  2 files changed, 119 insertions(+)
>> >  create mode 100644 samples/bpf/xdp2_kern.c
>> >
>> > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
>> > index 0e4ab3a..d2d2b35 100644
>> > --- a/samples/bpf/Makefile
>> > +++ b/samples/bpf/Makefile
>> > @@ -22,6 +22,7 @@ hostprogs-y += map_perf_test
>> >  hostprogs-y += test_overhead
>> >  hostprogs-y += test_cgrp2_array_pin
>> >  hostprogs-y += xdp1
>> > +hostprogs-y += xdp2
>> >
>> >  test_verifier-objs := test_verifier.o libbpf.o
>> >  test_maps-objs := test_maps.o libbpf.o
>> > @@ -44,6 +45,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
>> >  test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
>> >  test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
>> >  xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
>> > +# reuse xdp1 source intentionally
>> > +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
>> >
>> >  # Tell kbuild to always build the programs
>> >  always := $(hostprogs-y)
>> > @@ -67,6 +70,7 @@ always += test_overhead_kprobe_kern.o
>> >  always += parse_varlen.o parse_simple.o parse_ldabs.o
>> >  always += test_cgrp2_tc_kern.o
>> >  always += xdp1_kern.o
>> > +always += xdp2_kern.o
>> >
>> >  HOSTCFLAGS += -I$(objtree)/usr/include
>> >
>> > @@ -88,6 +92,7 @@ HOSTLOADLIBES_spintest += -lelf
>> >  HOSTLOADLIBES_map_perf_test += -lelf -lrt
>> >  HOSTLOADLIBES_test_overhead += -lelf -lrt
>> >  HOSTLOADLIBES_xdp1 += -lelf
>> > +HOSTLOADLIBES_xdp2 += -lelf
>> >
>> >  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
>> >  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
>> > diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
>> > new file mode 100644
>> > index 0000000..38fe7e1
>> > --- /dev/null
>> > +++ b/samples/bpf/xdp2_kern.c
>> > @@ -0,0 +1,114 @@
>> > +/* Copyright (c) 2016 PLUMgrid
>> > + *
>> > + * This program is free software; you can redistribute it and/or
>> > + * modify it under the terms of version 2 of the GNU General Public
>> > + * License as published by the Free Software Foundation.
>> > + */
>> > +#define KBUILD_MODNAME "foo"
>> > +#include <uapi/linux/bpf.h>
>> > +#include <linux/in.h>
>> > +#include <linux/if_ether.h>
>> > +#include <linux/if_packet.h>
>> > +#include <linux/if_vlan.h>
>> > +#include <linux/ip.h>
>> > +#include <linux/ipv6.h>
>> > +#include "bpf_helpers.h"
>> > +
>> > +struct bpf_map_def SEC("maps") dropcnt = {
>> > +       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
>> > +       .key_size = sizeof(u32),
>> > +       .value_size = sizeof(long),
>> > +       .max_entries = 256,
>> > +};
>> > +
>> > +static void swap_src_dst_mac(void *data)
>> > +{
>> > +       unsigned short *p = data;
>> > +       unsigned short dst[3];
>> > +
>> > +       dst[0] = p[0];
>> > +       dst[1] = p[1];
>> > +       dst[2] = p[2];
>> > +       p[0] = p[3];
>> > +       p[1] = p[4];
>> > +       p[2] = p[5];
>> > +       p[3] = dst[0];
>> > +       p[4] = dst[1];
>> > +       p[5] = dst[2];
>> > +}
>> > +
>> > +static int parse_ipv4(void *data, u64 nh_off, void *data_end)
>> > +{
>> > +       struct iphdr *iph = data + nh_off;
>> > +
>> > +       if (iph + 1 > data_end)
>> > +               return 0;
>> > +       return iph->protocol;
>> > +}
>> > +
>> > +static int parse_ipv6(void *data, u64 nh_off, void *data_end)
>> > +{
>> > +       struct ipv6hdr *ip6h = data + nh_off;
>> > +
>> > +       if (ip6h + 1 > data_end)
>> > +               return 0;
>> > +       return ip6h->nexthdr;
>> > +}
>> > +
>> > +SEC("xdp1")
>> > +int xdp_prog1(struct xdp_md *ctx)
>> > +{
>> > +       void *data_end = (void *)(long)ctx->data_end;
>> > +       void *data = (void *)(long)ctx->data;
>>
>> Brendan,
>>
>> It seems that the cast to long here is done because data_end and data
>> are u32s in xdp_md. So the effect is that we are upcasting a
>> thirty-bit integer into a sixty-four bit pointer (in fact without the
>> cast we see compiler warnings). I don't understand how this can be
>> correct. Can you shed some light on this?
>
> please see:
> http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
>
That doesn't explain it. The only thing I can figure is that there is
an implicit assumption somewhere that even though the pointer size may
be 64 bits, only the low order thirty-two bits are relevant in this
environment (i.e. upper bits are always zero for any pointers)-- so
then it would be safe to store pointers as u32 and to upcast them to void *.
If this is indeed the case, then we really need to make this explicit
to the user. Casting to long without comment just to avoid the
compiler warning is not good programming style, maybe a function
xdp_md_data_to_ptr or the like could be used.

Tom
David Miller Aug. 3, 2016, 6:29 p.m. UTC | #7
From: Tom Herbert <tom@herbertland.com>
Date: Wed, 3 Aug 2016 10:29:58 -0700

> On Wed, Aug 3, 2016 at 10:11 AM, Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
>> On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
>>> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
>>> > Add a sample that rewrites and forwards packets out on the same
>>> > interface. Observed single core forwarding performance of ~10Mpps.
>>> >
>>> > Since the mlx4 driver under test recycles every single packet page, the
>>> > perf output shows almost exclusively just the ring management and bpf
>>> > program work. Slowdowns are likely occurring due to cache misses.
>>> >
>>> > Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
>>> > ---
>>> >  samples/bpf/Makefile    |   5 +++
>>> >  samples/bpf/xdp2_kern.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
>>> >  2 files changed, 119 insertions(+)
>>> >  create mode 100644 samples/bpf/xdp2_kern.c
>>> >
>>> > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
>>> > index 0e4ab3a..d2d2b35 100644
>>> > --- a/samples/bpf/Makefile
>>> > +++ b/samples/bpf/Makefile
>>> > @@ -22,6 +22,7 @@ hostprogs-y += map_perf_test
>>> >  hostprogs-y += test_overhead
>>> >  hostprogs-y += test_cgrp2_array_pin
>>> >  hostprogs-y += xdp1
>>> > +hostprogs-y += xdp2
>>> >
>>> >  test_verifier-objs := test_verifier.o libbpf.o
>>> >  test_maps-objs := test_maps.o libbpf.o
>>> > @@ -44,6 +45,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
>>> >  test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
>>> >  test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
>>> >  xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
>>> > +# reuse xdp1 source intentionally
>>> > +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
>>> >
>>> >  # Tell kbuild to always build the programs
>>> >  always := $(hostprogs-y)
>>> > @@ -67,6 +70,7 @@ always += test_overhead_kprobe_kern.o
>>> >  always += parse_varlen.o parse_simple.o parse_ldabs.o
>>> >  always += test_cgrp2_tc_kern.o
>>> >  always += xdp1_kern.o
>>> > +always += xdp2_kern.o
>>> >
>>> >  HOSTCFLAGS += -I$(objtree)/usr/include
>>> >
>>> > @@ -88,6 +92,7 @@ HOSTLOADLIBES_spintest += -lelf
>>> >  HOSTLOADLIBES_map_perf_test += -lelf -lrt
>>> >  HOSTLOADLIBES_test_overhead += -lelf -lrt
>>> >  HOSTLOADLIBES_xdp1 += -lelf
>>> > +HOSTLOADLIBES_xdp2 += -lelf
>>> >
>>> >  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
>>> >  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
>>> > diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
>>> > new file mode 100644
>>> > index 0000000..38fe7e1
>>> > --- /dev/null
>>> > +++ b/samples/bpf/xdp2_kern.c
>>> > @@ -0,0 +1,114 @@
>>> > +/* Copyright (c) 2016 PLUMgrid
>>> > + *
>>> > + * This program is free software; you can redistribute it and/or
>>> > + * modify it under the terms of version 2 of the GNU General Public
>>> > + * License as published by the Free Software Foundation.
>>> > + */
>>> > +#define KBUILD_MODNAME "foo"
>>> > +#include <uapi/linux/bpf.h>
>>> > +#include <linux/in.h>
>>> > +#include <linux/if_ether.h>
>>> > +#include <linux/if_packet.h>
>>> > +#include <linux/if_vlan.h>
>>> > +#include <linux/ip.h>
>>> > +#include <linux/ipv6.h>
>>> > +#include "bpf_helpers.h"
>>> > +
>>> > +struct bpf_map_def SEC("maps") dropcnt = {
>>> > +       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
>>> > +       .key_size = sizeof(u32),
>>> > +       .value_size = sizeof(long),
>>> > +       .max_entries = 256,
>>> > +};
>>> > +
>>> > +static void swap_src_dst_mac(void *data)
>>> > +{
>>> > +       unsigned short *p = data;
>>> > +       unsigned short dst[3];
>>> > +
>>> > +       dst[0] = p[0];
>>> > +       dst[1] = p[1];
>>> > +       dst[2] = p[2];
>>> > +       p[0] = p[3];
>>> > +       p[1] = p[4];
>>> > +       p[2] = p[5];
>>> > +       p[3] = dst[0];
>>> > +       p[4] = dst[1];
>>> > +       p[5] = dst[2];
>>> > +}
>>> > +
>>> > +static int parse_ipv4(void *data, u64 nh_off, void *data_end)
>>> > +{
>>> > +       struct iphdr *iph = data + nh_off;
>>> > +
>>> > +       if (iph + 1 > data_end)
>>> > +               return 0;
>>> > +       return iph->protocol;
>>> > +}
>>> > +
>>> > +static int parse_ipv6(void *data, u64 nh_off, void *data_end)
>>> > +{
>>> > +       struct ipv6hdr *ip6h = data + nh_off;
>>> > +
>>> > +       if (ip6h + 1 > data_end)
>>> > +               return 0;
>>> > +       return ip6h->nexthdr;
>>> > +}
>>> > +
>>> > +SEC("xdp1")
>>> > +int xdp_prog1(struct xdp_md *ctx)
>>> > +{
>>> > +       void *data_end = (void *)(long)ctx->data_end;
>>> > +       void *data = (void *)(long)ctx->data;
>>>
>>> Brendan,
>>>
>>> It seems that the cast to long here is done because data_end and data
>>> are u32s in xdp_md. So the effect is that we are upcasting a
>>> thirty-bit integer into a sixty-four bit pointer (in fact without the
>>> cast we see compiler warnings). I don't understand how this can be
>>> correct. Can you shed some light on this?
>>
>> please see:
>> http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
>>
> That doesn't explain it.

Yes it does explain it, think more about the word "meta" and what the
code generator might be doing.
Brenden Blanco Aug. 3, 2016, 6:29 p.m. UTC | #8
On Wed, Aug 03, 2016 at 10:29:58AM -0700, Tom Herbert wrote:
> On Wed, Aug 3, 2016 at 10:11 AM, Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> > On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
> >> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
[...]
> >> > +SEC("xdp1")
> >> > +int xdp_prog1(struct xdp_md *ctx)
> >> > +{
> >> > +       void *data_end = (void *)(long)ctx->data_end;
> >> > +       void *data = (void *)(long)ctx->data;
> >>
> >> Brendan,
> >>
> >> It seems that the cast to long here is done because data_end and data
> >> are u32s in xdp_md. So the effect is that we are upcasting a
> >> thirty-bit integer into a sixty-four bit pointer (in fact without the
> >> cast we see compiler warnings). I don't understand how this can be
> >> correct. Can you shed some light on this?
> >
> > please see:
> > http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
> >
> That doesn't explain it. The only thing I can figure is that there is
> an implicit assumption somewhere that even though the pointer size may
> be 64 bits, only the low order thirty-two bits are relevant in this
> environment (i.e. upper bit are always zero for any pointers)-- so
> then it would safe store pointers as u32 and to upcast them to void *.
No, the actual pointer storage is always void* sized (see struct
xdp_buff). The mangling is cosmetic. The verifier converts the
underlying bpf load instruction to the right sized operation.
> If this is indeed the case, then we really need to make this explicit
> to the user. Casting to long without comment just to avoid the
> compiler warning is not good programming style, maybe a function
> xdp_md_data_to_ptr or the like could be used.
> 
> Tom
David Miller Aug. 3, 2016, 6:31 p.m. UTC | #9
From: Brenden Blanco <bblanco@plumgrid.com>
Date: Wed, 3 Aug 2016 11:29:52 -0700

> On Wed, Aug 03, 2016 at 10:29:58AM -0700, Tom Herbert wrote:
>> On Wed, Aug 3, 2016 at 10:11 AM, Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>> > On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
>> >> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> [...]
>> >> > +SEC("xdp1")
>> >> > +int xdp_prog1(struct xdp_md *ctx)
>> >> > +{
>> >> > +       void *data_end = (void *)(long)ctx->data_end;
>> >> > +       void *data = (void *)(long)ctx->data;
>> >>
>> >> Brendan,
>> >>
>> >> It seems that the cast to long here is done because data_end and data
>> >> are u32s in xdp_md. So the effect is that we are upcasting a
>> >> thirty-bit integer into a sixty-four bit pointer (in fact without the
>> >> cast we see compiler warnings). I don't understand how this can be
>> >> correct. Can you shed some light on this?
>> >
>> > please see:
>> > http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
>> >
>> That doesn't explain it. The only thing I can figure is that there is
>> an implicit assumption somewhere that even though the pointer size may
>> be 64 bits, only the low order thirty-two bits are relevant in this
>> environment (i.e. upper bit are always zero for any pointers)-- so
>> then it would safe store pointers as u32 and to upcast them to void *.
> No, the actual pointer storage is always void* sized (see struct
> xdp_buff). The mangling is cosmetic. The verifier converts the
> underlying bpf load instruction to the right sized operation.

And this is what Alexei meant by "meta".  Tom this stuff works just
fine.
Tom Herbert Aug. 3, 2016, 7:06 p.m. UTC | #10
On Wed, Aug 3, 2016 at 11:29 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> On Wed, Aug 03, 2016 at 10:29:58AM -0700, Tom Herbert wrote:
>> On Wed, Aug 3, 2016 at 10:11 AM, Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>> > On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
>> >> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> [...]
>> >> > +SEC("xdp1")
>> >> > +int xdp_prog1(struct xdp_md *ctx)
>> >> > +{
>> >> > +       void *data_end = (void *)(long)ctx->data_end;
>> >> > +       void *data = (void *)(long)ctx->data;
>> >>
>> >> Brendan,
>> >>
>> >> It seems that the cast to long here is done because data_end and data
>> >> are u32s in xdp_md. So the effect is that we are upcasting a
>> >> thirty-bit integer into a sixty-four bit pointer (in fact without the
>> >> cast we see compiler warnings). I don't understand how this can be
>> >> correct. Can you shed some light on this?
>> >
>> > please see:
>> > http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
>> >
>> That doesn't explain it. The only thing I can figure is that there is
>> an implicit assumption somewhere that even though the pointer size may
>> be 64 bits, only the low order thirty-two bits are relevant in this
>> environment (i.e. upper bit are always zero for any pointers)-- so
>> then it would safe store pointers as u32 and to upcast them to void *.
> No, the actual pointer storage is always void* sized (see struct
> xdp_buff). The mangling is cosmetic. The verifier converts the
> underlying bpf load instruction to the right sized operation.

This is not at all obvious to an XDP programmer. The type of ctx
structure is xdp_md and the definition of that structure in
uapi/linux/bpf.h says that the fields in that structure are __u32.
So when I, as a user naive to the inner workings of the verifier, read
this code it sure looks like we are upcasting a 32 bit value to a 64
bit value-- that does not seem right at all and the compiler
apparently concurs with my point of view. If the code ends up being correct
anyway, then the obvious answer is to have an explicit cast that points
out the special nature of this cast. Blindly casting u32 to long
for the purposes of assigning to a pointer is only going to confuse
more people as it has me.

Tom
Alexei Starovoitov Aug. 3, 2016, 10:36 p.m. UTC | #11
On Wed, Aug 03, 2016 at 12:06:55PM -0700, Tom Herbert wrote:
> On Wed, Aug 3, 2016 at 11:29 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> > On Wed, Aug 03, 2016 at 10:29:58AM -0700, Tom Herbert wrote:
> >> On Wed, Aug 3, 2016 at 10:11 AM, Alexei Starovoitov
> >> <alexei.starovoitov@gmail.com> wrote:
> >> > On Wed, Aug 03, 2016 at 10:01:54AM -0700, Tom Herbert wrote:
> >> >> On Tue, Jul 19, 2016 at 12:16 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> > [...]
> >> >> > +SEC("xdp1")
> >> >> > +int xdp_prog1(struct xdp_md *ctx)
> >> >> > +{
> >> >> > +       void *data_end = (void *)(long)ctx->data_end;
> >> >> > +       void *data = (void *)(long)ctx->data;
> >> >>
> >> >> Brendan,
> >> >>
> >> >> It seems that the cast to long here is done because data_end and data
> >> >> are u32s in xdp_md. So the effect is that we are upcasting a
> >> >> thirty-bit integer into a sixty-four bit pointer (in fact without the
> >> >> cast we see compiler warnings). I don't understand how this can be
> >> >> correct. Can you shed some light on this?
> >> >
> >> > please see:
> >> > http://lists.iovisor.org/pipermail/iovisor-dev/2016-August/000355.html
> >> >
> >> That doesn't explain it. The only thing I can figure is that there is
> >> an implicit assumption somewhere that even though the pointer size may
> >> be 64 bits, only the low order thirty-two bits are relevant in this
> >> environment (i.e. upper bit are always zero for any pointers)-- so
> >> then it would safe store pointers as u32 and to upcast them to void *.
> > No, the actual pointer storage is always void* sized (see struct
> > xdp_buff). The mangling is cosmetic. The verifier converts the
> > underlying bpf load instruction to the right sized operation.
> 
> This is not at all obvious to XDP programmer. The type of ctx
> structure is xdp_md and the definition of that structure in
> uapi/linux/bpf.h says that the fields in the that structure are __u32.
> So when I, as a user naive the inner workings of the verifier, read
> this code it sure looks like we are upcasting a 32 bit value to a 64
> bit value-- that does not seem right at all and the compiler
> apparently concurs my point of view. If the code ends up being correct
> anyway, then the obvious answer to have an explicit cast that points
> out the special nature of this cast. Blindly casting to u32 to long
> for the purposes of assigning to a pointer is only going to confuse
> more people as it has me.

Agree. Would be nice to have few helpers. The question is whether
they belong in bpf.h. Probably not, since they're not kernel abi.
For the same reasons we didn't include instruction building macros
like BPF_ALU64_REG and instead kept them in samples/bpf/libbpf.h
Here, probably four static inline functions are needed: two for __sk_buff
and two for xdp_md. That should make the xdp*_kern.c examples a bit easier
to read.
Daniel Borkmann Aug. 3, 2016, 11:18 p.m. UTC | #12
On 08/04/2016 12:36 AM, Alexei Starovoitov wrote:
> On Wed, Aug 03, 2016 at 12:06:55PM -0700, Tom Herbert wrote:
[...]
>> This is not at all obvious to XDP programmer. The type of ctx
>> structure is xdp_md and the definition of that structure in
>> uapi/linux/bpf.h says that the fields in the that structure are __u32.
>> So when I, as a user naive the inner workings of the verifier, read
>> this code it sure looks like we are upcasting a 32 bit value to a 64
>> bit value-- that does not seem right at all and the compiler
>> apparently concurs my point of view. If the code ends up being correct
>> anyway, then the obvious answer to have an explicit cast that points
>> out the special nature of this cast. Blindly casting to u32 to long
>> for the purposes of assigning to a pointer is only going to confuse
>> more people as it has me.
>
> Agree. Would be nice to have few helpers. The question is whether
> they belong in bpf.h. Probably not, since they're not kernel abi.

Yeah, fully agree, they don't belong into uapi.

> For the same reasons we didn't include instruction building macros
> like BPF_ALU64_REG and instead kept them in samples/bpf/libbpf.h

+1

> Here probably four static inline functions are needed. Two for __sk_buff
> and two for xpd_md. That should make xdp*_kern.c examples a bit easier
> to read.

Yeah, all these bits should go into some library headers that the test
cases make use of (and make them easier to parse for developers).
diff mbox

Patch

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0e4ab3a..d2d2b35 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -22,6 +22,7 @@  hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
+hostprogs-y += xdp2
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -44,6 +45,8 @@  map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
 test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
+# reuse xdp1 source intentionally
+xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -67,6 +70,7 @@  always += test_overhead_kprobe_kern.o
 always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
+always += xdp2_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -88,6 +92,7 @@  HOSTLOADLIBES_spintest += -lelf
 HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
+HOSTLOADLIBES_xdp2 += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
new file mode 100644
index 0000000..38fe7e1
--- /dev/null
+++ b/samples/bpf/xdp2_kern.c
@@ -0,0 +1,114 @@ 
+/* Copyright (c) 2016 PLUMgrid
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") dropcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 256,
+};
+
+static void swap_src_dst_mac(void *data)
+{
+	unsigned short *p = data;
+	unsigned short dst[3];
+
+	dst[0] = p[0];
+	dst[1] = p[1];
+	dst[2] = p[2];
+	p[0] = p[3];
+	p[1] = p[4];
+	p[2] = p[5];
+	p[3] = dst[0];
+	p[4] = dst[1];
+	p[5] = dst[2];
+}
+
+static int parse_ipv4(void *data, u64 nh_off, void *data_end)
+{
+	struct iphdr *iph = data + nh_off;
+
+	if (iph + 1 > data_end)
+		return 0;
+	return iph->protocol;
+}
+
+static int parse_ipv6(void *data, u64 nh_off, void *data_end)
+{
+	struct ipv6hdr *ip6h = data + nh_off;
+
+	if (ip6h + 1 > data_end)
+		return 0;
+	return ip6h->nexthdr;
+}
+
+SEC("xdp1")
+int xdp_prog1(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	int rc = XDP_DROP;
+	long *value;
+	u16 h_proto;
+	u64 nh_off;
+	u32 index;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return rc;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vhdr;
+
+		vhdr = data + nh_off;
+		nh_off += sizeof(struct vlan_hdr);
+		if (data + nh_off > data_end)
+			return rc;
+		h_proto = vhdr->h_vlan_encapsulated_proto;
+	}
+	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vhdr;
+
+		vhdr = data + nh_off;
+		nh_off += sizeof(struct vlan_hdr);
+		if (data + nh_off > data_end)
+			return rc;
+		h_proto = vhdr->h_vlan_encapsulated_proto;
+	}
+
+	if (h_proto == htons(ETH_P_IP))
+		index = parse_ipv4(data, nh_off, data_end);
+	else if (h_proto == htons(ETH_P_IPV6))
+		index = parse_ipv6(data, nh_off, data_end);
+	else
+		index = 0;
+
+	value = bpf_map_lookup_elem(&dropcnt, &index);
+	if (value)
+		*value += 1;
+
+	if (index == 17) {
+		swap_src_dst_mac(data);
+		rc = XDP_TX;
+	}
+
+	return rc;
+}
+
+char _license[] SEC("license") = "GPL";