diff mbox series

[bpf-next,08/13] selftests/bpf: add xdp noinline test

Message ID 20171215015517.409513-9-ast@kernel.org
State Accepted, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: introduce function calls | expand

Commit Message

Alexei Starovoitov Dec. 15, 2017, 1:55 a.m. UTC
From: Alexei Starovoitov <ast@fb.com>

Add a large, semi-artificial XDP test with 18 functions to stress-test
the BPF call verification logic.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile            |   3 +-
 tools/testing/selftests/bpf/test_progs.c        |  81 +++
 tools/testing/selftests/bpf/test_xdp_noinline.c | 833 ++++++++++++++++++++++++
 3 files changed, 916 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_xdp_noinline.c
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6970d073df5b..7ef9601d04bf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -18,7 +18,7 @@  TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-	test_l4lb_noinline.o
+	test_l4lb_noinline.o test_xdp_noinline.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
@@ -54,6 +54,7 @@  CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 	      -Wno-compare-distinct-pointer-types
 
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
 %.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index abff83bf8d40..6472ca98690e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -257,6 +257,86 @@  static void test_l4lb_all(void)
 	test_l4lb(file2);
 }
 
+static void test_xdp_noinline(void) /* run test_xdp_noinline.o on pkt_v4/pkt_v6, check output and stats */
+{
+	const char *file = "./test_xdp_noinline.o";
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct vip key = {.protocol = 6}; /* 6 == IPPROTO_TCP */
+	struct vip_meta {
+		__u32 flags;
+		__u32 vip_num;
+	} value = {.vip_num = VIP_NUM};
+	__u32 stats_key = VIP_NUM; /* the program keeps per-VIP counters at index vip_num */
+	struct vip_stats {
+		__u64 bytes;
+		__u64 pkts;
+	} stats[nr_cpus]; /* "stats" is a per-cpu array: one value per possible cpu */
+	struct real_definition {
+		union {
+			__be32 dst;
+			__be32 dstv6[4];
+		};
+		__u8 flags;
+	} real_def = {.dst = MAGIC_VAL}; /* the program writes dst to the start of the packet */
+	__u32 ch_key = 11, real_num = 3;
+	__u32 duration, retval, size;
+	int err, i, prog_fd, map_fd;
+	__u64 bytes = 0, pkts = 0;
+	struct bpf_object *obj;
+	char buf[128];
+	u32 *magic = (u32 *)buf; /* first 4 bytes of the packet returned by the test run */
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "vip_map");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key, &value, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "ch_rings");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); /* ring slot -> index into "reals" */
+
+	map_fd = bpf_find_map(__func__, obj, "reals");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 1 || size != 54 || /* retval 1 == XDP_DROP */
+	      *magic != MAGIC_VAL, "ipv4",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 1 || size != 74 || /* retval 1 == XDP_DROP */
+	      *magic != MAGIC_VAL, "ipv6",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	map_fd = bpf_find_map(__func__, obj, "stats");
+	if (map_fd < 0)
+		goto out;
+	err = bpf_map_lookup_elem(map_fd, &stats_key, stats); /* on failure stats[] stays unset */
+	for (i = 0; !err && i < nr_cpus; i++) { /* sum per-cpu values; skipped if lookup failed */
+		bytes += stats[i].bytes;
+		pkts += stats[i].pkts;
+	}
+	if (err || bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+		error_cnt++;
+		printf("test_xdp_noinline:FAIL:stats %llu %llu\n", bytes, pkts);
+	}
+out:
+	bpf_object__close(obj);
+}
+
 static void test_tcp_estats(void)
 {
 	const char *file = "./test_tcp_estats.o";
@@ -766,6 +846,7 @@  int main(void)
 	test_pkt_access();
 	test_xdp();
 	test_l4lb_all();
+	test_xdp_noinline();
 	test_tcp_estats();
 	test_bpf_obj_id();
 	test_pkt_md_access();
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 000000000000..5e4aac74f9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@ 
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			##__VA_ARGS__);			\
+}) /* copies fmt into a local array and passes it, with its size, to bpf_trace_printk() */
+
+static __u32 rol32(__u32 word, unsigned int shift) /* rotate word left by shift bits */
+{
+	return (word << shift) | (word >> ((-shift) & 31)); /* & 31 keeps the right shift in 0..31 */
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+} /* mixes a, b, c in place; arguments must be plain lvalues (each is expanded many times) */
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+} /* final scramble of a, b, c in place; callers return c as the hash */
+
+#define JHASH_INITVAL		0xdeadbeef /* constant added to the seed in jhash() and jhash_2words() */
+
+typedef unsigned int u32; /* shorthand used by the jhash helpers below */
+
+static __attribute__ ((noinline)) /* hash length bytes starting at key, seeded with initval */
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) { /* consume 12 bytes per round; the last 1..12 bytes go to the switch */
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) { /* every case deliberately falls through to the next one */
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static __attribute__ ((noinline)) /* add initval to a, b and c, run the final scramble, return c */
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static __attribute__ ((noinline)) /* hash two 32-bit words; the third word is fixed to 0 */
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); /* (2 << 2) == 8 */
+}
+
+struct flow_key { /* parsed flow tuple; also the key type of the lru_cache map */
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2]; /* [0] = source, [1] = dest (swapped when parsed from an ICMP payload) */
+	};
+	__u8 proto;
+};
+
+struct packet_description {
+	struct flow_key flow;
+	__u8 flags; /* bit 0: flow taken from an ICMP payload; bit 1: TCP SYN seen */
+};
+
+struct ctl_value { /* value type of ctl_array; only mac is read in this file */
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_definition { /* key type of vip_map, filled in by process_packet() */
+	union {
+		__be32 vip;
+		__be32 vipv6[4];
+	};
+	__u16 port;
+	__u16 family; /* never assigned in this file; stays zero from the initializer */
+	__u8 proto;
+};
+
+struct vip_meta { /* value type of vip_map */
+	__u32 flags; /* bits 0..4 are tested in process_packet() and get_packet_dst() */
+	__u32 vip_num; /* index of this VIP's counters in stats; also selects the ch_rings slot */
+};
+
+struct real_pos_lru { /* value type of lru_cache */
+	__u32 pos; /* index into the reals map */
+	__u64 atime; /* bpf_ktime_get_ns() timestamp; only set and checked for UDP flows */
+};
+
+struct real_definition { /* value type of the reals map */
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags; /* bit 0 set: process_packet() uses encap_v6(), otherwise encap_v4() */
+};
+
+struct lb_stats { /* value type of stats; the meaning of v1/v2 depends on the index used */
+	__u64 v2; /* per-VIP entries: pkt_bytes is added here */
+	__u64 v1; /* per-VIP entries: incremented once per packet */
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip_definition),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = 512,
+	.map_flags = 0,
+}; /* vip_definition -> vip_meta; looked up in process_packet() */
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+	.type = BPF_MAP_TYPE_LRU_HASH,
+	.key_size = sizeof(struct flow_key),
+	.value_size = sizeof(struct real_pos_lru),
+	.max_entries = 300,
+	.map_flags = 1U << 1, /* 1U << 1 is BPF_F_NO_COMMON_LRU in uapi/linux/bpf.h */
+}; /* flow_key -> real_pos_lru; passed around as lru_map by process_packet() */
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 12 * 655,
+	.map_flags = 0,
+}; /* slot (2 * vip_num + hash % 2) -> index into reals; read in get_packet_dst() */
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = 40,
+	.map_flags = 0,
+}; /* index -> real_definition, the destination used for encapsulation */
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct lb_stats),
+	.max_entries = 515, /* indexes 512, 513 and 514 are used as global counters in this file */
+	.map_flags = 0,
+}; /* per-cpu counters; process_packet() also indexes it by vip_num */
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = 16,
+	.map_flags = 0,
+}; /* process_packet() reads entry 0 and passes it to encap_v4()/encap_v6() for its mac */
+
+struct eth_hdr { /* 14-byte Ethernet header as laid out at the start of the packet */
+	unsigned char eth_dest[6];
+	unsigned char eth_source[6];
+	unsigned short eth_proto; /* compared and stored as a raw 16-bit value, no byte swapping */
+};
+
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) /* byte offset of the L4 header */
+{
+	__u64 off = sizeof(struct eth_hdr);
+	if (is_ipv6) {
+		off += sizeof(struct ipv6hdr);
+		if (is_icmp) /* skip the ICMPv6 header and the IPv6 header embedded after it */
+			off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
+	} else {
+		off += sizeof(struct iphdr);
+		if (is_icmp) /* skip the ICMP header and the IPv4 header embedded after it */
+			off += sizeof(struct icmphdr) + sizeof(struct iphdr);
+	}
+	return off;
+}
+
+static __attribute__ ((noinline)) /* copy the UDP ports into pckt->flow; 0 if the header is truncated */
+bool parse_udp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+
+	bool is_icmp = !((pckt->flags & (1 << 0)) == 0); /* bit 0 is set by parse_icmp()/parse_icmpv6() */
+	__u64 off = calc_offset(is_ipv6, is_icmp);
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end) /* whole UDP header must lie inside the packet */
+		return 0;
+	if (!is_icmp) {
+		pckt->flow.port16[0] = udp->source;
+		pckt->flow.port16[1] = udp->dest;
+	} else { /* header comes from an ICMP payload: store the ports swapped */
+		pckt->flow.port16[0] = udp->dest;
+		pckt->flow.port16[1] = udp->source;
+	}
+	return 1;
+}
+
+static __attribute__ ((noinline)) /* copy the TCP ports into pckt->flow; 0 if the header is truncated */
+bool parse_tcp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+
+	bool is_icmp = !((pckt->flags & (1 << 0)) == 0); /* bit 0 is set by parse_icmp()/parse_icmpv6() */
+	__u64 off = calc_offset(is_ipv6, is_icmp);
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end) /* whole TCP header must lie inside the packet */
+		return 0;
+	if (tcp->syn)
+		pckt->flags |= (1 << 1); /* remember SYN; process_packet() tests this bit */
+	if (!is_icmp) {
+		pckt->flow.port16[0] = tcp->source;
+		pckt->flow.port16[1] = tcp->dest;
+	} else { /* header comes from an ICMP payload: store the ports swapped */
+		pckt->flow.port16[0] = tcp->dest;
+		pckt->flow.port16[1] = tcp->source;
+	}
+	return 1;
+}
+
+static __attribute__ ((noinline)) /* prepend an Ethernet + IPv6 header; returns 1 on success, 0 on failure */
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+	struct ipv6hdr *ip6h;
+	__u32 ip_suffix;
+	void *data_end;
+	void *data;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) /* negative delta: grow headroom */
+		return 0;
+	data = (void *)(long)xdp->data; /* reload: packet pointers changed after adjust_head */
+	data_end = (void *)(long)xdp->data_end;
+	new_eth = data;
+	ip6h = data + sizeof(struct eth_hdr);
+	old_eth = data + sizeof(struct ipv6hdr); /* where the original Ethernet header now sits */
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end || ip6h + 1 > data_end)
+		return 0;
+	memcpy(new_eth->eth_dest, cval->mac, 6);
+	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 56710; /* 0xDD86: ETH_P_IPV6 (0x86DD) as stored by a little-endian host */
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+	ip6h->payload_len =
+	    __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+	ip6h->hop_limit = 4;
+
+	ip6h->saddr.in6_u.u6_addr32[0] = 1;
+	ip6h->saddr.in6_u.u6_addr32[1] = 2;
+	ip6h->saddr.in6_u.u6_addr32[2] = 3;
+	ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
+	memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+	return 1;
+}
+
+static __attribute__ ((noinline)) /* build an Ethernet + IPv4 header in new headroom, then restore the head */
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+
+	__u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+	__u16 *next_iph_u16;
+	struct iphdr *iph;
+	__u32 csum = 0;
+	void *data_end;
+	void *data;
+
+	ip_suffix <<= 15;
+	ip_suffix ^= pckt->flow.src;
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) /* negative delta: grow headroom */
+		return 0;
+	data = (void *)(long)xdp->data; /* reload: packet pointers changed after adjust_head */
+	data_end = (void *)(long)xdp->data_end;
+	new_eth = data;
+	iph = data + sizeof(struct eth_hdr);
+	old_eth = data + sizeof(struct iphdr); /* where the original Ethernet header now sits */
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end || iph + 1 > data_end)
+		return 0;
+	memcpy(new_eth->eth_dest, cval->mac, 6);
+	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 8; /* 0x0008: ETH_P_IP (0x0800) as stored by a little-endian host */
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->frag_off = 0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0; /* must be zero while the checksum below is summed */
+	iph->tos = 1;
+	iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+	/* don't update iph->daddr, since it will overwrite old eth_proto
+	 * and multiple iterations of bpf_prog_run() will fail
+	 */
+
+	iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+	iph->ttl = 4;
+
+	next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) /* sum the header as 16-bit words */
+		csum += *next_iph_u16++;
+	iph->check = ~((csum & 0xffff) + (csum >> 16)); /* fold the carries once and complement */
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) /* move the head back by the same amount */
+		return 0;
+	return 1;
+}
+
+static __attribute__ ((noinline)) /* strip an outer IPv6 header; NOTE(review): no caller visible in this file */
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+
+	old_eth = *data;
+	new_eth = *data + sizeof(struct ipv6hdr); /* no bounds check against *data_end is done here */
+	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+	if (inner_v4)
+		new_eth->eth_proto = 8; /* ETH_P_IP (0x0800) as stored by a little-endian host */
+	else
+		new_eth->eth_proto = 56710; /* ETH_P_IPV6 (0x86DD) as stored by a little-endian host */
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) /* positive delta: drop leading bytes */
+		return 0;
+	*data = (void *)(long)xdp->data; /* hand the refreshed packet pointers back to the caller */
+	*data_end = (void *)(long)xdp->data_end;
+	return 1;
+}
+
+static __attribute__ ((noinline)) /* strip an outer IPv4 header; NOTE(review): no caller visible in this file */
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+
+	old_eth = *data;
+	new_eth = *data + sizeof(struct iphdr); /* no bounds check against *data_end is done here */
+	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 8; /* ETH_P_IP (0x0800) as stored by a little-endian host */
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) /* positive delta: drop leading bytes */
+		return 0;
+	*data = (void *)(long)xdp->data; /* hand the refreshed packet pointers back to the caller */
+	*data_end = (void *)(long)xdp->data_end;
+	return 1;
+}
+
+static __attribute__ ((noinline)) /* swap Ethernet source and destination, return XDP_TX */
+int swap_mac_and_send(void *data, void *data_end)
+{
+	unsigned char tmp_mac[6];
+	struct eth_hdr *eth;
+
+	eth = data; /* data_end is not used: no bounds check is performed here */
+	memcpy(tmp_mac, eth->eth_source, 6);
+	memcpy(eth->eth_source, eth->eth_dest, 6);
+	memcpy(eth->eth_dest, tmp_mac, 6);
+	return XDP_TX;
+}
+
+static __attribute__ ((noinline)) /* rewrite an ICMP echo request in place into a reply and bounce it */
+int send_icmp_reply(void *data, void *data_end)
+{
+	struct icmphdr *icmp_hdr;
+	__u16 *next_iph_u16;
+	__u32 tmp_addr = 0;
+	struct iphdr *iph;
+	__u32 csum1 = 0; /* never read in this function */
+	__u32 csum = 0;
+	__u64 off = 0;
+
+	if (data + sizeof(struct eth_hdr)
+	     + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+		return XDP_DROP;
+	off += sizeof(struct eth_hdr);
+	iph = data + off;
+	off += sizeof(struct iphdr);
+	icmp_hdr = data + off;
+	icmp_hdr->type = 0; /* ICMP type 0 is echo reply */
+	icmp_hdr->checksum += 0x0007; /* NOTE(review): in-place checksum tweak; confirm the constant */
+	iph->ttl = 4;
+	tmp_addr = iph->daddr; /* swap the IPv4 source and destination addresses */
+	iph->daddr = iph->saddr;
+	iph->saddr = tmp_addr;
+	iph->check = 0; /* must be zero while the checksum below is summed */
+	next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) /* sum the header as 16-bit words */
+		csum += *next_iph_u16++;
+	iph->check = ~((csum & 0xffff) + (csum >> 16)); /* fold the carries once and complement */
+	return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline)) /* rewrite an ICMPv6 echo request in place into a reply and bounce it */
+int send_icmp6_reply(void *data, void *data_end)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+	__be32 tmp_addr[4];
+	__u64 off = 0;
+
+	if (data + sizeof(struct eth_hdr)
+	     + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+		return XDP_DROP;
+	off += sizeof(struct eth_hdr);
+	ip6h = data + off;
+	off += sizeof(struct ipv6hdr);
+	icmp_hdr = data + off;
+	icmp_hdr->icmp6_type = 129; /* ICMPv6 type 129 is echo reply */
+	icmp_hdr->icmp6_cksum -= 0x0001; /* in-place checksum adjustment for the type change */
+	ip6h->hop_limit = 4;
+	memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); /* swap the IPv6 source and destination */
+	memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+	memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+	return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline)) /* returns an XDP action, or -1 when the caller should keep processing */
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+		 struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return XDP_DROP;
+	if (icmp_hdr->icmp6_type == 128) /* ICMPv6 type 128 is echo request: answer it directly */
+		return send_icmp6_reply(data, data_end);
+	if (icmp_hdr->icmp6_type != 3) /* only type 3 messages are parsed further */
+		return XDP_PASS;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off; /* IPv6 header embedded in the ICMPv6 payload */
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+	pckt->flow.proto = ip6h->nexthdr;
+	pckt->flags |= (1 << 0); /* mark: flow was taken from an ICMP payload */
+	memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); /* addresses stored swapped */
+	memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+	return -1;
+}
+
+static __attribute__ ((noinline)) /* returns an XDP action, or -1 when the caller should keep processing */
+int parse_icmp(void *data, void *data_end, __u64 off,
+	       struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return XDP_DROP;
+	if (icmp_hdr->type == 8) /* ICMP type 8 is echo request: answer it directly */
+		return send_icmp_reply(data, data_end);
+	if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) /* only type 3, code 4 is parsed further */
+		return XDP_PASS;
+	off += sizeof(struct icmphdr);
+	iph = data + off; /* IPv4 header embedded in the ICMP payload */
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+	if (iph->ihl != 5) /* embedded headers with IP options are not handled */
+		return XDP_DROP;
+	pckt->flow.proto = iph->protocol;
+	pckt->flags |= (1 << 0); /* mark: flow was taken from an ICMP payload */
+	pckt->flow.src = iph->daddr; /* addresses stored swapped */
+	pckt->flow.dst = iph->saddr;
+	return -1;
+}
+
+static __attribute__ ((noinline)) /* hash the flow's source address and ports */
+__u32 get_packet_hash(struct packet_description *pckt,
+		      bool hash_16bytes)
+{
+	if (hash_16bytes) /* reduce the 16-byte source with jhash() first, then mix in the ports */
+		return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+				    pckt->flow.ports, 24);
+	else
+		return jhash_2words(pckt->flow.src, pckt->flow.ports,
+				    24);
+}
+
+__attribute__ ((noinline)) /* pick a real for the flow via ch_rings/reals; returns 1 and sets *real on success */
+static bool get_packet_dst(struct real_definition **real,
+			   struct packet_description *pckt,
+			   struct vip_meta *vip_info,
+			   bool is_ipv6, void *lru_map)
+{
+	struct real_pos_lru new_dst_lru = { };
+	bool hash_16bytes = is_ipv6;
+	__u32 *real_pos, hash, key;
+	__u64 cur_time;
+
+	if (vip_info->flags & (1 << 2)) /* VIP flag bit 2 forces the 16-byte hash */
+		hash_16bytes = 1;
+	if (vip_info->flags & (1 << 3)) { /* VIP flag bit 3: hash on the dest port with a zeroed source */
+		pckt->flow.port16[0] = pckt->flow.port16[1];
+		memset(pckt->flow.srcv6, 0, 16);
+	}
+	hash = get_packet_hash(pckt, hash_16bytes);
+	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
+	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+		return 0; /* any other hash value is rejected */
+	key = 2 * vip_info->vip_num + hash % 2;
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return 0;
+	key = *real_pos; /* key is reused: it is now an index into reals */
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return 0;
+	if (!(vip_info->flags & (1 << 1))) { /* VIP flag bit 1 set: skip rate stats and the LRU update */
+		__u32 conn_rate_key = 512 + 2;
+		struct lb_stats *conn_rate_stats =
+		    bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+		if (!conn_rate_stats)
+			return 1;
+		cur_time = bpf_ktime_get_ns();
+		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { /* v2 holds the last reset time */
+			conn_rate_stats->v1 = 1;
+			conn_rate_stats->v2 = cur_time;
+		} else {
+			conn_rate_stats->v1 += 1;
+			if (conn_rate_stats->v1 >= 1) /* true unless v1 wrapped to 0: skips the LRU update */
+				return 1;
+		}
+		if (pckt->flow.proto == IPPROTO_UDP)
+			new_dst_lru.atime = cur_time;
+		new_dst_lru.pos = key;
+		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); /* return value is ignored */
+	}
+	return 1;
+}
+
+__attribute__ ((noinline)) /* set *real from the LRU entry of this flow; *real is untouched on a miss */
+static void connection_table_lookup(struct real_definition **real,
+				    struct packet_description *pckt,
+				    void *lru_map)
+{
+
+	struct real_pos_lru *dst_lru;
+	__u64 cur_time;
+	__u32 key;
+
+	dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+	if (!dst_lru)
+		return;
+	if (pckt->flow.proto == IPPROTO_UDP) {
+		cur_time = bpf_ktime_get_ns();
+		if (cur_time - dst_lru->atime > 300000) /* UDP entry older than 300000 ns: treat as a miss */
+			return;
+		dst_lru->atime = cur_time;
+	}
+	key = dst_lru->pos;
+	*real = bpf_map_lookup_elem(&reals, &key); /* may store NULL if the index is not in reals */
+}
+
+/* don't believe your eyes!
+ * below function has 6 arguments whereas bpf and llvm allow maximum of 5
+ * but since it's _static_ llvm can optimize one argument away
+ */
+__attribute__ ((noinline)) /* parse the IPv6 header; returns an XDP action, or -1 to keep processing */
+static int process_l3_headers_v6(struct packet_description *pckt,
+				 __u8 *protocol, __u64 off,
+				 __u16 *pkt_bytes, void *data,
+				 void *data_end)
+{
+	struct ipv6hdr *ip6h;
+	__u64 iph_len;
+	int action;
+
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+	iph_len = sizeof(struct ipv6hdr);
+	*protocol = ip6h->nexthdr;
+	pckt->flow.proto = *protocol;
+	*pkt_bytes = __builtin_bswap16(ip6h->payload_len); /* byte-swapped copy of payload_len */
+	off += iph_len;
+	if (*protocol == 45) { /* NOTE(review): the IPv6 fragment header is 44, not 45 - confirm intent */
+		return XDP_DROP;
+	} else if (*protocol == 59) { /* NOTE(review): ICMPv6 is 58; 59 is "no next header" - confirm */
+		action = parse_icmpv6(data, data_end, off, pckt);
+		if (action >= 0)
+			return action;
+	} else {
+		memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+		memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+	}
+	return -1;
+}
+
+__attribute__ ((noinline)) /* parse the IPv4 header; returns an XDP action, or -1 to keep processing */
+static int process_l3_headers_v4(struct packet_description *pckt,
+				 __u8 *protocol, __u64 off,
+				 __u16 *pkt_bytes, void *data,
+				 void *data_end)
+{
+	struct iphdr *iph;
+	__u64 iph_len; /* not used in this function */
+	int action;
+
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+	if (iph->ihl != 5) /* headers with IP options are dropped */
+		return XDP_DROP;
+	*protocol = iph->protocol;
+	pckt->flow.proto = *protocol;
+	*pkt_bytes = __builtin_bswap16(iph->tot_len); /* byte-swapped copy of tot_len */
+	off += 20; /* 20 bytes == header length for ihl 5 */
+	if (iph->frag_off & 65343) /* 0xFF3F: MF flag and fragment offset as read on little-endian */
+		return XDP_DROP;
+	if (*protocol == IPPROTO_ICMP) {
+		action = parse_icmp(data, data_end, off, pckt);
+		if (action >= 0)
+			return action;
+	} else {
+		pckt->flow.src = iph->saddr;
+		pckt->flow.dst = iph->daddr;
+	}
+	return -1;
+}
+
+__attribute__ ((noinline)) /* parse, pick a real, encapsulate and account one packet; returns an XDP action */
+static int process_packet(void *data, __u64 off, void *data_end,
+			  bool is_ipv6, struct xdp_md *xdp)
+{
+
+	struct real_definition *dst = NULL;
+	struct packet_description pckt = { };
+	struct vip_definition vip = { };
+	struct lb_stats *data_stats;
+	struct eth_hdr *eth = data; /* not used in this function */
+	void *lru_map = &lru_cache;
+	struct vip_meta *vip_info;
+	__u32 lru_stats_key = 513;
+	__u32 mac_addr_pos = 0;
+	__u32 stats_key = 512;
+	struct ctl_value *cval;
+	__u16 pkt_bytes;
+	__u64 iph_len; /* not used in this function */
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	if (is_ipv6)
+		action = process_l3_headers_v6(&pckt, &protocol, off,
+					       &pkt_bytes, data, data_end);
+	else
+		action = process_l3_headers_v4(&pckt, &protocol, off,
+					       &pkt_bytes, data, data_end);
+	if (action >= 0) /* the L3 parser already decided the verdict */
+		return action;
+	protocol = pckt.flow.proto; /* may differ from the outer protocol after ICMP parsing */
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+			return XDP_DROP;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, data_end, is_ipv6, &pckt))
+			return XDP_DROP;
+	} else {
+		return XDP_TX; /* anything that is neither TCP nor UDP */
+	}
+
+	if (is_ipv6)
+		memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+	else
+		vip.vip = pckt.flow.dst;
+	vip.port = pckt.flow.port16[1];
+	vip.proto = pckt.flow.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) { /* retry with port 0 as the key */
+		vip.port = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return XDP_PASS;
+		if (!(vip_info->flags & (1 << 4))) /* VIP flag bit 4 keeps the dest port in the flow */
+			pckt.flow.port16[1] = 0;
+	}
+	if (data_end - data > 1400) /* packets longer than 1400 bytes are dropped */
+		return XDP_DROP;
+	data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+	if (!data_stats)
+		return XDP_DROP;
+	data_stats->v1 += 1;
+	if (!dst) { /* dst is still NULL at this point, so this branch is always taken */
+		if (vip_info->flags & (1 << 0)) /* VIP flag bit 0: ignore the source port */
+			pckt.flow.port16[0] = 0;
+		if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) /* not a SYN, LRU allowed */
+			connection_table_lookup(&dst, &pckt, lru_map);
+		if (dst)
+			goto out;
+		if (pckt.flow.proto == IPPROTO_TCP) { /* count TCP LRU misses at stats index 513 */
+			struct lb_stats *lru_stats =
+			    bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+			if (!lru_stats)
+				return XDP_DROP;
+			if (pckt.flags & (1 << 1)) /* v1 counts SYN misses, v2 counts the rest */
+				lru_stats->v1 += 1;
+			else
+				lru_stats->v2 += 1;
+		}
+		if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+			return XDP_DROP;
+		data_stats->v2 += 1;
+	}
+out:
+	cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); /* entry 0 supplies the destination MAC */
+	if (!cval)
+		return XDP_DROP;
+	if (dst->flags & (1 << 0)) { /* bit 0 of the real's flags selects IPv6 encapsulation */
+		if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+			return XDP_DROP;
+	} else {
+		if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+			return XDP_DROP;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num); /* per-VIP counters */
+	if (!data_stats)
+		return XDP_DROP;
+	data_stats->v1 += 1;
+	data_stats->v2 += pkt_bytes;
+
+	data = (void *)(long)xdp->data; /* reload: the encap helpers moved the packet head */
+	data_end = (void *)(long)xdp->data_end;
+	if (data + 4 > data_end)
+		return XDP_DROP;
+	*(u32 *)data = dst->dst; /* store the real's address in the first 4 bytes of the packet */
+	return XDP_DROP; /* the fully processed path also ends in XDP_DROP */
+}
+
+__attribute__ ((section("xdp-test"), used)) /* program entry: dispatch on the Ethernet protocol field */
+int balancer_ingress(struct xdp_md *ctx)
+{
+	void *data = (void *)(long)ctx->data;
+	void *data_end = (void *)(long)ctx->data_end;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end) /* Ethernet header must lie inside the packet */
+		return XDP_DROP;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == 8) /* ETH_P_IP (0x0800) as read on a little-endian host */
+		return process_packet(data, nh_off, data_end, 0, ctx);
+	else if (eth_proto == 56710) /* ETH_P_IPV6 (0x86DD) as read on a little-endian host */
+		return process_packet(data, nh_off, data_end, 1, ctx);
+	else
+		return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL"; /* emitted into the "license" section */
+int _version __attribute__ ((section("version"), used)) = 1; /* emitted into the "version" section */