Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/819583/?format=api
{ "id": 819583, "url": "http://patchwork.ozlabs.org/api/patches/819583/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/patch/150660344827.2808.2281655132773972324.stgit@firesoul/", "project": { "id": 7, "url": "http://patchwork.ozlabs.org/api/projects/7/?format=api", "name": "Linux network development", "link_name": "netdev", "list_id": "netdev.vger.kernel.org", "list_email": "netdev@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<150660344827.2808.2281655132773972324.stgit@firesoul>", "list_archive_url": null, "date": "2017-09-28T12:57:28", "name": "[net-next,5/5] samples/bpf: add cpumap sample program xdp_redirect_cpu", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "0b9efb8e692eb201e16de78053f3499a7d83377e", "submitter": { "id": 13625, "url": "http://patchwork.ozlabs.org/api/people/13625/?format=api", "name": "Jesper Dangaard Brouer", "email": "brouer@redhat.com" }, "delegate": { "id": 34, "url": "http://patchwork.ozlabs.org/api/users/34/?format=api", "username": "davem", "first_name": "David", "last_name": "Miller", "email": "davem@davemloft.net" }, "mbox": "http://patchwork.ozlabs.org/project/netdev/patch/150660344827.2808.2281655132773972324.stgit@firesoul/mbox/", "series": [ { "id": 5560, "url": "http://patchwork.ozlabs.org/api/series/5560/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/list/?series=5560", "date": "2017-09-28T12:57:02", "name": "New bpf cpumap type for XDP_REDIRECT", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/5560/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/819583/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/819583/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<netdev-owner@vger.kernel.org>", "X-Original-To": "patchwork-incoming@ozlabs.org", "Delivered-To": "patchwork-incoming@ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n\tspf=none (mailfrom) smtp.mailfrom=vger.kernel.org\n\t(client-ip=209.132.180.67; helo=vger.kernel.org;\n\tenvelope-from=netdev-owner@vger.kernel.org;\n\treceiver=<UNKNOWN>)", "ext-mx02.extmail.prod.ext.phx2.redhat.com;\n\tdmarc=none (p=none dis=none) header.from=redhat.com", "ext-mx02.extmail.prod.ext.phx2.redhat.com;\n\tspf=fail smtp.mailfrom=brouer@redhat.com" ], "Received": [ "from vger.kernel.org (vger.kernel.org [209.132.180.67])\n\tby ozlabs.org (Postfix) with ESMTP id 3y2vqM0BVJz9t66\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 22:57:35 +1000 (AEST)", "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1753154AbdI1M5d (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);\n\tThu, 28 Sep 2017 08:57:33 -0400", "from mx1.redhat.com ([209.132.183.28]:59842 \"EHLO mx1.redhat.com\"\n\trhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP\n\tid S1753090AbdI1M5a (ORCPT <rfc822;netdev@vger.kernel.org>);\n\tThu, 28 Sep 2017 08:57:30 -0400", "from smtp.corp.redhat.com\n\t(int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby mx1.redhat.com (Postfix) with ESMTPS id 65F7BD165D;\n\tThu, 28 Sep 2017 12:57:30 +0000 (UTC)", "from firesoul.localdomain (ovpn-200-26.brq.redhat.com\n\t[10.40.200.26])\n\tby smtp.corp.redhat.com (Postfix) with ESMTP id 300241803D;\n\tThu, 28 Sep 2017 12:57:29 +0000 (UTC)", "from [192.168.5.1] (localhost [IPv6:::1])\n\tby firesoul.localdomain (Postfix) with ESMTP id 55A2D300006B0;\n\tThu, 28 Sep 2017 14:57:28 +0200 (CEST)" ], "DMARC-Filter": "OpenDMARC Filter v1.3.2 mx1.redhat.com 65F7BD165D", "Subject": "[net-next PATCH 5/5] samples/bpf: add cpumap sample program\n\txdp_redirect_cpu", "From": "Jesper Dangaard Brouer <brouer@redhat.com>", "To": "netdev@vger.kernel.org", "Cc": "jakub.kicinski@netronome.com, \"Michael S. Tsirkin\" <mst@redhat.com>,\n\tJason Wang <jasowang@redhat.com>, mchan@broadcom.com,\n\tJohn Fastabend <john.fastabend@gmail.com>, peter.waskiewicz.jr@intel.com,\n\tJesper Dangaard Brouer <brouer@redhat.com>,\n\tDaniel Borkmann <borkmann@iogearbox.net>,\n\tAlexei Starovoitov <alexei.starovoitov@gmail.com>,\n\tAndy Gospodarek <andy@greyhouse.net>", "Date": "Thu, 28 Sep 2017 14:57:28 +0200", "Message-ID": "<150660344827.2808.2281655132773972324.stgit@firesoul>", "In-Reply-To": "<150660339205.2808.7084136789768233829.stgit@firesoul>", "References": "<150660339205.2808.7084136789768233829.stgit@firesoul>", "User-Agent": "StGit/0.17.1-dirty", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "7bit", "X-Scanned-By": "MIMEDefang 2.79 on 10.5.11.16", "X-Greylist": "Sender IP whitelisted, not delayed by milter-greylist-4.5.16\n\t(mx1.redhat.com [10.5.110.26]);\n\tThu, 28 Sep 2017 12:57:30 +0000 (UTC)", "Sender": "netdev-owner@vger.kernel.org", "Precedence": "bulk", "List-ID": "<netdev.vger.kernel.org>", "X-Mailing-List": "netdev@vger.kernel.org" }, "content": "This sample program show how to use cpumap and the associated\ntracepoints.\n\nIt provides command line stats, which shows how the XDP-RX process,\ncpumap-enqueue and cpumap kthread dequeue is cooperating on a per CPU\nbasis. It also utilize the xdp_exception and xdp_redirect_err\ntranspoints to allow users quickly to identify setup issues.\n\nOne issue with ixgbe driver is that the driver reset the link when\nloading XDP. This reset the procfs smp_affinity settings. Thus,\nafter loading the program, these must be reconfigured. The easiest\nworkaround it to reduce the RX-queue to e.g. two via:\n\n # ethtool --set-channels ixgbe1 combined 2\n\nAnd then add CPUs above 0 and 1, like:\n\n # xdp_redirect_cpu --dev ixgbe1 --prog 2 --cpu 2 --cpu 3 --cpu 4\n\nAnother issue with ixgbe is that the page recycle mechanism is tied to\nthe RX-ring size. And the default setting of 512 elements is too\nsmall. This is the same issue with regular devmap XDP_REDIRECT.\nTo overcome this I've been using 1024 rx-ring size:\n\n # ethtool -G ixgbe1 rx 1024 tx 1024\n\nSigned-off-by: Jesper Dangaard Brouer <brouer@redhat.com>\n---\n samples/bpf/Makefile | 4 \n samples/bpf/xdp_redirect_cpu_kern.c | 640 +++++++++++++++++++++++++++++++++++\n samples/bpf/xdp_redirect_cpu_user.c | 639 +++++++++++++++++++++++++++++++++++\n 3 files changed, 1283 insertions(+)\n create mode 100644 samples/bpf/xdp_redirect_cpu_kern.c\n create mode 100644 samples/bpf/xdp_redirect_cpu_user.c", "diff": "diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile\nindex ebc2ad69b62c..52c4dab2c153 100644\n--- a/samples/bpf/Makefile\n+++ b/samples/bpf/Makefile\n@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example\n hostprogs-y += load_sock_ops\n hostprogs-y += xdp_redirect\n hostprogs-y += xdp_redirect_map\n+hostprogs-y += xdp_redirect_cpu\n hostprogs-y += xdp_monitor\n hostprogs-y += syscall_tp\n \n@@ -84,6 +85,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o\n per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o\n xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o\n xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o\n+xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o\n xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o\n syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o\n \n@@ -129,6 +131,7 @@ always += tcp_iw_kern.o\n always += tcp_clamp_kern.o\n always += xdp_redirect_kern.o\n always += xdp_redirect_map_kern.o\n+always += xdp_redirect_cpu_kern.o\n always += xdp_monitor_kern.o\n always += syscall_tp_kern.o\n \n@@ -169,6 +172,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf\n HOSTLOADLIBES_test_map_in_map += -lelf\n HOSTLOADLIBES_xdp_redirect += -lelf\n HOSTLOADLIBES_xdp_redirect_map += -lelf\n+HOSTLOADLIBES_xdp_redirect_cpu += -lelf\n HOSTLOADLIBES_xdp_monitor += -lelf\n HOSTLOADLIBES_syscall_tp += -lelf\n \ndiff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c\nnew file mode 100644\nindex 000000000000..7403e7841a88\n--- /dev/null\n+++ b/samples/bpf/xdp_redirect_cpu_kern.c\n@@ -0,0 +1,640 @@\n+/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)\n+ *\n+ * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.\n+ */\n+#include <uapi/linux/if_ether.h>\n+#include <uapi/linux/if_packet.h>\n+#include <uapi/linux/if_vlan.h>\n+#include <uapi/linux/ip.h>\n+#include <uapi/linux/ipv6.h>\n+#include <uapi/linux/in.h>\n+#include <uapi/linux/tcp.h>\n+#include <uapi/linux/udp.h>\n+\n+#include <uapi/linux/bpf.h>\n+#include \"bpf_helpers.h\"\n+\n+#define MAX_CPUS 12 /* WARNING - sync with _user.c */\n+\n+/* Special map type that can XDP_REDIRECT frames to another CPU */\n+struct bpf_map_def SEC(\"maps\") cpu_map = {\n+\t.type\t\t= BPF_MAP_TYPE_CPUMAP,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(u32),\n+\t.max_entries\t= MAX_CPUS,\n+};\n+\n+/* Common stats data record to keep userspace more simple */\n+struct datarec {\n+\t__u64 processed;\n+\t__u64 dropped;\n+\t__u64 issue;\n+};\n+\n+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success\n+ * feedback. Redirect TX errors can be caught via a tracepoint.\n+ */\n+struct bpf_map_def SEC(\"maps\") rx_cnt = {\n+\t.type\t\t= BPF_MAP_TYPE_PERCPU_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(struct datarec),\n+\t.max_entries\t= 1,\n+};\n+\n+/* Used by trace point */\n+struct bpf_map_def SEC(\"maps\") redirect_err_cnt = {\n+\t.type\t\t= BPF_MAP_TYPE_PERCPU_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(struct datarec),\n+\t.max_entries\t= 2,\n+\t/* TODO: have entries for all possible errno's */\n+};\n+\n+/* Used by trace point */\n+struct bpf_map_def SEC(\"maps\") cpumap_enqueue_cnt = {\n+\t.type\t\t= BPF_MAP_TYPE_PERCPU_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(struct datarec),\n+\t.max_entries\t= MAX_CPUS,\n+};\n+\n+/* Used by trace point */\n+struct bpf_map_def SEC(\"maps\") cpumap_kthread_cnt = {\n+\t.type\t\t= BPF_MAP_TYPE_PERCPU_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(struct datarec),\n+\t.max_entries\t= 1,\n+};\n+\n+/* Set of maps controlling available CPU, and for iterating through\n+ * selectable redirect CPUs.\n+ */\n+struct bpf_map_def SEC(\"maps\") cpus_available = {\n+\t.type\t\t= BPF_MAP_TYPE_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(u32),\n+\t.max_entries\t= MAX_CPUS,\n+};\n+struct bpf_map_def SEC(\"maps\") cpus_count = {\n+\t.type\t\t= BPF_MAP_TYPE_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(u32),\n+\t.max_entries\t= 1,\n+};\n+struct bpf_map_def SEC(\"maps\") cpus_iterator = {\n+\t.type\t\t= BPF_MAP_TYPE_PERCPU_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(u32),\n+\t.max_entries\t= 1,\n+};\n+\n+/* Used by trace point */\n+struct bpf_map_def SEC(\"maps\") exception_cnt = {\n+\t.type\t\t= BPF_MAP_TYPE_PERCPU_ARRAY,\n+\t.key_size\t= sizeof(u32),\n+\t.value_size\t= sizeof(struct datarec),\n+\t.max_entries\t= 1,\n+};\n+\n+/* Helper parse functions */\n+\n+/* Parse Ethernet layer 2, extract network layer 3 offset and protocol\n+ *\n+ * Returns false on error and non-supported ether-type\n+ */\n+struct vlan_hdr {\n+\t__be16 h_vlan_TCI;\n+\t__be16 h_vlan_encapsulated_proto;\n+};\n+\n+static __always_inline\n+bool parse_eth(struct ethhdr *eth, void *data_end,\n+\t u16 *eth_proto, u64 *l3_offset)\n+{\n+\tu16 eth_type;\n+\tu64 offset;\n+\n+\toffset = sizeof(*eth);\n+\tif ((void *)eth + offset > data_end)\n+\t\treturn false;\n+\n+\teth_type = eth->h_proto;\n+\n+\t/* Skip non 802.3 Ethertypes */\n+\tif (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))\n+\t\treturn false;\n+\n+\t/* Handle VLAN tagged packet */\n+\tif (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {\n+\t\tstruct vlan_hdr *vlan_hdr;\n+\n+\t\tvlan_hdr = (void *)eth + offset;\n+\t\toffset += sizeof(*vlan_hdr);\n+\t\tif ((void *)eth + offset > data_end)\n+\t\t\treturn false;\n+\t\teth_type = vlan_hdr->h_vlan_encapsulated_proto;\n+\t}\n+\t/* TODO: Handle double VLAN tagged packet */\n+\n+\t*eth_proto = ntohs(eth_type);\n+\t*l3_offset = offset;\n+\treturn true;\n+}\n+\n+static __always_inline\n+u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+ struct iphdr *iph = data + nh_off;\n+\tstruct udphdr *udph;\n+\tu16 dport;\n+\n+ if (iph + 1 > data_end)\n+ return 0;\n+\tif (!(iph->protocol == IPPROTO_UDP))\n+\t\treturn 0;\n+\n+\tudph = (void *)(iph + 1);\n+\tif (udph + 1 > data_end)\n+\t\treturn 0;\n+\n+\tdport = ntohs(udph->dest);\n+\treturn dport;\n+}\n+\n+static __always_inline\n+int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+ struct iphdr *iph = data + nh_off;\n+\n+ if (iph + 1 > data_end)\n+ return 0;\n+ return iph->protocol;\n+}\n+\n+static __always_inline\n+int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+ struct ipv6hdr *ip6h = data + nh_off;\n+\n+ if (ip6h + 1 > data_end)\n+ return 0;\n+ return ip6h->nexthdr;\n+}\n+\n+SEC(\"xdp_cpu_map0\")\n+int xdp_prognum0_no_touch(struct xdp_md *ctx)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+\tstruct datarec* rec;\n+\tu32 cpu_dest;\n+\tu32 key = 0;\n+\n+\t/* Only use first entry in cpus_available */\n+\tu32 *cpu_selected;\n+\tcpu_selected = bpf_map_lookup_elem(&cpus_available, &key);\n+\tif (!cpu_selected)\n+\t\treturn XDP_ABORTED;\n+\tcpu_dest = *cpu_selected;\n+\n+\t/* Count RX packet in map */\n+\trec = bpf_map_lookup_elem(&rx_cnt, &key);\n+\tif (rec)\n+\t\trec->processed++;\n+\n+\treturn bpf_redirect_map(&cpu_map, cpu_dest, 0);\n+}\n+\n+SEC(\"xdp_cpu_map1_touch_data\")\n+int xdp_prognum1_touch_data(struct xdp_md *ctx)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+\tstruct ethhdr *eth = data;\n+\tvolatile u16 eth_type;\n+\tstruct datarec* rec;\n+\tu32 cpu_dest;\n+\tu32 key = 0;\n+\n+\t/* Only use first entry in cpus_available */\n+\tu32 *cpu_selected;\n+\tcpu_selected = bpf_map_lookup_elem(&cpus_available, &key);\n+\tif (!cpu_selected)\n+\t\treturn XDP_ABORTED;\n+\tcpu_dest = *cpu_selected;\n+\n+\t/* Validate packet length is minimum Eth header size */\n+\tif (eth + 1 > data_end) {\n+\t\treturn XDP_ABORTED;\n+\t}\n+\n+\t/* Count RX packet in map */\n+\trec = bpf_map_lookup_elem(&rx_cnt, &key);\n+\tif (!rec)\n+\t\treturn XDP_ABORTED;\n+\trec->processed++;\n+\n+\t/* Read packet data, and use it (drop non 802.3 Ethertypes) */\n+\teth_type = eth->h_proto;\n+\tif (ntohs(eth_type) < ETH_P_802_3_MIN) {\n+\t\trec->dropped++;\n+\t\treturn XDP_DROP;\n+\t}\n+\n+\treturn bpf_redirect_map(&cpu_map, cpu_dest, 0);\n+}\n+\n+SEC(\"xdp_cpu_map2_round_robin\")\n+int xdp_prognum2_round_robin(struct xdp_md *ctx)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+\tstruct ethhdr *eth = data;\n+\tstruct datarec* rec;\n+\tu32 cpu_dest;\n+\tu32 *cpu_lookup;\n+\tu32 key0 = 0;\n+\n+\tu32 *cpu_selected;\n+\tu32 *cpu_iterator;\n+\tu32 *cpu_max;\n+\tu32 cpu_idx;\n+\n+\tcpu_max = bpf_map_lookup_elem(&cpus_count, &key0);\n+\tif (!cpu_max)\n+\t\treturn XDP_ABORTED;\n+\n+\tcpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);\n+\tif (!cpu_iterator)\n+\t\treturn XDP_ABORTED;\n+\tcpu_idx = *cpu_iterator;\n+\n+\t*cpu_iterator += 1;\n+\tif (*cpu_iterator == *cpu_max)\n+\t\t*cpu_iterator = 0;\n+\n+\tcpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);\n+\tif (!cpu_selected)\n+\t\treturn XDP_ABORTED;\n+\tcpu_dest = *cpu_selected;\n+\n+\t/* Count RX packet in map */\n+\trec = bpf_map_lookup_elem(&rx_cnt, &key0);\n+\tif (!rec)\n+\t\treturn XDP_ABORTED;\n+\trec->processed++;\n+\n+\t/* Check cpu_dest is valid */\n+\tcpu_lookup = bpf_map_lookup_elem(&cpu_map, &cpu_dest);\n+\tif (!cpu_lookup) {\n+\t\trec->issue++;\n+\t\treturn XDP_DROP;\n+\t}\n+\n+\tif (cpu_dest >= MAX_CPUS )\n+\t\treturn XDP_ABORTED;\n+\n+\treturn bpf_redirect_map(&cpu_map, cpu_dest, 0);\n+}\n+\n+SEC(\"xdp_cpu_map3_proto_separate\")\n+int xdp_prognum3_proto_separate(struct xdp_md *ctx)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+\tstruct ethhdr *eth = data;\n+\tu8 ip_proto = IPPROTO_UDP;\n+\tstruct datarec* rec;\n+\tu16 eth_proto = 0;\n+\tu64 l3_offset = 0;\n+\tu32 cpu_dest = 0;\n+\tu32 cpu_idx = 0;\n+\tu32 *cpu_lookup;\n+\tu32 key = 0;\n+\n+\t/* Count RX packet in map */\n+\trec = bpf_map_lookup_elem(&rx_cnt, &key);\n+\tif (!rec)\n+\t\treturn XDP_ABORTED;\n+\trec->processed++;\n+\n+\tif (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) {\n+\t\treturn XDP_PASS; /* Just skip */\n+\t}\n+\n+\t/* Extract L4 protocol */\n+\tswitch (eth_proto) {\n+\tcase ETH_P_IP:\n+\t\tip_proto = get_proto_ipv4(ctx, l3_offset);\n+\t\tbreak;\n+\tcase ETH_P_IPV6:\n+\t\tip_proto = get_proto_ipv6(ctx, l3_offset);\n+\t\tbreak;\n+\tcase ETH_P_ARP:\n+\t\tcpu_idx = 0; /* ARP packet handled on separate CPU */\n+\t\tbreak;\n+\tdefault:\n+\t\tcpu_idx = 0;\n+\t}\n+\n+\t/* Choose CPU based on L4 protocol */\n+\tswitch (ip_proto) {\n+\tcase IPPROTO_ICMP:\n+\tcase IPPROTO_ICMPV6:\n+\t\tcpu_idx = 2;\n+\t\tbreak;\n+\tcase IPPROTO_TCP:\n+\t\tcpu_idx = 0;\n+\t\tbreak;\n+\tcase IPPROTO_UDP:\n+\t\tcpu_idx = 1;\n+\t\tbreak;\n+\tdefault:\n+\t\tcpu_idx = 0;\n+\t}\n+\n+\tcpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);\n+\tif (!cpu_lookup)\n+\t\treturn XDP_ABORTED;\n+\tcpu_dest = *cpu_lookup;\n+\n+\tif (cpu_dest >= MAX_CPUS )\n+\t\treturn XDP_ABORTED;\n+\n+\t/* Check cpu_dest is valid */\n+\tcpu_lookup = bpf_map_lookup_elem(&cpu_map, &cpu_dest);\n+\tif (!cpu_lookup) {\n+\t\trec->issue++;\n+\t\treturn XDP_DROP;\n+\t}\n+\n+\treturn bpf_redirect_map(&cpu_map, cpu_dest, 0);\n+}\n+\n+SEC(\"xdp_cpu_map4_ddos_filter_pktgen\")\n+int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)\n+{\n+\tvoid *data_end = (void *)(long)ctx->data_end;\n+\tvoid *data = (void *)(long)ctx->data;\n+\tstruct ethhdr *eth = data;\n+\tu8 ip_proto = IPPROTO_UDP;\n+\tstruct datarec* rec;\n+\tu16 eth_proto = 0;\n+\tu64 l3_offset = 0;\n+\tu32 cpu_dest = 0;\n+\tu32 cpu_idx = 0;\n+\tu16 dest_port;\n+\tu32 *cpu_lookup;\n+\tu32 key = 0;\n+\n+\t/* Count RX packet in map */\n+\trec = bpf_map_lookup_elem(&rx_cnt, &key);\n+\tif (!rec)\n+\t\treturn XDP_ABORTED;\n+\trec->processed++;\n+\n+\tif (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) {\n+\t\treturn XDP_PASS; /* Just skip */\n+\t}\n+\n+\t/* Extract L4 protocol */\n+\tswitch (eth_proto) {\n+\tcase ETH_P_IP:\n+\t\tip_proto = get_proto_ipv4(ctx, l3_offset);\n+\t\tbreak;\n+\tcase ETH_P_IPV6:\n+\t\tip_proto = get_proto_ipv6(ctx, l3_offset);\n+\t\tbreak;\n+\tcase ETH_P_ARP:\n+\t\tcpu_idx = 0; /* ARP packet handled on separate CPU */\n+\t\tbreak;\n+\tdefault:\n+\t\tcpu_idx = 0;\n+\t}\n+\n+\t/* Choose CPU based on L4 protocol */\n+\tswitch (ip_proto) {\n+\tcase IPPROTO_ICMP:\n+\tcase IPPROTO_ICMPV6:\n+\t\tcpu_idx = 2;\n+\t\tbreak;\n+\tcase IPPROTO_TCP:\n+\t\tcpu_idx = 0;\n+\t\tbreak;\n+\tcase IPPROTO_UDP:\n+\t\tcpu_idx = 1;\n+\t\t/* DDoS filter UDP port 9 (pktgen) */\n+\t\tdest_port = get_dest_port_ipv4_udp(ctx, l3_offset);\n+\t\tif (dest_port == 9) {\n+\t\t\tif (rec)\n+\t\t\t\trec->dropped++;\n+\t\t\treturn XDP_DROP;\n+\t\t}\n+\t\tbreak;\n+\tdefault:\n+\t\tcpu_idx = 0;\n+\t}\n+\n+\tcpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);\n+\tif (!cpu_lookup)\n+\t\treturn XDP_ABORTED;\n+\tcpu_dest = *cpu_lookup;\n+\n+\tif (cpu_dest >= MAX_CPUS )\n+\t\treturn XDP_ABORTED;\n+\n+\t/* Check cpu_dest is valid */\n+\tcpu_lookup = bpf_map_lookup_elem(&cpu_map, &cpu_dest);\n+\tif (!cpu_lookup) {\n+\t\trec->issue++;\n+\t\treturn XDP_DROP;\n+\t}\n+\n+\tif (cpu_dest >= MAX_CPUS )\n+\t\treturn XDP_ABORTED;\n+\n+\treturn bpf_redirect_map(&cpu_map, cpu_dest, 0);\n+}\n+\n+\n+char _license[] SEC(\"license\") = \"GPL\";\n+\n+/*** Trace point code ***/\n+\n+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format\n+ * Code in: kernel/include/trace/events/xdp.h\n+ */\n+struct xdp_redirect_ctx {\n+\tunsigned short common_type;\t//\toffset:0; size:2; signed:0;\n+\tunsigned char common_flags;\t//\toffset:2; size:1; signed:0;\n+\tunsigned char common_preempt_count;//\toffset:3; size:1; signed:0;\n+\tint common_pid;\t\t\t//\toffset:4; size:4; signed:1;\n+\n+\tint prog_id;\t\t\t//\toffset:8; size:4; signed:1;\n+\tu32 act;\t\t\t//\toffset:12 size:4; signed:0;\n+\tint ifindex;\t\t\t//\toffset:16 size:4; signed:1;\n+\tint err;\t\t\t//\toffset:20 size:4; signed:1;\n+\tint to_ifindex;\t\t\t//\toffset:24 size:4; signed:1;\n+\tu32 map_id;\t\t\t//\toffset:28 size:4; signed:0;\n+\tint map_index;\t\t\t//\toffset:32 size:4; signed:1;\n+};\t\t\t\t\t//\toffset:36\n+\n+enum {\n+\tXDP_REDIRECT_SUCCESS = 0,\n+\tXDP_REDIRECT_ERROR = 1\n+};\n+\n+static __always_inline\n+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)\n+{\n+\tu32 key = XDP_REDIRECT_ERROR;\n+\tstruct datarec *rec;\n+\tint err = ctx->err;\n+\n+\tif (!err)\n+\t\tkey = XDP_REDIRECT_SUCCESS;\n+\n+\trec = bpf_map_lookup_elem(&redirect_err_cnt, &key);\n+\tif (!rec)\n+\t\treturn 0;\n+\trec->dropped += 1;\n+\n+\treturn 0; /* Indicate event was filtered (no further processing)*/\n+\t/*\n+\t * Returning 1 here would allow e.g. a perf-record tracepoint\n+\t * to see and record these events, but it doesn't work well\n+\t * in-practice as stopping perf-record also unload this\n+\t * bpf_prog. Plus, there is additional overhead of doing so.\n+\t */\n+}\n+\n+SEC(\"tracepoint/xdp/xdp_redirect_err\")\n+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)\n+{\n+\treturn xdp_redirect_collect_stat(ctx);\n+}\n+\n+\n+SEC(\"tracepoint/xdp/xdp_redirect_map_err\")\n+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)\n+{\n+\treturn xdp_redirect_collect_stat(ctx);\n+}\n+\n+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format\n+ * Code in: kernel/include/trace/events/xdp.h\n+ */\n+struct xdp_exception_ctx {\n+\tunsigned short common_type;\t//\toffset:0; size:2; signed:0;\n+\tunsigned char common_flags;\t//\toffset:2; size:1; signed:0;\n+\tunsigned char common_preempt_count;//\toffset:3; size:1; signed:0;\n+\tint common_pid;\t\t\t//\toffset:4; size:4; signed:1;\n+\n+\tint prog_id;\t\t\t//\toffset:8; size:4; signed:1;\n+\tu32 act;\t\t\t//\toffset:12; size:4; signed:0;\n+\tint ifindex;\t\t\t//\toffset:16; size:4; signed:1;\n+};\n+\n+SEC(\"tracepoint/xdp/xdp_exception\")\n+int trace_xdp_exception(struct xdp_exception_ctx *ctx)\n+{\n+\tstruct datarec *rec;\n+\tu32 key = 0;\n+\n+\trec = bpf_map_lookup_elem(&exception_cnt, &key);\n+\tif (!rec)\n+\t\treturn 1;\n+\trec->dropped += 1;\n+\n+\treturn 0;\n+}\n+\n+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format\n+ * Code in: kernel/include/trace/events/xdp.h\n+ */\n+struct cpumap_enqueue_ctx {\n+\tunsigned short common_type;\t//\toffset:0; size:2; signed:0;\n+\tunsigned char common_flags;\t//\toffset:2; size:1; signed:0;\n+\tunsigned char common_preempt_count;//\toffset:3; size:1; signed:0;\n+\tint common_pid;\t\t\t//\toffset:4; size:4; signed:1;\n+\n+\tint map_id;\t\t\t//\toffset:8; size:4; signed:1;\n+\tu32 act;\t\t\t//\toffset:12; size:4; signed:0;\n+\tint cpu;\t\t\t//\toffset:16; size:4; signed:1;\n+\tunsigned int drops;\t\t//\toffset:20; size:4; signed:0;\n+\tunsigned int processed;\t\t//\toffset:24; size:4; signed:0;\n+\tint to_cpu;\t\t\t//\toffset:28; size:4; signed:1;\n+};\n+\n+SEC(\"tracepoint/xdp/xdp_cpumap_enqueue\")\n+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)\n+{\n+\tu32 to_cpu = ctx->to_cpu;\n+\tstruct datarec *rec;\n+\n+\tif (to_cpu >= MAX_CPUS)\n+\t\treturn 1;\n+\n+\trec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);\n+\tif (!rec)\n+\t\treturn 0;\n+\trec->processed += ctx->processed;\n+\trec->dropped += ctx->drops;\n+\n+\t/* Detect misconfig. Redirect to \"same\" CPU, makes no sense\n+\t * and indicate user of cpumap have not done proper IRQ RXq\n+\t * setup.\n+\t */\n+\tif (ctx->cpu == ctx->to_cpu)\n+\t\trec->issue += ctx->processed;\n+\n+\t/* Keep seperate map for feedback loop */\n+\t// have map that boolean mark drops, and RX side can clean\n+\t// this, indicating it have got the notification. TODO, should\n+\t// this also contain a (k)timestamp.\n+\n+\treturn 0;\n+}\n+\n+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format\n+ * Code in: kernel/include/trace/events/xdp.h\n+ */\n+struct cpumap_kthread_ctx {\n+\tunsigned short common_type;\t//\toffset:0; size:2; signed:0;\n+\tunsigned char common_flags;\t//\toffset:2; size:1; signed:0;\n+\tunsigned char common_preempt_count;//\toffset:3; size:1; signed:0;\n+\tint common_pid;\t\t\t//\toffset:4; size:4; signed:1;\n+\n+\tint map_id;\t\t\t//\toffset:8; size:4; signed:1;\n+\tu32 act;\t\t\t//\toffset:12; size:4; signed:0;\n+\tint cpu;\t\t\t//\toffset:16; size:4; signed:1;\n+\tunsigned int drops;\t\t//\toffset:20; size:4; signed:0;\n+\tunsigned int processed;\t\t//\toffset:24; size:4; signed:0;\n+\tint time_limit;\t\t\t//\toffset:28; size:4; signed:1;\n+};\n+\n+SEC(\"tracepoint/xdp/xdp_cpumap_kthread\")\n+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)\n+{\n+\tstruct datarec *rec;\n+\tu32 key = 0;\n+\n+\trec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);\n+\tif (!rec)\n+\t\treturn 0;\n+\trec->processed += ctx->processed;\n+\trec->dropped += ctx->drops;\n+\n+\t/* Detect when time limit was exceeded, but queue was not-empty */\n+\tif (ctx->processed > 0 && ctx->time_limit)\n+\t\trec->issue++;\n+\n+\treturn 0;\n+}\n+\ndiff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c\nnew file mode 100644\nindex 000000000000..c2c971ab7078\n--- /dev/null\n+++ b/samples/bpf/xdp_redirect_cpu_user.c\n@@ -0,0 +1,639 @@\n+/* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.\n+ */\n+static const char *__doc__=\n+ \" XDP redirect with a CPU-map type \\\"BPF_MAP_TYPE_CPUMAP\\\"\";\n+\n+#include <errno.h>\n+#include <signal.h>\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <stdbool.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include <locale.h>\n+#include <sys/resource.h>\n+#include <getopt.h>\n+#include <net/if.h>\n+#include <time.h>\n+\n+#include <arpa/inet.h>\n+#include <linux/if_link.h>\n+\n+#define MAX_CPUS 12 /* WARNING - sync with _kern.c */\n+\n+/* How many xdp_progs are defined in _kern.c */\n+#define MAX_PROG 5\n+\n+/* Wanted to get rid of bpf_load.h and fake-\"libbpf.h\" (and instead\n+ * use bpf/libbpf.h), but cannot as (currently) needed for XDP\n+ * attaching to a device via set_link_xdp_fd()\n+ */\n+#include \"libbpf.h\"\n+#include \"bpf_load.h\"\n+\n+#include \"bpf_util.h\"\n+\n+static int ifindex = -1;\n+static char ifname_buf[IF_NAMESIZE];\n+static char *ifname = NULL;\n+static __u32 xdp_flags = 0;\n+\n+/* Exit return codes */\n+#define EXIT_OK\t\t\t0\n+#define EXIT_FAIL\t\t1\n+#define EXIT_FAIL_OPTION\t2\n+#define EXIT_FAIL_XDP\t\t3\n+#define EXIT_FAIL_BPF\t\t4\n+#define EXIT_FAIL_MEM\t\t5\n+\n+static const struct option long_options[] = {\n+\t{\"help\",\tno_argument,\t\tNULL, 'h' },\n+\t{\"dev\",\t\trequired_argument,\tNULL, 'd' },\n+\t{\"skb-mode\", \tno_argument,\t\tNULL, 'S' },\n+\t{\"debug\",\tno_argument,\t\tNULL, 'D' },\n+\t{\"sec\", \trequired_argument,\tNULL, 's' },\n+\t{\"prognum\", \trequired_argument,\tNULL, 'p' },\n+\t{\"qsize\", \trequired_argument,\tNULL, 'q' },\n+\t{\"cpu\", \trequired_argument,\tNULL, 'c' },\n+\t{\"no-separators\",no_argument,\t\tNULL, 'z' },\n+\t{0, 0, NULL, 0 }\n+};\n+\n+static void int_exit(int sig)\n+{\n+\tfprintf(stderr,\n+\t\t\"Interrupted: Removing XDP program on ifindex:%d device:%s\\n\",\n+\t\tifindex, ifname);\n+\tif (ifindex > -1)\n+\t\tset_link_xdp_fd(ifindex, -1, xdp_flags);\n+\texit(EXIT_OK);\n+}\n+\n+static void usage(char *argv[])\n+{\n+\tint i;\n+\tprintf(\"\\nDOCUMENTATION:\\n%s\\n\", __doc__);\n+\tprintf(\"\\n\");\n+\tprintf(\" Usage: %s (options-see-below)\\n\",\n+\t argv[0]);\n+\tprintf(\" Listing options:\\n\");\n+\tfor (i = 0; long_options[i].name != 0; i++) {\n+\t\tprintf(\" --%-12s\", long_options[i].name);\n+\t\tif (long_options[i].flag != NULL)\n+\t\t\tprintf(\" flag (internal value:%d)\",\n+\t\t\t *long_options[i].flag);\n+\t\telse\n+\t\t\tprintf(\" short-option: -%c\",\n+\t\t\t long_options[i].val);\n+\t\tprintf(\"\\n\");\n+\t}\n+\tprintf(\"\\n\");\n+}\n+\n+/* gettime returns the current time of day in nanoseconds.\n+ * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)\n+ * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE)\n+ */\n+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */\n+static __u64 gettime(void)\n+{\n+\tstruct timespec t;\n+\tint res;\n+\n+\tres = clock_gettime(CLOCK_MONOTONIC, &t);\n+\tif (res < 0) {\n+\t\tfprintf(stderr, \"Error with gettimeofday! (%i)\\n\", res);\n+\t\texit(EXIT_FAIL);\n+\t}\n+\treturn (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;\n+}\n+\n+/* Common stats data record shared with _kern.c */\n+struct datarec {\n+\t__u64 processed;\n+\t__u64 dropped;\n+\t__u64 issue;\n+};\n+struct record {\n+\t__u64 timestamp;\n+\tstruct datarec total;\n+\tstruct datarec *cpu;\n+};\n+struct stats_record {\n+\tstruct record rx_cnt;\n+\tstruct record redir_err;\n+\tstruct record kthread;\n+\tstruct record exception;\n+\tstruct record enq[MAX_CPUS];\n+};\n+\n+static bool map_collect_percpu(int fd, __u32 key, struct record* rec)\n+{\n+\t/* For percpu maps, userspace gets a value per possible CPU */\n+\tunsigned int nr_cpus = bpf_num_possible_cpus();\n+\tstruct datarec values[nr_cpus];\n+\t__u64 sum_processed = 0;\n+\t__u64 sum_dropped = 0;\n+\t__u64 sum_issue = 0;\n+\tint i;\n+\n+\tif ((bpf_map_lookup_elem(fd, &key, values)) != 0) {\n+\t\tfprintf(stderr,\n+\t\t\t\"ERR: bpf_map_lookup_elem failed key:0x%X\\n\", key);\n+\t\treturn false;\n+\t}\n+\t/* Get time as close as possible to reading map contents */\n+\trec->timestamp = gettime();\n+\n+\t/* Record and sum values from each CPU */\n+\tfor (i = 0; i < nr_cpus; i++) {\n+\t\trec->cpu[i].processed = values[i].processed;\n+\t\tsum_processed += values[i].processed;\n+\t\trec->cpu[i].dropped = values[i].dropped;\n+\t\tsum_dropped += values[i].dropped;\n+\t\trec->cpu[i].issue = values[i].issue;\n+\t\tsum_issue += values[i].issue;\n+\t}\n+\trec->total.processed = sum_processed;\n+\trec->total.dropped = sum_dropped;\n+\trec->total.issue = sum_issue;\n+\treturn true;\n+}\n+\n+static struct datarec *alloc_record_per_cpu(void)\n+{\n+\tunsigned int nr_cpus = bpf_num_possible_cpus();\n+\tstruct datarec *array;\n+\tsize_t size;\n+\n+\tsize = sizeof(struct datarec) * nr_cpus;\n+\tarray = malloc(size);\n+\tmemset(array, 0, size);\n+\tif (!array) {\n+\t\tfprintf(stderr, \"Mem alloc error (nr_cpus:%u)\\n\", nr_cpus);\n+\t\texit(EXIT_FAIL_MEM);\n+\t}\n+\treturn array;\n+}\n+\n+static struct stats_record* alloc_stats_record(void)\n+{\n+\tstruct stats_record* rec;\n+\tint i;\n+\n+\trec = malloc(sizeof(*rec));\n+\tmemset(rec, 0, sizeof(*rec));\n+\tif (!rec) {\n+\t\tfprintf(stderr, \"Mem alloc error\\n\");\n+\t\texit(EXIT_FAIL_MEM);\n+\t}\n+\trec->rx_cnt.cpu = alloc_record_per_cpu();\n+\trec->redir_err.cpu = alloc_record_per_cpu();\n+\trec->kthread.cpu = alloc_record_per_cpu();\n+\trec->exception.cpu = alloc_record_per_cpu();\n+\tfor (i = 0; i < MAX_CPUS; i++)\n+\t\trec->enq[i].cpu = alloc_record_per_cpu();\n+\n+\treturn rec;\n+}\n+\n+static void free_stats_record(struct stats_record* r)\n+{\n+\tint i;\n+\n+\tfor (i = 0; i < MAX_CPUS; i++)\n+\t\tfree(r->enq[i].cpu);\n+\tfree(r->exception.cpu);\n+\tfree(r->kthread.cpu);\n+\tfree(r->redir_err.cpu);\n+\tfree(r->rx_cnt.cpu);\n+\tfree(r);\n+}\n+\n+static double calc_period(struct record *r, struct record *p)\n+{\n+\tdouble period_ = 0;\n+\t__u64 period = 0;\n+\n+\tperiod = r->timestamp - p->timestamp;\n+\tif (period > 0) {\n+\t\tperiod_ = ((double) period / NANOSEC_PER_SEC);\n+\t}\n+\treturn period_;\n+}\n+\n+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)\n+{\n+\t__u64 packets = 0;\n+\t__u64 pps = 0;\n+\n+\tif (period_ > 0) {\n+\t\tpackets = r->processed - p->processed;\n+\t\tpps = packets / period_;\n+\t}\n+\treturn pps;\n+}\n+\n+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)\n+{\n+\t__u64 packets = 0;\n+\t__u64 pps = 0;\n+\n+\tif (period_ > 0) {\n+\t\tpackets = r->dropped - p->dropped;\n+\t\tpps = packets / period_;\n+\t}\n+\treturn pps;\n+}\n+\n+static __u64 calc_errs_pps(struct datarec *r,\n+\t\t\t struct datarec *p, double period_)\n+{\n+\t__u64 packets = 0;\n+\t__u64 pps = 0;\n+\n+\tif (period_ > 0) {\n+\t\tpackets = r->issue - p->issue;\n+\t\tpps = packets / period_;\n+\t}\n+\treturn pps;\n+}\n+\n+static void stats_print(struct stats_record *stats_rec,\n+\t\t\tstruct stats_record *stats_prev,\n+\t\t\tint prog_num)\n+{\n+\tunsigned int nr_cpus = bpf_num_possible_cpus();\n+\tdouble pps = 0, drop = 0, err = 0;\n+\tstruct record *rec, *prev;\n+\tint to_cpu;\n+\tdouble t;\n+\tint i;\n+\n+\t/* Header */\n+\tprintf(\"Running XDP/eBPF prog_num:%d\\n\", prog_num);\n+\tprintf(\"%-15s %-7s %-14s %-11s %-9s\\n\",\n+\t \"XDP-cpumap\", \"CPU:to\", \"pps\", \"drop-pps\", \"extra-info\");\n+\n+\t/* XDP rx_cnt */\n+\t{\n+\t\tchar * fmt_rx = \"%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\\n\";\n+\t\tchar * fm2_rx = \"%-15s %-7s %'-14.0f %'-11.0f\\n\";\n+\t\tchar *errstr = \"\";\n+\n+\t\trec = &stats_rec->rx_cnt;\n+\t\tprev = &stats_prev->rx_cnt;\n+\t\tt = calc_period(rec, prev);\n+\t\tfor (i = 0; i < nr_cpus; i++) {\n+\t\t\tstruct datarec *r = &rec->cpu[i];\n+\t\t\tstruct datarec *p = &prev->cpu[i];\n+\t\t\tpps = calc_pps(r, p, t);\n+\t\t\tdrop = calc_drop_pps(r, p, t);\n+\t\t\terr = calc_errs_pps(r, p, t);\n+\t\t\tif (err > 0)\n+\t\t\t\terrstr = \"cpu-dest/err\";\n+\t\t\tif (pps > 0)\n+\t\t\t\tprintf(fmt_rx, \"XDP-RX\",\n+\t\t\t\t i, pps, drop, err, errstr);\n+\t\t}\n+\t\tpps = calc_pps(&rec->total, &prev->total, t);\n+\t\tdrop = calc_drop_pps(&rec->total, &prev->total, t);\n+\t\terr = calc_errs_pps(&rec->total, &prev->total, t);\n+\t\tprintf(fm2_rx, \"XDP-RX\", \"total\", pps, drop);\n+\t}\n+\n+\t/* cpumap enqueue stats */\n+\tfor (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {\n+\t\tchar *fmt=\"%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.0f %s\\n\";\n+\t\tchar *fm2=\"%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.0f %s\\n\";\n+\t\tchar *errstr = \"\";\n+\n+\t\trec = &stats_rec->enq[to_cpu];\n+\t\tprev = &stats_prev->enq[to_cpu];\n+\t\tt = calc_period(rec, prev);\n+\t\tfor (i = 0; i < nr_cpus; i++) {\n+\t\t\tstruct datarec *r = &rec->cpu[i];\n+\t\t\tstruct datarec *p = &prev->cpu[i];\n+\t\t\tpps = calc_pps(r, p, t);\n+\t\t\tdrop = calc_drop_pps(r, p, t);\n+\t\t\terr = calc_errs_pps(r, p, t);\n+\t\t\tif (err > 0)\n+\t\t\t\terrstr = \"same-cpu/pps\";\n+\t\t\tif (pps > 0)\n+\t\t\t\tprintf(fmt, \"cpumap-enqueue\",\n+\t\t\t\t i, to_cpu, pps, drop, err, errstr);\n+\t\t}\n+\t\tpps = calc_pps(&rec->total, &prev->total, t);\n+\t\tif (pps > 0) {\n+\t\t\tdrop = calc_drop_pps(&rec->total, &prev->total, t);\n+\t\t\terr = calc_errs_pps(&rec->total, &prev->total, t);\n+\t\t\tprintf(fm2, \"cpumap-enqueue\",\n+\t\t\t \"sum\", to_cpu, pps, drop, err, errstr);\n+\t\t}\n+\t}\n+\n+\t/* cpumap kthread stats */\n+\t{\n+\t\tchar *fmt_k = \"%-15s %-7d %'-14.0f %'-11.0f %-10.0f %s\\n\";\n+\t\tchar *fm2_k = \"%-15s %-7s %'-14.0f %'-11.0f %-10.0f %s\\n\";\n+\t\tchar *errstr = \"\";\n+\t\trec = &stats_rec->kthread;\n+\t\tprev = &stats_prev->kthread;\n+\t\tt = calc_period(rec, prev);\n+\t\tfor (i = 0; i < nr_cpus; i++) {\n+\t\t\tstruct datarec *r = &rec->cpu[i];\n+\t\t\tstruct datarec *p = &prev->cpu[i];\n+\t\t\tpps = calc_pps(r, p, t);\n+\t\t\tdrop = calc_drop_pps(r, p, t);\n+\t\t\terr = calc_errs_pps(r, p, t);\n+\t\t\tif (err > 0)\n+\t\t\t\terrstr = \"time_exceed\";\n+\t\t\tif (pps > 0)\n+\t\t\t\tprintf(fmt_k, \"cpumap_kthread\",\n+\t\t\t\t i, pps, drop, err, errstr);\n+\t\t}\n+\t\tpps = calc_pps(&rec->total, &prev->total, t);\n+\t\tdrop = calc_drop_pps(&rec->total, &prev->total, t);\n+\t\tprintf(fm2_k, \"cpumap_kthread\", \"total\", pps, drop);\n+\t}\n+\n+\t/* XDP redirect err tracepoints (very unlikely) */\n+\t{\n+\t\tchar *fmt_err = \"%-15s %-7d %'-14.0f %'-11.0f\\n\";\n+\t\tchar *fm2_err = \"%-15s %-7s %'-14.0f %'-11.0f\\n\";\n+\t\trec = &stats_rec->redir_err;\n+\t\tprev = &stats_prev->redir_err;\n+\t\tt = calc_period(rec, prev);\n+\t\tfor (i = 0; i < nr_cpus; i++) {\n+\t\t\tstruct datarec *r = &rec->cpu[i];\n+\t\t\tstruct datarec *p = &prev->cpu[i];\n+\t\t\tpps = calc_pps(r, p, t);\n+\t\t\tdrop = calc_drop_pps(r, p, t);\n+\t\t\tif (pps > 0)\n+\t\t\t\tprintf(fmt_err, \"redirect_err\", i, pps, drop);\n+\t\t}\n+\t\tpps = calc_pps(&rec->total, &prev->total, t);\n+\t\tdrop = calc_drop_pps(&rec->total, &prev->total, t);\n+\t\tprintf(fm2_err, \"redirect_err\", \"total\", pps, drop);\n+\t}\n+\n+\t/* XDP general exception tracepoints */\n+\t{\n+\t\tchar *fmt_err = \"%-15s %-7d %'-14.0f %'-11.0f\\n\";\n+\t\tchar *fm2_err = \"%-15s %-7s %'-14.0f %'-11.0f\\n\";\n+\t\trec = &stats_rec->exception;\n+\t\tprev = &stats_prev->exception;\n+\t\tt = calc_period(rec, prev);\n+\t\tfor (i = 0; i < nr_cpus; i++) {\n+\t\t\tstruct datarec *r = &rec->cpu[i];\n+\t\t\tstruct datarec *p = &prev->cpu[i];\n+\t\t\tpps = calc_pps(r, p, t);\n+\t\t\tdrop = calc_drop_pps(r, p, t);\n+\t\t\tif (pps > 0)\n+\t\t\t\tprintf(fmt_err, \"xdp_exception\", i, pps, drop);\n+\t\t}\n+\t\tpps = calc_pps(&rec->total, &prev->total, t);\n+\t\tdrop = calc_drop_pps(&rec->total, &prev->total, t);\n+\t\tprintf(fm2_err, \"xdp_exception\", \"total\", pps, drop);\n+\t}\n+\n+\tprintf(\"\\n\");\n+\tfflush(stdout);\n+}\n+\n+static void stats_collect(struct stats_record *rec)\n+{\n+\tint fd, i;\n+\n+\tfd = map_fd[1]; /* map: rx_cnt */\n+\tmap_collect_percpu(fd, 0, &rec->rx_cnt);\n+\n+\tfd = map_fd[2]; /* map: redirect_err_cnt */\n+\tmap_collect_percpu(fd, 1, &rec->redir_err);\n+\n+\tfd = map_fd[3]; /* map: cpumap_enqueue_cnt */\n+\tfor (i = 0; i < MAX_CPUS; i++) {\n+\t\tmap_collect_percpu(fd, i, &rec->enq[i]);\n+\t}\n+\n+\tfd = map_fd[4]; /* map: cpumap_kthread_cnt */\n+\tmap_collect_percpu(fd, 0, &rec->kthread);\n+\n+\tfd = map_fd[8]; /* map: exception_cnt */\n+\tmap_collect_percpu(fd, 0, &rec->exception);\n+}\n+\n+\n+/* Pointer swap trick */\n+static inline void swap(struct stats_record **a, struct stats_record **b)\n+{\n+\tstruct stats_record *tmp;\n+\n+\ttmp = *a;\n+\t*a = *b;\n+\t*b = tmp;\n+}\n+\n+static void stats_poll(int interval, bool use_separators, int prog_num)\n+{\n+\tstruct stats_record *record, *prev;\n+\n+\trecord = alloc_stats_record();\n+\tprev = alloc_stats_record();\n+\tstats_collect(record);\n+\n+\t/* Trick to pretty printf with thousands separators use %' */\n+\tif (use_separators)\n+\t\tsetlocale(LC_NUMERIC, \"en_US\");\n+\n+\twhile (1) {\n+\t\tswap(&prev, &record);\n+\t\tstats_collect(record);\n+\t\tstats_print(record, prev, prog_num);\n+\t\tsleep(interval);\n+\t}\n+\n+\tfree_stats_record(record);\n+\tfree_stats_record(prev);\n+}\n+\n+static int create_cpu_entry(__u32 cpu, __u32 queue_size,\n+\t\t\t __u32 avail_idx, bool new)\n+{\n+\t__u32 curr_cpus_count;\n+\t__u32 key = 0;\n+\tint ret;\n+\n+\t/* Add a CPU entry to cpumap, as this allocate a cpu entry in\n+\t * the kernel for the cpu.\n+\t */\n+\tret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0);\n+\tif (ret) {\n+\t\tfprintf(stderr, \"Create CPU entry failed\\n\");\n+\t\texit(EXIT_FAIL_BPF);\n+\t}\n+\n+\t/* Inform bpf_prog's that a new CPU is available to select\n+\t * from via some control maps.\n+\t */\n+\t/* map_fd[5] = cpus_available */\n+\tret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0);\n+\tif (ret) {\n+\t\tfprintf(stderr, \"Add to avail CPUs failed\\n\");\n+\t\texit(EXIT_FAIL_BPF);\n+\t}\n+\n+\t/* When not replacing/updating existing entry, bump the count */\n+\t/* map_fd[6] = cpus_count */\n+\tif (new) {\n+\t\tret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count);\n+\t\tif (ret) {\n+\t\t\tfprintf(stderr, \"Failed reading curr cpus_count \\n\");\n+\t\t\texit(EXIT_FAIL_BPF);\n+\t\t}\n+\t\tcurr_cpus_count++;\n+\t\tret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0);\n+\t\tif (ret) {\n+\t\t\tfprintf(stderr, \"Failed write curr cpus_count \\n\");\n+\t\t\texit(EXIT_FAIL_BPF);\n+\t\t}\n+\t}\n+\t/* map_fd[7] = cpus_iterator */\n+\tprintf(\"%s CPU:%u as idx:%u cpus_count:%u\\n\",\n+\t new ? \"Add-new\":\"Replace\", cpu, avail_idx, curr_cpus_count);\n+\n+\treturn 0;\n+}\n+\n+int main(int argc, char **argv)\n+{\n+\tstruct rlimit r = {10 * 1024*1024, RLIM_INFINITY};\n+\tbool use_separators = true;\n+\tchar filename[256];\n+\tbool debug = false;\n+\tint added_cpus = 0;\n+\tint longindex = 0;\n+\tint interval = 2;\n+\tint prog_num = 0;\n+\tint add_cpu = -1;\n+\t__u32 qsize;\n+\tint opt;\n+\n+\t/* Notice: choosing he queue size is very important with the\n+\t * ixgbe driver, because it's driver page recycling trick is\n+\t * dependend on pages being returned quickly. The number of\n+\t * out-standing packets in the system must be less-than 2x\n+\t * RX-ring size.\n+\t */\n+\tqsize = 128+64;\n+\n+\tsnprintf(filename, sizeof(filename), \"%s_kern.o\", argv[0]);\n+\n+\tif (setrlimit(RLIMIT_MEMLOCK, &r)) {\n+\t\tperror(\"setrlimit(RLIMIT_MEMLOCK)\");\n+\t\treturn 1;\n+\t}\n+\n+\tif (load_bpf_file(filename)) {\n+\t\tfprintf(stderr, \"ERR in load_bpf_file(): %s\", bpf_log_buf);\n+\t\treturn EXIT_FAIL;\n+\t}\n+\n+\tif (!prog_fd[0]) {\n+\t\tfprintf(stderr, \"ERR: load_bpf_file: %s\\n\", strerror(errno));\n+\t\treturn EXIT_FAIL;\n+\t}\n+\n+\t/* Parse commands line args */\n+\twhile ((opt = getopt_long(argc, argv, \"hSd:\",\n+\t\t\t\t long_options, &longindex)) != -1) {\n+\t\tswitch (opt) {\n+\t\tcase 'd':\n+\t\t\tif (strlen(optarg) >= IF_NAMESIZE) {\n+\t\t\t\tfprintf(stderr, \"ERR: --dev name too long\\n\");\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tifname = (char *)&ifname_buf;\n+\t\t\tstrncpy(ifname, optarg, IF_NAMESIZE);\n+\t\t\tifindex = if_nametoindex(ifname);\n+\t\t\tif (ifindex == 0) {\n+\t\t\t\tfprintf(stderr,\n+\t\t\t\t\t\"ERR: --dev name unknown err(%d):%s\\n\",\n+\t\t\t\t\terrno, strerror(errno));\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tbreak;\n+\t\tcase 's':\n+\t\t\tinterval = atoi(optarg);\n+\t\t\tbreak;\n+\t\tcase 'S':\n+\t\t\txdp_flags |= XDP_FLAGS_SKB_MODE;\n+\t\t\tbreak;\n+\t\tcase 'D':\n+\t\t\tdebug = true;\n+\t\t\tbreak;\n+\t\tcase 'z':\n+\t\t\tuse_separators = false;\n+\t\t\tbreak;\n+\t\tcase 'p':\n+\t\t\t/* Selecting eBPF prog to load */\n+\t\t\tprog_num = atoi(optarg);\n+\t\t\tif (prog_num < 0 || prog_num >= MAX_PROG) {\n+\t\t\t\tfprintf(stderr,\n+\t\t\t\t\t\"--prognum too large err(%d):%s\\n\",\n+\t\t\t\t\terrno, strerror(errno));\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tbreak;\n+\t\tcase 'c':\n+\t\t\t/* Add multiple CPUs */\n+\t\t\tadd_cpu = strtoul(optarg, NULL, 0);\n+\t\t\tif (add_cpu > MAX_CPUS) {\n+\t\t\t\tfprintf(stderr,\n+\t\t\t\t\"--cpu nr too large for cpumap err(%d):%s\\n\",\n+\t\t\t\t\terrno, strerror(errno));\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tcreate_cpu_entry(add_cpu, qsize, added_cpus, true);\n+\t\t\tadded_cpus++;\n+\t\t\tbreak;\n+\t\tcase 'q':\n+\t\t\tqsize = atoi(optarg);\n+\t\t\tbreak;\n+\t\tcase 'h':\n+\t\terror:\n+\t\tdefault:\n+\t\t\tusage(argv);\n+\t\t\treturn EXIT_FAIL_OPTION;\n+\t\t}\n+\t}\n+\t/* Required option */\n+\tif (ifindex == -1) {\n+\t\tfprintf(stderr, \"ERR: required option --dev missing\\n\");\n+\t\tusage(argv);\n+\t\treturn EXIT_FAIL_OPTION;\n+\t}\n+\t/* Required option */\n+\tif (add_cpu == -1) {\n+\t\tfprintf(stderr, \"ERR: required option --cpu missing\\n\");\n+\t\tfprintf(stderr, \" Specify multiple --cpu option to add more\\n\");\n+\t\tusage(argv);\n+\t\treturn EXIT_FAIL_OPTION;\n+\t}\n+\n+\t/* Remove XDP program when program is interrupted */\n+\tsignal(SIGINT, int_exit);\n+\n+\tif (set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {\n+\t\tfprintf(stderr, \"link set xdp fd failed\\n\");\n+\t\treturn EXIT_FAIL_XDP;\n+\t}\n+\n+\tif (debug) {\n+\t\tprintf(\"Debug-mode reading trace pipe (fix #define DEBUG)\\n\");\n+\t\tread_trace_pipe();\n+\t}\n+\n+\tstats_poll(interval, use_separators, prog_num);\n+\treturn EXIT_OK;\n+}\n", "prefixes": [ "net-next", "5/5" ] }