@@ -72,6 +72,11 @@
__section(__stringify(ID) "/" __stringify(KEY))
#endif
+#ifndef __section_xdp_entry
+# define __section_xdp_entry \
+ __section(ELF_SECTION_PROG)
+#endif
+
#ifndef __section_cls_entry
# define __section_cls_entry \
__section(ELF_SECTION_CLASSIFIER)
@@ -15,6 +15,7 @@
/* ELF section names, etc */
#define ELF_SECTION_LICENSE "license"
#define ELF_SECTION_MAPS "maps"
+#define ELF_SECTION_PROG "prog"
#define ELF_SECTION_CLASSIFIER "classifier"
#define ELF_SECTION_ACTION "action"
@@ -239,7 +239,12 @@ ssize_t getcmdline(char **line, size_t *len, FILE *in);
int makeargs(char *line, char *argv[], int maxargs);
int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
-struct iplink_req;
+struct iplink_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+};
+
int iplink_parse(int argc, char **argv, struct iplink_req *req,
char **name, char **type, char **link, char **dev,
int *group, int *index);
@@ -2,7 +2,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
rtm_map.o iptunnel.o ip6tunnel.o tunnel.o ipneigh.o ipntable.o iplink.o \
ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o iptuntap.o iptoken.o \
ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o \
- iplink_vlan.o link_veth.o link_gre.o iplink_can.o \
+ iplink_vlan.o link_veth.o link_gre.o iplink_can.o iplink_xdp.o \
iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o \
iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
@@ -35,6 +35,7 @@
#include "utils.h"
#include "ll_map.h"
#include "ip_common.h"
+#include "xdp.h"
#include "color.h"
enum {
@@ -838,6 +839,8 @@ int print_linkinfo(const struct sockaddr_nl *who,
if (tb[IFLA_MTU])
fprintf(fp, "mtu %u ", *(int *)RTA_DATA(tb[IFLA_MTU]));
+ if (tb[IFLA_XDP])
+ xdp_dump(fp, tb[IFLA_XDP]);
if (tb[IFLA_QDISC])
fprintf(fp, "qdisc %s ", rta_getattr_str(tb[IFLA_QDISC]));
if (tb[IFLA_MASTER]) {
@@ -32,6 +32,7 @@
#include "rt_names.h"
#include "utils.h"
#include "ip_common.h"
+#include "xdp.h"
#include "namespace.h"
#define IPLINK_IOCTL_COMPAT 1
@@ -54,6 +55,7 @@ void iplink_usage(void)
" [ numtxqueues QUEUE_COUNT ]\n"
" [ numrxqueues QUEUE_COUNT ]\n"
" type TYPE [ ARGS ]\n"
+ "\n"
" ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]\n"
"\n"
" ip link set { DEVICE | dev DEVICE | group DEVGROUP }\n"
@@ -79,24 +81,28 @@ void iplink_usage(void)
" [ alias NAME ]\n"
" [ vf NUM [ mac LLADDR ]\n"
" [ vlan VLANID [ qos VLAN-QOS ] [ proto VLAN-PROTO ] ]\n"
-
" [ rate TXRATE ]\n"
" [ max_tx_rate TXRATE ]\n"
" [ min_tx_rate TXRATE ]\n"
-
" [ spoofchk { on | off} ]\n"
" [ query_rss { on | off} ]\n"
" [ state { auto | enable | disable} ] ]\n"
" [ trust { on | off} ] ]\n"
+ " [ xdp { off |\n"
+ " object FILE [ section NAME ] [ verbose ] |\n"
+ " pinned FILE } ]\n"
" [ master DEVICE ][ vrf NAME ]\n"
" [ nomaster ]\n"
" [ addrgenmode { eui64 | none | stable_secret | random } ]\n"
" [ protodown { on | off } ]\n"
+ "\n"
" ip link show [ DEVICE | group GROUP ] [up] [master DEV] [vrf NAME] [type TYPE]\n");
if (iplink_have_newlink()) {
fprintf(stderr,
- " ip link help [ TYPE ]\n\n"
+ "\n"
+ " ip link help [ TYPE ]\n"
+ "\n"
"TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | macvtap |\n"
" bridge | bond | team | ipoib | ip6tnl | ipip | sit | vxlan |\n"
" gre | gretap | ip6gre | ip6gretap | vti | nlmon | team_slave |\n"
@@ -221,12 +227,6 @@ static int iplink_have_newlink(void)
}
#endif /* ! IPLINK_IOCTL_COMPAT */
-struct iplink_req {
- struct nlmsghdr n;
- struct ifinfomsg i;
- char buf[1024];
-};
-
static int nl_get_ll_addr_len(unsigned int dev_index)
{
int len;
@@ -602,6 +602,10 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
if (get_integer(&mtu, *argv, 0))
invarg("Invalid \"mtu\" value\n", *argv);
addattr_l(&req->n, sizeof(*req), IFLA_MTU, &mtu, 4);
+ } else if (strcmp(*argv, "xdp") == 0) {
+ NEXT_ARG();
+ if (xdp_parse(&argc, &argv, req))
+ exit(-1);
} else if (strcmp(*argv, "netns") == 0) {
NEXT_ARG();
if (netns != -1)
new file mode 100644
@@ -0,0 +1,98 @@
+/*
+ * iplink_xdp.c XDP program loader
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@iogearbox.net>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+
+#include "xdp.h"
+#include "bpf_util.h"
+
+extern int force;
+
+/*
+ * Loader callback: append the nested IFLA_XDP attribute (program fd plus
+ * flags) to the struct iplink_req passed in @raw.
+ *
+ * Unless the global "force" is set, XDP_FLAGS_UPDATE_IF_NOEXIST is sent so
+ * that an already attached program is not silently replaced. @annotation
+ * is unused.
+ */
+static void xdp_ebpf_cb(void *raw, int fd, const char *annotation)
+{
+	__u32 flags = !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
+	struct iplink_req *req = raw;
+	struct rtattr *xdp;
+
+	xdp = addattr_nest(&req->n, sizeof(*req), IFLA_XDP);
+	addattr32(&req->n, sizeof(*req), IFLA_XDP_FD, fd);
+	addattr32(&req->n, sizeof(*req), IFLA_XDP_FLAGS, flags);
+	addattr_nest_end(&req->n, xdp);
+}
+
+/* Callbacks handed to bpf_parse_common(); only the eBPF hook is set. */
+static const struct bpf_cfg_ops bpf_cb_ops = {
+	.ebpf_cb = xdp_ebpf_cb,
+};
+
+/* Handle "xdp off" / "xdp none": emit IFLA_XDP with fd -1. Returns 0. */
+static int xdp_delete(struct iplink_req *req)
+{
+	xdp_ebpf_cb(req, -1, NULL);
+	return 0;
+}
+
+/*
+ * Parse the arguments following the "xdp" keyword of "ip link set".
+ *
+ * @argc/@argv point at the caller's cursor; they are rewritten from the BPF
+ * parser's config on success and left untouched on the "off"/"none" path.
+ * Returns 0 on success, -1 if bpf_parse_common() reports an error.
+ */
+int xdp_parse(int *argc, char ***argv, struct iplink_req *req)
+{
+	struct bpf_cfg_in cfg = {
+		.argc = *argc,
+		.argv = *argv,
+	};
+
+	/* "off"/"none" is only recognized when exactly one argument remains */
+	if (*argc == 1) {
+		if (strcmp(**argv, "none") == 0 ||
+		    strcmp(**argv, "off") == 0)
+			return xdp_delete(req);
+	}
+	if (bpf_parse_common(BPF_PROG_TYPE_XDP, &cfg, &bpf_cb_ops, req))
+		return -1;
+
+	*argc = cfg.argc;
+	*argv = cfg.argv;
+	return 0;
+}
+
+/*
+ * Print "xdp " to @fp when the nested IFLA_XDP attribute @xdp carries a
+ * non-zero IFLA_XDP_ATTACHED; print nothing otherwise.
+ */
+void xdp_dump(FILE *fp, struct rtattr *xdp)
+{
+	struct rtattr *tb[IFLA_XDP_MAX + 1];
+
+	parse_rtattr_nested(tb, IFLA_XDP_MAX, xdp);
+	if (!tb[IFLA_XDP_ATTACHED] ||
+	    !rta_getattr_u8(tb[IFLA_XDP_ATTACHED]))
+		return;
+
+	fprintf(fp, "xdp ");
+	/* More to come here in future for 'ip -d link' (digest, etc) ... */
+}
new file mode 100644
@@ -0,0 +1,13 @@
+#ifndef __XDP__
+#define __XDP__
+
+#include <stdio.h>
+
+#include "utils.h"
+
+/* Parse the words after "xdp" into an IFLA_XDP attribute on @req; 0 on success, -1 on error. */
+int xdp_parse(int *argc, char ***argv, struct iplink_req *req);
+/* Print "xdp " to @fp if the nested IFLA_XDP attribute @tb has a non-zero IFLA_XDP_ATTACHED. */
+void xdp_dump(FILE *fp, struct rtattr *tb);
+
+#endif /* __XDP__ */
@@ -55,6 +55,7 @@ struct bpf_prog_meta {
static const enum bpf_prog_type __bpf_types[] = {
BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_XDP,
};
static const struct bpf_prog_meta __bpf_prog_meta[] = {
@@ -70,6 +71,11 @@ static const struct bpf_prog_meta __bpf_prog_meta[] = {
.section = ELF_SECTION_ACTION,
.may_uds_export = true,
},
+ [BPF_PROG_TYPE_XDP] = {
+ .type = "xdp",
+ .subdir = "xdp",
+ .section = ELF_SECTION_PROG,
+ },
};
static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
@@ -126,6 +126,19 @@ ip-link \- network device configuration
.RB "[ " port_guid " eui64 ] ]"
.br
.in -9
+.RB "[ " xdp " { " off " | "
+.br
+.in +8
+.BR object
+.IR FILE
+.RB "[ " section
+.IR NAME " ]"
+.RB "[ " verbose " ] |"
+.br
+.BR pinned
+.IR FILE " } ]"
+.br
+.in -8
.RB "[ " master
.IR DEVICE " ]"
.br
@@ -1319,6 +1332,60 @@ which may impact security and/or performance. (e.g. VF multicast promiscuous mod
.in -8
.TP
+.B xdp object "|" pinned "|" off
+set (or unset) an XDP ("express data path") BPF program to run on every
+packet at driver level.
+
+.B off
+(or
+.B none
+)
+- Detaches any currently attached XDP/BPF program from the given device.
+
+.BI object " FILE "
+- Attaches an XDP/BPF program to the given device. The
+.I FILE
+points to a BPF ELF file (e.g. generated by LLVM) that contains the BPF
+program code, map specifications, etc. If an XDP/BPF program is already
+attached to the given device, an error will be thrown. If no XDP/BPF
+program is currently attached, the device supports XDP and the program
+from the BPF ELF file passes the kernel verifier, then it will be attached
+to the device. If the option
+.I \-force
+is passed to
+.B ip
+then any prior attached XDP/BPF program will be atomically overridden and
+no error will be thrown in this case. If no
+.B section
+option is passed, then the default section name ("prog") will be assumed,
+otherwise the provided section name will be used. If no
+.B verbose
+option is passed, then a verifier log will only be dumped on load error.
+See also
+.B EXAMPLES
+section for usage examples.
+
+.BI section " NAME "
+- Specifies a section name that contains the BPF program code. If no section
+name is specified, the default one ("prog") will be used. This option is
+to be passed with the
+.B object
+option.
+
+.BI verbose
+- Act in verbose mode. For example, even in case of success, this will
+print the verifier log in case a program was loaded from a BPF ELF file.
+
+.BI pinned " FILE "
+- Attaches an XDP/BPF program to the given device. The
+.I FILE
+points to an already pinned BPF program in the BPF file system. The option
+.B section
+doesn't apply here, but otherwise semantics are the same as with the option
+.B object
+described already.
+
+.TP
.BI master " DEVICE"
set master device of the device (enslave device).
@@ -1604,7 +1671,33 @@ encap-dport 5555 encap-csum encap-remcsum
.RS 4
Creates an IPIP that is encapsulated with Generic UDP Encapsulation,
and the outer UDP checksum and remote checksum offload are enabled.
-
+.RE
+.PP
+ip link set dev eth0 xdp obj prog.o
+.RS 4
+Attaches an XDP/BPF program to device eth0, where the program is
+located in prog.o, section "prog" (default section). In case an
+XDP/BPF program is already attached, throw an error.
+.RE
+.PP
+ip -force link set dev eth0 xdp obj prog.o sec foo
+.RS 4
+Attaches an XDP/BPF program to device eth0, where the program is
+located in prog.o, section "foo". In case an XDP/BPF program is
+already attached, it will be overridden by the new one.
+.RE
+.PP
+ip -force link set dev eth0 xdp pinned /sys/fs/bpf/foo
+.RS 4
+Attaches an XDP/BPF program to device eth0, where the program was
+previously pinned as an object node into BPF file system under
+name foo.
+.RE
+.PP
+ip link set dev eth0 xdp off
+.RS 4
+If an XDP/BPF program is attached on device eth0, detach it and
+effectively turn off XDP for device eth0.
.RE
.PP
ip link add link wpan0 lowpan0 type lowpan