diff mbox series

[ovs-dev,RFC] netdev-afxdp: Enable loading XDP program.

Message ID 1564011593-45364-1-git-send-email-u9012063@gmail.com
State RFC
Headers show
Series [ovs-dev,RFC] netdev-afxdp: Enable loading XDP program. | expand

Commit Message

William Tu July 24, 2019, 11:39 p.m. UTC
Now netdev-afxdp unconditionally forwards all packets to userspace
because we are using libbpf's default XDP program, see xsk_load_xdp_prog.
However, there are cases where users want to keep some packets in the
kernel, for example management traffic such as SSH, while the rest of
the traffic goes to userspace through AF_XDP.

This is a very rough patch showing how to allow users to load their
own XDP program, by specifying
  $ ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>

This way users can implement their own filtering or traffic-steering
logic in their XDP program, and the rest of the traffic is passed to the
AF_XDP socket handled by OVS.

The xdp/xdp1.c is a sample C program and has to be compiled using clang
under the kernel's samples/bpf/.  I don't know whether we should include
the C file in OVS, or we should include BPF bytecode, or simply not
provide any sample program in OVS. And I'm not sure whether this is
useful for others.

Comments are welcome.

Signed-off-by: William Tu <u9012063@gmail.com>
---
 Makefile.am                |  2 ++
 lib/netdev-afxdp.c         | 82 ++++++++++++++++++++++++++++++++++++++++++----
 lib/netdev-linux-private.h |  2 ++
 xdp/xdp1.c                 | 46 ++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 7 deletions(-)
 create mode 100644 xdp/xdp1.c
diff mbox series

Patch

diff --git a/Makefile.am b/Makefile.am
index ff1f94b4841f..6a95cc92d434 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -92,6 +92,8 @@  EXTRA_DIST = \
 	$(MAN_ROOTS) \
 	Vagrantfile \
 	Vagrantfile-FreeBSD \
+	xdp \
+	xdp/xdp1.c \
 	.mailmap
 bin_PROGRAMS =
 sbin_PROGRAMS =
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index ff426750d709..60a24c8ef33a 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -21,6 +21,7 @@ 
 #include "netdev-afxdp.h"
 #include "netdev-afxdp-pool.h"
 
+#include <bpf/bpf.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/rtnetlink.h>
@@ -82,7 +83,7 @@  BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
 #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
 
 static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
-                                             int mode);
+                                             int mode, const char *xdpobj);
 static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
 static void xsk_destroy(struct xsk_socket_info *xsk);
 static int xsk_configure_all(struct netdev *netdev);
@@ -232,9 +233,56 @@  xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
     return umem;
 }
 
+/* Loads the user-supplied XDP object file at 'path', stores the file
+ * descriptor of the AF_XDP socket 'xsk' in the object's "xsks_map" map and
+ * attaches the loaded program to the device 'ifindex' using the XDP flags
+ * passed in 'xdpmode'.
+ *
+ * A null 'path' means that no custom XDP object was configured: nothing is
+ * loaded or attached and 0 is returned.
+ *
+ * Returns 0 on success, otherwise EINVAL.  On failure the loaded BPF object,
+ * if any, is closed again. */
+static int
+netdev_afxdp_load_prog(const char *path, struct xsk_socket_info *xsk,
+                       int ifindex, int xdpmode)
+{
+    struct bpf_prog_load_attr attr = {
+        .prog_type = BPF_PROG_TYPE_XDP,
+        .file = path,
+    };
+    struct bpf_object *obj;
+    struct bpf_map *map;
+    int xsks_map_fd;
+    int prog_fd;
+    int xsk_fd;
+    int queue_id = 0;
+
+    if (!path) {
+        /* No custom program requested, nothing to do. */
+        return 0;
+    }
+
+    if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+        VLOG_ERR("Error loading XDP program at %s", path);
+        return EINVAL;
+    }
+
+    map = bpf_object__find_map_by_name(obj, "xsks_map");
+    if (!map) {
+        VLOG_WARN("map xsks_map not found");
+        goto error;
+    }
+
+    xsks_map_fd = bpf_map__fd(map);
+    xsk_fd = xsk_socket__fd(xsk->xsk);
+
+    VLOG_DBG("xsk fd %d map fd %d", xsk_fd, xsks_map_fd);
+
+    /* FIXME: Need to set for each queue. */
+    if (bpf_map_update_elem(xsks_map_fd, &queue_id, &xsk_fd, 0)) {
+        VLOG_WARN("Error updating xsk map");
+        goto error;
+    }
+
+    if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdpmode)) {
+        VLOG_ERR("Attach XDP program failed (%s)", ovs_strerror(errno));
+        goto error;
+    }
+
+    return 0;
+
+error:
+    /* Drops the program and map references taken by the load above. */
+    bpf_object__close(obj);
+    return EINVAL;
+}
+
 static struct xsk_socket_info *
 xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
-                     uint32_t queue_id, int xdpmode)
+                     uint32_t queue_id, int xdpmode, const char *xdpobj)
 {
     struct xsk_socket_config cfg;
     struct xsk_socket_info *xsk;
@@ -249,6 +297,11 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     cfg.tx_size = PROD_NUM_DESCS;
     cfg.libbpf_flags = 0;
 
+    if (xdpobj) {
+        /* Prevent libbpf to load the default XDP program. */
+        cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+    }
+
     if (xdpmode == XDP_ZEROCOPY) {
         cfg.bind_flags = XDP_ZEROCOPY;
         cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
@@ -275,7 +328,6 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         return NULL;
     }
 
-    /* Make sure the built-in AF_XDP program is loaded. */
     ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
     if (ret) {
         VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
@@ -284,6 +336,13 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         return NULL;
     }
 
+    ret = netdev_afxdp_load_prog(xdpobj, xsk, ifindex, cfg.xdp_flags);
+    if (ret) {
+        xsk_socket__delete(xsk->xsk);
+        free(xsk);
+        return NULL;
+    }
+
     while (!xsk_ring_prod__reserve(&xsk->umem->fq,
                                    PROD_NUM_DESCS, &idx)) {
         VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
@@ -307,7 +366,7 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
 }
 
 static struct xsk_socket_info *
-xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
+xsk_configure(int ifindex, int xdp_queue_id, int xdpmode, const char *xdpobj)
 {
     struct xsk_socket_info *xsk;
     struct xsk_umem_info *umem;
@@ -330,7 +389,7 @@  xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
 
     VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
 
-    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
+    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode, xdpobj);
     if (!xsk) {
         /* Clean up umem and xpacket pool. */
         if (xsk_umem__delete(umem->umem)) {
@@ -363,7 +422,7 @@  xsk_configure_all(struct netdev *netdev)
     for (i = 0; i < n_rxq; i++) {
         VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
                   dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
-        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
+        xsk_info = xsk_configure(ifindex, i, dev->xdpmode, dev->xdpobj);
         if (!xsk_info) {
             VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
             dev->xsks[i] = NULL;
@@ -454,6 +513,7 @@  netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
 {
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     const char *str_xdpmode;
+    const char *str_xdpobj;
     int xdpmode, new_n_rxq;
 
     ovs_mutex_lock(&dev->mutex);
@@ -476,11 +536,14 @@  netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
         ovs_mutex_unlock(&dev->mutex);
         return EINVAL;
     }
+    str_xdpobj = nullable_xstrdup(smap_get(args, "xdpobj"));
 
     if (dev->requested_n_rxq != new_n_rxq
-        || dev->requested_xdpmode != xdpmode) {
+        || dev->requested_xdpmode != xdpmode
+        || !nullable_string_is_equal(dev->requested_xdpobj, str_xdpobj)) {
         dev->requested_n_rxq = new_n_rxq;
         dev->requested_xdpmode = xdpmode;
+        dev->requested_xdpobj = str_xdpobj;
         netdev_request_reconfigure(netdev);
     }
     ovs_mutex_unlock(&dev->mutex);
@@ -496,6 +559,7 @@  netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
     smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
     smap_add_format(args, "xdpmode", "%s",
         dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
+    smap_add_format(args, "xdpobj", "%s", dev->xdpobj);
     ovs_mutex_unlock(&dev->mutex);
     return 0;
 }
@@ -511,6 +575,7 @@  netdev_afxdp_reconfigure(struct netdev *netdev)
 
     if (netdev->n_rxq == dev->requested_n_rxq
         && dev->xdpmode == dev->requested_xdpmode
+        && nullable_string_is_equal(dev->xdpobj, dev->requested_xdpobj)
         && dev->xsks) {
         goto out;
     }
@@ -534,6 +599,7 @@  netdev_afxdp_reconfigure(struct netdev *netdev)
          * when no device is in DRV mode.
          */
     }
+    dev->xdpobj = nullable_xstrdup(dev->requested_xdpobj);
 
     err = xsk_configure_all(netdev);
     if (err) {
@@ -963,6 +1029,8 @@  netdev_afxdp_construct(struct netdev *netdev)
     netdev->n_rxq = 0;
     netdev->n_txq = 0;
     dev->xdpmode = 0;
+    dev->xdpobj = NULL;
+    dev->requested_xdpobj = NULL;
 
     dev->requested_n_rxq = NR_QUEUE;
     dev->requested_xdpmode = XDP_COPY;
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index a350be151147..9a4f00958eca 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -103,6 +103,8 @@  struct netdev_linux {
     int xdpmode;                /* AF_XDP running mode: driver or skb. */
     int requested_xdpmode;
     struct ovs_spin *tx_locks;  /* spin lock array for TX queues. */
+    const char *xdpobj;         /* file path of the xdp object */
+    const char *requested_xdpobj;
 #endif
 };
 
diff --git a/xdp/xdp1.c b/xdp/xdp1.c
new file mode 100644
index 000000000000..9a7ea9e9a58f
--- /dev/null
+++ b/xdp/xdp1.c
@@ -0,0 +1,46 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is compiled under the Linux kernel's source tree,
+ * e.g. linux/samples/bpf/, and generates an XDP object file,
+ * e.g. xdp1.o
+ *
+ * Use ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp1.o>
+ * to load this XDP object
+ *
+ * This has to be compiled using clang to generate BPF bytecode
+ * ex:
+ * clang -I<some headers> -O2 -emit-llvm -c xdp/xdp1.c -o -| llc \
+ *    -march=bpf -filetype=obj -o xdp/xdp1.o
+ */
+#include <config.h>
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* XSKMAP that xdp_sock_prog() below redirects packets into, keyed by the
+ * RX queue index.  netdev_afxdp_load_prog() looks this map up by the name
+ * "xsks_map" and stores the AF_XDP socket fd in it.  Holds at most 32
+ * entries. */
+struct bpf_map_def SEC("maps") xsks_map = {
+    .type = BPF_MAP_TYPE_XSKMAP,
+    .key_size = sizeof(int),
+    .value_size = sizeof(int),
+    .max_entries = 32,
+};
+
+SEC("xdp_sock")
+int xdp_sock_prog(struct xdp_md *ctx)
+{
+    /* Key into xsks_map: the RX queue this packet arrived on. */
+    int rxq = ctx->rx_queue_index;
+
+    /* User-defined filtering goes here.  For example:
+     *   - return XDP_PASS for SSH traffic so that the kernel keeps
+     *     processing it;
+     *   - return XDP_DROP for packets that are not wanted at all.
+     */
+
+    /* Everything that was not filtered out above goes to AF_XDP. */
+    return bpf_redirect_map(&xsks_map, rxq, 0);
+}
+
+char _license[] SEC("license") = "GPL";