[ovs-dev,PATCHv2] netdev-afxdp: Enable loading XDP program.
diff mbox series

Message ID 1570234362-99558-1-git-send-email-u9012063@gmail.com
State New
Headers show
Series
  • [ovs-dev,PATCHv2] netdev-afxdp: Enable loading XDP program.
Related show

Commit Message

William Tu Oct. 5, 2019, 12:12 a.m. UTC
Now netdev-afxdp always forwards all packets to userspace because
it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
There are some cases when users want to keep packets in kernel instead
of sending to userspace, for example, management traffic such as SSH
should be processed in kernel.

The patch enables loading the user-provide XDP program by doing
  $ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>

So users can implement their filtering logic or traffic steering idea
in their XDP program, and rest of the traffic passes to AF_XDP socket
handled by OVS.

Signed-off-by: William Tu <u9012063@gmail.com>
---
v2:
    A couple fixes and remove RFC
---
 Documentation/intro/install/afxdp.rst |  46 ++++++++++++
 lib/netdev-afxdp.c                    | 137 ++++++++++++++++++++++++++++++----
 lib/netdev-linux-private.h            |   2 +
 3 files changed, 169 insertions(+), 16 deletions(-)

Patch
diff mbox series

diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 820e9d993d8f..f484bdb8249b 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -265,6 +265,52 @@  Or, use OVS pmd tool::
   ovs-appctl dpif-netdev/pmd-stats-show
 
 
+Loading Custom XDP Program
+--------------------------
+By defailt, netdev-afxdp always forwards all packets to userspace because
+it is using libbpf's default XDP program. There are some cases when users
+want to keep packets in kernel instead of sending to userspace, for example,
+management traffic such as SSH should be processed in kernel. This can be
+done by loading the user-provided XDP program::
+
+  ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
+
+So users can implement their filtering logic or traffic steering idea
+in their XDP program, and rest of the traffic passes to AF_XDP socket
+handled by OVS. Below is a sample C program compiled under kernel's
+samples/bpf/.
+
+.. code-block:: c
+
+  #include <uapi/linux/bpf.h>
+  #include "bpf_helpers.h"
+
+  /* OVS will associate map 'xsks_map' to xsk socket. */
+  struct bpf_map_def SEC("maps") xsks_map = {
+      .type = BPF_MAP_TYPE_XSKMAP,
+      .key_size = sizeof(int),
+      .value_size = sizeof(int),
+      .max_entries = 32,
+  };
+
+  SEC("xdp_sock")
+  int xdp_sock_prog(struct xdp_md *ctx)
+  {
+      int index = ctx->rx_queue_index;
+
+      /* Customized by user.
+       * For example
+       * 1) filter out all SSH traffic and return XDP_PASS
+       *    for kernel to process.
+       * 2) Drop unwanted packet by returning XDP_DROP.
+       */
+
+      /* Rest of packets goes to AF_XDP. */
+      return bpf_redirect_map(&xsks_map, index, 0);
+  }
+  char _license[] SEC("license") = "GPL";
+
+
 Example Script
 --------------
 
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 6e01803272aa..231d6473fb67 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -21,6 +21,7 @@ 
 #include "netdev-afxdp.h"
 #include "netdev-afxdp-pool.h"
 
+#include <bpf/bpf.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/rtnetlink.h>
@@ -29,6 +30,7 @@ 
 #include <stdlib.h>
 #include <sys/resource.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -82,7 +84,7 @@  BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
 #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
 
 static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
-                                             int mode);
+                                             int mode, bool use_default_xdp);
 static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
 static void xsk_destroy(struct xsk_socket_info *xsk);
 static int xsk_configure_all(struct netdev *netdev);
@@ -158,6 +160,51 @@  netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED)
     ovs_mutex_unlock(&unused_pools_mutex);
 }
 
+static int
+xsk_load_prog(const char *path, struct bpf_object **obj,
+              int *prog_fd)
+{
+    struct bpf_prog_load_attr attr = {
+        .prog_type = BPF_PROG_TYPE_XDP,
+        .file = path,
+    };
+
+    if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
+        VLOG_WARN("Can't load XDP program at '%s'", path);
+        return EINVAL;
+    }
+
+    VLOG_INFO("Loaded XDP program at '%s'", path);
+    return 0;
+}
+
+static int
+xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info *xsk,
+                  int queue_id)
+{
+    struct bpf_map *map;
+    int xsks_map_fd;
+    int xsk_fd;
+    int ret;
+
+    map = bpf_object__find_map_by_name(obj, "xsks_map");
+    if (!map) {
+        VLOG_WARN("XDP map 'xsks_map' not found");
+        return EINVAL;
+    }
+
+    xsks_map_fd = bpf_map__fd(map);
+    xsk_fd = xsk_socket__fd(xsk->xsk);
+
+    ret = bpf_map_update_elem(xsks_map_fd, &queue_id, &xsk_fd, 0);
+    if (ret) {
+        VLOG_WARN("Can't set XSK map, map fd: %d", xsks_map_fd);
+        return EINVAL;
+    }
+
+    return 0;
+}
+
 static struct xsk_umem_info *
 xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
 {
@@ -234,7 +281,7 @@  xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
 
 static struct xsk_socket_info *
 xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
-                     uint32_t queue_id, int xdpmode)
+                     uint32_t queue_id, int xdpmode, bool use_default_xdp)
 {
     struct xsk_socket_config cfg;
     struct xsk_socket_info *xsk;
@@ -257,6 +304,10 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
     }
 
+    if (!use_default_xdp) {
+        cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+    }
+
     if (if_indextoname(ifindex, devname) == NULL) {
         VLOG_ERR("ifindex %d to devname failed (%s)",
                  ifindex, ovs_strerror(errno));
@@ -275,17 +326,19 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         return NULL;
     }
 
-    /* Make sure the built-in AF_XDP program is loaded. */
-    ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
-    if (ret || !prog_id) {
-        if (ret) {
-            VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
-        } else {
-            VLOG_ERR("No XDP program is loaded at ifindex %d", ifindex);
+    if (use_default_xdp) {
+        /* Make sure the built-in AF_XDP program is loaded. */
+        ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
+        if (ret || !prog_id) {
+            if (ret) {
+                VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
+            } else {
+                VLOG_ERR("No XDP program is loaded at ifindex %d", ifindex);
+            }
+            xsk_socket__delete(xsk->xsk);
+            free(xsk);
+            return NULL;
         }
-        xsk_socket__delete(xsk->xsk);
-        free(xsk);
-        return NULL;
     }
 
     while (!xsk_ring_prod__reserve(&xsk->umem->fq,
@@ -311,7 +364,8 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
 }
 
 static struct xsk_socket_info *
-xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
+xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
+              bool use_default_xdp)
 {
     struct xsk_socket_info *xsk;
     struct xsk_umem_info *umem;
@@ -334,7 +388,8 @@  xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
 
     VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
 
-    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
+    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
+                               use_default_xdp);
     if (!xsk) {
         /* Clean up umem and xpacket pool. */
         if (xsk_umem__delete(umem->umem)) {
@@ -354,6 +409,10 @@  xsk_configure_all(struct netdev *netdev)
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     struct xsk_socket_info *xsk_info;
     int i, ifindex, n_rxq, n_txq;
+    bool use_default_xdp;
+    struct bpf_object *obj;
+    int prog_fd = 0;
+    int ret;
 
     ifindex = linux_get_ifindex(netdev_get_name(netdev));
 
@@ -362,12 +421,13 @@  xsk_configure_all(struct netdev *netdev)
 
     n_rxq = netdev_n_rxq(netdev);
     dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
+    use_default_xdp = dev->xdpobj ? false : true;
 
     /* Configure each queue. */
     for (i = 0; i < n_rxq; i++) {
         VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
                   dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
-        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
+        xsk_info = xsk_configure(ifindex, i, dev->xdpmode, use_default_xdp);
         if (!xsk_info) {
             VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
             dev->xsks[i] = NULL;
@@ -377,6 +437,27 @@  xsk_configure_all(struct netdev *netdev)
         atomic_init(&xsk_info->tx_dropped, 0);
         xsk_info->outstanding_tx = 0;
         xsk_info->available_rx = PROD_NUM_DESCS;
+
+        if (dev->xdpobj) {
+            if (prog_fd == 0) {
+                /* XDP program is per-netdev, so all queues share
+                   the same XDP program.
+                 */
+                ret = xsk_load_prog(dev->xdpobj, &obj, &prog_fd);
+                if (ret) {
+                    VLOG_ERR("%s: Can't load XDP object file",
+                             netdev_get_name(netdev));
+                    goto err;
+                }
+            }
+            ret = xsk_configure_map(obj, xsk_info, i);
+            if (ret) {
+                VLOG_ERR("%s: Can't configure XDP map",
+                         netdev_get_name(netdev));
+                goto err;
+            }
+            bpf_set_link_xdp_fd(ifindex, prog_fd, dev->xdpmode);
+        }
     }
 
     n_txq = netdev_n_txq(netdev);
@@ -450,6 +531,8 @@  xsk_destroy_all(struct netdev *netdev)
         free(dev->tx_locks);
         dev->tx_locks = NULL;
     }
+    free(CONST_CAST(char *, dev->xdpobj));
+    dev->xdpobj = NULL;
 }
 
 int
@@ -458,7 +541,9 @@  netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
 {
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     const char *str_xdpmode;
+    const char *str_xdpobj;
     int xdpmode, new_n_rxq;
+    struct stat s;
 
     ovs_mutex_lock(&dev->mutex);
     new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
@@ -481,10 +566,25 @@  netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
         return EINVAL;
     }
 
+    str_xdpobj = smap_get_def(args, "xdpobj", NULL);
+    if (str_xdpobj && stat(str_xdpobj, &s)) {
+        VLOG_ERR("Invalid xdpobj '%s': %s", str_xdpobj,
+                 ovs_strerror(errno));
+        ovs_mutex_unlock(&dev->mutex);
+        return EINVAL;
+    } else if (str_xdpobj && !S_ISREG(s.st_mode)) {
+        VLOG_ERR("xdpobj '%s' is not a regular file", str_xdpobj);
+        ovs_mutex_unlock(&dev->mutex);
+        return EINVAL;
+    }
+
     if (dev->requested_n_rxq != new_n_rxq
-        || dev->requested_xdpmode != xdpmode) {
+        || dev->requested_xdpmode != xdpmode
+        || !nullable_string_is_equal(dev->requested_xdpobj, str_xdpobj)
+        || str_xdpobj) {
         dev->requested_n_rxq = new_n_rxq;
         dev->requested_xdpmode = xdpmode;
+        dev->requested_xdpobj = nullable_xstrdup(str_xdpobj);
         netdev_request_reconfigure(netdev);
     }
     ovs_mutex_unlock(&dev->mutex);
@@ -500,6 +600,7 @@  netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
     smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
     smap_add_format(args, "xdpmode", "%s",
         dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
+    smap_add_format(args, "xdpobj", "%s", dev->xdpobj);
     ovs_mutex_unlock(&dev->mutex);
     return 0;
 }
@@ -539,6 +640,8 @@  netdev_afxdp_reconfigure(struct netdev *netdev)
          */
     }
 
+    dev->xdpobj = dev->requested_xdpobj;
+
     err = xsk_configure_all(netdev);
     if (err) {
         VLOG_ERR("AF_XDP device %s reconfig failed.", netdev_get_name(netdev));
@@ -971,9 +1074,11 @@  netdev_afxdp_construct(struct netdev *netdev)
     netdev->n_rxq = 0;
     netdev->n_txq = 0;
     dev->xdpmode = 0;
+    dev->xdpobj = NULL;
 
     dev->requested_n_rxq = NR_QUEUE;
     dev->requested_xdpmode = XDP_COPY;
+    dev->requested_xdpobj = NULL;
 
     dev->xsks = NULL;
     dev->tx_locks = NULL;
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index a350be151147..7ded0e4b4228 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -103,6 +103,8 @@  struct netdev_linux {
     int xdpmode;                /* AF_XDP running mode: driver or skb. */
     int requested_xdpmode;
     struct ovs_spin *tx_locks;  /* spin lock array for TX queues. */
+    const char *xdpobj;         /* XDP object file path. */
+    const char *requested_xdpobj;
 #endif
 };