diff mbox series

[ovs-dev,PATCHv2] netdev-afxdp: Enable loading XDP program.

Message ID 1570234362-99558-1-git-send-email-u9012063@gmail.com
State Superseded
Headers show
Series [ovs-dev,PATCHv2] netdev-afxdp: Enable loading XDP program. | expand

Commit Message

William Tu Oct. 5, 2019, 12:12 a.m. UTC
Now netdev-afxdp always forwards all packets to userspace because
it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
There are some cases when users want to keep packets in kernel instead
of sending to userspace, for example, management traffic such as SSH
should be processed in kernel.

The patch enables loading the user-provide XDP program by doing
  $ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>

So users can implement their filtering logic or traffic steering idea
in their XDP program, and rest of the traffic passes to AF_XDP socket
handled by OVS.

Signed-off-by: William Tu <u9012063@gmail.com>
---
v2:
    A couple fixes and remove RFC
---
 Documentation/intro/install/afxdp.rst |  46 ++++++++++++
 lib/netdev-afxdp.c                    | 137 ++++++++++++++++++++++++++++++----
 lib/netdev-linux-private.h            |   2 +
 3 files changed, 169 insertions(+), 16 deletions(-)

Comments

Eelco Chaudron Oct. 29, 2019, 3:28 p.m. UTC | #1
See comments inline…

On 5 Oct 2019, at 2:12, William Tu wrote:

> Now netdev-afxdp always forwards all packets to userspace because
> it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
> There are some cases when users want to keep packets in kernel instead
> of sending to userspace, for example, management traffic such as SSH
> should be processed in kernel.
>
> The patch enables loading the user-provide XDP program by doing
>   $ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>

Should we maybe change the name to xdp-obj?

> So users can implement their filtering logic or traffic steering idea
> in their XDP program, and rest of the traffic passes to AF_XDP socket
> handled by OVS.
>
> Signed-off-by: William Tu <u9012063@gmail.com>
> ---
> v2:
>     A couple fixes and remove RFC
> ---
>  Documentation/intro/install/afxdp.rst |  46 ++++++++++++
>  lib/netdev-afxdp.c                    | 137 
> ++++++++++++++++++++++++++++++----
>  lib/netdev-linux-private.h            |   2 +
>  3 files changed, 169 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/intro/install/afxdp.rst 
> b/Documentation/intro/install/afxdp.rst
> index 820e9d993d8f..f484bdb8249b 100644
> --- a/Documentation/intro/install/afxdp.rst
> +++ b/Documentation/intro/install/afxdp.rst
> @@ -265,6 +265,52 @@ Or, use OVS pmd tool::
>    ovs-appctl dpif-netdev/pmd-stats-show
>
>
> +Loading Custom XDP Program
> +--------------------------
> +By defailt, netdev-afxdp always forwards all packets to userspace 
> because
> +it is using libbpf's default XDP program. There are some cases when 
> users
> +want to keep packets in kernel instead of sending to userspace, for 
> example,
> +management traffic such as SSH should be processed in kernel. This 
> can be
> +done by loading the user-provided XDP program::
> +
> +  ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
> +
> +So users can implement their filtering logic or traffic steering idea
> +in their XDP program, and rest of the traffic passes to AF_XDP socket
> +handled by OVS. Below is a sample C program compiled under kernel's
> +samples/bpf/.
> +
> +.. code-block:: c
> +
> +  #include <uapi/linux/bpf.h>
> +  #include "bpf_helpers.h"
> +
> +  /* OVS will associate map 'xsks_map' to xsk socket. */
> +  struct bpf_map_def SEC("maps") xsks_map = {
> +      .type = BPF_MAP_TYPE_XSKMAP,
> +      .key_size = sizeof(int),
> +      .value_size = sizeof(int),
> +      .max_entries = 32,
> +  };
> +

Maybe something like this could be added to make sure it compiles with 
older kernels:

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)

/* Kernel version before 5.3 needed an additional map */
struct bpf_map_def SEC("maps") qidconf_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 64,
};
#endi

See also here:
https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c

> +  SEC("xdp_sock")
> +  int xdp_sock_prog(struct xdp_md *ctx)
> +  {
> +      int index = ctx->rx_queue_index;
> +
> +      /* Customized by user.
> +       * For example
> +       * 1) filter out all SSH traffic and return XDP_PASS
> +       *    for kernel to process.
> +       * 2) Drop unwanted packet by returning XDP_DROP.
> +       */
> +
> +      /* Rest of packets goes to AF_XDP. */
> +      return bpf_redirect_map(&xsks_map, index, 0);
> +  }
> +  char _license[] SEC("license") = "GPL";
> +
> +
>  Example Script
>  --------------
>
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 6e01803272aa..231d6473fb67 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -21,6 +21,7 @@
>  #include "netdev-afxdp.h"
>  #include "netdev-afxdp-pool.h"
>
> +#include <bpf/bpf.h>
>  #include <errno.h>
>  #include <inttypes.h>
>  #include <linux/rtnetlink.h>
> @@ -29,6 +30,7 @@
>  #include <stdlib.h>
>  #include <sys/resource.h>
>  #include <sys/socket.h>
> +#include <sys/stat.h>
>  #include <sys/types.h>
>  #include <unistd.h>
>
> @@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
>  #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char 
> *)base))
>
>  static struct xsk_socket_info *xsk_configure(int ifindex, int 
> xdp_queue_id,
> -                                             int mode);
> +                                             int mode, bool 
> use_default_xdp);
>  static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
>  static void xsk_destroy(struct xsk_socket_info *xsk);
>  static int xsk_configure_all(struct netdev *netdev);
> @@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux 
> OVS_UNUSED)
>      ovs_mutex_unlock(&unused_pools_mutex);
>  }
>
> +static int
> +xsk_load_prog(const char *path, struct bpf_object **obj,
> +              int *prog_fd)
> +{
> +    struct bpf_prog_load_attr attr = {
> +        .prog_type = BPF_PROG_TYPE_XDP,
> +        .file = path,
> +    };
> +
> +    if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
> +        VLOG_WARN("Can't load XDP program at '%s'", path);

I think it should be a VLOG_ERR()

> +        return EINVAL;
> +    }
> +
> +    VLOG_INFO("Loaded XDP program at '%s'", path);
> +    return 0;
> +}
> +
> +static int
> +xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info 
> *xsk,
> +                  int queue_id)
> +{
> +    struct bpf_map *map;
> +    int xsks_map_fd;
> +    int xsk_fd;
> +    int ret;
> +
> +    map = bpf_object__find_map_by_name(obj, "xsks_map");
> +    if (!map) {
> +        VLOG_WARN("XDP map 'xsks_map' not found");
> +        return EINVAL;
> +    }
> +
> +    xsks_map_fd = bpf_map__fd(map);
> +    xsk_fd = xsk_socket__fd(xsk->xsk);
> +
> +    ret = bpf_map_update_elem(xsks_map_fd, &queue_id, &xsk_fd, 0);

Don’t think this is needed, as 
xsk_socket__create()->xsk_setup_xdp_prog()->xsk_lookup_bpf_maps() will 
take care of it.

> +    if (ret) {
> +        VLOG_WARN("Can't set XSK map, map fd: %d", xsks_map_fd);
> +        return EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
>  static struct xsk_umem_info *
>  xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
>  {
> @@ -234,7 +281,7 @@ xsk_configure_umem(void *buffer, uint64_t size, 
> int xdpmode)
>
>  static struct xsk_socket_info *
>  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> -                     uint32_t queue_id, int xdpmode)
> +                     uint32_t queue_id, int xdpmode, bool 
> use_default_xdp)
>  {
>      struct xsk_socket_config cfg;
>      struct xsk_socket_info *xsk;
> @@ -257,6 +304,10 @@ xsk_configure_socket(struct xsk_umem_info *umem, 
> uint32_t ifindex,
>          cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | 
> XDP_FLAGS_SKB_MODE;
>      }
>
> +    if (!use_default_xdp) {
> +        cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
> +    }
> +
>      if (if_indextoname(ifindex, devname) == NULL) {
>          VLOG_ERR("ifindex %d to devname failed (%s)",
>                   ifindex, ovs_strerror(errno));
> @@ -275,17 +326,19 @@ xsk_configure_socket(struct xsk_umem_info *umem, 
> uint32_t ifindex,
>          return NULL;
>      }
>
> -    /* Make sure the built-in AF_XDP program is loaded. */
> -    ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
> -    if (ret || !prog_id) {
> -        if (ret) {
> -            VLOG_ERR("Get XDP prog ID failed (%s)", 
> ovs_strerror(errno));
> -        } else {
> -            VLOG_ERR("No XDP program is loaded at ifindex %d", 
> ifindex);
> +    if (use_default_xdp) {
> +        /* Make sure the built-in AF_XDP program is loaded. */
> +        ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
> +        if (ret || !prog_id) {
> +            if (ret) {
> +                VLOG_ERR("Get XDP prog ID failed (%s)", 
> ovs_strerror(errno));
> +            } else {
> +                VLOG_ERR("No XDP program is loaded at ifindex %d", 
> ifindex);
> +            }
> +            xsk_socket__delete(xsk->xsk);
> +            free(xsk);
> +            return NULL;
>          }

I think a WARNING message should be logged when an external program is 
loaded so operators know when debugging something might be out of the 
ordinary. I would suggest to include the interface name and program 
path/name loaded.

> -        xsk_socket__delete(xsk->xsk);
> -        free(xsk);
> -        return NULL;
>      }
>
>      while (!xsk_ring_prod__reserve(&xsk->umem->fq,
> @@ -311,7 +364,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, 
> uint32_t ifindex,
>  }
>
>  static struct xsk_socket_info *
> -xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
> +xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
> +              bool use_default_xdp)
>  {
>      struct xsk_socket_info *xsk;
>      struct xsk_umem_info *umem;
> @@ -334,7 +388,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int 
> xdpmode)
>
>      VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
>
> -    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
> +    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
> +                               use_default_xdp);
>      if (!xsk) {
>          /* Clean up umem and xpacket pool. */
>          if (xsk_umem__delete(umem->umem)) {
> @@ -354,6 +409,10 @@ xsk_configure_all(struct netdev *netdev)
>      struct netdev_linux *dev = netdev_linux_cast(netdev);
>      struct xsk_socket_info *xsk_info;
>      int i, ifindex, n_rxq, n_txq;
> +    bool use_default_xdp;
> +    struct bpf_object *obj;
> +    int prog_fd = 0;
> +    int ret;
>
>      ifindex = linux_get_ifindex(netdev_get_name(netdev));
>
> @@ -362,12 +421,13 @@ xsk_configure_all(struct netdev *netdev)
>
>      n_rxq = netdev_n_rxq(netdev);
>      dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
> +    use_default_xdp = dev->xdpobj ? false : true;
>
>      /* Configure each queue. */
>      for (i = 0; i < n_rxq; i++) {
>          VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
>                    dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
> -        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
> +        xsk_info = xsk_configure(ifindex, i, dev->xdpmode, 
> use_default_xdp);
>          if (!xsk_info) {
>              VLOG_ERR("Failed to create AF_XDP socket on queue %d.", 
> i);
>              dev->xsks[i] = NULL;
> @@ -377,6 +437,27 @@ xsk_configure_all(struct netdev *netdev)
>          atomic_init(&xsk_info->tx_dropped, 0);
>          xsk_info->outstanding_tx = 0;
>          xsk_info->available_rx = PROD_NUM_DESCS;
> +
> +        if (dev->xdpobj) {
> +            if (prog_fd == 0) {
> +                /* XDP program is per-netdev, so all queues share
> +                   the same XDP program.
> +                 */
Format comment to be inline with other multiline comments

> +                ret = xsk_load_prog(dev->xdpobj, &obj, &prog_fd);
> +                if (ret) {
> +                    VLOG_ERR("%s: Can't load XDP object file",
> +                             netdev_get_name(netdev));

Maybe combine this failure with the one in xsk_load_prog() and report 
both the device and filename

> +                    goto err;
> +                }
> +            }
> +            ret = xsk_configure_map(obj, xsk_info, i);
> +            if (ret) {
> +                VLOG_ERR("%s: Can't configure XDP map",
> +                         netdev_get_name(netdev));
> +                goto err;
> +            }
> +            bpf_set_link_xdp_fd(ifindex, prog_fd, dev->xdpmode);
> +        }
>      }
>
>      n_txq = netdev_n_txq(netdev);
> @@ -450,6 +531,8 @@ xsk_destroy_all(struct netdev *netdev)
>          free(dev->tx_locks);
>          dev->tx_locks = NULL;
>      }
> +    free(CONST_CAST(char *, dev->xdpobj));
> +    dev->xdpobj = NULL;
>  }

Do we need to adjust xsk_remove_xdp_program() to free the loaded 
program/maps if it gets reconfigured with another one?

>  int
> @@ -458,7 +541,9 @@ netdev_afxdp_set_config(struct netdev *netdev, 
> const struct smap *args,
>  {
>      struct netdev_linux *dev = netdev_linux_cast(netdev);
>      const char *str_xdpmode;
> +    const char *str_xdpobj;
>      int xdpmode, new_n_rxq;
> +    struct stat s;
>
>      ovs_mutex_lock(&dev->mutex);
>      new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
> @@ -481,10 +566,25 @@ netdev_afxdp_set_config(struct netdev *netdev, 
> const struct smap *args,
>          return EINVAL;
>      }
>
> +    str_xdpobj = smap_get_def(args, "xdpobj", NULL);
> +    if (str_xdpobj && stat(str_xdpobj, &s)) {
> +        VLOG_ERR("Invalid xdpobj '%s': %s", str_xdpobj,
> +                 ovs_strerror(errno));
> +        ovs_mutex_unlock(&dev->mutex);
Maybe move unlock above log?

> +        return EINVAL;
> +    } else if (str_xdpobj && !S_ISREG(s.st_mode)) {
> +        VLOG_ERR("xdpobj '%s' is not a regular file", str_xdpobj);
> +        ovs_mutex_unlock(&dev->mutex);
Maybe move unlock above log?

> +        return EINVAL;
> +    }
> +
>      if (dev->requested_n_rxq != new_n_rxq
> -        || dev->requested_xdpmode != xdpmode) {
> +        || dev->requested_xdpmode != xdpmode
> +        || !nullable_string_is_equal(dev->requested_xdpobj, 
> str_xdpobj)
> +        || str_xdpobj) {
>          dev->requested_n_rxq = new_n_rxq;
>          dev->requested_xdpmode = xdpmode;
> +        dev->requested_xdpobj = nullable_xstrdup(str_xdpobj);
>          netdev_request_reconfigure(netdev);
>      }
>      ovs_mutex_unlock(&dev->mutex);
> @@ -500,6 +600,7 @@ netdev_afxdp_get_config(const struct netdev 
> *netdev, struct smap *args)
>      smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
>      smap_add_format(args, "xdpmode", "%s",
>          dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
> +    smap_add_format(args, "xdpobj", "%s", dev->xdpobj);
>      ovs_mutex_unlock(&dev->mutex);
>      return 0;
>  }
> @@ -539,6 +640,8 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
>           */
>      }
>
> +    dev->xdpobj = dev->requested_xdpobj;

Don’t we need to free the current dev->xdpobj before we override it?

>      err = xsk_configure_all(netdev);
>      if (err) {
>          VLOG_ERR("AF_XDP device %s reconfig failed.", 
> netdev_get_name(netdev));
> @@ -971,9 +1074,11 @@ netdev_afxdp_construct(struct netdev *netdev)
>      netdev->n_rxq = 0;
>      netdev->n_txq = 0;
>      dev->xdpmode = 0;
> +    dev->xdpobj = NULL;
>
>      dev->requested_n_rxq = NR_QUEUE;
>      dev->requested_xdpmode = XDP_COPY;
> +    dev->requested_xdpobj = NULL;
>
>      dev->xsks = NULL;
>      dev->tx_locks = NULL;
> diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
> index a350be151147..7ded0e4b4228 100644
> --- a/lib/netdev-linux-private.h
> +++ b/lib/netdev-linux-private.h
> @@ -103,6 +103,8 @@ struct netdev_linux {
>      int xdpmode;                /* AF_XDP running mode: driver or 
> skb. */
>      int requested_xdpmode;
>      struct ovs_spin *tx_locks;  /* spin lock array for TX queues. */
> +    const char *xdpobj;         /* XDP object file path. */
> +    const char *requested_xdpobj;
>  #endif
>  };
>
> -- 
> 2.7.4
William Tu Oct. 30, 2019, 3:29 a.m. UTC | #2
Hi Eelco,

Thanks for your review.

On Tue, Oct 29, 2019 at 8:28 AM Eelco Chaudron <echaudro@redhat.com> wrote:
>
> See comments inline…
>
> On 5 Oct 2019, at 2:12, William Tu wrote:
>
> > Now netdev-afxdp always forwards all packets to userspace because
> > it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
> > There are some cases when users want to keep packets in kernel instead
> > of sending to userspace, for example, management traffic such as SSH
> > should be processed in kernel.
> >
> > The patch enables loading the user-provide XDP program by doing
> >   $ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
>
> Should we maybe change the name to xdp-obj?

Yes, will fix it next version.

>
> > So users can implement their filtering logic or traffic steering idea
> > in their XDP program, and rest of the traffic passes to AF_XDP socket
> > handled by OVS.
> >
> > Signed-off-by: William Tu <u9012063@gmail.com>
> > ---
> > v2:
> >     A couple fixes and remove RFC
> > ---
> >  Documentation/intro/install/afxdp.rst |  46 ++++++++++++
> >  lib/netdev-afxdp.c                    | 137
> > ++++++++++++++++++++++++++++++----
> >  lib/netdev-linux-private.h            |   2 +
> >  3 files changed, 169 insertions(+), 16 deletions(-)
> >
> > diff --git a/Documentation/intro/install/afxdp.rst
> > b/Documentation/intro/install/afxdp.rst
> > index 820e9d993d8f..f484bdb8249b 100644
> > --- a/Documentation/intro/install/afxdp.rst
> > +++ b/Documentation/intro/install/afxdp.rst
> > @@ -265,6 +265,52 @@ Or, use OVS pmd tool::
> >    ovs-appctl dpif-netdev/pmd-stats-show
> >
> >
> > +Loading Custom XDP Program
> > +--------------------------
> > +By defailt, netdev-afxdp always forwards all packets to userspace
> > because
> > +it is using libbpf's default XDP program. There are some cases when
> > users
> > +want to keep packets in kernel instead of sending to userspace, for
> > example,
> > +management traffic such as SSH should be processed in kernel. This
> > can be
> > +done by loading the user-provided XDP program::
> > +
> > +  ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
> > +
> > +So users can implement their filtering logic or traffic steering idea
> > +in their XDP program, and rest of the traffic passes to AF_XDP socket
> > +handled by OVS. Below is a sample C program compiled under kernel's
> > +samples/bpf/.
> > +
> > +.. code-block:: c
> > +
> > +  #include <uapi/linux/bpf.h>
> > +  #include "bpf_helpers.h"
> > +
> > +  /* OVS will associate map 'xsks_map' to xsk socket. */
> > +  struct bpf_map_def SEC("maps") xsks_map = {
> > +      .type = BPF_MAP_TYPE_XSKMAP,
> > +      .key_size = sizeof(int),
> > +      .value_size = sizeof(int),
> > +      .max_entries = 32,
> > +  };
> > +
>
> Maybe something like this could be added to make sure it compiles with
> older kernels:
>
> #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)
>

Good point, I will add it.

> /* Kernel version before 5.3 needed an additional map */
> struct bpf_map_def SEC("maps") qidconf_map = {
>         .type = BPF_MAP_TYPE_ARRAY,
>         .key_size = sizeof(int),
>         .value_size = sizeof(int),
>         .max_entries = 64,
> };
> #endi
>
> See also here:
> https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c
>
> > +  SEC("xdp_sock")
> > +  int xdp_sock_prog(struct xdp_md *ctx)
> > +  {
> > +      int index = ctx->rx_queue_index;
> > +
> > +      /* Customized by user.
> > +       * For example
> > +       * 1) filter out all SSH traffic and return XDP_PASS
> > +       *    for kernel to process.
> > +       * 2) Drop unwanted packet by returning XDP_DROP.
> > +       */
> > +
> > +      /* Rest of packets goes to AF_XDP. */
> > +      return bpf_redirect_map(&xsks_map, index, 0);
> > +  }
> > +  char _license[] SEC("license") = "GPL";
> > +
> > +
> >  Example Script
> >  --------------
> >
> > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> > index 6e01803272aa..231d6473fb67 100644
> > --- a/lib/netdev-afxdp.c
> > +++ b/lib/netdev-afxdp.c
> > @@ -21,6 +21,7 @@
> >  #include "netdev-afxdp.h"
> >  #include "netdev-afxdp-pool.h"
> >
> > +#include <bpf/bpf.h>
> >  #include <errno.h>
> >  #include <inttypes.h>
> >  #include <linux/rtnetlink.h>
> > @@ -29,6 +30,7 @@
> >  #include <stdlib.h>
> >  #include <sys/resource.h>
> >  #include <sys/socket.h>
> > +#include <sys/stat.h>
> >  #include <sys/types.h>
> >  #include <unistd.h>
> >
> > @@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
> >  #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char
> > *)base))
> >
> >  static struct xsk_socket_info *xsk_configure(int ifindex, int
> > xdp_queue_id,
> > -                                             int mode);
> > +                                             int mode, bool
> > use_default_xdp);
> >  static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
> >  static void xsk_destroy(struct xsk_socket_info *xsk);
> >  static int xsk_configure_all(struct netdev *netdev);
> > @@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux
> > OVS_UNUSED)
> >      ovs_mutex_unlock(&unused_pools_mutex);
> >  }
> >
> > +static int
> > +xsk_load_prog(const char *path, struct bpf_object **obj,
> > +              int *prog_fd)
> > +{
> > +    struct bpf_prog_load_attr attr = {
> > +        .prog_type = BPF_PROG_TYPE_XDP,
> > +        .file = path,
> > +    };
> > +
> > +    if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
> > +        VLOG_WARN("Can't load XDP program at '%s'", path);
>
> I think it should be a VLOG_ERR()

OK

>
> > +        return EINVAL;
> > +    }
> > +
> > +    VLOG_INFO("Loaded XDP program at '%s'", path);
> > +    return 0;
> > +}
> > +
> > +static int
> > +xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info
> > *xsk,
> > +                  int queue_id)
> > +{
> > +    struct bpf_map *map;
> > +    int xsks_map_fd;
> > +    int xsk_fd;
> > +    int ret;
> > +
> > +    map = bpf_object__find_map_by_name(obj, "xsks_map");
> > +    if (!map) {
> > +        VLOG_WARN("XDP map 'xsks_map' not found");
> > +        return EINVAL;
> > +    }
> > +
> > +    xsks_map_fd = bpf_map__fd(map);
> > +    xsk_fd = xsk_socket__fd(xsk->xsk);
> > +
> > +    ret = bpf_map_update_elem(xsks_map_fd, &queue_id, &xsk_fd, 0);
>
> Don’t think this is needed, as
> xsk_socket__create()->xsk_setup_xdp_prog()->xsk_lookup_bpf_maps() will
> take care of it.

I think this is needed, because when enable custom xdp prog
 XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD = true;
the xsk_setup_xdp_prog is skipped.
see:
    if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
        err = xsk_setup_xdp_prog(xsk);

>
> > +    if (ret) {
> > +        VLOG_WARN("Can't set XSK map, map fd: %d", xsks_map_fd);
> > +        return EINVAL;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static struct xsk_umem_info *
> >  xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
> >  {
> > @@ -234,7 +281,7 @@ xsk_configure_umem(void *buffer, uint64_t size,
> > int xdpmode)
> >
> >  static struct xsk_socket_info *
> >  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> > -                     uint32_t queue_id, int xdpmode)
> > +                     uint32_t queue_id, int xdpmode, bool
> > use_default_xdp)
> >  {
> >      struct xsk_socket_config cfg;
> >      struct xsk_socket_info *xsk;
> > @@ -257,6 +304,10 @@ xsk_configure_socket(struct xsk_umem_info *umem,
> > uint32_t ifindex,
> >          cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST |
> > XDP_FLAGS_SKB_MODE;
> >      }
> >
> > +    if (!use_default_xdp) {
> > +        cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
> > +    }
> > +
> >      if (if_indextoname(ifindex, devname) == NULL) {
> >          VLOG_ERR("ifindex %d to devname failed (%s)",
> >                   ifindex, ovs_strerror(errno));
> > @@ -275,17 +326,19 @@ xsk_configure_socket(struct xsk_umem_info *umem,
> > uint32_t ifindex,
> >          return NULL;
> >      }
> >
> > -    /* Make sure the built-in AF_XDP program is loaded. */
> > -    ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
> > -    if (ret || !prog_id) {
> > -        if (ret) {
> > -            VLOG_ERR("Get XDP prog ID failed (%s)",
> > ovs_strerror(errno));
> > -        } else {
> > -            VLOG_ERR("No XDP program is loaded at ifindex %d",
> > ifindex);
> > +    if (use_default_xdp) {
> > +        /* Make sure the built-in AF_XDP program is loaded. */
> > +        ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
> > +        if (ret || !prog_id) {
> > +            if (ret) {
> > +                VLOG_ERR("Get XDP prog ID failed (%s)",
> > ovs_strerror(errno));
> > +            } else {
> > +                VLOG_ERR("No XDP program is loaded at ifindex %d",
> > ifindex);
> > +            }
> > +            xsk_socket__delete(xsk->xsk);
> > +            free(xsk);
> > +            return NULL;
> >          }
>
> I think a WARNING message should be logged when an external program is
> loaded so operators know when debugging something might be out of the
> ordinary. I would suggest to include the interface name and program
> path/name loaded.

OK I will add it.

>
> > -        xsk_socket__delete(xsk->xsk);
> > -        free(xsk);
> > -        return NULL;
> >      }
> >
> >      while (!xsk_ring_prod__reserve(&xsk->umem->fq,
> > @@ -311,7 +364,8 @@ xsk_configure_socket(struct xsk_umem_info *umem,
> > uint32_t ifindex,
> >  }
> >
> >  static struct xsk_socket_info *
> > -xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
> > +xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
> > +              bool use_default_xdp)
> >  {
> >      struct xsk_socket_info *xsk;
> >      struct xsk_umem_info *umem;
> > @@ -334,7 +388,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int
> > xdpmode)
> >
> >      VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
> >
> > -    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
> > +    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
> > +                               use_default_xdp);
> >      if (!xsk) {
> >          /* Clean up umem and xpacket pool. */
> >          if (xsk_umem__delete(umem->umem)) {
> > @@ -354,6 +409,10 @@ xsk_configure_all(struct netdev *netdev)
> >      struct netdev_linux *dev = netdev_linux_cast(netdev);
> >      struct xsk_socket_info *xsk_info;
> >      int i, ifindex, n_rxq, n_txq;
> > +    bool use_default_xdp;
> > +    struct bpf_object *obj;
> > +    int prog_fd = 0;
> > +    int ret;
> >
> >      ifindex = linux_get_ifindex(netdev_get_name(netdev));
> >
> > @@ -362,12 +421,13 @@ xsk_configure_all(struct netdev *netdev)
> >
> >      n_rxq = netdev_n_rxq(netdev);
> >      dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
> > +    use_default_xdp = dev->xdpobj ? false : true;
> >
> >      /* Configure each queue. */
> >      for (i = 0; i < n_rxq; i++) {
> >          VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
> >                    dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
> > -        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
> > +        xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
> > use_default_xdp);
> >          if (!xsk_info) {
> >              VLOG_ERR("Failed to create AF_XDP socket on queue %d.",
> > i);
> >              dev->xsks[i] = NULL;
> > @@ -377,6 +437,27 @@ xsk_configure_all(struct netdev *netdev)
> >          atomic_init(&xsk_info->tx_dropped, 0);
> >          xsk_info->outstanding_tx = 0;
> >          xsk_info->available_rx = PROD_NUM_DESCS;
> > +
> > +        if (dev->xdpobj) {
> > +            if (prog_fd == 0) {
> > +                /* XDP program is per-netdev, so all queues share
> > +                   the same XDP program.
> > +                 */
> Format comment to be inline with other multiline comments

Yes, thank you.

>
> > +                ret = xsk_load_prog(dev->xdpobj, &obj, &prog_fd);
> > +                if (ret) {
> > +                    VLOG_ERR("%s: Can't load XDP object file",
> > +                             netdev_get_name(netdev));
>
> Maybe combine this failure with the one in xsk_load_prog() and report
> both the device and filename

OK, it is a little redundant.

>
> > +                    goto err;
> > +                }
> > +            }
> > +            ret = xsk_configure_map(obj, xsk_info, i);
> > +            if (ret) {
> > +                VLOG_ERR("%s: Can't configure XDP map",
> > +                         netdev_get_name(netdev));
> > +                goto err;
> > +            }
> > +            bpf_set_link_xdp_fd(ifindex, prog_fd, dev->xdpmode);
> > +        }
> >      }
> >
> >      n_txq = netdev_n_txq(netdev);
> > @@ -450,6 +531,8 @@ xsk_destroy_all(struct netdev *netdev)
> >          free(dev->tx_locks);
> >          dev->tx_locks = NULL;
> >      }
> > +    free(CONST_CAST(char *, dev->xdpobj));
> > +    dev->xdpobj = NULL;
> >  }
>
> Do we need to adjust xsk_remove_xdp_program() to free the loaded
> program/maps if it gets reconfigured with another one?

maybe, let me double check.
at xsk_load_prog, I can check whether there exists a load program and
remove it.

>
> >  int
> > @@ -458,7 +541,9 @@ netdev_afxdp_set_config(struct netdev *netdev,
> > const struct smap *args,
> >  {
> >      struct netdev_linux *dev = netdev_linux_cast(netdev);
> >      const char *str_xdpmode;
> > +    const char *str_xdpobj;
> >      int xdpmode, new_n_rxq;
> > +    struct stat s;
> >
> >      ovs_mutex_lock(&dev->mutex);
> >      new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
> > @@ -481,10 +566,25 @@ netdev_afxdp_set_config(struct netdev *netdev,
> > const struct smap *args,
> >          return EINVAL;
> >      }
> >
> > +    str_xdpobj = smap_get_def(args, "xdpobj", NULL);
> > +    if (str_xdpobj && stat(str_xdpobj, &s)) {
> > +        VLOG_ERR("Invalid xdpobj '%s': %s", str_xdpobj,
> > +                 ovs_strerror(errno));
> > +        ovs_mutex_unlock(&dev->mutex);
> Maybe move unlock above log?

OK!

>
> > +        return EINVAL;
> > +    } else if (str_xdpobj && !S_ISREG(s.st_mode)) {
> > +        VLOG_ERR("xdpobj '%s' is not a regular file", str_xdpobj);
> > +        ovs_mutex_unlock(&dev->mutex);
> Maybe move unlock above log?

OK!

>
> > +        return EINVAL;
> > +    }
> > +
> >      if (dev->requested_n_rxq != new_n_rxq
> > -        || dev->requested_xdpmode != xdpmode) {
> > +        || dev->requested_xdpmode != xdpmode
> > +        || !nullable_string_is_equal(dev->requested_xdpobj,
> > str_xdpobj)
> > +        || str_xdpobj) {
> >          dev->requested_n_rxq = new_n_rxq;
> >          dev->requested_xdpmode = xdpmode;
> > +        dev->requested_xdpobj = nullable_xstrdup(str_xdpobj);
> >          netdev_request_reconfigure(netdev);
> >      }
> >      ovs_mutex_unlock(&dev->mutex);
> > @@ -500,6 +600,7 @@ netdev_afxdp_get_config(const struct netdev
> > *netdev, struct smap *args)
> >      smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
> >      smap_add_format(args, "xdpmode", "%s",
> >          dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
> > +    smap_add_format(args, "xdpobj", "%s", dev->xdpobj);
> >      ovs_mutex_unlock(&dev->mutex);
> >      return 0;
> >  }
> > @@ -539,6 +640,8 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
> >           */
> >      }
> >
> > +    dev->xdpobj = dev->requested_xdpobj;
>
> Don’t we need to free the current dev->xdpobj before we override it?

I think it already been free at xsk_destroy_all().
Thanks, will work on next version.

Regards,
William
diff mbox series

Patch

diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 820e9d993d8f..f484bdb8249b 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -265,6 +265,52 @@  Or, use OVS pmd tool::
   ovs-appctl dpif-netdev/pmd-stats-show
 
 
+Loading Custom XDP Program
+--------------------------
+By defailt, netdev-afxdp always forwards all packets to userspace because
+it is using libbpf's default XDP program. There are some cases when users
+want to keep packets in kernel instead of sending to userspace, for example,
+management traffic such as SSH should be processed in kernel. This can be
+done by loading the user-provided XDP program::
+
+  ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
+
+So users can implement their filtering logic or traffic steering idea
+in their XDP program, and rest of the traffic passes to AF_XDP socket
+handled by OVS. Below is a sample C program compiled under kernel's
+samples/bpf/.
+
+.. code-block:: c
+
+  #include <uapi/linux/bpf.h>
+  #include "bpf_helpers.h"
+
+  /* OVS will associate map 'xsks_map' to xsk socket. */
+  struct bpf_map_def SEC("maps") xsks_map = {
+      .type = BPF_MAP_TYPE_XSKMAP,
+      .key_size = sizeof(int),
+      .value_size = sizeof(int),
+      .max_entries = 32,
+  };
+
+  SEC("xdp_sock")
+  int xdp_sock_prog(struct xdp_md *ctx)
+  {
+      int index = ctx->rx_queue_index;
+
+      /* Customized by user.
+       * For example
+       * 1) filter out all SSH traffic and return XDP_PASS
+       *    for kernel to process.
+       * 2) Drop unwanted packet by returning XDP_DROP.
+       */
+
+      /* Rest of packets goes to AF_XDP. */
+      return bpf_redirect_map(&xsks_map, index, 0);
+  }
+  char _license[] SEC("license") = "GPL";
+
+
 Example Script
 --------------
 
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 6e01803272aa..231d6473fb67 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -21,6 +21,7 @@ 
 #include "netdev-afxdp.h"
 #include "netdev-afxdp-pool.h"
 
+#include <bpf/bpf.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/rtnetlink.h>
@@ -29,6 +30,7 @@ 
 #include <stdlib.h>
 #include <sys/resource.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -82,7 +84,7 @@  BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
 #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
 
 static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
-                                             int mode);
+                                             int mode, bool use_default_xdp);
 static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
 static void xsk_destroy(struct xsk_socket_info *xsk);
 static int xsk_configure_all(struct netdev *netdev);
@@ -158,6 +160,51 @@  netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED)
     ovs_mutex_unlock(&unused_pools_mutex);
 }
 
+static int
+xsk_load_prog(const char *path, struct bpf_object **obj,
+              int *prog_fd)
+{
+    struct bpf_prog_load_attr attr = {
+        .prog_type = BPF_PROG_TYPE_XDP,
+        .file = path,
+    };
+
+    if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
+        VLOG_WARN("Can't load XDP program at '%s'", path);
+        return EINVAL;
+    }
+
+    VLOG_INFO("Loaded XDP program at '%s'", path);
+    return 0;
+}
+
+static int
+xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info *xsk,
+                  int queue_id)
+{
+    struct bpf_map *map;
+    int xsks_map_fd;
+    int xsk_fd;
+    int ret;
+
+    map = bpf_object__find_map_by_name(obj, "xsks_map");
+    if (!map) {
+        VLOG_WARN("XDP map 'xsks_map' not found");
+        return EINVAL;
+    }
+
+    xsks_map_fd = bpf_map__fd(map);
+    xsk_fd = xsk_socket__fd(xsk->xsk);
+
+    ret = bpf_map_update_elem(xsks_map_fd, &queue_id, &xsk_fd, 0);
+    if (ret) {
+        VLOG_WARN("Can't set XSK map, map fd: %d", xsks_map_fd);
+        return EINVAL;
+    }
+
+    return 0;
+}
+
 static struct xsk_umem_info *
 xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
 {
@@ -234,7 +281,7 @@  xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
 
 static struct xsk_socket_info *
 xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
-                     uint32_t queue_id, int xdpmode)
+                     uint32_t queue_id, int xdpmode, bool use_default_xdp)
 {
     struct xsk_socket_config cfg;
     struct xsk_socket_info *xsk;
@@ -257,6 +304,10 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
     }
 
+    if (!use_default_xdp) {
+        cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+    }
+
     if (if_indextoname(ifindex, devname) == NULL) {
         VLOG_ERR("ifindex %d to devname failed (%s)",
                  ifindex, ovs_strerror(errno));
@@ -275,17 +326,19 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         return NULL;
     }
 
-    /* Make sure the built-in AF_XDP program is loaded. */
-    ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
-    if (ret || !prog_id) {
-        if (ret) {
-            VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
-        } else {
-            VLOG_ERR("No XDP program is loaded at ifindex %d", ifindex);
+    if (use_default_xdp) {
+        /* Make sure the built-in AF_XDP program is loaded. */
+        ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
+        if (ret || !prog_id) {
+            if (ret) {
+                VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
+            } else {
+                VLOG_ERR("No XDP program is loaded at ifindex %d", ifindex);
+            }
+            xsk_socket__delete(xsk->xsk);
+            free(xsk);
+            return NULL;
         }
-        xsk_socket__delete(xsk->xsk);
-        free(xsk);
-        return NULL;
     }
 
     while (!xsk_ring_prod__reserve(&xsk->umem->fq,
@@ -311,7 +364,8 @@  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
 }
 
 static struct xsk_socket_info *
-xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
+xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
+              bool use_default_xdp)
 {
     struct xsk_socket_info *xsk;
     struct xsk_umem_info *umem;
@@ -334,7 +388,8 @@  xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
 
     VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
 
-    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
+    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
+                               use_default_xdp);
     if (!xsk) {
         /* Clean up umem and xpacket pool. */
         if (xsk_umem__delete(umem->umem)) {
@@ -354,6 +409,10 @@  xsk_configure_all(struct netdev *netdev)
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     struct xsk_socket_info *xsk_info;
     int i, ifindex, n_rxq, n_txq;
+    bool use_default_xdp;
+    struct bpf_object *obj;
+    int prog_fd = 0;
+    int ret;
 
     ifindex = linux_get_ifindex(netdev_get_name(netdev));
 
@@ -362,12 +421,13 @@  xsk_configure_all(struct netdev *netdev)
 
     n_rxq = netdev_n_rxq(netdev);
     dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
+    use_default_xdp = dev->xdpobj ? false : true;
 
     /* Configure each queue. */
     for (i = 0; i < n_rxq; i++) {
         VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
                   dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
-        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
+        xsk_info = xsk_configure(ifindex, i, dev->xdpmode, use_default_xdp);
         if (!xsk_info) {
             VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
             dev->xsks[i] = NULL;
@@ -377,6 +437,27 @@  xsk_configure_all(struct netdev *netdev)
         atomic_init(&xsk_info->tx_dropped, 0);
         xsk_info->outstanding_tx = 0;
         xsk_info->available_rx = PROD_NUM_DESCS;
+
+        if (dev->xdpobj) {
+            if (prog_fd == 0) {
+                /* XDP program is per-netdev, so all queues share
+                   the same XDP program.
+                 */
+                ret = xsk_load_prog(dev->xdpobj, &obj, &prog_fd);
+                if (ret) {
+                    VLOG_ERR("%s: Can't load XDP object file",
+                             netdev_get_name(netdev));
+                    goto err;
+                }
+            }
+            ret = xsk_configure_map(obj, xsk_info, i);
+            if (ret) {
+                VLOG_ERR("%s: Can't configure XDP map",
+                         netdev_get_name(netdev));
+                goto err;
+            }
+            bpf_set_link_xdp_fd(ifindex, prog_fd, dev->xdpmode);
+        }
     }
 
     n_txq = netdev_n_txq(netdev);
@@ -450,6 +531,8 @@  xsk_destroy_all(struct netdev *netdev)
         free(dev->tx_locks);
         dev->tx_locks = NULL;
     }
+    free(CONST_CAST(char *, dev->xdpobj));
+    dev->xdpobj = NULL;
 }
 
 int
@@ -458,7 +541,9 @@  netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
 {
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     const char *str_xdpmode;
+    const char *str_xdpobj;
     int xdpmode, new_n_rxq;
+    struct stat s;
 
     ovs_mutex_lock(&dev->mutex);
     new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
@@ -481,10 +566,25 @@  netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
         return EINVAL;
     }
 
+    str_xdpobj = smap_get_def(args, "xdpobj", NULL);
+    if (str_xdpobj && stat(str_xdpobj, &s)) {
+        VLOG_ERR("Invalid xdpobj '%s': %s", str_xdpobj,
+                 ovs_strerror(errno));
+        ovs_mutex_unlock(&dev->mutex);
+        return EINVAL;
+    } else if (str_xdpobj && !S_ISREG(s.st_mode)) {
+        VLOG_ERR("xdpobj '%s' is not a regular file", str_xdpobj);
+        ovs_mutex_unlock(&dev->mutex);
+        return EINVAL;
+    }
+
     if (dev->requested_n_rxq != new_n_rxq
-        || dev->requested_xdpmode != xdpmode) {
+        || dev->requested_xdpmode != xdpmode
+        || !nullable_string_is_equal(dev->requested_xdpobj, str_xdpobj)
+        || str_xdpobj) {
         dev->requested_n_rxq = new_n_rxq;
         dev->requested_xdpmode = xdpmode;
+        dev->requested_xdpobj = nullable_xstrdup(str_xdpobj);
         netdev_request_reconfigure(netdev);
     }
     ovs_mutex_unlock(&dev->mutex);
@@ -500,6 +600,7 @@  netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
     smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
     smap_add_format(args, "xdpmode", "%s",
         dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
+    smap_add_format(args, "xdpobj", "%s", dev->xdpobj);
     ovs_mutex_unlock(&dev->mutex);
     return 0;
 }
@@ -539,6 +640,8 @@  netdev_afxdp_reconfigure(struct netdev *netdev)
          */
     }
 
+    dev->xdpobj = dev->requested_xdpobj;
+
     err = xsk_configure_all(netdev);
     if (err) {
         VLOG_ERR("AF_XDP device %s reconfig failed.", netdev_get_name(netdev));
@@ -971,9 +1074,11 @@  netdev_afxdp_construct(struct netdev *netdev)
     netdev->n_rxq = 0;
     netdev->n_txq = 0;
     dev->xdpmode = 0;
+    dev->xdpobj = NULL;
 
     dev->requested_n_rxq = NR_QUEUE;
     dev->requested_xdpmode = XDP_COPY;
+    dev->requested_xdpobj = NULL;
 
     dev->xsks = NULL;
     dev->tx_locks = NULL;
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index a350be151147..7ded0e4b4228 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -103,6 +103,8 @@  struct netdev_linux {
     int xdpmode;                /* AF_XDP running mode: driver or skb. */
     int requested_xdpmode;
     struct ovs_spin *tx_locks;  /* spin lock array for TX queues. */
+    const char *xdpobj;         /* XDP object file path. */
+    const char *requested_xdpobj;
 #endif
 };