diff mbox series

[bpf-next,1/2] libbpf: separate XDP program load with xsk socket creation

Message ID 20201104094626.3406-2-mariuszx.dudek@intel.com
State Not Applicable
Delegated to: BPF Maintainers
Headers show
Series libbpf: add support for privileged/unprivileged control separation | expand

Checks

Context Check Description
jkicinski/cover_letter success Link
jkicinski/fixes_present success Link
jkicinski/patch_count success Link
jkicinski/tree_selection success Clearly marked for bpf-next
jkicinski/subject_prefix success Link
jkicinski/source_inline success Was 0 now: 0
jkicinski/verify_signedoff success Link
jkicinski/module_param success Was 0 now: 0
jkicinski/build_32bit fail Errors and warnings before: 4 this patch: 4
jkicinski/kdoc success Errors and warnings before: 0 this patch: 0
jkicinski/verify_fixes success Link
jkicinski/checkpatch fail Link
jkicinski/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
jkicinski/header_inline success Link
jkicinski/stable success Stable not CCed

Commit Message

Mariusz Dudek Nov. 4, 2020, 9:46 a.m. UTC
From: Mariusz Dudek <mariuszx.dudek@intel.com>

        Add support for separation of eBPF program load and xsk socket
        creation.

        This is needed for the use case where you want to provide as few
        privileges as possible to the data plane application that will
        handle xsk socket creation and incoming traffic.

        With this patch the data entity container can be run with only
        the CAP_NET_RAW capability to fulfill its purpose of creating xsk
        sockets and handling packets. If your umem is larger than or
        equal to the process limit for MEMLOCK, you need to either
        increase the limit or add the CAP_IPC_LOCK capability.

        To resolve the privilege issue, two APIs are introduced:

        - xsk_setup_xdp_prog - prepares the BPF program, if given, and
        loads it on the selected network interface, or loads the built-in
        XDP program if no XDP program is supplied. It can also return
        xsks_map_fd, which is needed by the unprivileged process to update
        xsks_map with the AF_XDP socket "fd"

        - xsk_update_xskmap - inserts an AF_XDP socket into an xskmap
        for a particular xsk_socket

Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com>
---
 tools/lib/bpf/libbpf.map |   2 +
 tools/lib/bpf/xsk.c      | 157 ++++++++++++++++++++++++++++++++-------
 tools/lib/bpf/xsk.h      |  13 ++++
 3 files changed, 146 insertions(+), 26 deletions(-)

Comments

Andrii Nakryiko Nov. 4, 2020, 9:07 p.m. UTC | #1
On Wed, Nov 4, 2020 at 1:47 AM <mariusz.dudek@gmail.com> wrote:
>
> From: Mariusz Dudek <mariuszx.dudek@intel.com>
>
>         Add support for separation of eBPF program load and xsk socket
>         creation.
>
>         This is needed for the use case where you want to provide as few
>         privileges as possible to the data plane application that will
>         handle xsk socket creation and incoming traffic.
>
>         With this patch the data entity container can be run with only
>         the CAP_NET_RAW capability to fulfill its purpose of creating xsk
>         sockets and handling packets. If your umem is larger than or
>         equal to the process limit for MEMLOCK, you need to either
>         increase the limit or add the CAP_IPC_LOCK capability.
>
>         To resolve privileges issue two APIs are introduced:
>
>         - xsk_setup_xdp_prog - prepares bpf program if given and
>         loads it on a selected network interface or loads the built in
>         XDP program, if no XDP program is supplied. It can also return
>         xsks_map_fd which is needed by unprivileged process to update
>         xsks_map with AF_XDP socket "fd"
>
>         - xsk_update_xskmap - inserts an AF_XDP socket into an xskmap
>         for a particular xsk_socket
>

Your commit message seems to be heavily shifted right...


> Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com>
> ---
>  tools/lib/bpf/libbpf.map |   2 +
>  tools/lib/bpf/xsk.c      | 157 ++++++++++++++++++++++++++++++++-------
>  tools/lib/bpf/xsk.h      |  13 ++++
>  3 files changed, 146 insertions(+), 26 deletions(-)
>

[...]

> diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
> index 1069c46364ff..c42b91935d3c 100644
> --- a/tools/lib/bpf/xsk.h
> +++ b/tools/lib/bpf/xsk.h
> @@ -201,6 +201,19 @@ struct xsk_umem_config {
>         __u32 flags;
>  };
>
> +struct bpf_prog_cfg {
> +       struct bpf_insn *prog;
> +       const char *license;
> +       size_t insns_cnt;
> +       int xsks_map_fd;
> +};

This config will have problems with backward/forward compatibility.
Please check how xxx_opts are done and use them for extensible options
structs.


> +
> +LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
> +                                 struct bpf_prog_cfg *cfg,
> +                                 int *xsks_map_fd);
> +LIBBPF_API int xsk_update_xskmap(struct xsk_socket *xsk,
> +                                int xsks_map_fd);

this should be called xsk_socket__update_map? BTW, what's xskmap? Is
that a special BPF map type?

> +
>  /* Flags for the libbpf_flags field. */
>  #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
>
> --
> 2.20.1
>
Mariusz Dudek Nov. 5, 2020, 1:58 p.m. UTC | #2
On Wed, Nov 4, 2020 at 10:07 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Nov 4, 2020 at 1:47 AM <mariusz.dudek@gmail.com> wrote:
> >
> > From: Mariusz Dudek <mariuszx.dudek@intel.com>
> >
> >         Add support for separation of eBPF program load and xsk socket
> >         creation.
> >
> >         This is needed for the use case where you want to provide as few
> >         privileges as possible to the data plane application that will
> >         handle xsk socket creation and incoming traffic.
> >
> >         With this patch the data entity container can be run with only
> >         the CAP_NET_RAW capability to fulfill its purpose of creating xsk
> >         sockets and handling packets. If your umem is larger than or
> >         equal to the process limit for MEMLOCK, you need to either
> >         increase the limit or add the CAP_IPC_LOCK capability.
> >
> >         To resolve privileges issue two APIs are introduced:
> >
> >         - xsk_setup_xdp_prog - prepares bpf program if given and
> >         loads it on a selected network interface or loads the built in
> >         XDP program, if no XDP program is supplied. It can also return
> >         xsks_map_fd which is needed by unprivileged process to update
> >         xsks_map with AF_XDP socket "fd"
> >
> >         - xsk_update_xskmap - inserts an AF_XDP socket into an xskmap
> >         for a particular xsk_socket
> >
>
> Your commit message seems to be heavily shifted right...
>
Will be fixed
>
> > Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com>
> > ---
> >  tools/lib/bpf/libbpf.map |   2 +
> >  tools/lib/bpf/xsk.c      | 157 ++++++++++++++++++++++++++++++++-------
> >  tools/lib/bpf/xsk.h      |  13 ++++
> >  3 files changed, 146 insertions(+), 26 deletions(-)
> >
>
> [...]
>
> > diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
> > index 1069c46364ff..c42b91935d3c 100644
> > --- a/tools/lib/bpf/xsk.h
> > +++ b/tools/lib/bpf/xsk.h
> > @@ -201,6 +201,19 @@ struct xsk_umem_config {
> >         __u32 flags;
> >  };
> >
> > +struct bpf_prog_cfg {
> > +       struct bpf_insn *prog;
> > +       const char *license;
> > +       size_t insns_cnt;
> > +       int xsks_map_fd;
> > +};
>
> This config will have problems with backward/forward compatibility.
> Please check how xxx_opts are done and use them for extensible options
> structs.
>
I will add the struct size as the first field and a #define for
__last_field to be in line with xxx_opts.
>
> > +
> > +LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
> > +                                 struct bpf_prog_cfg *cfg,
> > +                                 int *xsks_map_fd);
> > +LIBBPF_API int xsk_update_xskmap(struct xsk_socket *xsk,
> > +                                int xsks_map_fd);
>
> this should be called xsk_socket__update_map? BTW, what's xskmap? Is
> that a special BPF map type?
>
I will change the API name as you suggested. XSKMAP is a special BPF
map type, BPF_MAP_TYPE_XSKMAP.
It defines how packets are distributed from an XDP program to the XSKs.
> > +
> >  /* Flags for the libbpf_flags field. */
> >  #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
> >
> > --
> > 2.20.1
> >
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 4ebfadf45b47..4b938de1ca39 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -336,4 +336,6 @@  LIBBPF_0.2.0 {
 		perf_buffer__epoll_fd;
 		perf_buffer__consume_buffer;
 		xsk_socket__create_shared;
+		xsk_setup_xdp_prog;
+		xsk_update_xskmap;
 } LIBBPF_0.1.0;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e3c98c007825..8c5219ceca45 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -351,13 +351,8 @@  int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
 COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
 DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
 
-static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+static int get_bpf_prog(struct bpf_prog_cfg *cfg_ptr, int xsks_map_fd)
 {
-	static const int log_buf_size = 16 * 1024;
-	struct xsk_ctx *ctx = xsk->ctx;
-	char log_buf[log_buf_size];
-	int err, prog_fd;
-
 	/* This is the C-program:
 	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
 	 * {
@@ -382,7 +377,7 @@  static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* *(u32 *)(r10 - 4) = r2 */
 		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsks_map_fd),
 		/* r3 = XDP_PASS */
 		BPF_MOV64_IMM(BPF_REG_3, 2),
 		/* call bpf_redirect_map */
@@ -394,7 +389,7 @@  static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* r2 += -4 */
 		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsks_map_fd),
 		/* call bpf_map_lookup_elem */
 		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 		/* r1 = r0 */
@@ -406,7 +401,7 @@  static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* r2 = *(u32 *)(r10 - 4) */
 		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsks_map_fd),
 		/* r3 = 0 */
 		BPF_MOV64_IMM(BPF_REG_3, 0),
 		/* call bpf_redirect_map */
@@ -414,17 +409,42 @@  static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* The jumps are to this instruction */
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
 
-	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
-				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+	cfg_ptr->prog = malloc(sizeof(prog));
+	if (!cfg_ptr->prog)
+		return -ENOMEM;
+	memcpy(cfg_ptr->prog, prog, sizeof(prog));
+	cfg_ptr->license = "LGPL-2.1 or BSD-2-Clause";
+	cfg_ptr->insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+	return 0;
+}
+
+static int xsk_load_xdp_prog(struct xsk_socket *xsk, struct bpf_prog_cfg *user_cfg)
+{
+	static const int log_buf_size = 16 * 1024;
+	struct xsk_ctx *ctx = xsk->ctx;
+	char log_buf[log_buf_size];
+	struct bpf_prog_cfg cfg;
+	int err, prog_fd;
+
+	if (user_cfg && user_cfg->insns_cnt) {
+		cfg = *user_cfg;
+	} else {
+		err = get_bpf_prog(&cfg, ctx->xsks_map_fd);
+		if (err)
+			return err;
+	}
+
+	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, cfg.prog, cfg.insns_cnt,
+				   cfg.license, 0, log_buf,
 				   log_buf_size);
 	if (prog_fd < 0) {
 		pr_warn("BPF log buffer:\n%s", log_buf);
 		return prog_fd;
 	}
 
-	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
+	err = bpf_set_link_xdp_fd(ctx->ifindex, prog_fd,
 				  xsk->config.xdp_flags);
 	if (err) {
 		close(prog_fd);
@@ -566,8 +586,43 @@  static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 				   &xsk->fd, 0);
 }
 
-static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
+{
+	char ifname[IFNAMSIZ];
+	struct xsk_ctx *ctx;
+	char *interface;
+	int res = -1;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		goto error_ctx;
+
+	interface = if_indextoname(ifindex, &ifname[0]);
+	if (!interface) {
+		res = -errno;
+		goto error_ifindex;
+	}
+
+	ctx->ifindex = ifindex;
+	strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	ctx->ifname[IFNAMSIZ - 1] = 0;
+
+	xsk->ctx = ctx;
+
+	return 0;
+
+error_ifindex:
+	free(ctx);
+error_ctx:
+	return res;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
+				struct bpf_prog_cfg *cfg,
+				bool force_set_map,
+				int *xsks_map_fd)
 {
+	struct xsk_socket *xsk = _xdp;
 	struct xsk_ctx *ctx = xsk->ctx;
 	__u32 prog_id = 0;
 	int err;
@@ -578,14 +633,17 @@  static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 		return err;
 
 	if (!prog_id) {
-		err = xsk_create_bpf_maps(xsk);
-		if (err)
-			return err;
+		if (!cfg || !cfg->insns_cnt) {
+			err = xsk_create_bpf_maps(xsk);
+			if (err)
+				return err;
+		} else {
+			ctx->xsks_map_fd = cfg->xsks_map_fd;
+		}
 
-		err = xsk_load_xdp_prog(xsk);
+		err = xsk_load_xdp_prog(xsk, cfg);
 		if (err) {
-			xsk_delete_bpf_maps(xsk);
-			return err;
+			goto err_load_xdp_prog;
 		}
 	} else {
 		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
@@ -598,15 +656,29 @@  static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 		}
 	}
 
-	if (xsk->rx)
+	if (xsk->rx || force_set_map) {
 		err = xsk_set_bpf_maps(xsk);
-	if (err) {
-		xsk_delete_bpf_maps(xsk);
-		close(ctx->prog_fd);
-		return err;
+		if (err) {
+			if (!prog_id) {
+				goto err_set_bpf_maps;
+			} else {
+				close(ctx->prog_fd);
+				return err;
+			}
+		}
 	}
+	if (xsks_map_fd)
+		*xsks_map_fd = ctx->xsks_map_fd;
 
 	return 0;
+
+err_set_bpf_maps:
+	close(ctx->prog_fd);
+	bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_load_xdp_prog:
+	xsk_delete_bpf_maps(xsk);
+
+	return err;
 }
 
 static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
@@ -689,6 +761,39 @@  static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	return ctx;
 }
 
+static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
+{
+	free(xsk->ctx);
+	free(xsk);
+}
+
+int xsk_update_xskmap(struct xsk_socket *xsk, int fd)
+{
+	xsk->ctx->xsks_map_fd = fd;
+	return xsk_set_bpf_maps(xsk);
+}
+
+int xsk_setup_xdp_prog(int ifindex, struct bpf_prog_cfg *cfg,
+		       int *xsks_map_fd)
+{
+	struct xsk_socket *xsk;
+	int res = -1;
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return res;
+
+	res = xsk_create_xsk_struct(ifindex, xsk);
+	if (res)
+		return -EINVAL;
+
+	res = __xsk_setup_xdp_prog(xsk, cfg, false, xsks_map_fd);
+
+	xsk_destroy_xsk_struct(xsk);
+
+	return res;
+}
+
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			      const char *ifname,
 			      __u32 queue_id, struct xsk_umem *umem,
@@ -838,7 +943,7 @@  int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	ctx->prog_fd = -1;
 
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
-		err = xsk_setup_xdp_prog(xsk);
+		err = __xsk_setup_xdp_prog(xsk, NULL, false, NULL);
 		if (err)
 			goto out_mmap_tx;
 	}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 1069c46364ff..c42b91935d3c 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -201,6 +201,19 @@  struct xsk_umem_config {
 	__u32 flags;
 };
 
+struct bpf_prog_cfg {
+	struct bpf_insn *prog;
+	const char *license;
+	size_t insns_cnt;
+	int xsks_map_fd;
+};
+
+LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
+				  struct bpf_prog_cfg *cfg,
+				  int *xsks_map_fd);
+LIBBPF_API int xsk_update_xskmap(struct xsk_socket *xsk,
+				 int xsks_map_fd);
+
 /* Flags for the libbpf_flags field. */
 #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)