@@ -1,3 +1,8 @@
+# Include "Config" if already generated
+ifneq ($(wildcard Config),)
+include Config
+endif
+
ifndef VERBOSE
MAKEFLAGS += --no-print-directory
endif
@@ -7,6 +12,7 @@ LIBDIR?=$(PREFIX)/lib
SBINDIR?=/sbin
CONFDIR?=/etc/iproute2
DATADIR?=$(PREFIX)/share
+HDRDIR?=$(PREFIX)/include/iproute2
DOCDIR?=$(DATADIR)/doc/iproute2
MANDIR?=$(DATADIR)/man
ARPDDIR?=/var/lib/arpd
@@ -51,6 +57,11 @@ SUBDIRS=lib ip tc bridge misc netem genl tipc devlink man
LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
LDLIBS += $(LIBNETLINK)
+ifeq ($(HAVE_ELF),y)
+CFLAGS += -DHAVE_ELF
+LDLIBS += -lelf
+endif
+
all: Config
@set -e; \
for i in $(SUBDIRS); \
@@ -63,6 +74,7 @@ install: all
install -m 0755 -d $(DESTDIR)$(SBINDIR)
install -m 0755 -d $(DESTDIR)$(CONFDIR)
install -m 0755 -d $(DESTDIR)$(ARPDDIR)
+ install -m 0755 -d $(DESTDIR)$(HDRDIR)
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
install -m 0644 README.iproute2+tc $(shell find examples -maxdepth 1 -type f) \
@@ -73,6 +85,7 @@ install: all
install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
+ install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)
snapshot:
echo "static const char SNAPSHOT[] = \""`date +%y%m%d`"\";" \
@@ -272,7 +272,7 @@ EOF
if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
then
- echo "TC_CONFIG_ELF:=y" >>Config
+ echo "HAVE_ELF:=y" >>Config
echo "yes"
else
echo "no"
@@ -107,9 +107,14 @@
/** BPF helper functions for tc. Individual flags are in linux/bpf.h */
+#ifndef __BPF_FUNC
+# define __BPF_FUNC(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused
+#endif
+
#ifndef BPF_FUNC
# define BPF_FUNC(NAME, ...) \
- (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME
+ __BPF_FUNC(NAME, __VA_ARGS__) = (void *) BPF_FUNC_##NAME
#endif
/* Map access/manipulation */
@@ -147,10 +152,15 @@ static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
/* System helpers */
static uint32_t BPF_FUNC(get_smp_processor_id);
+static uint32_t BPF_FUNC(get_numa_node_id);
/* Packet misc meta data */
static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb);
+static int BPF_FUNC(skb_under_cgroup, void *map, uint32_t index);
+
static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(set_hash_invalid, struct __sk_buff *skb);
/* Packet redirection */
static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
@@ -169,6 +179,20 @@ static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
uint32_t from, uint32_t to, uint32_t flags);
static int BPF_FUNC(csum_diff, const void *from, uint32_t from_size,
const void *to, uint32_t to_size, uint32_t seed);
+static int BPF_FUNC(csum_update, struct __sk_buff *skb, uint32_t wsum);
+
+static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type);
+static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto,
+ uint32_t flags);
+static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen,
+ uint32_t flags);
+
+static int BPF_FUNC(skb_pull_data, struct __sk_buff *skb, uint32_t len);
+
+/* Event notification */
+static int __BPF_FUNC(skb_event_output, struct __sk_buff *skb, void *map,
+ uint64_t index, const void *data, uint32_t size) =
+ (void *) BPF_FUNC_perf_event_output;
/* Packet vlan encap/decap */
static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
new file mode 100644
@@ -0,0 +1,95 @@
+/*
+ * bpf_util.h BPF common code
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@iogearbox.net>
+ * Jiri Pirko <jiri@resnulli.us>
+ */
+
+#ifndef __BPF_UTIL__
+#define __BPF_UTIL__
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/magic.h>
+#include <linux/elf-em.h>
+#include <linux/if_alg.h>
+
+#include "utils.h"
+#include "bpf_scm.h"
+
+#define BPF_ENV_UDS "TC_BPF_UDS"
+#define BPF_ENV_MNT "TC_BPF_MNT"
+
+#ifndef BPF_MAX_LOG
+# define BPF_MAX_LOG 4096
+#endif
+
+#define BPF_DIR_GLOBALS "globals"
+
+#ifndef BPF_FS_MAGIC
+# define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+#define BPF_DIR_MNT "/sys/fs/bpf"
+
+#ifndef TRACEFS_MAGIC
+# define TRACEFS_MAGIC 0x74726163
+#endif
+
+#define TRACE_DIR_MNT "/sys/kernel/tracing"
+
+#ifndef AF_ALG
+# define AF_ALG 38
+#endif
+
+#ifndef EM_BPF
+# define EM_BPF 247
+#endif
+
+/* Callbacks through which bpf_parse_common() hands the parsed program back. */
+struct bpf_cfg_ops {
+	/* Called for the classic BPF modes with the opcode array and its length. */
+	void (*cbpf_cb)(void *nl, const struct sock_filter *ops, int ops_len);
+	/* Called for the eBPF modes with the program fd and a printable annotation. */
+	void (*ebpf_cb)(void *nl, int fd, const char *annotation);
+};
+
+/*
+ * Input/output state for the command line parser. The caller fills argc/argv;
+ * the parser writes back object, section, uds and the advanced argc/argv.
+ */
+struct bpf_cfg_in {
+	const char *object;	/* file argument given for the eBPF modes */
+	const char *section;	/* ELF section name (default or user supplied) */
+	const char *uds;	/* export name taken from the environment or "export" */
+	int argc;		/* remaining argument count */
+	char **argv;		/* remaining argument vector */
+	struct sock_filter *ops; /* scratch buffer for classic BPF opcodes */
+};
+
+int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
+ const struct bpf_cfg_ops *ops, void *nl);
+
+const char *bpf_prog_to_default_section(enum bpf_prog_type type);
+
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
+int bpf_trace_pipe(void);
+
+void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+
+#ifdef HAVE_ELF
+int bpf_send_map_fds(const char *path, const char *obj);
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+ unsigned int entries);
+#else
+/* Stub used when HAVE_ELF is not defined: does nothing and returns 0. */
+static inline int bpf_send_map_fds(const char *path, const char *obj)
+{
+	return 0;
+}
+
+/* Stub used when HAVE_ELF is not defined: always returns -1. */
+static inline int bpf_recv_map_fds(const char *path, int *fds,
+				   struct bpf_map_aux *aux,
+				   unsigned int entries)
+{
+	return -1;
+}
+#endif /* HAVE_ELF */
+#endif /* __BPF_UTIL__ */
@@ -8,7 +8,7 @@ CFLAGS += -fPIC
UTILOBJ = utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o \
inet_proto.o namespace.o json_writer.o \
- names.o color.o
+ names.o color.o bpf.o
NLOBJ=libgenl.o ll_map.o libnetlink.o
new file mode 100644
@@ -0,0 +1,2262 @@
+/*
+ * bpf.c BPF common code
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@iogearbox.net>
+ * Jiri Pirko <jiri@resnulli.us>
+ * Alexei Starovoitov <ast@kernel.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <assert.h>
+
+#ifdef HAVE_ELF
+#include <libelf.h>
+#include <gelf.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <sys/vfs.h>
+#include <sys/mount.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <sys/resource.h>
+
+#include <arpa/inet.h>
+
+#include "utils.h"
+
+#include "bpf_util.h"
+#include "bpf_elf.h"
+#include "bpf_scm.h"
+
+/* Per-program-type metadata; __bpf_prog_meta[] is indexed by enum bpf_prog_type. */
+struct bpf_prog_meta {
+	const char *type;	/* keyword matched after "type" on the command line */
+	const char *subdir;	/* sub-directory name used inside the bpf fs work dir */
+	const char *section;	/* default ELF section name for this program type */
+	bool may_uds_export;	/* when set, bpf_parse() looks up an export (UDS) name */
+};
+
+static const enum bpf_prog_type __bpf_types[] = {
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+};
+
+static const struct bpf_prog_meta __bpf_prog_meta[] = {
+ [BPF_PROG_TYPE_SCHED_CLS] = {
+ .type = "cls",
+ .subdir = "tc",
+ .section = ELF_SECTION_CLASSIFIER,
+ .may_uds_export = true,
+ },
+ [BPF_PROG_TYPE_SCHED_ACT] = {
+ .type = "act",
+ .subdir = "tc",
+ .section = ELF_SECTION_ACTION,
+ .may_uds_export = true,
+ },
+};
+
+/* Look up the bpf fs sub-directory registered for @type; asserts it exists. */
+static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
+{
+	const struct bpf_prog_meta *meta;
+
+	assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
+	       __bpf_prog_meta[type].subdir);
+
+	meta = &__bpf_prog_meta[type];
+	return meta->subdir;
+}
+
+/* Look up the default ELF section name registered for @type; asserts it exists. */
+const char *bpf_prog_to_default_section(enum bpf_prog_type type)
+{
+	const struct bpf_prog_meta *meta;
+
+	assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
+	       __bpf_prog_meta[type].section);
+
+	meta = &__bpf_prog_meta[type];
+	return meta->section;
+}
+
+#ifdef HAVE_ELF
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose);
+#else
+/* Fallback without ELF support: report the missing feature and fail with ENOSYS. */
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+			const char *sec, bool verbose)
+{
+	fputs("No ELF library support compiled in.\n", stderr);
+	errno = ENOSYS;
+
+	return -1;
+}
+#endif
+
+/* Widen a user space pointer into the 64 bit form stored in union bpf_attr. */
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+	unsigned long addr = (unsigned long)ptr;
+
+	return (__u64)addr;
+}
+
+/*
+ * Thin wrapper around the bpf(2) system call. When the build headers do not
+ * define __NR_bpf, prints a hint and fails with errno set to ENOSYS.
+ */
+static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifndef __NR_bpf
+	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
+	errno = ENOSYS;
+	return -1;
+#else
+	return syscall(__NR_bpf, cmd, attr, size);
+#endif
+}
+
+/* Issue BPF_MAP_UPDATE_ELEM for @key/@value on map @fd with the given @flags. */
+static int bpf_map_update(int fd, const void *key, const void *value,
+			  uint64_t flags)
+{
+	union bpf_attr req = {};
+
+	req.flags = flags;
+	req.value = bpf_ptr_to_u64(value);
+	req.key = bpf_ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return bpf(BPF_MAP_UPDATE_ELEM, &req, sizeof(req));
+}
+
+/*
+ * Fetch the textual classic BPF program and parse its "<len><separator>" prefix.
+ *
+ * With @from_file set, the first line of file @arg is read into a freshly
+ * allocated buffer and a trailing newline is stripped; otherwise @arg itself
+ * is used. On success *bpf_string points at the text, *bpf_len holds the
+ * encoded instruction count and *need_release tells the caller whether
+ * *bpf_string must be freed.
+ *
+ * Returns 0 on success, -ENOMEM, -ENOENT (file cannot be opened), -EIO (file
+ * cannot be read) or -EINVAL (prefix malformed). On error nothing is left
+ * for the caller to free.
+ */
+static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
+			    char **bpf_string, bool *need_release,
+			    const char separator)
+{
+	char sp;
+
+	if (from_file) {
+		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
+		char *tmp_string;
+		size_t used;
+		FILE *fp;
+
+		/* Room for the length prefix plus the maximum number of opcodes. */
+		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
+		tmp_string = calloc(1, tmp_len);
+		if (tmp_string == NULL)
+			return -ENOMEM;
+
+		fp = fopen(arg, "r");
+		if (fp == NULL) {
+			perror("Cannot fopen");
+			free(tmp_string);
+			return -ENOENT;
+		}
+
+		if (!fgets(tmp_string, tmp_len, fp)) {
+			free(tmp_string);
+			fclose(fp);
+			return -EIO;
+		}
+
+		fclose(fp);
+
+		/*
+		 * The line may be empty from strlen()'s point of view (e.g. the
+		 * file starts with a NUL byte), so never index at length - 1
+		 * without checking the length first.
+		 */
+		used = strlen(tmp_string);
+		if (used > 0 && tmp_string[used - 1] == '\n')
+			tmp_string[used - 1] = 0;
+
+		*need_release = true;
+		*bpf_string = tmp_string;
+	} else {
+		*need_release = false;
+		*bpf_string = arg;
+	}
+
+	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
+	    sp != separator) {
+		if (*need_release)
+			free(*bpf_string);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse a classic BPF program given as "<len>,<code> <jt> <jf> <k>,..." from
+ * argv[0] (or from the file named by argv[0] when @from_file is set) into
+ * @bpf_ops. Returns the instruction count on success, -EINVAL otherwise.
+ */
+static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
+			 bool from_file)
+{
+	char *bpf_string, *cur, separator = ',';
+	bool need_release;
+	__u16 bpf_len = 0;
+	int parsed = 0;
+	int ret;
+
+	if (argc < 1)
+		return -EINVAL;
+	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
+			     &need_release, separator))
+		return -EINVAL;
+
+	ret = -EINVAL;
+	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS)
+		goto out;
+
+	/* Each instruction starts right after a separator; stop at a trailing one. */
+	for (cur = strchr(bpf_string, separator); cur && cur[1];
+	     cur = strchr(cur + 1, separator)) {
+		const char *insn = cur + 1;
+
+		if (parsed >= bpf_len) {
+			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
+			goto out;
+		}
+
+		if (sscanf(insn, "%hu %hhu %hhu %u,",
+			   &bpf_ops[parsed].code, &bpf_ops[parsed].jt,
+			   &bpf_ops[parsed].jf, &bpf_ops[parsed].k) != 4) {
+			fprintf(stderr, "Error at instruction %d!\n", parsed);
+			goto out;
+		}
+
+		parsed++;
+	}
+
+	if (parsed != bpf_len) {
+		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
+		goto out;
+	}
+
+	ret = bpf_len;
+out:
+	if (need_release)
+		free(bpf_string);
+
+	return ret;
+}
+
+/*
+ * Print @len classic BPF instructions carried in attribute @bpf_ops to @f in
+ * the "bytecode '<len>,<code> <jt> <jf> <k>,...'" form. Prints nothing when
+ * @len is zero.
+ */
+void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
+{
+	const struct sock_filter *insn = (struct sock_filter *) RTA_DATA(bpf_ops);
+	const struct sock_filter *last;
+
+	if (!len)
+		return;
+
+	fprintf(f, "bytecode \'%u,", len);
+
+	/* All but the final instruction are followed by a comma. */
+	last = insn + (len - 1);
+	while (insn < last) {
+		fprintf(f, "%hu %hhu %hhu %u,", insn->code, insn->jt,
+			insn->jf, insn->k);
+		insn++;
+	}
+
+	fprintf(f, "%hu %hhu %hhu %u\'", last->code, last->jt,
+		last->jf, last->k);
+}
+
+/* Print every attribute in which the object's map @obj differs from @pin. */
+static void bpf_map_pin_report(const struct bpf_elf_map *pin,
+			       const struct bpf_elf_map *obj)
+{
+	FILE *out = stderr;
+
+	fprintf(out, "Map specification differs from pinned file!\n");
+
+	if (pin->type != obj->type)
+		fprintf(out, " - Type: %u (obj) != %u (pin)\n",
+			obj->type, pin->type);
+	if (pin->size_key != obj->size_key)
+		fprintf(out, " - Size key: %u (obj) != %u (pin)\n",
+			obj->size_key, pin->size_key);
+	if (pin->size_value != obj->size_value)
+		fprintf(out, " - Size value: %u (obj) != %u (pin)\n",
+			obj->size_value, pin->size_value);
+	if (pin->max_elem != obj->max_elem)
+		fprintf(out, " - Max elems: %u (obj) != %u (pin)\n",
+			obj->max_elem, pin->max_elem);
+	if (pin->flags != obj->flags)
+		fprintf(out, " - Flags: %#x (obj) != %#x (pin)\n",
+			obj->flags, pin->flags);
+
+	fprintf(out, "\n");
+}
+
+/*
+ * Compare the first @length bytes of @map against the map attributes found
+ * in /proc/<pid>/fdinfo/<fd>.
+ *
+ * Returns 0 when they match, or when the values parsed from fdinfo are all
+ * zero within @length bytes; -EIO when the fdinfo file cannot be opened;
+ * -EINVAL on mismatch, after printing a report.
+ */
+static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
+				    int length)
+{
+	char file[PATH_MAX], buff[4096];
+	struct bpf_elf_map tmp = {}, zero = {};
+	unsigned int val;
+	FILE *fp;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		fprintf(stderr, "No procfs support?!\n");
+		return -EIO;
+	}
+
+	/* Collect the known "name:\tvalue" lines; anything else is ignored. */
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "map_type:\t%u", &val) == 1)
+			tmp.type = val;
+		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+			tmp.size_key = val;
+		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+			tmp.size_value = val;
+		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+			tmp.max_elem = val;
+		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+			tmp.flags = val;
+	}
+
+	fclose(fp);
+
+	if (!memcmp(&tmp, map, length)) {
+		return 0;
+	} else {
+		/* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
+		 * so just accept it. We know we do have an eBPF fd and in this
+		 * case, everything is 0. It is guaranteed that no such map exists
+		 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
+		 */
+		if (!memcmp(&tmp, &zero, length))
+			return 0;
+
+		/* First argument is what is pinned, second what the caller expects. */
+		bpf_map_pin_report(&tmp, map);
+		return -EINVAL;
+	}
+}
+
+/*
+ * Make @target a private mount point (bind mounting it onto itself once if
+ * the first attempt fails with EINVAL) and mount a bpf file system on it.
+ * Returns 0 on success, -1 after printing a message on failure.
+ */
+static int bpf_mnt_fs(const char *target)
+{
+	bool bound = false;
+
+	for (;;) {
+		if (!mount("", target, "none", MS_PRIVATE | MS_REC, NULL))
+			break;
+
+		/* Only EINVAL is retried, and only once after the bind mount. */
+		if (bound || errno != EINVAL) {
+			fprintf(stderr, "mount --make-private %s failed: %s\n",
+				target, strerror(errno));
+			return -1;
+		}
+
+		if (mount(target, target, "none", MS_BIND, NULL)) {
+			fprintf(stderr, "mount --bind %s %s failed: %s\n",
+				target, target, strerror(errno));
+			return -1;
+		}
+
+		bound = true;
+	}
+
+	if (!mount("bpf", target, "bpf", 0, "mode=0700"))
+		return 0;
+
+	fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
+		target, strerror(errno));
+	return -1;
+}
+
+/* Return 0 when @mnt can be statfs()'d and reports file system type @magic, else -ENOENT. */
+static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
+{
+	struct statfs info;
+
+	if (statfs(mnt, &info) < 0 || (unsigned long)info.f_type != magic)
+		return -ENOENT;
+
+	return 0;
+}
+
+/*
+ * Locate a mount point of file system @fstype.
+ *
+ * First every entry of the NULL-terminated @known_mnts list (may be NULL) is
+ * probed with bpf_valid_mntpt() against @magic; the first hit is copied into
+ * @mnt (truncated to @len - 1 characters). Otherwise /proc/mounts is scanned
+ * for the first entry whose type equals @fstype; this step requires
+ * @len == PATH_MAX.
+ *
+ * Returns @mnt on success and NULL when nothing was found. @mnt is only
+ * written on success.
+ */
+static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
+				  char *mnt, int len,
+				  const char * const *known_mnts)
+{
+	const char * const *ptr;
+	/*
+	 * Scan target with room for the terminator: assuming textify()
+	 * stringifies the value of PATH_MAX, the conversion below may store
+	 * PATH_MAX characters plus a NUL byte.
+	 */
+	char path[PATH_MAX + 1];
+	char type[100];
+	bool found = false;
+	FILE *fp;
+
+	if (known_mnts) {
+		ptr = known_mnts;
+		while (*ptr) {
+			if (bpf_valid_mntpt(*ptr, magic) == 0) {
+				strncpy(mnt, *ptr, len - 1);
+				mnt[len - 1] = 0;
+				return mnt;
+			}
+			ptr++;
+		}
+	}
+
+	/* Check the buffer size before opening the file so nothing can leak. */
+	if (len != PATH_MAX)
+		return NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
+		      path, type) == 2) {
+		if (strcmp(type, fstype) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	fclose(fp);
+
+	/* Track the match explicitly; 'type' is unset if nothing was scanned. */
+	if (!found || strlen(path) >= (size_t)len)
+		return NULL;
+
+	strcpy(mnt, path);
+	return mnt;
+}
+
+/*
+ * Locate the tracefs mount, open its trace_pipe file and copy everything read
+ * from it to standard error until reading or writing fails or end of file is
+ * reached.
+ *
+ * Returns 0 on end of file, -1 when tracefs cannot be found, trace_pipe
+ * cannot be opened, or an I/O error terminates the copy loop.
+ */
+int bpf_trace_pipe(void)
+{
+	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
+	static const char * const tracefs_known_mnts[] = {
+		TRACE_DIR_MNT,
+		"/sys/kernel/debug/tracing",
+		"/tracing",
+		"/trace",
+		0,
+	};
+	char tpipe[PATH_MAX];
+	const char *mnt;
+	int fd, status = 0;
+
+	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
+			     sizeof(tracefs_mnt), tracefs_known_mnts);
+	if (!mnt) {
+		fprintf(stderr, "tracefs not mounted?\n");
+		return -1;
+	}
+
+	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
+
+	fd = open(tpipe, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	fprintf(stderr, "Running! Hang up with ^C!\n\n");
+	while (1) {
+		static char buff[4096];
+		ssize_t ret;
+
+		ret = read(fd, buff, sizeof(buff) - 1);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret == 0)
+			break;
+		/* Stop instead of spinning once the pipe or stderr is broken. */
+		if (ret < 0 || write(STDERR_FILENO, buff, ret) != ret) {
+			status = -1;
+			break;
+		}
+	}
+
+	close(fd);
+	return status;
+}
+
+/*
+ * Create the BPF_DIR_GLOBALS directory below @bpf_sub_dir. An already
+ * existing directory is not an error. Returns 0 on success, otherwise the
+ * mkdir() result after printing a message.
+ */
+static int bpf_gen_global(const char *bpf_sub_dir)
+{
+	char path[PATH_MAX];
+	int err;
+
+	snprintf(path, sizeof(path), "%s/%s/",
+		 bpf_sub_dir, BPF_DIR_GLOBALS);
+
+	err = mkdir(path, S_IRWXU);
+	if (!err || errno == EEXIST)
+		return 0;
+
+	fprintf(stderr, "mkdir %s failed: %s\n", path,
+		strerror(errno));
+	return err;
+}
+
+/*
+ * Create directory "<base><name>/" (an existing one is accepted) and its
+ * globals sub-directory. Returns 0 on success, non-zero after printing a
+ * message on failure.
+ */
+static int bpf_gen_master(const char *base, const char *name)
+{
+	char path[PATH_MAX];
+	int err;
+
+	snprintf(path, sizeof(path), "%s%s/", base, name);
+
+	err = mkdir(path, S_IRWXU);
+	if (!err || errno == EEXIST)
+		return bpf_gen_global(path);
+
+	fprintf(stderr, "mkdir %s failed: %s\n", path,
+		strerror(errno));
+	return err;
+}
+
+/*
+ * Create directory @full_name and bind mount @full_link onto it. If the
+ * mount fails the directory is removed again.
+ *
+ * Returns 0 on success, otherwise the failing call's result after printing a
+ * message; errno then still describes the call that failed.
+ */
+static int bpf_slave_via_bind_mnt(const char *full_name,
+				  const char *full_link)
+{
+	int ret;
+
+	ret = mkdir(full_name, S_IRWXU);
+	if (ret) {
+		assert(errno != EEXIST);
+		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
+			strerror(errno));
+		return ret;
+	}
+
+	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
+	if (ret) {
+		/* rmdir() may overwrite errno, so keep mount()'s value. */
+		int mount_errno = errno;
+
+		rmdir(full_name);
+		fprintf(stderr, "mount --bind %s %s failed: %s\n",
+			full_link, full_name, strerror(mount_errno));
+		errno = mount_errno;
+	}
+
+	return ret;
+}
+
+/*
+ * Make "<base><name>" an alias of "<base><link>/".
+ *
+ * A symlink is tried first. If that fails with EPERM, a directory plus bind
+ * mount is used instead. If the name already exists and is not a symlink,
+ * only its globals sub-directory is created. Returns 0 on success, non-zero
+ * on failure.
+ */
+static int bpf_gen_slave(const char *base, const char *name,
+			 const char *link)
+{
+	char bpf_lnk_dir[PATH_MAX];
+	char bpf_sub_dir[PATH_MAX];
+	struct stat sb = {};
+	int ret;
+
+	snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
+	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);
+
+	ret = symlink(bpf_lnk_dir, bpf_sub_dir);
+	if (ret) {
+		if (errno != EEXIST) {
+			if (errno != EPERM) {
+				fprintf(stderr, "symlink %s failed: %s\n",
+					bpf_sub_dir, strerror(errno));
+				return ret;
+			}
+
+			/* EPERM: fall back to a bind mount. */
+			return bpf_slave_via_bind_mnt(bpf_sub_dir,
+						      bpf_lnk_dir);
+		}
+
+		/* Name exists already: find out whether it is a symlink. */
+		ret = lstat(bpf_sub_dir, &sb);
+		if (ret) {
+			fprintf(stderr, "lstat %s failed: %s\n",
+				bpf_sub_dir, strerror(errno));
+			return ret;
+		}
+
+		if ((sb.st_mode & S_IFMT) != S_IFLNK)
+			return bpf_gen_global(bpf_sub_dir);
+	}
+
+	return 0;
+}
+
+/*
+ * Create the directory layout below @base: a master directory for the first
+ * entry of __bpf_types and an alias of it for every further entry. Stops at
+ * the first failure and returns its result; 0 on success.
+ */
+static int bpf_gen_hierarchy(const char *base)
+{
+	int err, idx = 1;
+
+	err = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
+	while (idx < ARRAY_SIZE(__bpf_types) && !err) {
+		err = bpf_gen_slave(base,
+				    bpf_prog_to_subdir(__bpf_types[idx]),
+				    bpf_prog_to_subdir(__bpf_types[0]));
+		idx++;
+	}
+
+	return err;
+}
+
+/*
+ * Return the bpf fs working directory, mounting the file system and creating
+ * the directory hierarchy on the first call. The outcome (including failure,
+ * reported as NULL) is cached in static storage.
+ *
+ * The first, uncached call returns the mount root with a trailing slash and
+ * ignores @type. Every later call returns "<root><subdir of @type>/" in a
+ * static buffer that the next call overwrites.
+ *
+ * NOTE(review): on the cached path bpf_prog_to_subdir() asserts that @type
+ * has a sub-directory; confirm no caller passes BPF_PROG_TYPE_UNSPEC after
+ * the first call.
+ */
+static const char *bpf_get_work_dir(enum bpf_prog_type type)
+{
+	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
+	static char bpf_wrk_dir[PATH_MAX];
+	static const char *mnt;
+	static bool bpf_mnt_cached;
+	static const char * const bpf_known_mnts[] = {
+		BPF_DIR_MNT,
+		"/bpf",
+		0,
+	};
+	int ret;
+
+	if (bpf_mnt_cached) {
+		const char *out = mnt;
+
+		if (out) {
+			snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
+				 out, bpf_prog_to_subdir(type));
+			out = bpf_tmp;
+		}
+		return out;
+	}
+
+	mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
+			     bpf_known_mnts);
+	if (!mnt) {
+		/* Nothing mounted yet: mount at $TC_BPF_MNT or the default. */
+		mnt = getenv(BPF_ENV_MNT);
+		if (!mnt)
+			mnt = BPF_DIR_MNT;
+		ret = bpf_mnt_fs(mnt);
+		if (ret) {
+			mnt = NULL;
+			goto out;
+		}
+	}
+
+	snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);
+
+	ret = bpf_gen_hierarchy(bpf_wrk_dir);
+	if (ret) {
+		mnt = NULL;
+		goto out;
+	}
+
+	mnt = bpf_wrk_dir;
+out:
+	/* Failures are cached as well, so the setup is attempted only once. */
+	bpf_mnt_cached = true;
+	return mnt;
+}
+
+/*
+ * Retrieve a pinned object via BPF_OBJ_GET. A @pathname of the form
+ * "m:<rest>" is resolved relative to the work directory for @type when one
+ * is available. Returns the bpf() result.
+ */
+static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
+{
+	union bpf_attr req = {};
+	char resolved[PATH_MAX];
+	bool has_prefix;
+
+	has_prefix = strlen(pathname) > 2 && pathname[0] == 'm' &&
+		     pathname[1] == ':';
+
+	/* The work dir is queried twice on purpose, exactly as before. */
+	if (has_prefix && bpf_get_work_dir(type)) {
+		snprintf(resolved, sizeof(resolved), "%s/%s",
+			 bpf_get_work_dir(type), pathname + 2);
+		pathname = resolved;
+	}
+
+	req.pathname = bpf_ptr_to_u64(pathname);
+
+	return bpf(BPF_OBJ_GET, &req, sizeof(req));
+}
+
+/* How the program is supplied on the command line; also indexes the option tables. */
+enum bpf_mode {
+	CBPF_BYTECODE,	/* "bytecode" / "bc": classic BPF given inline */
+	CBPF_FILE,	/* "bytecode-file" / "bcf": classic BPF read from a file */
+	EBPF_OBJECT,	/* "object-file" / "obj": eBPF object file */
+	EBPF_PINNED,	/* "object-pinned" / "pinned" / "fd": pinned eBPF object */
+	BPF_MODE_MAX,	/* number of modes, used to size option tables */
+};
+
+/*
+ * Parse the mode keyword and its options from cfg->argv, then obtain the
+ * program accordingly.
+ *
+ * @type:    in/out program type; if BPF_PROG_TYPE_UNSPEC in an eBPF mode it
+ *           is taken from a "type" option or defaults to SCHED_CLS
+ * @mode:    out, the recognized mode
+ * @cfg:     argument cursor and cfg->ops buffer on input; object, section,
+ *           uds, argc and argv are written back
+ * @opt_tbl: per-mode flags telling which modes are acceptable
+ *
+ * Returns the instruction count for the classic BPF modes, the result of
+ * bpf_obj_open()/bpf_obj_get() for the eBPF modes, and -1 for an unknown
+ * mode or type keyword.
+ *
+ * NOTE(review): NEXT_ARG(), NEXT_ARG_FWD() and PREV_ARG() are macros defined
+ * outside this file; they presumably step argc/argv and may not return when
+ * arguments run out - confirm against utils.h.
+ */
+static int bpf_parse(enum bpf_prog_type *type, enum bpf_mode *mode,
+		     struct bpf_cfg_in *cfg, const bool *opt_tbl)
+{
+	const char *file, *section, *uds_name;
+	bool verbose = false;
+	int i, ret, argc;
+	char **argv;
+
+	argv = cfg->argv;
+	argc = cfg->argc;
+
+	/* Only modes enabled in opt_tbl are recognized. */
+	if (opt_tbl[CBPF_BYTECODE] &&
+	    (matches(*argv, "bytecode") == 0 ||
+	     strcmp(*argv, "bc") == 0)) {
+		*mode = CBPF_BYTECODE;
+	} else if (opt_tbl[CBPF_FILE] &&
+		   (matches(*argv, "bytecode-file") == 0 ||
+		    strcmp(*argv, "bcf") == 0)) {
+		*mode = CBPF_FILE;
+	} else if (opt_tbl[EBPF_OBJECT] &&
+		   (matches(*argv, "object-file") == 0 ||
+		    strcmp(*argv, "obj") == 0)) {
+		*mode = EBPF_OBJECT;
+	} else if (opt_tbl[EBPF_PINNED] &&
+		   (matches(*argv, "object-pinned") == 0 ||
+		    matches(*argv, "pinned") == 0 ||
+		    matches(*argv, "fd") == 0)) {
+		*mode = EBPF_PINNED;
+	} else {
+		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
+		return -1;
+	}
+
+	NEXT_ARG();
+	file = section = uds_name = NULL;
+	if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
+		file = *argv;
+		NEXT_ARG_FWD();
+
+		if (*type == BPF_PROG_TYPE_UNSPEC) {
+			if (argc > 0 && matches(*argv, "type") == 0) {
+				NEXT_ARG();
+				/* Find the table entry whose keyword matches. */
+				for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
+				     i++) {
+					if (!__bpf_prog_meta[i].type)
+						continue;
+					if (!matches(*argv,
+						     __bpf_prog_meta[i].type)) {
+						*type = i;
+						break;
+					}
+				}
+
+				if (*type == BPF_PROG_TYPE_UNSPEC) {
+					fprintf(stderr, "What type is \"%s\"?\n",
+						*argv);
+					return -1;
+				}
+				NEXT_ARG_FWD();
+			} else {
+				*type = BPF_PROG_TYPE_SCHED_CLS;
+			}
+		}
+
+		section = bpf_prog_to_default_section(*type);
+		if (argc > 0 && matches(*argv, "section") == 0) {
+			NEXT_ARG();
+			section = *argv;
+			NEXT_ARG_FWD();
+		}
+
+		if (__bpf_prog_meta[*type].may_uds_export) {
+			/* The environment variable wins over an "export" option. */
+			uds_name = getenv(BPF_ENV_UDS);
+			if (argc > 0 && !uds_name &&
+			    matches(*argv, "export") == 0) {
+				NEXT_ARG();
+				uds_name = *argv;
+				NEXT_ARG_FWD();
+			}
+		}
+
+		if (argc > 0 && matches(*argv, "verbose") == 0) {
+			verbose = true;
+			NEXT_ARG_FWD();
+		}
+
+		PREV_ARG();
+	}
+
+	if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
+		ret = bpf_ops_parse(argc, argv, cfg->ops, *mode == CBPF_FILE);
+	else if (*mode == EBPF_OBJECT)
+		ret = bpf_obj_open(file, *type, section, verbose);
+	else if (*mode == EBPF_PINNED)
+		ret = bpf_obj_get(file, *type);
+	else
+		return -1;
+
+	/* Written back even when ret signals a failure. */
+	cfg->object = file;
+	cfg->section = section;
+	cfg->uds = uds_name;
+	cfg->argc = argc;
+	cfg->argv = argv;
+
+	return ret;
+}
+
+/*
+ * Run bpf_parse() with a local opcode buffer and pass the outcome to the
+ * matching callback in @ops. Returns 0 on success or the negative
+ * bpf_parse() result.
+ */
+static int bpf_parse_opt_tbl(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
+			     const struct bpf_cfg_ops *ops, void *nl,
+			     const bool *opt_tbl)
+{
+	struct sock_filter opcodes[BPF_MAXINSNS];
+	char annotation[256];
+	enum bpf_mode mode;
+	int res;
+
+	/* The buffer only lives during this call, so detach it afterwards. */
+	cfg->ops = opcodes;
+	res = bpf_parse(&type, &mode, cfg, opt_tbl);
+	cfg->ops = NULL;
+	if (res < 0)
+		return res;
+
+	switch (mode) {
+	case CBPF_BYTECODE:
+	case CBPF_FILE:
+		ops->cbpf_cb(nl, opcodes, res);
+		break;
+	case EBPF_OBJECT:
+	case EBPF_PINNED:
+		snprintf(annotation, sizeof(annotation), "%s:[%s]",
+			 basename(cfg->object), mode == EBPF_PINNED ?
+			 "*fsobj" : cfg->section);
+		ops->ebpf_cb(nl, res, annotation);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse a BPF program specification from @cfg, enabling the classic modes
+ * when @ops provides cbpf_cb and the eBPF modes when it provides ebpf_cb.
+ */
+int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
+		     const struct bpf_cfg_ops *ops, void *nl)
+{
+	const bool want_cbpf = ops->cbpf_cb != NULL;
+	const bool want_ebpf = ops->ebpf_cb != NULL;
+	bool opt_tbl[BPF_MODE_MAX] = {};
+
+	opt_tbl[CBPF_BYTECODE] = want_cbpf;
+	opt_tbl[CBPF_FILE] = want_cbpf;
+	opt_tbl[EBPF_OBJECT] = want_ebpf;
+	opt_tbl[EBPF_PINNED] = want_ebpf;
+
+	return bpf_parse_opt_tbl(type, cfg, ops, nl, opt_tbl);
+}
+
+/*
+ * Load or fetch an eBPF program described by @argc/@argv and store its fd in
+ * the pinned program array at @map_path.
+ *
+ * @key selects the map slot. When NULL, the slot is derived from the section
+ * name, which must then scan as "<number>/<number>"; the second number is
+ * used.
+ *
+ * Returns the bpf_map_update() result on success, a negative value
+ * otherwise. Both the program and the map fd are closed before returning.
+ */
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
+{
+	enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
+	const bool opt_tbl[BPF_MODE_MAX] = {
+		[EBPF_OBJECT] = true,
+		[EBPF_PINNED] = true,
+	};
+	/* Expected attributes of the target map; max_elem is not compared. */
+	const struct bpf_elf_map test = {
+		.type = BPF_MAP_TYPE_PROG_ARRAY,
+		.size_key = sizeof(int),
+		.size_value = sizeof(int),
+	};
+	struct bpf_cfg_in cfg = {
+		.argc = argc,
+		.argv = argv,
+	};
+	int ret, prog_fd, map_fd;
+	enum bpf_mode mode;
+	uint32_t map_key;
+
+	prog_fd = bpf_parse(&type, &mode, &cfg, opt_tbl);
+	if (prog_fd < 0)
+		return prog_fd;
+	if (key) {
+		map_key = *key;
+	} else {
+		ret = sscanf(cfg.section, "%*i/%i", &map_key);
+		if (ret != 1) {
+			fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
+			ret = -EINVAL;
+			goto out_prog;
+		}
+	}
+
+	map_fd = bpf_obj_get(map_path, type);
+	if (map_fd < 0) {
+		fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
+			map_path, strerror(errno));
+		ret = map_fd;
+		goto out_prog;
+	}
+
+	/* Compare only the fields that precede max_elem in the struct. */
+	ret = bpf_map_selfcheck_pinned(map_fd, &test,
+				       offsetof(struct bpf_elf_map, max_elem));
+	if (ret < 0) {
+		fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
+		goto out_map;
+	}
+
+	ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
+	if (ret < 0)
+		fprintf(stderr, "Map update failed: %s\n", strerror(errno));
+out_map:
+	close(map_fd);
+out_prog:
+	close(prog_fd);
+	return ret;
+}
+
+#ifdef HAVE_ELF
+/* A program ready to be handed to bpf_prog_load(). */
+struct bpf_elf_prog {
+	enum bpf_prog_type type;	/* program type passed to the kernel */
+	const struct bpf_insn *insns;	/* instruction array */
+	size_t size;			/* size of insns in bytes */
+	const char *license;		/* license string passed to the kernel */
+};
+
+/* Chained hash table entry mapping a custom pinning id to a sub-path. */
+struct bpf_hash_entry {
+	unsigned int pinning;		/* pinning id used as lookup key */
+	const char *subpath;		/* path returned by bpf_custom_pinning() */
+	struct bpf_hash_entry *next;	/* next entry in the same bucket */
+};
+
+struct bpf_elf_ctx {
+ Elf *elf_fd;
+ GElf_Ehdr elf_hdr;
+ Elf_Data *sym_tab;
+ Elf_Data *str_tab;
+ int obj_fd;
+ int map_fds[ELF_MAX_MAPS];
+ struct bpf_elf_map maps[ELF_MAX_MAPS];
+ int sym_num;
+ int map_num;
+ int map_len;
+ bool *sec_done;
+ int sec_maps;
+ char license[ELF_MAX_LICENSE_LEN];
+ enum bpf_prog_type type;
+ bool verbose;
+ struct bpf_elf_st stat;
+ struct bpf_hash_entry *ht[256];
+ char *log;
+ size_t log_size;
+};
+
+struct bpf_elf_sec_data {
+ GElf_Shdr sec_hdr;
+ Elf_Data *sec_data;
+ const char *sec_name;
+};
+
+struct bpf_map_data {
+ int *fds;
+ const char *obj;
+ struct bpf_elf_st *st;
+ struct bpf_elf_map *ent;
+};
+
+/*
+ * Print a formatted message to stderr followed by the content of ctx->log,
+ * if there is any. Unless ctx->verbose is set, only the last BPF_MAX_LOG
+ * bytes of the log are shown. The log buffer is zeroed afterwards.
+ */
+static __check_format_string(2, 3) void
+bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
+{
+	unsigned int skip = 0, total;
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+
+	if (!ctx->log || !ctx->log[0])
+		return;
+
+	if (!ctx->verbose) {
+		total = strlen(ctx->log);
+		if (total > BPF_MAX_LOG) {
+			skip = total - BPF_MAX_LOG;
+			fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
+				skip);
+		}
+	}
+	fprintf(stderr, "%s\n", ctx->log + skip);
+
+	memset(ctx->log, 0, ctx->log_size);
+}
+
+/*
+ * Allocate the log buffer (65536 bytes) on first use, or double its size.
+ *
+ * Newly obtained memory is zeroed, because bpf_dump_error() treats the
+ * buffer as a NUL-terminated string and checks ctx->log[0]; realloc() alone
+ * leaves that memory indeterminate. Existing content is preserved.
+ *
+ * Returns 0 on success, -EINVAL when the doubled size would exceed
+ * UINT_MAX >> 8, and -ENOMEM when the allocation fails (the old buffer stays
+ * valid in both error cases).
+ */
+static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
+{
+	size_t old_size = ctx->log ? ctx->log_size : 0;
+	size_t log_size = ctx->log_size;
+	char *ptr;
+
+	if (!ctx->log) {
+		log_size = 65536;
+	} else {
+		log_size <<= 1;
+		if (log_size > (UINT_MAX >> 8))
+			return -EINVAL;
+	}
+
+	ptr = realloc(ctx->log, log_size);
+	if (!ptr)
+		return -ENOMEM;
+
+	/* Zero only the part realloc() added. */
+	if (log_size > old_size)
+		memset(ptr + old_size, 0, log_size - old_size);
+
+	ctx->log = ptr;
+	ctx->log_size = log_size;
+
+	return 0;
+}
+
+/* Issue BPF_MAP_CREATE with the given attributes; returns the bpf() result. */
+static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
+			  uint32_t size_value, uint32_t max_elem,
+			  uint32_t flags)
+{
+	union bpf_attr req = {};
+
+	req.map_flags = flags;
+	req.max_entries = max_elem;
+	req.value_size = size_value;
+	req.key_size = size_key;
+	req.map_type = type;
+
+	return bpf(BPF_MAP_CREATE, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_PROG_LOAD for @size_insns bytes of instructions. A log buffer
+ * (log level 1) is passed only when @size_log is non-zero. Returns the bpf()
+ * result.
+ */
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+			 size_t size_insns, const char *license, char *log,
+			 size_t size_log)
+{
+	union bpf_attr req = {};
+
+	req.prog_type = type;
+	req.license = bpf_ptr_to_u64(license);
+	req.insns = bpf_ptr_to_u64(insns);
+	req.insn_cnt = size_insns / sizeof(struct bpf_insn);
+
+	if (size_log != 0) {
+		req.log_level = 1;
+		req.log_size = size_log;
+		req.log_buf = bpf_ptr_to_u64(log);
+	}
+
+	return bpf(BPF_PROG_LOAD, &req, sizeof(req));
+}
+
+/* Issue BPF_OBJ_PIN to pin @fd at @pathname; returns the bpf() result. */
+static int bpf_obj_pin(int fd, const char *pathname)
+{
+	union bpf_attr req = {};
+
+	req.bpf_fd = fd;
+	req.pathname = bpf_ptr_to_u64(pathname);
+
+	return bpf(BPF_OBJ_PIN, &req, sizeof(req));
+}
+
+/*
+ * Compute the SHA-1 digest of file @object through an AF_ALG "hash" socket
+ * and store it in @out. @len must be 20.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, and a negative value
+ * after printing a message when any socket or file operation fails. All
+ * descriptors opened here are closed before returning.
+ */
+static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
+{
+	struct sockaddr_alg alg = {
+		.salg_family = AF_ALG,
+		.salg_type = "hash",
+		.salg_name = "sha1",
+	};
+	int ret, cfd, ofd, ffd;
+	struct stat stbuff;
+	ssize_t size;
+
+	if (!object || len != 20)
+		return -EINVAL;
+
+	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+	if (cfd < 0) {
+		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
+			strerror(errno));
+		return cfd;
+	}
+
+	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
+	if (ret < 0) {
+		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
+		goto out_cfd;
+	}
+
+	/* The accepted socket is the one data is fed to and the digest read from. */
+	ofd = accept(cfd, NULL, 0);
+	if (ofd < 0) {
+		fprintf(stderr, "Error accepting socket: %s\n",
+			strerror(errno));
+		ret = ofd;
+		goto out_cfd;
+	}
+
+	ffd = open(object, O_RDONLY);
+	if (ffd < 0) {
+		fprintf(stderr, "Error opening object %s: %s\n",
+			object, strerror(errno));
+		ret = ffd;
+		goto out_ofd;
+	}
+
+	ret = fstat(ffd, &stbuff);
+	if (ret < 0) {
+		fprintf(stderr, "Error doing fstat: %s\n",
+			strerror(errno));
+		goto out_ffd;
+	}
+
+	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
+	if (size != stbuff.st_size) {
+		/* st_size is an off_t; convert it to match the %zu conversion. */
+		fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
+			size, (size_t)stbuff.st_size, strerror(errno));
+		ret = -1;
+		goto out_ffd;
+	}
+
+	size = read(ofd, out, len);
+	if (size < 0 || (size_t)size != len) {
+		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
+			size, len, strerror(errno));
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+out_ffd:
+	close(ffd);
+out_ofd:
+	close(ofd);
+out_cfd:
+	close(cfd);
+	return ret;
+}
+
+/*
+ * Return the hex encoded hash of @pathname. The value is computed once and
+ * cached in static storage; later calls ignore @pathname. Returns NULL when
+ * hashing fails.
+ */
+static const char *bpf_get_obj_uid(const char *pathname)
+{
+	static bool bpf_uid_cached;
+	static char bpf_uid[64];
+	uint8_t digest[20];
+
+	if (!bpf_uid_cached) {
+		if (bpf_obj_hash(pathname, digest, sizeof(digest))) {
+			fprintf(stderr, "Object hashing failed!\n");
+			return NULL;
+		}
+
+		hexstring_n2a(digest, sizeof(digest), bpf_uid,
+			      sizeof(bpf_uid));
+		bpf_uid_cached = true;
+	}
+
+	return bpf_uid;
+}
+
+/*
+ * Prepare the process for loading @pathname: lift the locked memory limit
+ * (best effort), set up the bpf fs work directory and compute the object's
+ * uid. A missing work directory is not fatal. Returns 0 on success, -1 when
+ * the uid cannot be computed.
+ */
+static int bpf_init_env(const char *pathname)
+{
+	struct rlimit unlimited;
+
+	unlimited.rlim_cur = RLIM_INFINITY;
+	unlimited.rlim_max = RLIM_INFINITY;
+
+	/* Don't bother in case we fail! */
+	setrlimit(RLIMIT_MEMLOCK, &unlimited);
+
+	if (bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC) == NULL) {
+		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
+		return 0;
+	}
+
+	return bpf_get_obj_uid(pathname) ? 0 : -1;
+}
+
+/* Look up the sub-path registered for custom @pinning id, or NULL if none. */
+static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
+				      uint32_t pinning)
+{
+	struct bpf_hash_entry *cur;
+
+	for (cur = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; cur;
+	     cur = cur->next) {
+		if (cur->pinning == pinning)
+			return cur->subpath;
+	}
+
+	return NULL;
+}
+
+/*
+ * Tell whether @pinning means "do not pin": true for PIN_NONE and for ids
+ * without a registered custom path, false for the object and global
+ * namespaces and for known custom ids.
+ */
+static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
+			   uint32_t pinning)
+{
+	if (pinning == PIN_NONE)
+		return true;
+	if (pinning == PIN_OBJECT_NS || pinning == PIN_GLOBAL_NS)
+		return false;
+
+	return bpf_custom_pinning(ctx, pinning) == NULL;
+}
+
+/*
+ * Build the pin path of @name into @pathname: below the object's uid
+ * directory, below the globals directory, or below the custom path
+ * registered for @pinning (relative to the parent of the work directory).
+ */
+static void bpf_make_pathname(char *pathname, size_t len, const char *name,
+			      const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+	if (pinning == PIN_OBJECT_NS) {
+		snprintf(pathname, len, "%s/%s/%s",
+			 bpf_get_work_dir(ctx->type),
+			 bpf_get_obj_uid(NULL), name);
+	} else if (pinning == PIN_GLOBAL_NS) {
+		snprintf(pathname, len, "%s/%s/%s",
+			 bpf_get_work_dir(ctx->type),
+			 BPF_DIR_GLOBALS, name);
+	} else {
+		snprintf(pathname, len, "%s/../%s/%s",
+			 bpf_get_work_dir(ctx->type),
+			 bpf_custom_pinning(ctx, pinning), name);
+	}
+}
+
+/*
+ * Try to fetch an already pinned object called @name. Returns 0 when
+ * pinning does not apply or no work directory exists, otherwise the
+ * bpf_obj_get() result.
+ */
+static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
+			    uint32_t pinning)
+{
+	char path[PATH_MAX];
+
+	if (bpf_no_pinning(ctx, pinning))
+		return 0;
+	if (!bpf_get_work_dir(ctx->type))
+		return 0;
+
+	bpf_make_pathname(path, sizeof(path), name, ctx, pinning);
+
+	return bpf_obj_get(path, ctx->type);
+}
+
+/*
+ * Create the per-object directory "<work dir>/<object uid>". An already
+ * existing directory is accepted. Returns 0 on success, otherwise the
+ * mkdir() result after printing a message.
+ */
+static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
+{
+	char dir[PATH_MAX];
+	int err;
+
+	snprintf(dir, sizeof(dir), "%s/%s", bpf_get_work_dir(ctx->type),
+		 bpf_get_obj_uid(NULL));
+
+	err = mkdir(dir, S_IRWXU);
+	if (!err || errno == EEXIST)
+		return 0;
+
+	fprintf(stderr, "mkdir %s failed: %s\n", dir, strerror(errno));
+	return err;
+}
+
+/*
+ * Create every component of the custom pin path @todo, one directory at a
+ * time, below "<work dir>/../". Existing directories are accepted.
+ *
+ * Returns 0 on success, -EINVAL when the path would not fit into PATH_MAX,
+ * or the mkdir() result after printing a message.
+ */
+static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
+				const char *todo)
+{
+	char tmp[PATH_MAX], rem[PATH_MAX], *sub;
+	int ret;
+
+	snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
+	/* Work on a private copy because strtok() modifies its input. */
+	snprintf(rem, sizeof(rem), "%s/", todo);
+	sub = strtok(rem, "/");
+
+	while (sub) {
+		/* Room is needed for the component, a slash and the terminator. */
+		if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
+			return -EINVAL;
+
+		strcat(tmp, sub);
+		strcat(tmp, "/");
+
+		ret = mkdir(tmp, S_IRWXU);
+		if (ret && errno != EEXIST) {
+			fprintf(stderr, "mkdir %s failed: %s\n", tmp,
+				strerror(errno));
+			return ret;
+		}
+
+		sub = strtok(NULL, "/");
+	}
+
+	return 0;
+}
+
+/* Pin 'fd' under the location selected by 'pinning'.
+ *
+ * Nothing is done (return 0) when the id requests no pinning or no
+ * work directory is available. For the object namespace and for
+ * custom ids the target directory is created first; a failure there
+ * is returned as is. Otherwise the result of bpf_obj_pin() is
+ * returned.
+ */
+static int bpf_place_pinned(int fd, const char *name,
+			    const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+	char pathname[PATH_MAX];
+	const char *subpath;
+	int ret = 0;
+
+	if (bpf_no_pinning(ctx, pinning))
+		return 0;
+	if (!bpf_get_work_dir(ctx->type))
+		return 0;
+
+	if (pinning == PIN_OBJECT_NS) {
+		ret = bpf_make_obj_path(ctx);
+	} else {
+		subpath = bpf_custom_pinning(ctx, pinning);
+		if (subpath)
+			ret = bpf_make_custom_path(ctx, subpath);
+	}
+	if (ret < 0)
+		return ret;
+
+	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
+	return bpf_obj_pin(fd, pathname);
+}
+
+/* Print a load report for program section 'section' to stderr.
+ *
+ * A negative 'fd' is reported as rejected together with the current
+ * errno; otherwise the section is reported as loaded with its fd.
+ * Type, instruction count (and how far it exceeds BPF_MAXINSNS) and
+ * license follow, then the verifier log is dumped.
+ */
+static void bpf_prog_report(int fd, const char *section,
+			    const struct bpf_elf_prog *prog,
+			    struct bpf_elf_ctx *ctx)
+{
+	const unsigned int insns = prog->size / sizeof(struct bpf_insn);
+	const bool rejected = fd < 0;
+	const int code = rejected ? errno : fd;
+	unsigned int excess = 0;
+
+	if (insns > BPF_MAXINSNS)
+		excess = insns - BPF_MAXINSNS;
+
+	fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
+		rejected ? "rejected: " : "loaded",
+		rejected ? strerror(errno) : "", code);
+
+	fprintf(stderr, " - Type:         %u\n", prog->type);
+	fprintf(stderr, " - Instructions: %u (%u over limit)\n",
+		insns, excess);
+	fprintf(stderr, " - License:      %s\n\n", prog->license);
+
+	bpf_dump_error(ctx, "Verifier analysis:\n\n");
+}
+
+/* Load one program and return its fd, or a negative value on failure.
+ *
+ * A report is printed on failure and, in verbose mode, on success too.
+ * The verifier log can be larger than our buffer: when the load fails
+ * with ENOSPC, or no log buffer exists yet, the buffer is enlarged and
+ * the load is repeated (a bounded number of times) so that the user
+ * still gets a debuggable log.
+ */
+static int bpf_prog_attach(const char *section,
+			   const struct bpf_elf_prog *prog,
+			   struct bpf_elf_ctx *ctx)
+{
+	int tries = 0, fd;
+
+	for (;;) {
+		errno = 0;
+		fd = bpf_prog_load(prog->type, prog->insns, prog->size,
+				   prog->license, ctx->log, ctx->log_size);
+		if (fd >= 0 && !ctx->verbose)
+			return fd;
+
+		if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
+			if (tries++ < 6 && !bpf_log_realloc(ctx))
+				continue;
+
+			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
+				ctx->log_size, tries);
+			return fd;
+		}
+
+		bpf_prog_report(fd, section, prog, ctx);
+		return fd;
+	}
+}
+
+/* Print a creation report for map 'name' to stderr: the outcome
+ * (rejected with errno, or loaded with its fd) followed by every
+ * field of the map specification. 'ctx' is not used.
+ */
+static void bpf_map_report(int fd, const char *name,
+			   const struct bpf_elf_map *map,
+			   struct bpf_elf_ctx *ctx)
+{
+	const bool rejected = fd < 0;
+	const int code = rejected ? errno : fd;
+
+	fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
+		rejected ? "rejected: " : "loaded",
+		rejected ? strerror(errno) : "", code);
+
+	fprintf(stderr, " - Type:         %u\n", map->type);
+	fprintf(stderr, " - Identifier:   %u\n", map->id);
+	fprintf(stderr, " - Pinning:      %u\n", map->pinning);
+	fprintf(stderr, " - Size key:     %u\n", map->size_key);
+	fprintf(stderr, " - Size value:   %u\n", map->size_value);
+	fprintf(stderr, " - Max elems:    %u\n", map->max_elem);
+	fprintf(stderr, " - Flags:        %#x\n\n", map->flags);
+}
+
+/* Obtain an fd for map 'name'.
+ *
+ * A pinned instance is preferred: if one can be opened it is
+ * self-checked against the ELF specification and returned. Otherwise
+ * a fresh map is created and pinned according to map->pinning; a pin
+ * failure with EEXIST is tolerated.
+ *
+ * Returns the map fd, or a negative value on failure.
+ */
+static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
+			  struct bpf_elf_ctx *ctx)
+{
+	int fd, ret;
+
+	fd = bpf_probe_pinned(name, ctx, map->pinning);
+	if (fd > 0) {
+		ret = bpf_map_selfcheck_pinned(fd, map,
+					       offsetof(struct bpf_elf_map,
+							id));
+		if (ret >= 0) {
+			if (ctx->verbose)
+				fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
+					name);
+			return fd;
+		}
+
+		close(fd);
+		fprintf(stderr, "Map \'%s\' self-check failed!\n",
+			name);
+		return ret;
+	}
+
+	errno = 0;
+	fd = bpf_map_create(map->type, map->size_key, map->size_value,
+			    map->max_elem, map->flags);
+	if (fd < 0) {
+		bpf_map_report(fd, name, map, ctx);
+		return fd;
+	}
+	if (ctx->verbose)
+		bpf_map_report(fd, name, map, ctx);
+
+	ret = bpf_place_pinned(fd, name, ctx, map->pinning);
+	if (ret >= 0 || errno == EEXIST)
+		return fd;
+
+	fprintf(stderr, "Could not pin %s map: %s\n", name,
+		strerror(errno));
+	close(fd);
+	return ret;
+}
+
+/* Resolve a symbol's name: the string located st_name bytes into the
+ * string table data recorded in ctx->str_tab.
+ */
+static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
+				    const GElf_Sym *sym)
+{
+	const char *strings = ctx->str_tab->d_buf;
+
+	return strings + sym->st_name;
+}
+
+/* Find the name of the map stored at index 'which' of the maps
+ * section: the global, untyped symbol of that section whose offset
+ * divided by the per-map length equals 'which'.
+ * Returns NULL when no such symbol exists.
+ */
+static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
+{
+	GElf_Sym sym;
+	int idx;
+
+	for (idx = 0; idx < ctx->sym_num; idx++) {
+		if (gelf_getsym(ctx->sym_tab, idx, &sym) != &sym)
+			continue;
+
+		if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL &&
+		    GELF_ST_TYPE(sym.st_info) == STT_NOTYPE &&
+		    sym.st_shndx == ctx->sec_maps &&
+		    sym.st_value / ctx->map_len == which)
+			return bpf_str_tab_name(ctx, &sym);
+	}
+
+	return NULL;
+}
+
+/* Attach every map of the object and record the fds in ctx->map_fds.
+ * Returns 0 on success, -EIO when a map has no symbol name, or the
+ * negative result of the failing bpf_map_attach() call.
+ */
+static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
+{
+	int idx;
+
+	for (idx = 0; idx < ctx->map_num; idx++) {
+		const char *name = bpf_map_fetch_name(ctx, idx);
+		int fd;
+
+		if (!name)
+			return -EIO;
+
+		fd = bpf_map_attach(name, &ctx->maps[idx], ctx);
+		if (fd < 0)
+			return fd;
+
+		ctx->map_fds[idx] = fd;
+	}
+
+	return 0;
+}
+
+/* Count the symbols that describe maps: global, untyped symbols
+ * located in the maps section. Unreadable symbols are ignored.
+ */
+static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
+{
+	GElf_Sym sym;
+	int idx, count = 0;
+
+	for (idx = 0; idx < ctx->sym_num; idx++) {
+		if (gelf_getsym(ctx->sym_tab, idx, &sym) != &sym)
+			continue;
+
+		if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL &&
+		    GELF_ST_TYPE(sym.st_info) == STT_NOTYPE &&
+		    sym.st_shndx == ctx->sec_maps)
+			count++;
+	}
+
+	return count;
+}
+
+/* Collect header, name and data of ELF section number 'section'.
+ *
+ * 'data' is always zeroed first and only filled in on success.
+ * Returns 0 on success, -EINVAL when the section does not exist,
+ * -ENOENT when it has no name or is empty, and -EIO when its header
+ * cannot be read or it does not consist of exactly one data block.
+ */
+static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
+				 struct bpf_elf_sec_data *data)
+{
+	Elf_Data *payload;
+	GElf_Shdr hdr;
+	Elf_Scn *scn;
+	char *name;
+
+	memset(data, 0, sizeof(*data));
+
+	scn = elf_getscn(ctx->elf_fd, section);
+	if (!scn)
+		return -EINVAL;
+	if (gelf_getshdr(scn, &hdr) != &hdr)
+		return -EIO;
+
+	name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
+			  hdr.sh_name);
+	if (!name || !hdr.sh_size)
+		return -ENOENT;
+
+	/* A second data block would be more than we know how to handle. */
+	payload = elf_getdata(scn, NULL);
+	if (!payload || elf_getdata(scn, payload))
+		return -EIO;
+
+	data->sec_hdr = hdr;
+	data->sec_name = name;
+	data->sec_data = payload;
+	return 0;
+}
+
+/* Smallest map layout that is still accepted; its size is the lower
+ * bound applied when the per-map length of an object is validated.
+ */
+struct bpf_elf_map_min {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+};
+
+/* First stage of reading the maps section: remember the section
+ * index, mark it done and copy its raw bytes into ctx->maps.
+ *
+ * At this point ctx->map_num holds the section size in BYTES.
+ * Returns 0 on success, -ENOMEM when the section is larger than
+ * ctx->maps.
+ */
+static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
+				struct bpf_elf_sec_data *data)
+{
+	const Elf_Data *raw = data->sec_data;
+
+	ctx->map_num = raw->d_size;
+	ctx->sec_maps = section;
+	ctx->sec_done[section] = true;
+
+	if (ctx->map_num > sizeof(ctx->maps)) {
+		fprintf(stderr, "Too many BPF maps in ELF section!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(ctx->maps, raw->d_buf, ctx->map_num);
+	return 0;
+}
+
+/* Verify that every multiple of ctx->map_len in [0, end) is the
+ * offset of some map symbol, i.e. that all maps in the section use
+ * the same structure size.
+ *
+ * Returns 0 when all offsets are covered, -1 otherwise.
+ */
+static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
+{
+	GElf_Sym sym;
+	int off, i;
+
+	/* A non-positive stride would never reach 'end'. */
+	if (ctx->map_len <= 0)
+		return -1;
+
+	for (off = 0; off < end; off += ctx->map_len) {
+		/* Order doesn't need to be linear here, hence we walk
+		 * the table again.
+		 */
+		for (i = 0; i < ctx->sym_num; i++) {
+			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+				continue;
+			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+			    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
+			    sym.st_shndx != ctx->sec_maps)
+				continue;
+			if (sym.st_value == off)
+				break;
+		}
+
+		/* The whole table was walked without a match. Checking
+		 * this after the loop also catches the case where the
+		 * last symbol is not a map symbol and was skipped.
+		 */
+		if (i == ctx->sym_num)
+			return -1;
+	}
+
+	return off == end ? 0 : -1;
+}
+
+/* Second stage of reading the maps section, run once the symbol
+ * table is known.
+ *
+ * The section size (held in ctx->map_num in bytes) is divided by the
+ * number of map symbols to obtain the per-map length, and map_num is
+ * converted into a map count. Objects built against a smaller
+ * struct bpf_elf_map are expanded entry by entry into the current
+ * layout, leaving the missing trailing members zero.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported or inconsistent
+ * layout.
+ */
+static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
+	int idx, sym_num = bpf_map_num_sym(ctx);
+	__u8 *src;
+
+	if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
+		fprintf(stderr, "%u maps not supported in current map section!\n",
+			sym_num);
+		return -EINVAL;
+	}
+
+	if (ctx->map_num % sym_num != 0 ||
+	    ctx->map_num % sizeof(__u32) != 0) {
+		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
+		return -EINVAL;
+	}
+
+	ctx->map_len = ctx->map_num / sym_num;
+	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
+		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
+		return -EINVAL;
+	}
+
+	if (ctx->map_len > sizeof(struct bpf_elf_map)) {
+		fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
+		return -EINVAL;
+	}
+	if (ctx->map_len == sizeof(struct bpf_elf_map)) {
+		/* Layout matches, nothing to convert. */
+		ctx->map_num = sym_num;
+		return 0;
+	}
+	if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
+		fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
+		return -EINVAL;
+	}
+
+	ctx->map_num = sym_num;
+	src = (void *)ctx->maps;
+	for (idx = 0; idx < ctx->map_num; idx++) {
+		/* The fixup leaves the rest of the members as zero, which
+		 * is fine currently, but the option exists to set some
+		 * other default value when needed in future.
+		 */
+		memcpy(&fixup[idx], src, ctx->map_len);
+		src += ctx->map_len;
+	}
+
+	memcpy(ctx->maps, fixup, sizeof(fixup));
+
+	printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
+	       sizeof(struct bpf_elf_map) - ctx->map_len);
+	return 0;
+}
+
+/* Copy the license section into ctx->license and mark the section
+ * as done.
+ *
+ * The license is later used as a C string, so the stored copy must be
+ * NUL-terminated: the buffer is cleared first, and a section that
+ * fills the whole buffer without a terminating NUL is rejected.
+ *
+ * Returns 0 on success, -ENOMEM when the section is larger than the
+ * buffer, -EINVAL when it fills the buffer without being terminated.
+ */
+static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
+			     struct bpf_elf_sec_data *data)
+{
+	const Elf_Data *raw = data->sec_data;
+
+	if (raw->d_size > sizeof(ctx->license))
+		return -ENOMEM;
+	if (raw->d_size == sizeof(ctx->license) &&
+	    ((const char *)raw->d_buf)[raw->d_size - 1] != '\0')
+		return -EINVAL;
+
+	memset(ctx->license, 0, sizeof(ctx->license));
+	memcpy(ctx->license, raw->d_buf, raw->d_size);
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Record the symbol table data and the number of symbols it holds,
+ * and mark the section as done.
+ *
+ * Returns 0 on success, -EINVAL when the section header declares an
+ * entry size of zero (which would otherwise divide by zero).
+ */
+static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	if (!data->sec_hdr.sh_entsize)
+		return -EINVAL;
+
+	ctx->sym_tab = data->sec_data;
+	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Mark the string table section as done and remember its data for
+ * later symbol name lookups. Always returns 0.
+ */
+static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	ctx->sec_done[section] = true;
+	ctx->str_tab = data->sec_data;
+
+	return 0;
+}
+
+/* Maps can only be processed once the symbol table, the string table
+ * and the maps section have all been seen.
+ */
+static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
+{
+	if (!ctx->sym_tab || !ctx->str_tab)
+		return false;
+
+	return ctx->sec_maps != 0;
+}
+
+/* Walk all sections and pick up the non-program ones: maps, license,
+ * symbol table and string table. When map data is complete, the map
+ * layout is fixed up and all maps are loaded into the kernel.
+ *
+ * Sections that cannot be described (missing, empty, unreadable) are
+ * skipped. Their status is kept separate from the return value, so
+ * that a skipped trailing section does not turn an otherwise
+ * successful scan into a failure.
+ *
+ * Returns a negative value when no section could be parsed at all or
+ * when a handler fails, otherwise 0 (or the non-negative result of
+ * the map processing).
+ */
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_sec_data data;
+	int i, err, ret = -1;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		err = bpf_fill_section_data(ctx, i, &data);
+		if (err < 0)
+			continue;
+
+		/* At least one section was readable. */
+		ret = 0;
+
+		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
+			err = bpf_fetch_maps_begin(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
+			err = bpf_fetch_license(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
+			 !strcmp(data.sec_name, ".symtab"))
+			err = bpf_fetch_symtab(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
+			 !strcmp(data.sec_name, ".strtab"))
+			err = bpf_fetch_strtab(ctx, i, &data);
+		if (err < 0) {
+			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
+				i);
+			return err;
+		}
+	}
+
+	if (bpf_has_map_data(ctx)) {
+		ret = bpf_fetch_maps_end(ctx);
+		if (ret < 0) {
+			fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
+			return ret;
+		}
+
+		ret = bpf_maps_attach_all(ctx);
+		if (ret < 0) {
+			fprintf(stderr, "Error loading maps into kernel!\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/* Load the program section called 'section' without applying any
+ * relocations.
+ *
+ * Only executable PROGBITS sections that are not yet marked done are
+ * considered. '*sseen' is set as soon as a matching section is found.
+ * Returns the program fd, the negative load result, or -1 when no
+ * matching section exists.
+ */
+static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
+			  bool *sseen)
+{
+	struct bpf_elf_sec_data data;
+	struct bpf_elf_prog prog;
+	int i, fd;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		if (ctx->sec_done[i])
+			continue;
+		if (bpf_fill_section_data(ctx, i, &data) < 0)
+			continue;
+		if (data.sec_hdr.sh_type != SHT_PROGBITS ||
+		    !(data.sec_hdr.sh_flags & SHF_EXECINSTR) ||
+		    strcmp(data.sec_name, section))
+			continue;
+
+		*sseen = true;
+
+		memset(&prog, 0, sizeof(prog));
+		prog.license = ctx->license;
+		prog.size = data.sec_data->d_size;
+		prog.insns = data.sec_data->d_buf;
+		prog.type = ctx->type;
+
+		fd = bpf_prog_attach(section, &prog, ctx);
+		if (fd >= 0)
+			ctx->sec_done[i] = true;
+		return fd;
+	}
+
+	return -1;
+}
+
+/* Apply the map relocations of 'data_relo' to the instructions in
+ * 'data_insn'.
+ *
+ * Every relocation must point at a 64 bit immediate load whose symbol
+ * lives in the maps section; the instruction is patched to carry the
+ * fd of the referenced map (src_reg = BPF_PSEUDO_MAP_FD, imm = fd).
+ *
+ * Returns 0 on success, -EINVAL for a relocation section with a zero
+ * entry size, a relocation that does not hit an ld64 instruction or
+ * a map that is out of range / not loaded, and -EIO when a relocation
+ * or symbol cannot be read or does not refer to the maps section.
+ */
+static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
+			       struct bpf_elf_sec_data *data_relo,
+			       struct bpf_elf_sec_data *data_insn)
+{
+	Elf_Data *idata = data_insn->sec_data;
+	GElf_Shdr *rhdr = &data_relo->sec_hdr;
+	struct bpf_insn *insns = idata->d_buf;
+	unsigned int num_insns = idata->d_size / sizeof(*insns);
+	int relo_ent, relo_num;
+
+	/* A malformed header must not make us divide by zero. */
+	if (!rhdr->sh_entsize)
+		return -EINVAL;
+	relo_num = rhdr->sh_size / rhdr->sh_entsize;
+
+	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
+		unsigned int ioff, rmap;
+		GElf_Rel relo;
+		GElf_Sym sym;
+
+		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
+			return -EIO;
+
+		ioff = relo.r_offset / sizeof(struct bpf_insn);
+		if (ioff >= num_insns ||
+		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+			fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
+				ioff);
+			if (ioff < num_insns &&
+			    insns[ioff].code == (BPF_JMP | BPF_CALL))
+				fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
+			return -EINVAL;
+		}
+
+		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+			return -EIO;
+		if (sym.st_shndx != ctx->sec_maps) {
+			fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
+				relo_ent, sym.st_shndx);
+			return -EIO;
+		}
+
+		/* Symbol offset within the maps section -> map index. */
+		rmap = sym.st_value / ctx->map_len;
+		if (rmap >= ARRAY_SIZE(ctx->map_fds))
+			return -EINVAL;
+		if (!ctx->map_fds[rmap])
+			return -EINVAL;
+
+		if (ctx->verbose)
+			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
+				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
+				data_insn->sec_name, ioff);
+
+		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
+		insns[ioff].imm = ctx->map_fds[rmap];
+	}
+
+	return 0;
+}
+
+/* Load the program section called 'section' after applying its map
+ * relocations.
+ *
+ * Each SHT_REL section is inspected; the section it applies to
+ * (sh_info) must be an executable PROGBITS section with the requested
+ * name. '*sseen' is set once such a pair is found, '*lderr' when the
+ * kernel load itself failed.
+ *
+ * Returns the program fd, a negative error, or -1 when no matching
+ * relocation section exists.
+ */
+static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
+			       bool *lderr, bool *sseen)
+{
+	struct bpf_elf_sec_data data_relo, data_insn;
+	struct bpf_elf_prog prog;
+	int ret, idx, i, fd;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		if (bpf_fill_section_data(ctx, i, &data_relo) < 0 ||
+		    data_relo.sec_hdr.sh_type != SHT_REL)
+			continue;
+
+		idx = data_relo.sec_hdr.sh_info;
+
+		if (bpf_fill_section_data(ctx, idx, &data_insn) < 0)
+			continue;
+		if (data_insn.sec_hdr.sh_type != SHT_PROGBITS ||
+		    !(data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) ||
+		    strcmp(data_insn.sec_name, section))
+			continue;
+
+		*sseen = true;
+
+		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
+		if (ret < 0)
+			return ret;
+
+		memset(&prog, 0, sizeof(prog));
+		prog.license = ctx->license;
+		prog.size = data_insn.sec_data->d_size;
+		prog.insns = data_insn.sec_data->d_buf;
+		prog.type = ctx->type;
+
+		fd = bpf_prog_attach(section, &prog, ctx);
+		if (fd < 0) {
+			*lderr = true;
+			return fd;
+		}
+
+		/* Both the relocation and the code section are consumed. */
+		ctx->sec_done[i] = true;
+		ctx->sec_done[idx] = true;
+		return fd;
+	}
+
+	return -1;
+}
+
+/* Load program section 'section' and return its fd.
+ *
+ * With map data present the relocated variant is tried first. The
+ * plain variant is used as fallback unless the relocated load already
+ * failed inside the kernel. A "not found" message is printed when no
+ * section with that name was seen at all.
+ */
+static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
+{
+	bool lderr = false, sseen = false;
+	int fd = -1;
+
+	if (bpf_has_map_data(ctx)) {
+		fd = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
+		if (fd >= 0)
+			return fd;
+	}
+
+	if (!lderr) {
+		fd = bpf_fetch_prog(ctx, section, &sseen);
+		if (fd >= 0)
+			return fd;
+	}
+
+	if (!sseen)
+		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
+			section);
+	return fd;
+}
+
+/* Return the index of the loaded program array map whose identifier
+ * equals 'id', or -1 when there is none.
+ */
+static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ctx->map_fds); idx++) {
+		if (!ctx->map_fds[idx])
+			continue;
+		if (ctx->maps[idx].id != id)
+			continue;
+		if (ctx->maps[idx].type == BPF_MAP_TYPE_PROG_ARRAY)
+			return idx;
+	}
+
+	return -1;
+}
+
+/* Populate program array maps for tail calls.
+ *
+ * Every remaining section whose name parses as "<map id>/<key>" and
+ * whose map id refers to a loaded program array is loaded as a
+ * program and stored in that map under the given key.
+ *
+ * Returns 0 on success, -EIO when a program cannot be loaded, or
+ * -errno when the map update fails.
+ */
+static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_sec_data data;
+	uint32_t map_id, key_id;
+	int fd, i, idx;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		if (ctx->sec_done[i])
+			continue;
+		if (bpf_fill_section_data(ctx, i, &data) < 0)
+			continue;
+		if (sscanf(data.sec_name, "%i/%i", &map_id, &key_id) != 2)
+			continue;
+
+		idx = bpf_find_map_by_id(ctx, map_id);
+		if (idx < 0)
+			continue;
+
+		fd = bpf_fetch_prog_sec(ctx, data.sec_name);
+		if (fd < 0)
+			return -EIO;
+
+		if (bpf_map_update(ctx->map_fds[idx], &key_id,
+				   &fd, BPF_ANY) < 0) {
+			if (errno == E2BIG)
+				fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
+					key_id, map_id);
+			return -errno;
+		}
+
+		ctx->sec_done[i] = true;
+	}
+
+	return 0;
+}
+
+/* Remember device and inode number of the object file in ctx->stat.
+ * The record is zeroed first and stays zero when fstat() fails.
+ */
+static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
+{
+	struct stat info;
+
+	memset(&ctx->stat, 0, sizeof(ctx->stat));
+
+	if (fstat(ctx->obj_fd, &info) < 0) {
+		fprintf(stderr, "Stat of elf file failed: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	ctx->stat.st_ino = info.st_ino;
+	ctx->stat.st_dev = info.st_dev;
+}
+
+/* Read the next "<id> <path>" mapping from 'fp'.
+ *
+ * Leading blanks are ignored; empty lines and lines starting with
+ * '#' are skipped.
+ *
+ * Returns 1 with '*id' and 'path' filled in, 0 at end of file, or -1
+ * for a line that cannot be parsed, in which case the offending line
+ * is copied to 'path'.
+ */
+static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
+{
+	char line[PATH_MAX];
+
+	while (fgets(line, sizeof(line), fp)) {
+		char *start = line + strspn(line, " \t");
+
+		if (*start == '#' || *start == '\n' || *start == 0)
+			continue;
+
+		if (sscanf(start, "%i %s\n", id, path) == 2 ||
+		    sscanf(start, "%i %s #", id, path) == 2)
+			return 1;
+
+		strcpy(path, start);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* The built-in pinning ids cannot be redefined by the database. */
+static bool bpf_pinning_reserved(uint32_t pinning)
+{
+	return pinning == PIN_NONE ||
+	       pinning == PIN_OBJECT_NS ||
+	       pinning == PIN_GLOBAL_NS;
+}
+
+/* Fill the custom pinning table of 'ctx' from database 'db_file'.
+ *
+ * A missing database is silently ignored. Reading stops at the first
+ * line that cannot be parsed. Reserved ids and entries that cannot be
+ * allocated are reported and skipped. New entries are put at the head
+ * of their bucket.
+ */
+static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
+{
+	struct bpf_hash_entry *entry, **bucket;
+	char subpath[PATH_MAX] = {};
+	uint32_t pinning;
+	FILE *fp;
+	int ret;
+
+	fp = fopen(db_file, "r");
+	if (!fp)
+		return;
+
+	for (;;) {
+		ret = bpf_read_pin_mapping(fp, &pinning, subpath);
+		if (!ret)
+			break;
+
+		if (ret == -1) {
+			fprintf(stderr, "Database %s is corrupted at: %s\n",
+				db_file, subpath);
+			break;
+		}
+
+		if (bpf_pinning_reserved(pinning)) {
+			fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
+				db_file, pinning);
+			continue;
+		}
+
+		entry = malloc(sizeof(*entry));
+		if (!entry) {
+			fprintf(stderr, "No memory left for db entry!\n");
+			continue;
+		}
+
+		entry->pinning = pinning;
+		entry->subpath = strdup(subpath);
+		if (!entry->subpath) {
+			fprintf(stderr, "No memory left for db entry!\n");
+			free(entry);
+			continue;
+		}
+
+		bucket = &ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+		entry->next = *bucket;
+		*bucket = entry;
+	}
+
+	fclose(fp);
+}
+
+/* Release every entry of the custom pinning table, including the
+ * duplicated sub-path strings, and leave all buckets empty.
+ */
+static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_hash_entry *cur, *next;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
+		for (cur = ctx->ht[i]; cur; cur = next) {
+			next = cur->next;
+			free((char *)cur->subpath);
+			free(cur);
+		}
+		ctx->ht[i] = NULL;
+	}
+}
+
+/* Sanity check the ELF header: the object must be relocatable, of
+ * the current ELF version, built for machine EM_NONE or EM_BPF, and
+ * its byte order must match the byte order of this host.
+ *
+ * Returns 0 when acceptable, -EINVAL for a format error and -EIO for
+ * an endianness mismatch.
+ */
+static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
+{
+	/* htons() is the identity only on big endian hosts. */
+	const bool host_is_be = htons(1) == 1;
+	const unsigned char encoding = ctx->elf_hdr.e_ident[EI_DATA];
+
+	if (ctx->elf_hdr.e_type != ET_REL ||
+	    (ctx->elf_hdr.e_machine != EM_NONE &&
+	     ctx->elf_hdr.e_machine != EM_BPF) ||
+	    ctx->elf_hdr.e_version != EV_CURRENT) {
+		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
+		return -EINVAL;
+	}
+
+	if (encoding == ELFDATA2LSB) {
+		if (host_is_be) {
+			fprintf(stderr,
+				"We are big endian, eBPF object is little endian!\n");
+			return -EIO;
+		}
+	} else if (encoding == ELFDATA2MSB) {
+		if (!host_is_be) {
+			fprintf(stderr,
+				"We are little endian, eBPF object is big endian!\n");
+			return -EIO;
+		}
+	} else {
+		fprintf(stderr, "ELF format error, wrong endianness info?\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Prepare 'ctx' for loading the object file 'pathname'.
+ *
+ * Opens the file, attaches a libelf handle, validates the ELF header,
+ * allocates the per-section "done" flags and, in verbose mode, the
+ * verifier log buffer. Finally the file identity is recorded and the
+ * custom pinning database is read.
+ *
+ * Returns 0 on success or a negative value on failure. On failure
+ * everything acquired so far is released again, including the libelf
+ * handle when the file turns out not to be an ELF object.
+ */
+static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
+			    enum bpf_prog_type type, bool verbose)
+{
+	int ret = -EINVAL;
+
+	if (elf_version(EV_CURRENT) == EV_NONE ||
+	    bpf_init_env(pathname))
+		return ret;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->verbose = verbose;
+	ctx->type    = type;
+
+	ctx->obj_fd = open(pathname, O_RDONLY);
+	if (ctx->obj_fd < 0)
+		return ctx->obj_fd;
+
+	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
+	if (!ctx->elf_fd) {
+		ret = -EINVAL;
+		goto out_fd;
+	}
+
+	/* From here on the libelf handle exists and must be ended. */
+	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
+		ret = -EINVAL;
+		goto out_elf;
+	}
+
+	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
+	    &ctx->elf_hdr) {
+		ret = -EIO;
+		goto out_elf;
+	}
+
+	ret = bpf_elf_check_ehdr(ctx);
+	if (ret < 0)
+		goto out_elf;
+
+	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
+			       sizeof(*(ctx->sec_done)));
+	if (!ctx->sec_done) {
+		ret = -ENOMEM;
+		goto out_elf;
+	}
+
+	if (ctx->verbose && bpf_log_realloc(ctx)) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	bpf_save_finfo(ctx);
+	bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
+
+	return 0;
+out_free:
+	free(ctx->sec_done);
+out_elf:
+	elf_end(ctx->elf_fd);
+out_fd:
+	close(ctx->obj_fd);
+	return ret;
+}
+
+/* Number of leading non-zero entries in ctx->map_fds. */
+static int bpf_maps_count(struct bpf_elf_ctx *ctx)
+{
+	int count = 0;
+
+	while (count < ARRAY_SIZE(ctx->map_fds) && ctx->map_fds[count])
+		count++;
+
+	return count;
+}
+
+/* Close every map fd recorded in ctx->map_fds; zero slots are
+ * treated as unused.
+ */
+static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ctx->map_fds); idx++) {
+		const int fd = ctx->map_fds[idx];
+
+		if (!fd)
+			continue;
+		close(fd);
+	}
+}
+
+/* Release what bpf_elf_ctx_init() set up: pinning table, log buffer,
+ * section flags, libelf handle and object file descriptor.
+ * The map fds are closed only when 'failure' is set.
+ */
+static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
+{
+	if (failure)
+		bpf_maps_teardown(ctx);
+
+	bpf_hash_destroy(ctx);
+
+	free(ctx->log);
+	free(ctx->sec_done);
+
+	elf_end(ctx->elf_fd);
+	close(ctx->obj_fd);
+}
+
+/* Single loader context; it is also read by bpf_send_map_fds(). */
+static struct bpf_elf_ctx __ctx;
+
+/* Load program section 'section' of ELF object 'pathname'.
+ *
+ * Ancillary data (maps, license, symbols) is processed first, then
+ * the requested section is loaded and finally all program arrays are
+ * filled for tail calls.
+ *
+ * Returns the fd of the loaded program, or a negative value on
+ * failure. On failure the map fds and an already obtained program fd
+ * are closed.
+ */
+static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
+			const char *section, bool verbose)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	/* -1 marks "no program fd yet", so the error path only ever
+	 * closes a descriptor that was really obtained.
+	 */
+	int fd = -1, ret;
+
+	ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
+	if (ret < 0) {
+		fprintf(stderr, "Cannot initialize ELF context!\n");
+		return ret;
+	}
+
+	ret = bpf_fetch_ancillary(ctx);
+	if (ret < 0) {
+		fprintf(stderr, "Error fetching ELF ancillary data!\n");
+		goto out;
+	}
+
+	fd = bpf_fetch_prog_sec(ctx, section);
+	if (fd < 0) {
+		fprintf(stderr, "Error fetching program/map!\n");
+		ret = fd;
+		goto out;
+	}
+
+	ret = bpf_fill_prog_arrays(ctx);
+	if (ret < 0)
+		fprintf(stderr, "Error filling program arrays!\n");
+out:
+	bpf_elf_ctx_destroy(ctx, ret < 0);
+	if (ret < 0) {
+		if (fd >= 0)
+			close(fd);
+		return ret;
+	}
+
+	return fd;
+}
+
+/* Send 'entries' map fds plus their descriptions over socket 'fd',
+ * at most BPF_SCM_MAX_FDS per message.
+ *
+ * The object name is copied with room left for the terminating NUL;
+ * 'msg' is zero initialised, so the name always ends up terminated.
+ *
+ * Returns 0 on success, the negative sendmsg() result on error, or -1
+ * when sendmsg() transferred nothing.
+ */
+static int
+bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
+		 const struct bpf_map_data *aux, unsigned int entries)
+{
+	struct bpf_map_set_msg msg = {
+		.aux.uds_ver = BPF_SCM_AUX_VER,
+		.aux.num_ent = entries,
+	};
+	int *cmsg_buf, min_fd;
+	char *amsg_buf;
+	int i;
+
+	strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name) - 1);
+	memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
+
+	cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
+	amsg_buf = (char *)msg.aux.ent;
+
+	for (i = 0; i < entries; i += min_fd) {
+		int ret;
+
+		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+		bpf_map_set_init_single(&msg, min_fd);
+
+		memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
+		memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
+
+		ret = sendmsg(fd, &msg.hdr, 0);
+		if (ret <= 0)
+			return ret ? ret : -1;
+	}
+
+	return 0;
+}
+
+/* Receive map fds and their descriptions from socket 'fd' into
+ * 'fds' / 'aux', which offer room for 'entries' maps.
+ *
+ * Messages are read until either 'entries' maps were stored or the
+ * number announced by the sender (aux->num_ent) is reached.
+ *
+ * Each request is limited to BPF_SCM_MAX_FDS, like on the sending
+ * side (NOTE(review): this assumes the message buffer is dimensioned
+ * for that many fds - confirm against struct bpf_map_set_msg). The
+ * number of fds actually received is checked against the space that
+ * is still free, so the caller's arrays cannot be overrun.
+ *
+ * Returns 0 on success, -EINVAL for a malformed control message,
+ * -EIO when control data was truncated, -ENOSYS for an unknown
+ * protocol version, the negative recvmsg() result on error, or -1
+ * when recvmsg() returned nothing.
+ */
+static int
+bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
+		 unsigned int entries)
+{
+	struct bpf_map_set_msg msg;
+	int *cmsg_buf, min_fd;
+	char *amsg_buf, *mmsg_buf;
+	unsigned int needed = 1;
+	int i;
+
+	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
+	amsg_buf = (char *)msg.aux.ent;
+	mmsg_buf = (char *)&msg.aux;
+
+	for (i = 0; i < min(entries, needed); i += min_fd) {
+		struct cmsghdr *cmsg;
+		int ret;
+
+		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+		bpf_map_set_init_single(&msg, min_fd);
+
+		ret = recvmsg(fd, &msg.hdr, 0);
+		if (ret <= 0)
+			return ret ? ret : -1;
+
+		cmsg = CMSG_FIRSTHDR(&msg.hdr);
+		if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
+		    cmsg->cmsg_type != SCM_RIGHTS)
+			return -EINVAL;
+		if (msg.hdr.msg_flags & MSG_CTRUNC)
+			return -EIO;
+		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
+			return -ENOSYS;
+
+		/* The control message must carry at least one fd. */
+		if (cmsg->cmsg_len < CMSG_LEN(sizeof(fd)))
+			return -EINVAL;
+
+		min_fd = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(fd);
+		if (min_fd <= 0 || min_fd > entries - i)
+			return -EINVAL;
+
+		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
+		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
+		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
+
+		needed = aux->num_ent;
+	}
+
+	return 0;
+}
+
+/* Hand all map fds of the loader context over to the agent listening
+ * on the unix domain socket 'path'; 'obj' names the object the maps
+ * belong to.
+ *
+ * The socket is closed on every path that created it. The map fds are
+ * closed once sending has been attempted.
+ *
+ * Returns 0 on success, -1 when the socket cannot be created or
+ * connected, otherwise the result of the transfer.
+ */
+int bpf_send_map_fds(const char *path, const char *obj)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	struct bpf_map_data bpf_aux = {
+		.fds = ctx->map_fds,
+		.ent = ctx->maps,
+		.st  = &ctx->stat,
+		.obj = obj,
+	};
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	/* 'addr' is zero initialised; leaving the last byte untouched
+	 * keeps sun_path NUL-terminated.
+	 */
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+
+	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot connect to %s: %s\n",
+			path, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
+			       bpf_maps_count(ctx));
+	if (ret < 0)
+		fprintf(stderr, "Cannot send fds to %s: %s\n",
+			path, strerror(errno));
+
+	bpf_maps_teardown(ctx);
+	close(fd);
+	return ret;
+}
+
+/* Receive up to 'entries' map fds and their descriptions on the unix
+ * domain socket bound to 'path'.
+ *
+ * The socket is closed on every path that created it; the socket file
+ * is unlinked after a receive attempt.
+ *
+ * Returns 0 on success, -1 when the socket cannot be created or
+ * bound, otherwise the result of the transfer.
+ */
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+		     unsigned int entries)
+{
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	/* 'addr' is zero initialised; leaving the last byte untouched
+	 * keeps sun_path NUL-terminated.
+	 */
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+
+	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot bind to socket: %s\n",
+			strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	ret = bpf_map_set_recv(fd, fds, aux, entries);
+	if (ret < 0)
+		fprintf(stderr, "Cannot recv fds from %s: %s\n",
+			path, strerror(errno));
+
+	unlink(addr.sun_path);
+	close(fd);
+	return ret;
+}
+#endif /* HAVE_ELF */
@@ -1,5 +1,5 @@
TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o tc_monitor.o \
- tc_exec.o tc_bpf.o m_police.o m_estimator.o m_action.o m_ematch.o \
+ tc_exec.o m_police.o m_estimator.o m_action.o m_ematch.o \
emp_ematch.yacc.o emp_ematch.lex.o
include ../Config
@@ -94,11 +94,6 @@ ifneq ($(TC_CONFIG_NO_XT),y)
endif
endif
-ifeq ($(TC_CONFIG_ELF),y)
- CFLAGS += -DHAVE_ELF
- LDLIBS += -lelf
-endif
-
TCOBJ += $(TCMODULES)
LDLIBS += -L. -ltc -lm
@@ -15,8 +15,8 @@
#include "utils.h"
#include "tc_util.h"
-#include "tc_bpf.h"
+#include "bpf_util.h"
#include "bpf_elf.h"
#include "bpf_scm.h"
@@ -6,7 +6,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Authors: Daniel Borkmann <dborkman@redhat.com>
+ * Authors: Daniel Borkmann <daniel@iogearbox.net>
*/
#include <stdio.h>
@@ -15,18 +15,12 @@
#include <linux/bpf.h>
#include "utils.h"
+
#include "tc_util.h"
-#include "tc_bpf.h"
+#include "bpf_util.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
-static const int nla_tbl[BPF_NLA_MAX] = {
- [BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN,
- [BPF_NLA_OPS] = TCA_BPF_OPS,
- [BPF_NLA_FD] = TCA_BPF_FD,
- [BPF_NLA_NAME] = TCA_BPF_NAME,
-};
-
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ...\n");
@@ -52,7 +46,7 @@ static void explain(void)
fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
- fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
+ fprintf(stderr, "classifier (default \'%s\').\n", bpf_prog_to_default_section(bpf_type));
fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
@@ -61,6 +55,24 @@ static void explain(void)
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
}
+/* Classic BPF: add the instruction count and the instruction array
+ * as TCA_BPF_OPS_LEN / TCA_BPF_OPS attributes to message 'nl'.
+ */
+static void bpf_cbpf_cb(void *nl, const struct sock_filter *ops, int ops_len)
+{
+	const size_t ops_size = ops_len * sizeof(struct sock_filter);
+
+	addattr16(nl, MAX_MSG, TCA_BPF_OPS_LEN, ops_len);
+	addattr_l(nl, MAX_MSG, TCA_BPF_OPS, ops, ops_size);
+}
+
+/* eBPF: add the program fd and its annotation string as TCA_BPF_FD /
+ * TCA_BPF_NAME attributes to message 'nl'.
+ */
+static void bpf_ebpf_cb(void *nl, int fd, const char *annotation)
+{
+	addattr32(nl, MAX_MSG, TCA_BPF_FD, fd);
+	addattrstrz(nl, MAX_MSG, TCA_BPF_NAME, annotation);
+}
+
+/* Callback set handed to bpf_parse_common() by this file. */
+static const struct bpf_cfg_ops bpf_cb_ops = {
+	.cbpf_cb = bpf_cbpf_cb,
+	.ebpf_cb = bpf_ebpf_cb,
+};
+
static int bpf_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n)
{
@@ -68,6 +80,7 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
struct tcmsg *t = NLMSG_DATA(n);
unsigned int bpf_gen_flags = 0;
unsigned int bpf_flags = 0;
+ struct bpf_cfg_in cfg = {};
bool seen_run = false;
struct rtattr *tail;
int ret = 0;
@@ -90,11 +103,17 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
NEXT_ARG();
opt_bpf:
seen_run = true;
- if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
- &bpf_obj, &bpf_uds_name, n)) {
- fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
+ cfg.argc = argc;
+ cfg.argv = argv;
+
+ if (bpf_parse_common(bpf_type, &cfg, &bpf_cb_ops, n))
return -1;
- }
+
+ argc = cfg.argc;
+ argv = cfg.argv;
+
+ bpf_obj = cfg.object;
+ bpf_uds_name = cfg.uds;
} else if (matches(*argv, "classid") == 0 ||
matches(*argv, "flowid") == 0) {
unsigned int handle;
@@ -143,7 +162,7 @@ opt_bpf:
if (bpf_gen_flags)
addattr32(n, MAX_MSG, TCA_BPF_FLAGS_GEN, bpf_gen_flags);
- if (bpf_obj && bpf_flags)
+ if (bpf_flags)
addattr32(n, MAX_MSG, TCA_BPF_FLAGS, bpf_flags);
tail->rta_len = (((void *)n) + n->nlmsg_len) - (void *)tail;
@@ -175,8 +194,6 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
if (tb[TCA_BPF_NAME])
fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME]));
- else if (tb[TCA_BPF_FD])
- fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
if (tb[TCA_BPF_FLAGS]) {
unsigned int flags = rta_getattr_u32(tb[TCA_BPF_FLAGS]);
@@ -195,20 +212,17 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
fprintf(f, "skip_sw ");
}
- if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) {
+ if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
- fprintf(f, "\n");
- }
if (tb[TCA_BPF_POLICE]) {
fprintf(f, "\n");
tc_print_police(f, tb[TCA_BPF_POLICE]);
}
- if (tb[TCA_BPF_ACT]) {
+ if (tb[TCA_BPF_ACT])
tc_print_action(f, tb[TCA_BPF_ACT]);
- }
return 0;
}
@@ -17,18 +17,12 @@
#include <linux/tc_act/tc_bpf.h>
#include "utils.h"
+
#include "tc_util.h"
-#include "tc_bpf.h"
+#include "bpf_util.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT;
-static const int nla_tbl[BPF_NLA_MAX] = {
- [BPF_NLA_OPS_LEN] = TCA_ACT_BPF_OPS_LEN,
- [BPF_NLA_OPS] = TCA_ACT_BPF_OPS,
- [BPF_NLA_FD] = TCA_ACT_BPF_FD,
- [BPF_NLA_NAME] = TCA_ACT_BPF_NAME,
-};
-
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n");
@@ -50,7 +44,7 @@ static void explain(void)
fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n");
- fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type));
+ fprintf(stderr, "action (default \'%s\').\n", bpf_prog_to_default_section(bpf_type));
fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
@@ -59,11 +53,30 @@ static void explain(void)
fprintf(stderr, "explicitly specifies an action index upon creation.\n");
}
+/* Classic BPF: add the instruction count and the instruction array
+ * as TCA_ACT_BPF_OPS_LEN / TCA_ACT_BPF_OPS attributes to message 'nl'.
+ */
+static void bpf_cbpf_cb(void *nl, const struct sock_filter *ops, int ops_len)
+{
+	const size_t ops_size = ops_len * sizeof(struct sock_filter);
+
+	addattr16(nl, MAX_MSG, TCA_ACT_BPF_OPS_LEN, ops_len);
+	addattr_l(nl, MAX_MSG, TCA_ACT_BPF_OPS, ops, ops_size);
+}
+
+/* eBPF: add the program fd and its annotation string as
+ * TCA_ACT_BPF_FD / TCA_ACT_BPF_NAME attributes to message 'nl'.
+ */
+static void bpf_ebpf_cb(void *nl, int fd, const char *annotation)
+{
+	addattr32(nl, MAX_MSG, TCA_ACT_BPF_FD, fd);
+	addattrstrz(nl, MAX_MSG, TCA_ACT_BPF_NAME, annotation);
+}
+
+/* Callback set handed to bpf_parse_common() by this file. */
+static const struct bpf_cfg_ops bpf_cb_ops = {
+	.cbpf_cb = bpf_cbpf_cb,
+	.ebpf_cb = bpf_ebpf_cb,
+};
+
static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
int tca_id, struct nlmsghdr *n)
{
const char *bpf_obj = NULL, *bpf_uds_name = NULL;
struct tc_act_bpf parm = { .action = TC_ACT_PIPE };
+ struct bpf_cfg_in cfg = {};
bool seen_run = false;
struct rtattr *tail;
int argc, ret = 0;
@@ -85,11 +98,17 @@ static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
NEXT_ARG();
opt_bpf:
seen_run = true;
- if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
- &bpf_obj, &bpf_uds_name, n)) {
- fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
+ cfg.argc = argc;
+ cfg.argv = argv;
+
+ if (bpf_parse_common(bpf_type, &cfg, &bpf_cb_ops, n))
return -1;
- }
+
+ argc = cfg.argc;
+ argv = cfg.argv;
+
+ bpf_obj = cfg.object;
+ bpf_uds_name = cfg.uds;
} else if (matches(*argv, "help") == 0) {
explain();
return -1;
@@ -151,8 +170,6 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
if (tb[TCA_ACT_BPF_NAME])
fprintf(f, "%s ", rta_getattr_str(tb[TCA_ACT_BPF_NAME]));
- else if (tb[TCA_ACT_BPF_FD])
- fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_ACT_BPF_FD]));
if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_ACT_BPF_OPS],
deleted file mode 100644
@@ -1,2010 +0,0 @@
-/*
- * tc_bpf.c BPF common code
- *
- * This program is free software; you can distribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Daniel Borkmann <dborkman@redhat.com>
- * Jiri Pirko <jiri@resnulli.us>
- * Alexei Starovoitov <ast@plumgrid.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <limits.h>
-
-#ifdef HAVE_ELF
-#include <libelf.h>
-#include <gelf.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/un.h>
-#include <sys/vfs.h>
-#include <sys/mount.h>
-#include <sys/syscall.h>
-#include <sys/sendfile.h>
-#include <sys/resource.h>
-
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/if_alg.h>
-
-#include <arpa/inet.h>
-
-#include "utils.h"
-
-#include "bpf_elf.h"
-#include "bpf_scm.h"
-
-#include "tc_util.h"
-#include "tc_bpf.h"
-
-#ifndef AF_ALG
-#define AF_ALG 38
-#endif
-
-#ifndef EM_BPF
-#define EM_BPF 247
-#endif
-
-#ifdef HAVE_ELF
-static int bpf_obj_open(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose);
-#else
-static int bpf_obj_open(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose)
-{
- fprintf(stderr, "No ELF library support compiled in.\n");
- errno = ENOSYS;
- return -1;
-}
-#endif
-
-static inline __u64 bpf_ptr_to_u64(const void *ptr)
-{
- return (__u64)(unsigned long)ptr;
-}
-
-static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
-{
-#ifdef __NR_bpf
- return syscall(__NR_bpf, cmd, attr, size);
-#else
- fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
- errno = ENOSYS;
- return -1;
-#endif
-}
-
-static int bpf_map_update(int fd, const void *key, const void *value,
- uint64_t flags)
-{
- union bpf_attr attr = {};
-
- attr.map_fd = fd;
- attr.key = bpf_ptr_to_u64(key);
- attr.value = bpf_ptr_to_u64(value);
- attr.flags = flags;
-
- return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
-}
-
-static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
- char **bpf_string, bool *need_release,
- const char separator)
-{
- char sp;
-
- if (from_file) {
- size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
- char *tmp_string;
- FILE *fp;
-
- tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
- tmp_string = calloc(1, tmp_len);
- if (tmp_string == NULL)
- return -ENOMEM;
-
- fp = fopen(arg, "r");
- if (fp == NULL) {
- perror("Cannot fopen");
- free(tmp_string);
- return -ENOENT;
- }
-
- if (!fgets(tmp_string, tmp_len, fp)) {
- free(tmp_string);
- fclose(fp);
- return -EIO;
- }
-
- fclose(fp);
-
- *need_release = true;
- *bpf_string = tmp_string;
- } else {
- *need_release = false;
- *bpf_string = arg;
- }
-
- if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
- sp != separator) {
- if (*need_release)
- free(*bpf_string);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
- bool from_file)
-{
- char *bpf_string, *token, separator = ',';
- int ret = 0, i = 0;
- bool need_release;
- __u16 bpf_len = 0;
-
- if (argc < 1)
- return -EINVAL;
- if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
- &need_release, separator))
- return -EINVAL;
- if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
- ret = -EINVAL;
- goto out;
- }
-
- token = bpf_string;
- while ((token = strchr(token, separator)) && (++token)[0]) {
- if (i >= bpf_len) {
- fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (sscanf(token, "%hu %hhu %hhu %u,",
- &bpf_ops[i].code, &bpf_ops[i].jt,
- &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
- fprintf(stderr, "Error at instruction %d!\n", i);
- ret = -EINVAL;
- goto out;
- }
-
- i++;
- }
-
- if (i != bpf_len) {
- fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
- ret = -EINVAL;
- goto out;
- }
- ret = bpf_len;
-out:
- if (need_release)
- free(bpf_string);
-
- return ret;
-}
-
-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
-{
- struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops);
- int i;
-
- if (len == 0)
- return;
-
- fprintf(f, "bytecode \'%u,", len);
-
- for (i = 0; i < len - 1; i++)
- fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
- ops[i].jf, ops[i].k);
-
- fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
- ops[i].jf, ops[i].k);
-}
-
-static void bpf_map_pin_report(const struct bpf_elf_map *pin,
- const struct bpf_elf_map *obj)
-{
- fprintf(stderr, "Map specification differs from pinned file!\n");
-
- if (obj->type != pin->type)
- fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
- obj->type, pin->type);
- if (obj->size_key != pin->size_key)
- fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
- obj->size_key, pin->size_key);
- if (obj->size_value != pin->size_value)
- fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
- obj->size_value, pin->size_value);
- if (obj->max_elem != pin->max_elem)
- fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
- obj->max_elem, pin->max_elem);
- if (obj->flags != pin->flags)
- fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
- obj->flags, pin->flags);
-
- fprintf(stderr, "\n");
-}
-
-static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
- int length)
-{
- char file[PATH_MAX], buff[4096];
- struct bpf_elf_map tmp = {}, zero = {};
- unsigned int val;
- FILE *fp;
-
- snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
-
- fp = fopen(file, "r");
- if (!fp) {
- fprintf(stderr, "No procfs support?!\n");
- return -EIO;
- }
-
- while (fgets(buff, sizeof(buff), fp)) {
- if (sscanf(buff, "map_type:\t%u", &val) == 1)
- tmp.type = val;
- else if (sscanf(buff, "key_size:\t%u", &val) == 1)
- tmp.size_key = val;
- else if (sscanf(buff, "value_size:\t%u", &val) == 1)
- tmp.size_value = val;
- else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
- tmp.max_elem = val;
- else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
- tmp.flags = val;
- }
-
- fclose(fp);
-
- if (!memcmp(&tmp, map, length)) {
- return 0;
- } else {
- /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
- * so just accept it. We know we do have an eBPF fd and in this
- * case, everything is 0. It is guaranteed that no such map exists
- * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
- */
- if (!memcmp(&tmp, &zero, length))
- return 0;
-
- bpf_map_pin_report(&tmp, map);
- return -EINVAL;
- }
-}
-
-static int bpf_mnt_fs(const char *target)
-{
- bool bind_done = false;
-
- while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
- if (errno != EINVAL || bind_done) {
- fprintf(stderr, "mount --make-private %s failed: %s\n",
- target, strerror(errno));
- return -1;
- }
-
- if (mount(target, target, "none", MS_BIND, NULL)) {
- fprintf(stderr, "mount --bind %s %s failed: %s\n",
- target, target, strerror(errno));
- return -1;
- }
-
- bind_done = true;
- }
-
- if (mount("bpf", target, "bpf", 0, NULL)) {
- fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
- target, strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
-static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
-{
- struct statfs st_fs;
-
- if (statfs(mnt, &st_fs) < 0)
- return -ENOENT;
- if ((unsigned long)st_fs.f_type != magic)
- return -ENOENT;
-
- return 0;
-}
-
-static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
- char *mnt, int len,
- const char * const *known_mnts)
-{
- const char * const *ptr;
- char type[100];
- FILE *fp;
-
- if (known_mnts) {
- ptr = known_mnts;
- while (*ptr) {
- if (bpf_valid_mntpt(*ptr, magic) == 0) {
- strncpy(mnt, *ptr, len - 1);
- mnt[len - 1] = 0;
- return mnt;
- }
- ptr++;
- }
- }
-
- fp = fopen("/proc/mounts", "r");
- if (fp == NULL || len != PATH_MAX)
- return NULL;
-
- while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
- mnt, type) == 2) {
- if (strcmp(type, fstype) == 0)
- break;
- }
-
- fclose(fp);
- if (strcmp(type, fstype) != 0)
- return NULL;
-
- return mnt;
-}
-
-int bpf_trace_pipe(void)
-{
- char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
- static const char * const tracefs_known_mnts[] = {
- TRACE_DIR_MNT,
- "/sys/kernel/debug/tracing",
- "/tracing",
- "/trace",
- 0,
- };
- char tpipe[PATH_MAX];
- const char *mnt;
- int fd;
-
- mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
- sizeof(tracefs_mnt), tracefs_known_mnts);
- if (!mnt) {
- fprintf(stderr, "tracefs not mounted?\n");
- return -1;
- }
-
- snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
-
- fd = open(tpipe, O_RDONLY);
- if (fd < 0)
- return -1;
-
- fprintf(stderr, "Running! Hang up with ^C!\n\n");
- while (1) {
- static char buff[4096];
- ssize_t ret;
-
- ret = read(fd, buff, sizeof(buff) - 1);
- if (ret > 0) {
- write(2, buff, ret);
- fflush(stderr);
- }
- }
-
- return 0;
-}
-
-static const char *bpf_get_tc_dir(void)
-{
- static bool bpf_mnt_cached;
- static char bpf_tc_dir[PATH_MAX];
- static const char *mnt;
- static const char * const bpf_known_mnts[] = {
- BPF_DIR_MNT,
- 0,
- };
- char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
- char bpf_glo_dir[PATH_MAX];
- int ret;
-
- if (bpf_mnt_cached)
- goto done;
-
- mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
- bpf_known_mnts);
- if (!mnt) {
- mnt = getenv(BPF_ENV_MNT);
- if (!mnt)
- mnt = BPF_DIR_MNT;
- ret = bpf_mnt_fs(mnt);
- if (ret) {
- mnt = NULL;
- goto out;
- }
- }
-
- snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
- ret = mkdir(bpf_tc_dir, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
- strerror(errno));
- mnt = NULL;
- goto out;
- }
-
- snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
- bpf_tc_dir, BPF_DIR_GLOBALS);
- ret = mkdir(bpf_glo_dir, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
- strerror(errno));
- mnt = NULL;
- goto out;
- }
-
- mnt = bpf_tc_dir;
-out:
- bpf_mnt_cached = true;
-done:
- return mnt;
-}
-
-static int bpf_obj_get(const char *pathname)
-{
- union bpf_attr attr = {};
- char tmp[PATH_MAX];
-
- if (strlen(pathname) > 2 && pathname[0] == 'm' &&
- pathname[1] == ':' && bpf_get_tc_dir()) {
- snprintf(tmp, sizeof(tmp), "%s/%s",
- bpf_get_tc_dir(), pathname + 2);
- pathname = tmp;
- }
-
- attr.pathname = bpf_ptr_to_u64(pathname);
-
- return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
-}
-
-const char *bpf_default_section(const enum bpf_prog_type type)
-{
- switch (type) {
- case BPF_PROG_TYPE_SCHED_CLS:
- return ELF_SECTION_CLASSIFIER;
- case BPF_PROG_TYPE_SCHED_ACT:
- return ELF_SECTION_ACTION;
- default:
- return NULL;
- }
-}
-
-enum bpf_mode {
- CBPF_BYTECODE = 0,
- CBPF_FILE,
- EBPF_OBJECT,
- EBPF_PINNED,
- __BPF_MODE_MAX,
-#define BPF_MODE_MAX __BPF_MODE_MAX
-};
-
-static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl,
- enum bpf_prog_type *type, enum bpf_mode *mode,
- const char **ptr_object, const char **ptr_section,
- const char **ptr_uds_name, struct sock_filter *opcodes)
-{
- const char *file, *section, *uds_name;
- bool verbose = false;
- int ret, argc;
- char **argv;
-
- argv = *ptr_argv;
- argc = *ptr_argc;
-
- if (opt_tbl[CBPF_BYTECODE] &&
- (matches(*argv, "bytecode") == 0 ||
- strcmp(*argv, "bc") == 0)) {
- *mode = CBPF_BYTECODE;
- } else if (opt_tbl[CBPF_FILE] &&
- (matches(*argv, "bytecode-file") == 0 ||
- strcmp(*argv, "bcf") == 0)) {
- *mode = CBPF_FILE;
- } else if (opt_tbl[EBPF_OBJECT] &&
- (matches(*argv, "object-file") == 0 ||
- strcmp(*argv, "obj") == 0)) {
- *mode = EBPF_OBJECT;
- } else if (opt_tbl[EBPF_PINNED] &&
- (matches(*argv, "object-pinned") == 0 ||
- matches(*argv, "pinned") == 0 ||
- matches(*argv, "fd") == 0)) {
- *mode = EBPF_PINNED;
- } else {
- fprintf(stderr, "What mode is \"%s\"?\n", *argv);
- return -1;
- }
-
- NEXT_ARG();
- file = section = uds_name = NULL;
- if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
- file = *argv;
- NEXT_ARG_FWD();
-
- if (*type == BPF_PROG_TYPE_UNSPEC) {
- if (argc > 0 && matches(*argv, "type") == 0) {
- NEXT_ARG();
- if (matches(*argv, "cls") == 0) {
- *type = BPF_PROG_TYPE_SCHED_CLS;
- } else if (matches(*argv, "act") == 0) {
- *type = BPF_PROG_TYPE_SCHED_ACT;
- } else {
- fprintf(stderr, "What type is \"%s\"?\n",
- *argv);
- return -1;
- }
- NEXT_ARG_FWD();
- } else {
- *type = BPF_PROG_TYPE_SCHED_CLS;
- }
- }
-
- section = bpf_default_section(*type);
- if (argc > 0 && matches(*argv, "section") == 0) {
- NEXT_ARG();
- section = *argv;
- NEXT_ARG_FWD();
- }
-
- uds_name = getenv(BPF_ENV_UDS);
- if (argc > 0 && !uds_name &&
- matches(*argv, "export") == 0) {
- NEXT_ARG();
- uds_name = *argv;
- NEXT_ARG_FWD();
- }
-
- if (argc > 0 && matches(*argv, "verbose") == 0) {
- verbose = true;
- NEXT_ARG_FWD();
- }
-
- PREV_ARG();
- }
-
- if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
- ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE);
- else if (*mode == EBPF_OBJECT)
- ret = bpf_obj_open(file, *type, section, verbose);
- else if (*mode == EBPF_PINNED)
- ret = bpf_obj_get(file);
- else
- return -1;
-
- if (ptr_object)
- *ptr_object = file;
- if (ptr_section)
- *ptr_section = section;
- if (ptr_uds_name)
- *ptr_uds_name = uds_name;
-
- *ptr_argc = argc;
- *ptr_argv = argv;
-
- return ret;
-}
-
-int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
- enum bpf_prog_type type, const char **ptr_object,
- const char **ptr_uds_name, struct nlmsghdr *n)
-{
- struct sock_filter opcodes[BPF_MAXINSNS];
- const bool opt_tbl[BPF_MODE_MAX] = {
- [CBPF_BYTECODE] = true,
- [CBPF_FILE] = true,
- [EBPF_OBJECT] = true,
- [EBPF_PINNED] = true,
- };
- char annotation[256];
- const char *section;
- enum bpf_mode mode;
- int ret;
-
- ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode,
- ptr_object, &section, ptr_uds_name, opcodes);
- if (ret < 0)
- return ret;
-
- if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
- addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
- addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
- ret * sizeof(struct sock_filter));
- }
-
- if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
- snprintf(annotation, sizeof(annotation), "%s:[%s]",
- basename(*ptr_object), mode == EBPF_PINNED ?
- "*fsobj" : section);
-
- addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
- addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
- }
-
- return 0;
-}
-
-int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
-{
- enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
- const bool opt_tbl[BPF_MODE_MAX] = {
- [CBPF_BYTECODE] = false,
- [CBPF_FILE] = false,
- [EBPF_OBJECT] = true,
- [EBPF_PINNED] = true,
- };
- const struct bpf_elf_map test = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .size_key = sizeof(int),
- .size_value = sizeof(int),
- };
- int ret, prog_fd, map_fd;
- const char *section;
- enum bpf_mode mode;
- uint32_t map_key;
-
- prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode,
- NULL, &section, NULL, NULL);
- if (prog_fd < 0)
- return prog_fd;
- if (key) {
- map_key = *key;
- } else {
- ret = sscanf(section, "%*i/%i", &map_key);
- if (ret != 1) {
- fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
- ret = -EINVAL;
- goto out_prog;
- }
- }
-
- map_fd = bpf_obj_get(map_path);
- if (map_fd < 0) {
- fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
- map_path, strerror(errno));
- ret = map_fd;
- goto out_prog;
- }
-
- ret = bpf_map_selfcheck_pinned(map_fd, &test,
- offsetof(struct bpf_elf_map, max_elem));
- if (ret < 0) {
- fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
- goto out_map;
- }
-
- ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
- if (ret < 0)
- fprintf(stderr, "Map update failed: %s\n", strerror(errno));
-out_map:
- close(map_fd);
-out_prog:
- close(prog_fd);
- return ret;
-}
-
-#ifdef HAVE_ELF
-struct bpf_elf_prog {
- enum bpf_prog_type type;
- const struct bpf_insn *insns;
- size_t size;
- const char *license;
-};
-
-struct bpf_hash_entry {
- unsigned int pinning;
- const char *subpath;
- struct bpf_hash_entry *next;
-};
-
-struct bpf_elf_ctx {
- Elf *elf_fd;
- GElf_Ehdr elf_hdr;
- Elf_Data *sym_tab;
- Elf_Data *str_tab;
- int obj_fd;
- int map_fds[ELF_MAX_MAPS];
- struct bpf_elf_map maps[ELF_MAX_MAPS];
- int sym_num;
- int map_num;
- bool *sec_done;
- int sec_maps;
- char license[ELF_MAX_LICENSE_LEN];
- enum bpf_prog_type type;
- bool verbose;
- struct bpf_elf_st stat;
- struct bpf_hash_entry *ht[256];
- char *log;
- size_t log_size;
-};
-
-struct bpf_elf_sec_data {
- GElf_Shdr sec_hdr;
- Elf_Data *sec_data;
- const char *sec_name;
-};
-
-struct bpf_map_data {
- int *fds;
- const char *obj;
- struct bpf_elf_st *st;
- struct bpf_elf_map *ent;
-};
-
-static __check_format_string(2, 3) void
-bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
-{
- va_list vl;
-
- va_start(vl, format);
- vfprintf(stderr, format, vl);
- va_end(vl);
-
- if (ctx->log && ctx->log[0]) {
- if (ctx->verbose) {
- fprintf(stderr, "%s\n", ctx->log);
- } else {
- unsigned int off = 0, len = strlen(ctx->log);
-
- if (len > BPF_MAX_LOG) {
- off = len - BPF_MAX_LOG;
- fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
- off);
- }
- fprintf(stderr, "%s\n", ctx->log + off);
- }
-
- memset(ctx->log, 0, ctx->log_size);
- }
-}
-
-static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
-{
- size_t log_size = ctx->log_size;
- void *ptr;
-
- if (!ctx->log) {
- log_size = 65536;
- } else {
- log_size <<= 1;
- if (log_size > (UINT_MAX >> 8))
- return -EINVAL;
- }
-
- ptr = realloc(ctx->log, log_size);
- if (!ptr)
- return -ENOMEM;
-
- ctx->log = ptr;
- ctx->log_size = log_size;
-
- return 0;
-}
-
-static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
- uint32_t size_value, uint32_t max_elem,
- uint32_t flags)
-{
- union bpf_attr attr = {};
-
- attr.map_type = type;
- attr.key_size = size_key;
- attr.value_size = size_value;
- attr.max_entries = max_elem;
- attr.map_flags = flags;
-
- return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-}
-
-static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t size_insns, const char *license, char *log,
- size_t size_log)
-{
- union bpf_attr attr = {};
-
- attr.prog_type = type;
- attr.insns = bpf_ptr_to_u64(insns);
- attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
- attr.license = bpf_ptr_to_u64(license);
-
- if (size_log > 0) {
- attr.log_buf = bpf_ptr_to_u64(log);
- attr.log_size = size_log;
- attr.log_level = 1;
- }
-
- return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
-static int bpf_obj_pin(int fd, const char *pathname)
-{
- union bpf_attr attr = {};
-
- attr.pathname = bpf_ptr_to_u64(pathname);
- attr.bpf_fd = fd;
-
- return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
-}
-
-static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
-{
- struct sockaddr_alg alg = {
- .salg_family = AF_ALG,
- .salg_type = "hash",
- .salg_name = "sha1",
- };
- int ret, cfd, ofd, ffd;
- struct stat stbuff;
- ssize_t size;
-
- if (!object || len != 20)
- return -EINVAL;
-
- cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
- if (cfd < 0) {
- fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
- strerror(errno));
- return cfd;
- }
-
- ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
- if (ret < 0) {
- fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
- goto out_cfd;
- }
-
- ofd = accept(cfd, NULL, 0);
- if (ofd < 0) {
- fprintf(stderr, "Error accepting socket: %s\n",
- strerror(errno));
- ret = ofd;
- goto out_cfd;
- }
-
- ffd = open(object, O_RDONLY);
- if (ffd < 0) {
- fprintf(stderr, "Error opening object %s: %s\n",
- object, strerror(errno));
- ret = ffd;
- goto out_ofd;
- }
-
- ret = fstat(ffd, &stbuff);
- if (ret < 0) {
- fprintf(stderr, "Error doing fstat: %s\n",
- strerror(errno));
- goto out_ffd;
- }
-
- size = sendfile(ofd, ffd, NULL, stbuff.st_size);
- if (size != stbuff.st_size) {
- fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
- size, stbuff.st_size, strerror(errno));
- ret = -1;
- goto out_ffd;
- }
-
- size = read(ofd, out, len);
- if (size != len) {
- fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
- size, len, strerror(errno));
- ret = -1;
- } else {
- ret = 0;
- }
-out_ffd:
- close(ffd);
-out_ofd:
- close(ofd);
-out_cfd:
- close(cfd);
- return ret;
-}
-
-static const char *bpf_get_obj_uid(const char *pathname)
-{
- static bool bpf_uid_cached;
- static char bpf_uid[64];
- uint8_t tmp[20];
- int ret;
-
- if (bpf_uid_cached)
- goto done;
-
- ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
- if (ret) {
- fprintf(stderr, "Object hashing failed!\n");
- return NULL;
- }
-
- hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
- bpf_uid_cached = true;
-done:
- return bpf_uid;
-}
-
-static int bpf_init_env(const char *pathname)
-{
- struct rlimit limit = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- /* Don't bother in case we fail! */
- setrlimit(RLIMIT_MEMLOCK, &limit);
-
- if (!bpf_get_tc_dir()) {
- fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
- return 0;
- }
-
- if (!bpf_get_obj_uid(pathname))
- return -1;
-
- return 0;
-}
-
-static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
- uint32_t pinning)
-{
- struct bpf_hash_entry *entry;
-
- entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
- while (entry && entry->pinning != pinning)
- entry = entry->next;
-
- return entry ? entry->subpath : NULL;
-}
-
-static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
- uint32_t pinning)
-{
- switch (pinning) {
- case PIN_OBJECT_NS:
- case PIN_GLOBAL_NS:
- return false;
- case PIN_NONE:
- return true;
- default:
- return !bpf_custom_pinning(ctx, pinning);
- }
-}
-
-static void bpf_make_pathname(char *pathname, size_t len, const char *name,
- const struct bpf_elf_ctx *ctx, uint32_t pinning)
-{
- switch (pinning) {
- case PIN_OBJECT_NS:
- snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
- bpf_get_obj_uid(NULL), name);
- break;
- case PIN_GLOBAL_NS:
- snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
- BPF_DIR_GLOBALS, name);
- break;
- default:
- snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(),
- bpf_custom_pinning(ctx, pinning), name);
- break;
- }
-}
-
-static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
- uint32_t pinning)
-{
- char pathname[PATH_MAX];
-
- if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
- return 0;
-
- bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
- return bpf_obj_get(pathname);
-}
-
-static int bpf_make_obj_path(void)
-{
- char tmp[PATH_MAX];
- int ret;
-
- snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(),
- bpf_get_obj_uid(NULL));
-
- ret = mkdir(tmp, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
- return ret;
- }
-
- return 0;
-}
-
-static int bpf_make_custom_path(const char *todo)
-{
- char tmp[PATH_MAX], rem[PATH_MAX], *sub;
- int ret;
-
- snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir());
- snprintf(rem, sizeof(rem), "%s/", todo);
- sub = strtok(rem, "/");
-
- while (sub) {
- if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
- return -EINVAL;
-
- strcat(tmp, sub);
- strcat(tmp, "/");
-
- ret = mkdir(tmp, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", tmp,
- strerror(errno));
- return ret;
- }
-
- sub = strtok(NULL, "/");
- }
-
- return 0;
-}
-
-static int bpf_place_pinned(int fd, const char *name,
- const struct bpf_elf_ctx *ctx, uint32_t pinning)
-{
- char pathname[PATH_MAX];
- const char *tmp;
- int ret = 0;
-
- if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
- return 0;
-
- if (pinning == PIN_OBJECT_NS)
- ret = bpf_make_obj_path();
- else if ((tmp = bpf_custom_pinning(ctx, pinning)))
- ret = bpf_make_custom_path(tmp);
- if (ret < 0)
- return ret;
-
- bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
- return bpf_obj_pin(fd, pathname);
-}
-
-static void bpf_prog_report(int fd, const char *section,
- const struct bpf_elf_prog *prog,
- struct bpf_elf_ctx *ctx)
-{
- unsigned int insns = prog->size / sizeof(struct bpf_insn);
-
- fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
- fd < 0 ? "rejected: " : "loaded",
- fd < 0 ? strerror(errno) : "",
- fd < 0 ? errno : fd);
-
- fprintf(stderr, " - Type: %u\n", prog->type);
- fprintf(stderr, " - Instructions: %u (%u over limit)\n",
- insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
- fprintf(stderr, " - License: %s\n\n", prog->license);
-
- bpf_dump_error(ctx, "Verifier analysis:\n\n");
-}
-
-static int bpf_prog_attach(const char *section,
- const struct bpf_elf_prog *prog,
- struct bpf_elf_ctx *ctx)
-{
- int tries = 0, fd;
-retry:
- errno = 0;
- fd = bpf_prog_load(prog->type, prog->insns, prog->size,
- prog->license, ctx->log, ctx->log_size);
- if (fd < 0 || ctx->verbose) {
- /* The verifier log is pretty chatty, sometimes so chatty
- * on larger programs, that we could fail to dump everything
- * into our buffer. Still, try to give a debuggable error
- * log for the user, so enlarge it and re-fail.
- */
- if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
- if (tries++ < 6 && !bpf_log_realloc(ctx))
- goto retry;
-
- fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
- ctx->log_size, tries);
- return fd;
- }
-
- bpf_prog_report(fd, section, prog, ctx);
- }
-
- return fd;
-}
-
-static void bpf_map_report(int fd, const char *name,
- const struct bpf_elf_map *map,
- struct bpf_elf_ctx *ctx)
-{
- fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
- fd < 0 ? "rejected: " : "loaded",
- fd < 0 ? strerror(errno) : "",
- fd < 0 ? errno : fd);
-
- fprintf(stderr, " - Type: %u\n", map->type);
- fprintf(stderr, " - Identifier: %u\n", map->id);
- fprintf(stderr, " - Pinning: %u\n", map->pinning);
- fprintf(stderr, " - Size key: %u\n", map->size_key);
- fprintf(stderr, " - Size value: %u\n", map->size_value);
- fprintf(stderr, " - Max elems: %u\n", map->max_elem);
- fprintf(stderr, " - Flags: %#x\n\n", map->flags);
-}
-
-static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
- struct bpf_elf_ctx *ctx)
-{
- int fd, ret;
-
- fd = bpf_probe_pinned(name, ctx, map->pinning);
- if (fd > 0) {
- ret = bpf_map_selfcheck_pinned(fd, map,
- offsetof(struct bpf_elf_map,
- id));
- if (ret < 0) {
- close(fd);
- fprintf(stderr, "Map \'%s\' self-check failed!\n",
- name);
- return ret;
- }
- if (ctx->verbose)
- fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
- name);
- return fd;
- }
-
- errno = 0;
- fd = bpf_map_create(map->type, map->size_key, map->size_value,
- map->max_elem, map->flags);
- if (fd < 0 || ctx->verbose) {
- bpf_map_report(fd, name, map, ctx);
- if (fd < 0)
- return fd;
- }
-
- ret = bpf_place_pinned(fd, name, ctx, map->pinning);
- if (ret < 0 && errno != EEXIST) {
- fprintf(stderr, "Could not pin %s map: %s\n", name,
- strerror(errno));
- close(fd);
- return ret;
- }
-
- return fd;
-}
-
-static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
- const GElf_Sym *sym)
-{
- return ctx->str_tab->d_buf + sym->st_name;
-}
-
-static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
-{
- GElf_Sym sym;
- int i;
-
- for (i = 0; i < ctx->sym_num; i++) {
- if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
- continue;
-
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
- GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
- sym.st_shndx != ctx->sec_maps ||
- sym.st_value / sizeof(struct bpf_elf_map) != which)
- continue;
-
- return bpf_str_tab_name(ctx, &sym);
- }
-
- return NULL;
-}
-
-static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
-{
- const char *map_name;
- int i, fd;
-
- for (i = 0; i < ctx->map_num; i++) {
- map_name = bpf_map_fetch_name(ctx, i);
- if (!map_name)
- return -EIO;
-
- fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
- if (fd < 0)
- return fd;
-
- ctx->map_fds[i] = fd;
- }
-
- return 0;
-}
-
-static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- Elf_Data *sec_edata;
- GElf_Shdr sec_hdr;
- Elf_Scn *sec_fd;
- char *sec_name;
-
- memset(data, 0, sizeof(*data));
-
- sec_fd = elf_getscn(ctx->elf_fd, section);
- if (!sec_fd)
- return -EINVAL;
- if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
- return -EIO;
-
- sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
- sec_hdr.sh_name);
- if (!sec_name || !sec_hdr.sh_size)
- return -ENOENT;
-
- sec_edata = elf_getdata(sec_fd, NULL);
- if (!sec_edata || elf_getdata(sec_fd, sec_edata))
- return -EIO;
-
- memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
-
- data->sec_name = sec_name;
- data->sec_data = sec_edata;
- return 0;
-}
-
-static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
- return -EINVAL;
-
- ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
- ctx->sec_maps = section;
- ctx->sec_done[section] = true;
-
- if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
- fprintf(stderr, "Too many BPF maps in ELF section!\n");
- return -ENOMEM;
- }
-
- memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
- return 0;
-}
-
-static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- if (data->sec_data->d_size > sizeof(ctx->license))
- return -ENOMEM;
-
- memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
- ctx->sec_done[section] = true;
- return 0;
-}
-
-static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- ctx->sym_tab = data->sec_data;
- ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
- ctx->sec_done[section] = true;
- return 0;
-}
-
-static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- ctx->str_tab = data->sec_data;
- ctx->sec_done[section] = true;
- return 0;
-}
-
-static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
-{
- return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
-}
-
-static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
-{
- struct bpf_elf_sec_data data;
- int i, ret = -1;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- ret = bpf_fill_section_data(ctx, i, &data);
- if (ret < 0)
- continue;
-
- if (data.sec_hdr.sh_type == SHT_PROGBITS &&
- !strcmp(data.sec_name, ELF_SECTION_MAPS))
- ret = bpf_fetch_maps(ctx, i, &data);
- else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
- !strcmp(data.sec_name, ELF_SECTION_LICENSE))
- ret = bpf_fetch_license(ctx, i, &data);
- else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
- !strcmp(data.sec_name, ".symtab"))
- ret = bpf_fetch_symtab(ctx, i, &data);
- else if (data.sec_hdr.sh_type == SHT_STRTAB &&
- !strcmp(data.sec_name, ".strtab"))
- ret = bpf_fetch_strtab(ctx, i, &data);
- if (ret < 0) {
- fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
- i);
- break;
- }
- }
-
- if (bpf_has_map_data(ctx)) {
- ret = bpf_maps_attach_all(ctx);
- if (ret < 0) {
- fprintf(stderr, "Error loading maps into kernel!\n");
- return ret;
- }
- }
-
- return ret;
-}
-
-static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
-{
- struct bpf_elf_sec_data data;
- struct bpf_elf_prog prog;
- int ret, i, fd = -1;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- if (ctx->sec_done[i])
- continue;
-
- ret = bpf_fill_section_data(ctx, i, &data);
- if (ret < 0 ||
- !(data.sec_hdr.sh_type == SHT_PROGBITS &&
- data.sec_hdr.sh_flags & SHF_EXECINSTR &&
- !strcmp(data.sec_name, section)))
- continue;
-
- memset(&prog, 0, sizeof(prog));
- prog.type = ctx->type;
- prog.insns = data.sec_data->d_buf;
- prog.size = data.sec_data->d_size;
- prog.license = ctx->license;
-
- fd = bpf_prog_attach(section, &prog, ctx);
- if (fd < 0)
- break;
-
- ctx->sec_done[i] = true;
- break;
- }
-
- return fd;
-}
-
-static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
- struct bpf_elf_sec_data *data_relo,
- struct bpf_elf_sec_data *data_insn)
-{
- Elf_Data *idata = data_insn->sec_data;
- GElf_Shdr *rhdr = &data_relo->sec_hdr;
- int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
- struct bpf_insn *insns = idata->d_buf;
- unsigned int num_insns = idata->d_size / sizeof(*insns);
-
- for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
- unsigned int ioff, rmap;
- GElf_Rel relo;
- GElf_Sym sym;
-
- if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
- return -EIO;
-
- ioff = relo.r_offset / sizeof(struct bpf_insn);
- if (ioff >= num_insns ||
- insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
- fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
- ioff);
- if (ioff < num_insns &&
- insns[ioff].code == (BPF_JMP | BPF_CALL))
- fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
- return -EINVAL;
- }
-
- if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
- return -EIO;
- if (sym.st_shndx != ctx->sec_maps) {
- fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
- relo_ent, sym.st_shndx);
- return -EIO;
- }
-
- rmap = sym.st_value / sizeof(struct bpf_elf_map);
- if (rmap >= ARRAY_SIZE(ctx->map_fds))
- return -EINVAL;
- if (!ctx->map_fds[rmap])
- return -EINVAL;
-
- if (ctx->verbose)
- fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
- bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
- data_insn->sec_name, ioff);
-
- insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
- insns[ioff].imm = ctx->map_fds[rmap];
- }
-
- return 0;
-}
-
-static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
- bool *lderr)
-{
- struct bpf_elf_sec_data data_relo, data_insn;
- struct bpf_elf_prog prog;
- int ret, idx, i, fd = -1;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- ret = bpf_fill_section_data(ctx, i, &data_relo);
- if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
- continue;
-
- idx = data_relo.sec_hdr.sh_info;
- ret = bpf_fill_section_data(ctx, idx, &data_insn);
- if (ret < 0 ||
- !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
- data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
- !strcmp(data_insn.sec_name, section)))
- continue;
-
- ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
- if (ret < 0)
- continue;
-
- memset(&prog, 0, sizeof(prog));
- prog.type = ctx->type;
- prog.insns = data_insn.sec_data->d_buf;
- prog.size = data_insn.sec_data->d_size;
- prog.license = ctx->license;
-
- fd = bpf_prog_attach(section, &prog, ctx);
- if (fd < 0) {
- *lderr = true;
- break;
- }
-
- ctx->sec_done[i] = true;
- ctx->sec_done[idx] = true;
- break;
- }
-
- return fd;
-}
-
-static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
-{
- bool lderr = false;
- int ret = -1;
-
- if (bpf_has_map_data(ctx))
- ret = bpf_fetch_prog_relo(ctx, section, &lderr);
- if (ret < 0 && !lderr)
- ret = bpf_fetch_prog(ctx, section);
-
- return ret;
-}
-
-static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
- if (ctx->map_fds[i] && ctx->maps[i].id == id &&
- ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
- return i;
- return -1;
-}
-
-static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
-{
- struct bpf_elf_sec_data data;
- uint32_t map_id, key_id;
- int fd, i, ret, idx;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- if (ctx->sec_done[i])
- continue;
-
- ret = bpf_fill_section_data(ctx, i, &data);
- if (ret < 0)
- continue;
-
- ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
- if (ret != 2)
- continue;
-
- idx = bpf_find_map_by_id(ctx, map_id);
- if (idx < 0)
- continue;
-
- fd = bpf_fetch_prog_sec(ctx, data.sec_name);
- if (fd < 0)
- return -EIO;
-
- ret = bpf_map_update(ctx->map_fds[idx], &key_id,
- &fd, BPF_ANY);
- if (ret < 0) {
- if (errno == E2BIG)
- fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
- key_id, map_id);
- return -errno;
- }
-
- ctx->sec_done[i] = true;
- }
-
- return 0;
-}
-
-static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
-{
- struct stat st;
- int ret;
-
- memset(&ctx->stat, 0, sizeof(ctx->stat));
-
- ret = fstat(ctx->obj_fd, &st);
- if (ret < 0) {
- fprintf(stderr, "Stat of elf file failed: %s\n",
- strerror(errno));
- return;
- }
-
- ctx->stat.st_dev = st.st_dev;
- ctx->stat.st_ino = st.st_ino;
-}
-
-static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
-{
- char buff[PATH_MAX];
-
- while (fgets(buff, sizeof(buff), fp)) {
- char *ptr = buff;
-
- while (*ptr == ' ' || *ptr == '\t')
- ptr++;
-
- if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
- continue;
-
- if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
- sscanf(ptr, "%i %s #", id, path) != 2) {
- strcpy(path, ptr);
- return -1;
- }
-
- return 1;
- }
-
- return 0;
-}
-
-static bool bpf_pinning_reserved(uint32_t pinning)
-{
- switch (pinning) {
- case PIN_NONE:
- case PIN_OBJECT_NS:
- case PIN_GLOBAL_NS:
- return true;
- default:
- return false;
- }
-}
-
-static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
-{
- struct bpf_hash_entry *entry;
- char subpath[PATH_MAX] = {};
- uint32_t pinning;
- FILE *fp;
- int ret;
-
- fp = fopen(db_file, "r");
- if (!fp)
- return;
-
- while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
- if (ret == -1) {
- fprintf(stderr, "Database %s is corrupted at: %s\n",
- db_file, subpath);
- fclose(fp);
- return;
- }
-
- if (bpf_pinning_reserved(pinning)) {
- fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
- db_file, pinning);
- continue;
- }
-
- entry = malloc(sizeof(*entry));
- if (!entry) {
- fprintf(stderr, "No memory left for db entry!\n");
- continue;
- }
-
- entry->pinning = pinning;
- entry->subpath = strdup(subpath);
- if (!entry->subpath) {
- fprintf(stderr, "No memory left for db entry!\n");
- free(entry);
- continue;
- }
-
- entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
- ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
- }
-
- fclose(fp);
-}
-
-static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
-{
- struct bpf_hash_entry *entry;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
- while ((entry = ctx->ht[i]) != NULL) {
- ctx->ht[i] = entry->next;
- free((char *)entry->subpath);
- free(entry);
- }
- }
-}
-
-static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
-{
- if (ctx->elf_hdr.e_type != ET_REL ||
- (ctx->elf_hdr.e_machine != EM_NONE &&
- ctx->elf_hdr.e_machine != EM_BPF) ||
- ctx->elf_hdr.e_version != EV_CURRENT) {
- fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
- return -EINVAL;
- }
-
- switch (ctx->elf_hdr.e_ident[EI_DATA]) {
- default:
- fprintf(stderr, "ELF format error, wrong endianness info?\n");
- return -EINVAL;
- case ELFDATA2LSB:
- if (htons(1) == 1) {
- fprintf(stderr,
- "We are big endian, eBPF object is little endian!\n");
- return -EIO;
- }
- break;
- case ELFDATA2MSB:
- if (htons(1) != 1) {
- fprintf(stderr,
- "We are little endian, eBPF object is big endian!\n");
- return -EIO;
- }
- break;
- }
-
- return 0;
-}
-
-static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
- enum bpf_prog_type type, bool verbose)
-{
- int ret = -EINVAL;
-
- if (elf_version(EV_CURRENT) == EV_NONE ||
- bpf_init_env(pathname))
- return ret;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->verbose = verbose;
- ctx->type = type;
-
- ctx->obj_fd = open(pathname, O_RDONLY);
- if (ctx->obj_fd < 0)
- return ctx->obj_fd;
-
- ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
- if (!ctx->elf_fd) {
- ret = -EINVAL;
- goto out_fd;
- }
-
- if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
- ret = -EINVAL;
- goto out_fd;
- }
-
- if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
- &ctx->elf_hdr) {
- ret = -EIO;
- goto out_elf;
- }
-
- ret = bpf_elf_check_ehdr(ctx);
- if (ret < 0)
- goto out_elf;
-
- ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
- sizeof(*(ctx->sec_done)));
- if (!ctx->sec_done) {
- ret = -ENOMEM;
- goto out_elf;
- }
-
- if (ctx->verbose && bpf_log_realloc(ctx)) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- bpf_save_finfo(ctx);
- bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
-
- return 0;
-out_free:
- free(ctx->sec_done);
-out_elf:
- elf_end(ctx->elf_fd);
-out_fd:
- close(ctx->obj_fd);
- return ret;
-}
-
-static int bpf_maps_count(struct bpf_elf_ctx *ctx)
-{
- int i, count = 0;
-
- for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
- if (!ctx->map_fds[i])
- break;
- count++;
- }
-
- return count;
-}
-
-static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
- if (ctx->map_fds[i])
- close(ctx->map_fds[i]);
- }
-}
-
-static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
-{
- if (failure)
- bpf_maps_teardown(ctx);
-
- bpf_hash_destroy(ctx);
-
- free(ctx->sec_done);
- free(ctx->log);
-
- elf_end(ctx->elf_fd);
- close(ctx->obj_fd);
-}
-
-static struct bpf_elf_ctx __ctx;
-
-static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
- const char *section, bool verbose)
-{
- struct bpf_elf_ctx *ctx = &__ctx;
- int fd = 0, ret;
-
- ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
- if (ret < 0) {
- fprintf(stderr, "Cannot initialize ELF context!\n");
- return ret;
- }
-
- ret = bpf_fetch_ancillary(ctx);
- if (ret < 0) {
- fprintf(stderr, "Error fetching ELF ancillary data!\n");
- goto out;
- }
-
- fd = bpf_fetch_prog_sec(ctx, section);
- if (fd < 0) {
- fprintf(stderr, "Error fetching program/map!\n");
- ret = fd;
- goto out;
- }
-
- ret = bpf_fill_prog_arrays(ctx);
- if (ret < 0)
- fprintf(stderr, "Error filling program arrays!\n");
-out:
- bpf_elf_ctx_destroy(ctx, ret < 0);
- if (ret < 0) {
- if (fd)
- close(fd);
- return ret;
- }
-
- return fd;
-}
-
-static int
-bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
- const struct bpf_map_data *aux, unsigned int entries)
-{
- struct bpf_map_set_msg msg = {
- .aux.uds_ver = BPF_SCM_AUX_VER,
- .aux.num_ent = entries,
- };
- int *cmsg_buf, min_fd;
- char *amsg_buf;
- int i;
-
- strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
- memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
-
- cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
- amsg_buf = (char *)msg.aux.ent;
-
- for (i = 0; i < entries; i += min_fd) {
- int ret;
-
- min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
- bpf_map_set_init_single(&msg, min_fd);
-
- memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
- memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
-
- ret = sendmsg(fd, &msg.hdr, 0);
- if (ret <= 0)
- return ret ? : -1;
- }
-
- return 0;
-}
-
-static int
-bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
- unsigned int entries)
-{
- struct bpf_map_set_msg msg;
- int *cmsg_buf, min_fd;
- char *amsg_buf, *mmsg_buf;
- unsigned int needed = 1;
- int i;
-
- cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
- amsg_buf = (char *)msg.aux.ent;
- mmsg_buf = (char *)&msg.aux;
-
- for (i = 0; i < min(entries, needed); i += min_fd) {
- struct cmsghdr *cmsg;
- int ret;
-
- min_fd = min(entries, entries - i);
- bpf_map_set_init_single(&msg, min_fd);
-
- ret = recvmsg(fd, &msg.hdr, 0);
- if (ret <= 0)
- return ret ? : -1;
-
- cmsg = CMSG_FIRSTHDR(&msg.hdr);
- if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
- return -EINVAL;
- if (msg.hdr.msg_flags & MSG_CTRUNC)
- return -EIO;
- if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
- return -ENOSYS;
-
- min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
- if (min_fd > entries || min_fd <= 0)
- return -EINVAL;
-
- memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
- memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
- memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
-
- needed = aux->num_ent;
- }
-
- return 0;
-}
-
-int bpf_send_map_fds(const char *path, const char *obj)
-{
- struct bpf_elf_ctx *ctx = &__ctx;
- struct sockaddr_un addr = { .sun_family = AF_UNIX };
- struct bpf_map_data bpf_aux = {
- .fds = ctx->map_fds,
- .ent = ctx->maps,
- .st = &ctx->stat,
- .obj = obj,
- };
- int fd, ret;
-
- fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (fd < 0) {
- fprintf(stderr, "Cannot open socket: %s\n",
- strerror(errno));
- return -1;
- }
-
- strncpy(addr.sun_path, path, sizeof(addr.sun_path));
-
- ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
- if (ret < 0) {
- fprintf(stderr, "Cannot connect to %s: %s\n",
- path, strerror(errno));
- return -1;
- }
-
- ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
- bpf_maps_count(ctx));
- if (ret < 0)
- fprintf(stderr, "Cannot send fds to %s: %s\n",
- path, strerror(errno));
-
- bpf_maps_teardown(ctx);
- close(fd);
- return ret;
-}
-
-int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
- unsigned int entries)
-{
- struct sockaddr_un addr = { .sun_family = AF_UNIX };
- int fd, ret;
-
- fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (fd < 0) {
- fprintf(stderr, "Cannot open socket: %s\n",
- strerror(errno));
- return -1;
- }
-
- strncpy(addr.sun_path, path, sizeof(addr.sun_path));
-
- ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
- if (ret < 0) {
- fprintf(stderr, "Cannot bind to socket: %s\n",
- strerror(errno));
- return -1;
- }
-
- ret = bpf_map_set_recv(fd, fds, aux, entries);
- if (ret < 0)
- fprintf(stderr, "Cannot recv fds from %s: %s\n",
- path, strerror(errno));
-
- unlink(addr.sun_path);
- close(fd);
- return ret;
-}
-#endif /* HAVE_ELF */
deleted file mode 100644
@@ -1,82 +0,0 @@
-/*
- * tc_bpf.h BPF common code
- *
- * This program is free software; you can distribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Daniel Borkmann <dborkman@redhat.com>
- * Jiri Pirko <jiri@resnulli.us>
- */
-
-#ifndef _TC_BPF_H_
-#define _TC_BPF_H_ 1
-
-#include <linux/netlink.h>
-#include <linux/bpf.h>
-#include <linux/magic.h>
-
-#include "utils.h"
-#include "bpf_scm.h"
-
-enum {
- BPF_NLA_OPS_LEN = 0,
- BPF_NLA_OPS,
- BPF_NLA_FD,
- BPF_NLA_NAME,
- __BPF_NLA_MAX,
-};
-
-#define BPF_NLA_MAX __BPF_NLA_MAX
-
-#define BPF_ENV_UDS "TC_BPF_UDS"
-#define BPF_ENV_MNT "TC_BPF_MNT"
-
-#ifndef BPF_MAX_LOG
-# define BPF_MAX_LOG 4096
-#endif
-
-#ifndef BPF_FS_MAGIC
-# define BPF_FS_MAGIC 0xcafe4a11
-#endif
-
-#define BPF_DIR_MNT "/sys/fs/bpf"
-
-#define BPF_DIR_TC "tc"
-#define BPF_DIR_GLOBALS "globals"
-
-#ifndef TRACEFS_MAGIC
-# define TRACEFS_MAGIC 0x74726163
-#endif
-
-#define TRACE_DIR_MNT "/sys/kernel/tracing"
-
-int bpf_trace_pipe(void);
-const char *bpf_default_section(const enum bpf_prog_type type);
-
-int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
- enum bpf_prog_type type, const char **ptr_object,
- const char **ptr_uds_name, struct nlmsghdr *n);
-int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
-
-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
-
-#ifdef HAVE_ELF
-int bpf_send_map_fds(const char *path, const char *obj);
-int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
- unsigned int entries);
-#else
-static inline int bpf_send_map_fds(const char *path, const char *obj)
-{
- return 0;
-}
-
-static inline int bpf_recv_map_fds(const char *path, int *fds,
- struct bpf_map_aux *aux,
- unsigned int entries)
-{
- return -1;
-}
-#endif /* HAVE_ELF */
-#endif /* _TC_BPF_H_ */